Skip to content

Commit

Permalink
upgraded baryon and dibaryon benchmarks for generic flavors
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Wagman committed Apr 4, 2023
2 parents b42bd61 + 82a43a4 commit e2d5f6b
Show file tree
Hide file tree
Showing 15 changed files with 3,374 additions and 2,496 deletions.
15 changes: 8 additions & 7 deletions benchmarks/compile_and_run_benchmarks_mpi.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#!/bin/bash

# Machine specific directories
MPIINCLUDE="-I/gscratch/sw/intel-2020u1/compilers_and_libraries_2020.1.217/linux/mpi/intel64/include"
MPILIB="-L/gscratch/sw/intel-2020u1/compilers_and_libraries_2020.1.217/linux/mpi/intel64/lib -L/gscratch/sw/intel-2020u1/compilers_and_libraries_2020.1.217/linux/mpi/intel64/lib/release"

MPIINCLUDE="-I/opt/ohpc/pub/compiler/intel/compilers_and_libraries_2019.4.243/linux/mpi/intel64/include"
MPILIB="-L/opt/ohpc/pub/compiler/intel/compilers_and_libraries_2019.4.243/linux/mpi/intel64/lib"

CXX="mpicxx -DWITH_MPI"
DXX="mpicxx -DWITH_MPI"
Expand All @@ -23,9 +22,11 @@ KERNEL_FOLDER=$1
KERNEL=$2
source configure_paths.sh

CXXFLAGS="-std=c++11 -O3 -fno-rtti -mavx2 -m64 $MPIINCLUDE $MPILIB -lmpi"
CORES=128
#CORES=2
CXXFLAGS="-std=c++11 -O3 -fno-rtti -mavx512f -m64 $MPIINCLUDE $MPILIB -lmpi"
#CORES=128
#CORES=216
#CORES=1
CORES=8

# Compile options
# - Make ${CXX} dump generated assembly
Expand Down Expand Up @@ -74,7 +75,7 @@ echo "Running ${KERNEL} wrapper"
#VTUNE_METRIC=memory-access
#PROFILING_COMMAND="amplxe-cl -collect ${VTUNE_METRIC} -result-dir vtune_results -quiet"
#rm -rf vtune_results
RUN_REF=1 RUN_TIRAMISU=1 HL_NUM_THREADS=$CORES LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HALIDE_LIB_DIRECTORY}:${ISL_LIB_DIRECTORY}:${TIRAMISU_ROOT}/build/:${MKL_PREFIX}/lib/${MKL_LIB_PATH_SUFFIX} DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${HALIDE_LIB_DIRECTORY}:${TIRAMISU_ROOT}/build/:${MKL_PREFIX}/lib/${MKL_LIB_PATH_SUFFIX} ${PROFILING_COMMAND} mpirun -np ${CORES} ./${KERNEL}_wrapper
#RUN_REF=1 RUN_TIRAMISU=1 HL_NUM_THREADS=$CORES LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HALIDE_LIB_DIRECTORY}:${ISL_LIB_DIRECTORY}:${TIRAMISU_ROOT}/build/:${MKL_PREFIX}/lib/${MKL_LIB_PATH_SUFFIX} DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${HALIDE_LIB_DIRECTORY}:${TIRAMISU_ROOT}/build/:${MKL_PREFIX}/lib/${MKL_LIB_PATH_SUFFIX} ${PROFILING_COMMAND} mpirun -np ${CORES} ./${KERNEL}_wrapper

#rm -rf ${KERNEL}_generator ${KERNEL}_wrapper generated_${KERNEL}.o generated_${KERNEL}.o.h

Expand Down
70 changes: 70 additions & 0 deletions benchmarks/run_benchmarks_mpi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/bin/bash
#
# Compile the wrapper of an already generated benchmark kernel with the MPI
# C++ compiler, then launch it through srun.
#
# Usage:   script.sh <KERNEL_FOLDER> <KERNEL_NAME_WITHOUT_EXTENSION>
# Example: script.sh level1/axpy axpy
#
# Inside <KERNEL_FOLDER> the script compiles <KERNEL>_wrapper.cpp and links it
# against generated_<KERNEL>.o, so both files must already be there.
# configure_paths.sh is sourced before changing directory; it presumably
# defines MKL_PREFIX, TIRAMISU_ROOT, HALIDE_* and ISL_* used below
# (NOTE(review): confirm against configure_paths.sh).
#
# Exit status: 1 on bad usage or unreachable kernel folder, the compiler's
# status if compilation fails, otherwise the status of the srun launch.

# Machine specific directories
MPIINCLUDE="-I/opt/ohpc/pub/compiler/intel/compilers_and_libraries_2019.4.243/linux/mpi/intel64/include"
MPILIB="-L/opt/ohpc/pub/compiler/intel/compilers_and_libraries_2019.4.243/linux/mpi/intel64/lib"

# CXX is not used directly in this script; it is kept because the sourced
# configure_paths.sh may read it (NOTE(review): confirm, otherwise drop it).
CXX="mpicxx -DWITH_MPI"
DXX="mpicxx -DWITH_MPI"

# Trace every command so the exact compile/run lines end up in the job log.
set -x

# Both positional arguments are required: with only one, KERNEL would be empty
# and the script would try to build "_wrapper.cpp".
if [[ $# -lt 2 ]]; then
  # NOTE(review): the usage text mentions TIRAMISU_SMALL=1, but this script
  # never reads it; the data size is fixed by DEFINED_SIZE below.
  echo "Usage: TIRAMISU_SMALL=1 script.sh <KERNEL_FOLDER> <KERNEL_NAME_WITHOUT_EXTENSION>" >&2
  echo "Example: script.sh level1/axpy axpy" >&2
  exit 1
fi

# Define data sizes, possible values: -DTIRAMISU_XLARGE, -DTIRAMISU_LARGE, -DTIRAMISU_MEDIUM, -DTIRAMISU_SMALL
DEFINED_SIZE="-DTIRAMISU_LARGE"

KERNEL_FOLDER=$1
KERNEL=$2
source configure_paths.sh

CXXFLAGS="-m64 -std=c++11 -O3 -fno-rtti -mavx2 $MPIINCLUDE $MPILIB -lmpi"
# Number of MPI tasks passed to srun; also exported as HL_NUM_THREADS.
CORES=8

# Compile options
# - Make ${CXX} dump generated assembly
#   CXXFLAGS: -g -Wa,-alh
# - Get info about ${CXX} vectorization
#   CXXFLAGS -fopt-info-vec
# - Pass options to the llvm compiler
#   HL_LLVM_ARGS="-help"
# - Set thread number for Halide
#   HL_NUM_THREADS=32
# - Execution env variables
#   OMP_NUM_THREADS=48
#   to set the number of threads to use by OpenMP.
# - Command to run Vtune
#   source /data/scratch/yunming/intel_parallel_studio_cluster/parallel_studio_xe_2017/install/vtune_amplifier_xe/amplxe-vars.sh
#   amplxe-cl -collect hpc-performance -result-dir vtune_results -quiet ./binary
#   Guide: https://software.intel.com/en-us/vtune-amplifier-help-amplxe-cl-command-syntax

INCLUDES="-I${MKL_PREFIX}/include/ -I${TIRAMISU_ROOT}/include/ -I${HALIDE_SOURCE_DIRECTORY}/include/ -I${ISL_INCLUDE_DIRECTORY} -I${TIRAMISU_ROOT}/benchmarks/"
LIBRARIES="-ltiramisu ${MKL_FLAGS} -lHalide -lisl -lz -lpthread -ldl ${EXTRA_LIBRARIES}"
LIBRARIES_DIR="-L${MKL_PREFIX}/lib/${MKL_LIB_PATH_SUFFIX} -L${HALIDE_LIB_DIRECTORY}/ -L${ISL_LIB_DIRECTORY}/ -L${TIRAMISU_ROOT}/build/"

echo "Compiling ${KERNEL}"

# Stop here if the folder is wrong; otherwise the compile and run steps would
# operate on whatever happens to be in the current directory.
cd "${KERNEL_FOLDER}" || {
  echo "Cannot enter kernel folder: ${KERNEL_FOLDER}" >&2
  exit 1
}

echo "Compiling ${KERNEL} wrapper"
# The flag variables each hold several words and rely on word splitting, so
# they are intentionally left unquoted. ${LIBRARIES} is listed on both sides
# of the object file, as in the original link line.
# shellcheck disable=SC2086
${DXX} ${LANKA_OPTIONS} $CXXFLAGS ${INCLUDES} ${DEFINED_SIZE} "${KERNEL}_wrapper.cpp" ${LIBRARIES_DIR} ${LIBRARIES} "generated_${KERNEL}.o" ${LIBRARIES} -o "${KERNEL}_wrapper"
compile_status=$?
if [[ ${compile_status} -ne 0 ]]; then
  # Do not fall through to srun: it would launch a stale or missing binary.
  echo "Compilation of ${KERNEL}_wrapper failed (status ${compile_status})" >&2
  exit "${compile_status}"
fi

echo "Running ${KERNEL} wrapper"
# To enable profiling:
## Perf:
#PROFILING_COMMAND="perf stat -e cycles,instructions,cache-misses,L1-icache-load-misses,LLC-load-misses,dTLB-load-misses,cpu-migrations,context-switches,bus-cycles,cache-references,minor-faults"
## Vtune:
#VTUNE_METRIC=hpc-performance
#VTUNE_METRIC=memory-access
#PROFILING_COMMAND="amplxe-cl -collect ${VTUNE_METRIC} -result-dir vtune_results -quiet"
#rm -rf vtune_results

# PROFILING_COMMAND is normally unset and expands to nothing; when set it is
# an optional multi-word prefix, hence unquoted.
# shellcheck disable=SC2086
RUN_REF=1 RUN_TIRAMISU=1 HL_NUM_THREADS=$CORES \
  LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HALIDE_LIB_DIRECTORY}:${ISL_LIB_DIRECTORY}:${TIRAMISU_ROOT}/build/:${MKL_PREFIX}/lib/${MKL_LIB_PATH_SUFFIX} \
  DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${HALIDE_LIB_DIRECTORY}:${TIRAMISU_ROOT}/build/:${MKL_PREFIX}/lib/${MKL_LIB_PATH_SUFFIX} \
  ${PROFILING_COMMAND} srun -n ${CORES} --mpi=pmi2 "./${KERNEL}_wrapper"
run_status=$?

#rm -rf ${KERNEL}_generator ${KERNEL}_wrapper generated_${KERNEL}.o generated_${KERNEL}.o.h

cd -

# Report the benchmark's own result rather than the status of "cd -".
exit "${run_status}"
Loading

0 comments on commit e2d5f6b

Please sign in to comment.