├── .gitignore ├── README.md ├── arm_conda.sh ├── batch_run.sh ├── data.config ├── hpcbench ├── hpcbench.py ├── init.sh ├── package ├── common │ ├── check_deps.sh │ ├── check_root.sh │ └── download.sh ├── ior │ └── master │ │ └── install.sh ├── openblas │ └── 0.3.18 │ │ └── install.sh └── osu │ └── 7.0.1 │ └── install.sh ├── requirements.yaml ├── result ├── test_result.json └── test_score.json ├── run.AI ├── run.balance ├── run.compute ├── run.network ├── run.storage ├── run.system ├── setting.py ├── templates ├── AI │ ├── maskrcnn.aarch64.config │ ├── maskrcnn.x86_64.config │ ├── resnet.aarch64.config │ └── resnet.x86_64.config ├── balance │ ├── balance.linux64.config │ └── stream │ │ └── main │ │ └── stream.linux64.config ├── compute │ ├── hpcg.aarch64.config │ ├── hpcg.x86_64.config │ ├── hpl.aarch64.config │ └── hpl.x86_64.config ├── network │ ├── osu.aarch64.config │ └── osu.x86_64.config ├── storage │ ├── ior.aarch64.config │ ├── ior.x86_64.config │ └── protocol │ │ ├── hadoop.aarch64.config │ │ ├── hadoop.x86_64.config │ │ ├── nfs.aarch64.config │ │ ├── nfs.x86_64.config │ │ ├── nfs_environment.md │ │ ├── nfs_environment.sh │ │ ├── posix.aarch64.config │ │ ├── posix.x86_64.config │ │ ├── warp.aarch64.config │ │ └── warp.x86_64.config └── system │ └── system.linux64.config └── utils ├── app.py ├── build.py ├── config.py ├── download.py ├── execute.py ├── install.py ├── invoke.py ├── machine.py ├── report_tmp.html ├── result.py ├── scheduler.py ├── score.py ├── standard_score.json └── tool.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.a 4 | *.com 5 | *.class 6 | *.dll 7 | *.exe 8 | *.o 9 | *.o.d 10 | *.py[ocd] 11 | *.so 12 | 13 | # Packages # 14 | ############ 15 | # it's better to unpack these files and commit the raw source 16 | # git has its own built in compression methods 17 | *.7z 18 | *.bz2 19 | *.bzip2 20 | *.dmg 21 | *.gz 22 | *.iso 23 | *.jar 24 | 
*.rar 25 | *.tar 26 | *.tbz2 27 | *.tgz 28 | *.zip 29 | 30 | # Python files # 31 | ################ 32 | # setup.py working directory 33 | build 34 | # sphinx build directory 35 | _build 36 | # setup.py dist directory 37 | dist 38 | doc/build 39 | doc/cdoc/build 40 | # Egg metadata 41 | *.egg-info 42 | # The shelf plugin uses this dir 43 | ./.shelf 44 | MANIFEST 45 | .cache 46 | pip-wheel-metadata 47 | .python-version 48 | 49 | # Logs and databases # 50 | ###################### 51 | *.log 52 | *.sql 53 | *.sqlite 54 | # Things specific to this project # 55 | ###################### 56 | env.sh 57 | build.sh 58 | test.py 59 | hostfile 60 | .vscode 61 | tmp 62 | downloads/* 63 | depend_install.sh 64 | build.sh 65 | .meta 66 | software/* 67 | test.sh 68 | *.swp 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | hpcbenchmark是一个高性能集群计算性能评测工具集,本评测工具集引入与使用场景相关的性能指标,通过综合评分方法,为集群的计算、存储、网络和效率等关键维度,分别给出评价分数。 2 | 3 | 评测工具集准备了6大评测维度的工具模块,分别为: 4 | 5 | 1.计算性能维度 2.AI计算性能 3.存储性能维度 4.网络性能维度 5.系统能效维度 6.系统平衡性维度 6 | 7 | 用户可简单根据自己集群配置修改模板文件,即可对集群进行评测。 8 | 9 | # 项目目录结构 10 | 11 | 项目文件夹内包含以下几个文件和目录: 12 | 13 | ``` 14 | #模块测试目录 15 | benchmark 16 | #数据下载目录 17 | downloads 18 | #配置文件目录 19 | templates 20 | #测试结果目录 21 | result 22 | #软件安装目录 23 | software 24 | #初始环境文件 25 | init.sh 26 | #主程序 27 | hpcbench 28 | ``` 29 | 30 | # 项目测试环境 31 | HPCBench在下列环境分别通过测试: 32 | 1. X86 + NVIDIA GPU 33 | OS:CentOS 8 34 | CPU:Intel Platinum 8358 35 | GPU:NVIDIA A100 36 | 37 | 2. 
ARM + 昇腾NPU 38 | OS:openeuler 2203 39 | CPU: kunpeng 920 40 | NPU:Ascend 910 41 | 42 | # 评测工具集使用方法 43 | 44 | ## 依赖环境 45 | 46 | 运行环境:Python3 47 | 编译器:GCC-11.2.0 48 | MPI:OpenMPI-4.1.1 49 | CUDA:cuda-11.8 50 | 调度系统:slurm 51 | 52 | ## 安装hpcbenchmark 53 | 可以使用以下命令将hpcbenchmark仓库克隆到本地,并安装必须的依赖。 54 | 55 | ``` 56 | $ git clone https://github.com/SJTU-HPC/hpcbenchmarks.git 57 | $ conda env create -f requirements.yaml 58 | $ conda activate hpcbench 59 | ``` 60 | 61 | ## 初始化环境 62 | 用户需要根据具体集群配置简单修改初始环境文件`init.sh`,包括录入集群信息,以及调用GCC和OpenMPI命令 63 | 64 | 使用以下命令进行初始化环境操作: 65 | 66 | ``` 67 | $ source init.sh 68 | ``` 69 | 70 | ## 程序命令行参数 71 | 初始化完后,查看HPCBench的命令行参数: 72 | 73 | ``` 74 | $ ./hpcbench -h 75 | usage: hpcbench [-h] [--build] [--clean] [...] 76 | 77 | please put me into CASE directory, used for App Compiler/Clean/Run 78 | 79 | options: 80 | -h, --help show this help message and exit 81 | -v, --version get version info 82 | -use USE, --use USE Switch config file... 83 | -i, --info get machine info 84 | -l, --list get installed package info 85 | -install INSTALL [INSTALL ...], --install INSTALL [INSTALL ...] 86 | install dependency 87 | -remove REMOVE, --remove REMOVE 88 | remove software 89 | -find FIND, --find FIND 90 | find software 91 | -dp, --depend App dependency install 92 | -e, --env set environment App 93 | -b, --build compile App 94 | -cls, --clean clean App 95 | -r, --run run App 96 | -j, --job run job App 97 | -rb, --rbatch run batch App 98 | -d, --download Batch Download... 99 | -u, --update start update hpcbench... 100 | -check, --check start check hpcbench download url... 
101 | -s, --score Calculate the score and output benchmark report 102 | 103 | ``` 104 | ## 模块测试示例 105 | 下面以``COMPUTE``测试模块中的``HPL``为例进行测试介绍: 106 | 107 | 该模块以两节点,每节点64核心512g运行内存配置进行计算,用户需要根据实际情况自行修改配置文件。 108 | 109 | ``` 110 | templates/compute/hpl.x86_64.config 111 | ``` 112 | 113 | ### 调用配置文件 114 | 115 | ``` 116 | $ ./hpcbench -use templates/compute/hpl.linux64.config 117 | Switch config file to templates/compute/hpl.linux64.config 118 | Successfully switched. config file saved in file .meta 119 | ``` 120 | 121 | ### 下载依赖文件 122 | 123 | ``` 124 | $ ./hpcbench -d 125 | ``` 126 | 127 | ### 安装依赖库 128 | 129 | ``` 130 | $ ./hpcbench -dp 131 | ``` 132 | 133 | ### 安装HPL 134 | 135 | ``` 136 | $ ./hpcbench -b 137 | ``` 138 | 139 | ### 提交作业 140 | 141 | ``` 142 | $ ./hpcbench -j 143 | ``` 144 | 145 | ### 查看测试结果 146 | hpl测试程序在计算完成后,在`result/compute`路径下会生成`hpl.txt`文件,查看可得知浮点计算速度为6303Gflops,符合精度要求。 147 | 148 | ``` 149 | $ tail -n30 result/compute/hpl.txt 150 | Column=000175872 Fraction=99.6% Gflops=6.307e+03 151 | Column=000176128 Fraction=99.7% Gflops=6.307e+03 152 | Column=000176384 Fraction=99.9% Gflops=6.307e+03 153 | ============================================================== 154 | T/V N NB P Q Time Gflops 155 | -------------------------------------------------------------- 156 | WR00R2R4 176640 256 8 16 582.89 6.3037e+03 157 | HPL_pdgesv() start time Tue Aug 22 00:24:39 2023 158 | 159 | HPL_pdgesv() end time Tue Aug 22 00:34:21 2023 160 | 161 | --VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV--VVV- 162 | Max aggregated wall time rfact . . . : 3.56 163 | + Max aggregated wall time pfact . . : 2.67 164 | + Max aggregated wall time mxswp . . : 2.50 165 | Max aggregated wall time update . . : 533.48 166 | + Max aggregated wall time laswp . . : 56.77 167 | Max aggregated wall time up tr sv . : 0.26 168 | -------------------------------------------------------------- 169 | ||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= 8.52254435e-04 ...... 
PASSED 170 | =========================================================== 171 | ``` 172 | 173 | ## 完成所有模块测试 174 | 175 | 模块测试前需要根据集群实际情况,修改对应模块下的配置文件,可执行快捷测试命令 176 | ``` 177 | # 计算性能 178 | $ ./run.compute 179 | 180 | # 网络性能 181 | $ ./run.network 182 | 183 | # 存储性能 184 | $ ./run.storage 185 | 186 | # AI计算性能 187 | $ ./run.AI 188 | 189 | # 系统平衡性 190 | $ ./run.balance 191 | 192 | # 系统能耗性 193 | $ ./run.system 194 | ``` 195 | 196 | ## 生成可视化报告 197 | 所有模块测试完后,执行以下命令可生成一个Report.html文件,可通过浏览器打开查看。 198 | 199 | ``` 200 | $ ./hpcbench -s 201 | ``` 202 | -------------------------------------------------------------------------------- /arm_conda.sh: -------------------------------------------------------------------------------- 1 | wget --no-check-certificate https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py39_4.9.2-Linux-aarch64.sh 2 | sh Miniconda3-py39_4.9.2-Linux-aarch64.sh 3 | -------------------------------------------------------------------------------- /batch_run.sh: -------------------------------------------------------------------------------- 1 | source ./init.sh 2 | ./hpcbench -e 3 | source ./env.sh 4 | 5 | cd $HPCbench_ROOT 6 | 7 | cd $RESULT_DIR 8 | exec 1>$RESULT_DIR/system.log 2>/dev/null 9 | # compute_efficiency 10 | echo "Calculating Compute_Effiency" 11 | CLUSTER_POWER=314.25 #w 12 | TOTAL_NODES=5 13 | TOTAL_CLUSTER_POWER=$(echo "scale=2; $CLUSTER_POWER*$TOTAL_NODES*0.875/1000"|bc) 14 | CLUSTER_HPL=$(python -c "from utils.result import extract_pflops;print(extract_pflops('$HPCbench_ROOT/result/compute/hpl.txt'))") #Pflops 15 | COMPUTE_EFFIENCY=$(echo "scale=2;$CLUSTER_HPL*1000/$TOTAL_CLUSTER_POWER"|bc) 16 | echo COMPUTE_EFFIENCY=$COMPUTE_EFFIENCY 17 | # IO_operation_rate 18 | echo "Calculating IO_OPERATION_RATE" 19 | IOPS=`cat $HPCbench_ROOT/result/storage/ior/iops.txt |grep write |awk 'NR==2 {print $3}'` 20 | STORAGE_POWER=384 21 | STORAGE_POWER=$(echo "scale=2; $STORAGE_POWER*0.8"|bc) 22 | IO_operation_rate=$(echo "scale=2; 
$IOPS/$STORAGE_POWER/1000"|bc) 23 | echo "IO_operation_rate=$IO_operation_rate" -------------------------------------------------------------------------------- /data.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | stream_mpi.c/2014.10.21 https://www.cs.virginia.edu/stream/FTP/Code/Versions/stream_mpi.c 6 | stream_mpi.f/2014.2.14 https://www.cs.virginia.edu/stream/FTP/Code/Versions/stream_mpi.f 7 | mysecond.c/2009.2.19 https://www.cs.virginia.edu/stream/FTP/Code/mysecond.c 8 | 9 | [DEPENDENCY] 10 | set -x 11 | set -e 12 | 13 | export CC=`which gcc` 14 | export CXX=`which g++` 15 | export FC=`which gfortran` 16 | 17 | mkdir -p ${HPCbench_TMP}/stream-1.8 18 | cd ${HPCbench_TMP} 19 | mv ${HPCbench_DOWNLOAD}/stream_mpi.c ${HPCbench_TMP}/stream-1.8 20 | mv ${HPCbench_DOWNLOAD}/stream_mpi.f ${HPCbench_TMP}/stream-1.8 21 | mv ${HPCbench_DOWNLOAD}/mysecond.c ${HPCbench_TMP}/stream-1.8 22 | 23 | [ENV] 24 | module purge 25 | module load intel-oneapi-compilers/2021.4.0 26 | module load intel-oneapi-mpi/2021.4.0 27 | export CC=mpiicc FC=mpiifort F77=mpiifort 28 | 29 | [APP] 30 | app_name = stream 31 | build_dir = ${HPCbench_TMP}/stream-1.8 32 | binary_dir = ${HPCbench_LIBS}/stream-1.8 33 | case_dir = 34 | 35 | [BUILD] 36 | mpiicc -O3 -ffreestanding -qopenmp -qopt-streaming-stores=always \ 37 | -DSTREAM_ARRAY_SIZE=8650752 -DNTIMES=20 -DVERBOSE \ 38 | stream_mpi.c -o stream_mpi_c 39 | icc -c mysecond.c 40 | mpiifort -c stream_mpi.f 41 | mpiifort -O3 -qopenmp -qopt-streaming-stores=always stream_mpi.o mysecond.o -o stream_mpi_f 42 | mkdir -p ${HPCbench_LIBS}/stream-1.8 43 | cp -r stream_mpi_* ${HPCbench_LIBS}/stream-1.8 44 | 45 | [RUN] 46 | run = ${HPCbench_LIBS}/stream-1.8/stream_mpi_f 47 | binary = 48 | nodes = 1 49 | -------------------------------------------------------------------------------- /hpcbench: 
-------------------------------------------------------------------------------- 1 | hpcbench.py -------------------------------------------------------------------------------- /hpcbench.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import argparse 4 | from utils.scheduler import Scheduler 5 | 6 | parser = argparse.ArgumentParser(description=f'please put me into CASE directory, used for App Compiler/Clean/Run', 7 | usage='%(prog)s [-h] [--build] [--clean] [...]') 8 | parser.add_argument("-v","--version", help=f"get version info", action="store_true") 9 | parser.add_argument("-use","--use", help="Switch config file...", nargs=1) 10 | parser.add_argument("-i","--info", help=f"get machine info", action="store_true") 11 | parser.add_argument("-l","--list", help=f"get installed package info", action="store_true") 12 | parser.add_argument("-install","--install", help=f"install dependency", nargs='+') 13 | parser.add_argument("-remove","--remove", help=f"remove software", nargs=1) 14 | parser.add_argument("-find","--find", help=f"find software", nargs=1) 15 | # dependency install 16 | parser.add_argument("-dp","--depend", help=f"App dependency install", action="store_true") 17 | parser.add_argument("-e","--env", help=f"set environment App", action="store_true") 18 | parser.add_argument("-b","--build", help=f"compile App", action="store_true") 19 | parser.add_argument("-cls","--clean", help=f"clean App", action="store_true") 20 | parser.add_argument("-r","--run", help=f"run App", action="store_true") 21 | parser.add_argument("-j","--job", help=f"run job App", action="store_true") 22 | # batch run 23 | parser.add_argument("-rb","--rbatch", help=f"run batch App", action="store_true") 24 | # batch download 25 | parser.add_argument("-d","--download", help="Batch Download...", action="store_true") 26 | # update modulefile path 27 | parser.add_argument("-u","--update", help="start 
update hpcbench...", action="store_true") 28 | # check download url is good or not 29 | parser.add_argument("-check","--check", help="start check hpcbench download url...", action="store_true") 30 | parser.add_argument("-s","--score", help="Calculate the score and output benchmark report", action="store_true") 31 | args = parser.parse_args() 32 | 33 | 34 | if __name__ == '__main__': 35 | Scheduler(args).main() -------------------------------------------------------------------------------- /init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## User environment 4 | #module purge 5 | #module load openmpi/4.0.3-gcc-10.3.1 6 | export UCX_NET_DEVICES=mlx5_0:1 7 | export OMPI_MCA_btl=self,vader,tcp 8 | 9 | ## Cluster info 10 | export CLUSTER_NAME=kp920 11 | export GPU_PARTITION=asend01 12 | export CPU_PARTITION=arm128c256g 13 | export PARA_STORAGE_PATH=/lustre 14 | export TOTAL_NODES=5 15 | # A computing node's total cores 16 | export CPU_MAX_CORES=128 17 | # A computing node's power (W) 18 | export CLUSTER_POWER=314.25 19 | # The storage system total power (KW) 20 | export STORAGE_POWER=192 21 | # need testing 22 | export CLUSTER_BURSTBUFFER=111616 23 | export BW_BURSTBUFFER=12100.38 24 | 25 | ## Defult setting 26 | CUR_PATH=$(pwd) 27 | export HPCbench_ROOT=${CUR_PATH} 28 | export HPCbench_COMPILER=${CUR_PATH}/software/compiler 29 | export HPCbench_MPI=${CUR_PATH}/software/mpi 30 | export HPCbench_LIBS=${CUR_PATH}/software/libs 31 | export HPCbench_UTILS=${CUR_PATH}/software/utils 32 | export HPCbench_DOWNLOAD=${CUR_PATH}/downloads 33 | export HPCbench_MODULES=${CUR_PATH}/software/modulefiles 34 | export HPCbench_MODULEDEPS=${CUR_PATH}/software/moduledeps 35 | export HPCbench_BENCHMARK=${CUR_PATH}/benchmark 36 | export HPCbench_TMP=${CUR_PATH}/tmp 37 | export HPCbench_RESULT=${CUR_PATH}/result 38 | export DOWNLOAD_TOOL=${CUR_PATH}/package/common/download.sh 39 | export 
CHECK_DEPS=${CUR_PATH}/package/common/check_deps.sh 40 | export CHECK_ROOT=${CUR_PATH}/package/common/check_root.sh 41 | export gcc_version_number=$(gcc --version |grep GCC | awk '{ match($0, /[0-9]+\.[0-9]+\.[0-9]+/, version); print version[0] }') 42 | export arch=$(lscpu |grep Architecture|awk '{print $2}') 43 | export HADOOP_DATA=${CUR_PATH}/benchmark/storage/protocol/hadoop_data 44 | 45 | mkdir -p tmp downloads software 46 | if [ ! -d benchmark ];then 47 | mkdir -p benchmark/AI benchmark/compute benchmark/jobs benchmark/network benchmark/storage/ior benchmark/storage/protocol 48 | fi 49 | if [ ! -d result ];then 50 | mkdir -p result/AI result/balance result/compute result/network result/storage/ior result/storage/protocol result/system 51 | fi 52 | -------------------------------------------------------------------------------- /package/common/check_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #循环遍历脚本入参,查看是否存在 3 | if [ $# -eq 0 ];then 4 | echo "Usage: $0 para1 para2" 5 | exit 1 6 | fi 7 | flag=0 8 | result='' 9 | echo "Start checking dependency..." 10 | for i in $* #在$*中遍历参数,此时每个参数都是独立的,会遍历$#次 11 | do 12 | result=$(env|grep $i) 13 | if [ -z "$result" ];then 14 | echo "Please load $i first." 15 | flag=1 16 | else 17 | echo "$i detected." 18 | fi 19 | done 20 | 21 | if [ $flag == 0 ]; then 22 | echo 'CHECK SUCCESS' 23 | else 24 | echo 'CHECK FAILED' 25 | exit 1 26 | fi -------------------------------------------------------------------------------- /package/common/check_root.sh: -------------------------------------------------------------------------------- 1 | if [[ $EUID -ne 0 ]]; then 2 | echo "Warning:Permissions need to be elevated, some package may need to be installed by root or sudo." 
3 | return 1 4 | fi 5 | return 0 -------------------------------------------------------------------------------- /package/common/download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | download_path=$HPCbench_DOWNLOAD 3 | type_=wget 4 | url= 5 | filename= 6 | OPTIND=1 7 | 8 | while getopts ":u:f:t:" opt; 9 | do 10 | case $opt in 11 | #下载的链接 12 | u) url=$OPTARG;; 13 | #使用的下载类型,默认wget 14 | t) type_=$OPTARG;; 15 | #下载后重命名,可不添加 16 | f) filename=$OPTARG;; 17 | ?) echo -e "\033[0;31m[Error]\033[0m:Unknown parameter:"$opt 18 | exit 0;; 19 | esac 20 | 21 | done 22 | 23 | if [ ! "$url" ];then 24 | echo "Error: No available download link found" 25 | exit 0 26 | fi 27 | #如果需要重命名,则修改exist 28 | if [ "$filename" ];then 29 | exist_path=$download_path/$filename 30 | else 31 | if [ "$type_" == "git" ];then 32 | url=$(echo $url|sed 's/\.[^./]*$//') 33 | fi 34 | exist_path=$download_path/${url##*/} 35 | fi 36 | 37 | #判断文件是否存在 38 | if [ ! -e $exist_path ];then 39 | if [ "$type_" == "wget" ];then 40 | echo -e "\033[0;32m[Info]\033[0m:Using commands: wget $url -O $exist_path --no-check-certificate" 41 | wget $url -O $exist_path --no-check-certificate || rm -rf $exist_path 42 | elif [ "$type_" == "git" ];then 43 | echo -e "\033[0;32m[Info]\033[0m:Using commands: git clone $url $exist_path" 44 | git clone $url $exist_path 45 | else 46 | echo -e "\033[0;31m[Error]\033[0m:Unsupported download mode:"$type_ 47 | exit 0 48 | fi 49 | 50 | #下载失败 51 | if [ $? 
!= 0 ];then 52 | rm -rf $exist_path 53 | echo -e "\033[0;31m[Error]\033[0m:Download failed:"$url 54 | exit 0 55 | fi 56 | else 57 | echo -e "\033[0;32m[Info]\033[0m:"$exist_path" already exist" 58 | fi 59 | -------------------------------------------------------------------------------- /package/ior/master/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | 5 | #Download the IOR source: 6 | git clone https://github.com/hpc/ior 7 | #Compile the software: 8 | cd ior 9 | ./bootstrap 10 | ./configure CC=mpicc --prefix=$1 11 | 12 | make 13 | make install 14 | -------------------------------------------------------------------------------- /package/openblas/0.3.18/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | . ${DOWNLOAD_TOOL} -u https://github.com/xianyi/OpenBLAS/archive/refs/tags/v0.3.18.tar.gz -f OpenBLAS-0.3.18.tar.gz 5 | cd ${HPCbench_TMP} 6 | rm -rf OpenBLAS-0.3.18 7 | tar -xzvf ${HPCbench_DOWNLOAD}/OpenBLAS-0.3.18.tar.gz 8 | cd OpenBLAS-0.3.18 9 | make -j 10 | make PREFIX=$1 install 11 | -------------------------------------------------------------------------------- /package/osu/7.0.1/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | set -e 4 | . 
${DOWNLOAD_TOOL} -u http://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-7.0.1.tar.gz -f osu-micro-benchmarks-7.0.1.tar.gz 5 | cd ${HPCbench_TMP} 6 | tar -xvf ${HPCbench_DOWNLOAD}/osu-micro-benchmarks-7.0.1.tar.gz 7 | cd osu-micro-benchmarks-7.0.1/ 8 | ./configure --prefix=$1 CC=mpicc CXX=mpicxx 9 | make 10 | make install 11 | -------------------------------------------------------------------------------- /requirements.yaml: -------------------------------------------------------------------------------- 1 | name: hpcbench 2 | channels: 3 | - defaults 4 | - conda-forge 5 | - bioconda 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=5.1=1_gnu 9 | - bzip2=1.0.8=h5eee18b_6 10 | - ca-certificates=2024.3.11=h06a4308_0 11 | - ld_impl_linux-64=2.38=h1181459_1 12 | - libffi=3.4.4=h6a678d5_1 13 | - libgcc-ng=11.2.0=h1234567_1 14 | - libgomp=11.2.0=h1234567_1 15 | - libstdcxx-ng=11.2.0=h1234567_1 16 | - libuuid=1.41.5=h5eee18b_0 17 | - ncurses=6.4=h6a678d5_0 18 | - openssl=3.0.13=h7f8727e_2 19 | - pip=24.0=py310h06a4308_0 20 | - python=3.10.14=h955ad1f_1 21 | - readline=8.2=h5eee18b_0 22 | - setuptools=69.5.1=py310h06a4308_0 23 | - sqlite=3.45.3=h5eee18b_0 24 | - tk=8.6.14=h39e8969_0 25 | - tzdata=2024a=h04d1e81_0 26 | - wheel=0.43.0=py310h06a4308_0 27 | - xz=5.4.6=h5eee18b_1 28 | - zlib=1.2.13=h5eee18b_1 29 | - pip: 30 | - environs==11.0.0 31 | - jinja2==3.1.4 32 | - loguru==0.7.2 33 | - markupsafe==2.1.5 34 | - marshmallow==3.21.3 35 | - packaging==24.0 36 | - prettytable==3.10.0 37 | - pyecharts==2.0.5 38 | - python-dotenv==1.0.1 39 | - simplejson==3.19.2 40 | - wcwidth==0.2.13 41 | -------------------------------------------------------------------------------- /result/test_result.json: -------------------------------------------------------------------------------- 1 | { 2 | "compute":{ 3 | "HPL":1.69, 4 | "HPCG":30 5 | }, 6 | "AI":{ 7 | "infering":5216, 8 | "training":1085 9 | }, 10 | "storage":{ 11 | 
"single_client_single_fluence":2, 12 | "single_client_multi_fluence":5.6, 13 | "aggregation_bandwidth":40, 14 | "IO_rate":2000000, 15 | "multi_request":57.2 16 | }, 17 | "network":{ 18 | "P2P_network_bandwidth":100, 19 | "P2P_message_latency":"1/1.2", 20 | "ratio":0.5 21 | }, 22 | "system":{ 23 | "compute_efficiency":7.3, 24 | "IO_operation_rate":50 25 | }, 26 | "balance":{ 27 | "mem2cpu":4, 28 | "buffer2mem":0.49, 29 | "file2buffer":30, 30 | "mem2buffer":3526, 31 | "buffer2file":3 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /result/test_score.json: -------------------------------------------------------------------------------- 1 | {"compute": {"HPL": {"name": "HPL双精度浮点计算性能", "weights": 0.6, "large": 148.6, "medium": 14.01, "small": 6.0, "mini": 0.3, "score": 35.20833333333332}, "HPCG": {"name": "HPCG双精度浮点计算性能", "weights": 0.4, "large": 2725.75, "medium": 355.44, "small": 175, "mini": 6, "score": 21.428571428571427}, "issue_score": 28.865864453564903}, "AI": {"infering": {"name": "图像推理任务的计算性能", "weights": 0.5, "large": 2000000, "medium": 1500000, "small": 100000, "mini": 750, "score": 6.52}, "training": {"name": "图像训练任务的计算性能", "weights": 0.5, "large": 10802, "medium": 254, "small": 10000, "mini": 560, "score": 13.5625}, "issue_score": 9.403589740093938}, "storage": {"single_client_single_fluence": {"name": "文件系统单客户端单流带宽", "weights": 0.2, "large": 8, "medium": 9, "small": 6, "mini": 1, "score": 41.666666666666664}, "single_client_multi_fluence": {"name": "文件系统单客户端多流带宽", "weights": 0.2, "large": 13, "medium": 21, "small": 11, "mini": 5, "score": 63.636363636363626}, "aggregation_bandwidth": {"name": "文件系统聚合带宽", "weights": 0.2, "large": 2500, "medium": 1760, "small": 200, "mini": 80, "score": 25.0}, "IO_rate": {"name": "文件系统聚合IO操作速率", "weights": 0.2, "large": 26000000, "medium": 14000000, "small": 17500000, "mini": 4300000, "score": 14.285714285714285}, "multi_request": {"name": "多协议平均访问效率", "weights": 0.2, 
"large": 62.0, "medium": 64.6, "small": 65, "mini": 65, "score": 100}, "issue_score": 39.37922967655749}, "network": {"P2P_network_bandwidth": {"name": "点对点网络带宽", "weights": 0.4, "large": 200, "medium": 200, "small": 200, "mini": 100, "score": 62.5}, "P2P_message_latency": {"name": "点对点消息延迟", "weights": 0.3, "large": "1/1.67", "medium": "1/3.7", "small": "1/4.0", "mini": "1/2.0", "score": 100}, "ratio": {"name": "网络对分带宽与注入带宽比值", "weights": 0.3, "large": 1.022, "medium": 2.06, "small": 1.5, "mini": 1, "score": 41.666666666666664}, "issue_score": 63.721887926789826}, "system": {"compute_efficiency": {"name": "单位功耗的浮点计算性能", "weights": 0.6, "large": 14.719, "medium": 3.56, "small": 20, "mini": 6, "score": 45.625}, "IO_operation_rate": {"name": "单位功耗的文件系统聚合IO速率", "weights": 0.4, "large": 2.57, "medium": 3.55, "small": 200, "mini": 100, "score": 31.25}, "issue_score": 39.215859247314846}, "balance": {"mem2cpu": {"name": "内存容量与处理器核心数比", "weights": 0.2, "large": 9.64, "medium": 1.66, "small": 4, "mini": 3.93, "score": 100}, "buffer2mem": {"name": "BurstBuffer与内存的容量比", "weights": 0.2, "large": 3.78, "medium": 2.3, "small": 2, "mini": 2.7, "score": 30.624999999999996}, "file2buffer": {"name": "并行文件系统与BurstBuffer的容量比", "weights": 0.2, "large": 23.87, "medium": 15, "small": 10, "mini": 17, "score": 100}, "mem2buffer": {"name": "内存与BurstBuffer的带宽比", "weights": 0.2, "large": 6000, "medium": 4000, "small": 1000, "mini": 125, "score": 100}, "buffer2file": {"name": "BurstBuffer与并行文件系统的带宽比", "weights": 0.2, "large": 4, "medium": 3, "small": 10, "mini": 5.5, "score": 37.5}, "issue_score": 64.86665050691026}, "sum_score": 40.90884692520521} -------------------------------------------------------------------------------- /run.AI: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 加载环境 4 | source init.sh 5 | 6 | # maskrcnn 7 | ./hpcbench -use templates/AI/maskrcnn.$arch.config 8 | ./hpcbench -d 9 | ./hpcbench -dp 10 | ./hpcbench -b 
11 | ./hpcbench -rb 12 | ./hpcbench -j 13 | 14 | # hpcg 15 | ./hpcbench -use templates/compute/resnet.$arch.config 16 | ./hpcbench -d 17 | ./hpcbench -dp 18 | ./hpcbench -b 19 | ./hpcbench -rb 20 | ./hpcbench -j 21 | -------------------------------------------------------------------------------- /run.balance: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source init.sh 4 | #scratch bw test 5 | module use ./software/moduledeps/gcc${gcc_version_number}/ 6 | module load ior/master 7 | 8 | cat << \EOF > scratch_bw.slurm 9 | #!/bin/bash 10 | #SBATCH --job-name="aggreagate_bandwidth" 11 | #SBATCH -N 2 12 | #SBATCH --ntasks-per-node=64 13 | #SBATCH --output=logs/scratch_bandwidth.out 14 | #SBATCH --error=logs/scratch_bandwidth.out 15 | #SBATCH -p {{ CPU_PARTITION }} 16 | #SBATCH --exclusive 17 | 18 | NCT=2 19 | 20 | # Date Stamp for benchmark 21 | SEQ=64 22 | MAXPROCS=128 23 | DATA_SIZE=128 24 | 25 | BASE_DIR=$SCRATCH/iortest 26 | RESULT_DIR=$HPCbench_ROOT/result/balance 27 | 28 | NCT=2 #`grep -v ^# hfile |wc -l` 29 | DS=`date +"%F_%H:%M:%S"` 30 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 31 | 32 | while [ ${SEQ} -le ${MAXPROCS} ]; do 33 | NPROC=`expr ${NCT} \* ${SEQ}` 34 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 35 | # Alternatively, set to a static value and let the data size increase. 
36 | # BSZ="1g" 37 | # BSZ="${DATA_SIZE}" 38 | mpirun \ 39 | ior -v -w -r -i 4 -F \ 40 | -o ${BASE_DIR}/ior-test3.file \ 41 | -t 1m -b ${BSZ} | tee ${RESULT_DIR}/aggregation_bandwidth.txt 42 | SEQ=`expr ${SEQ} \* 2` 43 | done 44 | EOF 45 | 46 | sbatch scratch_bw.slurm 47 | 48 | 49 | ./hpcbench -use templates/balance/balance.linux64.config 50 | ./hpcbench -rb 51 | 52 | -------------------------------------------------------------------------------- /run.compute: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 加载环境 4 | source init.sh 5 | 6 | # hpl 7 | ./hpcbench -install openblas/0.3.18 gcc 8 | ./hpcbench -use templates/compute/hpl.$arch.config 9 | ./hpcbench -d 10 | ./hpcbench -dp 11 | ./hpcbench -cls 12 | ./hpcbench -b 13 | ./hpcbench -j 14 | 15 | # hpcg 16 | ./hpcbench -use templates/compute/hpcg.$arch.config 17 | ./hpcbench -d 18 | ./hpcbench -dp 19 | ./hpcbench -cls 20 | ./hpcbench -b 21 | ./hpcbench -j 22 | 23 | -------------------------------------------------------------------------------- /run.network: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 加载环境 4 | source init.sh 5 | 6 | # osu 7 | ./hpcbench -use templates/network/osu.$arch.config 8 | ./hpcbench -d 9 | ./hpcbench -dp 10 | ./hpcbench -cls 11 | ./hpcbench -b 12 | ./hpcbench -j 13 | -------------------------------------------------------------------------------- /run.storage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 加载环境 4 | source init.sh 5 | 6 | # ior 7 | ./hpcbench -use templates/storage/ior.$arch.config 8 | ./hpcbench -d 9 | ./hpcbench -dp 10 | ./hpcbench -cls 11 | ./hpcbench -b 12 | ./hpcbench -j 13 | 14 | # protocol 15 | ## posix 16 | ./hpcbench -use templates/storage/protocol/posix.$arch.config 17 | ./hpcbench -rb 18 | 19 | ## hadoop 20 | ./hpcbench -use templates/storage/protocol/hadoop.$arch.config 21 | 
./hpcbench -d 22 | ./hpcbench -dp 23 | ./hpcbench -b 24 | ./hpcbench -rb 25 | 26 | ## warp 27 | ./hpcbench -use templates/storage/protocol/warp.$arch.config 28 | ./hpcbench -d 29 | ./hpcbench -dp 30 | ./hpcbench -b 31 | ./hpcbench -rb 32 | 33 | ## nfs should be run with root or sudo 34 | ./hpcbench -use templates/storage/protocol/nfs.$arch.config 35 | ./hpcbench -d 36 | ./hpcbench -dp 37 | ./hpcbench -b 38 | ./hpcbench -rb 39 | -------------------------------------------------------------------------------- /run.system: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source init.sh 4 | ./hpcbench -use templates/system/system.linux64.config 5 | ./hpcbench -rb 6 | 7 | -------------------------------------------------------------------------------- /setting.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import sys 3 | from os.path import dirname, abspath, join 4 | from environs import Env 5 | from loguru import logger 6 | import shutil 7 | 8 | env = Env() 9 | env.read_env() 10 | 11 | # definition of flags 12 | IS_WINDOWS = platform.system().lower() == 'windows' 13 | 14 | # definition of dirs 15 | ROOT_DIR = dirname(abspath(__file__)) 16 | LOG_DIR = join(ROOT_DIR, env.str('LOG_DIR', 'logs')) 17 | 18 | # definition of environments 19 | CLUSTER_SCALE = None 20 | CLUSTER_NAME = env.str('CLUSTER_NAME') 21 | APP_DEBUG = env.bool('APP_DEBUG', False) 22 | APP_CONFIG = env.str('APP_CONFIG', None) 23 | 24 | HPCbench_RESULT = env.str('HPCbench_RESULT',join(ROOT_DIR, 'result')) 25 | HPCbench_BENCHMARK = env.str('HPCbench_BENCHMARK',join(ROOT_DIR, 'benchmark')) 26 | 27 | GPU_PARTITION = env.str('GPU_PARTITION') 28 | CPU_PARTITION = env.str('CPU_PARTITION') 29 | CPU_MAX_CORES = env.str('CPU_MAX_CORES') 30 | HADOOP_DATA = env.str('HADOOP_DATA') 31 | CLUSTER_POWER = env.str('CLUSTER_POWER', 10000) 32 | STORAGE_POWER = env.str('STORAGE_POWER', 10000) 33 | 
CLUSTER_BURSTBUFFER = env.str('CLUSTER_BURSTBUFFER', 10000) 34 | # CLUSTER_MEMORY = env.int('CLUSTER_MEMORY', 10000) 35 | BW_BURSTBUFFER = env.str('BW_BURSTBUFFER', 10000) 36 | PARA_STORAGE_PATH = env.str('PARA_STORAGE_PATH') 37 | TOTAL_NODES = env.str('TOTAL_NODES') 38 | 39 | ENABLE_LOG_FILE = env.bool('ENABLE_LOG_FILE', True) 40 | ENABLE_LOG_RUNTIME_FILE = env.bool('ENABLE_LOG_RUNTIME_FILE', True) 41 | ENABLE_LOG_ERROR_FILE = env.bool('ENABLE_LOG_ERROR_FILE', True) 42 | 43 | LOG_LEVEL = "DEBUG" if APP_DEBUG else "INFO" 44 | LOG_ROTATION = env.str('LOG_ROTATION', '100MB') 45 | LOG_RETENTION = env.str('LOG_RETENTION', '1 week') 46 | 47 | logger.remove() 48 | logger.add(sys.stderr, level='INFO') 49 | 50 | if ENABLE_LOG_FILE: 51 | if ENABLE_LOG_RUNTIME_FILE: 52 | logger.add(env.str('LOG_RUNTIME_FILE', join(LOG_DIR, 'runtime.log')), 53 | level=LOG_LEVEL, rotation=LOG_ROTATION, retention=LOG_RETENTION) 54 | if ENABLE_LOG_ERROR_FILE: 55 | logger.add(env.str('LOG_ERROR_FILE', join(LOG_DIR, 'error.log')), 56 | level='ERROR', rotation=LOG_ROTATION) 57 | else: 58 | shutil.rmtree(LOG_DIR, ignore_errors=True) 59 | -------------------------------------------------------------------------------- /templates/AI/maskrcnn.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | train2017.zip http://images.cocodataset.org/zips/train2017.zip 6 | val2017.zip http://images.cocodataset.org/zips/val2017.zip 7 | annotations_trainval2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip 8 | maskrcnn.sif https://afdata.sjtu.edu.cn/files/maskrcnn_latest.sif 9 | 10 | [DEPENDENCY] 11 | mkdir -p ./benchmark/AI/maskrcnn/data 12 | 13 | cp -rfv ./downloads/train2017.zip ./benchmark/AI/maskrcnn/data 14 | cp -rfv ./downloads/val2017.zip ./benchmark/AI/maskrcnn/data/ 15 | cp -rfv ./downloads/annotations_trainval2017.zip ./benchmark/AI/maskrcnn/data/ 16 | cd ./benchmark/AI/maskrcnn/data 
17 | unzip train2017.zip 18 | unzip val2017.zip 19 | unzip annotations_trainval2017.zip 20 | cd .. 21 | 22 | [ENV] 23 | 24 | [APP] 25 | app_name = maskrcnn 26 | build_dir = ${HPCbench_ROOT}/benchmark/AI/maskrcnn 27 | binary_dir = ${HPCbench_ROOT}/benchmark/AI/maskrcnn 28 | case_dir = ${HPCbench_ROOT}/benchmark/AI/maskrcnn 29 | 30 | [BUILD] 31 | # MaskRCNN for Ascend 32 | 33 | ## environment 34 | ### miniconda-aarch64 35 | ### conda environment 36 | conda create -n maskrcnn-torch1.11 python=3.7 37 | conda activate maskrcnn-torch1.11 38 | 39 | ### dependency 40 | pip3 install attrs numpy decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py tqdm pyyaml wheel typing_extensions 41 | 42 | ### maskrcnn src 43 | cd ${HPCbench_ROOT}/benchmark/AI/maskrcnn 44 | git clone https://gitee.com/ascend/ModelZoo-PyTorch.git 45 | cd ModelZoo-PyTorch/PyTorch/built-in/cv/detection/MaskRCNN_for_Pytorch/ 46 | 47 | ### torch-1.11 48 | wget --no-check-certificate https://repo.huaweicloud.com/kunpeng/archive/Ascend/PyTorch/torch-1.11.0-cp37-cp37m-linux_aarch64.whl 49 | pip3 install torch-1.11.0-cp37-cp37m-linux_aarch64.whl 50 | 51 | ### extra requirements 52 | pip install -r requirements.txt 53 | 54 | ### cocoapi installation 55 | git clone https://github.com/cocodataset/cocoapi.git 56 | cd cocoapi/PythonAPI 57 | 58 | python setup.py build_ext install 59 | cd ../.. 60 | 61 | ### torch-npu torchvision apex installation 62 | wget --no-check-certificate https://gitee.com/ascend/pytorch/releases/download/v5.0.rc1-pytorch1.11.0/torch_npu-1.11.0-cp37-cp37m-linux_aarch64.whl 63 | pip3 install torch_npu-1.11.0-cp37-cp37m-linux_aarch64.whl 64 | 65 | git clone https://github.com/pytorch/vision.git 66 | cd vision 67 | git checkout v0.12.0 68 | python setup.py bdist_wheel 69 | cd dist 70 | pip3 install torchvision-0.12.*.whl 71 | cd ../.. 
72 | 73 | git clone -b v0.12.0 https://gitee.com/ascend/vision.git vision_npu 74 | cd vision_npu 75 | source /opt/Ascend/ascend-toolkit/set_env.sh 76 | python setup.py bdist_wheel 77 | cd dist 78 | pip install torchvision_npu-0.12.*.whl 79 | cd ../.. 80 | 81 | pip3 install apex --no-index --find-links https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/MindX/OpenSource/pytorch1_11_0/index.html --trusted-host ascend-repo.obs.cn-east-2.myhuaweicloud.com 82 | 83 | ### maskrcnn 84 | python setup.py build develop 85 | 86 | [RUN] 87 | binary = maskrcnn 88 | 89 | [BATCH] 90 | ## training 91 | bash test/train_full_8p.sh --data_path=${HPCbench_ROOT}/benchmark/AI/maskrcnn/data 92 | 93 | -------------------------------------------------------------------------------- /templates/AI/maskrcnn.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | train2017.zip http://images.cocodataset.org/zips/train2017.zip 6 | val2017.zip http://images.cocodataset.org/zips/val2017.zip 7 | annotations_trainval2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip 8 | maskrcnn.sif https://afdata.sjtu.edu.cn/files/maskrcnn_latest.sif 9 | 10 | [DEPENDENCY] 11 | mkdir -p ./benchmark/AI/maskrcnn/data 12 | 13 | cp -rfv ./downloads/train2017.zip ./benchmark/AI/maskrcnn/data 14 | cp -rfv ./downloads/val2017.zip ./benchmark/AI/maskrcnn/data/ 15 | cp -rfv ./downloads/annotations_trainval2017.zip ./benchmark/AI/maskrcnn/data/ 16 | cd ./benchmark/AI/maskrcnn/data 17 | unzip train2017.zip 18 | unzip val2017.zip 19 | unzip annotations_trainval2017.zip 20 | cd .. 
21 | git clone https://github.com/NVIDIA/DeepLearningExamples.git 22 | mv ${HPCbench_ROOT}/benchmark/AI/maskrcnn/data ${HPCbench_ROOT}/benchmark/AI/maskrcnn/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/pytorch 23 | 24 | [ENV] 25 | 26 | [APP] 27 | app_name = maskrcnn 28 | build_dir = ${HPCbench_ROOT}/benchmark/AI/maskrcnn 29 | binary_dir = ${HPCbench_ROOT}/benchmark/AI/maskrcnn 30 | case_dir = ${HPCbench_ROOT}/benchmark/AI/maskrcnn 31 | 32 | [BUILD] 33 | 34 | [RUN] 35 | binary = maskrcnn 36 | 37 | [JOB1] 38 | #!/bin/bash 39 | #SBATCH -J maskrcnn 40 | #SBATCH -p {{ GPU_PARTITION }} #GPU partition 41 | #SBATCH -N 1 42 | #SBATCH -n 64 43 | #SBATCH --gres=gpu:4 44 | #SBATCH --exclusive 45 | #SBATCH -o result/AI/maskrcnn.txt 46 | 47 | image=${HPCbench_ROOT}/downloads/maskrcnn_latest.sif 48 | cd ${HPCbench_ROOT}/benchmark/AI/maskrcnn/DeepLearningExamples/PyTorch/Segmentation/MaskRCNN/pytorch 49 | mkdir -p results 50 | singularity exec --nv --bind `pwd`:/datasets,`pwd`/results:/results ${image} bash -c "./scripts/train.sh" 51 | 52 | -------------------------------------------------------------------------------- /templates/AI/resnet.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | ILSVRC2012_img_val https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar 6 | resnet50_v1.pb https://zenodo.org/record/2535873/files/resnet50_v1.pb 7 | val_map.txt https://github.com/microsoft/Swin-Transformer/files/8529898/val_map.txt 8 | 9 | [DEPENDENCY] 10 | mkdir -p ./benchmark/AI/resnet/data/val 11 | tar -xvf ./downloads/ILSVRC2012_img_val.tar -C ./benchmark/AI/resnet/data/val/ 12 | cp -rfv ./downloads/resnet50_v1.pb ./benchmark/AI/resnet/data/ 13 | cp -rfv ./downloads/val_map.txt ./benchmark/AI/resnet/data/ 14 | cd ./benchmark/AI/resnet/ 15 | git clone https://gitee.com/ascend/ModelZoo-PyTorch.git 16 | 17 | [ENV] 18 | 19 | [APP] 20 | app_name = resnet 21 | build_dir = 
${HPCbench_ROOT}/benchmark/AI/resnet 22 | binary_dir = ${HPCbench_ROOT}/benchmark/AI/resnet 23 | case_dir = ${HPCbench_ROOT}/benchmark/AI/resnet 24 | 25 | [BUILD] 26 | # ResNet50 MLperf for Ascend 27 | ## environment 28 | ### miniconda-aarch64 29 | wget --no-check-certificate https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py39_4.9.2-Linux-aarch64.sh 30 | sh Miniconda3-py39_4.9.2-Linux-aarch64.sh 31 | source activate 32 | ### conda environment 33 | conda create -n resnet50-torch1.11 python=3.7 34 | conda activate resnet50-torch1.11 35 | ### dependency 36 | pip3 install attrs numpy decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py tqdm pyyaml wheel typing_extensions cloudpickle tornado synr==0.5.0 37 | cd ${HPCbench_ROOT}/benchmark/AI/resnet/ModelZoo-PyTorch/ACL_PyTorch/built-in/cv/Resnet50_Pytorch_Infer 38 | pip3 install -r requirements.txt 39 | python3 imagenet_torch_preprocess.py resnet ${HPCbench_ROOT}/benchmark/AI/resnet/data/val/ ./prep_dataset 40 | wget --no-check-certificate https://download.pytorch.org/models/resnet50-0676ba61.pth 41 | source /opt/Ascend/ascend-toolkit/set_env.sh 42 | atc --model=resnet50_official.onnx --framework=5 --output=resnet50_bs64 --input_format=NCHW --input_shape="actual_input_1:64,3,224,224" --enable_small_channel=1 --log=error --soc_version=Ascend910B --insert_op_conf=aipp_resnet50.aippconfig 43 | wget --no-check-certificate https://aisbench.obs.myhuaweicloud.com/packet/ais_bench_infer/0.0.2/aclruntime-0.0.2-cp37-cp37m-linux_aarch64.whl 44 | wget --no-check-certificate https://aisbench.obs.myhuaweicloud.com/packet/ais_bench_infer/0.0.2/ais_bench-0.0.2-py3-none-any.whl 45 | pip3 install aclruntime-0.0.2-cp37-cp37m-linux_aarch64.whl 46 | pip3 install ais_bench-0.0.2-py3-none-any.whl 47 | 48 | 49 | [RUN] 50 | binary = resnet 51 | 52 | 53 | [BATCH] 54 | python3 -m ais_bench --model ./resnet50_bs64.om --input ./prep_dataset/ --output ./ --output_dirname result --outfmt TXT | tee 
${RESULT_DIR}/inference.log 55 | python3 vision_metric_ImageNet.py ./result ./ImageNet/val_label.txt ./ result.json 56 | -------------------------------------------------------------------------------- /templates/AI/resnet.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | ILSVRC2012_img_val https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar 6 | resnet50_v1.pb https://zenodo.org/record/2535873/files/resnet50_v1.pb 7 | val_map.txt https://github.com/microsoft/Swin-Transformer/files/8529898/val_map.txt 8 | resnet50.sif https://afdata.sjtu.edu.cn/files/resnet_latest.sif 9 | 10 | [DEPENDENCY] 11 | mkdir -p ./benchmark/AI/resnet/data/val 12 | tar -xvf ./downloads/ILSVRC2012_img_val.tar -C ./benchmark/AI/resnet/data/val/ 13 | cp -rfv ./downloads/resnet50_v1.pb ./benchmark/AI/resnet/data/ 14 | cp -rfv ./downloads/val_map.txt ./benchmark/AI/resnet/data/ 15 | cd ./benchmark/AI/resnet/ 16 | git clone https://github.com/mlcommons/inference.git 17 | 18 | [ENV] 19 | 20 | [APP] 21 | app_name = resnet 22 | build_dir = ${HPCbench_ROOT}/benchmark/AI/resnet 23 | binary_dir = ${HPCbench_ROOT}/benchmark/AI/resnet 24 | case_dir = ${HPCbench_ROOT}/benchmark/AI/resnet 25 | 26 | [BUILD] 27 | 28 | [RUN] 29 | binary = resnet 30 | 31 | 32 | [JOB1] 33 | #!/bin/bash 34 | #SBATCH -J inference 35 | #SBATCH -p {{ GPU_PARTITION }} 36 | #SBATCH -n 16 37 | #SBATCH --gres=gpu:1 38 | #SBATCH -o result/AI/inference.txt 39 | 40 | source init.sh 41 | module load cuda/11.8.0 cudnn 42 | export MODEL_DIR=${HPCbench_ROOT}/benchmark/AI/resnet/data/ 43 | export DATA_DIR=${HPCbench_ROOT}/benchmark/AI/resnet/data/ 44 | export IMAGE=${HPCbench_ROOT}/downloads/resnet_latest.sif 45 | cd ./benchmark/AI/resnet/inference/vision/classification_and_detection 46 | 47 | singularity exec --nv $IMAGE bash -c "./run_local.sh tf resnet50 gpu --count 50000 --time 1200 --scenario Offline --qps 200 --max-latency 0.1" 48 | 
singularity exec --nv $IMAGE bash -c "./run_local.sh tf resnet50 gpu --accuracy --time 60 --scenario Offline --qps 200 --max-latency 0.2" 49 | -------------------------------------------------------------------------------- /templates/balance/balance.linux64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | 8 | [ENV] 9 | export RESULT_DIR=$HPCbench_ROOT/result/balance 10 | mkdir -p $RESULT_DIR 11 | 12 | [APP] 13 | app_name = balance 14 | build_dir = $HPCbench_ROOT 15 | binary_dir = $HPCbench_ROOT 16 | case_dir = $HPCbench_ROOT 17 | 18 | [BUILD] 19 | 20 | [CLEAN] 21 | 22 | [RUN] 23 | binary = balance 24 | run = echo 25 | nodes = 1 26 | 27 | [BATCH] 28 | cd $RESULT_DIR 29 | exec 1>$RESULT_DIR/balance.log 2>/dev/null 30 | # 内存容量与核心数比 31 | echo "内存容量与核心数比" 32 | TotalMemPerNode=`grep MemTotal /proc/meminfo|awk -F " " '{print $2}'` 33 | let TotalMemPerNode=$TotalMemPerNode/1024/1024 34 | echo TotalMemPerNode : $TotalMemPerNode GB 35 | TotalCorePerNode=`cat /proc/cpuinfo | grep "processor" | wc -l` 36 | echo TotalCorePerNode : $TotalCorePerNode 37 | mem2cpu=$(echo "scale=2; $TotalMemPerNode/$TotalCorePerNode" | bc) 38 | echo mem2cpu=$mem2cpu 39 | echo " " 40 | 41 | # BurstBuffer 与内存的容量比 42 | echo "BurstBuffer 与内存的容量比" 43 | BurstBuffer={{ CLUSTER_BURSTBUFFER }} 44 | TotalNodeNum={{ TOTAL_NODES }} 45 | let TotalMemAllNode=$TotalMemPerNode*$TotalNodeNum 46 | buffer2mem=$(echo "scale=2; $BurstBuffer/$TotalMemAllNode"|bc) 47 | echo BurstBuffer : $BurstBuffer GB 48 | echo TotalNodeNum : $TotalNodeNum 49 | echo TotalMemPerNode : $TotalMemPerNode GB 50 | echo buffer2mem=$buffer2mem 51 | echo " " 52 | 53 | # 并行文件系统与BurstBuffer的容量比 54 | echo "并行文件系统与BurstBuffer的容量比" 55 | 56 | ParaName={{ PARA_STORAGE_PATH }} 57 | echo $ParaName 58 | ParaSize=`df -a |grep $ParaName|awk '{print $2}'` 59 | let ParaSize=$ParaSize/1024/1024 60 | echo ParaSize : $ParaSize GB 61 | echo 
BurstBuffer : $BurstBuffer GB 62 | file2buffer=$(echo "scale=2; $ParaSize/$BurstBuffer"|bc) 63 | echo file2buffer=$file2buffer 64 | echo " " 65 | 66 | # 内存与BurstBuffer的带宽比 67 | echo "内存与BurstBuffer的带宽比" 68 | rm stream.c stream.log 69 | wget --no-check-certificate https://raw.githubusercontent.com/jeffhammond/STREAM/master/stream.c > /dev/null 2>&1 & 70 | wait 71 | gcc -mtune=native -march=native -O3 -mcmodel=medium -fopenmp \ 72 | -DSTREAM_ARRAY_SIZE=200000000 -DNTIMES=30 -DOFFSET=4096 stream.c \ 73 | -o stream.o > /dev/null 2>&1 & 74 | wait 75 | ./stream.o > stream.log 2>&1 & 76 | wait 77 | RateOfMem=`cat stream.log |grep Triad|awk '{print $2}'` 78 | echo RateOfMem:${RateOfMem} 79 | 80 | #BurstBuffer 带宽测试 需要在有闪存节点上进行测试 81 | #bash scrath-ior.sh 82 | BW_BURSTBUFFER={{ BW_BURSTBUFFER }} 83 | mem2buffer=$(echo "scale=2; $RateOfMem*$TotalNodeNum/$BW_BURSTBUFFER"|bc) 84 | echo mem2buffer=$mem2buffer 85 | echo " " 86 | 87 | echo "BurstBuffer与并行文件系统的带宽比" 88 | echo "running bandwidth test of ParaFileSystem" 89 | BW_ParaFile=`cat $HPCbench_ROOT/result/storage/ior/aggregation_bandwidth.txt |grep Write|awk 'NR==1 {print $3}'` 90 | echo BW_ParaFile : $BW_ParaFile 91 | buffer2file=$(echo "scale=2; $BW_BURSTBUFFER/$BW_ParaFile"|bc) 92 | echo buffer2file=$buffer2file 93 | 94 | -------------------------------------------------------------------------------- /templates/balance/stream/main/stream.linux64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | stream/5.10 https://github.com/jeffhammond/STREAM/archive/refs/heads/master.zip STREAM.zip 6 | 7 | [DEPENDENCY] 8 | export CC=`which gcc` 9 | export CXX=`which g++` 10 | export FC=`which gfortran` 11 | if [ ! 
-d "STREAM-master" ]; then 12 | unzip ./downloads/STREAM.zip 13 | fi 14 | 15 | [ENV] 16 | export STREAM_HOME=$HPCbench_ROOT/STREAM-master 17 | export OMP_PROC_BIND=true 18 | export OMP_NUM_THREADS=1 19 | 20 | [APP] 21 | app_name = STREAM 22 | build_dir = $STREAM_HOME 23 | binary_dir = $STREAM_HOME 24 | case_dir = $STREAM_HOME 25 | 26 | [BUILD] 27 | cat << \EOF > Makefile 28 | CC = gcc 29 | CFLAGS = -mtune=native -march=native -O3 -mcmodel=medium -fopenmp 30 | 31 | FC = gfortran 32 | FFLAGS = -O2 -fopenmp 33 | 34 | all: stream_f.exe stream_c.exe 35 | 36 | stream_f.exe: stream.f mysecond.o 37 | $(CC) $(CFLAGS) -c mysecond.c 38 | $(FC) $(FFLAGS) -c stream.f 39 | $(FC) $(FFLAGS) stream.o mysecond.o -o stream_f.exe 40 | 41 | stream_c.exe: stream.c 42 | $(CC) $(CFLAGS) stream.c -o stream_c.exe 43 | 44 | clean: 45 | rm -f stream_f.exe stream_c.exe *.o 46 | EOF 47 | # high-throughput mode 48 | # tuned-adm profile throughput-performance 49 | # close transparent hugepage 50 | # echo never > /sys/kernel/mm/transparent_hugepage/enabled 51 | # echo never > /sys/kernel/mm/transparent_hugepage/defrag 52 | make stream_c.exe > compiler.log 53 | 54 | [CLEAN] 55 | make clean 56 | 57 | [RUN] 58 | run = 59 | binary = stream_c.exe 2>&1 >> stream.output.log 60 | nodes = 1 61 | 62 | [BATCH] 63 | for core_num in 1 2 4 8 16 32 64 128 64 | do 65 | echo 3 > /proc/sys/vm/drop_caches 66 | export OMP_NUM_THREADS=$core_num 67 | ./stream_c.exe >> stream.output.log 68 | done 69 | -------------------------------------------------------------------------------- /templates/compute/hpcg.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | export CC=`which gcc` 8 | export CXX=`which g++` 9 | export FC=`which fortran` 10 | mkdir -p $HPCbench_ROOT/benchmark/compute 11 | mkdir -p $HPCbench_ROOT/result/compute 12 | cd $HPCbench_ROOT/benchmark/compute 13 | git config --global http.sslVerify 
false 14 | git clone --depth=1 https://github.com/hpcg-benchmark/hpcg.git 15 | 16 | [ENV] 17 | export CC=mpicc CXX=mpic++ FC=mpifort 18 | export HPCG_HOME=$HPCbench_ROOT/benchmark/compute/hpcg 19 | export OMPI_MCA_btl=self,tcp 20 | 21 | [APP] 22 | app_name = hpcg 23 | build_dir = $HPCG_HOME 24 | binary_dir = $HPCG_HOME/bin/ 25 | case_dir = $HPCG_HOME/bin/ 26 | 27 | [BUILD] 28 | cat << \EOF > setup/Make.MPI_GCC_OMP 29 | SHELL = /bin/sh 30 | CD = cd 31 | CP = cp 32 | LN_S = ln -s -f 33 | MKDIR = mkdir -p 34 | RM = /bin/rm -f 35 | TOUCH = touch 36 | TOPdir = . 37 | SRCdir = $(TOPdir)/src 38 | INCdir = $(TOPdir)/src 39 | BINdir = $(TOPdir)/bin 40 | HPCG_INCLUDES = -I$(INCdir) -I$(INCdir)/$(arch) $(MPinc) 41 | HPCG_LIBS = 42 | HPCG_OPTS = 43 | HPCG_DEFS = $(HPCG_OPTS) $(HPCG_INCLUDES) 44 | CXX = mpicxx 45 | CXXFLAGS = $(HPCG_DEFS) -O3 -ffast-math -ftree-vectorize -fopenmp 46 | LINKER = $(CXX) 47 | LINKFLAGS = $(CXXFLAGS) 48 | ARCHIVER = ar 49 | ARFLAGS = r 50 | RANLIB = echo 51 | EOF 52 | ./configure MPI_GCC_OMP 53 | make -j 54 | 55 | [CLEAN] 56 | make clean 57 | 58 | [RUN] 59 | run = mpirun -np 128 60 | binary = xhpcg --nx=104 --rt=60 61 | nodes = 1 62 | 63 | [BATCH] 64 | 65 | [JOB1] 66 | #!/bin/bash 67 | #SBATCH -J hpcg 68 | #SBATCH -N 1 69 | #SBATCH --ntasks-per-node 128 70 | #SBATCH -p {{ CPU_PARTITION }} 71 | #SBATCH --exclusive 72 | #SBATCH -o logs/hpcg.out 73 | #SBATCH -e logs/hpcg.out 74 | 75 | cd $HPCG_HOME/bin/ 76 | export UCX_NET_DEVICES=mlx5_0:1 77 | mpirun ./xhpcg --nx=104 --rt=60 78 | cp $HPCbench_ROOT/benchmark/compute/hpcg/bin/HPCG-Benchmark* $HPCbench_ROOT/result/compute/hpcg.txt 79 | -------------------------------------------------------------------------------- /templates/compute/hpcg.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | export CC=`which gcc` 8 | export CXX=`which g++` 9 | export FC=`which fortran` 10 | mkdir -p 
$HPCbench_ROOT/benchmark/compute 11 | cd $HPCbench_ROOT/benchmark/compute 12 | git config --global http.sslVerify false 13 | git clone --depth=1 https://github.com/hpcg-benchmark/hpcg.git 14 | 15 | [ENV] 16 | export CC=mpicc CXX=mpic++ FC=mpifort 17 | export HPCG_HOME=$HPCbench_ROOT/benchmark/compute/hpcg 18 | 19 | [APP] 20 | app_name = hpcg 21 | build_dir = $HPCG_HOME 22 | binary_dir = $HPCG_HOME/bin/ 23 | case_dir = $HPCG_HOME/bin/ 24 | 25 | [BUILD] 26 | cat << \EOF > setup/Make.MPI_GCC_OMP 27 | SHELL = /bin/sh 28 | CD = cd 29 | CP = cp 30 | LN_S = ln -s -f 31 | MKDIR = mkdir -p 32 | RM = /bin/rm -f 33 | TOUCH = touch 34 | TOPdir = . 35 | SRCdir = $(TOPdir)/src 36 | INCdir = $(TOPdir)/src 37 | BINdir = $(TOPdir)/bin 38 | HPCG_INCLUDES = -I$(INCdir) -I$(INCdir)/$(arch) $(MPinc) 39 | HPCG_LIBS = 40 | HPCG_OPTS = 41 | HPCG_DEFS = $(HPCG_OPTS) $(HPCG_INCLUDES) 42 | CXX = mpicxx 43 | CXXFLAGS = $(HPCG_DEFS) -O3 -ffast-math -ftree-vectorize -fopenmp 44 | LINKER = $(CXX) 45 | LINKFLAGS = $(CXXFLAGS) 46 | ARCHIVER = ar 47 | ARFLAGS = r 48 | RANLIB = echo 49 | EOF 50 | ./configure MPI_GCC_OMP 51 | make -j 52 | 53 | [CLEAN] 54 | make clean 55 | 56 | [RUN] 57 | run = mpirun -np 64 58 | binary = xhpcg 59 | nodes = 1 60 | 61 | [BATCH] 62 | 63 | [JOB1] 64 | #!/bin/bash 65 | #SBATCH -J hpcg 66 | #SBATCH -N 2 67 | #SBATCH --ntasks-per-node {{ CPU_MAX_CORES }} 68 | #SBATCH -p {{ CPU_PARTITION }} 69 | #SBATCH --exclusive 70 | #SBATCH -o logs/hpcg.out 71 | #SBATCH -e logs/hpcg.out 72 | 73 | cd $HPCG_HOME/bin/ 74 | export UCX_NET_DEVICES=mlx5_0:1 75 | mpirun ./xhpcg --nx=104 --rt=60 76 | cp $HPCbench_ROOT/benchmark/compute/hpcg/bin/HPCG-Benchmark* $HPCbench_ROOT/result/compute/hpcg.txt 77 | -------------------------------------------------------------------------------- /templates/compute/hpl.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | hpl/2.3 
https://netlib.org/benchmark/hpl/hpl-2.3.tar.gz 6 | 7 | [DEPENDENCY] 8 | export CC=`which gcc` 9 | export CXX=`which g++` 10 | export FC=`which gfortran` 11 | mkdir -p $HPCbench_ROOT/benchmark/compute 12 | mkdir -p $HPCbench_RESULT/compute 13 | tar -xzvf $HPCbench_DOWNLOAD/hpl-2.3.tar.gz -C $HPCbench_ROOT/benchmark/compute 14 | 15 | [ENV] 16 | module use ./software/moduledeps/gcc${gcc_version_number} 17 | module load openblas/0.3.18 18 | export OMPI_MCA_btl=self,tcp 19 | export HPL_HOME=$HPCbench_ROOT/benchmark/compute/hpl-2.3 20 | 21 | [APP] 22 | app_name = hpl 23 | build_dir = $HPL_HOME 24 | binary_dir = $HPL_HOME/bin/aarch64 25 | case_dir = $HPL_HOME/bin/aarch64 26 | 27 | [BUILD] 28 | cat << \EOF > Make.aarch64 29 | SHELL = /bin/sh 30 | CD = cd 31 | CP = cp 32 | LN_S = ln -s 33 | MKDIR = mkdir 34 | RM = /bin/rm -f 35 | TOUCH = touch 36 | ARCH = aarch64 37 | TOPdir = $(HPL_HOME) 38 | INCdir = $(TOPdir)/include 39 | BINdir = $(TOPdir)/bin/$(ARCH) 40 | LIBdir = $(TOPdir)/lib/$(ARCH) 41 | HPLlib = $(LIBdir)/libhpl.a 42 | LAdir = $(OPENBLAS_PATH) 43 | LAinc = 44 | LAlib = $(LAdir)/lib/libopenblas.a 45 | F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle 46 | HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) 47 | HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) 48 | HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT 49 | HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 50 | CC = mpicc 51 | CCNOOPT = $(HPL_DEFS) 52 | CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -fopenmp -funroll-loops -W -Wall 53 | LINKER = $(CC) 54 | LINKFLAGS = $(CCFLAGS) 55 | ARCHIVER = ar 56 | ARFLAGS = r 57 | RANLIB = echo 58 | EOF 59 | make arch=aarch64 -j 60 | if [ ! 
-e ./bin/aarch64/xhpl ]; then 61 | echo "Build failed" 62 | exit 1 63 | fi 64 | echo "check if SVE exists" 65 | objdump -d bin/aarch64/xhpl | grep z0 66 | cd bin/aarch64 67 | 68 | # modify HPL.dat 69 | cat << \EOF > HPL.dat 70 | HPLinpack benchmark input file 71 | Innovative Computing Laboratory, University of Tennessee 72 | HPL.out output file name (if any) 73 | 6 device out (6=stdout,7=stderr,file) 74 | 1 # of problems sizes (N) 75 | 10000 Ns 76 | 1 # of NBs 77 | 256 NBs 78 | 0 PMAP process mapping (0=Row-,1=Column-major) 79 | 1 # of process grids (P x Q) 80 | 8 Ps 81 | 16 Qs 82 | 16.0 threshold 83 | 1 # of panel fact 84 | 2 1 0 PFACTs (0=left, 1=Crout, 2=Right) 85 | 1 # of recursive stopping criterium 86 | 1 NBMINs (>= 1) 87 | 1 # of panels in recursion 88 | 2 NDIVs 89 | 1 # of recursive panel fact. 90 | 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) 91 | 1 # of broadcast 92 | 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 93 | 1 # of lookahead depth 94 | 0 DEPTHs (>=0) 95 | 0 SWAP (0=bin-exch,1=long,2=mix) 96 | 1 swapping threshold 97 | 1 L1 in (0=transposed,1=no-transposed) form 98 | 1 U in (0=transposed,1=no-transposed) form 99 | 0 Equilibration (0=no,1=yes) 100 | 8 memory alignment in double (> 0) 101 | EOF 102 | 103 | [CLEAN] 104 | make arch=aarch64 clean 105 | rm -rf bin/aarch64 106 | 107 | [RUN] 108 | run = mpirun -np 128 109 | binary = xhpl | tee $HPCbench_RESULT/compute/hpl.txt 110 | nodes = 1 111 | 112 | [JOB1] 113 | #!/bin/bash 114 | #SBATCH -J hpl 115 | #SBATCH -N 1 116 | #SBATCH --ntasks-per-node 128 117 | #SBATCH -p {{ CPU_PARTITION }} 118 | #SBATCH --exclusive 119 | #SBATCH -o logs/hpl.out 120 | #SBATCH -e logs/hpl.out 121 | 122 | cd $HPCbench_ROOT/benchmark/compute/hpl-2.3/bin/aarch64 123 | 124 | # modify HPL.dat 125 | cat << \EOF > HPL.dat 126 | HPLinpack benchmark input file 127 | Innovative Computing Laboratory, University of Tennessee 128 | HPL.out output file name (if any) 129 | 6 device out (6=stdout,7=stderr,file) 130 | 1 # of problems sizes (N) 
131 | 170000 Ns 132 | 1 # of NBs 133 | 256 NBs 134 | 0 PMAP process mapping (0=Row-,1=Column-major) 135 | 1 # of process grids (P x Q) 136 | 8 Ps 137 | 16 Qs 138 | 16.0 threshold 139 | 1 # of panel fact 140 | 2 1 0 PFACTs (0=left, 1=Crout, 2=Right) 141 | 1 # of recursive stopping criterium 142 | 1 NBMINs (>= 1) 143 | 1 # of panels in recursion 144 | 2 NDIVs 145 | 1 # of recursive panel fact. 146 | 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) 147 | 1 # of broadcast 148 | 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 149 | 1 # of lookahead depth 150 | 0 DEPTHs (>=0) 151 | 0 SWAP (0=bin-exch,1=long,2=mix) 152 | 1 swapping threshold 153 | 1 L1 in (0=transposed,1=no-transposed) form 154 | 1 U in (0=transposed,1=no-transposed) form 155 | 0 Equilibration (0=no,1=yes) 156 | 8 memory alignment in double (> 0) 157 | EOF 158 | 159 | mpirun xhpl | tee $HPCbench_RESULT/compute/hpl.txt 160 | -------------------------------------------------------------------------------- /templates/compute/hpl.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | hpl/2.3 https://netlib.org/benchmark/hpl/hpl-2.3.tar.gz 6 | 7 | [DEPENDENCY] 8 | export CC=`which gcc` 9 | export CXX=`which g++` 10 | export FC=`which fortran` 11 | ./hpcbench -install openblas/0.3.18 gcc 12 | mkdir -p $HPCbench_ROOT/benchmark/compute 13 | tar -xzvf $HPCbench_DOWNLOAD/hpl-2.3.tar.gz -C $HPCbench_ROOT/benchmark/compute 14 | 15 | [ENV] 16 | module use ./software/moduledeps/gcc11.2.0/ 17 | module load openblas/0.3.18 18 | export HPL_HOME=$HPCbench_ROOT/benchmark/compute/hpl-2.3 19 | 20 | [APP] 21 | app_name = hpl 22 | build_dir = $HPL_HOME 23 | binary_dir = $HPL_HOME/bin/linux64 24 | case_dir = $HPL_HOME/bin/linux64 25 | 26 | [BUILD] 27 | cat << \EOF > Make.linux64 28 | SHELL = /bin/sh 29 | CD = cd 30 | CP = cp 31 | LN_S = ln -s 32 | MKDIR = mkdir 33 | RM = /bin/rm -f 34 | TOUCH = touch 35 | ARCH = linux64 36 | TOPdir = $(HPL_HOME) 37 
| INCdir = $(TOPdir)/include 38 | BINdir = $(TOPdir)/bin/$(ARCH) 39 | LIBdir = $(TOPdir)/lib/$(ARCH) 40 | HPLlib = $(LIBdir)/libhpl.a 41 | LAdir = $(OPENBLAS_PATH) 42 | LAinc = 43 | LAlib = $(LAdir)/lib/libopenblas.a 44 | F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle 45 | HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) 46 | HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) 47 | HPL_OPTS = -DHPL_DETAILED_TIMING -DHPL_PROGRESS_REPORT 48 | HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) 49 | CC = mpicc 50 | CCNOOPT = $(HPL_DEFS) 51 | CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -fopenmp -funroll-loops -W -Wall 52 | LINKER = $(CC) 53 | LINKFLAGS = $(CCFLAGS) 54 | ARCHIVER = ar 55 | ARFLAGS = r 56 | RANLIB = echo 57 | EOF 58 | make arch=linux64 -j 59 | if [ ! -e ./bin/linux64/xhpl ]; then 60 | echo "Build failed" 61 | exit 1 62 | fi 63 | echo "check if SVE exists" 64 | objdump -d bin/linux64/xhpl | grep z0 65 | 66 | cd $HPL_HOME/bin/linux64 67 | 68 | # modify HPL.dat 69 | cat << \EOF > HPL.dat 70 | HPLinpack benchmark input file 71 | Innovative Computing Laboratory, University of Tennessee 72 | HPL.out output file name (if any) 73 | 6 device out (6=stdout,7=stderr,file) 74 | 1 # of problems sizes (N) 75 | 176640 Ns 76 | 1 # of NBs 77 | 256 NBs 78 | 0 PMAP process mapping (0=Row-,1=Column-major) 79 | 1 # of process grids (P x Q) 80 | 8 Ps 81 | 8 Qs 82 | 16.0 threshold 83 | 1 # of panel fact 84 | 2 1 0 PFACTs (0=left, 1=Crout, 2=Right) 85 | 1 # of recursive stopping criterium 86 | 1 NBMINs (>= 1) 87 | 1 # of panels in recursion 88 | 2 NDIVs 89 | 1 # of recursive panel fact. 
90 | 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) 91 | 1 # of broadcast 92 | 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 93 | 1 # of lookahead depth 94 | 0 DEPTHs (>=0) 95 | 0 SWAP (0=bin-exch,1=long,2=mix) 96 | 1 swapping threshold 97 | 1 L1 in (0=transposed,1=no-transposed) form 98 | 1 U in (0=transposed,1=no-transposed) form 99 | 0 Equilibration (0=no,1=yes) 100 | 8 memory alignment in double (> 0) 101 | EOF 102 | 103 | [CLEAN] 104 | make arch=linux64 clean 105 | rm -rf bin/linux64 106 | 107 | [RUN] 108 | run = mpirun -np 64 109 | binary = xhpl | tee $HPCbench_RESULT/compute/hpl.txt 110 | nodes = 1 111 | 112 | [JOB1] 113 | #!/bin/bash 114 | #SBATCH -J hpl 115 | #SBATCH -N 2 116 | #SBATCH --ntasks-per-node 64 117 | #SBATCH -p {{ CPU_PARTITION }} 118 | #SBATCH --exclusive 119 | #SBATCH -o logs/hpl.out 120 | #SBATCH -e logs/hpl.out 121 | 122 | cd $HPCbench_ROOT/benchmark/compute/hpl-2.3/bin/linux64 123 | 124 | # modify HPL.dat 125 | cat << \EOF > HPL.dat 126 | HPLinpack benchmark input file 127 | Innovative Computing Laboratory, University of Tennessee 128 | HPL.out output file name (if any) 129 | 6 device out (6=stdout,7=stderr,file) 130 | 1 # of problems sizes (N) 131 | 176640 Ns 132 | 1 # of NBs 133 | 256 NBs 134 | 0 PMAP process mapping (0=Row-,1=Column-major) 135 | 1 # of process grids (P x Q) 136 | 8 Ps 137 | 16 Qs 138 | 16.0 threshold 139 | 3 # of panel fact 140 | 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) 141 | 2 # of recursive stopping criterium 142 | 2 4 NBMINs (>= 1) 143 | 1 # of panels in recursion 144 | 2 NDIVs 145 | 3 # of recursive panel fact. 
146 | 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) 147 | 1 # of broadcast 148 | 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 149 | 1 # of lookahead depth 150 | 0 DEPTHs (>=0) 151 | 2 SWAP (0=bin-exch,1=long,2=mix) 152 | 64 swapping threshold 153 | 0 L1 in (0=transposed,1=no-transposed) form 154 | 0 U in (0=transposed,1=no-transposed) form 155 | 1 Equilibration (0=no,1=yes) 156 | 8 memory alignment in double (> 0) 157 | EOF 158 | 159 | export UCX_NET_DEVICES=mlx5_0:1 160 | mpirun xhpl | tee $HPCbench_RESULT/compute/hpl.txt 161 | -------------------------------------------------------------------------------- /templates/network/osu.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | set -e 8 | set -x 9 | export CC=`which gcc` 10 | export CXX=`which g++` 11 | export FC=`which fortran` 12 | ./hpcbench -install osu/7.0.1 gcc 13 | mkdir -p $HPCbench_ROOT/benchmark/network 14 | mkdir -p $HPCbench_ROOT/result/network 15 | 16 | [ENV] 17 | 18 | [APP] 19 | app_name = osu 20 | build_dir = $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/ 21 | binary_dir = $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/libexec/osu-micro-benchmarks/mpi/pt2pt/ 22 | case_dir = $HPCbench_ROOT/benchmark/network 23 | 24 | [BUILD] 25 | 26 | [CLEAN] 27 | 28 | [RUN] 29 | run = mpirun -np 2 30 | binary = osu_bibw 31 | nodes = 1 32 | 33 | [BATCH] 34 | 35 | 36 | 37 | [JOB1] 38 | #!/bin/bash 39 | #SBATCH --job-name=osu_bibw 40 | #SBATCH --partition={{ CPU_PARTITION }} 41 | #SBATCH -n 2 42 | #SBATCH --ntasks-per-node=1 43 | #SBATCH --exclusive 44 | #SBATCH --output=logs/osu_bibw.out 45 | #SBATCH --error=logs/osu_bibw.out 46 | 47 | mpirun -np 2 $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bibw |tee $HPCbench_ROOT/result/network/osu_bibw.log 48 | 49 | [JOB2] 50 | #!/bin/bash 51 | #SBATCH 
--job-name=osu_latency 52 | #SBATCH --partition={{ CPU_PARTITION }} 53 | #SBATCH -n 2 54 | #SBATCH --ntasks-per-node=1 55 | #SBATCH --exclusive 56 | #SBATCH --output=logs/osu_latency.out 57 | #SBATCH --error=logs/osu_latency.out 58 | 59 | mpirun -np 2 $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_latency|tee $HPCbench_ROOT/result/network/osu_latency.log 60 | 61 | -------------------------------------------------------------------------------- /templates/network/osu.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | set -e 8 | set -x 9 | export CC=`which gcc` 10 | export CXX=`which g++` 11 | export FC=`which fortran` 12 | ./hpcbench -install osu/7.0.1 gcc 13 | mkdir -p $HPCbench_ROOT/benchmark/network 14 | mkdir -p $HPCbench_ROOT/result/network 15 | 16 | [ENV] 17 | 18 | [APP] 19 | app_name = osu 20 | build_dir = $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/ 21 | binary_dir = $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/libexec/osu-micro-benchmarks/mpi/pt2pt/ 22 | case_dir = $HPCbench_ROOT/benchmark/network 23 | 24 | [BUILD] 25 | 26 | [CLEAN] 27 | 28 | [RUN] 29 | run = mpirun -np 2 30 | binary = osu_bibw 31 | nodes = 1 32 | 33 | [BATCH] 34 | 35 | 36 | 37 | [JOB1] 38 | #!/bin/bash 39 | #SBATCH --job-name=osu_bibw 40 | #SBATCH --partition={{ CPU_PARTITION }} 41 | #SBATCH -n 2 42 | #SBATCH --ntasks-per-node=1 43 | #SBATCH --exclusive 44 | #SBATCH --output=logs/osu_bibw.out 45 | #SBATCH --error=logs/osu_bibw.out 46 | 47 | mpirun -np 2 $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bibw |tee $HPCbench_ROOT/result/network/osu_bibw.log 48 | 49 | [JOB2] 50 | #!/bin/bash 51 | #SBATCH --job-name=osu_latency 52 | #SBATCH --partition={{ CPU_PARTITION }} 53 | #SBATCH -n 2 54 | #SBATCH --ntasks-per-node=1 55 
| #SBATCH --exclusive 56 | #SBATCH --output=logs/osu_latency.out 57 | #SBATCH --error=logs/osu_latency.err 58 | 59 | mpirun -np 2 $HPCbench_ROOT/software/libs/gcc${gcc_version_number}/osu/7.0.1/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_latency|tee $HPCbench_ROOT/result/network/osu_latency.log 60 | 61 | -------------------------------------------------------------------------------- /templates/storage/ior.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | localhost 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | export CC=`which mpicc` 8 | export CXX=`which mpic++` 9 | ./hpcbench -install ior/master gcc 10 | 11 | [ENV] 12 | module use ./software/moduledeps/gcc${gcc_version_number}/ 13 | module load ior/master 14 | mkdir -p $HPCbench_ROOT/benchmark/storage/ior 15 | mkdir -p $HPCbench_ROOT/result/storage/ior 16 | 17 | [APP] 18 | app_name = ior 19 | build_dir = $IOR_PATH 20 | binary_dir = $IOR_PATH/bin 21 | case_dir = $HPCbench_ROOT/benchmark/storage/ior 22 | 23 | [BUILD] 24 | 25 | [CLEAN] 26 | 27 | [RUN] 28 | binary = ior 29 | 30 | [JOB1] 31 | #!/bin/bash 32 | #SBATCH --job-name=single_client_single_fluence 33 | #SBATCH --ntasks=1 34 | #SBATCH --ntasks-per-node=1 35 | #SBATCH --output=logs/single_client_single_fluence.out 36 | #SBATCH --error=logs/single_client_single_fluence.out 37 | #SBATCH -p {{ CPU_PARTITION }} 38 | #SBATCH --exclusive 39 | 40 | # Date Stamp for benchmark 41 | DS=`date +"%F_%H:%M:%S"` 42 | SEQ=1 43 | MAXPROCS=1 44 | IOREXE=ior 45 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 46 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 47 | 48 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2.
49 | DATA_SIZE=8 50 | while [ ${SEQ} -le ${MAXPROCS} ]; do 51 | NPROC=`expr ${NCT} \* ${SEQ}` 52 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 53 | mpirun $IOREXE -v -w -r -i 4 \ 54 | -o ${BASE_DIR}/ior-test1.file \ 55 | -t 1m -b ${BSZ} | tee ${RESULT_DIR}/single_client_single_fluence.txt 56 | SEQ=`expr ${SEQ} \* 2` 57 | done 58 | 59 | [JOB2] 60 | #!/bin/bash 61 | #SBATCH --job-name="single_client_multi_fluence" 62 | #SBATCH -N 1 63 | #SBATCH --ntasks-per-node=64 64 | #SBATCH --output=logs/single_client_multi_fluence.out 65 | #SBATCH --error=logs/single_client_multi_fluence.out 66 | #SBATCH -p {{ CPU_PARTITION }} 67 | #SBATCH --exclusive 68 | 69 | IOREXE=ior 70 | NCT=2 71 | 72 | # Date Stamp for benchmark 73 | DS=`date +"%F_%H:%M:%S"` 74 | SEQ=8 75 | MAXPROCS=8 76 | DATA_SIZE=16 77 | 78 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 79 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 80 | 81 | while [ ${SEQ} -le ${MAXPROCS} ]; do 82 | NPROC=`expr ${NCT} \* ${SEQ}` 83 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 84 | mpirun -np ${NPROC} \ 85 | ior -v -w -r -i 4 -F \ 86 | -o ${BASE_DIR}/ior-test2.file \ 87 | -t 1m -b ${BSZ} | tee ${RESULT_DIR}/single_client_multi_fluence.txt 88 | SEQ=`expr ${SEQ} \* 2` 89 | done 90 | 91 | [JOB3] 92 | #!/bin/bash 93 | #SBATCH --job-name="aggreagate_bandwidth" 94 | #SBATCH -N 2 95 | #SBATCH --ntasks-per-node=64 96 | #SBATCH --output=logs/aggreagate_bandwidth.out 97 | #SBATCH --error=logs/aggreagate_bandwidth.out 98 | #SBATCH -p {{ CPU_PARTITION }} 99 | #SBATCH --exclusive 100 | 101 | NCT=2 102 | 103 | # Date Stamp for benchmark 104 | SEQ=64 105 | MAXPROCS=128 106 | DATA_SIZE=128 107 | 108 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 109 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 110 | 111 | NCT=2 #`grep -v ^# hfile |wc -l` 112 | DS=`date +"%F_%H:%M:%S"` 113 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 
114 | 115 | while [ ${SEQ} -le ${MAXPROCS} ]; do 116 | NPROC=`expr ${NCT} \* ${SEQ}` 117 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 118 | # Alternatively, set to a static value and let the data size increase. 119 | # BSZ="1g" 120 | # BSZ="${DATA_SIZE}" 121 | mpirun \ 122 | ior -v -w -r -i 4 -F \ 123 | -o ${BASE_DIR}/ior-test3.file \ 124 | -t 1m -b ${BSZ} | tee ${RESULT_DIR}/aggregation_bandwidth.txt 125 | SEQ=`expr ${SEQ} \* 2` 126 | done 127 | 128 | [JOB4] 129 | #!/bin/bash 130 | #SBATCH --job-name="iops" 131 | #SBATCH -N 5 132 | #SBATCH --ntasks-per-node=64 133 | #SBATCH --output=logs/iops.out 134 | #SBATCH --error=logs/iops.out 135 | #SBATCH -p {{ CPU_PARTITION }} 136 | #SBATCH --exclusive 137 | 138 | NCT=2 139 | 140 | # Date Stamp for benchmark 141 | SEQ=320 142 | MAXPROCS=320 143 | DATA_SIZE=640 144 | 145 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 146 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 147 | mpirun --mca btl_openib_allow_ib true ior -vv -e -g -w -F\ 148 | -o ${BASE_DIR}/ior-test4.file \ 149 | -t 4k -b 8g | tee ${RESULT_DIR}/iops.txt 150 | SEQ=`expr ${SEQ} \* 2` 151 | 152 | -------------------------------------------------------------------------------- /templates/storage/ior.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | export CC=`which mpicc` 8 | export CXX=`which mpic++` 9 | ./hpcbench -install ior/master gcc 10 | 11 | [ENV] 12 | module use ./software/moduledeps/gcc${gcc_version_number}/ 13 | module load ior/master 14 | mkdir -p $HPCbench_ROOT/benchmark/storage/ior 15 | mkdir -p $HPCbench_ROOT/result/storage/ior 16 | 17 | [APP] 18 | app_name = ior 19 | build_dir = $IOR_PATH 20 | binary_dir = $IOR_PATH/bin 21 | case_dir = $HPCbench_ROOT/benchmark/storage/ior 22 | 23 | [BUILD] 24 | 25 | [CLEAN] 26 | 27 | [RUN] 28 | binary = ior 29 | 30 | [JOB1] 31 | #!/bin/bash 32 | #SBATCH --job-name=single_client_single_fluence 33 | #SBATCH 
--ntasks=1 34 | #SBATCH --ntasks-per-node=1 35 | #SBATCH --output=logs/single_client_single_fluence.out 36 | #SBATCH --error=logs/single_client_single_fluence.out 37 | #SBATCH -p {{ CPU_PARTITION }} 38 | 39 | # Date Stamp for benchmark 40 | DS=`date +"%F_%H:%M:%S"` 41 | SEQ=1 42 | MAXPROCS=1 43 | IOREXE=ior 44 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 45 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 46 | 47 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 48 | DATA_SIZE=8 49 | while [ ${SEQ} -le ${MAXPROCS} ]; do 50 | NPROC=`expr ${NCT} \* ${SEQ}` 51 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 52 | mpirun $IOREXE -v -w -r -i 4 \ 53 | -o ${BASE_DIR}/ior-test1.file \ 54 | -t 1m -b ${BSZ} | tee ${RESULT_DIR}/single_client_single_fluence.txt 55 | SEQ=`expr ${SEQ} \* 2` 56 | done 57 | 58 | [JOB2] 59 | #!/bin/bash 60 | #SBATCH --job-name="single_client_multi_fluence" 61 | #SBATCH -N 1 62 | #SBATCH --ntasks-per-node={{ CPU_MAX_CORES }} 63 | #SBATCH --output=logs/single_client_multi_fluence.out 64 | #SBATCH --error=logs/single_client_multi_fluence.out 65 | #SBATCH -p {{ CPU_PARTITION }} 66 | 67 | IOREXE=ior 68 | NCT=2 69 | 70 | # Date Stamp for benchmark 71 | DS=`date +"%F_%H:%M:%S"` 72 | SEQ=8 73 | MAXPROCS=8 74 | DATA_SIZE=16 75 | 76 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 77 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 78 | 79 | while [ ${SEQ} -le ${MAXPROCS} ]; do 80 | NPROC=`expr ${NCT} \* ${SEQ}` 81 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 82 | mpirun -np ${NPROC} \ 83 | ior -v -w -r -i 4 -F \ 84 | -o ${BASE_DIR}/ior-test2.file \ 85 | -t 1m -b ${BSZ} | tee ${RESULT_DIR}/single_client_multi_fluence.txt 86 | SEQ=`expr ${SEQ} \* 2` 87 | done 88 | 89 | [JOB3] 90 | #!/bin/bash 91 | #SBATCH --job-name="aggreagate_bandwidth" 92 | #SBATCH -N 2 93 | #SBATCH --ntasks-per-node={{ CPU_MAX_CORES }} 94 | #SBATCH --output=logs/aggreagate_bandwidth.out 95 | #SBATCH --error=logs/aggreagate_bandwidth.out 96 | #SBATCH -p {{ CPU_PARTITION }} 97 | 98 
| NCT=2 99 | 100 | # Date Stamp for benchmark 101 | SEQ=64 102 | MAXPROCS=128 103 | DATA_SIZE=128 104 | 105 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 106 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 107 | 108 | NCT=2 #`grep -v ^# hfile |wc -l` 109 | DS=`date +"%F_%H:%M:%S"` 110 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 111 | 112 | while [ ${SEQ} -le ${MAXPROCS} ]; do 113 | NPROC=`expr ${NCT} \* ${SEQ}` 114 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 115 | # Alternatively, set to a static value and let the data size increase. 116 | # BSZ="1g" 117 | # BSZ="${DATA_SIZE}" 118 | mpirun \ 119 | ior -v -w -r -i 4 -F \ 120 | -o ${BASE_DIR}/ior-test3.file \ 121 | -t 1m -b ${BSZ} | tee ${RESULT_DIR}/aggregation_bandwidth.txt 122 | SEQ=`expr ${SEQ} \* 2` 123 | done 124 | 125 | [JOB4] 126 | #!/bin/bash 127 | #SBATCH --job-name="iops" 128 | #SBATCH -N 5 129 | #SBATCH --ntasks-per-node={{ CPU_MAX_CORES }} 130 | #SBATCH --output=logs/iops.out 131 | #SBATCH --error=logs/iops.out 132 | #SBATCH -p {{ CPU_PARTITION }} 133 | 134 | NCT=2 135 | 136 | # Date Stamp for benchmark 137 | SEQ=320 138 | MAXPROCS=320 139 | DATA_SIZE=640 140 | 141 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/ior 142 | RESULT_DIR=$HPCbench_ROOT/result/storage/ior 143 | mpirun --mca btl_openib_allow_ib true ior -vv -e -g -w -F\ 144 | -o ${BASE_DIR}/ior-test4.file \ 145 | -t 4k -b 8g | tee ${RESULT_DIR}/iops.txt 146 | SEQ=`expr ${SEQ} \* 2` 147 | 148 | -------------------------------------------------------------------------------- /templates/storage/protocol/hadoop.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | hadoop/3.3.5 https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz 6 | 7 | [DEPENDENCY] 8 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/hadoop_data 9 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/hadoop 10 | mkdir -p 
$HPCbench_ROOT/result/storage/protocol/hadoop 11 | tar -xzf $HPCbench_DOWNLOAD/hadoop-3.3.5.tar.gz -C $HPCbench_ROOT/benchmark/storage/protocol/hadoop 12 | cd $HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5 13 | ## 配置JAVA路径、HADOOP路径 14 | ## vim etc/hadoop/hadoop-env.sh 15 | echo "export JAVA_HOME="/usr"" >> etc/hadoop/hadoop-env.sh 16 | echo "export HADOOP_HOME="$HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5"" >> etc/hadoop/hadoop-env.sh 17 | export HADOOP_HOME=$HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5 18 | export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin 19 | 20 | ## 配置hadoop访问ip和数据存储目录 21 | cat > etc/hadoop/core-site.xml << \EOF 22 | 23 | 24 | fs.defaultFS 25 | hdfs://localhost:8020 26 | 27 | 28 | hadoop.tmp.dir 29 | {{ HADOOP_DATA }} 30 | 31 | 32 | EOF 33 | 34 | ## 配置hdfs复制数 35 | cat > etc/hadoop/hdfs-site.xml << \EOF 36 | 37 | 38 | dfs.replication 39 | 1 40 | 41 | 42 | EOF 43 | # 3. 启动 44 | hdfs namenode -format 45 | hadoop-daemon.sh start namenode 46 | hadoop-daemon.sh start datanode 47 | 48 | [ENV] 49 | export HADOOP_HOME=$HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5 50 | export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin 51 | export RESULT_DIR=$HPCbench_ROOT/result/storage/protocol/hadoop 52 | 53 | [APP] 54 | app_name = hadoop 55 | build_dir = $HADOOP_HOME 56 | binary_dir = $HADOOP_HOME 57 | case_dir = $HADOOP_HOME 58 | 59 | [BUILD] 60 | 61 | [CLEAN] 62 | 63 | [RUN] 64 | binary = hadoop 65 | run = echo 66 | nodes = 1 67 | 68 | 69 | [BATCH] 70 | for i in 1 2 4 8 16; do 71 | hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.5-tests.jar TestDFSIO -write -nrFiles $i -fileSize 1GB -resFile $RESULT_DIR/hdfs_write.log 72 | hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.5-tests.jar TestDFSIO -read -nrFiles $i -fileSize 1GB -resFile $RESULT_DIR/hdfs_read.log 73 | done 74 | 
-------------------------------------------------------------------------------- /templates/storage/protocol/hadoop.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | hadoop/3.3.5 https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz 6 | 7 | [DEPENDENCY] 8 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/hadoop_data 9 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/hadoop 10 | mkdir -p $HPCbench_ROOT/result/storage/protocol/hadoop 11 | tar -xzf $HPCbench_DOWNLOAD/hadoop-3.3.5.tar.gz -C $HPCbench_ROOT/benchmark/storage/protocol/hadoop 12 | cd $HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5 13 | ## 配置JAVA路径、HADOOP路径 14 | ## vim etc/hadoop/hadoop-env.sh 15 | echo "export JAVA_HOME="/usr"" >> etc/hadoop/hadoop-env.sh 16 | echo "export HADOOP_HOME="$HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5"" >> etc/hadoop/hadoop-env.sh 17 | export HADOOP_HOME=$HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5 18 | export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin 19 | 20 | ## 配置hadoop访问ip和数据存储目录 21 | cat > etc/hadoop/core-site.xml << \EOF 22 | 23 | 24 | fs.defaultFS 25 | hdfs://localhost:8020 26 | 27 | 28 | hadoop.tmp.dir 29 | {{ HADOOP_DATA }} 30 | 31 | 32 | EOF 33 | 34 | ## 配置hdfs复制数 35 | cat > etc/hadoop/hdfs-site.xml << \EOF 36 | 37 | 38 | dfs.replication 39 | 1 40 | 41 | 42 | EOF 43 | # 3. 
启动 44 | hdfs namenode -format 45 | hadoop-daemon.sh start namenode 46 | hadoop-daemon.sh start datanode 47 | 48 | [ENV] 49 | export HADOOP_HOME=$HPCbench_ROOT/benchmark/storage/protocol/hadoop/hadoop-3.3.5 50 | export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin 51 | export RESULT_DIR=$HPCbench_ROOT/result/storage/protocol/hadoop 52 | 53 | [APP] 54 | app_name = hadoop 55 | build_dir = $HADOOP_HOME 56 | binary_dir = $HADOOP_HOME 57 | case_dir = $HADOOP_HOME 58 | 59 | [BUILD] 60 | 61 | [CLEAN] 62 | 63 | [RUN] 64 | binary = hadoop 65 | run = echo 66 | nodes = 1 67 | 68 | 69 | [BATCH] 70 | for i in 1 2 4 8 16; do 71 | hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.5-tests.jar TestDFSIO -write -nrFiles $i -fileSize 1GB -resFile $RESULT_DIR/hdfs_write.log 72 | hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.5-tests.jar TestDFSIO -read -nrFiles $i -fileSize 1GB -resFile $RESULT_DIR/hdfs_read.log 73 | done 74 | -------------------------------------------------------------------------------- /templates/storage/protocol/nfs.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | 8 | [ENV] 9 | module use ./software/moduledeps/gcc${gcc_version_number}/ 10 | module load ior/master 11 | 12 | [APP] 13 | app_name = ior 14 | build_dir = $IOR_PATH 15 | binary_dir = $IOR_PATH/bin 16 | case_dir = $HPCbench_ROOT/benchmark/storage/protocol/nfs_test 17 | 18 | [BUILD] 19 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/nfs_test 20 | mkdir -p $HPCbench_ROOT/result/storage/protocol/nfs_test 21 | echo "${HPCbench_ROOT}/benchmark/storage/protocol/nfs_test *(rw,no_root_squash,sync)" >> /etc/exports 22 | exportfs -r 23 | showmount -e localhost 24 | mkdir /mnt/nfs 25 | mount -t nfs localhost:${HPCbench_ROOT}/benchmark/storage/protocol/nfs_test /mnt/nfs 26 | 27 | [CLEAN] 28 | 29 | [RUN] 30 | binary = ior 
31 | 32 | 33 | [BATCH] 34 | DS=`date +"%F_%H:%M:%S"` 35 | # IOR will be run in a loop, doubling the number of processes per client node 36 | # with every iteration from $SEQ -> $MAXPROCS. If SEQ=1 and MAXPROCS=8, then the 37 | # iterations will be 1, 2, 4, 8 processes per node. 38 | # SEQ and MAXPROCS should be a power of 2 (including 2^0). 39 | SEQ=1 40 | MAXPROCS=1 41 | 42 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 43 | DATA_SIZE=8 44 | 45 | BASE_DIR=/mnt/nfs 46 | NCT=1 47 | 48 | mkdir -p ${BASE_DIR} 49 | 50 | while [ ${SEQ} -le ${MAXPROCS} ]; do 51 | NPROC=`expr ${NCT} \* ${SEQ}` 52 | # Pick a reasonable block size, bearing in mind the size of the target file system. 53 | # Bear in mind that the overall data size will be block size * number of processes. 54 | # Block size must be a multiple of transfer size (-t option in command line). 55 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 56 | # Alternatively, set to a static value and let the data size increase. 
57 | # BSZ="1g" 58 | # BSZ="${DATA_SIZE}" 59 | mpirun --allow-run-as-root ior -v -w -r -i 4 \ 60 | -o ${BASE_DIR}/ior-test.file \ 61 | -t 1m -b ${BSZ} | tee $HPCbench_ROOT/result/storage/protocol/nfs_test/nfs.txt 62 | SEQ=`expr ${SEQ} \* 2` 63 | done 64 | -------------------------------------------------------------------------------- /templates/storage/protocol/nfs.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | 8 | [ENV] 9 | module use ./software/moduledeps/gcc${gcc_version_number}/ 10 | module load ior/master 11 | 12 | [APP] 13 | app_name = ior 14 | build_dir = $IOR_PATH 15 | binary_dir = $IOR_PATH/bin 16 | case_dir = $HPCbench_ROOT/benchmark/storage/protocol/nfs_test 17 | 18 | [BUILD] 19 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/nfs_test 20 | mkdir -p $HPCbench_ROOT/result/storage/protocol/nfs_test 21 | echo "${HPCbench_ROOT}/benchmark/storage/protocol/nfs_test *(rw,no_root_squash,sync)" >> /etc/exports 22 | exportfs -r 23 | showmount -e localhost 24 | mkdir /mnt/nfs 25 | mount -t nfs localhost:${HPCbench_ROOT}/benchmark/storage/protocol/nfs_test /mnt/nfs 26 | 27 | [CLEAN] 28 | 29 | [RUN] 30 | binary = ior 31 | 32 | 33 | [BATCH] 34 | DS=`date +"%F_%H:%M:%S"` 35 | # IOR will be run in a loop, doubling the number of processes per client node 36 | # with every iteration from $SEQ -> $MAXPROCS. If SEQ=1 and MAXPROCS=8, then the 37 | # iterations will be 1, 2, 4, 8 processes per node. 38 | # SEQ and MAXPROCS should be a power of 2 (including 2^0). 39 | SEQ=1 40 | MAXPROCS=1 41 | 42 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 43 | DATA_SIZE=8 44 | 45 | BASE_DIR=/mnt/nfs 46 | NCT=1 47 | 48 | mkdir -p ${BASE_DIR} 49 | 50 | while [ ${SEQ} -le ${MAXPROCS} ]; do 51 | NPROC=`expr ${NCT} \* ${SEQ}` 52 | # Pick a reasonable block size, bearing in mind the size of the target file system. 
53 | # Bear in mind that the overall data size will be block size * number of processes. 54 | # Block size must be a multiple of transfer size (-t option in command line). 55 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 56 | # Alternatively, set to a static value and let the data size increase. 57 | # BSZ="1g" 58 | # BSZ="${DATA_SIZE}" 59 | mpirun --allow-run-as-root ior -v -w -r -i 4 \ 60 | -o ${BASE_DIR}/ior-test.file \ 61 | -t 1m -b ${BSZ} | tee $HPCbench_ROOT/result/storage/protocol/nfs_test/nfs.txt 62 | SEQ=`expr ${SEQ} \* 2` 63 | done 64 | -------------------------------------------------------------------------------- /templates/storage/protocol/nfs_environment.md: -------------------------------------------------------------------------------- 1 | # 3. 部署NFS(nfs协议) 2 | 3 | ```bash 4 | # 1. 安装nfs-utils和rpcbind 5 | $ yum install -y nfs-utils rpcbind 6 | 7 | # 2. 启用nfs和rpcbind 8 | $ systemctl enable --now rpcbind 9 | $ systemctl enable --now nfs-server 10 | 11 | # 3. 配置要共享的文件夹 12 | $ vim /etc/exports 13 | /dssg/test/protocol_test/nfs_data *(rw,no_root_squash,sync) 14 | #此文件的配置格式为:<输出目录> [客户端1 选项(访问权限,用户映射,其他)] [客户端2 选项(访问权限,用户映射,其他)] 15 | #rw read-write,可读写; 16 | #sync:文件同时写入硬盘和内存; 17 | #async:文件暂存于内存,而不是直接写入内存; 18 | #no_root_squash:NFS客户端连接服务端时如果使用的是root的话,那么对服务端分享的目录来说,也拥有root权限。显然开启这项是不安全的。 19 | #root_squash:NFS客户端连接服务端时如果使用的是root的话,那么对服务端分享的目录来说,拥有匿名用户权限,通常他将使用nobody或nfsnobody身份; 20 | #all_squash:不论NFS客户端连接服务端时使用什么用户,对服务端分享的目录来说都是拥有匿名用户权限; 21 | #anonuid:匿名用户的UID值,通常是nobody或nfsnobody,可以在此处自行设定; 22 | #anongid:匿名用户的GID值。 23 | $ exportfs -r 24 | 25 | # 4. 列出nfs共享目录 26 | $ showmount -e localhost 27 | 28 | # 5. 挂载nfs 29 | $ mkdir /mnt/nfs 30 | $ mount -t nfs localhost:/dssg/test/protocol_test/nfs_data /mnt/nfs 31 | ``` 32 | -------------------------------------------------------------------------------- /templates/storage/protocol/nfs_environment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 1. 
安装nfs-utils和rpcbind 4 | #yum install -y nfs-utils rpcbind 5 | 6 | # 2. 启用nfs和rpcbind 7 | #systemctl enable --now rpcbind 8 | #systemctl enable --now nfs-server 9 | 10 | # 3. 配置要共享的文件夹 11 | mkdir -p /dssg/test/protocol_test/nfs_data 12 | echo "/dssg/test/protocol_test/nfs_data *(rw,no_root_squash,sync)" >> /etc/exports 13 | 14 | #此文件的配置格式为:<输出目录> [客户端1 选项(访问权限,用户映射,其他)] [客户端2 选项(访问权限,用户映射,其他)] 15 | #rw read-write,可读写; 16 | #sync:文件同时写入硬盘和内存; 17 | #async:文件暂存于内存,而不是直接写入内存; 18 | #no_root_squash:NFS客户端连接服务端时如果使用的是root的话,那么对服务端分享的目录来说,也拥有root权限。显然开启这项是不安全的。 19 | #root_squash:NFS客户端连接服务端时如果使用的是root的话,那么对服务端分享的目录来说,拥有匿名用户权限,通常他将使用nobody或nfsnobody身份; 20 | #all_squash:不论NFS客户端连接服务端时使用什么用户,对服务端分享的目录来说都是拥有匿名用户权限; 21 | #anonuid:匿名用户的UID值,通常是nobody或nfsnobody,可以在此处自行设定; 22 | #anongid:匿名用户的GID值。 23 | exportfs -r 24 | 25 | # 4. 列出nfs共享目录 26 | showmount -e localhost 27 | 28 | # 5. 挂载nfs 29 | mkdir /mnt/nfs 30 | mount -t nfs localhost:/dssg/test/protocol_test/nfs_data /mnt/nfs 31 | -------------------------------------------------------------------------------- /templates/storage/protocol/posix.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | 8 | [ENV] 9 | module use ./software/moduledeps/gcc${gcc_version_number}/ 10 | module load ior/master 11 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/posix_test 12 | mkdir -p $HPCbench_ROOT/result/storage/protocol/posix_test 13 | 14 | [APP] 15 | app_name = ior 16 | build_dir = $IOR_PATH 17 | binary_dir = $IOR_PATH/bin 18 | case_dir = $HPCbench_ROOT/benchmark/storage/protocol/posix_test 19 | 20 | [BUILD] 21 | 22 | [CLEAN] 23 | 24 | [RUN] 25 | binary = ior 26 | 27 | 28 | [BATCH] 29 | # Node count -- not very accurate 30 | #NCT=`grep -v ^# hfile |wc -l` 31 | 32 | # Date Stamp for benchmark 33 | DS=`date +"%F_%H:%M:%S"` 34 | # IOR will be run in a loop, doubling the number of processes per client node 35 | # with every 
iteration from $SEQ -> $MAXPROCS. If SEQ=1 and MAXPROCS=8, then the 36 | # iterations will be 1, 2, 4, 8 processes per node. 37 | # SEQ and MAXPROCS should be a power of 2 (including 2^0). 38 | SEQ=1 39 | MAXPROCS=1 40 | 41 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 42 | DATA_SIZE=8 43 | 44 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/protocol/posix_test 45 | RESULT_DIR=$HPCbench_ROOT/result/storage/protocol/posix_test 46 | NCT=1 47 | 48 | mkdir -p ${BASE_DIR} 49 | 50 | while [ ${SEQ} -le ${MAXPROCS} ]; do 51 | NPROC=`expr ${NCT} \* ${SEQ}` 52 | # Pick a reasonable block size, bearing in mind the size of the target file system. 53 | # Bear in mind that the overall data size will be block size * number of processes. 54 | # Block size must be a multiple of transfer size (-t option in command line). 55 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 56 | # Alternatively, set to a static value and let the data size increase. 57 | # BSZ="1g" 58 | # BSZ="${DATA_SIZE}" 59 | mpirun --allow-run-as-root ior -v -w -r -i 4 \ 60 | -o ${BASE_DIR}/ior-test.file \ 61 | -t 1m -b ${BSZ} | tee $RESULT_DIR/posix.txt 62 | SEQ=`expr ${SEQ} \* 2` 63 | done 64 | -------------------------------------------------------------------------------- /templates/storage/protocol/posix.x86_64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | 8 | [ENV] 9 | module use ./software/moduledeps/gcc${gcc_version_number}/ 10 | module load ior/master 11 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/posix_test 12 | mkdir -p $HPCbench_ROOT/result/storage/protocol/posix_test 13 | 14 | [APP] 15 | app_name = ior 16 | build_dir = $IOR_PATH 17 | binary_dir = $IOR_PATH/bin 18 | case_dir = $HPCbench_ROOT/benchmark/storage/protocol/posix_test 19 | 20 | [BUILD] 21 | 22 | [CLEAN] 23 | 24 | [RUN] 25 | binary = ior 26 | 27 | 28 | [BATCH] 29 | # Node count -- not very accurate 30 | 
#NCT=`grep -v ^# hfile |wc -l` 31 | 32 | # Date Stamp for benchmark 33 | DS=`date +"%F_%H:%M:%S"` 34 | # IOR will be run in a loop, doubling the number of processes per client node 35 | # with every iteration from $SEQ -> $MAXPROCS. If SEQ=1 and MAXPROCS=8, then the 36 | # iterations will be 1, 2, 4, 8 processes per node. 37 | # SEQ and MAXPROCS should be a power of 2 (including 2^0). 38 | SEQ=1 39 | MAXPROCS=1 40 | 41 | # Overall data set size in GiB. Must be >=MAXPROCS. Should be a power of 2. 42 | DATA_SIZE=8 43 | 44 | BASE_DIR=$HPCbench_ROOT/benchmark/storage/protocol/posix_test 45 | RESULT_DIR=$HPCbench_ROOT/result/storage/protocol/posix_test 46 | NCT=1 47 | 48 | mkdir -p ${BASE_DIR} 49 | 50 | while [ ${SEQ} -le ${MAXPROCS} ]; do 51 | NPROC=`expr ${NCT} \* ${SEQ}` 52 | # Pick a reasonable block size, bearing in mind the size of the target file system. 53 | # Bear in mind that the overall data size will be block size * number of processes. 54 | # Block size must be a multiple of transfer size (-t option in command line). 55 | BSZ=`expr ${DATA_SIZE} / ${SEQ}`"g" 56 | # Alternatively, set to a static value and let the data size increase. 
57 | # BSZ="1g" 58 | # BSZ="${DATA_SIZE}" 59 | mpirun --allow-run-as-root ior -v -w -r -i 4 \ 60 | -o ${BASE_DIR}/ior-test.file \ 61 | -t 1m -b ${BSZ} | tee $RESULT_DIR/posix.txt 62 | SEQ=`expr ${SEQ} \* 2` 63 | done 64 | -------------------------------------------------------------------------------- /templates/storage/protocol/warp.aarch64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | minio/latest https://dl.min.io/server/minio/release/linux-arm64/minio 6 | warp/v0.6.7 https://github.com/minio/warp/releases/download/v0.6.7/warp_Linux_arm64.tar.gz 7 | 8 | [DEPENDENCY] 9 | chmod +x $HPCbench_DOWNLOAD/minio 10 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/mino_data 11 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/mino 12 | mkdir -p $HPCbench_ROOT/result/storage/protocol/mino 13 | cp $HPCbench_DOWNLOAD/minio $HPCbench_ROOT/benchmark/storage/protocol/mino/ 14 | 15 | nohup $HPCbench_ROOT/benchmark/storage/protocol/mino/minio server $HPCbench_ROOT/benchmark/storage/protocol/mino_data/minio_data/ & 16 | tar -xzf $HPCbench_DOWNLOAD/warp_Linux_arm64.tar.gz -C $HPCbench_ROOT/benchmark/storage/protocol/mino/ 17 | 18 | [ENV] 19 | export WARP_HOME=$HPCbench_ROOT/benchmark/storage/protocol/mino 20 | export PATH=:$PATH:$WARP_HOME 21 | export RESULT_DIR=$HPCbench_ROOT/result/storage/protocol/mino 22 | 23 | [APP] 24 | app_name = warp 25 | build_dir = $WARP_HOME 26 | binary_dir = $WARP_HOME 27 | case_dir = $WARP_HOME 28 | 29 | [BUILD] 30 | 31 | [CLEAN] 32 | 33 | [RUN] 34 | binary = warp 35 | run = echo 36 | nodes = 1 37 | 38 | [BATCH] 39 | for i in 16 32 64 128 256; do 40 | warp get --host=localhost:9000 --access-key=minioadmin --secret-key=minioadmin --autoterm --concurrent $i | tee $RESULT_DIR/warp.txt 41 | done 42 | -------------------------------------------------------------------------------- /templates/storage/protocol/warp.x86_64.config: 
-------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | minio/latest https://dl.min.io/server/minio/release/linux-amd64/minio 6 | warp/v0.6.7 https://github.com/minio/warp/releases/download/v0.6.7/warp_Linux_x86_64.tar.gz 7 | 8 | [DEPENDENCY] 9 | chmod +x $HPCbench_DOWNLOAD/minio 10 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/mino_data 11 | mkdir -p $HPCbench_ROOT/benchmark/storage/protocol/mino 12 | mkdir -p $HPCbench_ROOT/result/storage/protocol/mino 13 | cp $HPCbench_DOWNLOAD/minio $HPCbench_ROOT/benchmark/storage/protocol/mino/ 14 | 15 | nohup $HPCbench_ROOT/benchmark/storage/protocol/mino/minio server $HPCbench_ROOT/benchmark/storage/protocol/mino_data/minio_data/ & 16 | tar -xzf $HPCbench_DOWNLOAD/warp_Linux_x86_64.tar.gz -C $HPCbench_ROOT/benchmark/storage/protocol/mino/ 17 | 18 | [ENV] 19 | export WARP_HOME=$HPCbench_ROOT/benchmark/storage/protocol/mino 20 | export PATH=:$PATH:$WARP_HOME 21 | export RESULT_DIR=$HPCbench_ROOT/result/storage/protocol/mino 22 | 23 | [APP] 24 | app_name = warp 25 | build_dir = $WARP_HOME 26 | binary_dir = $WARP_HOME 27 | case_dir = $WARP_HOME 28 | 29 | [BUILD] 30 | 31 | [CLEAN] 32 | 33 | [RUN] 34 | binary = warp 35 | run = echo 36 | nodes = 1 37 | 38 | [BATCH] 39 | for i in 16 32 64 128 256; do 40 | warp get --host=localhost:9000 --access-key=minioadmin --secret-key=minioadmin --autoterm --concurrent $i | tee $RESULT_DIR/warp.txt 41 | done 42 | -------------------------------------------------------------------------------- /templates/system/system.linux64.config: -------------------------------------------------------------------------------- 1 | [SERVER] 2 | 11.11.11.11 3 | 4 | [DOWNLOAD] 5 | 6 | [DEPENDENCY] 7 | 8 | [ENV] 9 | export RESULT_DIR=$HPCbench_ROOT/result/system 10 | export PYTHONPATH=$HPCbench_ROOT 11 | mkdir -p $RESULT_DIR 12 | 13 | [APP] 14 | app_name = system 15 | build_dir = $HPCbench_ROOT 16 | binary_dir = $HPCbench_ROOT 17 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Config parsing for hpcbench (utils/app.py) plus the Build stage wrapper.

App loads the active data.config-style file, splits it into its
[SECTION] blocks and publishes them as class-level attributes that the
rest of the tool reads.
"""
import os
import re
import platform

from utils.tool import Tool
from loguru import logger
from setting import APP_CONFIG
from setting import ROOT_DIR
from utils.execute import Execute


class Singleton(type):
    """Metaclass that gives every class using it exactly one instance."""

    def __init__(cls, name, bases, namespace):
        super().__init__(name, bases, namespace)
        cls._instance = None

    def __call__(cls, *args, **kwargs):
        # Create on first use, then always hand back the cached instance.
        if cls._instance is None:
            cls._instance = super().__call__(*args, **kwargs)
        return cls._instance


class App(object, metaclass=Singleton):
    """Singleton holding the parsed contents of the active config file.

    Parsed values are stored on the class itself so sibling modules can
    read them as ``App.attr`` without holding the instance.
    """

    # Hardware info
    hosts = ''

    # Dependency info
    dependency = ''
    modules = ''
    env_file = 'env.sh'

    # Application info
    app_name = ''
    build_dir = ''
    binary_dir = ''
    case_dir = ''
    binary_file = ''
    binary_para = ''

    # Command info
    build_cmd = ''
    clean_cmd = ''
    run_cmd = {}
    batch_cmd = ''
    job_cmd = []

    # Other info
    config_file = 'data.config'
    meta_file = '.meta'
    download_list = ''

    def __init__(self):
        self.tool = Tool()
        self.exe = Execute()
        self.data_process()

    def get_abspath(self, file):
        """Return *file* resolved against the project root."""
        return os.path.join(ROOT_DIR, file)

    def get_config_file(self):
        """Resolve the config file name.

        Priority: APP_CONFIG env setting > .meta marker > data.config.
        """
        if APP_CONFIG is not None:
            logger.info(f"Load Config file from ENV: {APP_CONFIG}")
            return APP_CONFIG
        if not os.path.exists(App.meta_file):
            return App.config_file
        return self.tool.read_file(App.meta_file)

    def get_config_data(self):
        """Read the resolved config file, falling back to data.config."""
        config_file = self.get_config_file()
        file_path = self.get_abspath(config_file)
        if not os.path.exists(file_path):
            logger.info("config file not found, switch to default data.config.")
            file_path = self.get_abspath(App.config_file)
        with open(file_path, encoding='utf-8') as file:
            contents = file.read()
        return contents.strip()

    def is_empty(self, content):
        """True for empty or whitespace-only content."""
        return len(content) == 0 or content.isspace() or content == '\n'

    def read_rows(self, rows, start_row, needs_strip=True):
        """Collect lines until the next '[SECTION]' header.

        Returns (index_of_next_header, joined_text).
        """
        data = ''
        row = rows[start_row]
        if needs_strip:
            row = row.strip()
        while not row.startswith('['):
            if not self.is_empty(row):
                data += row + '\n'
            start_row += 1
            if start_row == len(rows):
                break
            row = rows[start_row]
            if needs_strip:
                row = row.strip()
        return start_row, data

    def read_rows_kv(self, rows, start_row):
        """Collect 'key = value' lines until the next '[SECTION]' header.

        Returns (index_of_next_header, dict).
        """
        data = {}
        row = rows[start_row].strip()
        while not row.startswith('['):
            if '=' in row:
                key, value = row.split('=', 1)
                data[key.strip()] = value.strip()
            start_row += 1
            if start_row == len(rows):
                break
            row = rows[start_row].strip()
        return start_row, data

    def set_app_info(self, data):
        """Publish the [APP] section fields onto the class."""
        App.app_name = data['app_name']
        App.build_dir = data['build_dir']
        App.binary_dir = data['binary_dir']
        App.case_dir = data['case_dir']

    def split_two_part(self, data):
        """Split on the first space: 'ior -v -w' -> ('ior', '-v -w')."""
        split_list = data.split(' ', 1)
        first = split_list[0]
        second = split_list[1] if len(split_list) > 1 else ''
        return (first, second)

    def data_integration(self, config_data):
        """Map the raw section texts onto the App class attributes."""
        App.hosts = config_data.get('[SERVER]', '')
        App.download_list = config_data.get('[DOWNLOAD]', '')
        App.dependency = self.tool.chomp_cmd(config_data.get('[DEPENDENCY]', ''), flag=False)
        App.modules = config_data.get('[ENV]', '')
        App.build_cmd = config_data.get('[BUILD]', '')
        App.clean_cmd = config_data.get('[CLEAN]', '')
        App.run_cmd = config_data.get('[RUN]', '')
        App.batch_cmd = config_data.get('[BATCH]', '')
        data = config_data.get('[APP]', '')
        self.set_app_info(data)
        App.binary_file, App.binary_para = self.split_two_part(App.run_cmd['binary'])

    def data_process(self):
        """Parse the whole config file section by section."""
        contents = self.get_config_data()
        rows = contents.split('\n')
        row_index = 0
        # Sections whose bodies keep their indentation get needs_strip=False.
        handlers = {
            '[SERVER]': lambda rows, idx: self.read_rows(rows, idx + 1),
            '[DOWNLOAD]': lambda rows, idx: self.read_rows(rows, idx + 1, False),
            '[DEPENDENCY]': lambda rows, idx: self.read_rows(rows, idx + 1, False),
            '[ENV]': lambda rows, idx: self.read_rows(rows, idx + 1),
            '[APP]': lambda rows, idx: self.read_rows_kv(rows, idx + 1),
            '[BUILD]': lambda rows, idx: self.read_rows(rows, idx + 1, False),
            '[CLEAN]': lambda rows, idx: self.read_rows(rows, idx + 1),
            '[RUN]': lambda rows, idx: self.read_rows_kv(rows, idx + 1),
            '[BATCH]': lambda rows, idx: self.read_rows(rows, idx + 1, False),
        }
        config_data = {}
        while row_index < len(rows):
            row = rows[row_index].strip()
            if row in handlers:
                row_index, config_data[row] = handlers[row](rows, row_index)
            else:
                row_index += 1
        self.data_integration(config_data)
        self.get_jobs(contents)

    def get_env(self):
        """Commands that recreate the application environment."""
        cmd = f'''
source ./init.sh
./hpcbench -e
source ./{App.env_file}
'''
        return self.tool.chomp_cmd(cmd, flag=True)

    def source_env(self):
        """Write the [ENV] section to env.sh and make it executable."""
        print(f"Set environment for {App.app_name}")
        env_file = os.path.join(ROOT_DIR, App.env_file)
        self.tool.write_file(env_file, App.modules)
        print(f"ENV FILE {App.env_file} GENERATED.")
        self.exe.exec_raw(f'chmod +x {App.env_file}', show=False)

    def get_run_cmd(self):
        """Assemble the full run command (env + cd + run line + binary)."""
        hostfile = ''
        nodes = int(App.run_cmd['nodes'])
        if nodes > 1:
            hostfile = f'--hostfile {ROOT_DIR}/hostfile'
        cmd = App.run_cmd['run']
        if 'mpi' in cmd:
            cmd = cmd.replace('mpirun', f'mpirun {hostfile}')
        binary = os.path.join(App.binary_dir, App.binary_file)
        cmd = f'{cmd} {binary} {App.binary_para}'
        cmd = f'''\
{self.get_env()}
\ncd {App.case_dir}
\n{cmd}
'''
        return self.tool.chomp_cmd(cmd)

    def get_jobs(self, contents):
        """Extract every [JOBn] section body into App.job_cmd.

        A job ends at the next [JOBn+1] tag when present, otherwise at
        the first '[SECTION]' header after it, otherwise at end of file.
        """
        pattern = re.compile(r'(^\[)', re.M)
        all_starts = [match.start() for match in pattern.finditer(contents)]
        start_tag = '[JOB'

        i = 1
        while True:
            job_start = contents.find(start_tag + str(i))
            if job_start == -1:
                break
            job_end = contents.find(start_tag + str(i + 1))
            if job_end == -1:
                # BUGFIX: was an O(n^2) map/list.index scan wrapped in a
                # fragile for/else; take the first section header after
                # this job, or EOF when none follows.
                job_end = next((pos for pos in all_starts if pos > job_start),
                               len(contents))
            job_content = contents[job_start + len(start_tag + str(i)) + 1:job_end].strip()
            i += 1
            App.job_cmd.append(job_content)


class Build:
    """Build/clean stage runner (utils/build.py)."""

    def __init__(self):
        self.app = App()
        self.exe = Execute()
        self.tool = Tool()

    def _run_stage(self, action, stage_cmd, script_name):
        # clean() and build() were verbatim copies of each other; the
        # shared script-generation logic lives here once.
        print(f"Start {action} {App.app_name}")
        cmd = f'''\
{self.app.get_env()}
\ncd {App.build_dir}
\n{stage_cmd}
'''
        self.tool.write_file(script_name, self.tool.chomp_cmd(cmd))
        run_cmd = f'''\
chmod +x {script_name}
bash ./{script_name}
'''
        self.exe.exec_raw(run_cmd)

    def clean(self):
        """Run the [CLEAN] commands inside the build directory."""
        self._run_stage('clean', App.clean_cmd, 'clean.sh')

    def build(self):
        """Run the [BUILD] commands inside the build directory."""
        self._run_stage('build', App.build_cmd, 'build.sh')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Config switching, package download and command execution helpers
(utils/config.py, utils/download.py, utils/execute.py, plus the SType
enum that heads utils/install.py)."""
import os
import shlex
import subprocess
import threading
from datetime import datetime
from enum import Enum
from pprint import pprint

from loguru import logger
from setting import ROOT_DIR
from utils.app import App
from utils.tool import Tool


class Config:
    """Persists the user's chosen config file name in the .meta marker."""

    def __init__(self):
        self.exe = Execute()
        self.tool = Tool()
        self.meta_path = os.path.join(ROOT_DIR, App.meta_file)

    def switch_config(self, config_file):
        """Record *config_file* as the active config if it exists."""
        print(f"Switch config file to {config_file}")
        config_path = os.path.join(ROOT_DIR, config_file)
        if not os.path.isfile(config_path):
            print("config_path not found, switch failed.")
            return
        self.tool.write_file(self.meta_path, config_file.strip())
        print("Successfully switched. config file saved in file .meta")


class Download:
    """Fetches the packages listed in the [DOWNLOAD] config section."""

    def __init__(self):
        self.app = App()
        self.exe = Execute()
        self.tool = Tool()
        self.download_list = self.tool.gen_list(App.download_list)
        self.download_path = os.path.join(ROOT_DIR, 'downloads')
        self.package_path = os.path.join(ROOT_DIR, 'package')

    def gen_wget_url(self, out_dir='./downloads', url='', filename=''):
        """Build (and echo) the wget command line for one file."""
        head = "wget --no-check-certificate"
        file_path = os.path.join(out_dir, filename)
        download_url = f'{head} {url} -O {file_path}'
        print(download_url)
        return download_url

    def download(self):
        """Download every listed package, skipping files already present."""
        print("start download")
        filename_url_map = {}
        self.tool.mkdirs(self.download_path)
        download_flag = False
        # Each entry looks like: "<name>/<version> <url> [<filename>]";
        # the optional third field overrides the name taken from the URL.
        for url_info in self.download_list:
            url_list = url_info.split(' ')
            if len(url_list) < 2:
                continue
            url_link = url_list[1].strip()
            filename = os.path.basename(url_link)
            if len(url_list) == 3:
                filename = url_list[2].strip()
            filename_url_map[filename] = url_link

        pprint(filename_url_map)
        for filename, url in filename_url_map.items():
            download_flag = True
            file_path = os.path.join(self.download_path, filename)
            if os.path.exists(file_path):
                # BUGFIX: report which file was skipped.
                self.tool.prt_content(f"FILE {filename} already DOWNLOADED")
                continue
            download_url = self.gen_wget_url(self.download_path, url, filename)
            self.tool.prt_content("DOWNLOAD " + filename)
            output = os.popen(download_url)
            output.read()  # drain the pipe so wget runs to completion
            output.close()

        if not download_flag:
            print("The download list is empty!")


class CommandExecutionException(Exception):
    """Raised when a command run with ensure_success=True exits non-zero."""

    def __init__(self, cmd: str, exit_code: int) -> None:
        super().__init__(f"command executed fail with exit-code={exit_code}: {cmd}")


class TextReadLineThread(threading.Thread):
    """Pumps lines from a pipe's readline into *callback* until EOF."""

    def __init__(self, readline, callback, *args, **kargs) -> None:
        super().__init__(*args, **kargs)
        self.readline = readline
        self.callback = callback

    def run(self):
        for line in iter(self.readline, ""):
            if len(line) == 0:
                break
            self.callback(line)


class Execute:
    """Shell-command runners with timing and logging."""

    def __init__(self):
        self.cur_time = None   # start timestamp of the last command
        self.end_time = None   # end timestamp of the last command
        self.flags = '\n' + '*' * 80
        self.tool = Tool()

    def print_cmd(self, cmd):
        """Record the start time and return the banner-wrapped command."""
        self.cur_time = self.tool.get_time_stamp()
        print(f"RUNNING AT {self.cur_time}:")
        return self.flags + '\n' + cmd + self.flags

    # Execute, get output and don't know whether success or not
    def exec_popen(self, cmd):
        """Run *cmd* under bash and return its stdout lines."""
        # BUGFIX: the previous f"bash -c '{cmd}'" broke whenever cmd
        # itself contained a single quote; shlex.quote escapes safely.
        output = os.popen(f"bash -c {shlex.quote(cmd)}").readlines()
        return output

    def get_duration(self):
        """Seconds elapsed between the recorded start and end timestamps."""
        time1 = datetime.strptime(self.cur_time, "%Y-%m-%d %H:%M:%S")
        time2 = datetime.strptime(self.end_time, "%Y-%m-%d %H:%M:%S")
        # BUGFIX: timedelta.seconds silently drops whole days.
        return int((time2 - time1).total_seconds())

    def cmd_exec(self, cmd: str, ensure_success: bool = True, show: bool = True) -> int:
        """Run *cmd*, streaming stdout/stderr into the logger; return exit code.

        Raises CommandExecutionException on non-zero exit when
        ensure_success is True.
        """
        if show:
            logger.info("executing command: {}".format(self.print_cmd(cmd)))

        process = subprocess.Popen(
            cmd,
            shell=True,
            text=True,
            executable="bash",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        if show:
            logger.debug("started command")

        def log_warp(func):
            # Indent each streamed line under the command banner.
            def _wrapper(line: str):
                return func("\t" + line.rstrip())
            return _wrapper

        read_stdout = TextReadLineThread(process.stdout.readline, log_warp(logger.info))
        read_stderr = TextReadLineThread(process.stderr.readline, log_warp(logger.warning))
        read_stdout.start()
        read_stderr.start()

        read_stdout.join()
        read_stderr.join()
        ret = process.wait()
        if show:
            logger.debug("process finish")

        logger.info("executed command with exit-code={}".format(ret))
        if ensure_success and ret != 0:
            raise CommandExecutionException(cmd=cmd, exit_code=ret)
        return ret

    # Execute, get whether success or not
    def exec_sys(self, cmd):
        """Run *cmd* via os.system; True on success, False on failure."""
        cmd = self.tool.chomp_cmd(cmd)
        logger.info("executing command: {}".format(self.print_cmd(cmd)))
        # BUGFIX: quote safely (see exec_popen).
        state = os.system(f"bash -c {shlex.quote(cmd)}")
        self.end_time = self.tool.get_time_stamp()
        print(f"total time used: {self.get_duration()}s")
        if state:
            print(f"failed at {self.end_time}:{state}".upper())
            return False
        else:
            print(f"successfully executed at {self.end_time}, congradulations!!!".upper())
            return True

    def exec_raw(self, cmd, ensure_success=False, show=True):
        """Run *cmd* via cmd_exec; True on success, False on failure."""
        cmd = self.tool.chomp_cmd(cmd)
        state = self.cmd_exec(cmd, ensure_success, show)
        self.end_time = self.tool.get_time_stamp()
        if state:
            if show:
                print(f"failed at {self.end_time}:{state}".upper())
            return False
        else:
            if show:
                print(f"successfully executed at {self.end_time}, congradulations!!!".upper())
            return True


class SType(Enum):
    """Software categories used to pick an install prefix (utils/install.py)."""
    COMPILER = 1
    MPI = 2
    UTIL = 3
    LIB = 4
matched_group.group(2) 46 | last_ver = matched_group.group(3) 47 | return ( mversion, f'{mversion}.{mid_ver}.{last_ver}') 48 | 49 | def gen_compiler_dict(self, cname, version): 50 | return {"cname": cname, "cmversion": version[0], self.FULL_VERSION: version[1]} 51 | 52 | def gen_mpi_dict(self, name, version): 53 | return {"name": name, "mversion": version[0], self.FULL_VERSION: version[1]} 54 | 55 | # some command don't generate output, must redirect to a tmp file 56 | def get_cmd_output(self, cmd): 57 | tmp_path = os.path.join(ROOT_DIR, 'tmp') 58 | tmp_file = os.path.join(tmp_path, 'tmp.txt') 59 | self.tool.mkdirs(tmp_path) 60 | cmd += f' &> {tmp_file}' 61 | self.exe.exec_popen(cmd) 62 | info_list = self.tool.read_file(tmp_file).split('\n') 63 | return info_list 64 | 65 | def get_gcc_info(self): 66 | gcc_info_list = self.get_cmd_output('gcc -v') 67 | gcc_info = gcc_info_list[-1].strip() 68 | version = self.get_version_info(gcc_info) 69 | if not version: 70 | print("GCC not found, please install gcc first") 71 | sys.exit() 72 | name = 'gcc' 73 | if 'kunpeng' in gcc_info.lower(): 74 | name = 'kgcc' 75 | return self.gen_compiler_dict(name, version) 76 | 77 | def get_clang_info(self): 78 | clang_info_list = self.get_cmd_output('clang -v') 79 | clang_info = clang_info_list[0].strip() 80 | version = self.get_version_info(clang_info) 81 | if not version: 82 | print("clang not found, please install clang first") 83 | sys.exit() 84 | name = 'clang' 85 | if 'bisheng' in clang_info.lower(): 86 | name = 'bisheng' 87 | return self.gen_compiler_dict(name, version) 88 | 89 | def get_nvc_info(self): 90 | return self.gen_compiler_dict("nvc", ('11', "11.4")) 91 | 92 | def get_icc_info(self): 93 | return self.gen_compiler_dict("icc", ('2018', "2018.4")) 94 | 95 | def get_hmpi_version(self): 96 | ucg_path = self.get_cmd_output('whereis ucg_info')[0] 97 | if ucg_path == "ucg_info:": 98 | ucg_path = self.get_cmd_output('which ucx_info')[0] 99 | else: 100 | ucg_path = 
self.get_cmd_output('which ucg_info')[0] 101 | ver_0 = ('1','1.3.0') 102 | ver_dict = {('2','2.0.0'): 'ver_0'} 103 | ucg_path = os.path.dirname(ucg_path) 104 | ucg_path = os.path.dirname(ucg_path) 105 | libucg_path = os.path.join(ucg_path, "lib") 106 | libucg_so_flag = "libucg.so." 107 | version = None 108 | for file_name in os.listdir(libucg_path): 109 | if libucg_so_flag in file_name: 110 | version = self.get_version_info(file_name) 111 | if version in ver_dict: 112 | return ver_0 113 | elif version: 114 | break 115 | return version 116 | 117 | def get_hmpi_info(self): 118 | hmpi_info = self.get_cmd_output('which ucx_info')[0] 119 | if hmpi_info == "": 120 | return None 121 | name = 'hmpi' 122 | version = self.get_hmpi_version() 123 | return self.gen_mpi_dict(name, version) 124 | 125 | def get_openmpi_info(self): 126 | mpi_info_list = self.get_cmd_output('mpirun -version') 127 | mpi_info = mpi_info_list[0].strip() 128 | name = 'openmpi' 129 | version = self.get_version_info(mpi_info) 130 | if not version: 131 | return None 132 | return self.gen_mpi_dict(name, version) 133 | 134 | def get_mpich_info(self): 135 | mpi_info_list = self.get_cmd_output('mpirun -version') 136 | mpi_info = "".join(mpi_info_list).strip() 137 | name = 'mpich' 138 | if name not in mpi_info: 139 | return None 140 | version = self.get_version_info(mpi_info) 141 | if not version: 142 | return None 143 | return self.gen_mpi_dict(name, version) 144 | 145 | def get_mpi_info(self): 146 | mpich_info = self.get_mpich_info() 147 | if mpich_info: 148 | return mpich_info 149 | hmpi_info = self.get_hmpi_info() 150 | if hmpi_info: 151 | return hmpi_info 152 | openmpi_info = self.get_openmpi_info() 153 | if openmpi_info: 154 | return openmpi_info 155 | print("MPI not found, please install MPI first.") 156 | sys.exit() 157 | 158 | def check_software_path(self, software_path): 159 | abs_software_path = os.path.join(self.PACKAGE_PATH, software_path) 160 | if not os.path.exists(abs_software_path): 161 | 
print(f"{software_path} not exist, Are you sure the software lies in package dir?") 162 | return False 163 | return abs_software_path 164 | 165 | def check_compiler_mpi(self, compiler_list, compiler_mpi_info): 166 | no_compiler = ["COM","ANY"] 167 | is_valid = False 168 | compiler_mpi_info = compiler_mpi_info.upper() 169 | valid_list = [] 170 | for compiler in compiler_list: 171 | valid_list.append(compiler) 172 | valid_list.append(f'{compiler}+MPI') 173 | valid_list += no_compiler 174 | for valid_para in valid_list: 175 | if compiler_mpi_info == valid_para: 176 | is_valid = True 177 | break 178 | if not is_valid: 179 | print(f"compiler or mpi info error, Only {valid_list.join('/').lower()} is supported") 180 | return False 181 | return compiler_mpi_info 182 | 183 | def get_used_compiler(self, compiler_mpi_info): 184 | return compiler_mpi_info.split('+')[0] 185 | 186 | def get_software_type(self,software_name, compiler_mpi_info): 187 | if self.is_mpi_software(software_name): 188 | return SType.MPI 189 | if compiler_mpi_info == "COM": 190 | return SType.COMPILER 191 | elif compiler_mpi_info == "ANY": 192 | return SType.UTIL 193 | else: 194 | return SType.LIB 195 | 196 | def get_suffix(self, software_info_list): 197 | if len(software_info_list) >= 3: 198 | return software_info_list[2] 199 | return "" 200 | 201 | def get_software_info(self, software_path, compiler_mpi_info): 202 | software_info_list = software_path.split('/') 203 | software_name = software_info_list[0] 204 | software_version = software_info_list[1] 205 | software_main_version = self.get_main_version(software_version) 206 | software_type = self.get_software_type(software_name, compiler_mpi_info) 207 | software_info = { 208 | "sname":software_name, 209 | "sversion": software_version, 210 | "mversion": software_main_version, 211 | "type" : software_type, 212 | "suffix": self.get_suffix(software_info_list) 213 | } 214 | if software_type == SType.LIB or software_type == SType.MPI: 215 | 
software_info["is_use_mpi"] = self.is_contained_mpi(compiler_mpi_info) 216 | software_info["use_compiler"] = self.get_used_compiler(compiler_mpi_info) 217 | return software_info 218 | 219 | def get_compiler_info(self, compilers, compiler_mpi_info): 220 | compiler_info = {"cname":None, "cmversion": None, self.FULL_VERSION: None} 221 | for compiler, info_func in compilers.items(): 222 | if compiler in compiler_mpi_info: 223 | compiler_info = info_func() 224 | return compiler_info 225 | 226 | def get_main_version(self, version): 227 | return version.split('.')[0] 228 | 229 | def is_mpi_software(self, software_name): 230 | mpis = ['hmpi', 'openmpi', 'hpcx', 'mpich'] 231 | for mpi in mpis: 232 | if software_name.startswith(mpi): 233 | return True 234 | return False 235 | 236 | def add_mpi_path(self, software_info, install_path): 237 | if not software_info['is_use_mpi']: 238 | return install_path 239 | mpi_info = self.get_mpi_info() 240 | if mpi_info[self.FULL_VERSION] == None: 241 | print("MPI not found!") 242 | return False 243 | mpi_str = mpi_info["name"]+mpi_info[self.FULL_VERSION] 244 | print("Use MPI: "+mpi_str) 245 | install_path = os.path.join(install_path, mpi_str) 246 | return install_path 247 | 248 | def get_install_path(self, software_info, env_info): 249 | suffix = software_info['suffix'] 250 | sversion = software_info['sversion'] 251 | stype = software_info['type'] 252 | cname = env_info['cname'] 253 | cfullver = env_info[self.FULL_VERSION] 254 | if suffix != "": 255 | software_info['sname'] += '-' + suffix 256 | sname = software_info['sname'] 257 | if stype == SType.MPI: 258 | return os.path.join(self.MPI_PATH, f"{sname}{sversion}-{cname}{cfullver}", sversion) 259 | if stype == SType.COMPILER: 260 | install_path = os.path.join(self.COMPILER_PATH, f'{sname}/{sversion}') 261 | elif stype == SType.UTIL: 262 | install_path = os.path.join(self.UTILS_PATH, f'{sname}/{sversion}') 263 | else: 264 | # install library 265 | install_path = 
os.path.join(self.LIBS_PATH, cname+cfullver) 266 | # get mpi name and version 267 | install_path = self.add_mpi_path(software_info, install_path) 268 | install_path = os.path.join(install_path, f'{sname}/{sversion}') 269 | return install_path 270 | 271 | def is_contained_mpi(self, compiler_mpi_info): 272 | return "MPI" in compiler_mpi_info 273 | 274 | def get_files(self, abs_path): 275 | file_list = [d for d in glob(abs_path+'/**', recursive=True)] 276 | return file_list 277 | 278 | def get_module_file_content(self, install_path, sname, sversion): 279 | module_file_content = '' 280 | file_list = self.get_files(install_path) 281 | bins_dir_type = ["bin"] 282 | libs_dir_type = ["libs", "lib", "lib64"] 283 | incs_dir_type = ["include"] 284 | bins_dir = [] 285 | libs_dir = [] 286 | incs_dir = [] 287 | bins_str = '' 288 | libs_str = '' 289 | incs_str = '' 290 | opal_prefix = '' 291 | for file in file_list: 292 | if not os.path.isdir(file): 293 | continue 294 | last_dir = file.split('/')[-1] 295 | if last_dir in bins_dir_type: 296 | bins_dir.append(file.replace(install_path, "$prefix")) 297 | elif last_dir in libs_dir_type: 298 | libs_dir.append(file.replace(install_path, "$prefix")) 299 | elif last_dir in incs_dir_type: 300 | incs_dir.append(file.replace(install_path, "$prefix")) 301 | if len(bins_dir) >= 1: 302 | bins_str = "prepend-path PATH "+':'.join(bins_dir) 303 | if len(libs_dir) >= 1: 304 | libs_str = "prepend-path LD_LIBRARY_PATH "+':'.join(libs_dir) 305 | if len(incs_dir) >= 1: 306 | incs_str = "prepend-path INCLUDE " + ':'.join(incs_dir) 307 | if self.is_mpi_software(sname): 308 | opal_prefix = f"setenv OPAL_PREFIX {install_path}" 309 | module_file_content = f'''#%Module1.0##################################################################### 310 | set prefix {install_path} 311 | set version {sversion} 312 | 313 | setenv {sname.upper().replace('-','_')}_PATH {install_path} 314 | {opal_prefix} 315 | {bins_str} 316 | {libs_str} 317 | {incs_str} 318 | ''' 319 | 
return module_file_content 320 | 321 | def is_installed(self, install_path): 322 | #为了兼容老版本,只要安装路径下存在installed也算做已安装 323 | installed_file_path = os.path.join(install_path, "installed") 324 | if self.tool.read_file(installed_file_path) == "1": 325 | return True 326 | return self.json.query_data(install_path) 327 | 328 | def gen_module_file(self, install_path, software_info, env_info): 329 | sname = software_info['sname'] 330 | sversion = software_info['sversion'] 331 | stype = software_info['type'] 332 | cname = env_info['cname'] 333 | cfullversion = env_info[self.FULL_VERSION] 334 | module_file_content = self.get_module_file_content(install_path, sname, sversion) 335 | if not self.is_installed(install_path): 336 | return '' 337 | # if install_path is empty, The module file should not generated. 338 | if len(os.listdir(install_path)) == 1: 339 | print('module file did not generated because no file generated under install path') 340 | return '' 341 | if stype == SType.MPI: 342 | compiler_str = cname + cfullversion 343 | software_str = sname + sversion 344 | module_path = os.path.join(self.MODULE_DEPS_PATH, compiler_str ,sname) 345 | attach_module_path = os.path.join(self.MODULE_DEPS_PATH, compiler_str+'-'+software_str) 346 | self.tool.mkdirs(attach_module_path) 347 | module_file_content += f"\nprepend-path MODULEPATH {attach_module_path}" 348 | print(f'attach module file {attach_module_path} successfully generated.') 349 | else: 350 | if stype == SType.COMPILER: 351 | software_str = sname + sversion 352 | module_path = os.path.join(self.MODULE_FILES, sname) 353 | attach_module_path = os.path.join(self.MODULE_DEPS_PATH, software_str) 354 | self.tool.mkdirs(attach_module_path) 355 | module_file_content += f"\nprepend-path MODULEPATH {attach_module_path}" 356 | print(f'attach module file {attach_module_path} successfully generated.') 357 | elif stype == SType.UTIL: 358 | module_path = os.path.join(self.MODULE_FILES, sname) 359 | else: 360 | compiler_str = cname + 
cfullversion 361 | if software_info['is_use_mpi']: 362 | mpi_info = self.get_mpi_info() 363 | mpi_str = mpi_info['name'] + mpi_info[self.FULL_VERSION] 364 | module_path = os.path.join(self.MODULE_DEPS_PATH, f"{compiler_str}-{mpi_str}" ,sname) 365 | else: 366 | module_path = os.path.join(self.MODULE_DEPS_PATH, compiler_str, sname) 367 | self.tool.mkdirs(module_path) 368 | module_file = os.path.join(module_path, sversion) 369 | self.tool.write_file(module_file, module_file_content) 370 | print(f"module file {module_file} successfully generated") 371 | row = self.json.query_data(install_path) 372 | row["module_path"] = module_file 373 | self.json.update_data(install_path, row) 374 | self.json.write_file() 375 | 376 | def install_package(self, abs_software_path, install_path, other_args): 377 | install_script = 'install.sh' 378 | install_script_path = os.path.join(abs_software_path, install_script) 379 | print("start installing..."+ abs_software_path) 380 | if not os.path.exists(install_script_path): 381 | print("install script not exists, skipping...") 382 | return 383 | self.tool.mkdirs(install_path) 384 | if self.is_installed(install_path): 385 | print("already installed, skipping...") 386 | return 387 | #argparse无法解析前缀为-的参数,所以参数使用双单引号,这里要去除单引号 388 | other_args = [x.replace('\'','') for x in other_args] 389 | other_args_uni = ' '.join(other_args) 390 | #print(other_args) 391 | install_cmd = f''' 392 | source ./init.sh 393 | cd {abs_software_path} 394 | chmod +x {install_script} 395 | bash ./{install_script} {install_path} {other_args_uni} 396 | ''' 397 | result = self.exe.exec_raw(install_cmd) 398 | if result: 399 | print(f"install to {install_path} successful") 400 | else: 401 | print("install failed") 402 | sys.exit() 403 | 404 | def add_install_info(self, software_info, install_path): 405 | software_dict = {} 406 | software_dict['name'] = software_info['sname'] 407 | software_dict['version'] = software_info['sversion'] 408 | software_dict['module_path'] = '' 409 
| self.json.add_data(install_path, software_dict) 410 | self.json.write_file() 411 | 412 | def remove_prefix(self, software_path): 413 | if software_path.startswith('package/') or software_path.startswith('./'): 414 | software_path = software_path.replace('./', '', 1) 415 | software_path = software_path.replace('package/', '', 1) 416 | return software_path 417 | 418 | def install(self, install_args): 419 | software_path = install_args[0] 420 | compiler_mpi_info = install_args[1] 421 | other_args = install_args[2:] 422 | self.tool.prt_content("INSTALL " + software_path) 423 | compilers = {"GCC":self.get_gcc_info, "CLANG":self.get_clang_info, 424 | "NVC":self.get_nvc_info, "ICC":self.get_icc_info, 425 | "BISHENG":self.get_clang_info} 426 | software_path = self.remove_prefix(software_path) 427 | # software_path should exists 428 | abs_software_path = self.check_software_path(software_path) 429 | if not abs_software_path: return 430 | compiler_mpi_info = self.check_compiler_mpi(compilers.keys(), compiler_mpi_info) 431 | if not compiler_mpi_info: return 432 | software_info = self.get_software_info(software_path, compiler_mpi_info) 433 | stype = software_info['type'] 434 | # get compiler name and version 435 | env_info = self.get_compiler_info(compilers, compiler_mpi_info) 436 | if stype == SType.LIB or stype == SType.MPI: 437 | cmversion = env_info['cmversion'] 438 | cfullver = env_info[self.FULL_VERSION] 439 | if cmversion == None: 440 | print(f"The specified {software_info['use_compiler']} Compiler not found!") 441 | return False 442 | else: 443 | print(f"Use Compiler: {env_info['cname']} {cfullver}") 444 | 445 | # get install path 446 | install_path = self.get_install_path(software_info, env_info) 447 | if not install_path: return 448 | # get install script 449 | self.install_package(abs_software_path, install_path, other_args) 450 | # add install info 451 | self.add_install_info(software_info, install_path) 452 | # gen module file 453 | self.gen_module_file( 
install_path, software_info, env_info) 454 | 455 | def install_depend(self): 456 | depend_file = 'depend_install.sh' 457 | print(f"start installing dependendcy of {App.app_name}") 458 | depend_content = f''' 459 | source ./init.sh 460 | {App.dependency} 461 | ''' 462 | self.tool.write_file(depend_file, depend_content) 463 | run_cmd = f''' 464 | chmod +x {depend_file} 465 | bash ./{depend_file} 466 | ''' 467 | self.exe.exec_sys(run_cmd) 468 | 469 | def remove(self, software_info): 470 | self.tool.prt_content("UNINSTALL " + software_info) 471 | remove_list = [] 472 | installed_dict = self.json.read_file() 473 | for path, software_row in installed_dict.items(): 474 | if software_info in software_row['name']: 475 | remove_list.append((path, software_row)) 476 | lens = len(remove_list) 477 | if lens == 0: 478 | print("software not installed") 479 | return 480 | choice = 1 481 | if lens > 1: 482 | for i in range(lens): 483 | print(f"{i+1}: {remove_list[i][1]}") 484 | self.tool.prt_content("") 485 | choice = input(f"find {lens} software, which one do you want to remove?\n") 486 | try: 487 | choice = int(choice) 488 | if choice > lens or choice < 1: 489 | print("invalid choice!") 490 | return 491 | except: 492 | sys.exit("please enter a valid number!") 493 | self.json.delete_data(remove_list[choice-1][0]) 494 | self.json.write_file() 495 | print("Successfully remove "+software_info) 496 | 497 | def list(self): 498 | self.tool.prt_content("Installed list".upper()) 499 | installed_list = self.json.read_file() 500 | if len(installed_list) == 0: 501 | print("no software installed.") 502 | return 503 | # 获取所有列名,除了module_path 504 | headers = list(installed_list.values())[0].keys() 505 | print_headers = list(headers)[:-1] 506 | # 打印表头 507 | table_str = "" 508 | for header in print_headers: 509 | table_str += f"{header:<10}" 510 | # 添加path打印 511 | table_str += " path" 512 | # 分割线 513 | table_str += "\n" + "-" * (10 * (len(print_headers)+1)) + "\n" 514 | # 打印每行数据 515 | for path, 
row in installed_list.items(): 516 | for key in print_headers: 517 | table_str += f"{row[key]:<10} " 518 | table_str += f"{path:<10} \n" 519 | print(table_str) 520 | 521 | def find(self, content): 522 | self.tool.prt_content(f"Looking for package {content}") 523 | installed_list = list(self.json.read_file().values()) 524 | for row in installed_list: 525 | if content in row['name']: 526 | print(row) 527 | 528 | # update path when hpcrunner is translocation 529 | def update(self): 530 | file1_list = [d for d in glob(self.MODULE_FILES+'/**', recursive=True)] 531 | file2_list= [d for d in glob(self.MODULE_DEPS_PATH+'/**', recursive=True)] 532 | file_list = file1_list+file2_list 533 | module_list = [] 534 | for file in file_list: 535 | if not os.path.isdir(file): 536 | module_list.append(file) 537 | for file in module_list: 538 | content = self.tool.read_file(file) 539 | search_old_path = re.search(r'prefix +(.*hpcrunner(-master)?)', content) 540 | if search_old_path: 541 | content = content.replace(search_old_path.group(1), ROOT_DIR) 542 | self.tool.write_file(file, content) 543 | #还要更新install list 544 | install_info = self.tool.read_file(self.INSTALL_INFO_PATH) 545 | search_old_path = re.search(r'(\/.*hpcrunner(-master)?)', install_info) 546 | if search_old_path: 547 | content = install_info.replace(search_old_path.group(1), ROOT_DIR) 548 | self.tool.write_file(self.INSTALL_INFO_PATH, content) 549 | print("update successfully.") 550 | 551 | def check_download_url(self): 552 | # 查找指定目录下所有名字叫做install.sh的文件,将文件路径保存到列表中 553 | matches = [] 554 | for root, dirnames, filenames in os.walk(self.PACKAGE_PATH): 555 | for filename in fnmatch.filter(filenames, 'install.sh'): 556 | matches.append(os.path.join(root, filename)) 557 | # 定义匹配下载链接的正则表达式 558 | url_regex = r'(https?://\S+\.[zip|rar|tar|gz|bz|git]{2,3})' 559 | for script in matches: 560 | script_content = self.tool.read_file(script) 561 | urls = re.findall(url_regex, script_content) 562 | print(f"checking script {script}") 
563 | for url in urls: 564 | if self.tool.check_url_isvalid(url): 565 | print(f"url {url} successfully checked") 566 | else: 567 | print(f"url {url} check failed,please update") 568 | print("all of the urls has been checked.") 569 | -------------------------------------------------------------------------------- /utils/invoke.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import sys 5 | from utils.tool import Tool 6 | from utils.execute import Execute 7 | from utils.app import App 8 | from setting import ROOT_DIR, HPCbench_BENCHMARK 9 | 10 | 11 | class Run: 12 | def __init__(self): 13 | self.app = App() 14 | self.exe = Execute() 15 | self.tool = Tool() 16 | self.hosts_list = self.tool.gen_list(App.hosts) 17 | 18 | def gen_hostfile(self, nodes): 19 | length = len(self.hosts_list) 20 | if nodes > length: 21 | print(f"You don't have {nodes} nodes, only {length} nodes available!") 22 | sys.exit() 23 | if nodes <= 1: 24 | return 25 | gen_nodes = '\n'.join(self.hosts_list[:nodes]) 26 | print(f"HOSTFILE GENERATED:\n{gen_nodes}\n") 27 | self.tool.write_file('hostfile', gen_nodes) 28 | 29 | # single run 30 | def run(self): 31 | print(f"Start run {App.app_name}") 32 | nodes = int(App.run_cmd['nodes']) 33 | self.gen_hostfile(nodes) 34 | run_cmd = self.app.get_run_cmd() 35 | print(run_cmd) 36 | self.exe.exec_raw(run_cmd) 37 | 38 | def batch_run(self): 39 | batch_file = os.path.join(ROOT_DIR, 'batch_run.sh') 40 | print(f"Start batch run {App.app_name}") 41 | cmd = f'''\ 42 | {self.app.get_env()} 43 | \ncd {App.case_dir} 44 | \n{App.batch_cmd} 45 | ''' 46 | batch_content = self.tool.chomp_cmd(cmd) 47 | self.tool.write_file(batch_file, batch_content) 48 | run_cmd = f'''\ 49 | chmod +x {batch_file} 50 | bash {batch_file} 51 | ''' 52 | self.exe.exec_sys(run_cmd) 53 | 54 | def job_run(self): 55 | print(f"Start job run {App.app_name}") 56 | jobs_dir = 
HPCbench_BENCHMARK+'/jobs' 57 | self.tool.mkdirs(jobs_dir) 58 | for i, job_cmd in enumerate(App.job_cmd): 59 | job_file = os.path.join(jobs_dir, f'job_{App.app_name}_run{i}.sh') 60 | cmd = f'''\ 61 | {job_cmd} 62 | ''' 63 | job_content = self.tool.chomp_cmd(cmd) 64 | self.tool.write_file(job_file, job_content) 65 | run_cmd = f'''\ 66 | {self.app.get_env()} 67 | sbatch {job_file} 68 | ''' 69 | self.exe.exec_raw(run_cmd) 70 | 71 | -------------------------------------------------------------------------------- /utils/machine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from utils.execute import Execute 4 | from utils.tool import Tool 5 | 6 | class Machine: 7 | def __init__(self): 8 | self.exe = Execute() 9 | self.tool = Tool() 10 | self.info2cmd = { 11 | 'CHECK network adapter':'nmcli d', 12 | 'CHECK Machine Bits':'getconf LONG_BIT', 13 | 'CHECK OS':'cat /proc/version && uname -a', 14 | 'CHECK GPU': 'lspci | grep -i nvidia', 15 | 'CHECK Total Memory':'cat /proc/meminfo | grep MemTotal', 16 | 'CHECK Total Disk Memory':'fdisk -l | grep Disk', 17 | 'CHECK CPU info': 'cat /proc/cpuinfo | grep "processor" | wc -l && lscpu && dmidecode -t 4' 18 | } 19 | 20 | def get_info(self, content, cmd): 21 | self.tool.prt_content(content) 22 | self.exe.exec_raw(cmd) 23 | 24 | def output_machine_info(self): 25 | print("get machine info") 26 | for key, value in self.info2cmd.items(): 27 | self.get_info(key, value) 28 | -------------------------------------------------------------------------------- /utils/report_tmp.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Report 7 | 8 | 66 | 67 | 68 | 69 |
70 |
71 |

HPCbenchmarks 测试报告

72 |
73 |
74 |
集群:{{ name }}
75 |
报告日期:{{ time }}
76 |
77 |
78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 |
维度指标实测值参考值指标分数权重维度分
计算性能HPL双精度浮点计算性能(PFLOPS) {{ "%.2f"|format(test.compute.HPL) }} {{ compute.HPL[scale] }} {{ "%.2f"|format(compute.HPL.score) }} {{ compute.HPL.weights }} {{ "%.2f"|format(compute.issue_score) }}
HPCG双精度浮点计算性能(GFLOPS) {{ "%.2f"|format(test.compute.HPCG) }} {{ compute.HPCG[scale] }} {{ "%.2f"|format(compute.HPCG.score) }} {{ compute.HPCG.weights }}
AI计算性能图像推理任务的计算性能(Fig/s) {{ "%.2f"|format(test.AI.infering) }} {{ AI.infering[scale] }} {{ "%.2f"|format(AI.infering.score) }} {{ AI.infering.weights }} {{ "%.2f"|format(AI.issue_score) }}
图像训练任务的计算性能(Fig/s) {{ "%.2f"|format(test.AI.training) }} {{ AI.training[scale] }} {{ "%.2f"|format(AI.training.score) }} {{ AI.training.weights }}
存储性能文件系统单客户端单流带宽(GB/s) {{ "%.2f"|format(test.storage.single_client_single_fluence) }} {{ storage.single_client_single_fluence[scale] }} {{ "%.2f"|format(storage.single_client_single_fluence.score) }} {{ storage.single_client_single_fluence.weights }} {{ "%.2f"|format(storage.issue_score) }}
文件系统单客户端多流带宽(GB/s) {{ "%.2f"|format(test.storage.single_client_multi_fluence) }} {{ storage.single_client_multi_fluence[scale] }} {{ "%.2f"|format(storage.single_client_multi_fluence.score) }} {{ storage.single_client_multi_fluence.weights }}
文件系统聚合带宽(GB/s) {{ "%.2f"|format(test.storage.aggregation_bandwidth) }} {{ storage.aggregation_bandwidth[scale] }} {{ "%.2f"|format(storage.aggregation_bandwidth.score) }} {{ storage.aggregation_bandwidth.weights }}
文件系统聚合IO操作速率(IOPS) {{ "%d"|format(test.storage.IO_rate) }} {{ storage.IO_rate[scale] }} {{ "%.2f"|format(storage.IO_rate.score) }} {{ storage.IO_rate.weights }}
多协议平均访问效率(%) {{ "%.2f"|format(test.storage.multi_request) }} {{ storage.multi_request[scale] }} {{ "%.2f"|format(storage.multi_request.score) }} {{ storage.multi_request.weights }}
网络性能点对点网络带宽(Gbps) {{ "%.2f"|format(test.network.P2P_network_bandwidth) }} {{ network.P2P_network_bandwidth[scale] }} {{ "%.2f"|format(network.P2P_network_bandwidth.score) }} {{ network.P2P_network_bandwidth.weights }} {{ "%.2f"|format(network.issue_score) }}
点对点消息延迟(μs) {{ test.network.P2P_message_latency }} {{ network.P2P_message_latency[scale] }} {{ "%.2f"|format(network.P2P_message_latency.score) }} {{ network.P2P_message_latency.weights }}
网络对分带宽与注入带宽比值 {{ "%.2f"|format(test.network.ratio) }} {{ network.ratio[scale] }} {{ "%.2f"|format(network.ratio.score) }} {{ network.ratio.weights }}
系统能效单位功耗的浮点计算性能(FLOPS/W) {{ "%.2f"|format(test.system.compute_efficiency) }} {{ system.compute_efficiency[scale] }} {{ "%.2f"|format(system.compute_efficiency.score) }} {{ system.compute_efficiency.weights }} {{ "%.2f"|format(system.issue_score) }}
单位功耗的文件系统聚合IO操作速率(TB/W) {{ "%.2f"|format(test.system.IO_operation_rate) }} {{ system.IO_operation_rate[scale] }} {{ "%.2f"|format(system.IO_operation_rate.score) }} {{ system.IO_operation_rate.weights }}
系统平衡性内存容量与处理器核心数的比值 {{ "%.2f"|format(test.balance.mem2cpu) }} {{ balance.mem2cpu[scale] }} {{ "%.2f"|format(balance.mem2cpu.score) }} {{ balance.mem2cpu.weights }} {{ "%.2f"|format(balance.issue_score) }}
BurstBuffer与内存的容量比 {{ "%.2f"|format(test.balance.buffer2mem) }} {{ balance.buffer2mem[scale] }} {{ "%.2f"|format(balance.buffer2mem.score) }} {{ balance.buffer2mem.weights }}
并行文件系统与BurstBuffer的容量比 {{ "%.2f"|format(test.balance.file2buffer) }} {{ balance.file2buffer[scale] }} {{ "%.2f"|format(balance.file2buffer.score) }} {{ balance.file2buffer.weights }}
内存与BurstBuffer的带宽比 {{ "%.2f"|format(test.balance.mem2buffer) }} {{ balance.mem2buffer[scale] }} {{ "%.2f"|format(balance.mem2buffer.score) }} {{ balance.mem2buffer.weights }}
BurstBuffer与并行文件系统的带宽比 {{ "%.2f"|format(test.balance.buffer2file) }} {{ balance.buffer2file[scale] }} {{ "%.2f"|format(balance.buffer2file.score) }} {{ balance.buffer2file.weights }}
238 |
239 |
240 |
241 |
集群评价:
242 |

243 | • 该集群HPL性能{{ test.compute.HPL }}PF,属于{{ scale_CN }}型系统
244 | {% if (good | length > 0) and (better | length > 0) %} 245 | • 集群在{{ good }}方面性能较好,在{{ better }}方面有待提高
246 | {% elif (good | length == 0) and (better | length > 0) %} 247 | • 集群在{{ better }}方面有待提高
248 | {% else %} 249 | • 集群在{{ good }}方面性能较好
250 | {% endif %} 251 | • 集群的综合分数为{{ "%.2f"|format(sum_score) }}分
252 |

253 |
254 |
255 | 260 | 261 | 262 | 263 | -------------------------------------------------------------------------------- /utils/result.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | from utils.tool import Tool 4 | from pprint import pprint 5 | 6 | tool = Tool() 7 | 8 | class Vividict(dict): 9 | def __missing__(self, key): 10 | value = self[key] = type(self)() 11 | return value 12 | def walk(self): 13 | for key, value in self.items(): 14 | if isinstance(value, Vividict): 15 | for tup in value.walk(): 16 | yield (key,) + tup 17 | else: 18 | yield key, value 19 | 20 | def extract_pflops(file): 21 | content = tool.read_lines(file) 22 | content = content[::-1] 23 | pflops_value = 0 24 | for i in range(len(content)): 25 | if "Time" in content[i]: 26 | match = re.search(r"\d+\.\d+e[+-]\d+", content[i-2]) 27 | if match: 28 | gflops_scientific = match.group(0) 29 | pflops_value = float(gflops_scientific)/1e6 30 | break 31 | return pflops_value 32 | 33 | def extract_number(pattern, file): 34 | content = tool.read_file(file) 35 | match = re.search(pattern, content) 36 | if match: 37 | number = match.group(1) 38 | return float(number) 39 | else: 40 | return 0 41 | 42 | def extract_number_multline(pattern, file): 43 | content = tool.read_file(file) 44 | match = re.search(pattern, content, re.MULTILINE) 45 | if match: 46 | number = match.group(1) 47 | return float(number) 48 | else: 49 | return 0 50 | 51 | def extract_after_get_operation(file): 52 | number = 0 53 | content = tool.read_lines(file) 54 | for i in range(len(content)): 55 | if "Operation: GET. 
Concurrency: 256" in content[i]: 56 | number = re.findall(r'\d+\.\d+', content[i+1])[0] 57 | return float(number) 58 | 59 | def get_result(): 60 | result = Vividict() 61 | ## compute/HPL 62 | file = "result/compute/hpl.txt" 63 | result['compute']['HPL'] = extract_pflops(file) 64 | ## compute/HPCG 65 | file = "result/compute/hpcg.txt" 66 | pattern = r"Final Summary::HPCG result is VALID with a GFLOP/s rating of=(\d+\.?\d*)" 67 | result['compute']['HPCG'] = extract_number(pattern, file)/1e6 68 | ## AI/infering 69 | file = "result/AI/resnet.txt" 70 | pattern = r"TestScenario\.Offline qps.*?time=(\d+\.\d+)" 71 | result['AI']['infering'] = 50000/extract_number(pattern, file) 72 | ## AI/training 73 | file = "result/AI/maskrcnn.txt" 74 | pattern = r"train_perf_fps : (\d+\.\d+)" 75 | result['AI']['training'] = extract_number(pattern, file) 76 | ## storage/single_client_single_fluence 77 | file = "result/storage/ior/single_client_single_fluence.txt" 78 | pattern = r"Max Write: \d+\.\d+ MiB/sec \((\d+\.\d+) MB/sec\)" 79 | result['storage']['single_client_single_fluence'] = extract_number(pattern, file)/1024 80 | ## storage/single_client_multi_fluence 81 | file = "result/storage/ior/single_client_multi_fluence.txt" 82 | pattern = r"Max Write: \d+\.\d+ MiB/sec \((\d+\.\d+) MB/sec\)" 83 | result['storage']['single_client_multi_fluence'] = extract_number(pattern, file)/1024 84 | ## storage/aggregation_bandwidth 85 | file = "result/storage/ior/aggregation_bandwidth.txt" 86 | pattern = r"Max Write: \d+\.\d+ MiB/sec \((\d+\.\d+) MB/sec\)" 87 | result['storage']['aggregation_bandwidth'] = extract_number(pattern, file)/1024 88 | ## storage/multi_request 89 | file = "result/storage/protocol/posix_test/posix.txt" 90 | pattern = r"Max Read:.*?(\d+\.\d+).*?MB/sec" 91 | posix = extract_number(pattern, file) 92 | file = "result/storage/protocol/nfs/nfs.txt" 93 | pattern = r"Max Read:.*?(\d+\.\d+).*?MB/sec" 94 | nfs = extract_number(pattern, file) 95 | file = 
"result/storage/protocol/mino/s3_read.log" 96 | s3 = extract_after_get_operation(file) 97 | file = "result/storage/protocol/hadoop/hdfs_read.log" 98 | pattern = r'Throughput mb/sec: +(\d+\.\d+)' 99 | hdfs = extract_number(pattern, file) 100 | result['storage']['multi_request'] = ((nfs/posix) + (s3/posix) + (hdfs/posix)) / 3.0 * 100 101 | ## storage/IO_rate 102 | file = "result/storage/ior/iops.txt" 103 | pattern = r"^write\s+\d+\.?\d*\s+(\d+)" 104 | result['storage']['IO_rate'] = extract_number_multline(pattern, file) 105 | ## network/P2P_network_bandwidth 106 | file = "result/network/osu_bibw.log" 107 | pattern = r'4194304\s+(\d+\.\d+)' 108 | result['network']['P2P_network_bandwidth'] = extract_number(pattern, file)/1024*8 109 | ## network/P2P_message_latency 110 | file = "result/network/osu_latency.log" 111 | pattern = r'8\s+(\d+\.\d+)' 112 | result['network']['P2P_message_latency'] = '1/' + str(extract_number(pattern, file)) 113 | ## network/ratio 114 | result['network']['ratio'] = 0.5 115 | ## system/compute_efficiency 116 | file = "result/system/system.log" 117 | pattern = r'COMPUTE_EFFIENCY=(\d+\.\d+)' 118 | result['system']['compute_efficiency'] = extract_number(pattern, file) 119 | ## system/IO_operation_rate 120 | pattern = r'IO_operation_rate=(\d*\.\d+)' 121 | result['system']['IO_operation_rate'] = extract_number(pattern,file) 122 | ## balance 123 | file = "result/balance/balance.log" 124 | pattern = r"\w+=([\d.]+)" 125 | matches = list(map(float, re.findall(pattern, tool.read_file(file)))) 126 | result['balance']['mem2cpu'] = matches[0] 127 | result['balance']['buffer2mem'] = matches[1] 128 | result['balance']['file2buffer'] = matches[2] 129 | result['balance']['mem2buffer'] = matches[3] 130 | result['balance']['buffer2file'] = matches[4] 131 | return result -------------------------------------------------------------------------------- /utils/scheduler.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse

from setting import IS_WINDOWS
from loguru import logger

from utils.app import App
from utils.machine import Machine
from utils.config import Config
from utils.download import Download
from utils.install import Install
from utils.build import Build
from utils.invoke import Run
from utils.score import get_score

if IS_WINDOWS:
    raise Exception("Sorry, not supported platform!")

class Scheduler:
    """Translate parsed CLI flags into actions on the helper objects."""

    def __init__(self, args):
        self.machine = Machine()
        self.config = Config()
        self.download = Download()
        self.install = Install()
        self.build = Build()
        self.run = Run()
        self.app = App()
        self.args = args

    def main(self):
        """Execute, in a fixed order, every action whose flag is set."""
        args = self.args
        # (flag value, zero-arg action) pairs; evaluated in the same order
        # as the original if-chain, and every truthy flag fires.
        dispatch = (
            (args.version, lambda: print("v1.0")),
            (args.info, self.machine.output_machine_info),
            (args.list, self.install.list),
            (args.download, self.download.download),
            (args.depend, self.install.install_depend),
            (args.install, lambda: self.install.install(args.install)),
            (args.remove, lambda: self.install.remove(args.remove[0])),
            (args.find, lambda: self.install.find(args.find[0])),
            (args.env, self.app.source_env),
            (args.clean, self.build.clean),
            (args.build, self.build.build),
            (args.job, self.run.job_run),
            (args.run, self.run.run),
            (args.rbatch, self.run.batch_run),
            (args.use, lambda: self.config.switch_config(args.use[0])),
            (args.update, self.install.update),
            (args.check, self.install.check_download_url),
            (args.score, get_score),
        )
        for flag, action in dispatch:
            if flag:
                action()
# ============================ utils/score.py ============================
import math
import json
import copy
import time
from setting import CLUSTER_SCALE, CLUSTER_NAME
from utils.tool import Tool, dict_to_obj
import pyecharts.options as opts
from loguru import logger
from jinja2 import Environment, FileSystemLoader
from pyecharts.charts import Radar
from utils.result import get_result

test_result_file = "result/test_result.json"
standard_file = "utils/standard_score.json"
test_score_file = "result/test_score.json"

tool = Tool()

def read_file(file):
    """Load a JSON file and return the parsed object."""
    with open(file, "r") as f:
        data = json.load(f)
    return data

def _as_number(value):
    """Coerce a metric value to float.

    Latency-style metrics are stored as reciprocal strings like "1/3.7".
    BUGFIX: these were previously eval()'d, which executes arbitrary
    expressions from the result/standard files; parse the a/b form instead.
    """
    if isinstance(value, str):
        num, _, den = value.partition('/')
        return float(num) / float(den) if den else float(value)
    return float(value)

def get_evaluate(result):
    """Split benchmark dimensions into strong (score >= 70) and weak lists."""
    issue_map = {'AI':'AI计算', 'compute':'计算', 'storage':'存储', 'network':'网络', 'system':'能效', 'balance':'系统平衡性'}
    good = []
    better = []
    for k in issue_map:
        score = result[k].issue_score
        if score < 70:
            better.append(issue_map[k])
        else:
            good.append(issue_map[k])
    return good, better

def get_score():
    """Score test results against the standard table and render Report.html.

    Per-norm scores are capped at 100; a dimension's score is the
    weights-weighted geometric-style product of its norm scores, and the
    overall score averages the six dimensions.
    """
    test_result = get_result()  # read_file(test_result_file)
    # local on purpose: derived from the measured HPL figure, not the static
    # CLUSTER_SCALE imported from setting (which the original shadowed).
    cluster_scale = tool.get_scale(test_result['compute']['HPL'])
    standard = read_file(standard_file)
    sum_score = 0
    for issue, sub_issue in standard.items():
        issue_score = 1
        for norm, value in sub_issue.items():
            norm_score = _as_number(test_result[issue][norm]) / (_as_number(value[cluster_scale]) * 0.8) * 100
            norm_score = min(norm_score, 100)
            standard[issue][norm]["score"] = norm_score
            issue_score *= math.pow(norm_score, value["weights"])
        standard[issue]["issue_score"] = issue_score
        sum_score += issue_score / 6
    standard["sum_score"] = sum_score

    tool.write_file(test_score_file, json.dumps(standard, ensure_ascii=False))

    res = dict_to_obj(standard)
    good, better = get_evaluate(res)

    data = [[round(res.compute.issue_score, 2),
             round(res.AI.issue_score, 2),
             round(res.storage.issue_score, 2),
             round(res.network.issue_score, 2),
             round(res.system.issue_score, 2),
             round(res.balance.issue_score, 2)]]
    c = (
        Radar(init_opts=opts.InitOpts())
        .add_schema(
            schema=[
                opts.RadarIndicatorItem(name="计算", max_=100),
                opts.RadarIndicatorItem(name="AI", max_=100),
                opts.RadarIndicatorItem(name="存储", max_=100),
                opts.RadarIndicatorItem(name="网络", max_=100),
                opts.RadarIndicatorItem(name="能效", max_=100),
                opts.RadarIndicatorItem(name="平衡性", max_=100),
            ],
            splitarea_opt=opts.SplitAreaOpts(
                is_show=True, areastyle_opts=opts.AreaStyleOpts(opacity=1)
            ),
            textstyle_opts=opts.TextStyleOpts(color="#000000"),
        )
        .add(
            series_name="Score",
            data=data,
            areastyle_opts=opts.AreaStyleOpts(color="#FF0000", opacity=0.2),
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            title_opts=opts.TitleOpts(title=f"综合分:{res.sum_score:.2f}", pos_right=True),
            legend_opts=opts.LegendOpts(selected_mode="single")
        )
    )

    logger.info(f"create RadarMap for {cluster_scale} cluster")

    env = Environment(loader=FileSystemLoader("./utils"))
    template = env.get_template('report_tmp.html')

    data = copy.deepcopy(standard)
    data['radarmap'] = c.dump_options_with_quotes()
    data['scale'] = cluster_scale
    data['scale_CN'] = {'mini':'微', 'small':'小', 'medium':'中', 'large':'大'}[cluster_scale]
    data['test'] = test_result
    data['good'] = '、'.join(good)
    data['better'] = '、'.join(better)
    data['time'] = time.strftime("%Y.%m.%d", time.localtime())
    data['name'] = CLUSTER_NAME

    with open('Report.html', 'w') as f:
        f.write(template.render(data))
-------------------------------------------------------------------------------- /utils/standard_score.json: -------------------------------------------------------------------------------- 1 | { 2 | "compute":{ 3 | "HPL":{ 4 | "name":"HPL双精度浮点计算性能", 5 | "weights":0.6, 6 | "large":148.6, 7 | "medium":14.01, 8 | "small":6.0, 9 | "mini":0.3 10 | }, 11 | "HPCG":{ 12 | "name":"HPCG双精度浮点计算性能", 13 | "weights":0.4, 14 | "large":2725.75, 15 | "medium":355.44, 16 | "small":175, 17 | "mini":6 18 | } 19 | }, 20 | "AI":{ 21 | "infering":{ 22 | "name":"图像推理任务的计算性能", 23 | "weights":0.5, 24 | "large":2000000, 25 | "medium":1500000, 26 | "small":100000, 27 | "mini":750 28 | }, 29 | "training":{ 30 | "name":"图像训练任务的计算性能", 31 | "weights":0.5, 32 | "large":10802, 33 | "medium":254, 34 | "small":10000, 35 | "mini":560 36 | } 37 | }, 38 | "storage":{ 39 | "single_client_single_fluence":{ 40 | "name":"文件系统单客户端单流带宽", 41 | "weights":0.2, 42 | "large":8, 43 | "medium":9, 44 | "small":6, 45 | "mini":1 46 | }, 47 | "single_client_multi_fluence":{ 48 | "name":"文件系统单客户端多流带宽", 49 | "weights":0.2, 50 | "large":13, 51 | "medium":21, 52 | "small":11, 53 | "mini":5 54 | }, 55 | "aggregation_bandwidth":{ 56 | "name":"文件系统聚合带宽", 57 | "weights":0.2, 58 | "large":2500, 59 | "medium":1760, 60 | "small":200, 61 | "mini":80 62 | }, 63 | "IO_rate":{ 64 | "name":"文件系统聚合IO操作速率", 65 | "weights":0.2, 66 | "large":26000000, 67 | "medium":14000000, 68 | "small":17500000, 69 | "mini":4300000 70 | }, 71 | "multi_request":{ 72 | "name":"多协议平均访问效率", 73 | "weights":0.2, 74 | "large":62.0, 75 | "medium":64.6, 76 | "small":65, 77 | "mini":65 78 | } 79 | }, 80 | "network":{ 81 | "P2P_network_bandwidth":{ 82 | "name":"点对点网络带宽", 83 | "weights":0.4, 84 | "large":200, 85 | "medium":200, 86 | "small":200, 87 | "mini":100 88 | }, 89 | "P2P_message_latency":{ 90 | "name":"点对点消息延迟", 91 | "weights":0.3, 92 | "large":"1/1.67", 93 | "medium":"1/3.7", 94 | "small":"1/4.0", 95 | "mini":"1/2.0" 96 | }, 97 | "ratio":{ 98 | 
"name":"网络对分带宽与注入带宽比值", 99 | "weights":0.3, 100 | "large":1.022, 101 | "medium":2.06, 102 | "small":1.5, 103 | "mini":1 104 | } 105 | }, 106 | "system":{ 107 | "compute_efficiency":{ 108 | "name":"单位功耗的浮点计算性能", 109 | "weights":0.6, 110 | "large":14.719, 111 | "medium":3.56, 112 | "small":20, 113 | "mini":6 114 | }, 115 | "IO_operation_rate":{ 116 | "name":"单位功耗的文件系统聚合IO速率", 117 | "weights":0.4, 118 | "large":2.57, 119 | "medium":3.55, 120 | "small":200, 121 | "mini":100 122 | } 123 | }, 124 | "balance":{ 125 | "mem2cpu":{ 126 | "name":"内存容量与处理器核心数比", 127 | "weights":0.2, 128 | "large":9.64, 129 | "medium":1.66, 130 | "small":4, 131 | "mini":3.93 132 | }, 133 | "buffer2mem":{ 134 | "name":"BurstBuffer与内存的容量比", 135 | "weights":0.2, 136 | "large":3.78, 137 | "medium":2.3, 138 | "small":2, 139 | "mini":2.7 140 | }, 141 | "file2buffer":{ 142 | "name":"并行文件系统与BurstBuffer的容量比", 143 | "weights":0.2, 144 | "large":23.87, 145 | "medium":15, 146 | "small":10, 147 | "mini":17 148 | }, 149 | "mem2buffer":{ 150 | "name":"内存与BurstBuffer的带宽比", 151 | "weights":0.2, 152 | "large":6000, 153 | "medium":4000, 154 | "small":1000, 155 | "mini":125 156 | }, 157 | "buffer2file":{ 158 | "name":"BurstBuffer与并行文件系统的带宽比", 159 | "weights":0.2, 160 | "large":4, 161 | "medium":3, 162 | "small":10, 163 | "mini":5.5 164 | } 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /utils/tool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import time 4 | import os 5 | import json 6 | import re 7 | import textwrap 8 | from setting import HPCbench_RESULT, ROOT_DIR, GPU_PARTITION, CPU_PARTITION, CPU_MAX_CORES, HADOOP_DATA, \ 9 | BW_BURSTBUFFER, PARA_STORAGE_PATH, TOTAL_NODES, CLUSTER_BURSTBUFFER, CLUSTER_POWER, STORAGE_POWER 10 | 11 | 12 | class Dict(dict): 13 | 14 | def __getattr__(self, key): 15 | return self.get(key) 16 | 17 | def __setattr__(self, key, 
value): 18 | self[key] = value 19 | 20 | def dict_to_obj(obj): 21 | if not isinstance(obj, dict): 22 | return obj 23 | d = Dict() 24 | for k, v in obj.items(): 25 | d[k] = dict_to_obj(v) 26 | return d 27 | 28 | class Tool: 29 | def __init__(self): 30 | pass 31 | 32 | def prt_content(self, content): 33 | flags = '*' * 30 34 | print(f"{flags}{content}{flags}") 35 | 36 | def gen_list(self, data): 37 | return data.strip().split('\n') 38 | 39 | def chomp_cmd(self, cmd, flag=True): 40 | if flag: 41 | cmd = textwrap.dedent(cmd) 42 | # cmd = re.sub(r'^ +', '',cmd, flags=re.MULTILINE) 43 | cmd = cmd.replace("{{ HPCbench_RESULT }}", HPCbench_RESULT) 44 | cmd = cmd.replace("{{ GPU_PARTITION }}", GPU_PARTITION) 45 | cmd = cmd.replace("{{ CPU_PARTITION }}", CPU_PARTITION) 46 | cmd = cmd.replace("{{ CPU_MAX_CORES }}", CPU_MAX_CORES) 47 | cmd = cmd.replace("{{ HADOOP_DATA }}", HADOOP_DATA) 48 | cmd = cmd.replace("{{ BW_BURSTBUFFER }}", BW_BURSTBUFFER) 49 | cmd = cmd.replace("{{ PARA_STORAGE_PATH }}", PARA_STORAGE_PATH) 50 | cmd = cmd.replace("{{ TOTAL_NODES }}", TOTAL_NODES) 51 | cmd = cmd.replace("{{ CLUSTER_BURSTBUFFER }}", CLUSTER_BURSTBUFFER) 52 | cmd = cmd.replace("{{ CLUSTER_POWER }}", CLUSTER_POWER) 53 | cmd = cmd.replace("{{ STORAGE_POWER }}", STORAGE_POWER) 54 | cmd = cmd.strip() 55 | return cmd 56 | 57 | def get_scale(self, hpl_score): 58 | CLUSTER_SCALE = None 59 | if hpl_score <= 1: 60 | CLUSTER_SCALE = 'mini' 61 | elif hpl_score <= 10: 62 | CLUSTER_SCALE = 'small' 63 | elif hpl_score > 30: 64 | CLUSTER_SCALE = 'large' 65 | else: 66 | CLUSTER_SCALE = 'medium' 67 | return CLUSTER_SCALE 68 | 69 | def get_time_stamp(self): 70 | return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 71 | 72 | def read_file(self, filename): 73 | content = '' 74 | try: 75 | with open(filename, encoding='utf-8') as f: 76 | content = f.read().strip() 77 | except IOError: 78 | return content 79 | return content 80 | 81 | def read_lines(self, filename): 82 | content = '' 83 | try: 84 | 
with open(filename, encoding='utf-8') as f: 85 | content = f.readlines() 86 | except IOError: 87 | return content 88 | return content 89 | 90 | def write_file(self, filename, content=""): 91 | with open(filename,'w') as f: 92 | f.write(content) 93 | 94 | def mkdirs(self, path): 95 | if not os.path.exists(path): 96 | os.makedirs(path) 97 | 98 | def mkfile(self, path, content=''): 99 | if not os.path.exists(path): 100 | self.write_file(path, content) 101 | 102 | def check_url_isvalid(self,url): 103 | import requests 104 | try: 105 | response = requests.get(url, stream=True) 106 | if response.status_code == 200: 107 | return True 108 | else: 109 | return False 110 | except requests.exceptions.RequestException as e: 111 | return False 112 | 113 | 114 | class JSON: 115 | def __init__(self, filename): 116 | self.filename = filename 117 | self.app = self.read_file() 118 | 119 | # 读取 JSON 文件 120 | def read_file(self): 121 | if not os.path.exists(self.filename): 122 | with open(self.filename, 'w') as f: 123 | f.write('{}') 124 | with open(self.filename, "r") as file: 125 | data = json.load(file) 126 | return data 127 | 128 | # 写入 JSON 文件 129 | def write_file(self): 130 | with open(self.filename, "w") as file: 131 | json.dump(self.app, file, indent=4) 132 | 133 | # 查询数据 134 | def query_data(self, key): 135 | if key in self.app: 136 | return self.app[key] 137 | else: 138 | return None 139 | 140 | # 添加数据 141 | def add_data(self, key, value): 142 | self.app[key] = value 143 | 144 | # 删除数据 145 | def delete_data(self, key): 146 | if key in self.app: 147 | del self.app[key] 148 | else: 149 | print("Key not found") 150 | 151 | # 修改数据 152 | def update_data(self, key, value): 153 | if key in self.app: 154 | self.app[key] = value 155 | else: 156 | print("Key not found") 157 | 158 | def json_transform(self, dict): 159 | return json.dumps(dict) 160 | --------------------------------------------------------------------------------