├── .github └── workflows │ └── ubuntu24_04_cuda12_9.yaml ├── .gitignore ├── COPYING ├── Makefile.am ├── README ├── README_sparc ├── autogen.sh ├── configure.ac ├── debian ├── changelog ├── compat ├── control ├── perftest.install └── rules ├── encrypt_credentials.c ├── gen_data_enc_key.c ├── man └── perftest.1 ├── perftest.spec ├── run_perftest_loopback ├── run_perftest_multi_devices ├── runme └── src ├── atomic_bw.c ├── atomic_lat.c ├── clock_test.c ├── cuda_memory.c ├── cuda_memory.h ├── cuda_utils.cu ├── get_clock.c ├── get_clock.h ├── hl_memory.c ├── hl_memory.h ├── host_memory.c ├── host_memory.h ├── memory.h ├── mlu_memory.c ├── mlu_memory.h ├── mlx5_devx.h ├── mmap_memory.c ├── mmap_memory.h ├── multicast_resources.c ├── multicast_resources.h ├── neuron_memory.c ├── neuron_memory.h ├── opencl_memory.c ├── opencl_memory.h ├── perftest_communication.c ├── perftest_communication.h ├── perftest_counters.c ├── perftest_counters.h ├── perftest_parameters.c ├── perftest_parameters.h ├── perftest_resources.c ├── perftest_resources.h ├── raw_ethernet_fs_rate.c ├── raw_ethernet_resources.c ├── raw_ethernet_resources.h ├── raw_ethernet_send_burst_lat.c ├── raw_ethernet_send_bw.c ├── raw_ethernet_send_lat.c ├── read_bw.c ├── read_lat.c ├── rocm_memory.c ├── rocm_memory.h ├── send_bw.c ├── send_lat.c ├── write_bw.c └── write_lat.c /.github/workflows/ubuntu24_04_cuda12_9.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Test perftest on Ubuntu 24.04 with CUDA 12.9 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-24.04 14 | 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v4 18 | 19 | - name: Install CUDA repository 20 | run: | 21 | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb 22 | sudo dpkg -i cuda-keyring_1.1-1_all.deb 23 | 24 | - name: 
Install dependencies 25 | run: | 26 | sudo apt-get update 27 | sudo apt-get install -y \ 28 | build-essential \ 29 | autoconf \ 30 | automake \ 31 | libtool \ 32 | pkg-config \ 33 | libibverbs-dev \ 34 | librdmacm-dev \ 35 | libibumad-dev \ 36 | libpci-dev \ 37 | cuda-toolkit-12-9 \ 38 | cuda-drivers 39 | 40 | - name: Set up CUDA environment 41 | run: | 42 | echo 'export PATH=/usr/local/cuda-12.9/bin:${PATH}' >> $GITHUB_ENV 43 | echo 'export LD_LIBRARY_PATH=/usr/local/cuda-12.9/lib64:${LD_LIBRARY_PATH}' >> $GITHUB_ENV 44 | 45 | - name: Run autogen.sh 46 | run: ./autogen.sh 47 | 48 | - name: Configure the build 49 | run: ./configure CUDA_H_PATH=/usr/local/cuda/include/cuda.h 50 | 51 | - name: Build perftest 52 | run: make 53 | 54 | - name: Install perftest 55 | run: sudo make install 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object Files 2 | *.o 3 | *.lo 4 | *.la 5 | 6 | # Libraries 7 | *.lib 8 | *.a 9 | .libs 10 | 11 | # Shared objects 12 | *.so 13 | *.so.* 14 | 15 | # Dependencies 16 | .dirstamp 17 | .deps 18 | 19 | # Automake 20 | Makefile 21 | Makefile.in 22 | aclocal.m4 23 | autom4te.cache/ 24 | config/ 25 | config.h 26 | config.h.in* 27 | config.log 28 | config.status 29 | configure* 30 | libtool 31 | m4/ 32 | stamp-h1 33 | 34 | # Project/Eclipse 35 | .cproject 36 | .project 37 | 38 | #Ctags 39 | tags 40 | 41 | #Programs 42 | ib_atomic_bw 43 | ib_atomic_lat 44 | ib_read_bw 45 | ib_read_lat 46 | ib_send_bw 47 | ib_send_lat 48 | ib_write_bw 49 | ib_write_lat 50 | raw_ethernet_bw 51 | raw_ethernet_lat 52 | raw_ethernet_burst_lat 53 | raw_ethernet_fs_rate 54 | 55 | #Debian Stuff 56 | debian/compat 57 | debian/*.log 58 | debian/files 59 | debian/perftest 60 | -------------------------------------------------------------------------------- /COPYING: 
-------------------------------------------------------------------------------- 1 | This software is available to you under a choice of one of two 2 | licenses. You may choose to be licensed under the terms of the GNU 3 | General Public License (GPL) Version 2, available from the file 4 | COPYING in the main directory of this source tree, or the 5 | OpenIB.org BSD license below: 6 | 7 | Redistribution and use in source and binary forms, with or 8 | without modification, are permitted provided that the following 9 | conditions are met: 10 | 11 | - Redistributions of source code must retain the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer. 14 | 15 | - Redistributions in binary form must reproduce the above 16 | copyright notice, this list of conditions and the following 17 | disclaimer in the documentation and/or other materials 18 | provided with the distribution. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 22 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 24 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 25 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 26 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | SOFTWARE. 28 | 29 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2001-2011 Mellanox Technologies Ltd. All rights reserved. 3 | # 4 | # This software is available to you under a choice of one of two 5 | # licenses. 
You may choose to be licensed under the terms of the GNU 6 | # General Public License (GPL) Version 2, available from the file 7 | # COPYING in the main directory of this source tree, or the 8 | # OpenIB.org BSD license below: 9 | # 10 | # Redistribution and use in source and binary forms, with or 11 | # without modification, are permitted provided that the following 12 | # conditions are met: 13 | # 14 | # - Redistributions of source code must retain the above 15 | # copyright notice, this list of conditions and the following 16 | # disclaimer. 17 | # 18 | # - Redistributions in binary form must reproduce the above 19 | # copyright notice, this list of conditions and the following 20 | # disclaimer in the documentation and/or other materials 21 | # provided with the distribution. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | # SOFTWARE. 
31 | # 32 | 33 | ACLOCAL_AMFLAGS= -I m4 34 | AUTOMAKE_OPTIONS= subdir-objects 35 | 36 | noinst_LIBRARIES = libperftest.a 37 | libperftest_a_SOURCES = src/get_clock.c src/perftest_communication.c src/perftest_parameters.c src/perftest_resources.c src/perftest_counters.c src/host_memory.c src/mmap_memory.c 38 | noinst_HEADERS = src/get_clock.h src/perftest_communication.h src/perftest_parameters.h src/perftest_resources.h src/perftest_counters.h src/memory.h src/host_memory.h src/mmap_memory.h src/cuda_memory.h src/rocm_memory.h src/neuron_memory.h src/hl_memory.h src/mlu_memory.h 39 | 40 | if CUDA 41 | libperftest_a_SOURCES += src/cuda_memory.c 42 | endif 43 | 44 | if ROCM 45 | libperftest_a_SOURCES += src/rocm_memory.c 46 | endif 47 | 48 | if NEURON 49 | libperftest_a_SOURCES += src/neuron_memory.c 50 | endif 51 | 52 | if MLU 53 | libperftest_a_SOURCES += src/mlu_memory.c 54 | endif 55 | 56 | if HABANALABS 57 | libperftest_a_SOURCES += src/hl_memory.c 58 | endif 59 | 60 | if OPENCL 61 | libperftest_a_SOURCES += src/opencl_memory.c 62 | endif 63 | 64 | bin_PROGRAMS = ib_send_bw ib_send_lat ib_write_lat ib_write_bw ib_read_lat ib_read_bw ib_atomic_lat ib_atomic_bw 65 | bin_SCRIPTS = run_perftest_loopback run_perftest_multi_devices 66 | 67 | # Non-source man pages: 68 | man1_MANS = \ 69 | $(top_builddir)/man/ib_write_bw.1 \ 70 | $(top_builddir)/man/ib_read_bw.1 \ 71 | $(top_builddir)/man/ib_send_bw.1 \ 72 | $(top_builddir)/man/ib_atomic_bw.1 \ 73 | $(top_builddir)/man/ib_write_lat.1 \ 74 | $(top_builddir)/man/ib_read_lat.1 \ 75 | $(top_builddir)/man/ib_send_lat.1 \ 76 | $(top_builddir)/man/ib_atomic_lat.1 \ 77 | $(top_builddir)/man/raw_ethernet_bw.1 \ 78 | $(top_builddir)/man/raw_ethernet_lat.1 \ 79 | $(top_builddir)/man/raw_ethernet_burst_lat.1 \ 80 | $(top_builddir)/man/raw_ethernet_fs_rate.1 \ 81 | # 82 | 83 | dist_man_MANS = man/perftest.1 84 | 85 | generated_mans = $(man1_MANS) 86 | $(generated_mans): $(top_builddir)/man/%.1: man/perftest.1 87 | @if echo 
$(top_srcdir) | grep '^/' > /dev/null ; then \ 88 | echo ln -s $(top_srcdir)/man/perftest.1 $@ ; \ 89 | ln -s $(top_srcdir)/man/perftest.1 $@ ; \ 90 | else \ 91 | echo ln -s ../$(top_srcdir)/man/perftest.1 $@ ; \ 92 | ln -s ../$(top_srcdir)/man/perftest.1 $@ ; \ 93 | fi 94 | 95 | clean-local: 96 | -rm -f man/[ir]*.1 97 | 98 | if HAVE_RAW_ETH 99 | libperftest_a_SOURCES += src/raw_ethernet_resources.c 100 | noinst_HEADERS += src/raw_ethernet_resources.h 101 | bin_PROGRAMS += raw_ethernet_bw raw_ethernet_lat raw_ethernet_burst_lat raw_ethernet_fs_rate 102 | else 103 | libperftest_a_SOURCES += 104 | noinst_HEADERS += 105 | bin_PROGRAMS += 106 | endif 107 | 108 | if IS_FREEBSD 109 | LIBMLX4= -lmlx4 110 | else 111 | LIBMLX4= 112 | endif 113 | 114 | if HAVE_CUDART 115 | libperftest_a_SOURCES += src/cuda_utils.cu 116 | libperftest_a_LIBADD = src/cuda_utils.cu.o src/cuda_utils.cu.lo 117 | SUFFIXES= .cu 118 | %.cu.lo: %.cu.o 119 | $(NVCC) -dlink -o $@ $< -L$(CUDA_LIB_DIR) -lcudadevrt -lcudart 120 | %.cu.o: %.cu 121 | $(NVCC) -DHAVE_CONFIG_H $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) -o $@ -c $< $(NVCCFLAGS) 122 | endif 123 | 124 | 125 | ib_send_bw_SOURCES = src/send_bw.c src/multicast_resources.c src/multicast_resources.h 126 | ib_send_bw_LDADD = libperftest.a $(LIBUMAD) $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 127 | 128 | ib_send_lat_SOURCES = src/send_lat.c src/multicast_resources.c src/multicast_resources.h 129 | ib_send_lat_LDADD = libperftest.a $(LIBUMAD) $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 130 | 131 | ib_write_lat_SOURCES = src/write_lat.c 132 | ib_write_lat_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 133 | 134 | ib_write_bw_SOURCES = src/write_bw.c 135 | ib_write_bw_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 136 | 137 | ib_read_lat_SOURCES = src/read_lat.c 138 | ib_read_lat_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 139 | 140 | ib_read_bw_SOURCES = 
src/read_bw.c 141 | ib_read_bw_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 142 | 143 | ib_atomic_lat_SOURCES = src/atomic_lat.c 144 | ib_atomic_lat_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 145 | 146 | ib_atomic_bw_SOURCES = src/atomic_bw.c 147 | ib_atomic_bw_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 148 | 149 | if HAVE_RAW_ETH 150 | raw_ethernet_bw_SOURCES = src/raw_ethernet_send_bw.c 151 | raw_ethernet_bw_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 152 | 153 | raw_ethernet_lat_SOURCES = src/raw_ethernet_send_lat.c 154 | raw_ethernet_lat_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 155 | 156 | raw_ethernet_burst_lat_SOURCES = src/raw_ethernet_send_burst_lat.c 157 | raw_ethernet_burst_lat_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 158 | 159 | raw_ethernet_fs_rate_SOURCES = src/raw_ethernet_fs_rate.c 160 | raw_ethernet_fs_rate_LDADD = libperftest.a $(LIBMATH) $(LIBMLX4) $(LIBMLX5) $(LIBEFA) $(LIBHNS) 161 | 162 | else 163 | raw_ethernet_bw_SOURCES = 164 | raw_ethernet_bw_LDADD = 165 | 166 | raw_ethernet_lat_SOURCES = 167 | raw_ethernet_lat_LDADD = 168 | 169 | endif 170 | -------------------------------------------------------------------------------- /README_sparc: -------------------------------------------------------------------------------- 1 | ======================================= 2 | Build instructions: 3 | ======================================= 4 | 5 | # ./autogen.sh 6 | 7 | # ./configure --build= --host=sparc64-redhat-linux-gnu 8 | --target=sparc64-linux --program-prefix= --prefix=/usr 9 | --exec-prefix=/usr --bindir=/usr/bin --sbindir=/usr/sbin 10 | --sysconfdir=/etc --datadir=/usr/share --includedir=/usr/include 11 | --libdir=/usr/lib64 --libexecdir=/usr/libexec --localstatedir=/var 12 | --sharedstatedir=/var/lib --mandir=/usr/share/man 13 | --infodir=/usr/share/info 
14 | 15 | # make clean && make V=1 16 | 17 | 18 | ======================================= 19 | RPM queries: 20 | ======================================= 21 | 22 | # rpm --eval %{_target} 23 | sparc64-linux 24 | 25 | # rpm --eval %{_host} 26 | sparc64-redhat-linux-gnu 27 | 28 | 29 | ======================================= 30 | GCC queries: 31 | ======================================= 32 | 33 | Get all the gcc definitions for the platform: 34 | 35 | # gcc -E -dM - < /dev/null 36 | ... 37 | #define __sparc__ 1 38 | ... 39 | #define __arch64__ 1 40 | ... 41 | 42 | 43 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | test -d ./config || mkdir ./config 4 | mkdir -p m4 config/m4 config/aux 5 | libtoolize --force --copy 6 | aclocal -I config 7 | autoheader 8 | automake --foreign --add-missing --copy 9 | autoconf 10 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | perftest (4.2-0.0) stable; urgency=low 2 | 3 | * Initial release For Ubuntu (Closes: #182805) 4 | 5 | -- Gil Rockah Sun, 29 Mar 2015 10:58:00 +0200 6 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 10 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: perftest 2 | Section: net 3 | Priority: extra 4 | Maintainer: Gil Rockah 5 | Build-Depends: debhelper (>= 10), 6 | libibumad-dev, 7 | libibverbs-dev (>= 1.1.6), 8 | libpci-dev, 9 | librdmacm-dev (>= 1.0.8) 10 | Standards-Version: 3.9.2 11 | Homepage: https://openfabrics.org/downloads/perftest/ 12 | Vcs-Git: 
git://git.openfabrics.org/~shamoya/perftest.git 13 | Vcs-Browser: http://git.openfabrics.org/git?p=~shamoya/perftest.git;a=summary 14 | 15 | Package: perftest 16 | Architecture: any 17 | Depends: ${misc:Depends}, ${shlibs:Depends} 18 | Description: Infiniband verbs performance tests 19 | This is a collection of tests written using Infiniband verbs intended for 20 | use as a performance micro-benchmark. The tests can measure the latency 21 | bandwidth and message rate of Infiniband or Ethernet (RoCE) fabrics. 22 | -------------------------------------------------------------------------------- /debian/perftest.install: -------------------------------------------------------------------------------- 1 | ib_atomic_bw usr/bin/ 2 | ib_atomic_lat usr/bin/ 3 | ib_read_bw usr/bin/ 4 | ib_read_lat usr/bin/ 5 | ib_send_bw usr/bin/ 6 | ib_send_lat usr/bin/ 7 | ib_write_bw usr/bin/ 8 | ib_write_lat usr/bin/ 9 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | # Sample debian/rules that uses debhelper. 4 | # 5 | # This file was originally written by Joey Hess and Craig Small. 6 | # As a special exception, when this file is copied by dh-make into a 7 | # dh-make output file, you may use that output file without restriction. 8 | # This special exception was added by Craig Small in version 0.37 of dh-make. 9 | # 10 | # Modified to make a template file for a multi-binary package with separated 11 | # build-arch and build-indep targets by Bill Allombert 2001 12 | 13 | # Uncomment this to turn on verbose mode. 14 | #export DH_VERBOSE=1 15 | 16 | # This has to be exported to make some magic below work. 
17 | export DH_OPTIONS 18 | 19 | 20 | %: 21 | dh $@ 22 | -------------------------------------------------------------------------------- /encrypt_credentials.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* 10 | This program encrypts the credentials that are used to verify that 11 | software communicating with the Mellanox device is authorized 12 | to manage crypto resources. 13 | */ 14 | 15 | void handleErrors(void) 16 | { 17 | ERR_print_errors_fp(stderr); 18 | abort(); 19 | } 20 | 21 | int encrypt(unsigned char *plaintext, int plaintext_len, unsigned char *kek, 22 | unsigned char *iv, unsigned char *ciphertext) 23 | { 24 | EVP_CIPHER_CTX *ctx; 25 | 26 | int len; 27 | 28 | int ciphertext_len; 29 | 30 | /* Create and initialise the context */ 31 | if (!(ctx = EVP_CIPHER_CTX_new())) 32 | handleErrors(); 33 | 34 | EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW); 35 | 36 | if (1 != EVP_EncryptInit_ex(ctx, EVP_aes_128_wrap(), NULL, kek, iv)) 37 | handleErrors(); 38 | if (1 != 39 | EVP_EncryptUpdate(ctx, ciphertext, &len, plaintext, plaintext_len)) 40 | handleErrors(); 41 | ciphertext_len = len; 42 | 43 | if (1 != EVP_EncryptFinal_ex(ctx, ciphertext + len, &len)) 44 | handleErrors(); 45 | ciphertext_len += len; 46 | 47 | /* Clean up */ 48 | EVP_CIPHER_CTX_free(ctx); 49 | 50 | return ciphertext_len; 51 | } 52 | 53 | 54 | int main (int argc , char** argv) 55 | { 56 | if(argc != 4){ 57 | fprintf(stderr, "The application must get three parameters\n"); 58 | exit(1); 59 | } 60 | /* A 128 bit kek */ 61 | unsigned char kek[16] = {}; 62 | unsigned char iv[8] = {0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6}; 63 | unsigned char credentials[40] = {}; 64 | unsigned char ciphertext[128] = {}; 65 | int ciphertext_len; 66 | char * line = NULL; 67 | size_t len = 0; 68 | ssize_t read; 69 | int index = 0; 70 | char* eptr; 71 | int i; 72 
| FILE* creds_file = NULL; 73 | FILE* kek_file = NULL; 74 | FILE* encrypted_credentials = NULL; 75 | 76 | creds_file = fopen(argv[1], "r"); 77 | 78 | if(creds_file == NULL) { 79 | fprintf(stderr, "Couldn't open the credentials file\n"); 80 | exit(1); 81 | } 82 | 83 | kek_file = fopen(argv[2], "r"); 84 | 85 | if(kek_file == NULL) { 86 | fprintf(stderr, "Couldn't open key encryption key file\n"); 87 | fclose(creds_file); 88 | exit(1); 89 | } 90 | 91 | while((read = getline(&line, &len, creds_file)) != -1) { 92 | 93 | if(index >= sizeof(credentials)) { 94 | fprintf(stderr, "Invalid credentials file\n"); 95 | fclose(creds_file); 96 | fclose(kek_file); 97 | exit(1); 98 | } 99 | 100 | credentials[index] = strtol(line, &eptr, 16); 101 | index++; 102 | } 103 | 104 | fclose(creds_file); 105 | 106 | line = NULL; 107 | len = 0; 108 | index = 0; 109 | 110 | while((read = getline(&line, &len, kek_file)) != -1) { 111 | 112 | if(index >= sizeof(kek)) { 113 | fprintf(stderr, "Invalid key encryption key file\n"); 114 | fclose(kek_file); 115 | exit(1); 116 | } 117 | 118 | kek[index] = strtol(line, &eptr, 16); 119 | index++; 120 | } 121 | 122 | fclose(kek_file); 123 | 124 | ciphertext_len = 125 | encrypt(credentials, sizeof(credentials), kek, iv, ciphertext); 126 | 127 | encrypted_credentials = fopen(argv[3], "w"); 128 | 129 | if(encrypted_credentials == NULL) { 130 | fprintf(stderr, "Couldn't open the encrypted credentials file\n"); 131 | exit(1); 132 | } 133 | 134 | for(i = 0; i < ciphertext_len; i++) 135 | fprintf(encrypted_credentials, "0x%02x\n", ciphertext[i]); 136 | 137 | fclose(encrypted_credentials); 138 | 139 | return 0; 140 | } 141 | -------------------------------------------------------------------------------- /gen_data_enc_key.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* 10 | This program generates a data encryption key that 11 | 
is used to encrypt data that is sent in perftest applications 12 | */ 13 | 14 | void handleErrors(void) 15 | { 16 | ERR_print_errors_fp(stderr); 17 | abort(); 18 | } 19 | 20 | int encrypt(unsigned char *plaintext, int plaintext_len, unsigned char *kek, 21 | unsigned char *iv, unsigned char *ciphertext) 22 | { 23 | EVP_CIPHER_CTX *ctx; 24 | 25 | int len; 26 | 27 | int ciphertext_len; 28 | 29 | /* Create and initialise the context */ 30 | if (!(ctx = EVP_CIPHER_CTX_new())) 31 | handleErrors(); 32 | 33 | EVP_CIPHER_CTX_set_flags(ctx, EVP_CIPHER_CTX_FLAG_WRAP_ALLOW); 34 | 35 | if (1 != EVP_EncryptInit_ex(ctx, EVP_aes_128_wrap(), NULL, kek, iv)) 36 | handleErrors(); 37 | if (1 != 38 | EVP_EncryptUpdate(ctx, ciphertext, &len, plaintext, plaintext_len)) 39 | handleErrors(); 40 | ciphertext_len = len; 41 | 42 | if (1 != EVP_EncryptFinal_ex(ctx, ciphertext + len, &len)) 43 | handleErrors(); 44 | ciphertext_len += len; 45 | 46 | /* Clean up */ 47 | EVP_CIPHER_CTX_free(ctx); 48 | 49 | return ciphertext_len; 50 | } 51 | 52 | 53 | int main (int argc , char** argv) 54 | { 55 | if(argc != 3){ 56 | fprintf(stderr, "The application should get 3 parameters\n"); 57 | exit(1); 58 | } 59 | /* A 128 bit kek */ 60 | unsigned char kek[16] = {}; 61 | 62 | /* A 64 bit IV */ 63 | unsigned char iv[8] = {0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6}; 64 | 65 | unsigned char dek[32] = {}; 66 | unsigned char ciphertext[128] = {}; 67 | int ciphertext_len; 68 | 69 | char * line = NULL; 70 | size_t len = 0; 71 | ssize_t read; 72 | int index = 0; 73 | char* eptr; 74 | int i; 75 | FILE* kek_file = NULL; 76 | FILE* dek_file = NULL; 77 | 78 | srand(time(NULL)); 79 | 80 | //random 81 | for(i = 0; i < 32; i++){ 82 | dek[i]=(rand()%256); 83 | } 84 | 85 | kek_file = fopen(argv[1] , "r"); 86 | 87 | if(kek_file == NULL){ 88 | fprintf(stderr, "Couldn't open key encryption key file\n"); 89 | exit(1); 90 | } 91 | 92 | while((read = getline(&line, &len, kek_file)) != -1) { 93 | 94 | if(index >= sizeof(kek)) { 
95 | fprintf(stderr, "Invalid key encryption key file\n"); 96 | fclose(kek_file); 97 | exit(1); 98 | } 99 | 100 | kek[index] = strtol(line, &eptr, 16); 101 | index++; 102 | } 103 | 104 | fclose(kek_file); 105 | 106 | ciphertext_len = 107 | encrypt(dek, sizeof(dek), kek, iv, ciphertext); 108 | 109 | dek_file = fopen(argv[2] , "w"); 110 | 111 | if(dek_file == NULL){ 112 | fprintf(stderr, "Couldn't open data encryption key file\n"); 113 | exit(1); 114 | } 115 | 116 | for(i = 0; i < ciphertext_len; i++) 117 | fprintf(dek_file, "0x%02x\n", ciphertext[i]); 118 | 119 | fclose(dek_file); 120 | 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /perftest.spec: -------------------------------------------------------------------------------- 1 | Name: perftest 2 | Summary: IB Performance tests 3 | Version: 25.04.0 4 | Release: 0.0 5 | License: BSD 3-Clause, GPL v2 or later 6 | Group: Productivity/Networking/Diagnostic 7 | Source: http://www.openfabrics.org/downloads/%{name}-%{version}.tar.gz 8 | Url: http://www.openfabrics.org 9 | BuildRoot: %{_tmppath}/%{name}-%{version}-build 10 | BuildRequires: libibverbs-devel librdmacm-devel libibumad-devel 11 | BuildRequires: pciutils-devel 12 | 13 | %description 14 | gen3 uverbs microbenchmarks 15 | 16 | %prep 17 | %setup -q 18 | 19 | %build 20 | %configure \ 21 | %if %{?_cuda_h_path:1}0 22 | CUDA_H_PATH=%{_cuda_h_path} 23 | %endif 24 | %{__make} 25 | chmod -x runme 26 | 27 | %install 28 | rm -rf $RPM_BUILD_ROOT 29 | make DESTDIR=%{buildroot} install 30 | 31 | %clean 32 | rm -rf ${RPM_BUILD_ROOT} 33 | 34 | %files 35 | %defattr(-, root, root) 36 | %doc README COPYING runme 37 | %_bindir/* 38 | %_mandir/man1/*.1* 39 | 40 | %changelog 41 | * Wed Jan 09 2013 - idos@mellanox.com 42 | - Use autotools for building package. 43 | * Sun Dec 30 2012 - idos@mellanox.com 44 | - Added raw_ethernet_bw to install script. 
45 | * Sun Oct 21 2012 - idos@mellanox.com 46 | - Removed write_bw_postlist (feature contained in all BW tests) 47 | * Sat Oct 20 2012 - idos@mellanox.com 48 | - Version 2.0 is underway 49 | * Mon May 14 2012 - idos@mellanox.com 50 | - Removed (deprecated) rdma_bw and rdma_lat tests 51 | * Thu Feb 02 2012 - idos@mellanox.com 52 | - Updated to 1.4.0 version (no compatibility with older versions). 53 | * Thu Feb 02 2012 - idos@mellanox.com 54 | - Merge perftest code for Linux & Windows 55 | * Sun Jan 01 2012 - idos@mellanox.com 56 | - Added atomic benchmarks 57 | * Sat Apr 18 2009 - hal.rosenstock@gmail.com 58 | - Change executable names for rdma_lat and rdma_bw 59 | * Mon Jul 09 2007 - hvogel@suse.de 60 | - Use correct version 61 | * Wed Jul 04 2007 - hvogel@suse.de 62 | - Add GPL COPYING file [#289509] 63 | * Mon Jul 02 2007 - hvogel@suse.de 64 | - Update to the OFED 1.2 version 65 | * Fri Jun 22 2007 - hvogel@suse.de 66 | - Initial Package, Version 1.1 67 | -------------------------------------------------------------------------------- /run_perftest_loopback: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # trivial script to launch a loopback test on the same device 3 | # example: run_perftest_loopback 0 1 ib_write_bw -s 10 4 | os_type=$(uname) 5 | if [ "$os_type" = "FreeBSD" ]; then 6 | cpu_bind_cmd="cpuset -l" 7 | else 8 | cpu_bind_cmd="taskset -c" 9 | fi 10 | if [ $# -lt 3 ] ; then 11 | echo "" 12 | echo "Usage: run_perftest_loopback " 13 | echo "example: run_perftest_loopback 0 1 ib_write_bw -s 10" 14 | exit 3 15 | fi 16 | server_core=$1 17 | client_core=$2 18 | shift 2 19 | 20 | $cpu_bind_cmd $server_core "$@" & 21 | #give server time to start 22 | sleep 1 23 | 24 | $cpu_bind_cmd $client_core "$@" localhost 25 | 26 | status=$? 
27 | 28 | wait 29 | exit $status 30 | -------------------------------------------------------------------------------- /run_perftest_multi_devices: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Script to launch a multi device test on separate processes. 3 | 4 | GET_HELP=0 5 | HAVE_CORES=0 6 | HAVE_DEVICES=0 7 | HAVE_IB_PORTS=0 8 | HAVE_GID_INDEXES=0 9 | HAVE_REMOTE_HOST=0 10 | HAVE_CMD=0 11 | BASE_TCP_PORT=15000 12 | 13 | 14 | function force_dependencies { 15 | if [[ $GET_HELP -eq 1 ]] 16 | then 17 | print_usage 18 | exit 19 | fi 20 | 21 | #mandatory flags 22 | if [[ $HAVE_DEVICES -eq 0 ]] 23 | then 24 | echo "-d/--devices flag is mandatory" 25 | exit 26 | fi 27 | 28 | if [[ $HAVE_CMD -eq 0 ]] 29 | then 30 | echo "-C/--cmd flag is mandatory" 31 | exit 32 | fi 33 | 34 | #optional flags 35 | #check that all arguments have enough params. 36 | if [[ $HAVE_CORES -eq 1 ]] 37 | then 38 | if [[ ${#CORES_LIST[@]} -ne $EXPECTED_PARAMS ]] 39 | then 40 | echo "number of cores should be equal to number of devices (cores for each device)" 41 | exit 42 | fi 43 | fi 44 | 45 | if [[ $HAVE_IB_PORTS -eq 1 ]] 46 | then 47 | if [[ ${#IB_PORTS_LIST[@]} -ne $EXPECTED_PARAMS ]] 48 | then 49 | echo "number of ib ports should be equal to number of devices (ib_port for each device)" 50 | exit 51 | fi 52 | fi 53 | 54 | if [[ $HAVE_GID_INDEXES -eq 1 ]] 55 | then 56 | if [[ ${#GID_INDEXES_LIST[@]} -ne $EXPECTED_PARAMS ]] 57 | then 58 | echo "number of gid indexes should be equal to number of devices (gid_index for each device)" 59 | exit 60 | fi 61 | fi 62 | } 63 | 64 | function run_commands { 65 | for (( I=0 ; I < $EXPECTED_PARAMS ; I++ )) 66 | do 67 | cmd="" 68 | if [[ $HAVE_CORES -eq 1 ]] 69 | then 70 | cmd="taskset -c ${CORES_LIST[$I]}" 71 | fi 72 | 73 | #mandatory: 74 | cmd="$cmd $TEST_CMD -d ${DEVICE_LIST[$I]} -p $(($BASE_TCP_PORT+I))" 75 | 76 | #optional: 77 | if [[ $HAVE_IB_PORTS -eq 1 ]] 78 | then 79 | cmd="$cmd -i 
${IB_PORTS_LIST[$I]}" 80 | fi 81 | 82 | if [[ $HAVE_GID_INDEXES -eq 1 ]] 83 | then 84 | cmd="$cmd -x ${GID_INDEXES_LIST[$I]}" 85 | fi 86 | 87 | if [[ $HAVE_REMOTE_HOST -eq 1 ]] 88 | then 89 | cmd="$cmd $REMOTE_HOST" 90 | fi 91 | 92 | if [[ $I -ne $(($EXPECTED_PARAMS-1)) ]] 93 | then 94 | cmd="$cmd &" 95 | fi 96 | 97 | eval $cmd 98 | done 99 | } 100 | 101 | function print_usage { 102 | echo -e "\nUsage:" 103 | echo " Server side: run_perftest_multi_device --devices dev1,dev2 --cmd \"\" [optional_flags]" 104 | echo " Client side: run_perftest_multi_device --devices dev1,dev2 --cmd \"\" --remote [optional_flags]" 105 | echo "" 106 | echo " ** Please make sure that does not include the on both sides. **" 107 | echo " This should be added only by --remote flag on the Client side." 108 | 109 | echo -e "\nMandatory flags:" 110 | echo " -d, --devices List of IB devices, seperated by comma. This will override '-d, --ib-dev' flag if existed in the perftest command." 111 | echo " i.e. --devices dev1,dev2" 112 | 113 | echo " -C, --cmd A valid perftest command." 114 | echo " i.e. --cmd \"ib_write_bw --size 64 --duration 3\"" 115 | 116 | echo -e "\nOptional flags:" 117 | echo " -c, --cores Pin each device to a specific core using taskset" 118 | echo " i.e. --cores 0,1 - This will pin dev1 command to core 0 and dev2 command to core 1" 119 | 120 | echo " -i, --ib_ports Choose ib_port for each device. This will override '-i, --ib-port' flag if existed in the perftest command." 121 | echo " i.e. --ib_ports 1,2 - dev1 will work with port 1 and dev2 will work with port 2" 122 | 123 | echo " -x, --gid_indexes Choose gid_index for each device. This will override '-x, --gid-index' flag if existed in the perftest command." 124 | echo " i.e. --gid_indexes 3,7 - dev1 will work with gid_index 3 and dev2 will work with gid_index 7" 125 | 126 | echo " -r, --remote Sets the remote host to connect. This will set it as the client side." 127 | echo " i.e. --remote , or --remote ." 
128 | } 129 | 130 | #parser 131 | while [[ $# -ge 1 ]] 132 | do 133 | key="$1" 134 | 135 | case $key in 136 | -h|--help) 137 | GET_HELP=1 138 | shift 139 | ;; 140 | -c|--cores) 141 | CORES_LIST=($(echo "$2" | tr "," " ")) 142 | HAVE_CORES=1 143 | shift # past argument 144 | ;; 145 | -d|--devices) 146 | DEVICE_LIST=($(echo "$2" | tr "," " ")) 147 | HAVE_DEVICES=1 148 | EXPECTED_PARAMS=${#DEVICE_LIST[@]} 149 | shift # past argument 150 | ;; 151 | -i|--ib_ports) 152 | IB_PORTS_LIST=($(echo "$2" | tr "," " ")) 153 | HAVE_IB_PORTS=1 154 | shift # past argument 155 | ;; 156 | -x|--gid_indexes) 157 | GID_INDEXES_LIST=($(echo "$2" | tr "," " ")) 158 | HAVE_GID_INDEXES=1 159 | shift 160 | ;; 161 | -C|--cmd) 162 | TEST_CMD="$2" 163 | HAVE_CMD=1 164 | shift 165 | ;; 166 | -r|--remote) 167 | REMOTE_HOST="$2" 168 | HAVE_REMOTE_HOST=1 169 | shift 170 | ;; 171 | *) 172 | # unknown option - ignore 173 | ;; 174 | esac 175 | shift 176 | done 177 | 178 | force_dependencies 179 | 180 | run_commands 181 | 182 | exit 183 | -------------------------------------------------------------------------------- /runme: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # trivial script to launch a server/client test with ssh 3 | # must be launched from client 4 | # example: runme 10.0.0.1 /home/perftest/rdma_lat -s 10 5 | 6 | if [ $# -lt 1 ] ; then 7 | echo "Usage: runme " 8 | exit 3 9 | fi 10 | 11 | server=$1 12 | shift 13 | ssh $server $* & 14 | #give server time to start 15 | sleep 2 16 | $* $server 17 | status=$? 18 | wait 19 | exit $status 20 | -------------------------------------------------------------------------------- /src/atomic_bw.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved. 4 | * Copyright (c) 2009 HNR Consulting. All rights reserved. 
5 | * 6 | * This software is available to you under a choice of one of two 7 | * licenses. You may choose to be licensed under the terms of the GNU 8 | * General Public License (GPL) Version 2, available from the file 9 | * COPYING in the main directory of this source tree, or the 10 | * OpenIB.org BSD license below: 11 | * 12 | * Redistribution and use in source and binary forms, with or 13 | * without modification, are permitted provided that the following 14 | * conditions are met: 15 | * 16 | * - Redistributions of source code must retain the above 17 | * copyright notice, this list of conditions and the following 18 | * disclaimer. 19 | * 20 | * - Redistributions in binary form must reproduce the above 21 | * copyright notice, this list of conditions and the following 22 | * disclaimer in the documentation and/or other materials 23 | * provided with the distribution. 24 | * 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 | * SOFTWARE. 
33 | * 34 | * $Id$ 35 | */ 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | #include "perftest_parameters.h" 42 | #include "perftest_resources.h" 43 | #include "perftest_communication.h" 44 | 45 | /****************************************************************************** 46 | * 47 | ******************************************************************************/ 48 | int main(int argc, char *argv[]) 49 | { 50 | int ret_parser, i, rc; 51 | struct ibv_device *ib_dev = NULL; 52 | struct pingpong_context ctx; 53 | struct pingpong_dest *my_dest = NULL; 54 | struct pingpong_dest *rem_dest = NULL; 55 | struct perftest_parameters user_param; 56 | struct perftest_comm user_comm; 57 | struct bw_report_data my_bw_rep, rem_bw_rep; 58 | int rdma_cm_flow_destroyed = 0; 59 | 60 | /* init default values to user's parameters */ 61 | memset(&ctx, 0, sizeof(struct pingpong_context)); 62 | memset(&user_param, 0, sizeof(struct perftest_parameters)); 63 | memset(&user_comm, 0, sizeof(struct perftest_comm)); 64 | 65 | user_param.verb = ATOMIC; 66 | user_param.tst = BW; 67 | strncpy(user_param.version, VERSION, sizeof(user_param.version)); 68 | 69 | ret_parser = parser(&user_param, argv, argc); 70 | if (ret_parser) { 71 | if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT) 72 | fprintf(stderr, " Parser function exited with Error\n"); 73 | goto return_error; 74 | } 75 | 76 | if ((user_param.connection_type == DC || user_param.use_xrc) && user_param.duplex) { 77 | user_param.num_of_qps *= 2; 78 | } 79 | 80 | ib_dev = ctx_find_dev(&user_param.ib_devname); 81 | if (!ib_dev) 82 | return 7; 83 | 84 | /* Getting the relevant context from the device */ 85 | ctx.context = ibv_open_device(ib_dev); 86 | if (!ctx.context) { 87 | fprintf(stderr, " Couldn't get context for the device\n"); 88 | goto free_devname; 89 | } 90 | 91 | /* Verify user parameters that require the device context, 92 | * the function will print the relevent error info. 
*/ 93 | if (verify_params_with_device_context(ctx.context, &user_param)) 94 | { 95 | fprintf(stderr, " Couldn't get context for the device\n"); 96 | goto free_devname; 97 | } 98 | 99 | /* See if link type is valid and supported. */ 100 | if (check_link(ctx.context, &user_param)) { 101 | fprintf(stderr, " Couldn't get context for the device\n"); 102 | goto free_devname; 103 | } 104 | 105 | /* copy the relevant user parameters to the comm struct + creating rdma_cm resources. */ 106 | if (create_comm_struct(&user_comm, &user_param)) { 107 | fprintf(stderr, " Unable to create RDMA_CM resources\n"); 108 | goto free_devname; 109 | } 110 | 111 | if (user_param.output == FULL_VERBOSITY && user_param.machine == SERVER) { 112 | printf("\n************************************\n"); 113 | printf("* Waiting for client to connect... *\n"); 114 | printf("************************************\n"); 115 | } 116 | 117 | /* Initialize the connection and print the local data. */ 118 | if (establish_connection(&user_comm)) { 119 | fprintf(stderr, " Unable to init the socket connection\n"); 120 | dealloc_comm_struct(&user_comm, &user_param); 121 | goto free_devname; 122 | } 123 | 124 | exchange_versions(&user_comm, &user_param); 125 | check_version_compatibility(&user_param); 126 | check_sys_data(&user_comm, &user_param); 127 | 128 | /* See if MTU is valid and supported. */ 129 | if (check_mtu(ctx.context, &user_param, &user_comm)) { 130 | fprintf(stderr, " Couldn't get context for the device\n"); 131 | dealloc_comm_struct(&user_comm, &user_param); 132 | goto free_devname; 133 | } 134 | 135 | MAIN_ALLOC(my_dest, struct pingpong_dest, user_param.num_of_qps, free_rdma_params); 136 | memset(my_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 137 | MAIN_ALLOC(rem_dest, struct pingpong_dest, user_param.num_of_qps, free_rem_dest); 138 | memset(rem_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 139 | 140 | /* Allocating arrays needed for the test. 
*/ 141 | if(alloc_ctx(&ctx,&user_param)){ 142 | fprintf(stderr, "Couldn't allocate context\n"); 143 | goto free_mem; 144 | } 145 | 146 | /* Create RDMA CM resources and connect through CM. */ 147 | if (user_param.work_rdma_cm == ON) { 148 | rc = create_rdma_cm_connection(&ctx, &user_param, &user_comm, 149 | my_dest, rem_dest); 150 | if (rc) { 151 | fprintf(stderr, 152 | "Failed to create RDMA CM connection with resources.\n"); 153 | dealloc_ctx(&ctx, &user_param); 154 | goto free_mem; 155 | } 156 | } else { 157 | /* create all the basic IB resources. */ 158 | if (ctx_init(&ctx, &user_param)) { 159 | fprintf(stderr, " Couldn't create IB resources\n"); 160 | dealloc_ctx(&ctx, &user_param); 161 | goto free_mem; 162 | } 163 | } 164 | 165 | /* Set up the Connection. */ 166 | if (set_up_connection(&ctx, &user_param, my_dest)) { 167 | fprintf(stderr, " Unable to set up socket connection\n"); 168 | goto destroy_context; 169 | } 170 | 171 | /* Print basic test information. */ 172 | ctx_print_test_info(&user_param); 173 | 174 | for (i = 0; i < user_param.num_of_qps; i++) { 175 | 176 | /* shaking hands and gather the other side info. 
*/ 177 | if (ctx_hand_shake(&user_comm, &my_dest[i], &rem_dest[i])) { 178 | fprintf(stderr, "Failed to exchange data between server and clients\n"); 179 | goto destroy_context; 180 | } 181 | } 182 | 183 | if (user_param.work_rdma_cm == OFF) { 184 | if (ctx_check_gid_compatibility(&my_dest[0], &rem_dest[0])) { 185 | fprintf(stderr, "\n Found Incompatibility issue with GID types.\n"); 186 | fprintf(stderr, " Please Try to use a different IP version.\n\n"); 187 | goto destroy_context; 188 | } 189 | } 190 | 191 | if (user_param.work_rdma_cm == OFF) { 192 | if (ctx_connect(&ctx, rem_dest, &user_param, my_dest)) { 193 | fprintf(stderr, " Unable to Connect the HCA's through the link\n"); 194 | goto destroy_context; 195 | } 196 | } 197 | 198 | if (user_param.connection_type == DC) 199 | { 200 | /* Set up connection one more time to send qpn properly for DC */ 201 | if (set_up_connection(&ctx,&user_param,my_dest)) { 202 | fprintf(stderr," Unable to set up socket connection\n"); 203 | goto destroy_context; 204 | } 205 | } 206 | 207 | /* Print this machine QP information */ 208 | for (i=0; i < user_param.num_of_qps; i++) { 209 | ctx_print_pingpong_data(&my_dest[i],&user_comm); 210 | } 211 | 212 | user_comm.rdma_params->side = REMOTE; 213 | 214 | for (i=0; i < user_param.num_of_qps; i++) { 215 | 216 | if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) { 217 | fprintf(stderr," Failed to exchange data between server and clients\n"); 218 | goto destroy_context; 219 | } 220 | 221 | ctx_print_pingpong_data(&rem_dest[i],&user_comm); 222 | } 223 | 224 | /* An additional handshake is required after moving qp to RTR. 
*/ 225 | if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) { 226 | fprintf(stderr, "Failed to exchange data between server and clients\n"); 227 | goto destroy_context; 228 | } 229 | 230 | /* For half duplex tests, server just waits for client to exit */ 231 | if (user_param.machine == SERVER && !user_param.duplex) { 232 | if (user_param.output == FULL_VERBOSITY) { 233 | printf(RESULT_LINE); 234 | printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G)); 235 | printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT)); 236 | } 237 | 238 | if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) { 239 | fprintf(stderr, " Failed to exchange data between server and clients\n"); 240 | goto destroy_context; 241 | } 242 | 243 | xchg_bw_reports(&user_comm, &my_bw_rep, &rem_bw_rep, atof(user_param.rem_version)); 244 | print_full_bw_report(&user_param, &rem_bw_rep, NULL); 245 | 246 | if (user_param.output == FULL_VERBOSITY) { 247 | printf(RESULT_LINE); 248 | } 249 | 250 | if (ctx_close_connection(&user_comm, &my_dest[0], &rem_dest[0])) { 251 | fprintf(stderr, "Failed to close connection between server and client\n"); 252 | goto destroy_context; 253 | } 254 | 255 | 256 | if (user_param.work_rdma_cm == ON) { 257 | if (destroy_ctx(&ctx, &user_param)) { 258 | fprintf(stderr, "Failed to destroy resources.\n"); 259 | goto destroy_cm_context; 260 | } 261 | free(my_dest); 262 | free(rem_dest); 263 | free(user_param.ib_devname); 264 | user_comm.rdma_params->work_rdma_cm = OFF; 265 | if(destroy_ctx(user_comm.rdma_ctx, user_comm.rdma_params)) { 266 | free(user_comm.rdma_params); 267 | free(user_comm.rdma_ctx); 268 | return FAILURE; 269 | } 270 | free(user_comm.rdma_params); 271 | return SUCCESS; 272 | } 273 | free(my_dest); 274 | free(rem_dest); 275 | free(user_param.ib_devname); 276 | if(destroy_ctx(&ctx, &user_param)) { 277 | free(user_comm.rdma_params); 278 | return FAILURE; 279 | } 280 | free(user_comm.rdma_params); 281 | return SUCCESS; 282 | 
} 283 | 284 | if (user_param.use_event) { 285 | if (ibv_req_notify_cq(ctx.send_cq, 0)) { 286 | fprintf(stderr, "Couldn't request CQ notification\n"); 287 | goto free_mem; 288 | } 289 | } 290 | if (user_param.output == FULL_VERBOSITY) { 291 | printf(RESULT_LINE); 292 | printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G)); 293 | printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT)); 294 | } 295 | 296 | ctx_set_send_wqes(&ctx, &user_param, rem_dest); 297 | 298 | if (user_param.test_method == RUN_REGULAR || user_param.test_method == RUN_ALL) { 299 | 300 | if (user_param.perform_warm_up) { 301 | if (perform_warm_up(&ctx, &user_param)) { 302 | fprintf(stderr, "Problems with warm up\n"); 303 | goto free_mem; 304 | } 305 | } 306 | 307 | if (user_param.duplex) { 308 | if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) { 309 | fprintf(stderr, "Failed to sync between server and client between different msg sizes\n"); 310 | goto free_mem; 311 | } 312 | } 313 | 314 | if (run_iter_bw(&ctx, &user_param)) { 315 | fprintf(stderr, " Error occurred in run_iter function\n"); 316 | goto free_mem; 317 | } 318 | 319 | print_report_bw(&user_param, &my_bw_rep); 320 | 321 | if (user_param.duplex) { 322 | xchg_bw_reports(&user_comm, &my_bw_rep, &rem_bw_rep, atof(user_param.rem_version)); 323 | print_full_bw_report(&user_param, &my_bw_rep, &rem_bw_rep); 324 | } 325 | 326 | if (user_param.report_both && user_param.duplex) { 327 | printf(RESULT_LINE); 328 | printf("\n Local results:\n"); 329 | printf(RESULT_LINE); 330 | printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G)); 331 | printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT)); 332 | print_full_bw_report(&user_param, &my_bw_rep, NULL); 333 | printf(RESULT_LINE); 334 | 335 | printf("\n Remote results:\n"); 336 | printf(RESULT_LINE); 337 | printf((user_param.report_fmt == MBS ? RESULT_FMT : RESULT_FMT_G)); 338 | printf((user_param.cpu_util_data.enable ? 
RESULT_EXT_CPU_UTIL : RESULT_EXT)); 339 | print_full_bw_report(&user_param, &rem_bw_rep, NULL); 340 | } 341 | } else if (user_param.test_method == RUN_INFINITELY) { 342 | 343 | if (run_iter_bw_infinitely(&ctx, &user_param)) { 344 | fprintf(stderr, " Error occurred while running infinitely! aborting ...\n"); 345 | goto free_mem; 346 | } 347 | } 348 | 349 | if (user_param.output == FULL_VERBOSITY) { 350 | printf(RESULT_LINE); 351 | } 352 | /* For half duplex tests, server just waits for client to exit */ 353 | if (user_param.machine == CLIENT && !user_param.duplex) { 354 | 355 | if (ctx_hand_shake(&user_comm, &my_dest[0], &rem_dest[0])) { 356 | fprintf(stderr, " Failed to exchange data between server and clients\n"); 357 | goto free_mem; 358 | } 359 | 360 | xchg_bw_reports(&user_comm, &my_bw_rep, &rem_bw_rep, atof(user_param.rem_version)); 361 | } 362 | 363 | if (ctx_close_connection(&user_comm, &my_dest[0], &rem_dest[0])) { 364 | fprintf(stderr, "Failed to close connection between server and client\n"); 365 | goto free_mem; 366 | } 367 | 368 | if (!user_param.is_bw_limit_passed && (user_param.is_limit_bw == ON)) { 369 | fprintf(stderr, "Error: BW result is below bw limit\n"); 370 | goto destroy_context; 371 | } 372 | 373 | if (!user_param.is_msgrate_limit_passed && (user_param.is_limit_bw == ON)) { 374 | fprintf(stderr, "Error: Msg rate is below msg_rate limit\n"); 375 | goto destroy_context; 376 | } 377 | 378 | if (user_param.work_rdma_cm == ON) { 379 | if (destroy_ctx(&ctx, &user_param)) { 380 | fprintf(stderr, "Failed to destroy resources.\n"); 381 | goto destroy_cm_context; 382 | } 383 | 384 | user_comm.rdma_params->work_rdma_cm = OFF; 385 | free(my_dest); 386 | free(rem_dest); 387 | free(user_param.ib_devname); 388 | if(destroy_ctx(user_comm.rdma_ctx, user_comm.rdma_params)) { 389 | free(user_comm.rdma_params); 390 | free(user_comm.rdma_ctx); 391 | return FAILURE; 392 | } 393 | free(user_comm.rdma_ctx); 394 | free(user_comm.rdma_params); 395 | return SUCCESS; 
396 | } 397 | 398 | free(my_dest); 399 | free(rem_dest); 400 | free(user_param.ib_devname); 401 | 402 | if(destroy_ctx(&ctx, &user_param)){ 403 | free(user_comm.rdma_params); 404 | return FAILURE; 405 | } 406 | free(user_comm.rdma_params); 407 | return SUCCESS; 408 | 409 | destroy_context: 410 | if (destroy_ctx(&ctx,&user_param)) 411 | fprintf(stderr, "Failed to destroy resources\n"); 412 | destroy_cm_context: 413 | if (user_param.work_rdma_cm == ON) { 414 | rdma_cm_flow_destroyed = 1; 415 | user_comm.rdma_params->work_rdma_cm = OFF; 416 | destroy_ctx(user_comm.rdma_ctx,user_comm.rdma_params); 417 | } 418 | free_mem: 419 | free(rem_dest); 420 | free_rem_dest: 421 | free(my_dest); 422 | free_rdma_params: 423 | if (user_param.use_rdma_cm == ON && rdma_cm_flow_destroyed == 0) 424 | dealloc_comm_struct(&user_comm, &user_param); 425 | else { 426 | if (user_param.use_rdma_cm == ON) 427 | free(user_comm.rdma_ctx); 428 | free(user_comm.rdma_params); 429 | } 430 | free_devname: 431 | free(user_param.ib_devname); 432 | return_error: 433 | //coverity[leaked_storage] 434 | return FAILURE; 435 | 436 | } 437 | -------------------------------------------------------------------------------- /src/atomic_lat.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 | * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler) 5 | * Copyright (c) 2009 HNR Consulting. All rights reserved. 6 | * 7 | * This software is available to you under a choice of one of two 8 | * licenses. 
You may choose to be licensed under the terms of the GNU 9 | * General Public License (GPL) Version 2, available from the file 10 | * COPYING in the main directory of this source tree, or the 11 | * OpenIB.org BSD license below: 12 | * 13 | * Redistribution and use in source and binary forms, with or 14 | * without modification, are permitted provided that the following 15 | * conditions are met: 16 | * 17 | * - Redistributions of source code must retain the above 18 | * copyright notice, this list of conditions and the following 19 | * disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above 22 | * copyright notice, this list of conditions and the following 23 | * disclaimer in the documentation and/or other materials 24 | * provided with the distribution. 25 | * 26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 | * SOFTWARE. 
34 | * 35 | * $Id$ 36 | */ 37 | 38 | 39 | #include 40 | #include 41 | #include 42 | #include 43 | #if !defined(__FreeBSD__) 44 | #include 45 | #endif 46 | 47 | #include "get_clock.h" 48 | #include "perftest_parameters.h" 49 | #include "perftest_resources.h" 50 | #include "perftest_communication.h" 51 | 52 | /****************************************************************************** 53 | * 54 | ******************************************************************************/ 55 | int main(int argc, char *argv[]) 56 | { 57 | int ret_parser, i, rc, error = 1; 58 | struct report_options report; 59 | struct pingpong_context ctx; 60 | struct pingpong_dest *my_dest = NULL; 61 | struct pingpong_dest *rem_dest = NULL; 62 | struct ibv_device *ib_dev; 63 | struct perftest_parameters user_param; 64 | struct perftest_comm user_comm; 65 | int rdma_cm_flow_destroyed = 0; 66 | 67 | /* init default values to user's parameters */ 68 | memset(&ctx,0,sizeof(struct pingpong_context)); 69 | memset(&user_param, 0, sizeof(struct perftest_parameters)); 70 | memset(&user_comm,0,sizeof(struct perftest_comm)); 71 | 72 | user_param.verb = ATOMIC; 73 | user_param.tst = LAT; 74 | user_param.r_flag = &report; 75 | strncpy(user_param.version, VERSION, sizeof(user_param.version)); 76 | 77 | ret_parser = parser(&user_param,argv,argc); 78 | if (ret_parser) { 79 | if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT) 80 | fprintf(stderr," Parser function exited with Error\n"); 81 | goto return_error; 82 | } 83 | 84 | if (user_param.use_xrc || user_param.connection_type == DC) { 85 | user_param.num_of_qps *= 2; 86 | } 87 | 88 | /* Finding the IB device selected (or defalut if no selected). 
*/ 89 | ib_dev = ctx_find_dev(&user_param.ib_devname); 90 | if (!ib_dev) { 91 | fprintf(stderr," Unable to find the Infiniband/RoCE device\n"); 92 | return FAILURE; 93 | } 94 | 95 | /* Getting the relevant context from the device */ 96 | ctx.context = ibv_open_device(ib_dev); 97 | if (!ctx.context) { 98 | fprintf(stderr, " Couldn't get context for the device\n"); 99 | goto free_devname; 100 | } 101 | 102 | /* Verify user parameters that require the device context, 103 | * the function will print the relevent error info. */ 104 | if (verify_params_with_device_context(ctx.context, &user_param)) { 105 | goto free_devname; 106 | } 107 | 108 | /* See if link type is valid and supported. */ 109 | if (check_link(ctx.context,&user_param)) { 110 | fprintf(stderr, " Couldn't get context for the device\n"); 111 | goto free_devname; 112 | } 113 | 114 | /* copy the relevant user parameters to the comm struct + creating rdma_cm resources. */ 115 | if (create_comm_struct(&user_comm,&user_param)) { 116 | fprintf(stderr," Unable to create RDMA_CM resources\n"); 117 | goto free_devname; 118 | } 119 | 120 | if (user_param.output == FULL_VERBOSITY && user_param.machine == SERVER) { 121 | printf("\n************************************\n"); 122 | printf("* Waiting for client to connect... *\n"); 123 | printf("************************************\n"); 124 | } 125 | 126 | /* Initialize the connection and print the local data. */ 127 | if (establish_connection(&user_comm)) { 128 | fprintf(stderr," Unable to init the socket connection\n"); 129 | dealloc_comm_struct(&user_comm, &user_param); 130 | goto free_devname; 131 | } 132 | 133 | exchange_versions(&user_comm, &user_param); 134 | check_version_compatibility(&user_param); 135 | check_sys_data(&user_comm, &user_param); 136 | 137 | /* See if MTU is valid and supported. 
*/ 138 | if (check_mtu(ctx.context,&user_param, &user_comm)) { 139 | fprintf(stderr, " Couldn't get context for the device\n"); 140 | dealloc_comm_struct(&user_comm, &user_param); 141 | goto free_devname; 142 | } 143 | 144 | MAIN_ALLOC(my_dest , struct pingpong_dest , user_param.num_of_qps , free_rdma_params); 145 | memset(my_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 146 | MAIN_ALLOC(rem_dest , struct pingpong_dest , user_param.num_of_qps , free_rem_dest); 147 | memset(rem_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 148 | 149 | /* Allocating arrays needed for the test. */ 150 | if (alloc_ctx(&ctx,&user_param)){ 151 | fprintf(stderr, "Couldn't allocate context\n"); 152 | goto free_mem; 153 | } 154 | 155 | /* Create RDMA CM resources and connect through CM. */ 156 | if (user_param.work_rdma_cm == ON) { 157 | rc = create_rdma_cm_connection(&ctx, &user_param, &user_comm, 158 | my_dest, rem_dest); 159 | if (rc) { 160 | fprintf(stderr, 161 | "Failed to create RDMA CM connection with resources.\n"); 162 | dealloc_ctx(&ctx, &user_param); 163 | goto free_mem; 164 | } 165 | } else { 166 | /* create all the basic IB resources (data buffer, PD, MR, CQ and events channel) */ 167 | if (ctx_init(&ctx,&user_param)) { 168 | fprintf(stderr, " Couldn't create IB resources\n"); 169 | dealloc_ctx(&ctx, &user_param); 170 | goto free_mem; 171 | } 172 | } 173 | 174 | /* Set up the Connection. */ 175 | if (set_up_connection(&ctx,&user_param,my_dest)) { 176 | fprintf(stderr," Unable to set up socket connection\n"); 177 | goto destroy_context; 178 | } 179 | 180 | /* Print basic test information. */ 181 | ctx_print_test_info(&user_param); 182 | 183 | /* shaking hands and gather the other side info. 
*/ 184 | if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) { 185 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 186 | goto destroy_context; 187 | } 188 | 189 | for (i=0; i < user_param.num_of_qps; i++) { 190 | 191 | /* shaking hands and gather the other side info. */ 192 | if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) { 193 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 194 | goto destroy_context; 195 | } 196 | } 197 | 198 | if (user_param.work_rdma_cm == OFF) { 199 | if (ctx_check_gid_compatibility(&my_dest[0], &rem_dest[0])) { 200 | fprintf(stderr,"\n Found Incompatibility issue with GID types.\n"); 201 | fprintf(stderr," Please Try to use a different IP version.\n\n"); 202 | goto destroy_context; 203 | } 204 | } 205 | 206 | if (user_param.work_rdma_cm == OFF) { 207 | if (ctx_connect(&ctx,rem_dest,&user_param,my_dest)) { 208 | fprintf(stderr," Unable to Connect the HCA's through the link\n"); 209 | goto destroy_context; 210 | } 211 | } 212 | 213 | if (user_param.connection_type == DC) 214 | { 215 | /* Set up connection one more time to send qpn properly for DC */ 216 | if (set_up_connection(&ctx,&user_param,my_dest)) { 217 | fprintf(stderr," Unable to set up socket connection\n"); 218 | goto destroy_context; 219 | } 220 | } 221 | 222 | /* Print this machine QP information */ 223 | for (i=0; i < user_param.num_of_qps; i++) { 224 | ctx_print_pingpong_data(&my_dest[i],&user_comm); 225 | } 226 | 227 | 228 | user_comm.rdma_params->side = REMOTE; 229 | 230 | for (i=0; i < user_param.num_of_qps; i++) { 231 | 232 | if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) { 233 | fprintf(stderr," Failed to exchange data between server and clients\n"); 234 | goto destroy_context; 235 | } 236 | 237 | ctx_print_pingpong_data(&rem_dest[i],&user_comm); 238 | } 239 | 240 | /* An additional handshake is required after moving qp to RTR. 
*/ 241 | if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) { 242 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 243 | goto destroy_context; 244 | } 245 | 246 | /* Only Client post read request. */ 247 | if (user_param.machine == SERVER) { 248 | if (ctx_close_connection(&user_comm,my_dest,rem_dest)) { 249 | fprintf(stderr,"Failed to close connection between server and client\n"); 250 | goto destroy_context; 251 | } 252 | if (user_param.output == FULL_VERBOSITY) { 253 | printf(RESULT_LINE); 254 | } 255 | if (user_param.work_rdma_cm == ON) { 256 | if (destroy_ctx(&ctx, &user_param)) { 257 | fprintf(stderr, "Failed to destroy resources.\n"); 258 | goto destroy_cm_context; 259 | } 260 | free(my_dest); 261 | free(rem_dest); 262 | free(user_param.ib_devname); 263 | user_comm.rdma_params->work_rdma_cm = OFF; 264 | if(destroy_ctx(user_comm.rdma_ctx, user_comm.rdma_params)) { 265 | free(user_comm.rdma_params); 266 | free(user_comm.rdma_ctx); 267 | return FAILURE; 268 | } 269 | free(user_comm.rdma_params); 270 | return SUCCESS; 271 | } 272 | free(my_dest); 273 | free(rem_dest); 274 | free(user_param.ib_devname); 275 | if(destroy_ctx(&ctx, &user_param)) { 276 | free(user_comm.rdma_params); 277 | return FAILURE; 278 | } 279 | free(user_comm.rdma_params); 280 | return SUCCESS; 281 | } 282 | 283 | if (user_param.use_event) { 284 | if (ibv_req_notify_cq(ctx.send_cq, 0)) { 285 | fprintf(stderr, "Couldn't request CQ notification\n"); 286 | goto free_mem; 287 | } 288 | 289 | } 290 | 291 | ctx_set_send_wqes(&ctx,&user_param,rem_dest); 292 | 293 | if (user_param.output == FULL_VERBOSITY) { 294 | printf(RESULT_LINE); 295 | printf("%s",(user_param.test_type == ITERATIONS) ? RESULT_FMT_LAT : RESULT_FMT_LAT_DUR); 296 | printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT)); 297 | } 298 | 299 | if(run_iter_lat(&ctx,&user_param)){ 300 | error = 17; 301 | goto free_mem; 302 | } 303 | 304 | user_param.test_type == ITERATIONS ? 
print_report_lat(&user_param) : print_report_lat_duration(&user_param); 305 | 306 | if (ctx_close_connection(&user_comm,my_dest,rem_dest)) { 307 | fprintf(stderr,"Failed to close connection between server and client\n"); 308 | goto free_mem; 309 | } 310 | 311 | if (user_param.output == FULL_VERBOSITY) { 312 | printf(RESULT_LINE); 313 | } 314 | 315 | if (user_param.work_rdma_cm == ON) { 316 | if (destroy_ctx(&ctx, &user_param)) { 317 | fprintf(stderr, "Failed to destroy resources.\n"); 318 | goto destroy_cm_context; 319 | } 320 | 321 | user_comm.rdma_params->work_rdma_cm = OFF; 322 | free(my_dest); 323 | free(rem_dest); 324 | free(user_param.ib_devname); 325 | if(destroy_ctx(user_comm.rdma_ctx, user_comm.rdma_params)) { 326 | free(user_comm.rdma_params); 327 | free(user_comm.rdma_ctx); 328 | return FAILURE; 329 | } 330 | free(user_comm.rdma_ctx); 331 | free(user_comm.rdma_params); 332 | return SUCCESS; 333 | } 334 | 335 | free(my_dest); 336 | free(rem_dest); 337 | free(user_param.ib_devname); 338 | 339 | if(destroy_ctx(&ctx, &user_param)){ 340 | free(user_comm.rdma_params); 341 | return FAILURE; 342 | } 343 | free(user_comm.rdma_params); 344 | return SUCCESS; 345 | 346 | destroy_context: 347 | if (destroy_ctx(&ctx,&user_param)) 348 | fprintf(stderr, "Failed to destroy resources\n"); 349 | destroy_cm_context: 350 | if (user_param.work_rdma_cm == ON) { 351 | rdma_cm_flow_destroyed = 1; 352 | user_comm.rdma_params->work_rdma_cm = OFF; 353 | destroy_ctx(user_comm.rdma_ctx,user_comm.rdma_params); 354 | } 355 | free_mem: 356 | free(rem_dest); 357 | free_rem_dest: 358 | free(my_dest); 359 | free_rdma_params: 360 | if (user_param.use_rdma_cm == ON && rdma_cm_flow_destroyed == 0) 361 | dealloc_comm_struct(&user_comm, &user_param); 362 | else { 363 | if (user_param.use_rdma_cm == ON) 364 | free(user_comm.rdma_ctx); 365 | free(user_comm.rdma_params); 366 | } 367 | free_devname: 368 | free(user_param.ib_devname); 369 | return_error: 370 | //coverity[leaked_storage] 371 | return 
error; 372 | 373 | } 374 | 375 | -------------------------------------------------------------------------------- /src/clock_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "get_clock.h" 4 | 5 | int main() 6 | { 7 | int no_cpu_freq_fail = 0; 8 | double mhz; 9 | mhz = get_cpu_mhz(no_cpu_freq_fail); 10 | 11 | if (!mhz) { 12 | printf("Unable to calibrate cycles. Exiting.\n"); 13 | return 2; 14 | } 15 | 16 | printf("Type CTRL-C to cancel.\n"); 17 | for (;;) { 18 | cycles_t c1,c2; 19 | c1 = get_cycles(); 20 | sleep(1); 21 | c2 = get_cycles(); 22 | printf("1 sec = %g usec\n", (c2 - c1) / mhz); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/cuda_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "cuda_memory.h" 10 | #include "perftest_parameters.h" 11 | #include CUDA_PATH 12 | 13 | #define CUCHECK(stmt) \ 14 | do { \ 15 | CUresult result = (stmt); \ 16 | ASSERT(CUDA_SUCCESS == result); \ 17 | } while (0) 18 | 19 | #define ACCEL_PAGE_SIZE (64 * 1024) 20 | 21 | static const char *cuda_mem_type_str[] = { 22 | "CUDA_MEM_DEVICE", 23 | "CUDA_MEM_MANAGED", 24 | "CUDA_MEM_HOSTALLOC", 25 | "CUDA_MEM_HOSTREGISTER", 26 | "CUDA_MEM_MALLOC", 27 | "CUDA_MEM_TYPES" 28 | }; 29 | 30 | #ifdef HAVE_CUDART 31 | int touch_gpu_pages(uint8_t *addr, int buf_size, int is_infinitely, volatile int **stop_flag); 32 | int init_gpu_stop_flag(volatile int **stop_flag); 33 | #endif 34 | 35 | struct cuda_memory_ctx { 36 | struct memory_ctx base; 37 | int mem_type; 38 | int gpu_touch; 39 | int device_id; 40 | char *device_bus_id; 41 | volatile int *stop_touch_gpu_kernel_flag; // used for stopping cuda gpu_touch kernel 42 | CUdevice cuDevice; 43 | CUcontext cuContext; 44 | bool use_dmabuf; 45 | bool use_pcie_mapping; 46 | int driver_version; 47 | }; 48 | 49 | 50 | static int init_gpu(struct cuda_memory_ctx *ctx) 51 | { 52 | int cuda_device_id = ctx->device_id; 53 | int cuda_pci_bus_id; 54 | int cuda_pci_device_id; 55 | int index; 56 | CUdevice cu_device; 57 | 58 | printf("initializing CUDA\n"); 59 | CUresult error = cuInit(0); 60 | if (error != CUDA_SUCCESS) { 61 | printf("cuInit(0) returned %d\n", error); 62 | return FAILURE; 63 | } 64 | 65 | int deviceCount = 0; 66 | error = cuDeviceGetCount(&deviceCount); 67 | if (error != CUDA_SUCCESS) { 68 | printf("cuDeviceGetCount() returned %d\n", error); 69 | return FAILURE; 70 | } 71 | /* This function call returns 0 if there are no CUDA capable devices. 
*/ 72 | if (deviceCount == 0) { 73 | printf("There are no available device(s) that support CUDA\n"); 74 | return FAILURE; 75 | } 76 | if (cuda_device_id >= deviceCount) { 77 | fprintf(stderr, "No such device ID (%d) exists in system\n", cuda_device_id); 78 | return FAILURE; 79 | } 80 | 81 | printf("Listing all CUDA devices in system:\n"); 82 | for (index = 0; index < deviceCount; index++) { 83 | CUCHECK(cuDeviceGet(&cu_device, index)); 84 | cuDeviceGetAttribute(&cuda_pci_bus_id, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID , cu_device); 85 | cuDeviceGetAttribute(&cuda_pci_device_id, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID , cu_device); 86 | printf("CUDA device %d: PCIe address is %02X:%02X\n", index, (unsigned int)cuda_pci_bus_id, (unsigned int)cuda_pci_device_id); 87 | } 88 | 89 | printf("\nPicking device No. %d\n", cuda_device_id); 90 | 91 | CUCHECK(cuDeviceGet(&ctx->cuDevice, cuda_device_id)); 92 | 93 | char name[128]; 94 | CUCHECK(cuDeviceGetName(name, sizeof(name), cuda_device_id)); 95 | printf("[pid = %d, dev = %d] device name = [%s]\n", getpid(), ctx->cuDevice, name); 96 | printf("creating CUDA Ctx\n"); 97 | 98 | /* Create context */ 99 | error = cuCtxCreate(&ctx->cuContext, CU_CTX_MAP_HOST, ctx->cuDevice); 100 | if (error != CUDA_SUCCESS) { 101 | printf("cuCtxCreate() error=%d\n", error); 102 | return FAILURE; 103 | } 104 | 105 | printf("making it the current CUDA Ctx\n"); 106 | error = cuCtxSetCurrent(ctx->cuContext); 107 | if (error != CUDA_SUCCESS) { 108 | printf("cuCtxSetCurrent() error=%d\n", error); 109 | return FAILURE; 110 | } 111 | 112 | #ifdef HAVE_CUDART 113 | if (ctx->gpu_touch != GPU_NO_TOUCH){ 114 | error = init_gpu_stop_flag(&ctx->stop_touch_gpu_kernel_flag); 115 | if (error != 0) { 116 | printf("init_gpu_stop_flag() error=%d\n", error); 117 | return FAILURE; 118 | } 119 | } 120 | #endif 121 | 122 | CUCHECK(cuDriverGetVersion(&ctx->driver_version)); 123 | 124 | return SUCCESS; 125 | } 126 | 127 | static void free_gpu(struct cuda_memory_ctx *ctx) 128 | { 129 | 
printf("destroying current CUDA Ctx\n"); 130 | CUCHECK(cuCtxDestroy(ctx->cuContext)); 131 | } 132 | 133 | int cuda_memory_init(struct memory_ctx *ctx) { 134 | struct cuda_memory_ctx *cuda_ctx = container_of(ctx, struct cuda_memory_ctx, base); 135 | int return_value = 0; 136 | 137 | if (cuda_ctx->device_bus_id) { 138 | int err; 139 | 140 | printf("initializing CUDA\n"); 141 | CUresult error = cuInit(0); 142 | if (error != CUDA_SUCCESS) { 143 | printf("cuInit(0) returned %d\n", error); 144 | return FAILURE; 145 | } 146 | 147 | printf("Finding PCIe BUS %s\n", cuda_ctx->device_bus_id); 148 | err = cuDeviceGetByPCIBusId(&cuda_ctx->device_id, cuda_ctx->device_bus_id); 149 | if (err != 0) { 150 | fprintf(stderr, "cuDeviceGetByPCIBusId failed with error: %d; Failed to get PCI Bus ID (%s)\n", err, cuda_ctx->device_bus_id); 151 | return FAILURE; 152 | } 153 | printf("Picking GPU number %d\n", cuda_ctx->device_id); 154 | } 155 | 156 | return_value = init_gpu(cuda_ctx); 157 | if (return_value) { 158 | fprintf(stderr, "Couldn't init GPU context: %d\n", return_value); 159 | return FAILURE; 160 | } 161 | 162 | #ifdef HAVE_CUDA_DMABUF 163 | if (cuda_ctx->use_dmabuf) { 164 | int is_supported = 0; 165 | 166 | CUCHECK(cuDeviceGetAttribute(&is_supported, CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED, cuda_ctx->cuDevice)); 167 | if (!is_supported) { 168 | fprintf(stderr, "DMA-BUF is not supported on this GPU\n"); 169 | return FAILURE; 170 | } 171 | } 172 | #endif 173 | 174 | return SUCCESS; 175 | } 176 | 177 | int cuda_memory_destroy(struct memory_ctx *ctx) { 178 | struct cuda_memory_ctx *cuda_ctx = container_of(ctx, struct cuda_memory_ctx, base); 179 | 180 | free_gpu(cuda_ctx); 181 | free(cuda_ctx); 182 | return SUCCESS; 183 | } 184 | 185 | static int cuda_allocate_device_memory_buffer(struct cuda_memory_ctx *cuda_ctx, uint64_t size, int *dmabuf_fd, 186 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 187 | int error; 188 | size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & 
~(ACCEL_PAGE_SIZE - 1); 189 | 190 | // Check if discrete or integrated GPU (tegra), for allocating memory where adequate 191 | int cuda_device_integrated; 192 | cuDeviceGetAttribute(&cuda_device_integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, cuda_ctx->cuDevice); 193 | printf("CUDA device integrated: %X\n", (unsigned int)cuda_device_integrated); 194 | 195 | if (cuda_device_integrated == 1) { 196 | error = cuMemAllocHost(addr, buf_size); 197 | if (error != CUDA_SUCCESS) { 198 | printf("cuMemAllocHost error=%d\n", error); 199 | return FAILURE; 200 | } 201 | 202 | printf("allocated GPU buffer address at %p\n", addr); 203 | *can_init = false; 204 | } else { 205 | CUdeviceptr d_A; 206 | error = cuMemAlloc(&d_A, buf_size); 207 | if (error != CUDA_SUCCESS) { 208 | printf("cuMemAlloc error=%d\n", error); 209 | return FAILURE; 210 | } 211 | 212 | *addr = (void *)d_A; 213 | *can_init = false; 214 | 215 | #ifdef HAVE_CUDA_DMABUF 216 | { 217 | if (cuda_ctx->use_dmabuf) { 218 | CUdeviceptr aligned_ptr; 219 | const size_t host_page_size = sysconf(_SC_PAGESIZE); 220 | uint64_t offset; 221 | size_t aligned_size; 222 | int cu_flags = 0; 223 | 224 | // Round down to host page size 225 | aligned_ptr = d_A & ~(host_page_size - 1); 226 | offset = d_A - aligned_ptr; 227 | aligned_size = (size + offset + host_page_size - 1) & ~(host_page_size - 1); 228 | 229 | printf("using DMA-BUF for GPU buffer address at %#llx aligned at %#llx with aligned size %zu\n", d_A, aligned_ptr, aligned_size); 230 | *dmabuf_fd = 0; 231 | CUmemRangeHandleType cuda_handle_type = CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD; 232 | 233 | if (cuda_ctx->use_pcie_mapping) { 234 | #ifdef HAVE_DMABUF_MAPPING_TYPE_PCIE 235 | cu_flags = CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE; 236 | if (cuda_ctx->driver_version < 12*1000+8*10) { 237 | printf("CUDA driver version %d.%d does not support CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE\n", 238 | (cuda_ctx->driver_version / 1000), (cuda_ctx->driver_version % 1000) / 10); 239 | return 
FAILURE; 240 | } 241 | #else 242 | // this may happen with binaries built with a CUDA toolkit older than 12.8 243 | printf("support for CU_MEM_RANGE_FLAG_DMA_BUF_MAPPING_TYPE_PCIE is missing\n"); 244 | return FAILURE; 245 | #endif 246 | } 247 | 248 | error = cuMemGetHandleForAddressRange((void *)dmabuf_fd, aligned_ptr, aligned_size, cuda_handle_type, cu_flags); 249 | if (error != CUDA_SUCCESS) { 250 | printf("cuMemGetHandleForAddressRange error=%d\n", error); 251 | return FAILURE; 252 | } 253 | 254 | *dmabuf_offset = offset; 255 | } 256 | } 257 | #endif 258 | } 259 | 260 | return CUDA_SUCCESS; 261 | } 262 | 263 | int cuda_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, 264 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 265 | int error; 266 | CUdeviceptr d_ptr; 267 | 268 | struct cuda_memory_ctx *cuda_ctx = container_of(ctx, struct cuda_memory_ctx, base); 269 | 270 | switch (cuda_ctx->mem_type) { 271 | case CUDA_MEM_DEVICE: 272 | error = cuda_allocate_device_memory_buffer(cuda_ctx, size, dmabuf_fd, 273 | dmabuf_offset, addr, can_init); 274 | if (error != CUDA_SUCCESS) 275 | return FAILURE; 276 | break; 277 | case CUDA_MEM_MANAGED: 278 | error = cuMemAllocManaged(&d_ptr, size, CU_MEM_ATTACH_GLOBAL); 279 | if (error != CUDA_SUCCESS) { 280 | printf("cuMemAllocManaged error=%d\n", error); 281 | return FAILURE; 282 | } 283 | 284 | *addr = (void *)d_ptr; 285 | *can_init = false; 286 | break; 287 | 288 | case CUDA_MEM_MALLOC: 289 | *can_init = false; 290 | // Fall through 291 | 292 | printf("Host allocation selected, calling memalign allocator for %lu bytes with %d page size\n", size, alignment); 293 | *addr = memalign(alignment, size); 294 | if (!*addr) { 295 | printf("memalign error=%d\n", errno); 296 | return FAILURE; 297 | } 298 | 299 | break; 300 | /* 301 | * TODO: Add Implementation for HOSTALLOC and HOSTREGISTER 302 | * buffer allocations 303 | */ 304 | case CUDA_MEM_HOSTALLOC: 305 | case CUDA_MEM_HOSTREGISTER: 
306 | default: 307 | printf("invalid CUDA memory type\n"); 308 | return FAILURE; 309 | } 310 | 311 | printf("allocated GPU buffer of a %lu address at %p for type %s\n", size, addr, cuda_mem_type_str[cuda_ctx->mem_type]); 312 | 313 | #ifdef HAVE_CUDART 314 | if (cuda_ctx->gpu_touch != GPU_NO_TOUCH) { 315 | printf("Starting GPU touching process\n"); 316 | return touch_gpu_pages((uint8_t *)*addr, size, cuda_ctx->gpu_touch == GPU_TOUCH_INFINITE, &cuda_ctx->stop_touch_gpu_kernel_flag); 317 | } 318 | #endif 319 | 320 | return SUCCESS; 321 | } 322 | 323 | int cuda_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) { 324 | struct cuda_memory_ctx *cuda_ctx = container_of(ctx, struct cuda_memory_ctx, base); 325 | int cuda_device_integrated; 326 | cuDeviceGetAttribute(&cuda_device_integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, cuda_ctx->cuDevice); 327 | 328 | if (cuda_ctx->stop_touch_gpu_kernel_flag) { 329 | *cuda_ctx->stop_touch_gpu_kernel_flag = 1; 330 | printf("stopping CUDA gpu touch running kernel\n"); 331 | cuCtxSynchronize(); 332 | cuMemFree((CUdeviceptr)cuda_ctx->stop_touch_gpu_kernel_flag); 333 | cuda_ctx->stop_touch_gpu_kernel_flag = NULL; 334 | } 335 | 336 | switch (cuda_ctx->mem_type) { 337 | case CUDA_MEM_DEVICE: 338 | if (cuda_device_integrated == 1) { 339 | printf("deallocating GPU buffer %p\n", addr); 340 | cuMemFreeHost(addr); 341 | } else { 342 | CUdeviceptr d_A = (CUdeviceptr)addr; 343 | printf("deallocating GPU buffer %016llx\n", d_A); 344 | cuMemFree(d_A); 345 | } 346 | break; 347 | case CUDA_MEM_MANAGED: 348 | CUCHECK(cuMemFree((CUdeviceptr)addr)); 349 | break; 350 | case CUDA_MEM_MALLOC: 351 | free((void *) addr); 352 | break; 353 | } 354 | 355 | return SUCCESS; 356 | } 357 | 358 | void *cuda_memory_copy_host_buffer(void *dest, const void *src, size_t size) { 359 | cuMemcpy((CUdeviceptr)dest, (CUdeviceptr)src, size); 360 | return dest; 361 | } 362 | 363 | void *cuda_memory_copy_buffer_to_buffer(void *dest, const void *src, 
size_t size) { 364 | cuMemcpyDtoD((CUdeviceptr)dest, (CUdeviceptr)src, size); 365 | return dest; 366 | } 367 | 368 | bool cuda_memory_supported() { 369 | return true; 370 | } 371 | 372 | bool cuda_memory_dmabuf_supported() { 373 | #ifdef HAVE_CUDA_DMABUF 374 | return true; 375 | #else 376 | return false; 377 | #endif 378 | } 379 | 380 | 381 | bool data_direct_supported() { 382 | #ifdef HAVE_DATA_DIRECT 383 | return true; 384 | #else 385 | return false; 386 | #endif 387 | } 388 | 389 | 390 | bool cuda_gpu_touch_supported() { 391 | #ifdef HAVE_CUDART 392 | return true; 393 | #else 394 | return false; 395 | #endif 396 | } 397 | 398 | 399 | struct memory_ctx *cuda_memory_create(struct perftest_parameters *params) { 400 | struct cuda_memory_ctx *ctx; 401 | 402 | ALLOCATE(ctx, struct cuda_memory_ctx, 1); 403 | ctx->base.init = cuda_memory_init; 404 | ctx->base.destroy = cuda_memory_destroy; 405 | ctx->base.allocate_buffer = cuda_memory_allocate_buffer; 406 | ctx->base.free_buffer = cuda_memory_free_buffer; 407 | ctx->base.copy_host_to_buffer = cuda_memory_copy_host_buffer; 408 | ctx->base.copy_buffer_to_host = cuda_memory_copy_host_buffer; 409 | ctx->base.copy_buffer_to_buffer = cuda_memory_copy_buffer_to_buffer; 410 | ctx->device_id = params->cuda_device_id; 411 | ctx->device_bus_id = params->cuda_device_bus_id; 412 | ctx->use_dmabuf = params->use_cuda_dmabuf; 413 | ctx->use_pcie_mapping = params->use_cuda_pcie_mapping; 414 | ctx->gpu_touch = params->gpu_touch; 415 | ctx->stop_touch_gpu_kernel_flag = NULL; 416 | ctx->mem_type = params->cuda_mem_type; 417 | 418 | return &ctx->base; 419 | } 420 | -------------------------------------------------------------------------------- /src/cuda_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
/*
 * cuda_touch_pages - write a zero byte every GPU_TOUCH_STEP bytes of
 * c[0 .. size).
 *
 * Launch layout: written for the <<<1, 1>>> launch used by
 * touch_gpu_pages(); the loop does not use threadIdx/blockIdx, so every
 * launched thread would walk the whole buffer.
 * Shared memory: none.
 *
 * With is_infinite set, the sweep repeats until *stop_flag becomes non-zero;
 * stop_flag must then be a valid pointer readable from the device. With
 * is_infinite clear, stop_flag is never dereferenced.
 */
__global__ void cuda_touch_pages(volatile uint8_t *c, int size,
		volatile int *stop_flag, int is_infinite) {
	/* A non-positive size touches nothing. */
	const size_t limit = size > 0 ? (size_t)size : 0;

	do {
		/* size_t index: an int index overflows when size is within
		 * GPU_TOUCH_STEP of INT_MAX. */
		for (size_t iter = 0; iter < limit; iter += GPU_TOUCH_STEP)
			c[iter] = 0;
	}
	while (is_infinite && !*stop_flag);
}

/*
 * touch_gpu_pages - launch cuda_touch_pages on addr[0 .. buf_size).
 *
 * The launch is asynchronous; this function does not wait for the kernel.
 * In infinite mode *stop_flag must have been set up by init_gpu_stop_flag().
 *
 * Returns 0 when the kernel was launched, -1 when the arguments are unusable
 * or the launch itself was rejected.
 */
extern "C" int touch_gpu_pages(uint8_t *addr, int buf_size,
		int is_infinite, volatile int **stop_flag)
{
	volatile int *flag = stop_flag ? *stop_flag : NULL;

	/* An infinite kernel without a flag could never be stopped and would
	 * dereference NULL on the device. */
	if (is_infinite && !flag) {
		printf("cannot start infinite GPU touch without a stop flag\n");
		return -1;
	}

	cuda_touch_pages<<<1, 1>>>(addr, buf_size, flag, is_infinite);

	/* Kernel launches return nothing; launch errors surface here. */
	cudaError_t ret = cudaGetLastError();
	if (ret != cudaSuccess) {
		printf("failed to launch GPU touch kernel: %s\n", cudaGetErrorString(ret));
		return -1;
	}
	return 0;
}

/*
 * init_gpu_stop_flag - allocate the stop flag in managed memory and clear it.
 *
 * On success *stop_flag points at an int initialised to 0.
 * Returns 0 on success, -1 if stop_flag is NULL or the allocation fails.
 */
extern "C" int init_gpu_stop_flag(volatile int **stop_flag)
{
	if (!stop_flag) {
		printf("failed to allocate stop flag\n");
		return -1;
	}

	cudaError_t ret = cudaMallocManaged((void **)stop_flag, sizeof(int));
	if (ret != cudaSuccess) {
		printf("failed to allocate stop flag\n");
		return -1;
	}
	**stop_flag = 0;
	return 0;
}
-------------------------------------------------------------------------------- /src/get_clock.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 3 | * 4 | * This software is available to you under a choice of one of two 5 | * licenses. You may choose to be licensed under the terms of the GNU 6 | * General Public License (GPL) Version 2, available from the file 7 | * COPYING in the main directory of this source tree, or the 8 | * OpenIB.org BSD license below: 9 | * 10 | * Redistribution and use in source and binary forms, with or 11 | * without modification, are permitted provided that the following 12 | * conditions are met: 13 | * 14 | * - Redistributions of source code must retain the above 15 | * copyright notice, this list of conditions and the following 16 | * disclaimer. 17 | * 18 | * - Redistributions in binary form must reproduce the above 19 | * copyright notice, this list of conditions and the following 20 | * disclaimer in the documentation and/or other materials 21 | * provided with the distribution. 22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | * SOFTWARE. 31 | * 32 | * $Id$ 33 | * 34 | * Author: Michael S. 
Tsirkin 35 | */ 36 | 37 | /* #define DEBUG 1 */ 38 | /* #define DEBUG_DATA 1 */ 39 | /* #define GET_CPU_MHZ_FROM_PROC 1 */ 40 | 41 | /* For gettimeofday */ 42 | #define _DEFAULT_SOURCE 43 | #include 44 | 45 | #include 46 | #include 47 | #include 48 | #include "get_clock.h" 49 | 50 | #ifndef DEBUG 51 | #define DEBUG 0 52 | #endif 53 | #ifndef DEBUG_DATA 54 | #define DEBUG_DATA 0 55 | #endif 56 | 57 | #define MEASUREMENTS 200 58 | #define USECSTEP 10 59 | #define USECSTART 100 60 | 61 | /* 62 | Use linear regression to calculate cycles per microsecond. 63 | http://en.wikipedia.org/wiki/Linear_regression#Parameter_estimation 64 | */ 65 | static double sample_get_cpu_mhz(void) 66 | { 67 | struct timeval tv1, tv2; 68 | double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0; 69 | int i; 70 | 71 | /* Regression: y = a + b x */ 72 | long x[MEASUREMENTS]; 73 | cycles_t y[MEASUREMENTS]; 74 | double a; /* system call overhead in cycles */ 75 | double b; /* cycles per microsecond */ 76 | double r_2; 77 | 78 | for (i = 0; i < MEASUREMENTS; ++i) { 79 | cycles_t start = get_cycles(); 80 | 81 | if (gettimeofday(&tv1, NULL)) { 82 | fprintf(stderr, "gettimeofday failed.\n"); 83 | return 0; 84 | } 85 | 86 | do { 87 | if (gettimeofday(&tv2, NULL)) { 88 | fprintf(stderr, "gettimeofday failed.\n"); 89 | return 0; 90 | } 91 | } while ((tv2.tv_sec - tv1.tv_sec) * 1000000 + 92 | (tv2.tv_usec - tv1.tv_usec) < USECSTART + i * USECSTEP); 93 | 94 | x[i] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + 95 | tv2.tv_usec - tv1.tv_usec; 96 | y[i] = get_cycles() - start; 97 | if (DEBUG_DATA) 98 | fprintf(stderr, "x=%ld y=%Ld\n", x[i], (long long)y[i]); 99 | } 100 | 101 | for (i = 0; i < MEASUREMENTS; ++i) { 102 | double tx = x[i]; 103 | double ty = y[i]; 104 | sx += tx; 105 | sy += ty; 106 | sxx += tx * tx; 107 | syy += ty * ty; 108 | sxy += tx * ty; 109 | } 110 | 111 | b = (MEASUREMENTS * sxy - sx * sy) / (MEASUREMENTS * sxx - sx * sx); 112 | a = (sy - b * sx) / MEASUREMENTS; 113 | 114 | if (DEBUG) 115 | 
fprintf(stderr, "a = %g\n", a); 116 | if (DEBUG) 117 | fprintf(stderr, "b = %g\n", b); 118 | if (DEBUG) 119 | fprintf(stderr, "a / b = %g\n", a / b); 120 | r_2 = (MEASUREMENTS * sxy - sx * sy) * (MEASUREMENTS * sxy - sx * sy) / 121 | (MEASUREMENTS * sxx - sx * sx) / 122 | (MEASUREMENTS * syy - sy * sy); 123 | 124 | if (DEBUG) 125 | fprintf(stderr, "r^2 = %g\n", r_2); 126 | if (r_2 < 0.9) { 127 | fprintf(stderr,"Correlation coefficient r^2: %g < 0.9\n", r_2); 128 | return 0; 129 | } 130 | 131 | return b; 132 | } 133 | 134 | #if !defined(__s390x__) && !defined(__s390__) 135 | static double proc_get_cpu_mhz(int no_cpu_freq_warn) 136 | { 137 | FILE* f; 138 | char buf[256]; 139 | double mhz = 0.0; 140 | int print_flag = 0; 141 | double delta; 142 | 143 | #if defined(__FreeBSD__) 144 | f = popen("/sbin/sysctl hw.clockrate","r"); 145 | #else 146 | f = fopen("/proc/cpuinfo","r"); 147 | #endif 148 | 149 | if (!f) 150 | return 0.0; 151 | while(fgets(buf, sizeof(buf), f)) { 152 | double m; 153 | int rc; 154 | 155 | #if defined (__ia64__) 156 | /* Use the ITC frequency on IA64 */ 157 | rc = sscanf(buf, "itc MHz : %lf", &m); 158 | #elif defined (__loongarch__) 159 | /* Use upper case cpu on LoongArch */ 160 | rc = sscanf(buf, "CPU MHz : %lf", &m); 161 | #elif defined (__PPC__) || defined (__PPC64__) 162 | /* PPC has a different format as well */ 163 | rc = sscanf(buf, "clock : %lf", &m); 164 | #elif defined (__sparc__) && defined (__arch64__) 165 | /* 166 | * on sparc the /proc/cpuinfo lines that hold 167 | * the cpu freq in HZ are as follow: 168 | * Cpu{cpu-num}ClkTck : 00000000a9beeee4 169 | */ 170 | char *s; 171 | 172 | s = strstr(buf, "ClkTck\t: "); 173 | if (!s) 174 | continue; 175 | s += (strlen("ClkTck\t: ") - strlen("0x")); 176 | strncpy(s, "0x", strlen("0x")); 177 | rc = sscanf(s, "%lf", &m); 178 | m /= 1000000; 179 | #else 180 | #if defined (__FreeBSD__) 181 | rc = sscanf(buf, "hw.clockrate: %lf", &m); 182 | #else 183 | rc = sscanf(buf, "cpu MHz : %lf", &m); 184 | 
#endif 185 | #endif 186 | 187 | if (rc != 1) 188 | continue; 189 | 190 | if (mhz == 0.0) { 191 | mhz = m; 192 | continue; 193 | } 194 | delta = mhz > m ? mhz - m : m - mhz; 195 | if ((delta / mhz > 0.02) && (print_flag ==0)) { 196 | print_flag = 1; 197 | if (!no_cpu_freq_warn) { 198 | fprintf(stderr, "Conflicting CPU frequency values" 199 | " detected: %lf != %lf. CPU Frequency is not max.\n", mhz, m); 200 | } 201 | continue; 202 | } 203 | } 204 | 205 | #if defined(__FreeBSD__) 206 | pclose(f); 207 | #else 208 | fclose(f); 209 | #endif 210 | return mhz; 211 | } 212 | #endif 213 | 214 | double get_cpu_mhz(int no_cpu_freq_warn) 215 | { 216 | #if defined(__s390x__) || defined(__s390__) 217 | return sample_get_cpu_mhz(); 218 | #else 219 | double sample, proc, delta; 220 | sample = sample_get_cpu_mhz(); 221 | proc = proc_get_cpu_mhz(no_cpu_freq_warn); 222 | #ifdef __aarch64__ 223 | if (proc < 1) 224 | proc = sample; 225 | #endif 226 | #ifdef __riscv 227 | if (proc <= 0) 228 | proc = sample; 229 | #endif 230 | 231 | if (!proc || !sample) 232 | return 0; 233 | 234 | delta = proc > sample ? proc - sample : sample - proc; 235 | if (delta / proc > 0.02) { 236 | return sample; 237 | } 238 | return proc; 239 | #endif 240 | } 241 | 242 | #if defined(__riscv) 243 | #include 244 | #include 245 | #include 246 | #include 247 | #include 248 | #include 249 | #include 250 | 251 | static long perf_event_open(struct perf_event_attr *hw_event, 252 | pid_t pid, int cpu, int group_fd, 253 | unsigned long flags) 254 | { 255 | return syscall(__NR_perf_event_open, hw_event, pid, 256 | cpu, group_fd, flags); 257 | } 258 | 259 | cycles_t perf_get_cycles() 260 | { 261 | cycles_t cycles = 0; 262 | struct perf_event_attr pe; 263 | const pid_t pid = 0; // Current task 264 | const int cpu = -1; // On any CPU 265 | const int group_fd = -1; // Use leader group 266 | const unsigned long flags = 0; 267 | /* Use this variable just to open perf event here and once. 
268 | It is appropriate because it touches only this function and 269 | not fix other code */ 270 | static int is_open = 0; 271 | /* Make file discriptor static just to keep it valid during 272 | programm execution. It will be closed automatically when 273 | test finishes. It is a hack just not to fix other part of test */ 274 | static int fd = -1; 275 | 276 | if (!is_open) { 277 | memset(&pe, 0, sizeof(pe)); 278 | 279 | pe.type = PERF_TYPE_HARDWARE; 280 | pe.size = sizeof(pe); 281 | pe.config = PERF_COUNT_HW_CPU_CYCLES; 282 | pe.disabled = 0; 283 | pe.exclude_kernel = 0; 284 | pe.exclude_hv = 0; 285 | 286 | fd = perf_event_open(&pe, pid, cpu, group_fd, flags); 287 | if (fd == -1) { 288 | fprintf(stderr, "Error opening perf event (%llx)\n", pe.config); 289 | exit(EXIT_FAILURE); 290 | } 291 | 292 | is_open = 1; 293 | } 294 | 295 | if(read(fd, &cycles, sizeof(cycles)) < 0) { 296 | fprintf(stderr, "Error reading perf event (%llx)\n", pe.config); 297 | exit(EXIT_FAILURE); 298 | } 299 | 300 | return cycles; 301 | } 302 | #endif 303 | -------------------------------------------------------------------------------- /src/get_clock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 3 | * 4 | * This software is available to you under a choice of one of two 5 | * licenses. You may choose to be licensed under the terms of the GNU 6 | * General Public License (GPL) Version 2, available from the file 7 | * COPYING in the main directory of this source tree, or the 8 | * OpenIB.org BSD license below: 9 | * 10 | * Redistribution and use in source and binary forms, with or 11 | * without modification, are permitted provided that the following 12 | * conditions are met: 13 | * 14 | * - Redistributions of source code must retain the above 15 | * copyright notice, this list of conditions and the following 16 | * disclaimer. 
17 | * 18 | * - Redistributions in binary form must reproduce the above 19 | * copyright notice, this list of conditions and the following 20 | * disclaimer in the documentation and/or other materials 21 | * provided with the distribution. 22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | * SOFTWARE. 31 | * 32 | * $Id$ 33 | * 34 | * Author: Michael S. Tsirkin 35 | */ 36 | 37 | #ifndef GET_CLOCK_H 38 | #define GET_CLOCK_H 39 | 40 | #if defined (__x86_64__) || defined(__i386__) 41 | /* Note: only x86 CPUs which have rdtsc instruction are supported. */ 42 | typedef unsigned long long cycles_t; 43 | static inline cycles_t get_cycles() 44 | { 45 | unsigned low, high; 46 | unsigned long long val; 47 | asm volatile ("rdtsc" : "=a" (low), "=d" (high)); 48 | val = high; 49 | val = (val << 32) | low; 50 | return val; 51 | } 52 | #elif defined(__PPC__) || defined(__PPC64__) 53 | /* Note: only PPC CPUs which have mftb instruction are supported. 
*/ 54 | /* PPC64 has mftb */ 55 | typedef unsigned long cycles_t; 56 | static inline cycles_t get_cycles() 57 | { 58 | cycles_t ret; 59 | 60 | __asm__ __volatile__ ("\n\t isync" "\n\t mftb %0" : "=r"(ret)); 61 | return ret; 62 | } 63 | #elif defined(__ia64__) 64 | /* Itanium2 and up has ar.itc (Itanium1 has errata) */ 65 | typedef unsigned long cycles_t; 66 | static inline cycles_t get_cycles() 67 | { 68 | cycles_t ret; 69 | 70 | asm volatile ("mov %0=ar.itc" : "=r" (ret)); 71 | return ret; 72 | } 73 | #elif defined(__ARM_ARCH_7A__) 74 | typedef unsigned long long cycles_t; 75 | static inline cycles_t get_cycles(void) 76 | { 77 | cycles_t clk; 78 | asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (clk)); 79 | return clk; 80 | } 81 | #elif defined(__s390x__) || defined(__s390__) 82 | typedef unsigned long long cycles_t; 83 | static inline cycles_t get_cycles(void) 84 | { 85 | cycles_t clk; 86 | asm volatile("stck %0" : "=Q" (clk) : : "cc"); 87 | return clk >> 2; 88 | } 89 | #elif defined(__sparc__) && defined(__arch64__) 90 | typedef unsigned long long cycles_t; 91 | static inline cycles_t get_cycles(void) 92 | { 93 | cycles_t v; 94 | asm volatile ("rd %%tick, %0" : "=r" (v) : ); 95 | return v; 96 | } 97 | #elif defined(__aarch64__) 98 | 99 | typedef unsigned long cycles_t; 100 | static inline cycles_t get_cycles() 101 | { 102 | cycles_t cval; 103 | asm volatile("isb" : : : "memory"); 104 | asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); 105 | return cval; 106 | } 107 | #elif defined(__loongarch_lp64) 108 | 109 | typedef unsigned long cycles_t; 110 | 111 | static inline cycles_t get_cycles() 112 | { 113 | cycles_t cval; 114 | __asm__ __volatile__("rdtime.d %0, $zero" : "=r"(cval)); 115 | return cval; 116 | } 117 | #elif defined(__riscv) 118 | typedef unsigned long cycles_t; 119 | 120 | cycles_t perf_get_cycles(); 121 | 122 | static inline cycles_t get_cycles() 123 | { 124 | return perf_get_cycles(); 125 | } 126 | 127 | #elif defined(__hppa__) 128 | typedef 
unsigned long long cycles_t; 129 | static inline cycles_t get_cycles(void) 130 | { 131 | cycles_t clk; 132 | asm volatile("mfctl %%cr16, %0" : "=r" (clk)); 133 | return clk; 134 | } 135 | #else 136 | #warning get_cycles not implemented for this architecture: attempt asm/timex.h 137 | #include 138 | #endif 139 | 140 | extern double get_cpu_mhz(int); 141 | 142 | #endif 143 | -------------------------------------------------------------------------------- /src/hl_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "hl_memory.h" 12 | #include "perftest_parameters.h" 13 | #include "hlthunk.h" 14 | 15 | #define ACCEL_PAGE_SIZE (4096) 16 | #define INVALID_FD (-1) 17 | 18 | #define LIST_SIZE (64) 19 | 20 | struct KeyValuePair { 21 | uint64_t key; 22 | uint64_t value; 23 | bool is_occupied; 24 | }; 25 | 26 | struct hl_memory_ctx { 27 | struct memory_ctx base; 28 | char *device_bus_id; 29 | int device_fd; 30 | struct KeyValuePair mem_handle_table[LIST_SIZE]; 31 | pthread_mutex_t mem_handle_table_lock; 32 | }; 33 | 34 | static int hl_set_memory_handle(struct hl_memory_ctx *const hl_ctx, const uint64_t addr, const uint64_t memory_handle) 35 | { 36 | size_t i = 0; 37 | 38 | for (i = 0; i < LIST_SIZE; i++) { 39 | if (hl_ctx->mem_handle_table[i].is_occupied) 40 | continue; 41 | hl_ctx->mem_handle_table[i].key = addr; 42 | hl_ctx->mem_handle_table[i].value = memory_handle; 43 | hl_ctx->mem_handle_table[i].is_occupied = true; 44 | return SUCCESS; // successfully inserted 45 | } 46 | return FAILURE; // list is full 47 | } 48 | 49 | static int 50 | hl_delete_memory_handle(struct hl_memory_ctx *const hl_ctx, const uint64_t addr, uint64_t *const memory_handle) { 51 | size_t i = 0; 52 | 53 | for (i = 0; i < LIST_SIZE; 
i++) { 54 | if (hl_ctx->mem_handle_table[i].is_occupied && hl_ctx->mem_handle_table[i].key == addr) { 55 | hl_ctx->mem_handle_table[i].is_occupied = false; 56 | *memory_handle = hl_ctx->mem_handle_table[i].value; 57 | return SUCCESS; // key removed 58 | } 59 | } 60 | return FAILURE; // key not found 61 | } 62 | 63 | int hl_memory_init(struct memory_ctx *ctx) 64 | { 65 | struct hl_memory_ctx *const hl_ctx = container_of(ctx, struct hl_memory_ctx, base); 66 | 67 | hl_ctx->device_fd = hlthunk_open(HLTHUNK_DEVICE_DONT_CARE, hl_ctx->device_bus_id); 68 | if (hl_ctx->device_fd < 0) 69 | return FAILURE; 70 | 71 | memset(hl_ctx->mem_handle_table, 0, sizeof(hl_ctx->mem_handle_table)); 72 | 73 | if (pthread_mutex_init(&hl_ctx->mem_handle_table_lock, NULL) != 0) { 74 | (void) hlthunk_close(hl_ctx->device_fd); 75 | return FAILURE; 76 | } 77 | 78 | return SUCCESS; 79 | } 80 | 81 | int hl_memory_destroy(struct memory_ctx *ctx) 82 | { 83 | struct hl_memory_ctx *const hl_ctx = container_of(ctx, struct hl_memory_ctx, base); 84 | 85 | (void) pthread_mutex_destroy(&hl_ctx->mem_handle_table_lock); 86 | (void) hlthunk_close(hl_ctx->device_fd); 87 | 88 | free(hl_ctx); 89 | return SUCCESS; 90 | } 91 | 92 | int hl_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, 93 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 94 | struct hl_memory_ctx *const hl_ctx = container_of(ctx, struct hl_memory_ctx, base); 95 | const uint64_t page_size = 0; 96 | const uint64_t NO_OFFSET = 0; 97 | const bool NOT_SHARED = false; 98 | int fd = INVALID_FD; 99 | uint64_t buffer_addr = 0; 100 | const size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1); 101 | 102 | const uint64_t memory_handle = hlthunk_device_memory_alloc(hl_ctx->device_fd, buf_size, page_size, 103 | HL_MEM_CONTIGUOUS, NOT_SHARED); 104 | if (memory_handle == 0) { 105 | fprintf(stderr, "Failed to allocate %lu bytes of device memory\n", (unsigned long) buf_size); 106 | return 
FAILURE; 107 | } 108 | buffer_addr = hlthunk_device_memory_map(hl_ctx->device_fd, memory_handle, 0); 109 | if (buffer_addr == 0) { 110 | fprintf(stderr, "Failed to map device memory allocation\n"); 111 | return FAILURE; 112 | } 113 | if (pthread_mutex_lock(&hl_ctx->mem_handle_table_lock) != 0) { 114 | fprintf(stderr, "Failed to lock mutex while allocating memory\n"); 115 | return FAILURE; 116 | } 117 | if (hl_set_memory_handle(hl_ctx, buffer_addr, memory_handle) != SUCCESS) { 118 | (void) pthread_mutex_unlock(&hl_ctx->mem_handle_table_lock); 119 | return FAILURE; 120 | } 121 | if (pthread_mutex_unlock(&hl_ctx->mem_handle_table_lock) != 0) { 122 | fprintf(stderr, "Failed to unlock mutex\n"); 123 | return FAILURE; 124 | } 125 | 126 | fd = hlthunk_device_mapped_memory_export_dmabuf_fd(hl_ctx->device_fd, buffer_addr, buf_size, NO_OFFSET, 127 | O_RDWR | O_CLOEXEC); 128 | if (fd < 0) { 129 | fprintf(stderr, "Failed to export dmabuf. sz[%lu] ptr[%p] err[%d]\n", 130 | (unsigned long) buf_size, (void *) buffer_addr, fd); 131 | return FAILURE; 132 | } 133 | 134 | fprintf(stderr, "Allocated %lu bytes of accelerator buffer at %p on fd %d\n", 135 | (unsigned long) buf_size, (void *) buffer_addr, fd); 136 | *dmabuf_fd = fd; 137 | *dmabuf_offset = NO_OFFSET; 138 | *addr = (void *) buffer_addr; 139 | *can_init = false; 140 | return SUCCESS; 141 | } 142 | 143 | int hl_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) 144 | { 145 | struct hl_memory_ctx *hl_ctx = container_of(ctx, struct hl_memory_ctx, base); 146 | uint64_t memory_handle = INVALID_FD; 147 | int rc = hlthunk_memory_unmap(hl_ctx->device_fd, (uint64_t) addr); 148 | 149 | if (rc) { 150 | fprintf(stderr, "Failed to unmap host memory\n"); 151 | return rc; 152 | } 153 | if (pthread_mutex_lock(&hl_ctx->mem_handle_table_lock) != 0) { 154 | fprintf(stderr, "Failed to lock mutex while deallocating memory\n"); 155 | return FAILURE; 156 | } 157 | if (hl_delete_memory_handle(hl_ctx, (uint64_t) 
addr, &memory_handle) != SUCCESS) { 158 | fprintf(stderr, "Failed to remove memory handle\n"); 159 | (void) pthread_mutex_unlock(&hl_ctx->mem_handle_table_lock); 160 | return FAILURE; 161 | } 162 | 163 | rc = hlthunk_device_memory_free(hl_ctx->device_fd, memory_handle); 164 | pthread_mutex_unlock(&hl_ctx->mem_handle_table_lock); 165 | return (rc == 0) ? SUCCESS : FAILURE; 166 | } 167 | 168 | bool hl_memory_supported(void) 169 | { 170 | return true; 171 | } 172 | 173 | struct memory_ctx *hl_memory_create(struct perftest_parameters *params) 174 | { 175 | struct hl_memory_ctx *ctx; 176 | 177 | ALLOCATE(ctx, struct hl_memory_ctx, 1); 178 | ctx->base.init = hl_memory_init; 179 | ctx->base.destroy = hl_memory_destroy; 180 | ctx->base.allocate_buffer = hl_memory_allocate_buffer; 181 | ctx->base.free_buffer = hl_memory_free_buffer; 182 | ctx->base.copy_host_to_buffer = memcpy; 183 | ctx->base.copy_buffer_to_host = memcpy; 184 | ctx->base.copy_buffer_to_buffer = memcpy; 185 | ctx->device_bus_id = params->hl_device_bus_id; 186 | return &ctx->base; 187 | } 188 | -------------------------------------------------------------------------------- /src/hl_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
4 | */ 5 | 6 | #ifndef HL_MEMORY_H 7 | #define HL_MEMORY_H 8 | 9 | #include "memory.h" 10 | #include "config.h" 11 | 12 | 13 | struct perftest_parameters; 14 | 15 | bool hl_memory_supported(); 16 | 17 | struct memory_ctx *hl_memory_create(struct perftest_parameters *params); 18 | 19 | 20 | #ifndef HAVE_HL 21 | 22 | inline bool hl_memory_supported() { 23 | return false; 24 | } 25 | 26 | inline struct memory_ctx *hl_memory_create(struct perftest_parameters *params) { 27 | return NULL; 28 | } 29 | 30 | #endif 31 | 32 | #endif /* HL_MEMORY_H */ 33 | -------------------------------------------------------------------------------- /src/host_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "host_memory.h" 12 | #include "perftest_parameters.h" 13 | 14 | 15 | struct host_memory_ctx { 16 | struct memory_ctx base; 17 | int use_hugepages; 18 | }; 19 | 20 | 21 | #define HUGEPAGE_ALIGN (2*1024*1024) 22 | #define SHMAT_ADDR (void *)(0x0UL) 23 | #define SHMAT_FLAGS (0) 24 | #define SHMAT_INVALID_PTR ((void *)-1) 25 | 26 | #if !defined(__FreeBSD__) 27 | int alloc_hugepage_region(int alignment, uint64_t size, void **addr) 28 | { 29 | int huge_shmid; 30 | uint64_t buf_size; 31 | uint64_t buf_alignment = (((alignment + HUGEPAGE_ALIGN -1) / HUGEPAGE_ALIGN) * HUGEPAGE_ALIGN); 32 | buf_size = (((size + buf_alignment -1 ) / buf_alignment ) * buf_alignment); 33 | 34 | /* create hugepage shared region */ 35 | huge_shmid = shmget(IPC_PRIVATE, buf_size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); 36 | if (huge_shmid < 0) { 37 | fprintf(stderr, "Failed to allocate hugepages. 
Please configure hugepages\n"); 38 | return FAILURE; 39 | } 40 | 41 | /* attach shared memory */ 42 | *addr = (void *)shmat(huge_shmid, SHMAT_ADDR, SHMAT_FLAGS); 43 | if (*addr == SHMAT_INVALID_PTR) { 44 | fprintf(stderr, "Failed to attach shared memory region\n"); 45 | return FAILURE; 46 | } 47 | 48 | /* Mark shmem for removal */ 49 | if (shmctl(huge_shmid, IPC_RMID, 0) != 0) { 50 | fprintf(stderr, "Failed to mark shm for removal\n"); 51 | return FAILURE; 52 | } 53 | 54 | return SUCCESS; 55 | } 56 | #endif 57 | 58 | int host_memory_init(struct memory_ctx *ctx) { 59 | return SUCCESS; 60 | } 61 | 62 | int host_memory_destroy(struct memory_ctx *ctx) { 63 | struct host_memory_ctx *host_ctx = container_of(ctx, struct host_memory_ctx, base); 64 | 65 | free(host_ctx); 66 | return SUCCESS; 67 | } 68 | 69 | int host_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, 70 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 71 | #if defined(__FreeBSD__) 72 | posix_memalign(addr, alignment, size); 73 | #else 74 | struct host_memory_ctx *host_ctx = container_of(ctx, struct host_memory_ctx, base); 75 | if (host_ctx->use_hugepages) { 76 | if (alloc_hugepage_region(alignment, size, addr) != 0){ 77 | fprintf(stderr, "Failed to allocate hugepage region.\n"); 78 | return FAILURE; 79 | } 80 | } else { 81 | *addr = memalign(alignment, size); 82 | } 83 | #endif 84 | if (!*addr) { 85 | fprintf(stderr, "Couldn't allocate work buf.\n"); 86 | return FAILURE; 87 | } 88 | 89 | memset(*addr, 0, size); 90 | *can_init = true; 91 | return SUCCESS; 92 | } 93 | 94 | int host_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) { 95 | struct host_memory_ctx *host_ctx = container_of(ctx, struct host_memory_ctx, base); 96 | 97 | if (host_ctx->use_hugepages) { 98 | shmdt(addr); 99 | } else { 100 | free(addr); 101 | } 102 | return SUCCESS; 103 | } 104 | 105 | struct memory_ctx *host_memory_create(struct perftest_parameters 
*params) { 106 | struct host_memory_ctx *ctx; 107 | 108 | ALLOCATE(ctx, struct host_memory_ctx, 1); 109 | ctx->base.init = host_memory_init; 110 | ctx->base.destroy = host_memory_destroy; 111 | ctx->base.allocate_buffer = host_memory_allocate_buffer; 112 | ctx->base.free_buffer = host_memory_free_buffer; 113 | ctx->base.copy_host_to_buffer = memcpy; 114 | ctx->base.copy_buffer_to_host = memcpy; 115 | ctx->base.copy_buffer_to_buffer = memcpy; 116 | ctx->use_hugepages = params->use_hugepages; 117 | return &ctx->base; 118 | } 119 | -------------------------------------------------------------------------------- /src/host_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 | */ 5 | 6 | #ifndef HOST_MEMORY_H 7 | #define HOST_MEMORY_H 8 | 9 | #include 10 | #include 11 | #include "memory.h" 12 | 13 | 14 | struct perftest_parameters; 15 | 16 | struct memory_ctx *host_memory_create(struct perftest_parameters *params); 17 | 18 | #endif /* HOST_MEMORY_H */ 19 | -------------------------------------------------------------------------------- /src/memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
4 | */ 5 | 6 | #ifndef MEMORY_H 7 | #define MEMORY_H 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | /* Get pointer to containing type object by a pointer to its member field */ 14 | #define container_of(ptr, type, member) ({ \ 15 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 16 | (type *)( (char *)__mptr - offsetof(type,member) );}) 17 | 18 | 19 | /* Base context for memory management to be extended by concrete implementations */ 20 | struct memory_ctx { 21 | int (*init)(struct memory_ctx *ctx); 22 | int (*destroy)(struct memory_ctx *ctx); 23 | int (*allocate_buffer)(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, 24 | uint64_t *dmabuf_offset, void **addr, bool *can_init); 25 | int (*free_buffer)(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size); 26 | void *(*copy_host_to_buffer)(void *dest, const void *src, size_t size); 27 | void *(*copy_buffer_to_host)(void *dest, const void *src, size_t size); 28 | void *(*copy_buffer_to_buffer)(void *dest, const void *src, size_t size); 29 | }; 30 | 31 | #endif /* MEMORY_H */ 32 | -------------------------------------------------------------------------------- /src/mlu_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "mlu_memory.h" 11 | #include "perftest_parameters.h" 12 | 13 | static inline const char *getErrorName(CNresult error) 14 | { 15 | const char *str; 16 | cnGetErrorName(error, &str); 17 | return str; 18 | } 19 | 20 | static inline const char *getErrorString(CNresult error) 21 | { 22 | const char *str; 23 | cnGetErrorString(error, &str); 24 | return str; 25 | } 26 | 27 | #define ERROR_CHECK(ret) \ 28 | do { \ 29 | CNresult r__ = (ret); \ 30 | if (r__ != CN_SUCCESS) { \ 31 | printf( \ 32 | "error occur, func: %s, line: %d, ret:%d, cn_error_code:%s, cn_error_string:%s\n", \ 33 | __func__, __LINE__, r__, getErrorName(r__), getErrorString(r__)); \ 34 | exit(0); \ 35 | } \ 36 | } while (0) 37 | 38 | #define ACCEL_PAGE_SIZE (64 * 1024) 39 | 40 | 41 | struct mlu_memory_ctx { 42 | struct memory_ctx base; 43 | int device_id; 44 | CNdev cnDevice; 45 | CNcontext cnContext; 46 | 47 | }; 48 | 49 | 50 | static int init_mlu(struct mlu_memory_ctx *ctx) 51 | { 52 | int mlu_device_id = ctx->device_id; 53 | int mlu_pci_bus_id; 54 | int mlu_pci_device_id; 55 | int index; 56 | CNdev cn_device; 57 | 58 | printf("initializing MLU\n"); 59 | CNresult error = cnInit(0); 60 | if (error != CN_SUCCESS) { 61 | printf("cnInit(0) returned %d\n", error); 62 | return FAILURE; 63 | } 64 | 65 | int deviceCount = 0; 66 | error = cnDeviceGetCount(&deviceCount); 67 | if (error != CN_SUCCESS) { 68 | printf("cnDeviceGetCount() returned %d\n", error); 69 | return FAILURE; 70 | } 71 | 72 | if (deviceCount == 0) { 73 | printf("There are no available device(s) that support MLU\n"); 74 | return FAILURE; 75 | } 76 | if (mlu_device_id >= deviceCount) { 77 | fprintf(stderr, "No such device ID (%d) exists in system\n", mlu_device_id); 78 | return FAILURE; 79 | } 80 | 81 | printf("Listing all MLU devices in system:\n"); 82 | for (index = 0; index < deviceCount; index++) { 83 | ERROR_CHECK(cnDeviceGet(&cn_device, index)); 84 | 
cnDeviceGetAttribute(&mlu_pci_bus_id, CN_DEVICE_ATTRIBUTE_PCI_BUS_ID , cn_device); 85 | cnDeviceGetAttribute(&mlu_pci_device_id, CN_DEVICE_ATTRIBUTE_PCI_DEVICE_ID , cn_device); 86 | printf("MLU device %d: PCIe address is %02X:%02X\n", index, (unsigned int)mlu_pci_bus_id, (unsigned int)mlu_pci_device_id); 87 | } 88 | 89 | printf("\nPicking device No. %d\n", mlu_device_id); 90 | 91 | ERROR_CHECK(cnDeviceGet(&ctx->cnDevice, mlu_device_id)); 92 | 93 | char name[128]; 94 | ERROR_CHECK(cnDeviceGetName(name, sizeof(name), mlu_device_id)); 95 | printf("[pid = %d, dev = %ld] device name = [%s]\n", getpid(), ctx->cnDevice, name); 96 | printf("creating MLU Ctx\n"); 97 | 98 | /* Create context */ 99 | error = cnCtxCreate(&ctx->cnContext, 0, ctx->cnDevice); 100 | if (error != CN_SUCCESS) { 101 | printf("cnCtxCreate() error=%d\n", error); 102 | return FAILURE; 103 | } 104 | 105 | printf("making it the current MLU Ctx\n"); 106 | error = cnCtxSetCurrent(ctx->cnContext); 107 | if (error != CN_SUCCESS) { 108 | printf("cnCtxSetCurrent() error=%d\n", error); 109 | return FAILURE; 110 | } 111 | 112 | return SUCCESS; 113 | } 114 | 115 | static void free_mlu(struct mlu_memory_ctx *ctx) 116 | { 117 | printf("destroying current MLU Ctx\n"); 118 | ERROR_CHECK(cnCtxDestroy(ctx->cnContext)); 119 | } 120 | 121 | int mlu_memory_init(struct memory_ctx *ctx) { 122 | struct mlu_memory_ctx *mlu_ctx = container_of(ctx, struct mlu_memory_ctx, base); 123 | int return_value = 0; 124 | 125 | return_value = init_mlu(mlu_ctx); 126 | 127 | if (return_value) { 128 | fprintf(stderr, "Couldn't initialize mlu device : %d\n", return_value); 129 | return FAILURE; 130 | } 131 | 132 | return SUCCESS; 133 | } 134 | 135 | int mlu_memory_destroy(struct memory_ctx *ctx) { 136 | struct mlu_memory_ctx *mlu_ctx = container_of(ctx, struct mlu_memory_ctx, base); 137 | 138 | free_mlu(mlu_ctx); 139 | free(mlu_ctx); 140 | return SUCCESS; 141 | } 142 | 143 | int mlu_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, 
uint64_t size, int *dmabuf_fd, 144 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 145 | CNresult error; 146 | size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1); 147 | 148 | CNaddr mlu_addr; 149 | printf("cnMalloc() of a %lu bytes MLU buffer\n", size); 150 | 151 | error = cnMalloc(&mlu_addr, buf_size); 152 | if (error != CN_SUCCESS) { 153 | printf("cnMalloc error=%d\n", error); 154 | return FAILURE; 155 | } 156 | 157 | printf("allocated %lu bytes of MLU buffer at %ld\n", (unsigned long)buf_size, mlu_addr); 158 | *addr = (void *)mlu_addr; 159 | *can_init = false; 160 | return SUCCESS; 161 | } 162 | 163 | int mlu_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) 164 | { 165 | CNaddr mlu_addr = (CNaddr)addr; 166 | printf("deallocating MLU buffer %016lx\n", mlu_addr); 167 | cnFree(mlu_addr); 168 | return SUCCESS; 169 | } 170 | 171 | void *mlu_memory_copy_host_buffer(void *dest, const void *src, size_t size) { 172 | cnMemcpy((CNaddr) dest, (CNaddr) src, size); 173 | return dest; 174 | } 175 | 176 | void *mlu_memory_copy_buffer_to_buffer(void *dest, const void *src, size_t size) { 177 | cnMemcpyDtoD((CNaddr) dest, (CNaddr) src, size); 178 | return dest; 179 | } 180 | 181 | bool mlu_memory_supported() { 182 | return true; 183 | } 184 | 185 | struct memory_ctx *mlu_memory_create(struct perftest_parameters *params) { 186 | struct mlu_memory_ctx *ctx; 187 | 188 | ALLOCATE(ctx, struct mlu_memory_ctx, 1); 189 | ctx->base.init = mlu_memory_init; 190 | ctx->base.destroy = mlu_memory_destroy; 191 | ctx->base.allocate_buffer = mlu_memory_allocate_buffer; 192 | ctx->base.free_buffer = mlu_memory_free_buffer; 193 | ctx->base.copy_host_to_buffer = mlu_memory_copy_host_buffer; 194 | ctx->base.copy_buffer_to_host = mlu_memory_copy_host_buffer; 195 | ctx->base.copy_buffer_to_buffer = mlu_memory_copy_buffer_to_buffer; 196 | ctx->device_id = params->mlu_device_id; 197 | 198 | return &ctx->base; 199 | } 200 | 
-------------------------------------------------------------------------------- /src/mlu_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 | */ 5 | 6 | #ifndef MLU_MEMORY_H 7 | #define MLU_MEMORY_H 8 | 9 | #include 10 | #include 11 | #include "memory.h" 12 | #include "config.h" 13 | 14 | 15 | struct perftest_parameters; 16 | 17 | bool mlu_memory_supported(); 18 | 19 | struct memory_ctx *mlu_memory_create(struct perftest_parameters *params); 20 | 21 | 22 | #ifndef HAVE_MLU 23 | 24 | inline bool mlu_memory_supported() { 25 | return false; 26 | } 27 | 28 | inline struct memory_ctx *mlu_memory_create(struct perftest_parameters *params) { 29 | return NULL; 30 | } 31 | 32 | #endif 33 | 34 | #endif /* MLU_MEMORY_H */ 35 | -------------------------------------------------------------------------------- /src/mmap_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "mmap_memory.h" 12 | #include "perftest_parameters.h" 13 | 14 | 15 | struct mmap_memory_ctx { 16 | struct memory_ctx base; 17 | char *file; 18 | unsigned long offset; 19 | }; 20 | 21 | 22 | static int init_mmap(void **addr, size_t size, const char *fname, unsigned long offset) { 23 | int fd = open(fname, O_RDWR); 24 | 25 | if (fd < 0) { 26 | printf("Unable to open '%s': %s\n", fname, strerror(errno)); 27 | return FAILURE; 28 | } 29 | 30 | *addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, offset); 31 | close(fd); 32 | 33 | if (*addr == MAP_FAILED) { 34 | printf("Unable to mmap '%s': %s\n", fname, strerror(errno)); 35 | return FAILURE; 36 | } 37 | 38 | printf("allocated mmap buffer of size %zu at %p\n", size, *addr); 39 | 40 | return SUCCESS; 41 | } 42 | 43 | int mmap_memory_init(struct memory_ctx *ctx) { 44 | return SUCCESS; 45 | } 46 | 47 | int mmap_memory_destroy(struct memory_ctx *ctx) { 48 | struct mmap_memory_ctx *mmap_ctx = container_of(ctx, struct mmap_memory_ctx, base); 49 | 50 | free(mmap_ctx); 51 | return SUCCESS; 52 | } 53 | 54 | int mmap_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, 55 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 56 | struct mmap_memory_ctx *mmap_ctx = container_of(ctx, struct mmap_memory_ctx, base); 57 | 58 | if (init_mmap(addr, size, mmap_ctx->file, mmap_ctx->offset)) 59 | { 60 | fprintf(stderr, "Couldn't allocate work buf.\n"); 61 | return FAILURE; 62 | } 63 | *can_init = true; 64 | return SUCCESS; 65 | } 66 | 67 | int mmap_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) { 68 | munmap(addr, size); 69 | return SUCCESS; 70 | } 71 | 72 | struct memory_ctx *mmap_memory_create(struct perftest_parameters *params) { 73 | struct mmap_memory_ctx *ctx; 74 | 75 | ALLOCATE(ctx, struct mmap_memory_ctx, 1); 76 | ctx->base.init = mmap_memory_init; 77 
| ctx->base.destroy = mmap_memory_destroy; 78 | ctx->base.allocate_buffer = mmap_memory_allocate_buffer; 79 | ctx->base.free_buffer = mmap_memory_free_buffer; 80 | ctx->base.copy_host_to_buffer = memcpy; 81 | ctx->base.copy_buffer_to_host = memcpy; 82 | ctx->base.copy_buffer_to_buffer = memcpy; 83 | ctx->file = params->mmap_file; 84 | ctx->offset = params->mmap_offset; 85 | return &ctx->base; 86 | } 87 | -------------------------------------------------------------------------------- /src/mmap_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 | */ 5 | 6 | #ifndef MMAP_MEMORY_H 7 | #define MMAP_MEMORY_H 8 | 9 | #include "memory.h" 10 | 11 | 12 | struct perftest_parameters; 13 | 14 | struct memory_ctx *mmap_memory_create(struct perftest_parameters *params); 15 | 16 | #endif /* MMAP_MEMORY_H */ 17 | -------------------------------------------------------------------------------- /src/multicast_resources.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #if !defined(__FreeBSD__) 13 | #include 14 | #endif 15 | #include 16 | #include 17 | #if defined(__FreeBSD__) 18 | #include 19 | #else 20 | #include 21 | #endif 22 | #include 23 | #include 24 | #include "multicast_resources.h" 25 | #include "perftest_communication.h" 26 | 27 | /* This is when we get sig handler from the user before we remove the join request. 
*/ 28 | struct mcast_parameters *sighandler_params; 29 | 30 | /****************************************************************************** 31 | * signalCatcher - cacth user signal in order to reregiser the mcast group 32 | ******************************************************************************/ 33 | static void signalCatcher (int sig) 34 | { 35 | if (sig == SIGINT) { 36 | 37 | if (join_multicast_group(SUBN_ADM_METHOD_DELETE,sighandler_params)) 38 | fprintf(stderr,"Couldn't Unregister the Mcast group on the SM\n"); 39 | 40 | if (sighandler_params->is_2nd_mgid_used) { 41 | memcpy(sighandler_params->mgid.raw,sighandler_params->base_mgid.raw,16); 42 | if (join_multicast_group(SUBN_ADM_METHOD_DELETE,sighandler_params)) 43 | fprintf(stderr,"Couldn't Unregister the Base Mcast group on the SM\n"); 44 | } 45 | } 46 | exit(1); 47 | } 48 | 49 | /****************************************************************************** 50 | * prepare_mcast_mad 51 | ******************************************************************************/ 52 | static void prepare_mcast_mad(uint8_t method, 53 | struct mcast_parameters *params, 54 | struct sa_mad_packet_t *samad_packet) 55 | { 56 | uint8_t *ptr; 57 | uint64_t comp_mask; 58 | 59 | memset(samad_packet,0,sizeof(*samad_packet)); 60 | 61 | /* prepare the MAD header. 
according to Table 145 in IB spec 1.2.1 */ 62 | ptr = samad_packet->mad_header_buf; 63 | ptr[0] = 0x01; /* BaseVersion */ 64 | ptr[1] = MANAGMENT_CLASS_SUBN_ADM; /* MgmtClass */ 65 | ptr[2] = 0x02; /* ClassVersion */ 66 | ptr[3] = INSERTF(ptr[3], 0, method, 0, 7); /* Method */ 67 | (*(uint64_t *)(ptr + 8)) = ntoh_64((uint64_t)DEF_TRANS_ID); /* TransactionID */ 68 | (*(uint16_t *)(ptr + 16)) = htons(SUBN_ADM_ATTR_MC_MEMBER_RECORD); /* AttributeID */ 69 | 70 | ptr = samad_packet->SubnetAdminData; 71 | 72 | memcpy(&ptr[0],params->mgid.raw, 16); 73 | memcpy(&ptr[16],params->port_gid.raw, 16); 74 | 75 | (*(uint32_t *)(ptr + 32)) = htonl(DEF_QKEY); 76 | (*(uint16_t *)(ptr + 40)) = params->pkey; 77 | ptr[39] = DEF_TCLASS; 78 | ptr[44] = INSERTF(ptr[44], 4, DEF_SLL, 0, 4); 79 | ptr[44] = INSERTF(ptr[44], 0, DEF_FLOW_LABLE, 16, 4); 80 | ptr[45] = INSERTF(ptr[45], 0, DEF_FLOW_LABLE, 8, 8); 81 | ptr[46] = INSERTF(ptr[46], 0, DEF_FLOW_LABLE, 0, 8); 82 | ptr[48] = INSERTF(ptr[48], 0, MCMEMBER_JOINSTATE_FULL_MEMBER, 0, 4); 83 | 84 | comp_mask = SUBN_ADM_COMPMASK_MGID | SUBN_ADM_COMPMASK_PORT_GID | SUBN_ADM_COMPMASK_Q_KEY | 85 | SUBN_ADM_COMPMASK_P_KEY | SUBN_ADM_COMPMASK_TCLASS | SUBN_ADM_COMPMASK_SL | 86 | SUBN_ADM_COMPMASK_FLOW_LABEL | SUBN_ADM_COMPMASK_JOIN_STATE; 87 | 88 | samad_packet->ComponentMask = ntoh_64(comp_mask); 89 | } 90 | 91 | /****************************************************************************** 92 | * check_mad_status 93 | ******************************************************************************/ 94 | static int check_mad_status(struct sa_mad_packet_t *samad_packet) 95 | { 96 | uint8_t *ptr; 97 | uint32_t user_trans_id; 98 | uint16_t mad_header_status; 99 | 100 | ptr = samad_packet->mad_header_buf; 101 | 102 | /* the upper 32 bits of TransactionID were set by the kernel */ 103 | user_trans_id = ntohl(*(uint32_t *)(ptr + 12)); 104 | 105 | /* check the TransactionID to make sure this is the response */ 106 | /* for the join/leave multicast group 
request we posted */
107 |     if (user_trans_id != DEF_TRANS_ID) {
108 |         fprintf(stderr, "received a mad with TransactionID 0x%x, when expecting 0x%x\n",
109 |                 (unsigned int)user_trans_id, (unsigned int)DEF_TRANS_ID);
110 |         return 1;
111 |     }
112 |
113 |     mad_header_status = 0x0;
114 |     mad_header_status = INSERTF(mad_header_status, 8, ptr[4], 0, 7);
115 |     mad_header_status = INSERTF(mad_header_status, 0, ptr[5], 0, 8);
116 |
117 |     if (mad_header_status) {
118 |         fprintf(stderr,"received UMAD with an error: 0x%x\n", mad_header_status);
119 |         return 1;
120 |     }
121 |
122 |     return 0;
123 | }
124 |
125 |
126 | /******************************************************************************
127 | * get_mlid_from_mad
128 | ******************************************************************************/
129 | static void get_mlid_from_mad(struct sa_mad_packet_t *samad_packet,uint16_t *mlid)
130 | {
131 |     uint8_t *ptr;
132 |     ptr = samad_packet->SubnetAdminData;
133 |     *mlid = ntohs(*(uint16_t *)(ptr + 36));
134 | }
135 |
136 | /******************************************************************************
137 | * set_multicast_gid
138 | ******************************************************************************/
      /*
       * Fills params->mgid. When params->user_mgid is set it must hold 16 byte
       * values separated by ':' (each parsed by strtoll with base 0); a string
       * with fewer fields is reported on stderr and the default MCG_GID is used
       * instead. Without a user MGID the default is used, and the client side
       * increments its last byte. qp_num is not used.
       */
139 | void set_multicast_gid(struct mcast_parameters *params,uint32_t qp_num,int is_client)
140 | {
141 |     uint8_t mcg_gid[16] = MCG_GID;
142 |     const char *pstr = params->user_mgid;
143 |     const char *term = NULL;
144 |
145 |     if (params->user_mgid) {
146 |         uint8_t user_gid[16];
147 |         int i;
148 |
149 |         for (i = 0; i < 16; ++i) {
150 |             /* strtoll() stops at the ':' separator, so no bounded copy of the field is needed */
151 |             user_gid[i] = (unsigned char)strtoll(pstr, NULL, 0);
152 |             if (i == 15)
153 |                 break;
154 |
155 |             term = strchr(pstr, ':');
156 |             if (term == NULL)
157 |                 break;
158 |             pstr = term + 1;
159 |         }
160 |
161 |         /* i reaches 15 only when all 16 fields were present */
162 |         if (i == 15)
163 |             memcpy(mcg_gid, user_gid, sizeof(mcg_gid));
164 |         else
165 |             fprintf(stderr, "Invalid user MGID, expected 16 ':' separated bytes. Using the default MGID\n");
166 |     }
167 |
168 |     memcpy(params->mgid.raw,mcg_gid,16);
169 |     if (is_client && params->user_mgid==NULL)
170 |         params->mgid.raw[15]++;
171 | }
172 |
173 | /******************************************************************************
174 | * Set pkey correctly for cases where non-default values are used (e.g. Azure setup)
175 | ******************************************************************************/
176 | static int set_pkey(void *umad_buff, struct ibv_context *ctx, int port_num)
177 | {
178 |     struct ibv_device_attr device_attr;
179 |     int32_t partial_ix = -1;
180 |     uint16_t pkey = 0xffff;
181 |     uint16_t tmp_pkey;
182 |     uint16_t pkey_tbl;
183 |     uint16_t index;
184 |     int ret;
185 |     int i;
186 |
187 |     ret = ibv_query_device(ctx, &device_attr);
188 |     if (ret)
189 |         return ret;
190 |
191 |     //coverity[uninit_use]
192 |     pkey_tbl = device_attr.max_pkeys;
193 |     for (i = 0; i < pkey_tbl; ++i) {
194 |         ret = ibv_query_pkey(ctx, port_num, i, &tmp_pkey);
195 |         if (ret)
196 |             continue;
197 |
198 |         //coverity[uninit_use_in_call]
199 |         tmp_pkey = ntohs(tmp_pkey);
200 |         if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
201 |             /* if there is full-member pkey take it.*/
202 |             if (tmp_pkey & 0x8000) {
203 |                 index = i;
204 |                 umad_set_pkey(umad_buff, index);
205 |                 return 0;
206 |             }
207 |             if (partial_ix < 0)
208 |                 partial_ix = i;
209 |         }
210 |     }
211 |
212 |     /*no full-member, if exists take the limited*/
213 |     if (partial_ix >= 0) {
214 |         index = partial_ix;
215 |         umad_set_pkey(umad_buff, index);
216 |         return 0;
217 |     }
218 |
219 |     return 1;
220 | }
221 |
222 | /******************************************************************************
223 | * join_multicast_group
224 | ******************************************************************************/
225 | int join_multicast_group(subn_adm_method method,struct mcast_parameters *params)
226 | {
227 |     int portid = -1;
228 |     int agentid = -1;
229 |     void *umad_buff = NULL;
230 |     void *mad = NULL;
231 |     int
length = MAD_SIZE;
232 |     int test_result = 1;
233 |
234 |     /* mlid will be assigned to the new LID after the join */
235 |     if (umad_init() < 0) {
236 |         fprintf(stderr, "failed to init the UMAD library\n");
237 |         goto cleanup;
238 |     }
239 |     /* use casting to lose the "const char *" */
240 |     portid = umad_open_port((char*)params->ib_devname,params->ib_port);
241 |     if (portid < 0) {
242 |         fprintf(stderr,"failed to open UMAD port %d\n",params->ib_port);
243 |         goto cleanup;
244 |     }
245 |
246 |     agentid = umad_register(portid,MANAGMENT_CLASS_SUBN_ADM, 2, 0, 0);
247 |     if (agentid < 0) {
248 |         fprintf(stderr,"failed to register UMAD agent for MADs\n");
249 |         goto cleanup;
250 |     }
251 |
252 |     umad_buff = umad_alloc(1, umad_size() + MAD_SIZE);
253 |     if (!umad_buff) {
254 |         fprintf(stderr, "failed to allocate MAD buffer\n");
255 |         goto cleanup;
256 |     }
257 |
258 |     mad = umad_get_mad(umad_buff);
259 |     prepare_mcast_mad(method,params,(struct sa_mad_packet_t *)mad);
260 |
261 |     if (set_pkey(umad_buff, params->ib_ctx, params->ib_port)) {
262 |         fprintf(stderr, "failed to set pkey index\n");
263 |         goto cleanup;
264 |     }
265 |
266 |     if (umad_set_addr(umad_buff,params->sm_lid,1,params->sm_sl,QP1_WELL_KNOWN_Q_KEY) < 0) {
267 |         fprintf(stderr, "failed to set the destination address of the SMP\n");
268 |         goto cleanup;
269 |     }
270 |
271 |     if (umad_send(portid,agentid,umad_buff,MAD_SIZE,100,5) < 0) {
272 |         fprintf(stderr, "failed to send MAD\n");
273 |         goto cleanup;
274 |     }
275 |
276 |     if (umad_recv(portid,umad_buff,&length,5000) < 0) {
277 |         fprintf(stderr, "failed to receive MAD response\n");
278 |         goto cleanup;
279 |     }
280 |
281 |     if (check_mad_status((struct sa_mad_packet_t*)mad)) {
282 |         fprintf(stderr, "failed to get mlid from MAD\n");
283 |         goto cleanup;
284 |     }
285 |
286 |     /* "Join multicast group" message was sent */
287 |     if (method == SUBN_ADM_METHOD_SET) {
             /* store the MLID carried by the response in the caller's parameters */
288 |         get_mlid_from_mad((struct sa_mad_packet_t*)mad,&params->mlid);
289 |         params->mcast_state |= MCAST_IS_JOINED;
290 |
sighandler_params = params; 291 | signal(SIGINT,signalCatcher); 292 | } else { 293 | params->mcast_state &= ~MCAST_IS_JOINED; 294 | } 295 | test_result = 0; 296 | 297 | cleanup: 298 | if (umad_buff) 299 | umad_free(umad_buff); 300 | 301 | if (portid >= 0) { 302 | if (agentid >= 0) { 303 | if (umad_unregister(portid, agentid)) { 304 | fprintf(stderr, "failed to deregister UMAD agent for MADs\n"); 305 | test_result = 1; 306 | } 307 | } 308 | 309 | if (umad_close_port(portid)) { 310 | fprintf(stderr, "failed to close UMAD portid\n"); 311 | test_result = 1; 312 | } 313 | } 314 | 315 | return test_result; 316 | } 317 | 318 | /****************************************************************************** 319 | * End 320 | ******************************************************************************/ 321 | -------------------------------------------------------------------------------- /src/multicast_resources.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved. 3 | * 4 | * This software is available to you under a choice of one of two 5 | * licenses. You may choose to be licensed under the terms of the GNU 6 | * General Public License (GPL) Version 2, available from the file 7 | * COPYING in the main directory of this source tree, or the 8 | * OpenIB.org BSD license below: 9 | * 10 | * Redistribution and use in source and binary forms, with or 11 | * without modification, are permitted provided that the following 12 | * conditions are met: 13 | * 14 | * - Redistributions of source code must retain the above 15 | * copyright notice, this list of conditions and the following 16 | * disclaimer. 17 | * 18 | * - Redistributions in binary form must reproduce the above 19 | * copyright notice, this list of conditions and the following 20 | * disclaimer in the documentation and/or other materials 21 | * provided with the distribution. 
22 | * 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | * SOFTWARE. 31 | * 32 | * Author: Ido Shamay 33 | */ 34 | 35 | #ifndef MULTICAST_RESOURCES_H 36 | #define MULTICAST_RESOURCES_H 37 | 38 | /* Multicast Module for perftest. 39 | * 40 | * Description : 41 | * 42 | * This file contains the structures and methods for implementing multiple 43 | * multicast groups in a user space environment. 44 | * The module is in use in "send_bw" and "send_lat", but can be used in other 45 | * applications and can generate more methods and serve more benchmarks. 46 | * The module uses only the structures defined here, enabling generic use of it. 47 | * 48 | * Defined Types : 49 | * 50 | * mcast_parameters - Contains all the parameters needed for this module. 51 | * mcast_group - The multicast group entity itself. 52 | * mcg_qp - Is a QP structure that is attached to the group. 53 | * 54 | */ 55 | 56 | 57 | /************************************************************************ 58 | * Macros , Defines and Files included for work. 
* 59 | ************************************************************************/ 60 | 61 | #include 62 | #include 63 | #include "get_clock.h" 64 | 65 | #define QPNUM_MCAST 0xffffff 66 | #define DEF_QKEY 0x11111111 67 | #define DEF_PKEY_IDX 0 68 | #define DEF_SLL 0 69 | #define MAX_POLL_ITERATION_TIMEOUT 1000000 70 | #define MCG_GID {255,1,0,0,0,2,201,133,0,0,0,0,0,0,0,0} 71 | 72 | /* Definitions section for MADs */ 73 | #define SUBN_ADM_ATTR_MC_MEMBER_RECORD 0x38 74 | #define MANAGMENT_CLASS_SUBN_ADM 0x03 /* Subnet Administration class */ 75 | #define MCMEMBER_JOINSTATE_FULL_MEMBER 0x1 76 | #define MAD_SIZE 256 /* The size of a MAD is 256 bytes */ 77 | #define QP1_WELL_KNOWN_Q_KEY 0x80010000 /* Q_Key value of QP1 */ 78 | #define DEF_TRANS_ID 0x12345678 /* TransactionID */ 79 | #define DEF_TCLASS 0 80 | #define DEF_FLOW_LABLE 0 81 | 82 | /* generate a bit mask S bits width */ 83 | #define MASK32(S) ( ((uint32_t) ~0L) >> (32-(S)) ) 84 | 85 | /* generate a bit mask with bits O+S..O set (assumes 32 bit integer). */ 86 | #define BITS32(O,S) ( MASK32(S) << (O) ) 87 | 88 | /* extract S bits from (u_int32_t)W with offset O and shifts them O places to the right */ 89 | #define EXTRACT32(W,O,S) ( ((W)>>(O)) & MASK32(S) ) 90 | 91 | /* insert S bits with offset O from field F into word W (u_int32_t) */ 92 | #define INSERT32(W,F,O,S) (/*(W)=*/ ( ((W) & (~BITS32(O,S)) ) | (((F) & MASK32(S))<<(O)) )) 93 | 94 | #ifndef INSERTF 95 | #define INSERTF(W,O1,F,O2,S) (INSERT32(W, EXTRACT32(F, O2, S), O1, S) ) 96 | #endif 97 | 98 | 99 | /* according to Table 187 in the IB spec 1.2.1 */ 100 | typedef enum { 101 | SUBN_ADM_METHOD_SET = 0x2, 102 | SUBN_ADM_METHOD_DELETE = 0x15 103 | } subn_adm_method; 104 | 105 | /* Utilities for Umad Usage. 
/*
 * Single-bit flags, one per selectable component, listed here in ascending
 * bit order. Each enumerator carries an explicit value, so declaration order
 * has no effect on the constants.
 * NOTE(review): the names suggest these are OR-ed into the ComponentMask
 * field of struct sa_mad_packet_t - confirm against the users of this enum.
 */
typedef enum {
	SUBN_ADM_COMPMASK_MGID		= (1ULL << 0),
	SUBN_ADM_COMPMASK_PORT_GID	= (1ULL << 1),
	SUBN_ADM_COMPMASK_Q_KEY		= (1ULL << 2),
	SUBN_ADM_COMPMASK_TCLASS	= (1ULL << 6),
	SUBN_ADM_COMPMASK_P_KEY		= (1ULL << 7),
	SUBN_ADM_COMPMASK_SL		= (1ULL << 12),
	SUBN_ADM_COMPMASK_FLOW_LABEL	= (1ULL << 13),
	SUBN_ADM_COMPMASK_JOIN_STATE	= (1ULL << 16),
} subn_adm_component_mask;
165 | * 166 | * Description : 167 | * 168 | * Sets the Multicast GID , and stores it in the "mgid" value of 169 | * mcast resources. If the user requested a specific MGID, which 170 | * is stored in params->user_mgid (in this case params->is_user_mgid should be 1) 171 | * then it will be used as the MGID; if not, the library chooses a default one. 172 | * 173 | * Parameters : 174 | * 175 | * params - The parameters of the machine 176 | * qp_num ,is_client - NOTE(review): not described here; confirm their meaning against the implementation. 177 | * 178 | * Return Value : None (the function is declared void). 179 | */ 180 | void set_multicast_gid(struct mcast_parameters *params,uint32_t qp_num,int is_client); 181 | 182 | 183 | /* join_multicast_group . 184 | * 185 | * Description : 186 | * 187 | * NOTE(review): the text previously here described ctx_close_connection. 188 | * Presumably issues a multicast group request using the given method - confirm against the implementation. 189 | * 190 | * Parameters : 191 | * 192 | * method - One of the subn_adm_method values (SUBN_ADM_METHOD_SET or SUBN_ADM_METHOD_DELETE). 193 | * params - The multicast parameters of the machine. 194 | * 195 | * Return Value : 0 upon success. -1 if it fails. 196 | */ 197 | int join_multicast_group(subn_adm_method method,struct mcast_parameters *params); 198 | 199 | 200 | #endif /* MULTICAST_RESOURCES_H */ 201 | -------------------------------------------------------------------------------- /src/neuron_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved.
4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "neuron_memory.h" 11 | #include "perftest_parameters.h" 12 | 13 | #define NRT_VISIBLE_CORES_STR_LEN 8 14 | #define ACCEL_PAGE_SIZE (4 * 1024) 15 | 16 | 17 | struct neuron_memory_ctx { 18 | struct memory_ctx base; 19 | int core_id; 20 | int max_tensors; 21 | nrt_tensor_t **tensors; 22 | int num_of_tensors; 23 | bool use_dmabuf; 24 | }; 25 | 26 | 27 | int neuron_memory_init(struct memory_ctx *ctx) { 28 | struct neuron_memory_ctx *neuron_ctx = container_of(ctx, struct neuron_memory_ctx, base); 29 | NRT_STATUS result; 30 | char *env_var = getenv("NEURON_RT_VISIBLE_CORES"); 31 | 32 | if (env_var != NULL) { 33 | printf("NEURON_RT_VISIBLE_CORES is set to %s, core id will be used relatively\n",env_var); 34 | } else { 35 | char nrt_visible_cores[NRT_VISIBLE_CORES_STR_LEN]; 36 | 37 | printf("NEURON_RT_VISIBLE_CORES is not set, setting to %d\n",neuron_ctx->core_id); 38 | snprintf(nrt_visible_cores, sizeof(nrt_visible_cores), "%d", neuron_ctx->core_id); 39 | setenv("NEURON_RT_VISIBLE_CORES", nrt_visible_cores, 1); 40 | neuron_ctx->core_id = 0; 41 | } 42 | 43 | result = nrt_init(NRT_FRAMEWORK_TYPE_NO_FW, "", ""); 44 | if (result != NRT_SUCCESS) { 45 | fprintf(stderr, "Couldn't initialize Neuron device\n"); 46 | return FAILURE; 47 | } 48 | return SUCCESS; 49 | } 50 | 51 | int neuron_memory_destroy(struct memory_ctx *ctx) { 52 | struct neuron_memory_ctx *neuron_ctx = container_of(ctx, struct neuron_memory_ctx, base); 53 | int i; 54 | 55 | for (i = 0; i < neuron_ctx->num_of_tensors; i++) { 56 | nrt_tensor_free(&neuron_ctx->tensors[i]); 57 | } 58 | neuron_ctx->num_of_tensors = 0; 59 | nrt_close(); 60 | 61 | free(neuron_ctx->tensors); 62 | free(neuron_ctx); 63 | return SUCCESS; 64 | } 65 | 66 | int neuron_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, 67 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 68 | struct neuron_memory_ctx *neuron_ctx = 
container_of(ctx, struct neuron_memory_ctx, base); 69 | void *d_A = NULL; 70 | NRT_STATUS result; 71 | size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1); 72 | int tensor_index = neuron_ctx->num_of_tensors; 73 | 74 | if (tensor_index >= neuron_ctx->max_tensors) 75 | { 76 | printf("Can't allocate Neuron memory, max tensors reached\n"); 77 | return FAILURE; 78 | } 79 | 80 | result = nrt_tensor_allocate(NRT_TENSOR_PLACEMENT_DEVICE, neuron_ctx->core_id, buf_size, NULL, &neuron_ctx->tensors[tensor_index]); 81 | if (result != NRT_SUCCESS) { 82 | neuron_ctx->tensors[tensor_index] = NULL; 83 | printf("nrt_tensor_allocate_error =%d\n", (int)result); 84 | return FAILURE; 85 | } 86 | 87 | d_A = nrt_tensor_get_va(neuron_ctx->tensors[tensor_index]); 88 | if (d_A == NULL) { 89 | nrt_tensor_free(&neuron_ctx->tensors[tensor_index]); 90 | neuron_ctx->tensors[tensor_index] = NULL; 91 | printf("Failed to get va for the allocated tensor\n"); 92 | return FAILURE; 93 | } 94 | 95 | if (neuron_ctx->use_dmabuf) { 96 | result = nrt_get_dmabuf_fd((uint64_t)d_A, (uint64_t)buf_size, dmabuf_fd); 97 | if (result != NRT_SUCCESS) { 98 | nrt_tensor_free(&neuron_ctx->tensors[tensor_index]); 99 | neuron_ctx->tensors[tensor_index] = NULL; 100 | *dmabuf_fd = 0; 101 | printf("Unable to retrieve dmabuf fd of Neuron device buffer\n"); 102 | return FAILURE; 103 | } 104 | 105 | *dmabuf_offset = 0; 106 | } 107 | 108 | neuron_ctx->num_of_tensors++; 109 | *addr = d_A; 110 | *can_init = false; 111 | return SUCCESS; 112 | } 113 | 114 | int neuron_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) { 115 | return SUCCESS; 116 | } 117 | 118 | bool neuron_memory_supported() { 119 | return true; 120 | } 121 | 122 | bool neuron_memory_dmabuf_supported() { 123 | #ifdef HAVE_NEURON_DMABUF 124 | return true; 125 | #else 126 | return false; 127 | #endif 128 | } 129 | 130 | struct memory_ctx *neuron_memory_create(struct perftest_parameters *params) { 131 | struct 
neuron_memory_ctx *ctx; 132 | 133 | ALLOCATE(ctx, struct neuron_memory_ctx, 1); 134 | ctx->base.init = neuron_memory_init; 135 | ctx->base.destroy = neuron_memory_destroy; 136 | ctx->base.allocate_buffer = neuron_memory_allocate_buffer; 137 | ctx->base.free_buffer = neuron_memory_free_buffer; 138 | ctx->base.copy_host_to_buffer = memcpy; 139 | ctx->base.copy_buffer_to_host = memcpy; 140 | ctx->base.copy_buffer_to_buffer = memcpy; 141 | ctx->core_id = params->neuron_core_id; 142 | ctx->max_tensors = params->num_of_qps * 2; 143 | ALLOCATE(ctx->tensors, nrt_tensor_t* , ctx->max_tensors); 144 | ctx->num_of_tensors = 0; 145 | ctx->use_dmabuf = params->use_neuron_dmabuf; 146 | return &ctx->base; 147 | } 148 | -------------------------------------------------------------------------------- /src/neuron_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
4 | */ 5 | 6 | #ifndef NEURON_MEMORY_H 7 | #define NEURON_MEMORY_H 8 | 9 | #include "memory.h" 10 | #include "config.h" 11 | 12 | 13 | struct perftest_parameters; 14 | 15 | bool neuron_memory_supported(); 16 | 17 | bool neuron_memory_dmabuf_supported(); 18 | 19 | struct memory_ctx *neuron_memory_create(struct perftest_parameters *params); 20 | 21 | 22 | #ifndef HAVE_NEURON 23 | 24 | inline bool neuron_memory_supported() { 25 | return false; 26 | } 27 | 28 | inline bool neuron_memory_dmabuf_supported() { 29 | return false; 30 | } 31 | 32 | inline struct memory_ctx *neuron_memory_create(struct perftest_parameters *params) { 33 | return NULL; 34 | } 35 | 36 | #endif 37 | 38 | #endif /* NEURON_MEMORY_H */ 39 | -------------------------------------------------------------------------------- /src/opencl_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "perftest_parameters.h" 9 | #include "perftest_resources.h" 10 | #define CL_TARGET_OPENCL_VERSION 220 11 | #include 12 | 13 | __attribute__ ((unused)) static const char *opencl_mem_types_str[] = { 14 | "OPENCL_MEM_SVM", 15 | }; 16 | 17 | // Allocating this context on heap so it can be passed to other thread 18 | struct buffer_ctx { 19 | pthread_t thread; 20 | const void * addr; 21 | bool gpu_touch_infinite; 22 | bool gpu_touch_stop; 23 | size_t size; 24 | cl_command_queue command_queue; 25 | }; 26 | 27 | struct opencl_memory_ctx { 28 | struct buffer_ctx *buffer_ctx; 29 | struct memory_ctx base; 30 | int platform_ix; 31 | int device_ix; 32 | cl_context cl_context; 33 | cl_command_queue command_queue; 34 | int gpu_touch; 35 | }; 36 | 37 | static int init_gpu(struct opencl_memory_ctx *ctx) 38 | { 39 | cl_uint num_devices; 40 | cl_uint num_platforms; 41 | cl_device_svm_capabilities caps; 42 | cl_int error; 43 | cl_platform_id *platform_ids; 44 | 
cl_device_id *device_ids; 45 | int ret = SUCCESS; 46 | 47 | platform_ids = malloc(sizeof(cl_platform_id) * (ctx->platform_ix + 1)); 48 | if (!platform_ids) { 49 | printf("Allocation of platform_ids failed\n"); 50 | return FAILURE; 51 | } 52 | 53 | device_ids = malloc(sizeof(cl_device_id) * (ctx->device_ix + 1)); 54 | if (!device_ids) { 55 | printf("Allocation of device_ids failed\n"); 56 | ret = FAILURE; 57 | goto free_platform_ids; 58 | } 59 | 60 | error = clGetPlatformIDs((ctx->platform_ix + 1), platform_ids, &num_platforms); 61 | if (error) { 62 | printf("clGetPlatformIDs returned %d\n", error); 63 | ret = FAILURE; 64 | goto free_device_ids; 65 | } 66 | 67 | if (num_platforms < ctx->platform_ix) 68 | { 69 | printf("platform_id (%d) is not in the range of num_platforms (%d)\n", 70 | ctx->platform_ix, num_platforms); 71 | ret = FAILURE; 72 | goto free_device_ids; 73 | } 74 | 75 | error = clGetDeviceIDs(platform_ids[ctx->platform_ix], CL_DEVICE_TYPE_ALL, (ctx->device_ix + 1), device_ids, &num_devices); 76 | if (error) { 77 | printf("clGetDeviceIDs returned %d\n", error); 78 | ret = FAILURE; 79 | goto free_device_ids; 80 | } 81 | 82 | if (num_devices < ctx->device_ix) 83 | { 84 | printf("device_id (%d) is not in the range of num_devices (%d)\n", 85 | ctx->device_ix, num_devices); 86 | ret = FAILURE; 87 | goto free_device_ids; 88 | } 89 | 90 | error = clGetDeviceInfo(device_ids[ctx->device_ix], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, 0); 91 | if (error) { 92 | printf("clGetDeviceInfo returned %d\n", error); 93 | ret = FAILURE; 94 | goto free_device_ids; 95 | } 96 | 97 | if (!(caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM)) 98 | { 99 | printf("SVM needed caps are not supported\n"); 100 | ret = FAILURE; 101 | goto free_device_ids; 102 | } 103 | 104 | ctx->cl_context = clCreateContext(NULL, 1, &device_ids[ctx->device_ix], NULL, NULL, &error); 105 | if (error) { 106 | printf("clCreateContext returned %d\n", error); 107 | ret = FAILURE; 108 | goto 
free_device_ids; 109 | } 110 | 111 | ctx->command_queue = clCreateCommandQueueWithProperties(ctx->cl_context, device_ids[ctx->device_ix], NULL, &error); 112 | if (error) { 113 | printf("clCreateCommandQueueWithProperties failed with ret=%d\n", error); 114 | clReleaseContext(ctx->cl_context); 115 | ret = FAILURE; 116 | } 117 | 118 | free_device_ids: 119 | free(device_ids); 120 | free_platform_ids: 121 | free(platform_ids); 122 | 123 | return ret; 124 | } 125 | 126 | static void free_gpu(struct opencl_memory_ctx *ctx) 127 | { 128 | printf("destroying current OpenCL ctx\n"); 129 | clReleaseCommandQueue(ctx->command_queue); 130 | clReleaseContext(ctx->cl_context); 131 | } 132 | 133 | int opencl_memory_init(struct memory_ctx *ctx) 134 | { 135 | struct opencl_memory_ctx *opencl_ctx = container_of(ctx, struct opencl_memory_ctx, base); 136 | int return_value = 0; 137 | 138 | return_value = init_gpu(opencl_ctx); 139 | if (return_value) { 140 | fprintf(stderr, "Couldn't init GPU context: %d\n", return_value); 141 | return FAILURE; 142 | } 143 | 144 | return SUCCESS; 145 | } 146 | 147 | int opencl_memory_destroy(struct memory_ctx *ctx) { 148 | struct opencl_memory_ctx *opencl_ctx = container_of(ctx, struct opencl_memory_ctx, base); 149 | 150 | free_gpu(opencl_ctx); 151 | free(opencl_ctx); 152 | return SUCCESS; 153 | } 154 | 155 | void *touch_gpu_pages(void *ctx_param) { 156 | struct buffer_ctx *ctx = (struct buffer_ctx *)ctx_param; 157 | int ret;// cppcheck-suppress variableScope 158 | do { 159 | ret = clEnqueueSVMMigrateMem(ctx->command_queue, 1, &ctx->addr, &ctx->size, 0, 0, NULL, NULL); 160 | if (ret) { 161 | printf("clEnqueueSVMMigrateMem failed with ret=%d\n", ret); 162 | break; 163 | } 164 | 165 | ret = clFlush(ctx->command_queue); 166 | if (ret) { 167 | printf("clFlush with ret=%d\n", ret); 168 | break; 169 | } 170 | 171 | ret = clFinish(ctx->command_queue); 172 | if (ret) { 173 | printf("clFinish with ret=%d\n", ret); 174 | break; 175 | } 176 | } while 
(ctx->gpu_touch_infinite && !ctx->gpu_touch_stop); 177 | return NULL; 178 | } 179 | 180 | int opencl_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, uint64_t *dmabuf_offset, void **addr, bool *can_init) { 181 | struct opencl_memory_ctx *opencl_ctx = container_of(ctx, struct opencl_memory_ctx, base); 182 | 183 | *addr = clSVMAlloc(opencl_ctx->cl_context, CL_MEM_READ_WRITE, size, MAX(alignment, sysconf(_SC_PAGESIZE))); 184 | if (!*addr) 185 | { 186 | printf("clSVMAlloc failed\n"); 187 | return -1; 188 | } 189 | 190 | opencl_ctx->buffer_ctx = NULL; 191 | 192 | if (opencl_ctx->gpu_touch != GPU_NO_TOUCH) { 193 | opencl_ctx->buffer_ctx = malloc(sizeof(struct buffer_ctx)); 194 | if (!opencl_ctx->buffer_ctx) { 195 | clSVMFree(opencl_ctx->cl_context, addr); 196 | printf("Failed to allocate context for gpu_touch\n"); 197 | return -ENOMEM; 198 | } 199 | 200 | opencl_ctx->buffer_ctx->addr = *addr; 201 | opencl_ctx->buffer_ctx->gpu_touch_infinite = opencl_ctx->gpu_touch == GPU_TOUCH_INFINITE; 202 | opencl_ctx->buffer_ctx->gpu_touch_stop = 0; 203 | opencl_ctx->buffer_ctx->size = size; 204 | opencl_ctx->buffer_ctx->command_queue = opencl_ctx->command_queue; 205 | *can_init = false; 206 | 207 | return pthread_create(&opencl_ctx->buffer_ctx->thread, NULL, touch_gpu_pages, opencl_ctx->buffer_ctx); 208 | } 209 | 210 | return 0; 211 | } 212 | 213 | int opencl_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) { 214 | struct opencl_memory_ctx *opencl_ctx = container_of(ctx, struct opencl_memory_ctx, base); 215 | if (opencl_ctx->buffer_ctx) { 216 | opencl_ctx->buffer_ctx->gpu_touch_stop = 1; 217 | if (pthread_join(opencl_ctx->buffer_ctx->thread, NULL)) { 218 | free(opencl_ctx->buffer_ctx); 219 | printf("Error stopping gpu_touch thread\n"); 220 | return -1; 221 | } 222 | free(opencl_ctx->buffer_ctx); 223 | } 224 | clSVMFree(opencl_ctx->cl_context, addr); 225 | return 0; 226 | } 227 | 228 | bool 
opencl_memory_supported() { 229 | return true; 230 | } 231 | 232 | struct memory_ctx *opencl_memory_create(struct perftest_parameters *params) { 233 | struct opencl_memory_ctx *ctx; 234 | 235 | ALLOCATE(ctx, struct opencl_memory_ctx, 1); 236 | ctx->base.init = opencl_memory_init; 237 | ctx->base.destroy = opencl_memory_destroy; 238 | ctx->base.allocate_buffer = opencl_memory_allocate_buffer; 239 | ctx->base.free_buffer = opencl_memory_free_buffer; 240 | ctx->base.copy_host_to_buffer = memcpy; 241 | ctx->base.copy_buffer_to_host = memcpy; 242 | ctx->base.copy_buffer_to_buffer = memcpy; 243 | ctx->device_ix = params->opencl_device_id; 244 | ctx->platform_ix = params->opencl_platform_id; 245 | ctx->gpu_touch = params->gpu_touch; 246 | 247 | return &ctx->base; 248 | } 249 | -------------------------------------------------------------------------------- /src/opencl_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | 3 | #ifndef OPENCL_MEMORY_H 4 | #define OPENCL_MEMORY_H 5 | 6 | #include "memory.h" 7 | #include "config.h" 8 | 9 | 10 | struct perftest_parameters; 11 | 12 | bool opencl_memory_supported(); 13 | 14 | struct memory_ctx *opencl_memory_create(struct perftest_parameters *params); 15 | 16 | 17 | #ifndef HAVE_OPENCL 18 | 19 | inline bool opencl_memory_supported() { 20 | return false; 21 | } 22 | 23 | inline struct memory_ctx *opencl_memory_create(struct perftest_parameters *params) { 24 | return NULL; 25 | } 26 | 27 | #endif 28 | 29 | #endif /* OPENCL_MEMORY_H */ 30 | -------------------------------------------------------------------------------- /src/perftest_counters.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "perftest_parameters.h" 8 | 9 | #define COUNTER_PATH "/sys/class/infiniband/%s/ports/%i/%s" 10 | #define 
COUNTER_VALUE_MAX_LEN (21) 11 | 12 | typedef unsigned long long counter_t; 13 | 14 | struct counter_context { 15 | char *counter_list; 16 | unsigned num_counters; 17 | struct { 18 | int fd; 19 | char *name; 20 | counter_t prev_value; 21 | counter_t last_value; 22 | } counters[]; 23 | }; 24 | 25 | 26 | static int counters_read(struct counter_context *ctx) 27 | { 28 | char read_buf[COUNTER_VALUE_MAX_LEN]; 29 | 30 | int i; 31 | for (i = 0; i < ctx->num_counters; i++) { 32 | int read_fd = ctx->counters[i].fd; 33 | if (read(read_fd, &read_buf, COUNTER_VALUE_MAX_LEN) < 0) { 34 | return -1; 35 | } 36 | 37 | ctx->counters[i].prev_value = ctx->counters[i].last_value; 38 | ctx->counters[i].last_value = strtoll(read_buf, NULL, 10); 39 | (void) lseek(read_fd, 0, SEEK_SET); 40 | } 41 | 42 | return SUCCESS; 43 | } 44 | 45 | int counters_alloc(const char *counter_names, 46 | struct counter_context **ctx) 47 | { 48 | /* Count the number of commas and allocate accordingly */ 49 | unsigned i, num_counters = (unsigned)(strlen(counter_names) > 0); 50 | for (i = 0; i < strlen(counter_names); i++) { 51 | if (counter_names[i] == ',') { 52 | num_counters++; 53 | } 54 | } 55 | 56 | ALLOCATE(*ctx, struct counter_context, 3 * num_counters + 1); 57 | (*ctx)->counter_list = strdup(counter_names); 58 | (*ctx)->num_counters = num_counters; 59 | return SUCCESS; 60 | } 61 | 62 | int counters_open(struct counter_context *ctx, 63 | const char *dev_name, int port) 64 | { 65 | /* Open the sysfs file for each counter */ 66 | int i; 67 | char *given_path, *real_path, *next_counter; 68 | for (i = 0, next_counter = strtok(ctx->counter_list, ","); 69 | i < ctx->num_counters; 70 | i++, next_counter = strtok(0, ",")) { 71 | if (asprintf(&given_path, COUNTER_PATH, dev_name, port, next_counter) == -1) { 72 | goto counter_cleanup; 73 | } 74 | 75 | real_path = realpath(given_path, NULL); 76 | free(given_path); 77 | 78 | if (!real_path) { 79 | free(real_path); 80 | goto counter_cleanup; 81 | } 82 | 83 | if 
(strstr(real_path, COUNTER_PATH) != 0) { 84 | free(real_path); 85 | goto counter_cleanup; 86 | } 87 | 88 | if ((ctx->counters[i].fd = open(real_path, O_RDONLY)) < 0) { 89 | free(real_path); 90 | goto counter_cleanup; 91 | } 92 | 93 | ctx->counters[i].name = next_counter; 94 | ctx->counters[i].last_value = 0; 95 | free(real_path); 96 | } 97 | 98 | return counters_read(ctx); 99 | 100 | counter_cleanup: 101 | ctx->num_counters = i; 102 | counters_close(ctx); 103 | return FAILURE; 104 | } 105 | 106 | void counters_print(struct counter_context *ctx) 107 | { 108 | (void) counters_read(ctx); 109 | 110 | int i; 111 | for (i = 0; i < ctx->num_counters; i++) { 112 | printf("\t%s=%llu\n", ctx->counters[i].name, 113 | ctx->counters[i].last_value - ctx->counters[i].prev_value); 114 | } 115 | printf("\n"); 116 | } 117 | 118 | void counters_close(struct counter_context *ctx) 119 | { 120 | int i; 121 | for (i = 0; i < ctx->num_counters; i++) { 122 | close(ctx->counters[i].fd); 123 | } 124 | 125 | free(ctx->counter_list); 126 | free(ctx); 127 | } 128 | -------------------------------------------------------------------------------- /src/perftest_counters.h: -------------------------------------------------------------------------------- 1 | #ifndef PERFTEST_COUNTERS_H 2 | #define PERFTEST_COUNTERS_H 3 | 4 | struct counter_context; 5 | 6 | /* 7 | * Allocate context for performance counters. 8 | */ 9 | int counters_alloc(const char *counter_names, 10 | struct counter_context **ctx); 11 | 12 | /* 13 | * Open a handle to the counters (and sample once). 14 | */ 15 | int counters_open(struct counter_context *ctx, 16 | const char *dev_name, int port); 17 | 18 | /* 19 | * Sample and output the values to STDOUT. 20 | */ 21 | void counters_print(struct counter_context *ctx); 22 | 23 | /* 24 | * Close the handle to the counters. 
25 | */ 26 | void counters_close(struct counter_context *ctx); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/raw_ethernet_fs_rate.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 | * Copyright (c) 2009 HNR Consulting. All rights reserved. 5 | * 6 | * This software is available to you under a choice of one of two 7 | * licenses. You may choose to be licensed under the terms of the GNU 8 | * General Public License (GPL) Version 2, available from the file 9 | * COPYING in the main directory of this source tree, or the 10 | * OpenIB.org BSD license below: 11 | * 12 | * Redistribution and use in source and binary forms, with or 13 | * without modification, are permitted provided that the following 14 | * conditions are met: 15 | * 16 | * - Redistributions of source code must retain the above 17 | * copyright notice, this list of conditions and the following 18 | * disclaimer. 19 | * 20 | * - Redistributions in binary form must reproduce the above 21 | * copyright notice, this list of conditions and the following 22 | * disclaimer in the documentation and/or other materials 23 | * provided with the distribution. 24 | * 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 | * SOFTWARE. 
33 | * 34 | * $Id$ 35 | */ 36 | 37 | #if defined(__FreeBSD__) 38 | #include 39 | #include 40 | #endif 41 | 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include "perftest_parameters.h" 50 | #include "perftest_resources.h" 51 | #include "multicast_resources.h" 52 | #include "perftest_communication.h" 53 | #include "raw_ethernet_resources.h" 54 | 55 | /****************************************************************************** 56 | * 57 | ******************************************************************************/ 58 | int main(int argc, char *argv[]) 59 | { 60 | struct ibv_device *ib_dev = NULL; 61 | struct pingpong_context ctx; 62 | struct raw_ethernet_info *my_dest_info = NULL; 63 | struct raw_ethernet_info *rem_dest_info = NULL; 64 | int ret_parser; 65 | struct perftest_parameters user_param; 66 | struct report_options report; 67 | 68 | /* init default values to user's parameters */ 69 | memset(&ctx, 0, sizeof(struct pingpong_context)); 70 | memset(&user_param, 0, sizeof(struct perftest_parameters)); 71 | 72 | user_param.tst = FS_RATE; 73 | user_param.verb = SEND; 74 | strncpy(user_param.version, VERSION, sizeof(user_param.version)); 75 | user_param.connection_type = RawEth; 76 | user_param.r_flag = &report; 77 | 78 | ret_parser = parser(&user_param, argv, argc); 79 | 80 | if (ret_parser) { 81 | if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT) { 82 | fprintf(stderr, " Parser function exited with Error\n"); 83 | } 84 | DEBUG_LOG(TRACE,"<<<<<<%s", __FUNCTION__); 85 | return FAILURE; 86 | } 87 | user_param.machine = SERVER; 88 | 89 | /* Allocate user input dependable structs */ 90 | MAIN_ALLOC(my_dest_info, struct raw_ethernet_info, user_param.num_of_qps, return_error); 91 | memset(my_dest_info, 0, sizeof(struct raw_ethernet_info) * user_param.num_of_qps); 92 | MAIN_ALLOC(rem_dest_info, struct raw_ethernet_info, user_param.num_of_qps, free_my_dest); 93 | memset(rem_dest_info, 0, sizeof(struct 
raw_ethernet_info) * user_param.num_of_qps); 94 | 95 | /* Finding the IB device selected (or default if no selected). */ 96 | ib_dev = ctx_find_dev(&user_param.ib_devname); 97 | if (!ib_dev) { 98 | fprintf(stderr, "Unable to find the Infiniband/RoCE device\n"); 99 | DEBUG_LOG(TRACE, "<<<<<<%s", __FUNCTION__); 100 | goto free_mem; 101 | } 102 | 103 | if (check_flow_steering_support(user_param.ib_devname)) { 104 | goto free_devname; 105 | } 106 | 107 | /* Getting the relevant context from the device */ 108 | ctx.context = ibv_open_device(ib_dev); 109 | if (!ctx.context) { 110 | fprintf(stderr, "Couldn't get context for the device\n"); 111 | DEBUG_LOG(TRACE, "<<<<<<%s", __FUNCTION__); 112 | goto free_devname; 113 | } 114 | 115 | /* See if MTU and link type are valid and supported. */ 116 | if (check_link_and_mtu(ctx.context, &user_param)) { 117 | fprintf(stderr, "Couldn't get context for the device\n"); 118 | DEBUG_LOG(TRACE, "<<<<<<%s", __FUNCTION__); 119 | goto free_devname; 120 | } 121 | 122 | /* Allocating arrays needed for the test. */ 123 | if(alloc_ctx(&ctx,&user_param)){ 124 | fprintf(stderr, "Couldn't allocate context\n"); 125 | goto free_devname; 126 | } 127 | 128 | /* create all the basic IB resources (data buffer, PD, MR, CQ and events channel) */ 129 | if (ctx_init(&ctx, &user_param)) { 130 | fprintf(stderr, "Couldn't create IB resources\n"); 131 | dealloc_ctx(&ctx, &user_param); 132 | goto free_devname; 133 | } 134 | 135 | /* Print basic test information. 
*/ 136 | ctx_print_test_info(&user_param); 137 | 138 | if(run_iter_fs(&ctx, &user_param)){ 139 | fprintf(stderr, "Unable to run iter fs rate\n"); 140 | goto destroy_ctx; 141 | } 142 | 143 | print_report_fs_rate(&user_param); 144 | 145 | if (destroy_ctx(&ctx, &user_param)) { 146 | fprintf(stderr, "Failed to destroy_ctx\n"); 147 | DEBUG_LOG(TRACE, "<<<<<<%s", __FUNCTION__); 148 | goto free_devname; 149 | } 150 | 151 | if (user_param.output == FULL_VERBOSITY) 152 | printf(RESULT_LINE); 153 | 154 | DEBUG_LOG(TRACE, "<<<<<<%s", __FUNCTION__); 155 | free(my_dest_info); 156 | free(rem_dest_info); 157 | free(user_param.ib_devname); 158 | return SUCCESS; 159 | 160 | destroy_ctx: 161 | destroy_ctx(&ctx, &user_param); 162 | free_devname: 163 | free(user_param.ib_devname); 164 | free_mem: 165 | free(rem_dest_info); 166 | free_my_dest: 167 | free(my_dest_info); 168 | return_error: 169 | return FAILURE; 170 | } 171 | -------------------------------------------------------------------------------- /src/raw_ethernet_resources.h: -------------------------------------------------------------------------------- 1 | #ifndef RAW_ETHERNET_RESOURCES_H 2 | #define RAW_ETHERNET_RESOURCES_H 3 | 4 | 5 | #if defined(__FreeBSD__) 6 | #include 7 | #include 8 | #endif 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "perftest_parameters.h" 18 | #include "perftest_resources.h" 19 | #include "multicast_resources.h" 20 | #include "perftest_communication.h" 21 | 22 | #undef __LITTLE_ENDIAN 23 | #if defined(__FreeBSD__) 24 | #include 25 | #else 26 | #include 27 | #endif 28 | 29 | #define INFO "INFO" 30 | #define TRACE "TRACE" 31 | 32 | #ifdef DEBUG 33 | #define DEBUG_LOG(type,fmt, args...) fprintf(stderr,"file:%s: %d ""["type"]"fmt"\n",__FILE__,__LINE__,args) 34 | #else 35 | #define DEBUG_LOG(type,fmt, args...) 
/*
 * ETH_header
 *
 * Packed layout of an Ethernet header without a VLAN tag: two 6-byte MAC
 * addresses followed by a 16-bit type field, 14 bytes in total.
 */
struct ETH_header {
	uint8_t		dst_mac[6];	/* destination MAC address of the packet */
	uint8_t		src_mac[6];	/* source MAC address of the packet */
	uint16_t	eth_type;	/* ether type (e.g. IP) or size of the packet */
}__attribute__((packed));
/*
 * TCP_header
 *
 * Packed 20-byte TCP header (no options).
 *
 * The data-offset and reserved nibbles share one byte. Their declaration
 * order is swapped when __BIG_ENDIAN_BITFIELD is defined, the same way
 * struct IP_V4_header handles its version/ihl nibbles. When the macro is not
 * defined the original declaration order is kept.
 *
 * Note: the field name th_falgs (sic) is kept as-is because callers refer to
 * it by that name.
 */
struct TCP_header {
	uint16_t	th_sport;	/* source port */
	uint16_t	th_dport;	/* destination port */
	uint32_t	th_seq;
	uint32_t	th_ack;
#if defined(__BIG_ENDIAN_BITFIELD)
	uint8_t		th_doff:4;
	uint8_t		th_rsv:4;
#else
	uint8_t		th_rsv:4;
	uint8_t		th_doff:4;
#endif
	uint8_t		th_falgs;
	uint16_t	th_window;
	uint16_t	th_check;
	uint16_t	th_urgptr;
}__attribute__((packed));
udp_header); 169 | void print_pkt(void* pkt,struct perftest_parameters *user_param, struct memory_ctx *memory); 170 | 171 | int check_flow_steering_support(char *dev_name); 172 | 173 | /* build_pkt_on_buffer 174 | * Description: build single Ethernet packet on ctx buffer 175 | * 176 | * Parameters: 177 | * eth_header - Pointer to output 178 | * my_dest_info - ethernet information of me 179 | * rem_dest_info - ethernet information of the remote 180 | * user_param - user_parameters struct for this test 181 | * eth_type - 182 | * ip_next_protocol - 183 | * print_flag - if print_flag == TRUE : print the packet after it's done 184 | * pkt_size - size of the requested packet 185 | * flows_offset - current offset from the base flow 186 | */ 187 | void build_pkt_on_buffer(struct ETH_header* eth_header, 188 | struct raw_ethernet_info *my_dest_info, 189 | struct raw_ethernet_info *rem_dest_info, 190 | struct perftest_parameters *user_param, 191 | struct memory_ctx *memory, 192 | uint16_t eth_type, 193 | uint16_t ip_next_protocol, 194 | int print_flag, 195 | int pkt_size, 196 | int flows_offset); 197 | 198 | /* create_raw_eth_pkt 199 | * Description: build raw Ethernet packet by user arguments 200 | * on bw test, build one packet and duplicate it on the buffer 201 | * on lat test, build only one packet on the buffer (for the ping pong method) 202 | * 203 | * Parameters: 204 | * user_param - user_parameters struct for this test 205 | * ctx - Test Context. 206 | * buf - The QP's packet buffer. 
207 | * my_dest_info - ethernet information of me 208 | * rem_dest_info - ethernet information of the remote 209 | */ 210 | void create_raw_eth_pkt( struct perftest_parameters *user_param, 211 | struct pingpong_context *ctx , 212 | void *eth_header, 213 | struct raw_ethernet_info *my_dest_info, 214 | struct raw_ethernet_info *rem_dest_info); 215 | 216 | /*calc_flow_rules_size 217 | * Description: calculate the size of the flow(size of headers - ib, ethernet and ip/udp if available) 218 | * Parameters: 219 | * is_ip_header - if ip header is exist, count the header's size 220 | * is_udp_header - if udp header is exist, count the header's size 221 | * 222 | */ 223 | int calc_flow_rules_size(struct perftest_parameters *user_param, int is_ip_header,int is_udp_header); 224 | 225 | /* send_set_up_connection 226 | * Description: init raw_ethernet_info and ibv_flow_spec to user args 227 | * 228 | * Parameters: 229 | * flow_rules - Pointer to output, is set to header buffer and specification information 230 | * ctx - Test Context. 231 | * user_param - user_parameters struct for this test 232 | * my_dest_info - ethernet information of me 233 | * rem_dest_info - ethernet information of the remote 234 | * 235 | */ 236 | 237 | int send_set_up_connection( 238 | struct ibv_flow_attr **flow_rules, 239 | struct pingpong_context *ctx, 240 | struct perftest_parameters *user_param, 241 | struct raw_ethernet_info* my_dest_info, 242 | struct raw_ethernet_info* rem_dest_info); 243 | 244 | /* gen_ip_header . 
245 | 246 | * Description :create IP header on buffer 247 | * 248 | * Parameters : 249 | * ip_header_buff - Pointer to output 250 | * saddr - source IP address of the packet(network order) 251 | * daddr - destination IP address of the packet(network order) 252 | * pkt_size - size of the packet 253 | * hop_limit - hop limit (ttl for ipv4) 254 | * flows_offset - current offset from the base flow 255 | */ 256 | void gen_ip_header(void* ip_header_buff, uint32_t* saddr, uint32_t* daddr, 257 | uint8_t protocol, int pkt_size, int hop_limit, int tos, int flows_offset); 258 | 259 | /* gen_udp_header . 260 | 261 | * Description :create UDP header on buffer 262 | * 263 | * Parameters : 264 | * UDP_header_buffer - Pointer to output 265 | * src_port - source UDP port of the packet 266 | * dst_port -destination UDP port of the packet 267 | * pkt_size - size of the packet 268 | */ 269 | void gen_udp_header(void* UDP_header_buffer, int src_port, int dst_port, int pkt_size); 270 | 271 | /* gen_tcp_header . 272 | 273 | * Description :create TCP header on buffer 274 | * 275 | * Parameters : 276 | * TCP_header_buffer - Pointer to output 277 | * src_port - source TCP port of the packet 278 | * dst_port -destination TCP port of the packet 279 | */ 280 | void gen_tcp_header(void* TCP_header_buffer,int src_port ,int dst_port); 281 | 282 | /* run_iter_fw 283 | * 284 | * Description : 285 | * 286 | * In this method we receive packets and "turn them around" 287 | * this is done by changing the dmac with the smac 288 | * 289 | * Parameters : 290 | * 291 | * ctx - Test Context. 292 | * user_param - user_parameters struct for this test. 293 | */ 294 | int run_iter_fw(struct pingpong_context *ctx,struct perftest_parameters *user_param); 295 | 296 | /* switch_smac_dmac 297 | * 298 | * Description : In this method we receive buffer and change it's dmac and smac 299 | * 300 | * Parameters : 301 | * 302 | * sg - sg->addr is pointer to the buffer. 
303 | */ 304 | static __inline void switch_smac_dmac(struct ibv_sge *sg) 305 | { 306 | struct ETH_header* eth_header; 307 | eth_header = (struct ETH_header*)sg->addr; 308 | uint8_t tmp_mac[6] = {0} ; 309 | memcpy(tmp_mac , (uint8_t *)eth_header + sizeof(eth_header->src_mac) ,sizeof(eth_header->src_mac)); 310 | memcpy((uint8_t *)eth_header->src_mac , (uint8_t *)eth_header->dst_mac ,sizeof(eth_header->src_mac)); 311 | memcpy((uint8_t *)eth_header->dst_mac , tmp_mac ,sizeof(tmp_mac)); 312 | } 313 | 314 | /* set_up_flow_rules 315 | * Description: set the flow rules objects 316 | * 317 | * Parameters: 318 | * flow_rules - Pointer to output, is set to header buffer and specification information 319 | * ctx - Test Context. 320 | * user_param - user_parameters struct for this test 321 | * local_port - the local port in the flow rule 322 | * remote_port - the remote port in the flow rule 323 | */ 324 | 325 | int set_up_flow_rules( 326 | struct ibv_flow_attr **flow_rules, 327 | struct pingpong_context *ctx, 328 | struct perftest_parameters *user_param, 329 | int local_port, 330 | int remote_port); 331 | /* set_up_fs_rules 332 | * Description: set the flow rules objects for FS rate test 333 | * 334 | * Parameters: 335 | * flow_rules - Pointer to output, is set to header buffer and specification information 336 | * ctx - Test Context. 
337 | * user_param - user_parameters struct for this test 338 | * allocated_flows - number of flow ruled that are allocated and ready to be set 339 | * 340 | */ 341 | 342 | int set_up_fs_rules( 343 | struct ibv_flow_attr **flow_rules, 344 | struct pingpong_context *ctx, 345 | struct perftest_parameters *user_param, 346 | uint64_t allocated_flows); 347 | 348 | #endif /* RAW_ETHERNET_RESOURCES_H */ 349 | -------------------------------------------------------------------------------- /src/raw_ethernet_send_burst_lat.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 | * Copyright (c) 2009 HNR Consulting. All rights reserved. 5 | * 6 | * This software is available to you under a choice of one of two 7 | * licenses. You may choose to be licensed under the terms of the GNU 8 | * General Public License (GPL) Version 2, available from the file 9 | * COPYING in the main directory of this source tree, or the 10 | * OpenIB.org BSD license below: 11 | * 12 | * Redistribution and use in source and binary forms, with or 13 | * without modification, are permitted provided that the following 14 | * conditions are met: 15 | * 16 | * - Redistributions of source code must retain the above 17 | * copyright notice, this list of conditions and the following 18 | * disclaimer. 19 | * 20 | * - Redistributions in binary form must reproduce the above 21 | * copyright notice, this list of conditions and the following 22 | * disclaimer in the documentation and/or other materials 23 | * provided with the distribution. 24 | * 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 | * SOFTWARE. 33 | * 34 | * $Id$ 35 | */ 36 | 37 | #if defined(__FreeBSD__) 38 | #include 39 | #include 40 | #endif 41 | 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include "perftest_parameters.h" 50 | #include "perftest_resources.h" 51 | #include "multicast_resources.h" 52 | #include "perftest_communication.h" 53 | #include "raw_ethernet_resources.h" 54 | 55 | 56 | /* 57 | * Main function. implements raw_ethernet_send_lat 58 | */ 59 | int main(int argc, char *argv[]) 60 | { 61 | 62 | struct ibv_device *ib_dev = NULL; 63 | struct pingpong_context ctx; 64 | struct raw_ethernet_info my_dest_info,rem_dest_info; 65 | int ret_parser; 66 | struct perftest_parameters user_param; 67 | struct ibv_flow **flow_create_result; 68 | struct ibv_flow_attr **flow_rules; 69 | struct ibv_flow *flow_promisc = NULL; 70 | struct report_options report; 71 | int i, flows_created = 0; 72 | 73 | /* allocate memory space for user parameters &*/ 74 | memset(&ctx, 0, sizeof(struct pingpong_context)); 75 | memset(&user_param, 0, sizeof(struct perftest_parameters)); 76 | memset(&my_dest_info, 0 , sizeof(struct raw_ethernet_info)); 77 | memset(&rem_dest_info, 0 , sizeof(struct raw_ethernet_info)); 78 | 79 | /* init default values to user's parameters that's relvant for this test: 80 | * Raw Ethernet Send Latency Test 81 | */ 82 | user_param.verb = SEND; 83 | user_param.tst = LAT_BY_BW; 84 | strncpy(user_param.version, VERSION, sizeof(user_param.version)); 85 | user_param.connection_type = RawEth; 86 | user_param.r_flag = &report; 87 | 88 | 89 | /* Configure the parameters values according to user 90 | arguments or default values. 
*/ 91 | ret_parser = parser(&user_param, argv,argc); 92 | 93 | /* check for parsing errors */ 94 | if (ret_parser) { 95 | if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT) 96 | fprintf(stderr," Parser function exited with Error\n"); 97 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 98 | goto return_error; 99 | } 100 | MAIN_ALLOC(flow_create_result, struct ibv_flow*, user_param.flows, return_error); 101 | MAIN_ALLOC(flow_rules, struct ibv_flow_attr*, user_param.flows, free_flow_results); 102 | 103 | 104 | /*this is a bidirectional test, so we need to let the init functions 105 | * think we are in duplex mode 106 | */ 107 | user_param.duplex = 1; 108 | 109 | /* Find the selected IB device (or default if the user didn't select one). */ 110 | ib_dev = ctx_find_dev(&user_param.ib_devname); 111 | if (!ib_dev) { 112 | fprintf(stderr," Unable to find the Infiniband/RoCE device\n"); 113 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 114 | goto free_mem; 115 | } 116 | 117 | if (check_flow_steering_support(user_param.ib_devname)) { 118 | goto free_devname; 119 | } 120 | 121 | /* Getting the relevant context from the device */ 122 | ctx.context = ibv_open_device(ib_dev); 123 | if (!ctx.context) { 124 | fprintf(stderr, " Couldn't get context for the device\n"); 125 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 126 | goto free_devname; 127 | } 128 | 129 | /* Verify user parameters that require the device context, 130 | * the function will print the relevent error info. */ 131 | if (verify_params_with_device_context(ctx.context, &user_param)) { 132 | goto free_devname; 133 | } 134 | 135 | /* See if MTU and link type are valid and supported. */ 136 | if (check_link_and_mtu(ctx.context, &user_param)) { 137 | fprintf(stderr, " Couldn't get context for the device\n"); 138 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 139 | goto free_devname; 140 | } 141 | 142 | /* Allocating arrays needed for the test. 
*/ 143 | if (alloc_ctx(&ctx,&user_param)){ 144 | fprintf(stderr, "Couldn't allocate context\n"); 145 | goto free_devname; 146 | } 147 | 148 | /*set up the connection, return the required flow rules (notice that user_param->duplex == TRUE) 149 | * so the function will setup like it's a bidirectional test 150 | */ 151 | if (send_set_up_connection(flow_rules, &ctx, &user_param, &my_dest_info, &rem_dest_info)) { 152 | fprintf(stderr," Unable to set up socket connection\n"); 153 | dealloc_ctx(&ctx, &user_param); 154 | goto free_devname; 155 | } 156 | 157 | /* Print basic test information. */ 158 | ctx_print_test_info(&user_param); 159 | 160 | for (i = 0; i < user_param.flows; i++) 161 | print_spec(flow_rules[i], &user_param); 162 | 163 | /* initalize IB resources (data buffer, PD, MR, CQ and events channel) */ 164 | if (ctx_init(&ctx, &user_param)) { 165 | fprintf(stderr, " Couldn't create IB resources\n"); 166 | dealloc_ctx(&ctx, &user_param); 167 | goto free_devname; 168 | } 169 | 170 | 171 | /* attaching the qp to the spec */ 172 | for (i = 0; i < user_param.flows; i++) { 173 | flow_create_result[i] = ibv_create_flow(ctx.qp[0], flow_rules[i]); 174 | 175 | if (!flow_create_result[i]){ 176 | perror("error"); 177 | fprintf(stderr, "Couldn't attach QP\n"); 178 | goto result_flow_destroy; 179 | } 180 | flows_created++; 181 | } 182 | 183 | if (user_param.use_promiscuous) { 184 | struct ibv_flow_attr attr = { 185 | .type = IBV_FLOW_ATTR_ALL_DEFAULT, 186 | .num_of_specs = 0, 187 | .port = user_param.ib_port, 188 | .flags = 0 189 | }; 190 | 191 | if ((flow_promisc = ibv_create_flow(ctx.qp[0], &attr)) == NULL) { 192 | perror("error"); 193 | fprintf(stderr, "Couldn't attach promiscuous rule QP\n"); 194 | goto result_flow_destroy; 195 | } 196 | } 197 | /* build ONE Raw Ethernet packets on ctx buffer */ 198 | create_raw_eth_pkt(&user_param, &ctx, (void*)ctx.buf[0], &my_dest_info , &rem_dest_info); 199 | 200 | if (user_param.output == FULL_VERBOSITY) { 201 | printf(RESULT_LINE); 
202 | printf("%s",(user_param.test_type == ITERATIONS) ? RESULT_FMT_LAT : RESULT_FMT_LAT_DUR); 203 | printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT)); 204 | } 205 | 206 | /* modify QPs to rtr/rts */ 207 | if (ctx_connect(&ctx, NULL, &user_param, NULL)) { 208 | fprintf(stderr," Unable to Connect the HCA's through the link\n"); 209 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 210 | goto promisc_flow_destroy; 211 | } 212 | 213 | ctx_set_send_wqes(&ctx,&user_param,NULL); 214 | 215 | if (ctx_set_recv_wqes(&ctx,&user_param)) { 216 | fprintf(stderr," Failed to post receive recv_wqes\n"); 217 | goto free_devname; 218 | } 219 | 220 | /* latency test function for SEND verb latency test. */ 221 | if (user_param.machine == CLIENT) { 222 | if (run_iter_lat_burst(&ctx, &user_param)) 223 | goto free_devname; 224 | } 225 | else { 226 | if (run_iter_lat_burst_server(&ctx, &user_param)) 227 | goto free_devname; 228 | } 229 | 230 | /* print report (like print_report_bw) in the correct format 231 | * (as set before: FMT_LAT or FMT_LAT_DUR) 232 | */ 233 | if (user_param.machine == CLIENT) 234 | print_report_lat(&user_param); 235 | 236 | /* destroy promisc flow */ 237 | if (user_param.use_promiscuous) { 238 | if (ibv_destroy_flow(flow_promisc)) { 239 | perror("error"); 240 | fprintf(stderr, "Couldn't destroy promisc flow\n"); 241 | goto result_flow_destroy; 242 | } 243 | } 244 | 245 | /* destroy flow */ 246 | for (i = 0; i < user_param.flows; i++) { 247 | if (ibv_destroy_flow(flow_create_result[i])) { 248 | perror("error"); 249 | fprintf(stderr, "Couldn't destroy flow\n"); 250 | goto destroy_ctx; 251 | } 252 | 253 | free(flow_rules[i]); 254 | } 255 | 256 | /* Deallocate all perftest resources. 
*/ 257 | if (destroy_ctx(&ctx, &user_param)) { 258 | fprintf(stderr,"Failed to destroy_ctx\n"); 259 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 260 | goto free_devname; 261 | } 262 | 263 | free(flow_create_result); 264 | free(flow_rules); 265 | free(user_param.ib_devname); 266 | 267 | if (user_param.output == FULL_VERBOSITY) 268 | printf(RESULT_LINE); 269 | 270 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 271 | return SUCCESS; 272 | 273 | promisc_flow_destroy: 274 | if (user_param.use_promiscuous) { 275 | if (ibv_destroy_flow(flow_promisc)) { 276 | perror("error"); 277 | fprintf(stderr, "Couldn't destroy promisc flow\n"); 278 | } 279 | } 280 | result_flow_destroy: 281 | for (i = 0; i < flows_created; i++) { 282 | if (ibv_destroy_flow(flow_create_result[i])) { 283 | perror("error"); 284 | fprintf(stderr, "Couldn't destroy flow\n"); 285 | } 286 | } 287 | destroy_ctx: 288 | destroy_ctx(&ctx, &user_param); 289 | free_devname: 290 | free(user_param.ib_devname); 291 | free_mem: 292 | free(flow_rules); 293 | free_flow_results: 294 | free(flow_create_result); 295 | return_error: 296 | return FAILURE; 297 | } 298 | -------------------------------------------------------------------------------- /src/raw_ethernet_send_lat.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 | * Copyright (c) 2009 HNR Consulting. All rights reserved. 5 | * 6 | * This software is available to you under a choice of one of two 7 | * licenses. 
You may choose to be licensed under the terms of the GNU 8 | * General Public License (GPL) Version 2, available from the file 9 | * COPYING in the main directory of this source tree, or the 10 | * OpenIB.org BSD license below: 11 | * 12 | * Redistribution and use in source and binary forms, with or 13 | * without modification, are permitted provided that the following 14 | * conditions are met: 15 | * 16 | * - Redistributions of source code must retain the above 17 | * copyright notice, this list of conditions and the following 18 | * disclaimer. 19 | * 20 | * - Redistributions in binary form must reproduce the above 21 | * copyright notice, this list of conditions and the following 22 | * disclaimer in the documentation and/or other materials 23 | * provided with the distribution. 24 | * 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 | * SOFTWARE. 33 | * 34 | * $Id$ 35 | */ 36 | 37 | #if defined(__FreeBSD__) 38 | #include 39 | #include 40 | #endif 41 | 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include "perftest_parameters.h" 50 | #include "perftest_resources.h" 51 | #include "multicast_resources.h" 52 | #include "perftest_communication.h" 53 | #include "raw_ethernet_resources.h" 54 | 55 | 56 | /* 57 | * Main function. 
implements raw_ethernet_send_lat 58 | */ 59 | int main(int argc, char *argv[]) 60 | { 61 | 62 | struct ibv_device *ib_dev = NULL; 63 | struct pingpong_context ctx; 64 | struct raw_ethernet_info my_dest_info,rem_dest_info; 65 | int ret_parser; 66 | struct perftest_parameters user_param; 67 | struct ibv_flow **flow_create_result; 68 | struct ibv_flow_attr **flow_rules; 69 | struct ibv_flow *flow_promisc = NULL; 70 | struct report_options report; 71 | int i; 72 | int flows_created = 0; 73 | 74 | /* allocate memory space for user parameters &*/ 75 | memset(&ctx, 0, sizeof(struct pingpong_context)); 76 | memset(&user_param, 0, sizeof(struct perftest_parameters)); 77 | memset(&my_dest_info, 0 , sizeof(struct raw_ethernet_info)); 78 | memset(&rem_dest_info, 0 , sizeof(struct raw_ethernet_info)); 79 | 80 | /* init default values to user's parameters that's relvant for this test: 81 | * Raw Ethernet Send Latency Test 82 | */ 83 | user_param.verb = SEND; 84 | user_param.tst = LAT; 85 | strncpy(user_param.version, VERSION, sizeof(user_param.version)); 86 | user_param.connection_type = RawEth; 87 | user_param.r_flag = &report; 88 | 89 | 90 | /* Configure the parameters values according to user 91 | arguments or default values. */ 92 | ret_parser = parser(&user_param, argv,argc); 93 | 94 | /* check for parsing errors */ 95 | if (ret_parser) { 96 | if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT) 97 | fprintf(stderr," Parser function exited with Error\n"); 98 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 99 | goto return_error; 100 | } 101 | MAIN_ALLOC(flow_create_result, struct ibv_flow*, user_param.flows, return_error); 102 | MAIN_ALLOC(flow_rules, struct ibv_flow_attr*, user_param.flows, free_flow_results); 103 | 104 | /*this is a bidirectional test, so we need to let the init functions 105 | * think we are in duplex mode 106 | */ 107 | user_param.duplex = 1; 108 | 109 | /* Find the selected IB device (or default if the user didn't select one). 
*/ 110 | ib_dev = ctx_find_dev(&user_param.ib_devname); 111 | if (!ib_dev) { 112 | fprintf(stderr," Unable to find the Infiniband/RoCE device\n"); 113 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 114 | goto free_mem; 115 | } 116 | 117 | if (check_flow_steering_support(user_param.ib_devname)) { 118 | goto free_devname; 119 | } 120 | 121 | /* Getting the relevant context from the device */ 122 | ctx.context = ibv_open_device(ib_dev); 123 | if (!ctx.context) { 124 | fprintf(stderr, " Couldn't get context for the device\n"); 125 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 126 | goto free_devname; 127 | } 128 | 129 | /* Verify user parameters that require the device context, 130 | * the function will print the relevent error info. */ 131 | if (verify_params_with_device_context(ctx.context, &user_param)) { 132 | goto free_devname; 133 | } 134 | 135 | /* See if MTU and link type are valid and supported. */ 136 | if (check_link_and_mtu(ctx.context, &user_param)) { 137 | fprintf(stderr, " Couldn't get context for the device\n"); 138 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 139 | goto free_devname; 140 | } 141 | 142 | /* Allocating arrays needed for the test. */ 143 | if (alloc_ctx(&ctx,&user_param)){ 144 | fprintf(stderr, "Couldn't allocate context\n"); 145 | goto free_devname; 146 | } 147 | 148 | /*set up the connection, return the required flow rules (notice that user_param->duplex == TRUE) 149 | * so the function will setup like it's a bidirectional test 150 | */ 151 | if (send_set_up_connection(flow_rules, &ctx, &user_param, &my_dest_info, &rem_dest_info)) { 152 | fprintf(stderr," Unable to set up socket connection\n"); 153 | dealloc_ctx(&ctx, &user_param); 154 | goto free_devname; 155 | } 156 | 157 | /* Print basic test information. 
*/ 158 | ctx_print_test_info(&user_param); 159 | 160 | for (i = 0; i < user_param.flows; i++) 161 | print_spec(flow_rules[i], &user_param); 162 | 163 | /* initalize IB resources (data buffer, PD, MR, CQ and events channel) */ 164 | if (ctx_init(&ctx, &user_param)) { 165 | fprintf(stderr, " Couldn't create IB resources\n"); 166 | dealloc_ctx(&ctx, &user_param); 167 | goto free_devname; 168 | } 169 | 170 | /* attaching the qp to the spec */ 171 | for (i = 0; i < user_param.flows; i++) { 172 | flow_create_result[i] = ibv_create_flow(ctx.qp[0], flow_rules[i]); 173 | 174 | if (!flow_create_result[i]){ 175 | perror("error"); 176 | fprintf(stderr, "Couldn't attach QP\n"); 177 | goto destroy_ctx; 178 | } 179 | flows_created++; 180 | } 181 | 182 | if (user_param.use_promiscuous) { 183 | struct ibv_flow_attr attr = { 184 | .type = IBV_FLOW_ATTR_ALL_DEFAULT, 185 | .num_of_specs = 0, 186 | .port = user_param.ib_port, 187 | .flags = 0 188 | }; 189 | 190 | if ((flow_promisc = ibv_create_flow(ctx.qp[0], &attr)) == NULL) { 191 | perror("error"); 192 | fprintf(stderr, "Couldn't attach promiscuous rule QP\n"); 193 | } 194 | } 195 | 196 | /* build ONE Raw Ethernet packets on ctx buffer */ 197 | create_raw_eth_pkt(&user_param, &ctx, ctx.buf[0], &my_dest_info , &rem_dest_info); 198 | 199 | if (user_param.output == FULL_VERBOSITY) { 200 | printf(RESULT_LINE); 201 | printf("%s",(user_param.test_type == ITERATIONS) ? RESULT_FMT_LAT : RESULT_FMT_LAT_DUR); 202 | printf((user_param.cpu_util_data.enable ? 
RESULT_EXT_CPU_UTIL : RESULT_EXT)); 203 | } 204 | 205 | /* modify QPs to rtr/rts */ 206 | if (ctx_connect(&ctx, NULL, &user_param, NULL)) { 207 | fprintf(stderr," Unable to Connect the HCA's through the link\n"); 208 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 209 | goto promisc_flow_destroy; 210 | } 211 | 212 | 213 | ctx_set_send_wqes(&ctx,&user_param,NULL); 214 | 215 | if (ctx_set_recv_wqes(&ctx,&user_param)) { 216 | fprintf(stderr," Failed to post receive recv_wqes\n"); 217 | goto promisc_flow_destroy; 218 | } 219 | 220 | /* latency test function for SEND verb latency test. */ 221 | if (run_iter_lat_send(&ctx, &user_param)) { 222 | goto promisc_flow_destroy; 223 | } 224 | 225 | /* print report (like print_report_bw) in the correct format 226 | * (as set before: FMT_LAT or FMT_LAT_DUR) 227 | */ 228 | user_param.test_type == ITERATIONS ? print_report_lat(&user_param) : 229 | print_report_lat_duration(&user_param); 230 | 231 | /* destroy promisc flow */ 232 | if (user_param.use_promiscuous) { 233 | if (ibv_destroy_flow(flow_promisc)) { 234 | perror("error"); 235 | fprintf(stderr, "Couldn't destroy promisc flow\n"); 236 | goto result_flow_destroy; 237 | } 238 | } 239 | 240 | /* destroy flow */ 241 | for (i = 0; i < user_param.flows; i++) { 242 | if (ibv_destroy_flow(flow_create_result[i])) { 243 | perror("error"); 244 | fprintf(stderr, "Couldn't destroy flow\n"); 245 | goto destroy_ctx; 246 | } 247 | 248 | free(flow_rules[i]); 249 | } 250 | 251 | /* Deallocate all perftest resources. 
*/ 252 | if (destroy_ctx(&ctx, &user_param)) { 253 | fprintf(stderr,"Failed to destroy_ctx\n"); 254 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 255 | return FAILURE; 256 | } 257 | 258 | if (user_param.output == FULL_VERBOSITY) 259 | printf(RESULT_LINE); 260 | 261 | DEBUG_LOG(TRACE,"<<<<<<%s",__FUNCTION__); 262 | return SUCCESS; 263 | 264 | promisc_flow_destroy: 265 | if (user_param.use_promiscuous) { 266 | if (ibv_destroy_flow(flow_promisc)) { 267 | perror("error"); 268 | fprintf(stderr, "Couldn't destroy promisc flow\n"); 269 | } 270 | } 271 | result_flow_destroy: 272 | for (i = 0; i < flows_created; i++) { 273 | if (ibv_destroy_flow(flow_create_result[i])) { 274 | perror("error"); 275 | fprintf(stderr, "Couldn't destroy flow\n"); 276 | } 277 | } 278 | destroy_ctx: 279 | destroy_ctx(&ctx, &user_param); 280 | free_devname: 281 | free(user_param.ib_devname); 282 | free_mem: 283 | free(flow_rules); 284 | free_flow_results: 285 | free(flow_create_result); 286 | return_error: 287 | return FAILURE; 288 | } 289 | -------------------------------------------------------------------------------- /src/read_lat.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 | * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler) 5 | * Copyright (c) 2009 HNR Consulting. All rights reserved. 6 | * 7 | * This software is available to you under a choice of one of two 8 | * licenses. 
You may choose to be licensed under the terms of the GNU 9 | * General Public License (GPL) Version 2, available from the file 10 | * COPYING in the main directory of this source tree, or the 11 | * OpenIB.org BSD license below: 12 | * 13 | * Redistribution and use in source and binary forms, with or 14 | * without modification, are permitted provided that the following 15 | * conditions are met: 16 | * 17 | * - Redistributions of source code must retain the above 18 | * copyright notice, this list of conditions and the following 19 | * disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above 22 | * copyright notice, this list of conditions and the following 23 | * disclaimer in the documentation and/or other materials 24 | * provided with the distribution. 25 | * 26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 | * SOFTWARE. 
34 | * 35 | * $Id$ 36 | */ 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | #if !defined(__FreeBSD__) 43 | #include 44 | #endif 45 | 46 | #include "get_clock.h" 47 | #include "perftest_resources.h" 48 | #include "perftest_parameters.h" 49 | #include "perftest_communication.h" 50 | 51 | /****************************************************************************** 52 | * 53 | ******************************************************************************/ 54 | int main(int argc, char *argv[]) 55 | { 56 | int ret_parser, i = 0, rc , error = 1; 57 | struct report_options report; 58 | struct pingpong_context ctx; 59 | struct ibv_device *ib_dev; 60 | struct perftest_parameters user_param; 61 | struct pingpong_dest *my_dest = NULL; 62 | struct pingpong_dest *rem_dest = NULL; 63 | struct perftest_comm user_comm; 64 | int rdma_cm_flow_destroyed = 0; 65 | 66 | /* init default values to user's parameters */ 67 | memset(&ctx,0,sizeof(struct pingpong_context)); 68 | memset(&user_param,0,sizeof(struct perftest_parameters)); 69 | memset(&user_comm,0,sizeof(struct perftest_comm)); 70 | 71 | user_param.verb = READ; 72 | user_param.tst = LAT; 73 | user_param.r_flag = &report; 74 | strncpy(user_param.version, VERSION, sizeof(user_param.version)); 75 | 76 | /* Configure the parameters values according to user arguments or defalut values. */ 77 | ret_parser = parser(&user_param,argv,argc); 78 | if (ret_parser) { 79 | if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT) 80 | fprintf(stderr," Parser function exited with Error\n"); 81 | goto return_error; 82 | } 83 | 84 | if(user_param.use_xrc || user_param.connection_type == DC) { 85 | user_param.num_of_qps *= 2; 86 | } 87 | 88 | /* Finding the IB device selected (or defalut if no selected). 
*/ 89 | ib_dev = ctx_find_dev(&user_param.ib_devname); 90 | if (!ib_dev) { 91 | fprintf(stderr," Unable to find the Infiniband/RoCE device\n"); 92 | goto return_error; 93 | } 94 | 95 | /* Getting the relevant context from the device */ 96 | ctx.context = ctx_open_device(ib_dev, &user_param); 97 | if (!ctx.context) { 98 | fprintf(stderr, " Couldn't get context for the device\n"); 99 | goto free_devname; 100 | } 101 | 102 | /* Verify user parameters that require the device context, 103 | * the function will print the relevent error info. */ 104 | if (verify_params_with_device_context(ctx.context, &user_param)) { 105 | goto free_devname; 106 | } 107 | 108 | /* See if link type is valid and supported. */ 109 | if (check_link(ctx.context,&user_param)) { 110 | fprintf(stderr, " Couldn't get context for the device\n"); 111 | goto free_devname; 112 | } 113 | 114 | /* copy the relevant user parameters to the comm struct + creating rdma_cm resources. */ 115 | if (create_comm_struct(&user_comm,&user_param)) { 116 | fprintf(stderr," Unable to create RDMA_CM resources\n"); 117 | goto free_devname; 118 | } 119 | 120 | if (user_param.output == FULL_VERBOSITY && user_param.machine == SERVER) { 121 | printf("\n************************************\n"); 122 | printf("* Waiting for client to connect... *\n"); 123 | printf("************************************\n"); 124 | } 125 | 126 | /* Initialize the connection and print the local data. */ 127 | if (establish_connection(&user_comm)) { 128 | fprintf(stderr," Unable to init the socket connection\n"); 129 | dealloc_comm_struct(&user_comm,&user_param); 130 | goto free_devname; 131 | } 132 | 133 | exchange_versions(&user_comm, &user_param); 134 | check_version_compatibility(&user_param); 135 | check_sys_data(&user_comm, &user_param); 136 | 137 | /* See if MTU is valid and supported. 
*/ 138 | if (check_mtu(ctx.context,&user_param, &user_comm)) { 139 | fprintf(stderr, " Couldn't get context for the device\n"); 140 | dealloc_comm_struct(&user_comm,&user_param); 141 | goto free_devname; 142 | } 143 | 144 | MAIN_ALLOC(my_dest , struct pingpong_dest , user_param.num_of_qps , free_rdma_params); 145 | memset(my_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 146 | MAIN_ALLOC(rem_dest , struct pingpong_dest , user_param.num_of_qps , free_my_dest); 147 | memset(rem_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 148 | 149 | /* Allocate arrays */ 150 | if (alloc_ctx(&ctx,&user_param)){ 151 | fprintf(stderr, "Couldn't allocate context\n"); 152 | goto free_mem; 153 | } 154 | 155 | /* Create RDMA CM resources and connect through CM. */ 156 | if (user_param.work_rdma_cm == ON) { 157 | rc = create_rdma_cm_connection(&ctx, &user_param, &user_comm, 158 | my_dest, rem_dest); 159 | if (rc) { 160 | fprintf(stderr, 161 | "Failed to create RDMA CM connection with resources.\n"); 162 | dealloc_ctx(&ctx, &user_param); 163 | goto free_mem; 164 | } 165 | } else { 166 | /* create all the basic IB resources (data buffer, PD, MR, CQ and events channel) */ 167 | if (ctx_init(&ctx,&user_param)) { 168 | fprintf(stderr, " Couldn't create IB resources\n"); 169 | dealloc_ctx(&ctx, &user_param); 170 | goto free_mem; 171 | } 172 | } 173 | 174 | /* Set up the Connection. */ 175 | if (set_up_connection(&ctx,&user_param,my_dest)) { 176 | fprintf(stderr," Unable to set up socket connection\n"); 177 | goto destroy_context; 178 | } 179 | 180 | /* Print basic test information. */ 181 | ctx_print_test_info(&user_param); 182 | 183 | /* shaking hands and gather the other side info. 
*/ 184 | if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) { 185 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 186 | goto destroy_context; 187 | } 188 | 189 | for (i=0; i < user_param.num_of_qps; i++) { 190 | 191 | /* shaking hands and gather the other side info. */ 192 | if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) { 193 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 194 | goto destroy_context; 195 | } 196 | } 197 | 198 | if (user_param.work_rdma_cm == OFF) { 199 | if (ctx_check_gid_compatibility(&my_dest[0], &rem_dest[0])) { 200 | fprintf(stderr,"\n Found Incompatibility issue with GID types.\n"); 201 | fprintf(stderr," Please Try to use a different IP version.\n\n"); 202 | goto destroy_context; 203 | } 204 | } 205 | 206 | if (user_param.work_rdma_cm == OFF) { 207 | 208 | if (ctx_connect(&ctx,rem_dest,&user_param,my_dest)) { 209 | fprintf(stderr," Unable to Connect the HCA's through the link\n"); 210 | goto destroy_context; 211 | } 212 | } 213 | 214 | if (user_param.connection_type == DC) 215 | { 216 | /* Set up connection one more time to send qpn properly for DC */ 217 | if (set_up_connection(&ctx,&user_param,my_dest)) { 218 | fprintf(stderr," Unable to set up socket connection\n"); 219 | goto destroy_context; 220 | } 221 | } 222 | /* Print this machine QP information */ 223 | for (i=0; i < user_param.num_of_qps; i++) 224 | ctx_print_pingpong_data(&my_dest[i],&user_comm); 225 | 226 | user_comm.rdma_params->side = REMOTE; 227 | 228 | for (i=0; i < user_param.num_of_qps; i++) { 229 | 230 | if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) { 231 | fprintf(stderr," Failed to exchange data between server and clients\n"); 232 | goto destroy_context; 233 | } 234 | 235 | ctx_print_pingpong_data(&rem_dest[i],&user_comm); 236 | } 237 | 238 | /* An additional handshake is required after moving qp to RTR. 
*/ 239 | if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) { 240 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 241 | goto destroy_context; 242 | } 243 | 244 | /* Only Client post read request. */ 245 | if (user_param.machine == SERVER) { 246 | 247 | if (ctx_close_connection(&user_comm,my_dest,rem_dest)) { 248 | fprintf(stderr,"Failed to close connection between server and client\n"); 249 | goto free_mem; 250 | } 251 | 252 | if (user_param.output == FULL_VERBOSITY) { 253 | printf(RESULT_LINE); 254 | } 255 | 256 | if (user_param.work_rdma_cm == ON) { 257 | if (destroy_ctx(&ctx,&user_param)) { 258 | fprintf(stderr, "Failed to destroy resources\n"); 259 | goto destroy_cm_context; 260 | } 261 | user_comm.rdma_params->work_rdma_cm = OFF; 262 | free(rem_dest); 263 | free(my_dest); 264 | free(user_param.ib_devname); 265 | if(destroy_ctx(user_comm.rdma_ctx, user_comm.rdma_params)) { 266 | free(user_comm.rdma_ctx); 267 | free(user_comm.rdma_params); 268 | return FAILURE; 269 | } 270 | free(user_comm.rdma_ctx); 271 | free(user_comm.rdma_params); 272 | return SUCCESS; 273 | } 274 | 275 | free(rem_dest); 276 | free(my_dest); 277 | free(user_param.ib_devname); 278 | if(destroy_ctx(&ctx, &user_param)){ 279 | free(user_comm.rdma_params); 280 | return FAILURE; 281 | } 282 | free(user_comm.rdma_params); 283 | return SUCCESS; 284 | } 285 | 286 | if (user_param.use_event) { 287 | if (ibv_req_notify_cq(ctx.send_cq, 0)) { 288 | fprintf(stderr, "Couldn't request CQ notification\n"); 289 | goto free_mem; 290 | } 291 | } 292 | 293 | if (user_param.output == FULL_VERBOSITY) { 294 | printf(RESULT_LINE); 295 | printf("%s",(user_param.test_type == ITERATIONS) ? RESULT_FMT_LAT : RESULT_FMT_LAT_DUR); 296 | printf((user_param.cpu_util_data.enable ? 
RESULT_EXT_CPU_UTIL : RESULT_EXT)); 297 | } 298 | 299 | ctx_set_send_wqes(&ctx,&user_param,rem_dest); 300 | 301 | if (user_param.test_method == RUN_ALL) { 302 | for (i = 1; i < 24 ; ++i) { 303 | user_param.size = (uint64_t)1 << i; 304 | if(run_iter_lat(&ctx,&user_param)) { 305 | error = 17; 306 | goto free_mem; 307 | } 308 | 309 | user_param.test_type == ITERATIONS ? print_report_lat(&user_param) : print_report_lat_duration(&user_param); 310 | } 311 | } else { 312 | if(run_iter_lat(&ctx,&user_param)){ 313 | error = 17; 314 | goto free_mem; 315 | } 316 | 317 | user_param.test_type == ITERATIONS ? print_report_lat(&user_param) : print_report_lat_duration(&user_param); 318 | } 319 | 320 | if (ctx_close_connection(&user_comm,my_dest,rem_dest)) { 321 | fprintf(stderr,"Failed to close connection between server and client\n"); 322 | goto free_mem; 323 | } 324 | 325 | if (user_param.output == FULL_VERBOSITY) { 326 | printf(RESULT_LINE); 327 | } 328 | 329 | if (user_param.work_rdma_cm == ON) { 330 | if (destroy_ctx(&ctx,&user_param)) { 331 | fprintf(stderr, "Failed to destroy resources\n"); 332 | goto destroy_cm_context; 333 | } 334 | 335 | user_comm.rdma_params->work_rdma_cm = OFF; 336 | free(rem_dest); 337 | free(my_dest); 338 | free(user_param.ib_devname); 339 | if(destroy_ctx(user_comm.rdma_ctx, user_comm.rdma_params)) { 340 | free(user_comm.rdma_ctx); 341 | free(user_comm.rdma_params); 342 | return FAILURE; 343 | } 344 | free(user_comm.rdma_ctx); 345 | free(user_comm.rdma_params); 346 | return SUCCESS; 347 | } 348 | free(rem_dest); 349 | free(my_dest); 350 | free(user_param.ib_devname); 351 | 352 | if(destroy_ctx(&ctx, &user_param)){ 353 | free(user_comm.rdma_params); 354 | return FAILURE; 355 | } 356 | free(user_comm.rdma_params); 357 | return SUCCESS; 358 | 359 | 360 | destroy_context: 361 | if (destroy_ctx(&ctx,&user_param)) 362 | fprintf(stderr, "Failed to destroy resources\n"); 363 | destroy_cm_context: 364 | if (user_param.work_rdma_cm == ON) { 365 | 
rdma_cm_flow_destroyed = 1; 366 | user_comm.rdma_params->work_rdma_cm = OFF; 367 | destroy_ctx(user_comm.rdma_ctx,user_comm.rdma_params); 368 | } 369 | free_mem: 370 | free(rem_dest); 371 | free_my_dest: 372 | free(my_dest); 373 | free_rdma_params: 374 | if (user_param.use_rdma_cm == ON && rdma_cm_flow_destroyed == 0) 375 | dealloc_comm_struct(&user_comm, &user_param); 376 | else { 377 | if(user_param.use_rdma_cm == ON) 378 | free(user_comm.rdma_ctx); 379 | free(user_comm.rdma_params); 380 | } 381 | free_devname: 382 | free(user_param.ib_devname); 383 | return_error: 384 | //coverity[leaked_storage] 385 | return error; 386 | } 387 | -------------------------------------------------------------------------------- /src/rocm_memory.c: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 | * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. 
5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "rocm_memory.h" 12 | #include 13 | #if defined HAVE_HIP_HIP_VERSION_H 14 | #include 15 | #endif 16 | #include "perftest_parameters.h" 17 | #include 18 | #include 19 | 20 | #define ROCM_CHECK(stmt) \ 21 | do { \ 22 | hipError_t result = (stmt); \ 23 | ASSERT(hipSuccess == result); \ 24 | } while (0) 25 | 26 | #define ACCEL_PAGE_SIZE (64 * 1024) 27 | 28 | 29 | struct rocm_memory_ctx { 30 | struct memory_ctx base; 31 | int device_id; 32 | bool use_dmabuf; 33 | }; 34 | 35 | 36 | static int init_rocm(int device_id) { 37 | int deviceCount = 0; 38 | hipError_t error = hipGetDeviceCount(&deviceCount); 39 | 40 | if (error != hipSuccess) { 41 | printf("hipDeviceGetCount() returned %d\n", error); 42 | return FAILURE; 43 | } 44 | 45 | if (device_id >= deviceCount) { 46 | printf("Requested ROCm device %d but found only %d device(s)\n", 47 | device_id, deviceCount); 48 | return FAILURE; 49 | } 50 | 51 | ROCM_CHECK(hipSetDevice(device_id)); 52 | 53 | hipDeviceProp_t prop = {0}; 54 | ROCM_CHECK(hipGetDeviceProperties(&prop, device_id)); 55 | 56 | /* Need 256 bytes to silence compiler warning */ 57 | char archName[256]; 58 | #if HIP_VERSION >= 60000000 59 | snprintf(archName, 256, "%s", prop.gcnArchName); 60 | #else 61 | snprintf(archName, 256, "%d", prop.gcnArch); 62 | #endif 63 | 64 | printf("Using ROCm Device with ID: %d, Name: %s, PCI Bus ID: 0x%x, GCN Arch: %s\n", 65 | device_id, prop.name, prop.pciBusID, archName); 66 | 67 | return SUCCESS; 68 | } 69 | 70 | int rocm_memory_init(struct memory_ctx *ctx) { 71 | struct rocm_memory_ctx *rocm_ctx = container_of(ctx, struct rocm_memory_ctx, base); 72 | 73 | if (init_rocm(rocm_ctx->device_id)) { 74 | fprintf(stderr, "Couldn't initialize ROCm device\n"); 75 | return FAILURE; 76 | } 77 | 78 | #ifdef HAVE_ROCM_DMABUF 79 | if (rocm_ctx->use_dmabuf) { 80 | int dmabuf_supported = 0; 81 | const char kernel_opt1[] = "CONFIG_DMABUF_MOVE_NOTIFY=y"; 82 | const char 
kernel_opt2[] = "CONFIG_PCI_P2PDMA=y"; 83 | int found_opt1 = 0; 84 | int found_opt2 = 0; 85 | FILE *fp; 86 | struct utsname utsname; 87 | char kernel_conf_file[128]; 88 | char buf[256]; 89 | 90 | if (uname(&utsname) == -1) { 91 | printf("could not get kernel name"); 92 | return FAILURE; 93 | } 94 | 95 | snprintf(kernel_conf_file, sizeof(kernel_conf_file), 96 | "/boot/config-%s", utsname.release); 97 | fp = fopen(kernel_conf_file, "r"); 98 | if (fp == NULL) { 99 | printf("could not open kernel conf file %s error: %m", 100 | kernel_conf_file); 101 | return FAILURE; 102 | } 103 | 104 | while (fgets(buf, sizeof(buf), fp) != NULL) { 105 | if (strstr(buf, kernel_opt1) != NULL) { 106 | found_opt1 = 1; 107 | } 108 | if (strstr(buf, kernel_opt2) != NULL) { 109 | found_opt2 = 1; 110 | } 111 | if (found_opt1 && found_opt2) { 112 | dmabuf_supported = 1; 113 | break; 114 | } 115 | } 116 | fclose(fp); 117 | 118 | if (dmabuf_supported == 0) { 119 | return FAILURE; 120 | } 121 | } 122 | #endif 123 | 124 | return SUCCESS; 125 | } 126 | 127 | int rocm_memory_destroy(struct memory_ctx *ctx) { 128 | struct rocm_memory_ctx *rocm_ctx = container_of(ctx, struct rocm_memory_ctx, base); 129 | 130 | free(rocm_ctx); 131 | return SUCCESS; 132 | } 133 | 134 | int rocm_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t size, int *dmabuf_fd, 135 | uint64_t *dmabuf_offset, void **addr, bool *can_init) { 136 | void *d_A; 137 | hipError_t error; 138 | size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1); 139 | 140 | error = hipMalloc(&d_A, buf_size); 141 | if (error != hipSuccess) { 142 | printf("hipMalloc error=%d\n", error); 143 | return FAILURE; 144 | } 145 | 146 | #ifdef HAVE_ROCM_DMABUF 147 | struct rocm_memory_ctx *rocm_ctx = container_of(ctx, struct rocm_memory_ctx, base); 148 | 149 | if (rocm_ctx->use_dmabuf) { 150 | hipDeviceptr_t aligned_ptr; 151 | const size_t host_page_size = sysconf(_SC_PAGESIZE); 152 | uint64_t offset; 153 | size_t aligned_size; 
154 | hsa_status_t status; 155 | 156 | // Round down to host page size 157 | aligned_ptr = (hipDeviceptr_t)((uintptr_t)d_A & ~(host_page_size - 1)); 158 | offset = d_A - aligned_ptr; 159 | aligned_size = (size + offset + host_page_size - 1) & ~(host_page_size - 1); 160 | 161 | printf("using DMA-BUF for GPU buffer address at %p aligned at %p with aligned size %zu\n", d_A, aligned_ptr, aligned_size); 162 | *dmabuf_fd = 0; 163 | 164 | status = hsa_amd_portable_export_dmabuf(d_A, aligned_size, dmabuf_fd, &offset); 165 | if (status != HSA_STATUS_SUCCESS) { 166 | printf("failed to export dmabuf handle for addr %p / %zu", d_A, 167 | aligned_size); 168 | return FAILURE; 169 | } 170 | 171 | printf("dmabuf export addr %p %zu to dmabuf fd %d offset %lu\n", 172 | d_A, aligned_size, *dmabuf_fd, offset); 173 | 174 | *dmabuf_offset = offset; 175 | } 176 | #endif 177 | 178 | printf("allocated %lu bytes of GPU buffer at %p\n", (unsigned long)buf_size, d_A); 179 | *addr = d_A; 180 | *can_init = true; 181 | return SUCCESS; 182 | } 183 | 184 | int rocm_memory_free_buffer(struct memory_ctx *ctx, int dmabuf_fd, void *addr, uint64_t size) { 185 | printf("deallocating GPU buffer %p\n", addr); 186 | hipFree(addr); 187 | return SUCCESS; 188 | } 189 | 190 | bool rocm_memory_supported() { 191 | return true; 192 | } 193 | 194 | bool rocm_memory_dmabuf_supported() { 195 | #ifdef HAVE_ROCM_DMABUF 196 | return true; 197 | #else 198 | return false; 199 | #endif 200 | } 201 | 202 | struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) { 203 | struct rocm_memory_ctx *ctx; 204 | 205 | ALLOCATE(ctx, struct rocm_memory_ctx, 1); 206 | ctx->base.init = rocm_memory_init; 207 | ctx->base.destroy = rocm_memory_destroy; 208 | ctx->base.allocate_buffer = rocm_memory_allocate_buffer; 209 | ctx->base.free_buffer = rocm_memory_free_buffer; 210 | ctx->base.copy_host_to_buffer = memcpy; 211 | ctx->base.copy_buffer_to_host = memcpy; 212 | ctx->base.copy_buffer_to_buffer = memcpy; 213 | 
ctx->device_id = params->rocm_device_id; 214 | ctx->use_dmabuf = params->use_rocm_dmabuf; 215 | 216 | return &ctx->base; 217 | } 218 | -------------------------------------------------------------------------------- /src/rocm_memory.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ 2 | /* 3 | * Copyright 2023 Amazon.com, Inc. or its affiliates. All rights reserved. 4 | */ 5 | 6 | #ifndef ROCM_MEMORY_H 7 | #define ROCM_MEMORY_H 8 | 9 | #include 10 | #include 11 | #include "memory.h" 12 | #include "config.h" 13 | 14 | 15 | struct perftest_parameters; 16 | 17 | bool rocm_memory_supported(); 18 | 19 | bool rocm_memory_dmabuf_supported(); 20 | 21 | struct memory_ctx *rocm_memory_create(struct perftest_parameters *params); 22 | 23 | 24 | #ifndef HAVE_ROCM 25 | 26 | inline bool rocm_memory_supported() { 27 | return false; 28 | } 29 | 30 | inline bool rocm_memory_dmabuf_supported() { 31 | return false; 32 | } 33 | 34 | inline struct memory_ctx *rocm_memory_create(struct perftest_parameters *params) { 35 | return NULL; 36 | } 37 | 38 | #endif 39 | 40 | #endif /* ROCM_MEMORY_H */ 41 | -------------------------------------------------------------------------------- /src/write_lat.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 | * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 4 | * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler) 5 | * Copyright (c) 2009 HNR Consulting. All rights reserved. 6 | * 7 | * This software is available to you under a choice of one of two 8 | * licenses. 
You may choose to be licensed under the terms of the GNU 9 | * General Public License (GPL) Version 2, available from the file 10 | * COPYING in the main directory of this source tree, or the 11 | * OpenIB.org BSD license below: 12 | * 13 | * Redistribution and use in source and binary forms, with or 14 | * without modification, are permitted provided that the following 15 | * conditions are met: 16 | * 17 | * - Redistributions of source code must retain the above 18 | * copyright notice, this list of conditions and the following 19 | * disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above 22 | * copyright notice, this list of conditions and the following 23 | * disclaimer in the documentation and/or other materials 24 | * provided with the distribution. 25 | * 26 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 | * SOFTWARE. 
34 | * 35 | * $Id$ 36 | */ 37 | 38 | #include 39 | #include 40 | #include 41 | #include 42 | #if !defined(__FreeBSD__) 43 | #include 44 | #endif 45 | 46 | #include "get_clock.h" 47 | #include "perftest_parameters.h" 48 | #include "perftest_resources.h" 49 | #include "perftest_communication.h" 50 | 51 | /****************************************************************************** 52 | * 53 | ******************************************************************************/ 54 | int main(int argc, char *argv[]) 55 | { 56 | int ret_parser, i = 0, rc; 57 | struct report_options report; 58 | struct pingpong_context ctx; 59 | struct pingpong_dest *my_dest = NULL; 60 | struct pingpong_dest *rem_dest = NULL; 61 | struct ibv_device *ib_dev; 62 | struct perftest_parameters user_param; 63 | struct perftest_comm user_comm; 64 | int rdma_cm_flow_destroyed = 0; 65 | 66 | /* init default values to user's parameters */ 67 | memset(&ctx,0,sizeof(struct pingpong_context)); 68 | memset(&user_param, 0, sizeof(struct perftest_parameters)); 69 | memset(&user_comm,0,sizeof(struct perftest_comm)); 70 | 71 | user_param.verb = WRITE; 72 | user_param.tst = LAT; 73 | user_param.r_flag = &report; 74 | strncpy(user_param.version, VERSION, sizeof(user_param.version)); 75 | 76 | /* Configure the parameters values according to user arguments or defalut values. 
*/ 77 | ret_parser = parser(&user_param,argv,argc); 78 | if (ret_parser) { 79 | if (ret_parser != VERSION_EXIT && ret_parser != HELP_EXIT) 80 | fprintf(stderr," Parser function exited with Error\n"); 81 | goto return_error; 82 | } 83 | 84 | /* In case of ib_write_lat, PCI relaxed ordering should be disabled since we're polling for data change 85 | * of last packet so in case of relaxed odering we might get the last packet in wrong order thus the test 86 | * would be incorrect 87 | */ 88 | user_param.disable_pcir = 1; 89 | 90 | if (user_param.use_xrc || user_param.connection_type == DC) { 91 | user_param.num_of_qps *= 2; 92 | } 93 | 94 | /* Finding the IB device selected (or defalut if no selected). */ 95 | ib_dev = ctx_find_dev(&user_param.ib_devname); 96 | if (!ib_dev) { 97 | fprintf(stderr," Unable to find the Infiniband/RoCE device\n"); 98 | goto return_error; 99 | } 100 | 101 | /* Getting the relevant context from the device */ 102 | ctx.context = ctx_open_device(ib_dev, &user_param); 103 | if (!ctx.context) { 104 | fprintf(stderr, " Couldn't get context for the device\n"); 105 | goto free_devname; 106 | } 107 | 108 | /* Verify user parameters that require the device context, 109 | * the function will print the relevent error info. */ 110 | if (verify_params_with_device_context(ctx.context, &user_param)) 111 | { 112 | fprintf(stderr, " Couldn't get context for the device\n"); 113 | goto free_devname; 114 | } 115 | 116 | /* See if link type is valid and supported. */ 117 | if (check_link(ctx.context,&user_param)) { 118 | fprintf(stderr, " Couldn't get context for the device\n"); 119 | goto free_devname; 120 | } 121 | 122 | /* copy the relevant user parameters to the comm struct + creating rdma_cm resources. 
*/ 123 | if (create_comm_struct(&user_comm,&user_param)) { 124 | fprintf(stderr," Unable to create RDMA_CM resources\n"); 125 | goto free_devname; 126 | } 127 | 128 | if (user_param.output == FULL_VERBOSITY && user_param.machine == SERVER) { 129 | printf("\n************************************\n"); 130 | printf("* Waiting for client to connect... *\n"); 131 | printf("************************************\n"); 132 | } 133 | 134 | /* Initialize the connection and print the local data. */ 135 | if (establish_connection(&user_comm)) { 136 | fprintf(stderr," Unable to init the socket connection\n"); 137 | dealloc_comm_struct(&user_comm,&user_param); 138 | goto free_devname; 139 | } 140 | 141 | exchange_versions(&user_comm, &user_param); 142 | check_version_compatibility(&user_param); 143 | check_sys_data(&user_comm, &user_param); 144 | 145 | /* See if MTU is valid and supported. */ 146 | if (check_mtu(ctx.context,&user_param, &user_comm)) { 147 | fprintf(stderr, " Couldn't get context for the device\n"); 148 | dealloc_comm_struct(&user_comm,&user_param); 149 | goto free_devname; 150 | } 151 | 152 | MAIN_ALLOC(my_dest , struct pingpong_dest , user_param.num_of_qps , free_rdma_params); 153 | memset(my_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 154 | MAIN_ALLOC(rem_dest , struct pingpong_dest , user_param.num_of_qps , free_my_dest); 155 | memset(rem_dest, 0, sizeof(struct pingpong_dest)*user_param.num_of_qps); 156 | 157 | /* Allocating arrays needed for the test. */ 158 | if(alloc_ctx(&ctx,&user_param)){ 159 | fprintf(stderr, "Couldn't allocate context\n"); 160 | goto free_mem; 161 | } 162 | 163 | /* Create RDMA CM resources and connect through CM. 
*/ 164 | if (user_param.work_rdma_cm == ON) { 165 | rc = create_rdma_cm_connection(&ctx, &user_param, &user_comm, 166 | my_dest, rem_dest); 167 | if (rc) { 168 | fprintf(stderr, 169 | "Failed to create RDMA CM connection with resources.\n"); 170 | dealloc_ctx(&ctx, &user_param); 171 | goto free_mem; 172 | } 173 | } else { 174 | /* create all the basic IB resources (data buffer, PD, MR, CQ and events channel) */ 175 | if (ctx_init(&ctx,&user_param)) { 176 | fprintf(stderr, " Couldn't create IB resources\n"); 177 | dealloc_ctx(&ctx, &user_param); 178 | goto free_mem; 179 | } 180 | } 181 | 182 | /* Set up the Connection. */ 183 | if (set_up_connection(&ctx,&user_param,my_dest)) { 184 | fprintf(stderr," Unable to set up socket connection\n"); 185 | goto destroy_context; 186 | } 187 | 188 | /* Print basic test information. */ 189 | ctx_print_test_info(&user_param); 190 | 191 | /* shaking hands and gather the other side info. */ 192 | if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) { 193 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 194 | goto destroy_context; 195 | } 196 | 197 | for (i=0; i < user_param.num_of_qps; i++) { 198 | /* shaking hands and gather the other side info. 
*/ 199 | if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) { 200 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 201 | goto destroy_context; 202 | } 203 | 204 | } 205 | 206 | if (user_param.work_rdma_cm == OFF) { 207 | if (ctx_check_gid_compatibility(&my_dest[0], &rem_dest[0])) { 208 | fprintf(stderr,"\n Found Incompatibility issue with GID types.\n"); 209 | fprintf(stderr," Please Try to use a different IP version.\n\n"); 210 | goto destroy_context; 211 | } 212 | } 213 | 214 | if (user_param.work_rdma_cm == OFF) { 215 | if (ctx_connect(&ctx,rem_dest,&user_param,my_dest)) { 216 | fprintf(stderr," Unable to Connect the HCA's through the link\n"); 217 | goto destroy_context; 218 | } 219 | } 220 | 221 | if (user_param.connection_type == DC) 222 | { 223 | /* Set up connection one more time to send qpn properly for DC */ 224 | if (set_up_connection(&ctx,&user_param,my_dest)) { 225 | fprintf(stderr," Unable to set up socket connection\n"); 226 | goto destroy_context; 227 | } 228 | } 229 | 230 | /* Print this machine QP information */ 231 | for (i=0; i < user_param.num_of_qps; i++) 232 | ctx_print_pingpong_data(&my_dest[i],&user_comm); 233 | 234 | user_comm.rdma_params->side = REMOTE; 235 | 236 | for (i=0; i < user_param.num_of_qps; i++) { 237 | if (ctx_hand_shake(&user_comm,&my_dest[i],&rem_dest[i])) { 238 | fprintf(stderr," Failed to exchange data between server and clients\n"); 239 | goto destroy_context; 240 | } 241 | 242 | ctx_print_pingpong_data(&rem_dest[i],&user_comm); 243 | } 244 | 245 | /* An additional handshake is required after moving qp to RTR. */ 246 | if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) { 247 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 248 | goto destroy_context; 249 | } 250 | 251 | ctx_set_send_wqes(&ctx,&user_param,rem_dest); 252 | 253 | if (user_param.output == FULL_VERBOSITY) { 254 | printf(RESULT_LINE); 255 | printf("%s",(user_param.test_type == ITERATIONS) ? 
RESULT_FMT_LAT : RESULT_FMT_LAT_DUR); 256 | printf((user_param.cpu_util_data.enable ? RESULT_EXT_CPU_UTIL : RESULT_EXT)); 257 | } 258 | 259 | if (user_param.test_method == RUN_ALL) { 260 | 261 | for (i = 1; i < 24 ; ++i) { 262 | user_param.size = (uint64_t)1 << i; 263 | 264 | if (user_param.verb == WRITE_IMM) { 265 | if (!user_param.use_unsolicited_write) { 266 | /* Post receive recv_wqes fo current message size */ 267 | if (ctx_set_recv_wqes(&ctx,&user_param)) { 268 | fprintf(stderr," Failed to post receive recv_wqes\n"); 269 | goto free_mem; 270 | } 271 | } 272 | 273 | /* Sync between the client and server so the client won't send packets 274 | * Before the server has posted his receive wqes (in UC/UD it will result in a deadlock). 275 | */ 276 | 277 | if (ctx_hand_shake(&user_comm,&my_dest[0],&rem_dest[0])) { 278 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 279 | goto free_mem; 280 | } 281 | 282 | if(run_iter_lat_write_imm(&ctx,&user_param)) { 283 | fprintf(stderr,"Test exited with Error\n"); 284 | goto free_mem; 285 | } 286 | } else { 287 | if(run_iter_lat_write(&ctx,&user_param)) { 288 | fprintf(stderr,"Test exited with Error\n"); 289 | goto free_mem; 290 | } 291 | } 292 | 293 | user_param.test_type == ITERATIONS ? print_report_lat(&user_param) : print_report_lat_duration(&user_param); 294 | } 295 | 296 | } else { 297 | 298 | if (user_param.verb == WRITE_IMM) { 299 | /* Post recevie recv_wqes fo current message size */ 300 | if (ctx_set_recv_wqes(&ctx,&user_param)) { 301 | fprintf(stderr," Failed to post receive recv_wqes\n"); 302 | goto free_mem; 303 | } 304 | 305 | /* Sync between the client and server so the client won't send packets 306 | * Before the server has posted his receive wqes (in UC/UD it will result in a deadlock). 
307 | */ 308 | 309 | if (ctx_hand_shake(&user_comm,my_dest,rem_dest)) { 310 | fprintf(stderr,"Failed to exchange data between server and clients\n"); 311 | goto free_mem; 312 | } 313 | 314 | if(run_iter_lat_write_imm(&ctx,&user_param)) { 315 | fprintf(stderr,"Test exited with Error\n"); 316 | goto free_mem; 317 | } 318 | } else { 319 | if(run_iter_lat_write(&ctx,&user_param)) { 320 | fprintf(stderr,"Test exited with Error\n"); 321 | goto free_mem; 322 | } 323 | } 324 | 325 | user_param.test_type == ITERATIONS ? print_report_lat(&user_param) : print_report_lat_duration(&user_param); 326 | } 327 | 328 | if (user_param.output == FULL_VERBOSITY) { 329 | printf(RESULT_LINE); 330 | } 331 | 332 | if (user_param.work_rdma_cm == ON) { 333 | if (destroy_ctx(&ctx,&user_param)) { 334 | fprintf(stderr, "Failed to destroy resources\n"); 335 | goto destroy_cm_context; 336 | } 337 | 338 | user_comm.rdma_params->work_rdma_cm = OFF; 339 | free(rem_dest); 340 | free(my_dest); 341 | free(user_param.ib_devname); 342 | if(destroy_ctx(user_comm.rdma_ctx, user_comm.rdma_params)) { 343 | free(user_comm.rdma_ctx); 344 | free(user_comm.rdma_params); 345 | return FAILURE; 346 | } 347 | free(user_comm.rdma_ctx); 348 | free(user_comm.rdma_params); 349 | return SUCCESS; 350 | } 351 | 352 | free(rem_dest); 353 | free(my_dest); 354 | free(user_param.ib_devname); 355 | 356 | if(destroy_ctx(&ctx, &user_param)){ 357 | free(user_comm.rdma_params); 358 | return FAILURE; 359 | } 360 | free(user_comm.rdma_params); 361 | return SUCCESS; 362 | 363 | destroy_context: 364 | if (destroy_ctx(&ctx,&user_param)) 365 | fprintf(stderr, "Failed to destroy resources\n"); 366 | destroy_cm_context: 367 | if (user_param.work_rdma_cm == ON) { 368 | rdma_cm_flow_destroyed = 1; 369 | user_comm.rdma_params->work_rdma_cm = OFF; 370 | destroy_ctx(user_comm.rdma_ctx,user_comm.rdma_params); 371 | } 372 | free_mem: 373 | free(rem_dest); 374 | free_my_dest: 375 | free(my_dest); 376 | free_rdma_params: 377 | if 
(user_param.use_rdma_cm == ON && rdma_cm_flow_destroyed == 0) 378 | dealloc_comm_struct(&user_comm, &user_param); 379 | else { 380 | if(user_param.use_rdma_cm == ON) 381 | free(user_comm.rdma_ctx); 382 | free(user_comm.rdma_params); 383 | } 384 | free_devname: 385 | free(user_param.ib_devname); 386 | return_error: 387 | //coverity[leaked_storage] 388 | return FAILURE; 389 | } 390 | --------------------------------------------------------------------------------