├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── asplosaoe ├── README.md ├── build_benchmark.log ├── build_kernel_modules.log ├── build_libnvm.log ├── cmake.log ├── nvm_array_bench_1_intel.log ├── nvm_array_bench_1_sam.log ├── nvm_array_bench_2_sam.log ├── nvm_array_bench_two_asymetrical_ssds.log ├── nvm_bfs_bench_1_intel.log ├── nvm_bfs_bench_1_sam.log ├── nvm_bfs_bench_2_sam.log ├── nvm_block_bench_1_intel.log ├── nvm_block_bench_1_sam.log ├── nvm_block_bench_2_sam.log ├── nvm_cache_bench.log ├── nvm_cc_bench_1_intel.log ├── nvm_cc_bench_1_sam.log ├── nvm_cc_bench_2_sam.log └── nvm_pattern_bench.log ├── benchmarks ├── array │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── bfs │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_bfs.sh │ └── settings.h ├── block │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── settings.h │ └── test.sh ├── cache │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── cc │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_cc.sh │ ├── scan.cu │ └── settings.h ├── iodepth-block │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── settings.h │ └── test.sh ├── pagerank │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_pg.sh │ └── settings.h ├── pattern │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── scan.cu │ ├── settings.h │ ├── zip │ ├── zip.h │ └── zip_test.cpp ├── readwrite │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── reduction │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── scan │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── scan.cu │ └── settings.h ├── sssp │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_sssp.sh │ └── settings.h ├── sssp_float │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── 
settings.h └── vectoradd │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── scan.cu │ └── settings.h ├── deprecated ├── README.md.old ├── examples │ ├── README.md │ ├── identify │ │ ├── CMakeLists.txt │ │ ├── common.c │ │ ├── common.h │ │ ├── module.c │ │ ├── smartio.c │ │ └── userspace.c │ ├── integrity │ │ ├── CMakeLists.txt │ │ ├── integrity.c │ │ ├── integrity.h │ │ ├── transfer.c │ │ └── util.c │ ├── read-blocks │ │ ├── CMakeLists.txt │ │ ├── args.c │ │ ├── args.h │ │ ├── module.c │ │ ├── read.c │ │ ├── read.h │ │ └── smartio.c │ └── rpc │ │ ├── CMakeLists.txt │ │ ├── rpc_dd.c │ │ ├── rpc_flush.c │ │ ├── rpc_identify.c │ │ ├── rpc_server.c │ │ ├── segment.c │ │ ├── segment.h │ │ ├── util.c │ │ └── util.h └── fio │ └── fio_plugin.c ├── include ├── bafs_ptr.h ├── buffer.h ├── ctrl.h ├── event.h ├── host_util.h ├── nvm_admin.h ├── nvm_aq.h ├── nvm_cmd.h ├── nvm_ctrl.h ├── nvm_dma.h ├── nvm_error.h ├── nvm_io.h ├── nvm_parallel_queue.h ├── nvm_queue.h ├── nvm_rpc.h ├── nvm_types.h ├── nvm_util.h ├── page_cache.h ├── queue.h └── util.h ├── module ├── Makefile.in ├── ctrl.c ├── ctrl.h ├── list.c ├── list.h ├── map.c ├── map.h └── pci.c ├── scripts ├── bfs_run_emogi_nvme_frontier.sh ├── bfs_run_nvme.sh ├── bfs_run_nvme_scaling.sh ├── cc_run_nvme.sh ├── cc_run_nvme_scaling.sh ├── extrach.sh ├── fw_show_route ├── fw_user_routing ├── identify_hba.sh ├── run_bfs.sh ├── run_cc.sh ├── run_emogi.sh ├── sssp_run_nvme.sh ├── unbind.sh └── write_emogi_graph_nvme.sh └── src ├── admin.cpp ├── ctrl.cpp ├── dis ├── device.c ├── device.h ├── dma.c ├── interrupt.c ├── interrupt.h ├── map.h └── rpc.c ├── dma.cpp ├── dma.h ├── dprintf.h ├── error.cpp ├── lib_ctrl.h ├── lib_util.h ├── linux ├── device.cpp ├── dma.cpp ├── ioctl.h └── map.h ├── mutex.cpp ├── mutex.h ├── queue.cpp ├── regs.h ├── rpc.cpp └── rpc.h /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a 
report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Machine Setup (please complete the following information):** 23 | - OS 24 | - NVIDIA Driver, CUDA Versions, GPU name 25 | - SSD used 26 | 27 | **Additional context** 28 | Add any other context about the problem here. Add as much detail as possible so we can help you faster. This is a systems project; knowing about your system setup is critical to understanding the problem. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # vim backup files 2 | .*.swp 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | 8 | # Library builds 9 | *.so 10 | 11 | # Intermediate files 12 | *.mod 13 | *.mod.c 14 | .*.cmd 15 | 16 | # Kernel symbols 17 | Module.symvers 18 | modules.order 19 | .tmp_versions/* 20 | 21 | # CUDA stuff 22 | *.i 23 | *.ii 24 | *.gpu 25 | *.ptx 26 | *.cubin 27 | *.fatbin 28 | 29 | # Profiling stuff 30 | *.prof 31 | *.nvprof 32 | 33 | # Ignore build directory 34 | build/* 35 | 36 | # Ignore cscope files 37 | cscope.out 38 | cscope.in.out 39 | cscope.po.out 40 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "include/freestanding"] 2 | path = include/freestanding 3 | url = https://github.com/ogiroux/freestanding 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributions are welcome! 2 | 3 | We believe in open-source code development and hence all contributions that help to grow and build BaM are most welcome. Please note that BaM is in an early stage of development and is rapidly iterated on to support more functionality as required. However, the goals of the BaM system will remain intact while the codebase evolves over time. To this end, we request developers who are interested in contributing to frequently update their copy of the codebase. 4 | 5 | ## Reporting bugs and asking questions 6 | 7 | Please open a GitHub issue for posting questions, issues, or feedback. We try to respond as early as possible. 8 | 9 | ## To Contribute 10 | 11 | Please create a pull request for all contributions. 
We currently have not enabled a CI/CD pipeline on the BaM codebase, so all code goes through a manual review process. If anyone is interested in building a CI/CD pipeline, your contributions are most welcome. 12 | 13 | ### PR process 14 | Each PR will have an assignee who will engage with the contributor to merge the PR. Please actively ping assignees after you have addressed the requested comments. 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Jonas Markussen 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | -------------------------------------------------------------------------------- /asplosaoe/build_kernel_modules.log: -------------------------------------------------------------------------------- 1 | make -j 2 | make -C /lib/modules/5.8.0-63-generic/build M=/home/vsm2/work/bam/build2/module modules 3 | make[1]: Entering directory '/usr/src/linux-headers-5.8.0-63-generic' 4 | CC [M] /home/vsm2/work/bam/build2/module/pci.o 5 | CC [M] /home/vsm2/work/bam/build2/module/list.o 6 | CC [M] /home/vsm2/work/bam/build2/module/ctrl.o 7 | CC [M] /home/vsm2/work/bam/build2/module/map.o 8 | LD [M] /home/vsm2/work/bam/build2/module/libnvm.o 9 | MODPOST /home/vsm2/work/bam/build2/module/Module.symvers 10 | CC [M] /home/vsm2/work/bam/build2/module/libnvm.mod.o 11 | LD [M] /home/vsm2/work/bam/build2/module/libnvm.ko 12 | make[1]: Leaving directory '/usr/src/linux-headers-5.8.0-63-generic' 13 | 14 | -------------------------------------------------------------------------------- /asplosaoe/cmake.log: -------------------------------------------------------------------------------- 1 | $ cmake .. 
2 | -- The CUDA compiler identification is NVIDIA 11.6.0 3 | -- The C compiler identification is GNU 9.4.0 4 | -- The CXX compiler identification is GNU 9.4.0 5 | -- Check for working CUDA compiler: /usr/local/cuda-11.6-nightly/bin/nvcc 6 | -- Check for working CUDA compiler: /usr/local/cuda-11.6-nightly/bin/nvcc -- works 7 | -- Detecting CUDA compiler ABI info 8 | -- Detecting CUDA compiler ABI info - done 9 | -- Check for working C compiler: /usr/bin/cc 10 | -- Check for working C compiler: /usr/bin/cc -- works 11 | -- Detecting C compiler ABI info 12 | -- Detecting C compiler ABI info - done 13 | -- Detecting C compile features 14 | -- Detecting C compile features - done 15 | -- Check for working CXX compiler: /usr/bin/c++ 16 | -- Check for working CXX compiler: /usr/bin/c++ -- works 17 | -- Detecting CXX compiler ABI info 18 | -- Detecting CXX compiler ABI info - done 19 | -- Detecting CXX compile features 20 | -- Detecting CXX compile features - done 21 | -- Looking for pthread.h 22 | -- Looking for pthread.h - found 23 | -- Performing Test CMAKE_HAVE_LIBC_PTHREAD 24 | -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed 25 | -- Check if compiler accepts -pthread 26 | -- Check if compiler accepts -pthread - yes 27 | -- Found Threads: TRUE 28 | -- Found CUDA: /usr/local/cuda-11.6-nightly (found suitable version "11.6", minimum required is "8.0") 29 | -- Using NVIDIA driver found in /usr/src/nvidia-470.141.03 30 | -- Not building FIO 31 | -- Configuring libnvm without SmartIO 32 | -- Configuring kernel module with CUDA 33 | -- Found CUDA: /usr/local/cuda-11.6-nightly (found suitable version "11.6", minimum required is "10.0") 34 | -- Found CUDA: /usr/local/cuda-11.6-nightly (found suitable version "11.6", minimum required is "8.0") 35 | -- Configuring done 36 | -- Generating done 37 | -- Build files have been written to: /home/vsm2/work/bam/build 38 | 39 | -------------------------------------------------------------------------------- 
/asplosaoe/nvm_array_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single Intel Optane SSD wih 512B cacheline size (page_size) 2 | sudo ./bin/nvm-array-bench --threads=$((1024*1024)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=512 --gpu=0 --n_ctrls=1 --num_queues=128 --random=false -S 1 3 | SQs: 135 CQs: 135 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7fc41c010000 58020410000 7 | HEREN 8 | Cond1 9 | 100000 8 8 20000 10 | Finish Making Page Cache 11 | finished creating cache 12 | finished creating range 13 | atlaunch kernel 14 | 0000:6E:00.0 15 | #READ IOs: 16384 #Accesses:1048576 #Misses:524288 Miss Rate:0.5 #Hits: 524288 Hit Rate:0.5 CLSize:512 16 | ********************************* 17 | Elapsed Time: 3572.74 Number of Read Ops: 1048576 Data Size (bytes): 8388608 18 | Read Ops/sec: 2.93494e+08 Effective Bandwidth(GB/S): 2.1867 19 | 20 | -------------------------------------------------------------------------------- /asplosaoe/nvm_array_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-array-bench --threads=$((1024*1024*128)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=4096 --gpu=0 --n_ctrls=1 --num_queues=128 --random=false 3 | SQs: 129 CQs: 129 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f3752010000 3f020410000 7 | HEREN 8 | Cond1 9 | 100000 8 1 100000 10 | Finish Making Page Cache 11 | finished creating cache 12 | finished creating range 13 | atlaunch kernel 14 | 0000:BA:00.0 15 | #READ IOs: 262144 #Accesses:134217728 #Misses:8388608 Miss Rate:0.0625 #Hits: 125829120 Hit Rate:0.9375 CLSize:4096 16 | ********************************* 17 | Elapsed Time: 525437 Number of Read Ops: 134217728 Data Size (bytes): 1073741824 18 | Read Ops/sec: 
2.5544e+08 Effective Bandwidth(GB/S): 1.90318 19 | -------------------------------------------------------------------------------- /asplosaoe/nvm_array_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-array-bench --threads=$((1024*1024*128)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=4096 --gpu=0 --n_ctrls=2 --num_queues=128 --random=false 3 | SQs: 129 CQs: 129 n_qps: 128 4 | SQs: 129 CQs: 129 n_qps: 128 5 | n_ranges_bits: 6 6 | n_ranges_mask: 63 7 | pages_dma: 0x7f890e010000 3f040410000 8 | HEREN 9 | Cond1 10 | 100000 8 1 100000 11 | Finish Making Page Cache 12 | finished creating cache 13 | finished creating range 14 | atlaunch kernel 15 | 0000:BA:00.0 16 | #READ IOs: 262144 #Accesses:134217728 #Misses:8388608 Miss Rate:0.0625 #Hits: 125829120 Hit Rate:0.9375 CLSize:4096 17 | ********************************* 18 | Elapsed Time: 385223 Number of Read Ops: 134217728 Data Size (bytes): 1073741824 19 | Read Ops/sec: 3.48416e+08 Effective Bandwidth(GB/S): 2.5959 20 | 21 | -------------------------------------------------------------------------------- /asplosaoe/nvm_array_bench_two_asymetrical_ssds.log: -------------------------------------------------------------------------------- 1 | sudo ./bin/nvm-array-bench --threads=$((1024*1024)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=512 --gpu=0 --n_ctrls=2 --num_queues=128 --random=false 2 | SQs: 129 CQs: 129 n_qps: 128 3 | SQs: 135 CQs: 135 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f87bc010000 58040410000 7 | HEREN 8 | Cond1 9 | 100000 8 8 20000 10 | Finish Making Page Cache 11 | finished creating cache 12 | finished creating range 13 | atlaunch kernel 14 | 0000:6E:00.0 15 | #READ IOs: 16384 #Accesses:1048576 #Misses:524288 Miss Rate:0.5 #Hits: 524288 Hit Rate:0.5 CLSize:512 16 | 
********************************* 17 | Elapsed Time: 13853.7 Number of Read Ops: 1048576 Data Size (bytes): 8388608 18 | Read Ops/sec: 7.56893e+07 Effective Bandwidth(GB/S): 0.563929 19 | 20 | -------------------------------------------------------------------------------- /asplosaoe/nvm_bfs_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single Intel Optane SSD wih 4KB cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-bfs-bench -f /home/vsm2/bafsdata/MOLIERE_2016.bel -l 240518168576 --impl_type 20 --memalloc 6 --src 13229860 --n_ctrls 1 -p 4096 --gpu 0 --threads 128 -C 8 -M $((8*1024*1024*1024)) -S 1 3 | /home/vsm2/bafsdata/MOLIERE_2016.bel 4 | File /home/vsm2/bafsdata/MOLIERE_2016.bel 5 | Vertex: 30239687, Edge: 6677301366 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 tile_size:4096 8 | SQs: 135 CQs: 135 n_qps: 128 9 | Controllers Created 10 | GPUID: 0000:6E:00.0 11 | Initialization done. 12 | n_ranges_bits: 6 13 | n_ranges_mask: 63 14 | pages_dma: 0x7f0b7a010000 58020410000 15 | HEREN 16 | Cond1 17 | 200000 8 1 200000 18 | Finish Making Page Cache 19 | Page cache initialized 20 | run 0: src 13229860, iteration 15, time 11719.943359 ms 21 | #READ IOs: 16634480 #Accesses:632341981 #Misses:296080469 Miss Rate:0.468228 #Hits: 336261512 Hit Rate:0.531772 CLSize:4096 22 | ********************************* 23 | 24 | BFS-0 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 11719.943359 ms 25 | run 1: src 13229860, iteration 15, time 11738.077148 ms 26 | #READ IOs: 16632603 #Accesses:632341981 #Misses:295874787 Miss Rate:0.467903 #Hits: 336467194 Hit Rate:0.532097 CLSize:4096 27 | ********************************* 28 | 29 | BFS-1 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 11738.077148 ms 30 | 31 | 
-------------------------------------------------------------------------------- /asplosaoe/nvm_bfs_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-bfs-bench -f /home/vsm2/bafsdata/MOLIERE_2016.bel -l 240518168576 --impl_type 20 --memalloc 6 --src 13229860 --n_ctrls 1 -p 4096 --gpu 0 --threads 128 -C 8 -M $((8*1024*1024*1024)) 3 | /home/vsm2/bafsdata/MOLIERE_2016.bel 4 | File /home/vsm2/bafsdata/MOLIERE_2016.bel 5 | Vertex: 30239687, Edge: 6677301366 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 tile_size:4096 8 | SQs: 129 CQs: 129 n_qps: 128 9 | Controllers Created 10 | GPUID: 0000:6E:00.0 11 | Initialization done. 12 | n_ranges_bits: 6 13 | n_ranges_mask: 63 14 | pages_dma: 0x7f2d9a010000 58020410000 15 | HEREN 16 | Cond1 17 | 200000 8 1 200000 18 | Finish Making Page Cache 19 | Page cache initialized 20 | run 0: src 13229860, iteration 15, time 30770.189453 ms 21 | #READ IOs: 16634497 #Accesses:632341981 #Misses:295996204 Miss Rate:0.468095 #Hits: 336345777 Hit Rate:0.531905 CLSize:4096 22 | ********************************* 23 | 24 | BFS-0 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 30770.189453 ms 25 | run 1: src 13229860, iteration 15, time 28861.617188 ms 26 | #READ IOs: 16632578 #Accesses:632341981 #Misses:296246135 Miss Rate:0.46849 #Hits: 336095846 Hit Rate:0.53151 CLSize:4096 27 | ********************************* 28 | 29 | BFS-1 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 28861.617188 ms 30 | 31 | -------------------------------------------------------------------------------- /asplosaoe/nvm_bfs_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB 
cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-bfs-bench -f /home/vsm2/bafsdata/MOLIERE_2016.bel -l 240518168576 --impl_type 20 --memalloc 6 --src 13229860 --n_ctrls 2 -p 4096 --gpu 1 --threads 128 -C 8 -M $((8*1024*1024*1024)) 3 | /home/vsm2/bafsdata/MOLIERE_2016.bel 4 | File /home/vsm2/bafsdata/MOLIERE_2016.bel 5 | Vertex: 30239687, Edge: 6677301366 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 tile_size:4096 8 | SQs: 129 CQs: 129 n_qps: 128 9 | SQs: 129 CQs: 129 n_qps: 128 10 | Controllers Created 11 | GPUID: 0000:A6:00.0 12 | Initialization done. 13 | n_ranges_bits: 6 14 | n_ranges_mask: 63 15 | pages_dma: 0x7f5a66010000 48040410000 16 | HEREN 17 | Cond1 18 | 200000 8 1 200000 19 | Finish Making Page Cache 20 | Page cache initialized 21 | run 0: src 13229860, iteration 15, time 18059.087891 ms 22 | #READ IOs: 16634530 #Accesses:632341981 #Misses:295866151 Miss Rate:0.467889 #Hits: 336475830 Hit Rate:0.532111 CLSize:4096 23 | ********************************* 24 | 25 | BFS-0 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 18059.087891 ms 26 | run 1: src 13229860, iteration 15, time 15257.948242 ms 27 | #READ IOs: 16632591 #Accesses:632341981 #Misses:296056548 Miss Rate:0.468191 #Hits: 336285433 Hit Rate:0.531809 CLSize:4096 28 | ********************************* 29 | 30 | BFS-1 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 15257.948242 ms 31 | -------------------------------------------------------------------------------- /asplosaoe/nvm_block_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single Intel Optane SSD wih 512B cacheline size (page_size) 2 | sudo ./bin/nvm-block-bench --threads=262144 --blk_size=64 --reqs=1 --pages=262144 --queue_depth=1024 --page_size=512 --num_blks=2097152 --gpu=0 --n_ctrls=1 --num_queues=128 
--random=true -S 1 3 | SQs: 135 CQs: 135 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f8afc010000 58020410000 7 | HEREN 8 | Cond1 9 | 40000 8 8 8000 10 | Finish Making Page Cache 11 | finished creating cache 12 | 0000:6E:00.0 13 | atlaunch kernel 14 | Elapsed Time: 53459.9 Number of Ops: 262144 Data Size (bytes): 134217728 15 | Ops/sec: 4.90356e+06 Effective Bandwidth(GB/S): 2.3382 16 | 17 | 18 | -------------------------------------------------------------------------------- /asplosaoe/nvm_block_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-block-bench --threads=262144 --blk_size=64 --reqs=1 --pages=262144 --queue_depth=1024 --page_size=4096 --num_blks=2097152 --gpu=0 --n_ctrls=1 --num_queues=128 --random=true 3 | SQs: 129 CQs: 129 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f09e4010000 3f020410000 7 | HEREN 8 | Cond1 9 | 40000 8 1 40000 10 | Finish Making Page Cache 11 | finished creating cache 12 | 0000:BA:00.0 13 | atlaunch kernel 14 | Elapsed Time: 328288 Number of Ops: 262144 Data Size (bytes): 1073741824 15 | Ops/sec: 798519 Effective Bandwidth(GB/S): 3.04611 16 | 17 | -------------------------------------------------------------------------------- /asplosaoe/nvm_block_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-block-bench --threads=262144 --blk_size=64 --reqs=1 --pages=262144 --queue_depth=1024 --page_size=4096 --num_blks=2097152 --gpu=0 --n_ctrls=2 --num_queues=128 --random=true 3 | SQs: 129 CQs: 129 n_qps: 128 4 | SQs: 129 CQs: 129 n_qps: 128 5 | n_ranges_bits: 6 6 | n_ranges_mask: 63 7 | pages_dma: 0x7fd0ca010000 3f040410000 8 | HEREN 9 | Cond1 10 | 40000 8 1 40000 11 | Finish Making Page Cache 12 | finished 
creating cache 13 | 0000:BA:00.0 14 | atlaunch kernel 15 | Elapsed Time: 172061 Number of Ops: 262144 Data Size (bytes): 1073741824 16 | Ops/sec: 1.52355e+06 Effective Bandwidth(GB/S): 5.81188 17 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cache_bench.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | sudo ./bin/nvm-cache-bench -k 1 -p 2097152 -P 4096 -n 1 -t 1048576 -b 64 -d 1024 -q 135 -T 1 -e 8589934592 --gpu=0 -r 0 4 | SQs: 135 CQs: 135 n_qps: 135 5 | n_ranges_bits: 6 6 | n_ranges_mask: 63 7 | pages_dma: 0x7f6022010000 58022010000 8 | HEREN 9 | Cond1 10 | 200000 8 1 200000 11 | Finish Making Page Cache 12 | finished creating cache 13 | finished creating range 14 | n_elems_per_page: 200 15 | atlaunch kernel 16 | 0000:6E:00.0 17 | #READ IOs: 32768 #Accesses:1048576 #Misses:1048576 Miss Rate:1 #Hits: 0 Hit Rate:0 CLSize:4096 18 | ********************************* 19 | Itr:0 type: 0 Elapsed Time: 21351.4 Number of Read Ops: 16777216 Data Size (bytes): 134217728Read Ops/sec: 7.85766e+08 Effective Bandwidth(GB/S): 5.85441 20 | ID:0 type:0 n_warps:32768 n_pages_per_warp: 1 n_elems_per_page:512 ios: 16777216 IOPs: 785765697.813752 data:134217728 bandwidth: 5.854411 GBps time: 21351.423264 21 | #READ IOs: 0 #Accesses:1048576 #Misses:0 Miss Rate:0 #Hits: 1048576 Hit Rate:1 CLSize:4096 22 | ********************************* 23 | Itr:1 type: 0 Elapsed Time: 738.304 Number of Read Ops: 16777216 Data Size (bytes): 134217728Read Ops/sec: 2.2724e+10 Effective Bandwidth(GB/S): 169.307 24 | ID:1 type:0 n_warps:32768 n_pages_per_warp: 1 n_elems_per_page:512 ios: 16777216 IOPs: 22723993868.146492 data:134217728 bandwidth: 169.306948 GBps time: 738.304019 25 | 26 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cc_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single 
Intel Optane SSD wih 4KB cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-cc-bench -f /home/vsm2/bafsdata/GAP-kron.bel -l 0 --impl_type 20 --memalloc 6 --src 58720242 --n_ctrls 1 -p 4096 --gpu 0 --threads 128 -M $((8*1024*1024*1024)) -P 128 -C 8 -S 1 3 | /home/vsm2/bafsdata/GAP-kron.bel 4 | File /home/vsm2/bafsdata/GAP-kron.bel 5 | Vertex: 134217726, Edge: 4223264644 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 8 | SQs: 135 CQs: 135 n_qps: 128 9 | Controllers Created 10 | Initialization done 11 | n_ranges_bits: 6 12 | n_ranges_mask: 63 13 | pages_dma: 0x7fb9d8010000 58020410000 14 | HEREN 15 | Cond1 16 | 200000 8 1 200000 17 | Finish Making Page Cache 18 | Page cache initialized 19 | Hash Stride: 128 Coarse: 8 20 | total cc iterations: 4 21 | total components: 71164263 22 | total time: 10821.237305 ms 23 | #READ IOs: 16524764 #Accesses:1270205174 #Misses:354511917 Miss Rate:0.279098 #Hits: 915693257 Hit Rate:0.720902 CLSize:4096 24 | ********************************* 25 | 26 | CC 0 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 10821.237305 ms 27 | Hash Stride: 128 Coarse: 8 28 | total cc iterations: 4 29 | total components: 71164263 30 | total time: 10806.666992 ms 31 | #READ IOs: 16502729 #Accesses:1270205148 #Misses:353968164 Miss Rate:0.27867 #Hits: 916236984 Hit Rate:0.72133 CLSize:4096 32 | ********************************* 33 | 34 | CC 1 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 10806.666992 ms 35 | 36 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cc_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) and 8GB BaM Cache. 
2 | sudo ./bin/nvm-cc-bench -f /home/vsm2/bafsdata/GAP-kron.bel -l 0 --impl_type 20 --memalloc 6 --src 58720242 --n_ctrls 1 -p 4096 --gpu 1 --threads 128 -M $((8*1024*1024*1024)) -P 128 -C 8 3 | /home/vsm2/bafsdata/GAP-kron.bel 4 | File /home/vsm2/bafsdata/GAP-kron.bel 5 | Vertex: 134217726, Edge: 4223264644 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 8 | SQs: 129 CQs: 129 n_qps: 128 9 | Controllers Created 10 | Initialization done 11 | n_ranges_bits: 6 12 | n_ranges_mask: 63 13 | pages_dma: 0x7fbcd8010000 48020410000 14 | HEREN 15 | Cond1 16 | 200000 8 1 200000 17 | Finish Making Page Cache 18 | Page cache initialized 19 | total cc iterations: 4 20 | total components: 71164263 21 | total time: 25175.816406 ms 22 | #READ IOs: 16524749 #Accesses:1270205169 #Misses:354066640 Miss Rate:0.278748 #Hits: 916138529 Hit Rate:0.721252 CLSize:4096 23 | ********************************* 24 | 25 | CC 0 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 25175.816406 ms 26 | total cc iterations: 4 27 | total components: 71164263 28 | total time: 25073.535156 ms 29 | #READ IOs: 16502707 #Accesses:1270205134 #Misses:353845039 Miss Rate:0.278573 #Hits: 916360095 Hit Rate:0.721427 CLSize:4096 30 | ********************************* 31 | 32 | CC 1 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 25073.535156 ms 33 | 34 | 35 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cc_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB cacheline size (page_size) and 8GB BaM Cache. 
2 | sudo ./bin/nvm-cc-bench -f /home/vsm2/bafsdata/GAP-kron.bel -l 0 --impl_type 20 --memalloc 6 --src 58720242 --n_ctrls 2 -p 4096 --gpu 1 --threads 128 -M $((8*1024*1024*1024)) -P 128 -C 8 3 | /home/vsm2/bafsdata/GAP-kron.bel 4 | File /home/vsm2/bafsdata/GAP-kron.bel 5 | Vertex: 134217726, Edge: 4223264644 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 8 | SQs: 129 CQs: 129 n_qps: 128 9 | SQs: 129 CQs: 129 n_qps: 128 10 | Controllers Created 11 | Initialization done 12 | n_ranges_bits: 6 13 | n_ranges_mask: 63 14 | pages_dma: 0x7f8164010000 48040410000 15 | HEREN 16 | Cond1 17 | 200000 8 1 200000 18 | Finish Making Page Cache 19 | Page cache initialized 20 | total cc iterations: 4 21 | total components: 71164263 22 | total time: 18494.630859 ms 23 | #READ IOs: 16524763 #Accesses:1270205171 #Misses:354401603 Miss Rate:0.279011 #Hits: 915803568 Hit Rate:0.720989 CLSize:4096 24 | ********************************* 25 | 26 | CC 0 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 18494.630859 ms 27 | total cc iterations: 4 28 | total components: 71164263 29 | total time: 16683.892578 ms 30 | #READ IOs: 16502832 #Accesses:1270205224 #Misses:353816610 Miss Rate:0.278551 #Hits: 916388614 Hit Rate:0.721449 CLSize:4096 31 | ********************************* 32 | 33 | CC 1 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 16683.892578 ms 34 | -------------------------------------------------------------------------------- /asplosaoe/nvm_pattern_bench.log: -------------------------------------------------------------------------------- 1 | 2 | sudo ./bin/nvm-pattern-bench --input_a=/home/vsm2/bafsdata/GAP-kron.bel --memalloc=6 --threads=4194304 --n_elems=137438953472 --impl_type=3 --blk_size=128 --queue_depth=1024 --num_queues=135 --page_size=4096 --n_ctrls=1 3 | A: /home/vsm2/bafsdata/GAP-kron.bel.dst 4 | Total elements: 
137438953472 5 | n_warps: 131072 numblocks:65536 6 | page size: 4096, pc_entries: 2097152 7 | Allocation finished 8 | SQs: 135 CQs: 135 n_qps: 135 9 | Controllers Created 10 | Initialization done 11 | n_ranges_bits: 6 12 | n_ranges_mask: 63 13 | pages_dma: 0x7efc30010000 58022010000 14 | HEREN 15 | Cond1 16 | 200000 8 1 200000 17 | Finish Making Page Cache 18 | Page cache initialized 19 | val in gpu: 1663758813 20 | #READ IOs: 131072 #Accesses:4194304 #Misses:4194304 Miss Rate:1 #Hits: 0 Hit Rate:0 CLSize:4096 21 | ********************************* 22 | P:0 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 797566000.672973 data:536870912 bandwidth: 5.942330 GBps avgiops: inf avgbandwidth: inf 23 | val in gpu: 915100374 24 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 25 | ********************************* 26 | P:1 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 49799391758.371330 data:536870912 bandwidth: 371.034382 GBps avgiops: 49799391758.371330 avgbandwidth: 371.034382 27 | val in gpu: 1624408235 28 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 29 | ********************************* 30 | P:2 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50451116401.548798 data:536870912 bandwidth: 375.890109 GBps avgiops: 50123133430.321938 avgbandwidth: 373.446445 31 | val in gpu: 2059900224 32 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 33 | ********************************* 34 | P:3 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50334868706.078766 data:536870912 bandwidth: 375.023996 GBps avgiops: 50193512158.904495 avgbandwidth: 373.970808 35 | val in gpu: 1631163370 36 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 37 
| ********************************* 38 | P:4 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 49989322239.155258 data:536870912 bandwidth: 372.449474 GBps avgiops: 50142308454.389557 avgbandwidth: 373.589310 39 | val in gpu: 1097009717 40 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 41 | ********************************* 42 | P:5 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 49610899066.537041 data:536870912 bandwidth: 369.630002 GBps avgiops: 50035116879.830650 avgbandwidth: 372.790671 43 | val in gpu: 1091665284 44 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 45 | ********************************* 46 | P:6 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50257667401.372787 data:536870912 bandwidth: 374.448802 GBps avgiops: 50072073142.439964 avgbandwidth: 373.066017 47 | val in gpu: 344527228 48 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 49 | ********************************* 50 | P:7 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50373558369.068062 data:536870912 bandwidth: 375.312257 GBps avgiops: 50114918776.908516 avgbandwidth: 373.385241 51 | val in gpu: 774151880 52 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 53 | ********************************* 54 | P:8 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50373558369.068062 data:536870912 bandwidth: 375.312257 GBps avgiops: 50147105619.755478 avgbandwidth: 373.625052 55 | val in gpu: 310116501 56 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 57 | ********************************* 58 | P:9 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 
ios: 67108864 IOPs: 50027482318.761513 data:536870912 bandwidth: 372.733789 GBps avgiops: 50133783914.451157 avgbandwidth: 373.525798 59 | val in gpu: 1850572170 60 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 61 | ********************************* 62 | P:10 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50296238429.055405 data:536870912 bandwidth: 374.736178 GBps avgiops: 50149982572.562210 avgbandwidth: 373.646487 63 | 64 | -------------------------------------------------------------------------------- /benchmarks/array/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (array-benchmark-module array-bench "main.cu;") 12 | 13 | make_benchmark_choice (array-benchmark array-benchmark-smartio array-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/array/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/bfs/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (bfs-benchmark-module bfs-bench "main.cu;") 12 | 13 | make_benchmark_choice (bfs-benchmark bfs-benchmark-smartio bfs-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/bfs/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/bfs/run_bfs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | if [ $# -ne 3 ] 5 | then 6 | echo Usage $0 numssd gpuid tbsize && exit 1 7 | fi 8 | 9 | 10 | #Initialize set of files are taken from EMOGI and graphBIG. 
11 | 12 | NUMDATASET=6 13 | declare -a GraphFileArray=( 14 | "/home/vsm2/bafsdata/GAP-kron.bel" 15 | "/home/vsm2/bafsdata/GAP-urand.bel" 16 | "/home/vsm2/bafsdata/com-Friendster.bel" 17 | "/home/vsm2/bafsdata/MOLIERE_2016.bel" 18 | "/home/vsm2/bafsdata/uk-2007-05.bel" 19 | "/home/vsm2/bafsdata/sk-2005.bel" 20 | ) 21 | declare -a GraphFileOffset=( 22 | "$((1024*1024*1024*0))" 23 | "$((1024*1024*1024*64))" 24 | "$((1024*1024*1024*160))" 25 | "$((1024*1024*1024*224))" 26 | "$((1024*1024*1024*320))" 27 | "$((1024*1024*1024*384))" 28 | ) 29 | 30 | #echo "${GraphFileArray[5]} offset is ${GraphFileOffset[5]}" 31 | 32 | declare -a GraphRootNode=( 33 | "58720242" 34 | "58720256" 35 | "28703654" 36 | "13229860" 37 | "46329738" 38 | "37977096" 39 | ) 40 | 41 | 42 | 43 | 44 | CTRL=$1 45 | MEMTYPE=6 #BAFS_DIRECT 46 | GPU=$2 47 | TB=128 48 | 49 | for ((gfid=0; gfid zipf(300); 24 | * 25 | * for (int i = 0; i < 100; i++) 26 | * printf("draw %d %d\n", i, zipf(gen)); 27 | */ 28 | 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | 37 | template 38 | class zipf_distribution 39 | { 40 | public: 41 | typedef IntType result_type; 42 | 43 | static_assert(std::numeric_limits::is_integer, ""); 44 | static_assert(!std::numeric_limits::is_integer, ""); 45 | 46 | /// zipf_distribution(N, s, q) 47 | /// Zipf distribution for `N` items, in the range `[1,N]` inclusive. 48 | /// The distribution follows the power-law 1/(n+q)^s with exponent 49 | /// `s` and Hurwicz q-deformation `q`. 50 | zipf_distribution(const IntType n=std::numeric_limits::max(), 51 | const RealType s=1.0, 52 | const RealType q=0.0) 53 | : n(n) 54 | , _s(s) 55 | , _q(q) 56 | , oms(1.0-s) 57 | , spole(abs(oms) < epsilon) 58 | , rvs(spole ? 
0.0 : 1.0/oms) 59 | , H_x1(H(1.5) - h(1.0)) 60 | , H_n(H(n + 0.5)) 61 | , cut(1.0 - H_inv(H(1.5) - h(1.0))) 62 | , dist(H_x1, H_n) 63 | { 64 | if (-0.5 >= q) 65 | throw std::runtime_error("Range error: Parameter q must be greater than -0.5!"); 66 | } 67 | void reset() {} 68 | 69 | IntType operator()(std::mt19937& rng) 70 | { 71 | while (true) 72 | { 73 | const RealType u = dist(rng); 74 | const RealType x = H_inv(u); 75 | const IntType k = std::round(x); 76 | if (k - x <= cut) return k; 77 | if (u >= H(k + 0.5) - h(k)) 78 | return k; 79 | } 80 | } 81 | 82 | /// Returns the parameter the distribution was constructed with. 83 | RealType s() const { return _s; } 84 | /// Returns the Hurwicz q-deformation parameter. 85 | RealType q() const { return _q; } 86 | /// Returns the minimum value potentially generated by the distribution. 87 | result_type min() const { return 1; } 88 | /// Returns the maximum value potentially generated by the distribution. 89 | result_type max() const { return n; } 90 | 91 | 92 | private: 93 | IntType n; ///< Number of elements 94 | RealType _s; ///< Exponent 95 | RealType _q; ///< Deformation 96 | RealType oms; ///< 1-s 97 | bool spole; ///< true if s near 1.0 98 | RealType rvs; ///< 1/(1-s) 99 | RealType H_x1; ///< H(x_1) 100 | RealType H_n; ///< H(n) 101 | RealType cut; ///< rejection cut 102 | std::uniform_real_distribution dist; ///< [H(x_1), H(n)] 103 | 104 | // This provides 16 decimal places of precision, 105 | // i.e. good to (epsilon)^4 / 24 per expanions log, exp below. 
106 | static constexpr RealType epsilon = 2e-5; 107 | 108 | /** (exp(x) - 1) / x */ 109 | static double 110 | expxm1bx(const double x) 111 | { 112 | if (std::abs(x) > epsilon) 113 | return std::expm1(x) / x; 114 | return (1.0 + x/2.0 * (1.0 + x/3.0 * (1.0 + x/4.0))); 115 | } 116 | 117 | /** log(1 + x) / x */ 118 | static RealType 119 | log1pxbx(const RealType x) 120 | { 121 | if (std::abs(x) > epsilon) 122 | return std::log1p(x) / x; 123 | return 1.0 - x * ((1/2.0) - x * ((1/3.0) - x * (1/4.0))); 124 | } 125 | /** 126 | * The hat function h(x) = 1/(x+q)^s 127 | */ 128 | const RealType h(const RealType x) 129 | { 130 | return std::pow(x + _q, -_s); 131 | } 132 | 133 | /** 134 | * H(x) is an integral of h(x). 135 | * H(x) = [(x+q)^(1-s) - (1+q)^(1-s)] / (1-s) 136 | * and if s==1 then 137 | * H(x) = log(x+q) - log(1+q) 138 | * 139 | * Note that the numerator is one less than in the paper 140 | * order to work with all s. Unfortunately, the naive 141 | * implementation of the above hits numerical underflow 142 | * when q is larger than 10 or so, so we split into 143 | * different regimes. 144 | * 145 | * When q != 0, we shift back to what the paper defined: 146 | 147 | * H(x) = (x+q)^{1-s} / (1-s) 148 | * and for q != 0 and also s==1, use 149 | * H(x) = [exp{(1-s) log(x+q)} - 1] / (1-s) 150 | */ 151 | const RealType H(const RealType x) 152 | { 153 | if (not spole) 154 | return std::pow(x + _q, oms) / oms; 155 | 156 | const RealType log_xpq = std::log(x + _q); 157 | return log_xpq * expxm1bx(oms * log_xpq); 158 | } 159 | 160 | /** 161 | * The inverse function of H(x). 162 | * H^{-1}(y) = [(1-s)y + (1+q)^{1-s}]^{1/(1-s)} - q 163 | * Same convergence issues as above; two regimes. 
164 | * 165 | * For s far away from 1.0 use the paper version 166 | * H^{-1}(y) = -q + (y(1-s))^{1/(1-s)} 167 | */ 168 | const RealType H_inv(const RealType y) 169 | { 170 | if (not spole) 171 | return std::pow(y * oms, rvs) - _q; 172 | 173 | return std::exp(y * log1pxbx(oms * y)) - _q; 174 | } 175 | }; 176 | 177 | #endif 178 | -------------------------------------------------------------------------------- /benchmarks/pattern/zip_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Zipf (Zeta) random distribution. 4 | * 5 | * Implementation taken from drobilla's May 24, 2017 answer to 6 | * https://stackoverflow.com/questions/9983239/how-to-generate-zipf-distributed-numbers-efficiently 7 | * 8 | * That code is referenced with this: 9 | * "Rejection-inversion to generate variates from monotone discrete 10 | * distributions", Wolfgang Hörmann and Gerhard Derflinger 11 | * ACM TOMACS 6.3 (1996): 169-184 12 | * 13 | * Note that the Hörmann & Derflinger paper, and the stackoverflow 14 | * code base incorrectly names the paramater as `q`, when they mean `s`. 15 | * Thier `q` has nothing to do with the q-series. The names in the code 16 | * below conform to conventions. 17 | * 18 | * Example usage: 19 | * 20 | * std::random_device rd; 21 | * std::mt19937 gen(rd()); 22 | * zipf_distribution<> zipf(300); 23 | * 24 | * for (int i = 0; i < 100; i++) 25 | * printf("draw %d %d\n", i, zipf(gen)); 26 | */ 27 | 28 | 29 | #include 30 | #include 31 | 32 | 33 | int main(){ 34 | 35 | std::random_device rd; 36 | std::mt19937 gen(rd()); 37 | zipf_distribution zipf(100, 0.5); //number of unique keys 38 | 39 | for (int i = 0; i < 10000; i++) //number of values to draw from the unique keys. 
40 | printf("i: %d val: %llu\n", i, zipf(gen)); 41 | } 42 | -------------------------------------------------------------------------------- /benchmarks/readwrite/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.10) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 10.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (readwrite-benchmark-module readwrite-bench "main.cu;") 12 | 13 | make_benchmark_choice (readwrite-benchmark readwrite-benchmark-smartio readwrite-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/readwrite/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=512 6 | R=1 7 | B=1024 8 | G=2 9 | R=true 10 | A=2 11 | RT=50 12 | for C in 1 2 3 4 5 6 7 13 | do 14 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 15 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 16 | do 17 | echo "------------------ $T Threads ------------------" 18 | ../../build/bin/nvm-block-bench --threads=$T --blk_size=$B --reqs=1 --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G --access_type=$A --ratio=$RT --num_blks=$B --random=$R | grep "Ops" 19 | done 20 | 21 | done 22 | -------------------------------------------------------------------------------- /benchmarks/reduction/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (reduction-benchmark-module reduction-bench "main.cu;") 12 | 13 | make_benchmark_choice (reduction-benchmark 
reduction-benchmark-smartio reduction-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/reduction/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/scan/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (scan-benchmark-module scan-bench "main.cu;") 12 | 13 | make_benchmark_choice (scan-benchmark scan-benchmark-smartio scan-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/scan/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P 
--n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/sssp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (sssp-benchmark-module sssp-bench "main.cu;") 12 | 13 | make_benchmark_choice (sssp-benchmark sssp-benchmark-smartio sssp-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/sssp/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/sssp/run_sssp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | if [ $# -ne 3 ] 5 | then 6 | echo Usage $0 numssd gpuid tbsize && exit 1 7 | fi 8 | 9 | 10 | NUMDATASET=5 #1 dataset MOLIERE is a floating type. Will be used separately later. 
11 | declare -a GraphFileArray=( 12 | "/home/vsm2/bafsdata/GAP-kron.bel" 13 | "/home/vsm2/bafsdata/GAP-urand.bel" 14 | "/home/vsm2/bafsdata/com-Friendster.bel" 15 | "/home/vsm2/bafsdata/uk-2007-05.bel" 16 | "/home/vsm2/bafsdata/sk-2005.bel" 17 | "/home/vsm2/bafsdata/MOLIERE_2016.bel" 18 | ) 19 | declare -a GraphFileOffset=( 20 | "$((1024*1024*1024*0))" 21 | "$((1024*1024*1024*64))" 22 | "$((1024*1024*1024*160))" 23 | "$((1024*1024*1024*320))" 24 | "$((1024*1024*1024*384))" 25 | "$((1024*1024*1024*224))" 26 | ) 27 | 28 | declare -a GraphWeightOffset=( 29 | "$((1024*1024*1024*32))" 30 | "$((1024*1024*1024*128))" 31 | "$((1024*1024*1024*192))" 32 | "$((1024*1024*1024*352))" 33 | "$((1024*1024*1024*416))" 34 | "$((1024*1024*1024*288))" 35 | ) 36 | 37 | 38 | declare -a GraphRootNode=( 39 | "58720242" 40 | "58720256" 41 | "28703654" 42 | "46329738" 43 | "37977096" 44 | "13229860" 45 | ) 46 | #echo "${GraphFileArray[5]} offset is ${GraphFileOffset[5]}" 47 | 48 | 49 | CTRL=$1 50 | MEMTYPE=6 #BAFS_DIRECT 51 | GPU=$2 52 | TB=128 53 | 54 | for ((gfid=0; gfid 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "common.h" 9 | 10 | 11 | 12 | /* 13 | * Print controller information. 
14 | */ 15 | static void print_ctrl_info(FILE* fp, const struct nvm_ctrl_info* info, uint16_t n_cqs, uint16_t n_sqs) 16 | { 17 | unsigned char vendor[4]; 18 | memcpy(vendor, &info->pci_vendor, sizeof(vendor)); 19 | 20 | char serial[21]; 21 | memset(serial, 0, 21); 22 | memcpy(serial, info->serial_no, 20); 23 | 24 | char model[41]; 25 | memset(model, 0, 41); 26 | memcpy(model, info->model_no, 40); 27 | 28 | char revision[9]; 29 | memset(revision, 0, 9); 30 | memcpy(revision, info->firmware, 8); 31 | 32 | fprintf(fp, "------------- Controller information -------------\n"); 33 | fprintf(fp, "PCI Vendor ID : %x %x\n", vendor[0], vendor[1]); 34 | fprintf(fp, "PCI Subsystem Vendor ID : %x %x\n", vendor[2], vendor[3]); 35 | fprintf(fp, "NVM Express version : %u.%u.%u\n", 36 | info->nvme_version >> 16, (info->nvme_version >> 8) & 0xff, info->nvme_version & 0xff); 37 | fprintf(fp, "Controller page size : %zu B (0x%zx)\n", info->page_size, info->page_size); 38 | fprintf(fp, "Max queue entries : %u\n", info->max_entries); 39 | fprintf(fp, "Serial Number : %s\n", serial); 40 | fprintf(fp, "Model Number : %s\n", model); 41 | fprintf(fp, "Firmware revision : %s\n", revision); 42 | fprintf(fp, "Max data transfer size : %zu B (%zu KiB)\n", info->max_data_size, info->max_data_size >> 10); 43 | fprintf(fp, "Max outstanding commands: %zu\n", info->max_out_cmds); 44 | fprintf(fp, "Max number of namespaces: %zu\n", info->max_n_ns); 45 | fprintf(fp, "Current number of CQs : %u\n", n_cqs); 46 | fprintf(fp, "Current number of SQs : %u\n", n_sqs); 47 | fprintf(fp, "--------------------------------------------------\n"); 48 | } 49 | 50 | 51 | /* 52 | * Print namespace information. 
53 | */ 54 | static void print_ns_info(FILE* fp, const struct nvm_ns_info* info) 55 | { 56 | fprintf(fp, "------------- Namespace information -------------\n"); 57 | fprintf(fp, "Namespace identifier : %x\n", info->ns_id); 58 | fprintf(fp, "Logical block size : %zu bytes\n", info->lba_data_size); 59 | fprintf(fp, "Namespace size : %zu blocks\n", info->size); 60 | fprintf(fp, "Namespace capacity : %zu blocks\n", info->capacity); 61 | fprintf(fp, "--------------------------------------------------\n"); 62 | } 63 | 64 | 65 | 66 | nvm_aq_ref reset_ctrl(const nvm_ctrl_t* ctrl, const nvm_dma_t* dma_window) 67 | { 68 | int status; 69 | nvm_aq_ref admin; 70 | 71 | if (dma_window->n_ioaddrs < 2) 72 | { 73 | return NULL; 74 | } 75 | memset((void*) dma_window->vaddr, 0, dma_window->page_size * 2); 76 | 77 | fprintf(stderr, "Resetting controller and setting up admin queues...\n"); 78 | status = nvm_aq_create(&admin, ctrl, dma_window); 79 | if (status != 0) 80 | { 81 | fprintf(stderr, "Failed to reset controller: %s\n", strerror(status)); 82 | return NULL; 83 | } 84 | 85 | fprintf(stderr, "Admin queues OK\n"); 86 | return admin; 87 | } 88 | 89 | 90 | 91 | int identify_ctrl(nvm_aq_ref admin, void* ptr, uint64_t ioaddr) 92 | { 93 | int status; 94 | uint16_t n_cqs = 0; 95 | uint16_t n_sqs = 0; 96 | struct nvm_ctrl_info info; 97 | 98 | fprintf(stderr, "Getting number of queues...\n"); 99 | status = nvm_admin_get_num_queues(admin, &n_cqs, &n_sqs); 100 | if (status != 0) 101 | { 102 | fprintf(stderr, "Failed to get number of queues\n"); 103 | return status; 104 | } 105 | 106 | fprintf(stderr, "Identifying controller...\n"); 107 | status = nvm_admin_ctrl_info(admin, &info, ptr, ioaddr); 108 | if (!nvm_ok(status)) 109 | { 110 | fprintf(stderr, "Failed to identify controller: %s\n", nvm_strerror(status)); 111 | return status; 112 | } 113 | 114 | print_ctrl_info(stdout, &info, n_cqs, n_sqs); 115 | return 0; 116 | } 117 | 118 | 119 | 120 | int identify_ns(nvm_aq_ref admin, uint32_t 
nvm_namespace, void* ptr, uint64_t ioaddr) 121 | { 122 | int status; 123 | struct nvm_ns_info info; 124 | 125 | status = nvm_admin_ns_info(admin, &info, nvm_namespace, ptr, ioaddr); 126 | if (status != 0) 127 | { 128 | fprintf(stderr, "Failed to identify namespace: %s\n", strerror(status)); 129 | return status; 130 | } 131 | 132 | print_ns_info(stdout, &info); 133 | return 0; 134 | } 135 | 136 | -------------------------------------------------------------------------------- /deprecated/examples/identify/common.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_IDENTIFY_H__ 2 | #define __LIBNVM_SAMPLES_IDENTIFY_H__ 3 | 4 | #include 5 | #include 6 | 7 | 8 | /* 9 | * Reset controller and take ownership of admin queues. 10 | * DMA window must be at least 2 pages. 11 | * Caller must manually destroy the admin reference. 12 | */ 13 | nvm_aq_ref reset_ctrl(const nvm_ctrl_t* ctrl, const nvm_dma_t* dma_window); 14 | 15 | 16 | /* 17 | * Identify controller and print information. 18 | */ 19 | int identify_ctrl(nvm_aq_ref admin, void* ptr, uint64_t ioaddr); 20 | 21 | 22 | /* 23 | * Identify namespace and print information. 
24 | */ 25 | int identify_ns(nvm_aq_ref admin, uint32_t nvm_namespace, void* ptr, uint64_t ioaddr); 26 | 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /deprecated/examples/identify/module.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "common.h" 19 | 20 | 21 | 22 | static void parse_args(int argc, char** argv, char** device, uint32_t* ns_id); 23 | 24 | 25 | 26 | static int open_fd(const char* path) 27 | { 28 | int fd; 29 | 30 | fd = open(path, O_RDWR|O_NONBLOCK); 31 | if (fd < 0) 32 | { 33 | fprintf(stderr, "Failed to open descriptor: %s\n", strerror(errno)); 34 | return -1; 35 | } 36 | 37 | return fd; 38 | } 39 | 40 | 41 | int main(int argc, char** argv) 42 | { 43 | int status; 44 | nvm_ctrl_t* ctrl; 45 | nvm_dma_t* window = NULL; 46 | nvm_aq_ref admin = NULL; 47 | uint32_t nvm_namespace; 48 | void* memory; 49 | 50 | long page_size = sysconf(_SC_PAGESIZE); 51 | 52 | char* path = NULL; 53 | parse_args(argc, argv, &path, &nvm_namespace); 54 | 55 | int fd = open_fd(path); 56 | if (fd < 0) 57 | { 58 | exit(1); 59 | } 60 | 61 | status = nvm_ctrl_init(&ctrl, fd); 62 | if (status != 0) 63 | { 64 | close(fd); 65 | fprintf(stderr, "Failed to get controller reference: %s\n", strerror(status)); 66 | exit(1); 67 | } 68 | 69 | close(fd); 70 | 71 | status = posix_memalign(&memory, ctrl->page_size, 3 * page_size); 72 | if (status != 0) 73 | { 74 | fprintf(stderr, "Failed to allocate page-aligned memory: %s\n", strerror(status)); 75 | nvm_ctrl_free(ctrl); 76 | exit(2); 77 | } 78 | 79 | status = nvm_dma_map_host(&window, ctrl, memory, 3 * page_size); 80 | if (status != 0) 81 | { 82 | free(memory); 83 | nvm_ctrl_free(ctrl); 84 | exit(1); 85 | } 86 | 
87 | admin = reset_ctrl(ctrl, window); 88 | if (admin == NULL) 89 | { 90 | status = 1; 91 | goto leave; 92 | } 93 | 94 | status = identify_ctrl(admin, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 95 | if (status != 0) 96 | { 97 | goto leave; 98 | } 99 | 100 | if (nvm_namespace != 0) 101 | { 102 | status = identify_ns(admin, nvm_namespace, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 103 | } 104 | 105 | leave: 106 | nvm_aq_destroy(admin); 107 | nvm_dma_unmap(window); 108 | free(memory); 109 | nvm_ctrl_free(ctrl); 110 | 111 | fprintf(stderr, "Goodbye!\n"); 112 | exit(status); 113 | } 114 | 115 | 116 | static void give_usage(const char* name) 117 | { 118 | fprintf(stderr, "Usage: %s --ctrl=\n", name); 119 | } 120 | 121 | 122 | static void show_help(const char* name) 123 | { 124 | give_usage(name); 125 | fprintf(stderr, "\nCreate a manager and run an IDENTIFY CONTROLLER NVM admin command.\n\n" 126 | " --ctrl Path to controller device (/dev/libnvmXXX).\n" 127 | " --ns Show information about NVM namespace.\n" 128 | " --help Show this information.\n\n"); 129 | } 130 | 131 | 132 | static void parse_args(int argc, char** argv, char** dev, uint32_t* ns_id) 133 | { 134 | static struct option opts[] = { 135 | { "help", no_argument, NULL, 'h' }, 136 | { "ctrl", required_argument, NULL, 'c' }, 137 | { "ns", required_argument, NULL, 'n' }, 138 | { NULL, 0, NULL, 0 } 139 | }; 140 | 141 | int opt; 142 | int idx; 143 | char* endptr = NULL; 144 | 145 | *dev = NULL; 146 | *ns_id = 0; 147 | 148 | while ((opt = getopt_long(argc, argv, ":hc:n:", opts, &idx)) != -1) 149 | { 150 | switch (opt) 151 | { 152 | case '?': // unknown option 153 | fprintf(stderr, "Unknown option: `%s'\n", argv[optind - 1]); 154 | give_usage(argv[0]); 155 | exit('?'); 156 | 157 | case ':': // missing option argument 158 | fprintf(stderr, "Missing argument for option: `%s'\n", argv[optind - 1]); 159 | give_usage(argv[0]); 160 | exit(':'); 161 | 162 | case 'c': // device identifier 163 | *dev = optarg; 164 | 
break; 165 | 166 | case 'n': // namespace identifier 167 | *ns_id = strtoul(optarg, &endptr, 0); 168 | if (endptr == NULL || *endptr != '\0') 169 | { 170 | fprintf(stderr, "Invalid NVM namespace"); 171 | give_usage(argv[0]); 172 | exit('n'); 173 | } 174 | break; 175 | 176 | case 'h': 177 | show_help(argv[0]); 178 | exit(0); 179 | } 180 | } 181 | 182 | if (*dev == NULL) 183 | { 184 | fprintf(stderr, "Controller is not set!\n"); 185 | give_usage(argv[0]); 186 | exit('c'); 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /deprecated/examples/identify/smartio.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "common.h" 19 | 20 | 21 | /* 22 | * Command line arguments. 
23 | */ 24 | struct cl_args 25 | { 26 | uint64_t dev_id; 27 | uint32_t namespace_id; 28 | }; 29 | 30 | 31 | static void parse_args(int argc, char** argv, struct cl_args* args); 32 | 33 | 34 | int main(int argc, char** argv) 35 | { 36 | sci_error_t err; 37 | struct cl_args args; 38 | 39 | parse_args(argc, argv, &args); 40 | 41 | SCIInitialize(0, &err); 42 | 43 | nvm_ctrl_t* ctrl; 44 | int status = nvm_dis_ctrl_init(&ctrl, args.dev_id); 45 | if (status != 0) 46 | { 47 | fprintf(stderr, "Failed to initialize controller reference: %s\n", strerror(status)); 48 | exit(status); 49 | } 50 | 51 | nvm_dma_t* window; 52 | status = nvm_dis_dma_create(&window, ctrl, 3 * 0x1000, 0); 53 | if (status != 0) 54 | { 55 | nvm_ctrl_free(ctrl); 56 | fprintf(stderr, "Failed to create local segment: %s\n", strerror(status)); 57 | exit(status); 58 | } 59 | memset(window->vaddr, 0, 3 * 0x1000); 60 | 61 | nvm_aq_ref aq; 62 | aq = reset_ctrl(ctrl, window); 63 | if (aq == NULL) 64 | { 65 | status = 1; 66 | goto leave; 67 | } 68 | 69 | status = identify_ctrl(aq, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 70 | if (status != 0) 71 | { 72 | goto leave; 73 | } 74 | 75 | if (args.namespace_id != 0) 76 | { 77 | status = identify_ns(aq, args.namespace_id, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 78 | } 79 | 80 | leave: 81 | nvm_aq_destroy(aq); 82 | nvm_dma_unmap(window); 83 | nvm_ctrl_free(ctrl); 84 | SCITerminate(); 85 | 86 | fprintf(stderr, "Goodbye!\n"); 87 | exit(status); 88 | } 89 | 90 | 91 | static int parse_u64(const char* str, uint64_t* num, int base) 92 | { 93 | char* endptr = NULL; 94 | uint64_t ul = strtoul(str, &endptr, base); 95 | 96 | if (endptr == NULL || *endptr != '\0') 97 | { 98 | return EINVAL; 99 | } 100 | 101 | *num = ul; 102 | return 0; 103 | } 104 | 105 | 106 | static int parse_u32(const char* str, uint32_t* num, int base) 107 | { 108 | int status; 109 | uint64_t ul; 110 | 111 | status = parse_u64(str, &ul, base); 112 | 113 | if (status != 0 || ul > UINT_MAX) 114 | { 
115 | return EINVAL; 116 | } 117 | 118 | *num = (uint32_t) ul; 119 | return status; 120 | } 121 | 122 | 123 | static void give_usage(const char* name) 124 | { 125 | fprintf(stderr, "Usage: %s --ctrl= [--adapter=] [--id=]\n", name); 126 | } 127 | 128 | 129 | static void show_help(const char* name) 130 | { 131 | give_usage(name); 132 | fprintf(stderr, "\nCreate a manager and run an IDENTIFY CONTROLLER NVM admin command.\n\n" 133 | " --ctrl SmartIO device identifier (fabric device id).\n" 134 | " --ns Show information about NVM namespace.\n" 135 | " --help Show this information.\n\n"); 136 | } 137 | 138 | 139 | 140 | 141 | static void parse_args(int argc, char** argv, struct cl_args* args) 142 | { 143 | static struct option opts[] = { 144 | { "help", no_argument, NULL, 'h' }, 145 | { "ctrl", required_argument, NULL, 'c' }, 146 | { "ns", required_argument, NULL, 'n' }, 147 | { "segment", required_argument, NULL, 's' }, 148 | { NULL, 0, NULL, 0 } 149 | }; 150 | 151 | int opt; 152 | int idx; 153 | 154 | bool dev_set = false; 155 | args->dev_id = 0; 156 | args->namespace_id = 0; 157 | 158 | while ((opt = getopt_long(argc, argv, ":hc:n:", opts, &idx)) != -1) 159 | { 160 | switch (opt) 161 | { 162 | case '?': // unknown option 163 | fprintf(stderr, "Unknown option: `%s'\n", argv[optind - 1]); 164 | give_usage(argv[0]); 165 | exit('?'); 166 | 167 | case ':': // missing option argument 168 | fprintf(stderr, "Missing argument for option: `%s'\n", argv[optind - 1]); 169 | give_usage(argv[0]); 170 | exit(':'); 171 | 172 | case 'c': // device identifier 173 | dev_set = true; 174 | if (parse_u64(optarg, &args->dev_id, 16) != 0) 175 | { 176 | give_usage(argv[0]); 177 | exit('c'); 178 | } 179 | break; 180 | 181 | case 'n': 182 | if (parse_u32(optarg, &args->namespace_id, 0) != 0) 183 | { 184 | give_usage(argv[0]); 185 | exit('n'); 186 | } 187 | break; 188 | 189 | case 'h': 190 | show_help(argv[0]); 191 | exit(0); 192 | } 193 | } 194 | 195 | if (!dev_set) 196 | { 197 | 
fprintf(stderr, "Device ID is not set!\n"); 198 | give_usage(argv[0]); 199 | exit('c'); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /deprecated/examples/integrity/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-samples) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | make_sample (integrity-smartio integrity-util "integrity.c;transfer.c;util.c") 8 | set_multithread (integrity-smartio) 9 | set_sisci (integrity-smartio) 10 | 11 | 12 | make_sample (integrity-module integrity-util "integrity.c;transfer.c;util.c") 13 | set_multithread (integrity-module) 14 | 15 | 16 | make_samples_choice (integrity-util integrity-smartio integrity-module) 17 | add_custom_target (integrity DEPENDS integrity-util) 18 | -------------------------------------------------------------------------------- /deprecated/examples/integrity/integrity.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_INTEGRITY_H__ 2 | #define __LIBNVM_SAMPLES_INTEGRITY_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | /* Memory descriptor */ 10 | struct buffer 11 | { 12 | void* buffer; 13 | nvm_dma_t* dma; 14 | }; 15 | 16 | 17 | /* Queue descriptor */ 18 | struct queue 19 | { 20 | struct buffer qmem; 21 | nvm_queue_t queue; 22 | size_t counter; 23 | }; 24 | 25 | 26 | /* Disk descriptor */ 27 | struct disk 28 | { 29 | size_t page_size; 30 | size_t max_data_size; 31 | uint32_t ns_id; 32 | size_t block_size; 33 | }; 34 | 35 | 36 | int create_buffer(struct buffer* b, nvm_aq_ref, size_t size); 37 | 38 | 39 | void remove_buffer(struct buffer* b); 40 | 41 | 42 | 43 | int create_queue(struct queue* q, nvm_aq_ref ref, const struct queue* cq, uint16_t qno); 44 | 45 | 46 | void remove_queue(struct queue* q); 47 | 48 | 49 | 50 | int disk_write(const 
struct disk* disk, struct buffer* buffer, struct queue* queues, uint16_t n_queues, FILE* fp, off_t size); 51 | 52 | int disk_read(const struct disk* disk, struct buffer* buffer, struct queue* queues, uint16_t n_queues, FILE* fp, off_t size); 53 | 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /deprecated/examples/integrity/util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "integrity.h" 12 | 13 | 14 | int create_buffer(struct buffer* b, nvm_aq_ref ref, size_t size) 15 | { 16 | int status; 17 | 18 | const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); 19 | 20 | #ifdef __DIS_CLUSTER__ 21 | b->buffer = NULL; 22 | status = nvm_dis_dma_create(&b->dma, ctrl, size, 0); 23 | #else 24 | status = posix_memalign(&b->buffer, ctrl->page_size, size); 25 | if (status != 0) 26 | { 27 | fprintf(stderr, "Failed to allocate memory: %s\n", strerror(status)); 28 | return status; 29 | } 30 | 31 | status = nvm_dma_map_host(&b->dma, ctrl, b->buffer, size); 32 | #endif 33 | if (!nvm_ok(status)) 34 | { 35 | free(b->buffer); 36 | fprintf(stderr, "Failed to create local segment: %s\n", nvm_strerror(status)); 37 | return status; 38 | } 39 | 40 | memset(b->dma->vaddr, 0, b->dma->page_size * b->dma->n_ioaddrs); 41 | 42 | return 0; 43 | } 44 | 45 | 46 | void remove_buffer(struct buffer* b) 47 | { 48 | nvm_dma_unmap(b->dma); 49 | free(b->buffer); 50 | } 51 | 52 | 53 | int create_queue(struct queue* q, nvm_aq_ref ref, const struct queue* cq, uint16_t qno) 54 | { 55 | int status; 56 | 57 | const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); 58 | 59 | size_t prp_lists = 0; 60 | if (cq != NULL) 61 | { 62 | size_t n_entries = ctrl->page_size / sizeof(nvm_cmd_t); 63 | prp_lists = n_entries <= ctrl->max_qs ? 
n_entries : ctrl->max_qs; 64 | } 65 | 66 | status = create_buffer(&q->qmem, ref, prp_lists * ctrl->page_size + ctrl->page_size); 67 | if (!nvm_ok(status)) 68 | { 69 | return status; 70 | } 71 | 72 | if (cq == NULL) 73 | { 74 | status = nvm_admin_cq_create(ref, &q->queue, qno, q->qmem.dma, 0, NVM_CQ_SIZE(ctrl, 1)); 75 | } 76 | else 77 | { 78 | status = nvm_admin_sq_create(ref, &q->queue, &cq->queue, qno, q->qmem.dma, 0, NVM_SQ_SIZE(ctrl, 1)); 79 | } 80 | 81 | if (!nvm_ok(status)) 82 | { 83 | remove_buffer(&q->qmem); 84 | fprintf(stderr, "Failed to create queue: %s\n", nvm_strerror(status)); 85 | return status; 86 | } 87 | 88 | q->counter = 0; 89 | return 0; 90 | } 91 | 92 | 93 | void remove_queue(struct queue* q) 94 | { 95 | remove_buffer(&q->qmem); 96 | } 97 | 98 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-samples) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | make_sample (read-blocks-module read-blocks "module.c;args.c;read.c") 8 | set_multithread (read-blocks-module) 9 | 10 | make_sample (read-blocks-smartio read-blocks "smartio.c;args.c;read.c") 11 | set_multithread (read-blocks-smartio) 12 | set_sisci (read-blocks-smartio) 13 | 14 | make_samples_choice (read-blocks read-blocks-smartio read-blocks-module) 15 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/args.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_READ_BLOCKS_OPTIONS_H__ 2 | #define __LIBNVM_SAMPLES_READ_BLOCKS_OPTIONS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | struct options 11 | { 12 | #ifdef __DIS_CLUSTER__ 13 | uint64_t controller_id; 14 | #else 15 | const char* 
controller_path; 16 | #endif 17 | size_t queue_size; 18 | size_t chunk_size; 19 | uint32_t namespace_id; 20 | size_t num_blocks; 21 | size_t offset; 22 | FILE* output; 23 | FILE* input; 24 | bool ascii; 25 | bool identify; 26 | }; 27 | 28 | 29 | void parse_options(int argc, char** argv, struct options* options); 30 | 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/module.c: -------------------------------------------------------------------------------- 1 | #include "args.h" 2 | #include "read.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | 19 | static int prepare_and_read(nvm_aq_ref ref, const struct disk_info* disk, const struct options* args) 20 | { 21 | int status = 0; 22 | 23 | const size_t qs = args->queue_size; 24 | void* buffer_ptr = NULL; 25 | nvm_dma_t* buffer = NULL; 26 | void* queue_ptr = NULL; 27 | nvm_dma_t* sq_mem = NULL; 28 | nvm_dma_t* cq_mem = NULL; 29 | size_t n_prp_lists = qs; 30 | struct queue_pair queues; 31 | 32 | const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); 33 | 34 | const size_t buffer_blocks = args->chunk_size <= args->num_blocks ? 
args->chunk_size : args->num_blocks; 35 | status = posix_memalign(&buffer_ptr, disk->page_size, NVM_CTRL_ALIGN(ctrl, buffer_blocks * disk->block_size)); 36 | if (status != 0) 37 | { 38 | fprintf(stderr, "Failed to allocate memory buffer: %s\n", strerror(status)); 39 | goto leave; 40 | } 41 | 42 | status = posix_memalign(&queue_ptr, disk->page_size, 43 | NVM_SQ_PAGES(disk, qs) * disk->page_size + disk->page_size * (n_prp_lists + 2)); 44 | if (status != 0) 45 | { 46 | fprintf(stderr, "Failed to allocate queue memory: %s\n", strerror(status)); 47 | goto leave; 48 | } 49 | 50 | status = nvm_dma_map_host(&sq_mem, ctrl, NVM_PTR_OFFSET(queue_ptr, disk->page_size, 1), 51 | NVM_SQ_PAGES(disk, qs) * disk->page_size + disk->page_size * (n_prp_lists + 1)); 52 | if (!nvm_ok(status)) 53 | { 54 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 55 | goto leave; 56 | } 57 | 58 | status = nvm_dma_map_host(&cq_mem, ctrl, queue_ptr, disk->page_size); 59 | if (!nvm_ok(status)) 60 | { 61 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 62 | goto leave; 63 | } 64 | 65 | status = nvm_dma_map_host(&buffer, ctrl, buffer_ptr, buffer_blocks * disk->block_size); 66 | if (!nvm_ok(status)) 67 | { 68 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 69 | goto leave; 70 | } 71 | 72 | status = create_queue_pair(ref, &queues, cq_mem, sq_mem, qs); 73 | if (status != 0) 74 | { 75 | goto leave; 76 | } 77 | 78 | if (args->input != NULL) 79 | { 80 | status = write_blocks(disk, &queues, buffer, args); 81 | if (status != 0) 82 | { 83 | goto leave; 84 | } 85 | } 86 | 87 | status = read_and_dump(disk, &queues, buffer, args); 88 | 89 | leave: 90 | nvm_dma_unmap(buffer); 91 | nvm_dma_unmap(sq_mem); 92 | nvm_dma_unmap(cq_mem); 93 | free(buffer_ptr); 94 | free(queue_ptr); 95 | return status; 96 | } 97 | 98 | 99 | 100 | int main(int argc, char** argv) 101 | { 102 | int status; 103 | int fd; 104 | 105 | 
struct disk_info disk; 106 | 107 | nvm_ctrl_t* ctrl = NULL; 108 | void* aq_ptr = NULL; 109 | nvm_dma_t* aq_mem = NULL; 110 | nvm_aq_ref aq_ref = NULL; 111 | 112 | struct options args; 113 | 114 | // Parse arguments from command line 115 | parse_options(argc, argv, &args); 116 | 117 | // Get controller reference 118 | fd = open(args.controller_path, O_RDWR | O_NONBLOCK); 119 | if (fd < 0) 120 | { 121 | fprintf(stderr, "Failed to open file descriptor: %s\n", strerror(errno)); 122 | exit(1); 123 | } 124 | 125 | status = nvm_ctrl_init(&ctrl, fd); 126 | if (!nvm_ok(status)) 127 | { 128 | fprintf(stderr, "Failed to initialize controller reference: %s\n", nvm_strerror(status)); 129 | goto leave; 130 | } 131 | 132 | // Create admin queue pair + page for identify commands 133 | status = posix_memalign(&aq_ptr, ctrl->page_size, ctrl->page_size * 3); 134 | if (status != 0) 135 | { 136 | fprintf(stderr, "Failed to allocate queue memory: %s\n", strerror(status)); 137 | goto leave; 138 | } 139 | 140 | status = nvm_dma_map_host(&aq_mem, ctrl, aq_ptr, ctrl->page_size * 3); 141 | if (!nvm_ok(status)) 142 | { 143 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 144 | goto leave; 145 | } 146 | 147 | // Reset controller and set admin queues 148 | status = nvm_aq_create(&aq_ref, ctrl, aq_mem); 149 | if (!nvm_ok(status)) 150 | { 151 | fprintf(stderr, "Failed to reset controller: %s\n", nvm_strerror(status)); 152 | goto leave; 153 | } 154 | 155 | // Identify controller and namespace 156 | status = get_disk_info(aq_ref, &disk, args.namespace_id, NVM_DMA_OFFSET(aq_mem, 2), aq_mem->ioaddrs[2], args.identify); 157 | if (status != 0) 158 | { 159 | goto leave; 160 | } 161 | 162 | status = prepare_and_read(aq_ref, &disk, &args); 163 | 164 | leave: 165 | if (args.input != NULL) 166 | { 167 | fclose(args.input); 168 | } 169 | 170 | if (args.output != NULL) 171 | { 172 | fprintf(stderr, "Flushing output file...\n"); 173 | fclose(args.output); 174 | } 175 | 176 
| fprintf(stderr, "Done\n"); 177 | 178 | nvm_aq_destroy(aq_ref); 179 | nvm_dma_unmap(aq_mem); 180 | free(aq_ptr); 181 | nvm_ctrl_free(ctrl); 182 | close(fd); 183 | exit(status); 184 | } 185 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/read.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_READ_BLOCKS_READ_H__ 2 | #define __LIBNVM_SAMPLES_READ_BLOCKS_READ_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "args.h" 8 | 9 | 10 | /* 11 | * Information about controller and namespace. 12 | */ 13 | struct disk_info 14 | { 15 | uint32_t ns_id; 16 | size_t max_data_size; 17 | size_t page_size; 18 | size_t block_size; 19 | }; 20 | 21 | 22 | 23 | struct queue_pair 24 | { 25 | nvm_dma_t* sq_mem; 26 | nvm_dma_t* cq_mem; 27 | nvm_queue_t sq; 28 | nvm_queue_t cq; 29 | bool stop; 30 | size_t num_cpls; 31 | }; 32 | 33 | 34 | 35 | int get_disk_info(nvm_aq_ref ref, struct disk_info* info, uint32_t ns_id, void* ptr, uint64_t ioaddr, bool show); 36 | 37 | 38 | int create_queue_pair(nvm_aq_ref ref, struct queue_pair* qp, nvm_dma_t* cq_mem, nvm_dma_t* sq_mem, size_t sqs); 39 | 40 | 41 | int read_and_dump(const struct disk_info* disk, struct queue_pair* qp, const nvm_dma_t* buffer, const struct options* args); 42 | 43 | 44 | int write_blocks(const struct disk_info* disk, struct queue_pair* qp, const nvm_dma_t* buffer, const struct options* args); 45 | 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/smartio.c: -------------------------------------------------------------------------------- 1 | #include "args.h" 2 | #include "read.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | int main(int argc, char** argv) 19 | { 20 | 
int status; 21 | sci_error_t err; 22 | 23 | struct disk_info info; 24 | struct queue_pair queues; 25 | 26 | nvm_ctrl_t* ctrl = NULL; 27 | nvm_dma_t* aq_mem = NULL; 28 | nvm_aq_ref aq_ref = NULL; 29 | nvm_dma_t* buffer = NULL; 30 | nvm_dma_t* cq_mem = NULL; 31 | nvm_dma_t* sq_mem = NULL; 32 | 33 | struct options args; 34 | 35 | parse_options(argc, argv, &args); 36 | 37 | SCIInitialize(0, &err); 38 | if (err != SCI_ERR_OK) 39 | { 40 | fprintf(stderr, "Something went wrong: %s\n", SCIGetErrorString(err)); 41 | exit(1); 42 | } 43 | 44 | // Get controller reference 45 | status = nvm_dis_ctrl_init(&ctrl, args.controller_id); 46 | if (!nvm_ok(status)) 47 | { 48 | fprintf(stderr, "Failed to get controller reference: %s\n", nvm_strerror(status)); 49 | goto leave; 50 | } 51 | 52 | // Create admin queue pair + page for identify commands 53 | status = nvm_dis_dma_create(&aq_mem, ctrl, ctrl->page_size * 3, 0); 54 | if (!nvm_ok(status)) 55 | { 56 | fprintf(stderr, "Failed to create queue memory: %s\n", nvm_strerror(status)); 57 | goto leave; 58 | } 59 | 60 | // Reset controller and set admin queue pair 61 | status = nvm_aq_create(&aq_ref, ctrl, aq_mem); 62 | if (!nvm_ok(status)) 63 | { 64 | fprintf(stderr, "Failed to reset controller: %s\n", nvm_strerror(status)); 65 | goto leave; 66 | } 67 | 68 | // Identify controller and namespace 69 | status = get_disk_info(aq_ref, &info, args.namespace_id, NVM_DMA_OFFSET(aq_mem, 2), aq_mem->ioaddrs[2], args.identify); 70 | if (status != 0) 71 | { 72 | goto leave; 73 | } 74 | 75 | // Create data buffer 76 | size_t buffer_size = (args.chunk_size <= args.num_blocks ? 
args.chunk_size : args.num_blocks) * info.block_size; 77 | 78 | status = nvm_dis_dma_create(&buffer, ctrl, buffer_size, 0); 79 | if (!nvm_ok(status)) 80 | { 81 | fprintf(stderr, "Failed to create data buffer: %s\n", nvm_strerror(status)); 82 | goto leave; 83 | } 84 | 85 | // Create memory for completion queue 86 | status = nvm_dis_dma_create(&cq_mem, ctrl, ctrl->page_size, SCI_MEMACCESS_HOST_READ | SCI_MEMACCESS_DEVICE_WRITE); 87 | if (!nvm_ok(status)) 88 | { 89 | fprintf(stderr, "Failed to create completion queue memory: %s\n", nvm_strerror(status)); 90 | goto leave; 91 | } 92 | 93 | // Create memory for submission queue and PRP lists 94 | size_t n_prp_lists = args.queue_size; 95 | 96 | status = nvm_dis_dma_create(&sq_mem, ctrl, 97 | NVM_SQ_PAGES(ctrl, args.queue_size) * ctrl->page_size + ctrl->page_size * (n_prp_lists + 1), 98 | SCI_MEMACCESS_HOST_WRITE | SCI_MEMACCESS_DEVICE_READ); 99 | if (!nvm_ok(status)) 100 | { 101 | goto leave; 102 | } 103 | 104 | // Create queues 105 | status = create_queue_pair(aq_ref, &queues, cq_mem, sq_mem, args.queue_size); 106 | if (!nvm_ok(status)) 107 | { 108 | goto leave; 109 | } 110 | 111 | if (args.input != NULL) 112 | { 113 | status = write_blocks(&info, &queues, buffer, &args); 114 | if (status != 0) 115 | { 116 | goto leave; 117 | } 118 | } 119 | 120 | status = read_and_dump(&info, &queues, buffer, &args); 121 | 122 | 123 | leave: 124 | if (args.input != NULL) 125 | { 126 | fclose(args.input); 127 | } 128 | 129 | if (args.output != NULL) 130 | { 131 | fprintf(stderr, "Flushing output file...\n"); 132 | fclose(args.output); 133 | } 134 | fprintf(stderr, "Done\n"); 135 | nvm_dma_unmap(cq_mem); 136 | nvm_dma_unmap(sq_mem); 137 | nvm_dma_unmap(buffer); 138 | nvm_aq_destroy(aq_ref); 139 | nvm_dma_unmap(aq_mem); 140 | nvm_ctrl_free(ctrl); 141 | SCITerminate(); 142 | 143 | if (status != 0) 144 | { 145 | fprintf(stderr, "%d\n", status); 146 | } 147 | 148 | exit(status); 149 | } 150 | 
-------------------------------------------------------------------------------- /deprecated/examples/rpc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-samples) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | make_sample (rpc-server rpc-server "rpc_server.c;util.c;segment.c") 8 | set_sisci (rpc-server) 9 | 10 | make_sample (rpc-identify rpc-identify "rpc_identify.c;util.c;segment.c") 11 | set_sisci (rpc-identify) 12 | 13 | make_sample (rpc-dd rpc-dd "rpc_dd.c;util.c;segment.c") 14 | set_sisci (rpc-dd) 15 | 16 | make_sample (rpc-flush rpc-flush "rpc_flush.c;util.c;segment.c") 17 | set_sisci (rpc-flush) 18 | 19 | add_custom_target(rpc DEPENDS rpc-server rpc-identify rpc-dd rpc-flush) 20 | add_custom_target(rpc-clients DEPENDS rpc-identify rpc-dd rpc-flush) 21 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/rpc_flush.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "segment.h" 20 | #include "util.h" 21 | #include 22 | 23 | 24 | static int flush(nvm_queue_t* cq, nvm_queue_t* sq, uint32_t ns) 25 | { 26 | nvm_cmd_t cmd; 27 | memset(&cmd, 0, sizeof(cmd)); 28 | nvm_cpl_t* cpl; 29 | 30 | nvm_cmd_header(&cmd, NVM_DEFAULT_CID(sq), NVM_IO_FLUSH, ns); 31 | 32 | nvm_cmd_t* ptr = nvm_sq_enqueue(sq); 33 | if (ptr == NULL) 34 | { 35 | return ENOMEM; 36 | } 37 | 38 | *ptr = cmd; 39 | 40 | nvm_sq_submit(sq); 41 | 42 | while ((cpl = nvm_cq_dequeue(cq)) == NULL) 43 | { 44 | //usleep(1); 45 | } 46 | 47 | nvm_sq_update(sq); 48 | nvm_cq_update(cq); 49 | 50 | int status = NVM_ERR_STATUS(cpl); 
51 | fprintf(stderr, "%s\n", nvm_strerror(status)); 52 | 53 | return status; 54 | } 55 | 56 | 57 | int main(int argc, char** argv) 58 | { 59 | nvm_ctrl_t* ctrl; 60 | nvm_aq_ref rpc; 61 | sci_error_t scierr; 62 | struct segment segment; 63 | nvm_dma_t* dma; 64 | nvm_queue_t cq; 65 | nvm_queue_t sq; 66 | 67 | SCIInitialize(0, &scierr); 68 | if (scierr != SCI_ERR_OK) 69 | { 70 | exit(1); 71 | } 72 | 73 | int err = nvm_dis_ctrl_init(&ctrl, 0xc0c00); 74 | 75 | err = nvm_dis_rpc_bind(&rpc, ctrl, 0); 76 | 77 | err = segment_create(&segment, 123, 0x2000); 78 | 79 | err = dma_create(&dma, ctrl, &segment, 0); 80 | 81 | err = nvm_admin_cq_create(rpc, &cq, 1, dma, 0, 2); 82 | 83 | err = nvm_admin_sq_create(rpc, &sq, &cq, 1, dma, 1, 2); 84 | 85 | fprintf(stderr, "OK\n"); 86 | 87 | for (int i = 0; i < 10; ++i) 88 | { 89 | flush(&cq, &sq, 1); 90 | } 91 | 92 | nvm_admin_sq_delete(rpc, &sq, &cq); 93 | nvm_admin_cq_delete(rpc, &cq); 94 | dma_remove(dma, &segment, 0); 95 | segment_remove(&segment); 96 | nvm_rpc_unbind(rpc); 97 | nvm_ctrl_free(ctrl); 98 | 99 | 100 | SCITerminate(); 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/segment.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "segment.h" 11 | 12 | 13 | int segment_create(struct segment* segment, uint32_t segment_id, size_t size) 14 | { 15 | sci_error_t err; 16 | sci_error_t status; 17 | 18 | SCIOpen(&segment->sd, 0, &err); 19 | if (err != SCI_ERR_OK) 20 | { 21 | return EIO; 22 | } 23 | 24 | SCICreateSegment(segment->sd, &segment->segment, segment_id, size, NULL, NULL, SCI_FLAG_AUTO_ID, &err); 25 | if (err != SCI_ERR_OK) 26 | { 27 | SCIClose(segment->sd, 0, &status); 28 | 29 | if (err == SCI_ERR_SEGMENTID_USED) 30 | { 31 | return EEXIST; 32 | } 33 | 34 | return ENOSPC; 35 | } 
36 | 37 | segment->id = SCIGetLocalSegmentId(segment->segment); 38 | segment->size = size; 39 | return 0; 40 | } 41 | 42 | 43 | void segment_remove(struct segment* segment) 44 | { 45 | sci_error_t err; 46 | 47 | do 48 | { 49 | SCIRemoveSegment(segment->segment, 0, &err); 50 | } 51 | while (err == SCI_ERR_BUSY); 52 | 53 | SCIClose(segment->sd, 0, &err); 54 | } 55 | 56 | 57 | int dma_create(nvm_dma_t** window, const nvm_ctrl_t* ctrl, struct segment* segment, uint32_t adapter) 58 | { 59 | sci_error_t err; 60 | 61 | SCIPrepareSegment(segment->segment, adapter, 0, &err); 62 | if (err != SCI_ERR_OK) 63 | { 64 | return ENOSPC; 65 | } 66 | 67 | SCISetSegmentAvailable(segment->segment, adapter, 0, &err); 68 | if (err != SCI_ERR_OK) 69 | { 70 | return EIO; 71 | } 72 | 73 | int status = nvm_dis_dma_map_local(window, ctrl, adapter, segment->segment, true); 74 | if (status != 0) 75 | { 76 | do 77 | { 78 | SCISetSegmentUnavailable(segment->segment, adapter, 0, &err); 79 | } 80 | while (err == SCI_ERR_BUSY); 81 | 82 | return status; 83 | } 84 | 85 | return 0; 86 | } 87 | 88 | 89 | void dma_remove(nvm_dma_t* window, struct segment* segment, uint32_t adapter) 90 | { 91 | sci_error_t err; 92 | 93 | nvm_dma_unmap(window); 94 | 95 | do 96 | { 97 | SCISetSegmentUnavailable(segment->segment, adapter, 0, &err); 98 | } 99 | while (err == SCI_ERR_BUSY); 100 | } 101 | 102 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/segment.h: -------------------------------------------------------------------------------- 1 | #ifndef __DIS_NVM_EXAMPLES_SEGMENT_H__ 2 | #define __DIS_NVM_EXAMPLES_SEGMENT_H__ 3 | #ifdef __DIS_CLUSTER__ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | /* 12 | * Wrapper for SISCI segment descriptors. 
13 | */ 14 | struct segment 15 | { 16 | uint32_t id; // Segment ID 17 | sci_desc_t sd; // SISCI virtual device 18 | sci_local_segment_t segment; // Local segment descriptor 19 | size_t size; // Size of segment 20 | }; 21 | 22 | 23 | 24 | /* 25 | * Create a local segment and initialize the wrapper struct. 26 | */ 27 | int segment_create(struct segment* segment, uint32_t segment_id, size_t size); 28 | 29 | 30 | /* 31 | * Remove the local segment. 32 | */ 33 | void segment_remove(struct segment* segment); 34 | 35 | 36 | 37 | /* 38 | * Create a DMA window for the segment. 39 | */ 40 | int dma_create(nvm_dma_t** dma_window, const nvm_ctrl_t* ctrl, struct segment* segment, uint32_t dis_adapter); 41 | 42 | 43 | 44 | /* 45 | * Destroy the DMA window. 46 | */ 47 | void dma_remove(nvm_dma_t* dma_window, struct segment* segment, uint32_t dis_adapter); 48 | 49 | 50 | #endif // __DIS_CLUSTER__ 51 | #endif // __DIS_NVM_EXAMPLES_SEGMENT_H__ 52 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/util.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | int parse_u64(const char* str, uint64_t* num, int base) 14 | { 15 | char* endptr = NULL; 16 | uint64_t ul = strtoul(str, &endptr, base); 17 | 18 | if (endptr == NULL || *endptr != '\0') 19 | { 20 | return EINVAL; 21 | } 22 | 23 | *num = ul; 24 | return 0; 25 | } 26 | 27 | 28 | int parse_u32(const char* str, uint32_t* num, int base) 29 | { 30 | int status; 31 | uint64_t ul; 32 | 33 | status = parse_u64(str, &ul, base); 34 | 35 | if (status != 0 || ul > UINT_MAX) 36 | { 37 | return EINVAL; 38 | } 39 | 40 | *num = (uint32_t) ul; 41 | return status; 42 | } 43 | 44 | 45 | int parse_u16(const char* str, uint16_t* num, int base) 46 | { 47 | int status; 48 | uint64_t ul; 49 | 50 | status = 
parse_u64(str, &ul, base); 51 | 52 | if (status != 0 || ul > 0xffff) 53 | { 54 | return EINVAL; 55 | } 56 | 57 | *num = (uint16_t) ul; 58 | return status; 59 | } 60 | 61 | 62 | 63 | void print_ctrl_info(FILE* fp, const struct nvm_ctrl_info* info) 64 | { 65 | unsigned char vendor[4]; 66 | memcpy(vendor, &info->pci_vendor, sizeof(vendor)); 67 | 68 | char serial[21]; 69 | memset(serial, 0, 21); 70 | memcpy(serial, info->serial_no, 20); 71 | 72 | char model[41]; 73 | memset(model, 0, 41); 74 | memcpy(model, info->model_no, 40); 75 | 76 | char revision[9]; 77 | memset(revision, 0, 9); 78 | memcpy(revision, info->firmware, 8); 79 | 80 | fprintf(fp, "------------- Controller information -------------\n"); 81 | fprintf(fp, "PCI Vendor ID : %x %x\n", vendor[0], vendor[1]); 82 | fprintf(fp, "PCI Subsystem Vendor ID : %x %x\n", vendor[2], vendor[3]); 83 | fprintf(fp, "NVM Express version : %u.%u.%u\n", 84 | info->nvme_version >> 16, (info->nvme_version >> 8) & 0xff, info->nvme_version & 0xff); 85 | fprintf(fp, "Controller page size : %zu\n", info->page_size); 86 | fprintf(fp, "Max queue entries : %u\n", info->max_entries); 87 | fprintf(fp, "Serial Number : %s\n", serial); 88 | fprintf(fp, "Model Number : %s\n", model); 89 | fprintf(fp, "Firmware revision : %s\n", revision); 90 | fprintf(fp, "Max data transfer size : %zu bytes (%zu KiB)\n", info->max_data_size, info->max_data_size >> 10); 91 | fprintf(fp, "Max outstanding commands: %zu\n", info->max_out_cmds); 92 | fprintf(fp, "Max number of namespaces: %zu\n", info->max_n_ns); 93 | fprintf(fp, "--------------------------------------------------\n"); 94 | } 95 | 96 | 97 | void print_ns_info(FILE* fp, const struct nvm_ns_info* info) 98 | { 99 | fprintf(fp, "------------- Namespace information -------------\n"); 100 | fprintf(fp, "Namespace identifier : %x\n", info->ns_id); 101 | fprintf(fp, "Logical block size : %zu bytes\n", info->lba_data_size); 102 | fprintf(fp, "Namespace size : %zu blocks (%zu MiB)\n", info->size, 
info->size >> 20); 103 | fprintf(fp, "Namespace capacity : %zu blocks (%zu MiB)\n", info->capacity, info->capacity >> 20); 104 | fprintf(fp, "--------------------------------------------------\n"); 105 | } 106 | 107 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/util.h: -------------------------------------------------------------------------------- 1 | #ifndef __DISNVM_EXAMPLES_UTIL_H__ 2 | #define __DISNVM_EXAMPLES_UTIL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | /* 11 | * Parse an uint64_t from a string. 12 | */ 13 | int parse_u64(const char* str, uint64_t* number, int base); 14 | 15 | 16 | /* 17 | * Parse an uint32_t from a string. 18 | */ 19 | int parse_u32(const char* str, uint32_t* number, int base); 20 | 21 | 22 | /* 23 | * Parse an uint16_t from a string. 24 | */ 25 | int parse_u16(const char* str, uint16_t* number, int base); 26 | 27 | 28 | /* 29 | * Pretty print controller information. 30 | */ 31 | void print_ctrl_info(FILE* fp, const struct nvm_ctrl_info* info); 32 | 33 | 34 | /* 35 | * Pretty print namespace information. 
36 | */ 37 | void print_ns_info(FILE* fp, const struct nvm_ns_info* info); 38 | 39 | #endif // __DISNVM_EXAMPLES_UTIL_H__ 40 | -------------------------------------------------------------------------------- /include/bafs_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef __BAFS_PTR_H__ 2 | #define __BAFS_PTR_H__ 3 | 4 | #ifndef __device__ 5 | #define __device__ 6 | #endif 7 | #ifndef __host__ 8 | #define __host__ 9 | #endif 10 | #ifndef __forceinline__ 11 | #define __forceinline__ inline 12 | #endif 13 | 14 | #include "page_cache.h" 15 | #include 16 | 17 | template 18 | class bafs_ptr { 19 | private: 20 | array_t* h_pData; 21 | array_d_t* pData; 22 | uint64_t start_idx; 23 | public: 24 | __host__ 25 | void print_stats() const { 26 | if (h_pData) 27 | h_pData->print_reset_stats(); 28 | } 29 | __host__ __device__ bafs_ptr(): 30 | h_pData(NULL), pData(NULL),start_idx(0){ 31 | } 32 | // __host__ __device__ bafs_ptr(array_d_t* const pValue): 33 | // h_pData(NULL), pData(pValue),start_idx(0){ 34 | // } 35 | 36 | __host__ __device__ bafs_ptr(array_d_t* const pValue, const uint64_t start_off): 37 | h_pData(NULL), pData(pValue),start_idx(start_off){ 38 | } 39 | 40 | __host__ __device__ bafs_ptr(array_t* const pValue): 41 | h_pData(pValue), pData(pValue->d_array_ptr),start_idx(0){ 42 | 43 | } 44 | 45 | __host__ __device__ bafs_ptr(array_t* const pValue, const uint64_t start_off): 46 | h_pData(pValue), pData(pValue->d_array_ptr),start_idx(start_off){ 47 | } 48 | 49 | __host__ __device__ ~bafs_ptr(){} 50 | 51 | __host__ __device__ bafs_ptr(const bafs_ptr &var){ 52 | h_pData = var.h_pData; 53 | pData = var.pData; 54 | start_idx = var.start_idx; 55 | } 56 | 57 | __device__ T operator*(){ 58 | return (*pData)[start_idx]; 59 | } 60 | 61 | __host__ __device__ bafs_ptr& operator=(const bafs_ptr& obj) { 62 | if(*this == obj) 63 | return *this; 64 | else{ 65 | this->h_pData = obj.h_pData; 66 | this->pData = obj.pData; 67 | 
this->start_idx = obj.start_idx; 68 | } 69 | return *this; 70 | } 71 | 72 | template 73 | friend __host__ __device__ bool operator==(const bafs_ptr& lhs, const bafs_ptr& rhs); 74 | 75 | // template 76 | // friend __host__ __device__ bool operator==(bafs_ptr* lhs, const bafs_ptr& rhs); 77 | 78 | __host__ __device__ void operator()(const uint64_t i, const T val) { 79 | (*pData)(i, val); 80 | } 81 | __host__ __device__ T operator[](const uint64_t i) { 82 | return (*pData)[start_idx+i]; 83 | } 84 | 85 | __host__ __device__ const T operator[](const uint64_t i) const { 86 | return (*pData)[start_idx+i]; 87 | } 88 | 89 | __host__ __device__ bafs_ptr operator+(const uint64_t i){ 90 | uint64_t new_start_idx = this->start_idx+i; 91 | return bafs_ptr(this->pData, new_start_idx); 92 | } 93 | __host__ __device__ bafs_ptr operator-(const uint64_t i){ 94 | uint64_t new_start_idx = this->start_idx-i; 95 | return bafs_ptr(this->pData, new_start_idx); 96 | } 97 | //posfix operator 98 | __host__ __device__ bafs_ptr operator++(int){ 99 | bafs_ptr cpy = *this; 100 | this->start_idx += 1; 101 | return cpy; 102 | } 103 | //prefix operator 104 | __host__ __device__ bafs_ptr& operator++(){ 105 | this->start_idx += 1; 106 | return *this; 107 | } 108 | 109 | //posfix operator 110 | __host__ __device__ bafs_ptr operator--(int){ 111 | bafs_ptr cpy = *this; 112 | this->start_idx -= 1; 113 | return cpy; 114 | } 115 | //prefix operator 116 | __host__ __device__ bafs_ptr& operator--(){ 117 | this->start_idx -= 1; 118 | return *this; 119 | } 120 | 121 | __host__ __device__ void memcpy_to_array_aligned(const uint64_t src_idx, const uint64_t count, T* dest) const { 122 | pData->memcpy(src_idx, count, dest); 123 | } 124 | }; 125 | 126 | 127 | 128 | template 129 | __host__ __device__ 130 | bool operator==(const bafs_ptr& lhs, const bafs_ptr& rhs){ 131 | return (lhs.pData == rhs.pData && lhs.start_idx == rhs.start_idx && lhs.h_pData == rhs.h_pData); 132 | } 133 | 134 | // template 135 | // __host__ 
__device__ 136 | // bool operator==(bafs_ptr* lhs, const bafs_ptr& rhs){ 137 | // return (lhs->pData == rhs.pData && lhs->start_idx == rhs.start_idx); 138 | // } 139 | 140 | 141 | //#ifndef __CUDACC__ 142 | //#undef __device__ 143 | //#undef __host__ 144 | //#undef __forceinline__ 145 | //#endif 146 | 147 | #endif //__BAFS_PTR_H__ 148 | -------------------------------------------------------------------------------- /include/event.h: -------------------------------------------------------------------------------- 1 | #ifndef __BENCHMARK_EVENT_H__ 2 | #define __BENCHMARK_EVENT_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include "cuda.h" 9 | #include 10 | #include 11 | 12 | 13 | struct Event 14 | { 15 | cudaEvent_t event; 16 | 17 | inline Event(cudaStream_t stream = 0) 18 | { 19 | auto err = cudaEventCreateWithFlags(&event, cudaEventDefault); 20 | if (err != cudaSuccess) 21 | { 22 | throw std::runtime_error(std::string("Failed to create event: ") + cudaGetErrorString(err)); 23 | } 24 | 25 | err = cudaEventRecord(event, stream); 26 | if (err != cudaSuccess) 27 | { 28 | throw std::runtime_error(std::string("Failed to record event on stream: ") + cudaGetErrorString(err)); 29 | } 30 | 31 | } 32 | 33 | 34 | inline ~Event() 35 | { 36 | cudaEventDestroy(event); 37 | } 38 | 39 | 40 | inline double operator-(const Event& other) const 41 | { 42 | float msecs = 0; 43 | auto err = cudaEventElapsedTime(&msecs, other.event, event); 44 | if (err != cudaSuccess) 45 | { 46 | throw std::runtime_error(std::string("Could not calculate elapsed time: ") + cudaGetErrorString(err)); 47 | } 48 | 49 | return ((double) msecs) * 1e3; 50 | } 51 | }; 52 | 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /include/host_util.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_UTIL_H_ 2 | #define __HOST_UTIL_H_ 3 | 4 | #ifndef __device__ 5 
| #define __device__ 6 | #endif 7 | #ifndef __host__ 8 | #define __host__ 9 | #endif 10 | #ifndef __forceinline__ 11 | #define __forceinline__ inline 12 | #endif 13 | 14 | #include 15 | 16 | #ifndef __CUDACC__ 17 | 18 | template 19 | inline __host__ 20 | void __nanosleep(T ns) { 21 | struct timespec time1,time2; 22 | time1.tv_sec = 0; 23 | time2.tv_nsec = ns; 24 | nanosleep(&time1, &time2); 25 | } 26 | 27 | template 28 | inline __host__ 29 | T __activemask() { 30 | T var; 31 | (void) var; 32 | return (T)1; 33 | } 34 | 35 | template 36 | inline __host__ 37 | int __popc(T v) { 38 | if (sizeof(T) == 4) 39 | return __builtin_popcount((unsigned)v); 40 | if (sizeof(T) == 8) 41 | return __builtin_popcountll((unsigned long long)v); 42 | return 0; 43 | 44 | } 45 | 46 | template 47 | inline __host__ 48 | int __ffs(T v) { 49 | if (sizeof(T) == 4) 50 | return __builtin_ffs((int)v); 51 | if (sizeof(T) == 8) 52 | return __builtin_ffsll((long long)v); 53 | return 0; 54 | 55 | } 56 | 57 | template 58 | inline __host__ 59 | void __syncwarp(T mask) { 60 | (void) mask; 61 | return; 62 | } 63 | 64 | template 65 | inline __host__ 66 | T __shfl_sync(unsigned mask, T var, int srcLane, int width=32) { 67 | (void) mask; 68 | (void) srcLane; 69 | (void) width; 70 | return var; 71 | } 72 | 73 | 74 | template 75 | inline __host__ 76 | unsigned int __match_any_sync(unsigned mask, T var) { 77 | (void) mask; 78 | (void) var; 79 | return 1; 80 | } 81 | 82 | #endif 83 | 84 | //#ifndef __CUDACC__ 85 | //#undef __device__ 86 | //#undef __host__ 87 | //#undef __forceinline__ 88 | //#endif 89 | 90 | #endif // __HOST_UTIL_H_ 91 | -------------------------------------------------------------------------------- /include/nvm_admin.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_ADMIN_H__ 2 | #define __NVM_ADMIN_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 
#include 12 | 13 | 14 | 15 | /* 16 | * Get controller information. 17 | */ 18 | int nvm_admin_ctrl_info(nvm_aq_ref ref, // AQ pair reference 19 | struct nvm_ctrl_info* info, // Controller information structure 20 | void* buffer, // Temporary buffer (must be at least 4 KB) 21 | uint64_t ioaddr); // Bus address of buffer as seen by the controller 22 | 23 | 24 | 25 | /* 26 | * Get namespace information. 27 | */ 28 | int nvm_admin_ns_info(nvm_aq_ref ref, // AQ pair reference 29 | struct nvm_ns_info* info, // NVM namespace information 30 | uint32_t ns_id, // Namespace identifier 31 | void* buffer, // Temporary buffer (must be at least 4 KB) 32 | uint64_t ioaddr); // Bus address of buffer as seen by controller 33 | 34 | 35 | 36 | /* 37 | * Make controller allocate and reserve queues. 38 | */ 39 | int nvm_admin_set_num_queues(nvm_aq_ref ref, uint16_t n_cqs, uint16_t n_sqs); 40 | 41 | 42 | /* 43 | * Retrieve the number of allocated queues. 44 | */ 45 | int nvm_admin_get_num_queues(nvm_aq_ref ref, uint16_t* n_cqs, uint16_t* n_sqs); 46 | 47 | 48 | /* 49 | * Make controller allocate number of queues before issuing them. 50 | */ 51 | int nvm_admin_request_num_queues(nvm_aq_ref ref, uint16_t* n_cqs, uint16_t* n_sqs); 52 | 53 | 54 | /* 55 | * Create IO completion queue (CQ) 56 | * Caller must set queue memory to zero manually. 57 | * 58 | * If number of queue entries (qs) exceeds a page, 59 | * DMA memory must be contiguous. 60 | * 61 | * If qs is 0, the API will use one page for queue memory. 62 | */ 63 | int nvm_admin_cq_create(nvm_aq_ref ref, // AQ pair reference 64 | nvm_queue_t* cq, // CQ descriptor 65 | uint16_t id, // Queue identifier 66 | const nvm_dma_t* dma, // Queue memory handle 67 | size_t page_offset, // Number of pages to offset into the handle 68 | size_t qs, // Queue size/depth 69 | bool need_prp = false); // non-contiguous queue 70 | 71 | /* 72 | * Delete IO completion queue (CQ) 73 | * After calling this, the queue is no longer used and must be recreated. 
74 | * All associated submission queues must be deleted first. 75 | */ 76 | int nvm_admin_cq_delete(nvm_aq_ref ref, nvm_queue_t* cq); 77 | 78 | 79 | 80 | /* 81 | * Create IO submission queue (SQ) 82 | * Caller must set queue memory to zero manually. 83 | * 84 | * If number of queue entries (qs) exceeds a page, 85 | * DMA memory must be contiguous. 86 | * 87 | * If qs is 0, the API will use one page for queue memory. 88 | */ 89 | int nvm_admin_sq_create(nvm_aq_ref ref, // AQ pair reference 90 | nvm_queue_t* sq, // SQ descriptor 91 | const nvm_queue_t* cq, // Descriptor to paired CQ 92 | uint16_t id, // Queue identifier 93 | const nvm_dma_t* dma, // Queue memory handle 94 | size_t page_offset, // Number of pages to offset into the handle 95 | size_t qs, // Number of pages to use 96 | bool need_prp = false); // non-contiguous queue 97 | 98 | 99 | 100 | /* 101 | * Delete IO submission queue (SQ) 102 | * After calling this, the queue is no longer used and must be recreated. 103 | */ 104 | int nvm_admin_sq_delete(nvm_aq_ref ref, 105 | nvm_queue_t* sq, 106 | const nvm_queue_t* cq); 107 | 108 | 109 | /* 110 | * Get log page. 111 | */ 112 | int nvm_admin_get_log_page(nvm_aq_ref ref, 113 | uint32_t ns_id, 114 | void* ptr, 115 | uint64_t ioaddr, 116 | uint8_t log_id, 117 | uint64_t log_offset); 118 | 119 | 120 | #endif /* #ifdef __NVM_ADMIN_H__ */ 121 | -------------------------------------------------------------------------------- /include/nvm_aq.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_AQ_H__ 2 | #define __NVM_AQ_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | 15 | /* 16 | * Create admin queue pair 17 | * 18 | * Take exclusive ownership of an NVM controller. This function resets the 19 | * controller and configures NVM admin queues. 
20 | * 21 | * Returns a reference handle that can be used for admin RPC calls. 22 | */ 23 | int nvm_aq_create(nvm_aq_ref* ref, 24 | const nvm_ctrl_t* ctrl, 25 | const nvm_dma_t* dma_window); 26 | 27 | 28 | /* 29 | * Destroy admin queues and references. 30 | * 31 | * Send NVM abort command to controller and deallocate admin queues. 32 | * 33 | * After calling this function, all admin queue references are invalid. 34 | * This also means that remote references will no longer be valid. 35 | * 36 | * This function will also work for unbinding remote references. 37 | */ 38 | void nvm_aq_destroy(nvm_aq_ref ref); 39 | 40 | 41 | 42 | //int nvm_tcp_rpc_enable(nvm_aq_ref ref, uint16_t port, nvm_rpc_cb_t filter, void* data); 43 | //int nvm_tcp_rpc_disable(nvm_aq_ref ref, uint16_t port); 44 | 45 | 46 | 47 | #ifdef __DIS_CLUSTER__ 48 | 49 | 50 | /* 51 | * Callback function invoked whenever a remote NVM admin command is received. 52 | * Should indicate whether or not a remote admin command is accepted and can 53 | * be enqueued by using the return value. 54 | * 55 | * The remote command can also be modified if necessary. 56 | */ 57 | typedef bool (*nvm_dis_rpc_cb_t)(nvm_cmd_t* cmd, uint32_t dis_adapter, uint32_t dis_node_id); 58 | 59 | 60 | 61 | /* 62 | * Enable remote admin commands. 63 | * Allows remote processes to relay NVM admin commands to the local process. 64 | */ 65 | int nvm_dis_rpc_enable(nvm_aq_ref ref, // NVM admin queue-pair reference 66 | uint32_t dis_adapter, // Local adapter to enable interrupt on 67 | nvm_dis_rpc_cb_t filter); // Filter callback (can be NULL) 68 | 69 | 70 | 71 | /* 72 | * Disable remote admin commands. 73 | * Stop processing admin commands from remote processes. 
74 | */ 75 | void nvm_dis_rpc_disable(nvm_aq_ref ref, uint32_t dis_adapter); 76 | 77 | #endif /* __DIS_CLUSTER__ */ 78 | 79 | 80 | 81 | 82 | #endif /* #ifdef __NVM_AQ_H__ */ 83 | -------------------------------------------------------------------------------- /include/nvm_ctrl.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_CTRL_H__ 2 | #define __NVM_CTRL_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __DIS_CLUSTER__ 14 | #include 15 | #endif 16 | 17 | 18 | 19 | /* 20 | * Minimum size of mapped controller memory. 21 | */ 22 | #define NVM_CTRL_MEM_MINSIZE 0x2000 23 | 24 | 25 | 26 | #if defined (__unix__) 27 | /* 28 | * Initialize NVM controller handle. 29 | * 30 | * Read from controller registers and initialize controller handle. 31 | * This function should be used when using the kernel module or to manually 32 | * read from sysfs. 33 | * 34 | * Note: fd must be opened with O_RDWR and O_NONBLOCK 35 | */ 36 | int nvm_ctrl_init(nvm_ctrl_t** ctrl, int fd); 37 | #endif 38 | 39 | 40 | 41 | /* 42 | * Initialize NVM controller handle. 43 | * 44 | * Read from controller registers and initialize the controller handle using 45 | * a memory-mapped pointer to the PCI device BAR. 46 | * 47 | * This function should be used when neither SmartIO nor the disnvme kernel 48 | * module are used. 49 | * 50 | * Note: ctrl_mem must be at least NVM_CTRL_MEM_MINSIZE large and mapped 51 | * as IO memory. See arguments for mmap() for more info. 52 | */ 53 | int nvm_raw_ctrl_init(nvm_ctrl_t** ctrl, volatile void* mm_ptr, size_t mm_size); 54 | 55 | 56 | 57 | /* 58 | * Release controller handle. 59 | */ 60 | void nvm_ctrl_free(nvm_ctrl_t* ctrl); 61 | 62 | 63 | 64 | /* 65 | * Reset NVM controller. 66 | * 67 | * The queue memory must be memset to zero and be exactly one page size large. 
68 | * IO addresses must align to the controller page size. 69 | * 70 | * Note: The controller must be unbound from any driver before attempting to 71 | * reset the controller. 72 | * 73 | * Note: This function is implicitly called by the controller manager, so it 74 | * should not be necessary to call it directly. 75 | */ 76 | int nvm_raw_ctrl_reset(const nvm_ctrl_t* ctrl, uint64_t acq_ioaddr, uint64_t asq_ioaddr); 77 | 78 | 79 | 80 | #ifdef __DIS_CLUSTER__ 81 | /* 82 | * Initialize NVM controller handle. 83 | * 84 | * Read from device registers and initialize controller handle. 85 | * This function should be used when SmartIO is being used. 86 | */ 87 | int nvm_dis_ctrl_init(nvm_ctrl_t** ctrl, uint32_t smartio_fdid); 88 | #endif 89 | 90 | 91 | 92 | #ifdef __DIS_CLUSTER__ 93 | int nvm_dis_ctrl_map_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev, uint64_t* ioaddr); 94 | #endif 95 | 96 | 97 | 98 | #ifdef __DIS_CLUSTER__ 99 | void nvm_dis_ctrl_unmap_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev); 100 | #endif 101 | 102 | 103 | #endif /* __NVM_CTRL_H__ */ 104 | -------------------------------------------------------------------------------- /include/nvm_dma.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_DMA_H__ 2 | #define __NVM_DMA_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __DIS_CLUSTER__ 14 | #include 15 | #endif 16 | 17 | 18 | 19 | /* 20 | * Create DMA mapping descriptor from physical/bus addresses. 21 | * 22 | * Create a DMA mapping descriptor, describing a region of memory that is 23 | * accessible for the NVM controller. The caller must supply physical/bus 24 | * addresses of physical memory pages, page size and total number of pages. 
25 | * As the host's page size may differ from the controller's page size (MPS), 26 | * this function will calculate the necessary offsets into the actual memory 27 | * pages. 28 | * 29 | * While virtual memory is assumed to be continuous, the physical pages do not 30 | * need to be contiguous. Physical/bus addresses must be aligned to the 31 | * controller's page size. 32 | * 33 | * Note: vaddr can be NULL. 34 | */ 35 | int nvm_dma_map(nvm_dma_t** map, // Mapping descriptor reference 36 | const nvm_ctrl_t* ctrl, // NVM controller reference 37 | void* vaddr, // Pointer to userspace memory (can be NULL if not required) 38 | size_t page_size, // Physical page size 39 | size_t n_pages, // Number of pages to map 40 | const uint64_t* page_addrs); // List of physical/bus addresses to the pages 41 | 42 | 43 | 44 | /* 45 | * Create DMA mapping descriptor using offsets from a previously 46 | * created DMA descriptor. 47 | */ 48 | int nvm_dma_remap(nvm_dma_t** new_map, const nvm_dma_t* other_map); 49 | 50 | 51 | 52 | /* 53 | * Remove DMA mapping descriptor. 54 | * 55 | * Unmap DMA mappings (if necessary) and remove the descriptor. 56 | * This function destroys the descriptor. 57 | */ 58 | void nvm_dma_unmap(nvm_dma_t* map); 59 | 60 | 61 | 62 | /* 63 | * Create DMA mapping descriptor from virtual address using the kernel module. 64 | * This function is similar to nvm_dma_map, except the user is not required 65 | * to pass physical/bus addresses. 66 | * 67 | * Note: vaddr can not be NULL, and must be aligned to system page size. 68 | */ 69 | int nvm_dma_map_host(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* vaddr, size_t size); 70 | 71 | 72 | 73 | //#if ( defined( __CUDA__ ) || defined( __CUDACC__ ) ) 74 | 75 | /* 76 | * Create DMA mapping descriptor from CUDA device pointer using the kernel 77 | * module. This function is similar to nvm_dma_map_host, except the memory 78 | * pointer must be a valid CUDA device pointer (see manual for 79 | * cudaGetPointerAttributes). 
80 | * 81 | * The controller handle must have been created using the kernel module. 82 | * 83 | * Note: vaddr can not be NULL, and must be aligned to GPU page size. 84 | */ 85 | int nvm_dma_map_device(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* devptr, size_t size); 86 | 87 | //#endif /* __CUDA__ */ 88 | 89 | 90 | 91 | #if defined( __DIS_CLUSTER__ ) 92 | 93 | /* 94 | * Create DMA mapping descriptor from local SISCI segment. 95 | * 96 | * Create DMA mapping descriptor from a local segment handler, and 97 | * reverse-map the segment making it accessible from the controller. 98 | * As segment memory is always continuous and page-aligned, it is not 99 | * necessary to calculate physical memory addresses. However, the user 100 | * should ensure that the mapping size is aligned to a controller 101 | * page-size (MPS). 102 | * 103 | * The controller handle must have been created using SmartIO, and 104 | * the segment must already be prepared on the local adapter. 105 | */ 106 | int nvm_dis_dma_map_local(nvm_dma_t** map, // Mapping descriptor reference 107 | const nvm_ctrl_t* ctrl, // NVM controller handle 108 | uint32_t dis_adapter, // Local DIS adapter segment is prepared on 109 | sci_local_segment_t segment, // Local segment descriptor 110 | bool map_vaddr); // Should function also map segment into local space 111 | 112 | #endif /* __DIS_CLUSTER__ */ 113 | 114 | 115 | 116 | #if defined( __DIS_CLUSTER__ ) 117 | 118 | /* 119 | * Create DMA mapping descriptor from remote SISCI segment. 120 | * 121 | * Create DMA mapping descriptor from a remote segment handler, and 122 | * reverse-map the segment making it accessible from the controller. 123 | * This function is similar to nvm_dis_dma_map_local. 124 | * 125 | * The remote segment must already be connected. 126 | * 127 | * Note: You should generally prefer write combining, except 128 | * for mapped device registers that require fine-grained writes. 
129 | */ 130 | int nvm_dis_dma_map_remote(nvm_dma_t** map, // Mapping descriptor reference 131 | const nvm_ctrl_t* ctrl, // NVM controller handle 132 | sci_remote_segment_t segment,// Remote segment descriptor 133 | bool map_vaddr, // Should function also map segment into local space 134 | bool map_wc); // Should function map with write combining 135 | 136 | #endif /* __DIS_CLUSTER__ */ 137 | 138 | 139 | 140 | #if ( !defined( __CUDA__ ) && !defined( __CUDACC__ ) ) && ( defined (__unix__) ) 141 | /* 142 | * Short-hand function for allocating a page aligned buffer and mapping it 143 | * for the controller. 144 | * 145 | * Note: this function will not work if you are using the CUDA API 146 | */ 147 | int nvm_dma_create(nvm_dma_t** map, const nvm_ctrl_t* ctrl, size_t size); 148 | #endif 149 | 150 | 151 | 152 | #if defined( __DIS_CLUSTER__ ) 153 | /* 154 | * Create device memory segment and map it for the controller. 155 | * Short-hand function for creating a device memory segment. 156 | * If mem_hints is 0, the API will create a local segment instead. 157 | */ 158 | int nvm_dis_dma_create(nvm_dma_t** map, const nvm_ctrl_t* ctrl, size_t size, unsigned int mem_hints); 159 | 160 | #endif /* __DIS_CLUSTER__ */ 161 | 162 | 163 | 164 | #if defined ( __DIS_CLUSTER__ ) 165 | 166 | /* 167 | * Note: This function requires the IOMMU to be enabled. 
168 | */ 169 | int nvm_dis_dma_map_host(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* vaddr, size_t size); 170 | 171 | #endif 172 | 173 | 174 | #if ( ( defined( __CUDA__ ) || defined( __CUDACC__ ) ) && defined( __DIS_CLUSTER__ ) ) 175 | 176 | int nvm_dis_dma_map_device(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* devptr, size_t size); 177 | 178 | #endif /* __DIS_CLUSTER__ && __CUDA__ */ 179 | 180 | 181 | 182 | 183 | #endif /* __NVM_DMA_H__ */ 184 | -------------------------------------------------------------------------------- /include/nvm_error.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_ERROR_H__ 2 | #define __NVM_ERROR_H__ 3 | 4 | // #ifndef __CUDACC__ 5 | // #define __device__ 6 | // #define __host__ 7 | // #endif 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | 15 | 16 | /* Get the status code type of an NVM completion. */ 17 | #define NVM_ERR_SCT(cpl) ((uint8_t) _RB(*NVM_CPL_STATUS(cpl), 11, 9)) 18 | 19 | 20 | 21 | /* Get the status code of an NVM completion */ 22 | #define NVM_ERR_SC(cpl) ((uint8_t) _RB(*NVM_CPL_STATUS(cpl), 8, 1)) 23 | 24 | 25 | 26 | /* Is do not retry flag set? */ 27 | #define NVM_ERR_DNR(cpl) (!!_RB(*NVM_CPL_STATUS(cpl), 15, 15)) 28 | 29 | 30 | 31 | /* Is there more? (Get log page) */ 32 | #define NVM_ERR_MORE(cpl) (!!_RB(*NVM_CPL_STATUS(cpl), 14, 14)) 33 | 34 | 35 | 36 | /* Extract value from status field from NVM completion */ 37 | #define NVM_ERR_STATUS(cpl) \ 38 | ((int) ( (cpl) != NULL ? -((NVM_ERR_SCT(cpl) << 8) | NVM_ERR_SC(cpl)) : 0 )) 39 | 40 | 41 | /* Convenience macro for checking if an NVM completion indicates success. */ 42 | #define NVM_ERR_OK(cpl) ( !NVM_ERR_SCT(cpl) && !NVM_ERR_SC(cpl) ) 43 | 44 | 45 | 46 | /* Pack errno and NVM completion status into a single status variable */ 47 | #define NVM_ERR_PACK(cpl, err) \ 48 | ((int) ( (err) != 0 ? 
(err) : NVM_ERR_STATUS(cpl) ) ) 49 | 50 | 51 | 52 | /* Extract values from packed status */ 53 | #define NVM_ERR_UNPACK_ERRNO(status) ((status > 0) ? (status) : 0) 54 | #define NVM_ERR_UNPACK_SCT(status) ((status < 0) ? (((-status) >> 8) & 0xff) : 0) 55 | #define NVM_ERR_UNPACK_SC(status) ((status < 0) ? ((-status) & 0xff) : 0) 56 | 57 | 58 | /* Check if everything is okay */ 59 | #define nvm_ok(status) ( !(status) ) 60 | 61 | 62 | 63 | /* 64 | * Get an error string associated with the status code type and status code. 65 | * This function calls strerror() if the packed status is a regular errno. 66 | */ 67 | const char* nvm_strerror(int status); 68 | 69 | 70 | 71 | 72 | #endif /* __NVM_ERROR_H__ */ 73 | -------------------------------------------------------------------------------- /include/nvm_io.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_IO_H__ 2 | #define __NVM_IO_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | //#include "page_cache.h" 9 | 10 | 11 | 12 | 13 | #endif // __NVM_IO_H__ 14 | -------------------------------------------------------------------------------- /include/nvm_rpc.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_RPC_H__ 2 | #define __NVM_RPC_H__ 3 | 4 | // #ifndef __CUDACC__ 5 | // #define __device__ 6 | // #define __host__ 7 | // #endif 8 | 9 | #include 10 | #include 11 | 12 | 13 | //int nvm_tcp_rpc_bind(nvm_aq_ref* ref, const char* hostname, uint16_t port); 14 | 15 | 16 | 17 | #ifdef __DIS_CLUSTER__ 18 | 19 | /* 20 | * Bind admin queue-pair reference to remote handle. 21 | * The user should call the nvm_aq_destroy() to remove binding. 22 | */ 23 | int nvm_dis_rpc_bind(nvm_aq_ref* ref, const nvm_ctrl_t* ctrl, uint32_t adapter); 24 | 25 | #endif 26 | 27 | 28 | 29 | /* 30 | * Unbind admin queue-pair reference. 
31 | * If reference is not bound (i.e., it is local), this function will do nothing. 32 | */ 33 | void nvm_rpc_unbind(nvm_aq_ref ref); 34 | 35 | 36 | 37 | /* 38 | * Relay NVM admin command. 39 | * 40 | * Use a local AQ pair reference to relay a NVM admin command to ASQ and get 41 | * a corresponding completion from the ACQ. This function will block until 42 | * either a timeout occurs or until the command is completed. 43 | * 44 | * Return value: 45 | * - If return value is zero, it indicates success. 46 | * - If return value is positive, it indicates an errno. 47 | * - If return value is negative, it indicates an NVM error. 48 | * 49 | * Use the error handling macros in nvm_error.h 50 | * 51 | * Note: The command can be modified. 52 | */ 53 | int nvm_raw_rpc(nvm_aq_ref ref, nvm_cmd_t* cmd, nvm_cpl_t* cpl); 54 | 55 | 56 | 57 | 58 | #endif /* #ifdef __NVM_RPC_H__ */ 59 | -------------------------------------------------------------------------------- /include/util.h: -------------------------------------------------------------------------------- 1 | #ifndef __UTIL_H__ 2 | #define __UTIL_H__ 3 | 4 | #ifndef __device__ 5 | #define __device__ 6 | #endif 7 | #ifndef __host__ 8 | #define __host__ 9 | #endif 10 | #ifndef __forceinline__ 11 | #define __forceinline__ inline 12 | #endif 13 | 14 | 15 | 16 | #include "cuda.h" 17 | #include "nvm_util.h" 18 | #include "host_util.h" 19 | //#include 20 | #include 21 | 22 | 23 | #define cuda_err_chk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 24 | 25 | #ifndef __CUDACC__ 26 | inline void gpuAssert(int code, const char *file, int line, bool abort=false) 27 | { 28 | if (code != 0) 29 | { 30 | fprintf(stderr,"Assert: %i %s %d\n", code, file, line); 31 | if (abort) exit(1); 32 | } 33 | } 34 | #else 35 | 36 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=false) 37 | { 38 | if (code != cudaSuccess) 39 | { 40 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 41 | if 
(abort) exit(1); 42 | } 43 | } 44 | #endif 45 | 46 | #define CEIL(X, Y, Z) ((X + Y - 1) >> Z) 47 | 48 | 49 | #ifndef HEXDUMP_COLS 50 | #define HEXDUMP_COLS 16 51 | #endif 52 | inline __device__ void hexdump(void *mem, unsigned int len) 53 | { 54 | unsigned int i; 55 | 56 | for(i = 0; i < len + ((len % HEXDUMP_COLS) ? (HEXDUMP_COLS - len % HEXDUMP_COLS) : 0); i++) 57 | { 58 | /* print offset */ 59 | if(i % HEXDUMP_COLS == 0) 60 | { 61 | printf("\n0x%06x: ", i); 62 | } 63 | 64 | /* print hex data */ 65 | if(i < len) 66 | { 67 | printf("%02x ", 0xFF & ((char*)mem)[i]); 68 | } 69 | else /* end of block, just aligning for ASCII dump */ 70 | { 71 | printf(" "); 72 | } 73 | 74 | /* print ASCII dump */ 75 | // if(i % HEXDUMP_COLS == (HEXDUMP_COLS - 1)) 76 | // { 77 | // for(j = i - (HEXDUMP_COLS - 1); j <= i; j++) 78 | // { 79 | // if(j >= len) /* end of block, not really printing */ 80 | // { 81 | // printf(' '); 82 | // } 83 | // else if(isprint(((char*)mem)[j])) /* printable char */ 84 | // { 85 | // printf(0xFF & ((char*)mem)[j]); 86 | // } 87 | // else /* other char */ 88 | // { 89 | // putchar('.'); 90 | // } 91 | // } 92 | // putchar('\n'); 93 | // } 94 | } 95 | printf("\n"); 96 | } 97 | 98 | template 99 | void __ignore(T &&) 100 | { } 101 | /*warp memcpy, assumes alignment at type T and num is a count in type T*/ 102 | template 103 | inline __device__ 104 | void warp_memcpy(T* dest, const T* src, size_t num) { 105 | #ifndef __CUDACC__ 106 | uint32_t mask = 1; 107 | #else 108 | uint32_t mask = __activemask(); 109 | #endif 110 | uint32_t active_cnt = __popc(mask); 111 | uint32_t lane = lane_id(); 112 | uint32_t prior_mask = mask >> (32 - lane); 113 | uint32_t prior_count = __popc(prior_mask); 114 | 115 | for(size_t i = prior_count; i < num; i+=active_cnt) 116 | dest[i] = src[i]; 117 | } 118 | 119 | //#ifndef __CUDACC__ 120 | //#undef __device__ 121 | //#undef __host__ 122 | //#undef __forceinline__ 123 | //#endif 124 | 125 | #endif // __UTIL_H__ 126 | 
-------------------------------------------------------------------------------- /module/Makefile.in: -------------------------------------------------------------------------------- 1 | 2 | ifneq ($(KERNELRELEASE),) 3 | src := @module_root@ 4 | obj-m := @CMAKE_PROJECT_NAME@.o 5 | @CMAKE_PROJECT_NAME@-objs := pci.o list.o ctrl.o map.o 6 | ccflags-y += @module_ccflags@ 7 | KBUILD_EXTRA_SYMBOLS := @module_symbols@ 8 | else 9 | 10 | .PHONY: default reload unload load clean install 11 | 12 | default: 13 | $(MAKE) -C @KERNEL@ M=@module_output@ modules 14 | 15 | clean: 16 | $(MAKE) -C @KERNEL@ M=@module_output@ clean 17 | 18 | reload: unload load 19 | 20 | unload: 21 | -rmmod @CMAKE_PROJECT_NAME@.ko 22 | 23 | load: 24 | insmod @CMAKE_PROJECT_NAME@.ko max_num_ctrls=64 25 | 26 | install: default 27 | $(MAKE) -C @KERNEL@ M=@module_output@ modules_install 28 | #$(MAKE) INSTALL_MOD_DIR=@CMAKE_PROJECT_NAME@ -C @KERNEL@ M=@module_output@ modules_install 29 | 30 | endif 31 | 32 | -------------------------------------------------------------------------------- /module/ctrl.c: -------------------------------------------------------------------------------- 1 | #include "ctrl.h" 2 | #include "list.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | 12 | struct ctrl* ctrl_get(struct list* list, struct class* cls, struct pci_dev* pdev, int number) 13 | { 14 | struct ctrl* ctrl = NULL; 15 | 16 | ctrl = kmalloc(sizeof(struct ctrl), GFP_KERNEL | GFP_NOWAIT); 17 | if (ctrl == NULL) 18 | { 19 | printk(KERN_CRIT "Failed to allocate controller reference\n"); 20 | return ERR_PTR(-ENOMEM); 21 | } 22 | 23 | list_node_init(&ctrl->list); 24 | 25 | ctrl->pdev = pdev; 26 | ctrl->number = number; 27 | ctrl->rdev = 0; 28 | ctrl->cls = cls; 29 | ctrl->chrdev = NULL; 30 | 31 | snprintf(ctrl->name, sizeof(ctrl->name), "%s%d", KBUILD_MODNAME, ctrl->number); 32 | ctrl->name[sizeof(ctrl->name) - 1] = '\0'; 33 | 34 | list_insert(list, &ctrl->list); 35 | 36 | 
return ctrl; 37 | } 38 | 39 | 40 | 41 | void ctrl_put(struct ctrl* ctrl) 42 | { 43 | if (ctrl != NULL) 44 | { 45 | list_remove(&ctrl->list); 46 | ctrl_chrdev_remove(ctrl); 47 | kfree(ctrl); 48 | } 49 | } 50 | 51 | 52 | 53 | struct ctrl* ctrl_find_by_pci_dev(const struct list* list, const struct pci_dev* pdev) 54 | { 55 | const struct list_node* element = list_next(&list->head); 56 | struct ctrl* ctrl; 57 | 58 | while (element != NULL) 59 | { 60 | ctrl = container_of(element, struct ctrl, list); 61 | 62 | if (ctrl->pdev == pdev) 63 | { 64 | return ctrl; 65 | } 66 | 67 | element = list_next(element); 68 | } 69 | 70 | return NULL; 71 | } 72 | 73 | 74 | 75 | struct ctrl* ctrl_find_by_inode(const struct list* list, const struct inode* inode) 76 | { 77 | const struct list_node* element = list_next(&list->head); 78 | struct ctrl* ctrl; 79 | 80 | while (element != NULL) 81 | { 82 | ctrl = container_of(element, struct ctrl, list); 83 | 84 | if (&ctrl->cdev == inode->i_cdev) 85 | { 86 | return ctrl; 87 | } 88 | 89 | element = list_next(element); 90 | } 91 | 92 | return NULL; 93 | } 94 | 95 | 96 | 97 | int ctrl_chrdev_create(struct ctrl* ctrl, dev_t first, const struct file_operations* fops) 98 | { 99 | int err; 100 | struct device* chrdev = NULL; 101 | 102 | if (ctrl->chrdev != NULL) 103 | { 104 | printk(KERN_WARNING "Character device is already created\n"); 105 | return 0; 106 | } 107 | 108 | ctrl->rdev = MKDEV(MAJOR(first), MINOR(first) + ctrl->number); 109 | 110 | cdev_init(&ctrl->cdev, fops); 111 | err = cdev_add(&ctrl->cdev, ctrl->rdev, 1); 112 | if (err != 0) 113 | { 114 | printk(KERN_ERR "Failed to add cdev\n"); 115 | return err; 116 | } 117 | 118 | chrdev = device_create(ctrl->cls, NULL, ctrl->rdev, NULL, ctrl->name); 119 | if (IS_ERR(chrdev)) 120 | { 121 | cdev_del(&ctrl->cdev); 122 | printk(KERN_ERR "Failed to create character device\n"); 123 | return PTR_ERR(chrdev); 124 | } 125 | 126 | ctrl->chrdev = chrdev; 127 | 128 | printk(KERN_INFO "Character device /dev/%s 
created (%d.%d)\n", 129 | ctrl->name, MAJOR(ctrl->rdev), MINOR(ctrl->rdev)); 130 | 131 | return 0; 132 | } 133 | 134 | 135 | 136 | void ctrl_chrdev_remove(struct ctrl* ctrl) 137 | { 138 | if (ctrl->chrdev != NULL) 139 | { 140 | device_destroy(ctrl->cls, ctrl->rdev); 141 | cdev_del(&ctrl->cdev); 142 | ctrl->chrdev = NULL; 143 | 144 | printk(KERN_DEBUG "Character device /dev/%s removed (%d.%d)\n", 145 | ctrl->name, MAJOR(ctrl->rdev), MINOR(ctrl->rdev)); 146 | } 147 | } 148 | 149 | -------------------------------------------------------------------------------- /module/ctrl.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_HELPER_CTRL_H__ 2 | #define __LIBNVM_HELPER_CTRL_H__ 3 | 4 | #include "list.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | /* 12 | * Represents an NVM controller. 13 | */ 14 | struct ctrl 15 | { 16 | struct list_node list; /* Linked list head */ 17 | struct pci_dev* pdev; /* Reference to physical PCI device */ 18 | char name[64]; /* Character device name */ 19 | int number; /* Controller number */ 20 | dev_t rdev; /* Character device register */ 21 | struct class* cls; /* Character device class */ 22 | struct cdev cdev; /* Character device */ 23 | struct device* chrdev; /* Character device handle */ 24 | }; 25 | 26 | 27 | 28 | /* 29 | * Acquire a controller reference. 30 | */ 31 | struct ctrl* ctrl_get(struct list* list, struct class* cls, struct pci_dev* pdev, int number); 32 | 33 | 34 | 35 | /* 36 | * Release controller reference. 37 | */ 38 | void ctrl_put(struct ctrl* ctrl); 39 | 40 | 41 | 42 | /* 43 | * Find controller device. 44 | */ 45 | struct ctrl* ctrl_find_by_pci_dev(const struct list* list, const struct pci_dev* pdev); 46 | 47 | 48 | 49 | /* 50 | * Find controller reference. 51 | */ 52 | struct ctrl* ctrl_find_by_inode(const struct list* list, const struct inode* inode); 53 | 54 | 55 | 56 | /* 57 | * Create character device and set up file operations. 
58 | */ 59 | int ctrl_chrdev_create(struct ctrl* ctrl, 60 | dev_t first, 61 | const struct file_operations* fops); 62 | 63 | 64 | 65 | /* 66 | * Remove character device. 67 | */ 68 | void ctrl_chrdev_remove(struct ctrl* ctrl); 69 | 70 | 71 | 72 | #endif /* __LIBNVM_HELPER_CTRL_H__ */ 73 | -------------------------------------------------------------------------------- /module/list.c: -------------------------------------------------------------------------------- 1 | #include "list.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | 10 | void list_init(struct list* list) 11 | { 12 | list->head.list = list; 13 | list->head.prev = &list->head; 14 | list->head.next = &list->head; 15 | 16 | spin_lock_init(&list->lock); 17 | } 18 | 19 | 20 | 21 | void list_remove(struct list_node* element) 22 | { 23 | if (likely(element != NULL && element->list != NULL && element != &element->list->head)) 24 | { 25 | spin_lock(&element->list->lock); 26 | element->prev->next = element->next; 27 | element->next->prev = element->prev; 28 | spin_unlock(&element->list->lock); 29 | 30 | element->list = NULL; 31 | element->next = NULL; 32 | element->prev = NULL; 33 | } 34 | } 35 | 36 | 37 | 38 | void list_insert(struct list* list, struct list_node* element) 39 | { 40 | struct list_node* last = NULL; 41 | 42 | spin_lock(&list->lock); 43 | last = list->head.prev; 44 | last->next = element; 45 | 46 | element->list = list; 47 | element->prev = last; 48 | element->next = &list->head; 49 | 50 | list->head.prev = element; 51 | 52 | spin_unlock(&list->lock); 53 | } 54 | 55 | -------------------------------------------------------------------------------- /module/list.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_HELPER_LIST_H__ 2 | #define __LIBNVM_HELPER_LIST_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | /* Forward declaration */ 10 | struct list; 11 | 12 | 13 | /* 14 | * Doubly linked list element. 
15 | */ 16 | struct list_node 17 | { 18 | struct list* list; /* Reference to list */ 19 | struct list_node* next; /* Pointer to next element in list */ 20 | struct list_node* prev; /* Pointer to previous element in list */ 21 | }; 22 | 23 | 24 | /* 25 | * Doubly linked list. 26 | * This implementation expects there always be an empty head. 27 | */ 28 | struct list 29 | { 30 | struct list_node head; /* Start of the list */ 31 | spinlock_t lock; /* Ensure exclusive access to list */ 32 | }; 33 | 34 | 35 | 36 | /* 37 | * Initialize element. 38 | */ 39 | static void __always_inline list_node_init(struct list_node* element) 40 | { 41 | element->list = NULL; 42 | element->next = NULL; 43 | element->prev = NULL; 44 | } 45 | 46 | 47 | 48 | /* 49 | * Get next element in list (if there are any) 50 | */ 51 | #define list_next(current) \ 52 | ( ((current)->next != &(current)->list->head) ? (current)->next : NULL ) 53 | 54 | 55 | 56 | /* 57 | * Initialize list. 58 | */ 59 | void list_init(struct list* list); 60 | 61 | 62 | 63 | /* 64 | * Insert element into list. 65 | */ 66 | void list_insert(struct list* list, struct list_node* element); 67 | 68 | 69 | 70 | /* 71 | * Remove element from list. 72 | */ 73 | void list_remove(struct list_node* element); 74 | 75 | 76 | 77 | #endif /* __LIBNVM_HELPER_LIST_H__ */ 78 | -------------------------------------------------------------------------------- /module/map.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_HELPER_MAP_H__ 2 | #define __LIBNVM_HELPER_MAP_H__ 3 | 4 | #include "list.h" 5 | #include 6 | #include 7 | 8 | 9 | /* Forward declaration */ 10 | struct ctrl; 11 | struct map; 12 | 13 | 14 | typedef void (*release)(struct map*); 15 | 16 | 17 | /* 18 | * Describes a range of mapped memory. 
19 | */ 20 | struct map 21 | { 22 | struct list_node list; /* Linked list header */ 23 | struct task_struct* owner; /* Owner of mapping */ 24 | u64 vaddr; /* Starting virtual address */ 25 | struct list* ctrl_list; 26 | struct pci_dev* pdev; /* Reference to physical PCI device */ 27 | unsigned long page_size; /* Logical page size */ 28 | void* data; /* Custom data */ 29 | release release; /* Custom callback for unmapping and releasing memory */ 30 | unsigned long n_addrs; /* Number of mapped pages */ 31 | uint64_t addrs[1]; /* Bus addresses */ 32 | }; 33 | 34 | 35 | 36 | /* 37 | * Lock and map userspace pages for DMA. 38 | */ 39 | struct map* map_userspace(struct list* list, const struct ctrl* ctrl, u64 vaddr, unsigned long n_pages); 40 | 41 | 42 | 43 | /* 44 | * Unmap and release memory. 45 | */ 46 | void unmap_and_release(struct map* map); 47 | 48 | 49 | 50 | #ifdef _CUDA 51 | /* 52 | * Lock and map GPU device memory. 53 | */ 54 | struct map* map_device_memory(struct list* list, const struct ctrl* ctrl, u64 vaddr, unsigned long n_pages, struct list* ctrl_list); 55 | #endif 56 | 57 | 58 | 59 | /* 60 | * Find memory mapping from vaddr and current task 61 | */ 62 | struct map* map_find(const struct list* list, u64 vaddr); 63 | 64 | 65 | #endif /* __LIBNVM_HELPER_MAP_H__ */ 66 | -------------------------------------------------------------------------------- /scripts/bfs_run_emogi_nvme_frontier.sh: -------------------------------------------------------------------------------- 1 | 2 | #echo "===============================================" 3 | #echo "Running NVME GAP-urand with GPU 8 and Page Size 4096" 4 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 1 -p 4096 --gpu 8 --threads 64 5 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 2 -p 4096 --gpu 8 --threads 64 6 | 
#../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 3 -p 4096 --gpu 8 --threads 64 7 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 4 -p 4096 --gpu 8 --threads 64 8 | 9 | echo "===============================================" 10 | echo "Running NVME uk-2007-05 with GPU 8 and Page Size 4096" 11 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 1 -p 4096 --gpu 8 --threads 64 12 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 2 -p 4096 --gpu 8 --threads 64 13 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 3 -p 4096 --gpu 8 --threads 64 14 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 4 -p 4096 --gpu 8 --threads 64 15 | 16 | 17 | #echo "===============================================" 18 | #echo "Running EMOGI GAP-urand with GPU 0" 19 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 7 --memalloc 2 --repeat 32 --n_ctrls 1 -p 4096 --gpu 0 --threads 64 20 | 21 | echo "===============================================" 22 | echo "Running EMOGI uk-2007-05 with GPU0" 23 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 7 --memalloc 2 --repeat 32 --n_ctrls 1 -p 4096 --gpu 0 --threads 64 24 | -------------------------------------------------------------------------------- /scripts/bfs_run_nvme_scaling.sh: -------------------------------------------------------------------------------- 1 | echo 
"===============================================" 2 | echo "Running GAP-urand with GPU 0 and Page Size 8192" 3 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 0 --threads 64 4 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 0 --threads 64 5 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 0 --threads 64 6 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 0 --threads 64 7 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 0 --threads 64 8 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 0 --threads 64 9 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 0 --threads 64 10 | 11 | 12 | echo "===============================================" 13 | echo "Running GAP-urand with GPU 5 and Page Size 8192" 14 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 5 --threads 64 15 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 5 --threads 64 16 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 5 --threads 64 17 | 
../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 5 --threads 64 18 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 5 --threads 64 19 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 5 --threads 64 20 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 5 --threads 64 21 | 22 | 23 | echo "===============================================" 24 | echo "Running uk-2007-05 with GPU 0 and Page Size 8192" 25 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 0 --threads 64 26 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 0 --threads 64 27 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 0 --threads 64 28 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 0 --threads 64 29 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 0 --threads 64 30 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 0 --threads 64 31 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 
--memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 0 --threads 64 32 | 33 | 34 | echo "===============================================" 35 | echo "Running uk-2007-05 with GPU 5 and Page Size 8192" 36 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 5 --threads 64 37 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 5 --threads 64 38 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 5 --threads 64 39 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 5 --threads 64 40 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 5 --threads 64 41 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 5 --threads 64 42 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 5 --threads 64 43 | -------------------------------------------------------------------------------- /scripts/extrach.sh: -------------------------------------------------------------------------------- 1 | #set -x 2 | 3 | if [ $# -ne 2 ] 4 | then 5 | echo Usage $0 logfile numssd && exit 1 6 | fi 7 | 8 | logfile=$1 9 | CTRL=$2 10 | 11 | NUMDATASET=6 12 | declare -a GraphFileName=( 13 | "GAP-kron.bel" 14 | "GAP-urand.bel" 15 | "com-Friendster.bel" 16 | "MOLIERE_2016.bel" 17 | "uk-2007-05.bel" 18 | "sk-2005.bel" 19 | "Dummy" 20 | ) 21 | 22 | IMPLSIZE=2 23 | declare -a 
ImplType=( 24 | "3" 25 | "4" 26 | "5" 27 | #"8" 28 | #"9" 29 | ) 30 | 31 | NUMPAGESIZE=3 32 | declare -a PageSize=( 33 | "512" 34 | "4096" 35 | "8192" 36 | ) 37 | 38 | TYPE=Accesses 39 | for((gid=0;gid0) printf "%.2f\n",sum/n}' 52 | done 53 | done 54 | done 55 | done 56 | -------------------------------------------------------------------------------- /scripts/fw_user_routing: -------------------------------------------------------------------------------- 1 | FFFF0800,0814EEEE 2 | FFFF0814,0800EEEE 3 | FFFF0800,0815EEEE 4 | FFFF0815,0800EEEE 5 | FFFF0800,08100510,050c020c,0214EEEE 6 | FFFF0214,020c050c,05100810,0800EEEE 7 | FFFF0800,08100510,050c020c,0215EEEE 8 | FFFF0215,020c050c,05100810,0800EEEE 9 | FFFF0800,080c0400,0414000c,0014EEEE 10 | FFFF0014,000c0414,0400080c,0800EEEE 11 | FFFF0800,080c0400,0414000c,0015EEEE 12 | FFFF0015,000c0414,0400080c,0800EEEE 13 | FFFF0800,08080304,030c060c,0604EEEE 14 | FFFF0604,060c030c,03040808,0800EEEE 15 | FFFF0800,08080304,030c060c,0605EEEE 16 | FFFF0605,060c030c,03040808,0800EEEE 17 | FFFF0800,080c0400,0414000c,0004EEEE 18 | FFFF0004,000c0414,0400080c,0800EEEE 19 | FFFF0800,08100510,050c020c,0200EEEE 20 | FFFF0200,020c050c,05100810,0800EEEE 21 | 22 | #old 23 | FFFF0100,01100310,03080008,0014EEEE 24 | FFFF0014,00080308,03100110,0100EEEE 25 | -------------------------------------------------------------------------------- /scripts/identify_hba.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for a in `lspci -D -d 1000:c010 | cut -d" " -f1` 3 | 4 | do 5 | if [ $(lspci -vv -s $a | egrep -i "Upstream | 00-80-5e" | wc -l) == 2 ]; then 6 | if 7 | [ $(lspci -vv -s $a | egrep -i "Power budget" | wc -l) == 1 ]; then 8 | echo "Falcon_HBA_BUS#" $a 9 | fi 10 | fi 11 | done 12 | 13 | 14 | -------------------------------------------------------------------------------- /scripts/run_bfs.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | set -x 3 | 4 | if [ $# -ne 3 ] 5 | then 6 | echo Usage $0 numssd gpuid tbsize && exit 1 7 | fi 8 | 9 | 10 | #Initialize set of files are taken from EMOGI and graphBIG. 11 | 12 | NUMDATASET=6 13 | declare -a GraphFileArray=( 14 | "/home/vmailthody/data/GAP-kron.bel" 15 | "/home/vmailthody/data/GAP-urand.bel" 16 | "/home/vmailthody/data/com-Friendster.bel" 17 | "/home/vmailthody/data/MOLIERE_2016.bel" 18 | "/home/vmailthody/data/uk-2007-05.bel" 19 | "/home/vmailthody/data/sk-2005.bel" 20 | ) 21 | declare -a GraphFileOffset=( 22 | "$((1024*1024*1024*0))" 23 | "$((1024*1024*1024*64))" 24 | "$((1024*1024*1024*160))" 25 | "$((1024*1024*1024*224))" 26 | "$((1024*1024*1024*320))" 27 | "$((1024*1024*1024*384))" 28 | ) 29 | 30 | 31 | declare -a GraphRootNode=( 32 | "58720242" 33 | "58720256" 34 | "28703654" 35 | "13229860" 36 | "46329738" 37 | "37977096" 38 | ) 39 | 40 | 41 | 42 | 43 | CTRL=$1 44 | MEMTYPE=6 #BAFS_DIRECT 45 | GPU=$2 46 | TB=128 47 | 48 | for ((gfid=0; gfid /sys/bus/pci/devices/$a/driver/unbind; 5 | done 6 | -------------------------------------------------------------------------------- /scripts/write_emogi_graph_nvme.sh: -------------------------------------------------------------------------------- 1 | #make benchmarks -j 2 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-kron.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*0)) -o 1 3 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-kron.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*32)) -o 1 4 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-urand.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*64)) -o 1 5 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-urand.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 
--num_queues 128 -l $((1024*1024*1024*128)) -o 1 6 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/com-Friendster.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*160)) -o 1 7 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/com-Friendster.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*192)) -o 1 8 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/MOLIERE_2016.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*224)) -o 1 9 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/MOLIERE_2016.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*288)) -o 1 10 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/uk-2007-05.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*320)) -o 1 11 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/uk-2007-05.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*352)) -o 1 12 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/sk-2005.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*384)) -o 1 13 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/sk-2005.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*416)) -o 1 14 | 15 | -------------------------------------------------------------------------------- /src/dis/device.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_DIS_DEVICE_H__ 2 | #define __NVM_INTERNAL_DIS_DEVICE_H__ 3 | #ifdef _SISCI 4 | 5 | /* Make sure everything is defined as needed */ 6 | #ifndef __DIS_CLUSTER__ 7 | 
#define __DIS_CLUSTER__ 8 | #endif 9 | 10 | /* Necessary includes */ 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "mutex.h" 16 | 17 | 18 | 19 | /* 20 | * Device descriptor. 21 | * 22 | * Holds a reference to a "borrowed" SISCI SmartIO device. 23 | */ 24 | struct device 25 | { 26 | uint32_t fdid; // SISCI SmartIO device identifier (fabric device identifier) 27 | sci_desc_t sd; // SISCI virtual device descriptor 28 | struct mutex lock; // Ensure exclusive access to device 29 | uint32_t counter; // Segment identifier counter 30 | sci_smartio_device_t device; // SmartIO device handle 31 | sci_remote_segment_t segment; // Reference to PCI bar 0 32 | size_t size; // Size of BAR0 33 | volatile void* ptr; // Mapped pointer 34 | sci_map_t md; // SISCI mapping descriptor 35 | }; 36 | 37 | 38 | 39 | /* 40 | * Connect to SmartIO device memory data segment. 41 | */ 42 | int _nvm_device_memory_get(sci_remote_segment_t* segment, 43 | const struct device* dev, 44 | uint32_t id, 45 | unsigned int memtype); 46 | 47 | 48 | 49 | /* 50 | * Disconnect from SmartIO device memory segment. 51 | */ 52 | void _nvm_device_memory_put(sci_remote_segment_t* segment); 53 | 54 | 55 | 56 | /* 57 | * Create local segment. 58 | * If ptr is not NULL, create empty segment and register segment memory. 59 | * If ptr is not NULL and gpu_mem is set, attach it CUDA device memory 60 | */ 61 | int _nvm_local_memory_get(sci_local_segment_t* segment, 62 | uint32_t* adapter, 63 | const struct device* dev, 64 | size_t size, 65 | void* ptr, 66 | bool gpu_mem); 67 | 68 | 69 | 70 | /* 71 | * Remove local segment. 
72 | */ 73 | void _nvm_local_memory_put(sci_local_segment_t* segment); 74 | 75 | 76 | 77 | #endif /* _SISCI */ 78 | #endif /* __NVM_INTERNAL_DIS_DEVICE_H__ */ 79 | -------------------------------------------------------------------------------- /src/dis/interrupt.c: -------------------------------------------------------------------------------- 1 | #ifndef _SISCI 2 | #error "Must compile with SISCI support" 3 | #endif 4 | 5 | #ifndef __DIS_CLUSTER__ 6 | #define __DIS_CLUSTER__ 7 | #endif 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "dis/interrupt.h" 14 | #include "dprintf.h" 15 | #include 16 | #include 17 | #include 18 | 19 | 20 | 21 | /* 22 | * Do some sanity checking and then call supplied callback. 23 | */ 24 | static sci_callback_action_t interrupt_callback(struct local_intr* interrupt, 25 | sci_local_data_interrupt_t intr, 26 | void* data, 27 | uint32_t length, 28 | sci_error_t status) 29 | { 30 | #ifndef NDEBUG 31 | if (status != SCI_ERR_OK) 32 | { 33 | dprintf("Unexpected status in interrupt handler routine: %s\n", _SCIGetErrorString(status)); 34 | return SCI_CALLBACK_CANCEL; 35 | } 36 | 37 | if (intr != interrupt->intr) 38 | { 39 | dprintf("Possible memory corruption\n"); 40 | return SCI_CALLBACK_CANCEL; 41 | } 42 | #endif 43 | 44 | interrupt->callback(interrupt->data, data, length); 45 | 46 | return SCI_CALLBACK_CONTINUE; 47 | } 48 | 49 | 50 | 51 | int _nvm_local_intr_get(struct local_intr* intr, uint32_t adapter, void* cb_data, intr_callback_t cb) 52 | { 53 | sci_error_t err = SCI_ERR_OK; 54 | 55 | // Get local node identifier 56 | SCIGetLocalNodeId(adapter, &intr->node_id, 0, &err); 57 | #ifndef NDEBUG 58 | if (err != SCI_ERR_OK) 59 | { 60 | dprintf("Unexpected error: %s\n", _SCIGetErrorString(err)); 61 | return EIO; 62 | } 63 | #endif 64 | 65 | // Open SISCI descriptor 66 | SCIOpen(&intr->sd, 0, &err); 67 | #ifndef NDEBUG 68 | if (err != SCI_ERR_OK) 69 | { 70 | dprintf("Failed to open SISCI virtual device: %s\n", 
_SCIGetErrorString(err)); 71 | return EIO; 72 | } 73 | #endif 74 | 75 | intr->adapter = adapter; 76 | intr->data = cb_data; 77 | intr->callback = cb; 78 | 79 | uint32_t flags = 0; 80 | void* data = NULL; 81 | sci_cb_data_interrupt_t callback = NULL; 82 | 83 | // Callback was supplied, set up parameters 84 | if (cb != NULL) 85 | { 86 | data = (void*) intr; 87 | callback = (sci_cb_data_interrupt_t) interrupt_callback; 88 | flags |= SCI_FLAG_USE_CALLBACK; 89 | } 90 | 91 | // Create data interrupt 92 | SCICreateDataInterrupt(intr->sd, &intr->intr, adapter, &intr->intr_no, callback, data, flags, &err); 93 | if (err != SCI_ERR_OK) 94 | { 95 | dprintf("Failed to create data interrupt: %s\n", _SCIGetErrorString(err)); 96 | SCIClose(intr->sd, 0, &err); 97 | return ENOSPC; 98 | } 99 | 100 | return 0; 101 | } 102 | 103 | 104 | 105 | void _nvm_local_intr_put(struct local_intr* intr) 106 | { 107 | sci_error_t err = SCI_ERR_OK; 108 | 109 | do 110 | { 111 | SCIRemoveDataInterrupt(intr->intr, 0, &err); 112 | } 113 | while (err == SCI_ERR_BUSY); 114 | 115 | SCIClose(intr->sd, 0, &err); 116 | } 117 | 118 | 119 | 120 | int _nvm_local_intr_wait(struct local_intr* intr, void* data, uint16_t maxlen, uint32_t timeout) 121 | { 122 | sci_error_t err = SCI_ERR_OK; 123 | uint32_t len = maxlen; 124 | 125 | SCIWaitForDataInterrupt(intr->intr, data, &len, timeout, 0, &err); 126 | 127 | switch (err) 128 | { 129 | case SCI_ERR_OK: 130 | return 0; 131 | 132 | case SCI_ERR_TIMEOUT: 133 | return ETIMEDOUT; 134 | 135 | default: 136 | dprintf("Waiting for data interrupt unexpectedly failed: %s\n", _SCIGetErrorString(err)); 137 | return EIO; 138 | } 139 | } 140 | 141 | 142 | 143 | int _nvm_remote_intr_get(struct remote_intr* intr, uint32_t adapter, uint32_t node, uint32_t no) 144 | { 145 | sci_error_t err = SCI_ERR_OK; 146 | 147 | SCIOpen(&intr->sd, 0, &err); 148 | #ifndef NDEBUG 149 | if (err != SCI_ERR_OK) 150 | { 151 | dprintf("Failed to open SISCI virtual device: %s\n", _SCIGetErrorString(err)); 
152 | return EIO; 153 | } 154 | #endif 155 | 156 | SCIConnectDataInterrupt(intr->sd, &intr->intr, node, adapter, no, SCI_INFINITE_TIMEOUT, 0, &err); 157 | if (err != SCI_ERR_OK) 158 | { 159 | SCIClose(intr->sd, 0, &err); 160 | return ECONNREFUSED; 161 | } 162 | 163 | return 0; 164 | } 165 | 166 | 167 | 168 | void _nvm_remote_intr_put(struct remote_intr* intr) 169 | { 170 | sci_error_t err = SCI_ERR_OK; 171 | SCIDisconnectDataInterrupt(intr->intr, 0, &err); 172 | SCIClose(intr->sd, 0, &err); 173 | } 174 | 175 | 176 | 177 | /* 178 | * Trigger remote interrupt with data. 179 | */ 180 | int _nvm_remote_intr_trigger(const struct remote_intr* intr, void* data, uint16_t length) 181 | { 182 | sci_error_t err = SCI_ERR_OK; 183 | 184 | SCITriggerDataInterrupt(intr->intr, data, length, 0, &err); 185 | if (err != SCI_ERR_OK) 186 | { 187 | dprintf("Failed to trigger data interrupt\n"); 188 | return ENOTCONN; 189 | } 190 | 191 | return 0; 192 | } 193 | 194 | 195 | 196 | /* 197 | * Convenience function for easy remote interrupt triggering. 
198 | */ 199 | int _nvm_remote_intr_fire_and_forget(uint32_t adapter, uint32_t node, uint32_t no, void* data, uint16_t len) 200 | { 201 | int status = 0; 202 | struct remote_intr intr; 203 | 204 | status = _nvm_remote_intr_get(&intr, adapter, node, no); 205 | if (status != 0) 206 | { 207 | return status; 208 | } 209 | 210 | status = _nvm_remote_intr_trigger(&intr, data, len); 211 | _nvm_remote_intr_put(&intr); 212 | return status; 213 | } 214 | 215 | -------------------------------------------------------------------------------- /src/dis/interrupt.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_DIS_INTERRUPT_H__ 2 | #define __NVM_INTERNAL_DIS_INTERRUPT_H__ 3 | #ifdef _SISCI 4 | 5 | /* Make sure everything is defined as needed */ 6 | #ifndef __DIS_CLUSTER__ 7 | #define __DIS_CLUSTER__ 8 | #endif 9 | 10 | /* Necessary includes */ 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | 18 | /* Forward declarations */ 19 | struct local_intr; 20 | struct remote_intr; 21 | 22 | 23 | 24 | /* 25 | * Interrupt callback. 26 | */ 27 | typedef void (*intr_callback_t)(void* user_data, void* recv_data, uint16_t length); 28 | 29 | 30 | 31 | /* 32 | * Local interrupt descriptor. 33 | * Data must be free'd manually. 34 | */ 35 | struct local_intr 36 | { 37 | sci_desc_t sd; // SISCI virtual device descriptor 38 | sci_local_data_interrupt_t intr; // SISCI data interrupt handle 39 | uint32_t adapter; // DIS adapter 40 | uint32_t intr_no; // Interrupt number 41 | uint32_t node_id; // DIS node identifier 42 | void* data; // User data 43 | intr_callback_t callback; // Interrupt callback 44 | }; 45 | 46 | 47 | 48 | /* 49 | * Remote interrupt descriptor. 50 | */ 51 | struct remote_intr 52 | { 53 | sci_desc_t sd; // SISCI virtual device descriptor 54 | sci_remote_data_interrupt_t intr; // SISCI data interrupt reference 55 | }; 56 | 57 | 58 | 59 | /* 60 | * Create a local data interrupt. 
 */
int _nvm_local_intr_get(struct local_intr* intr,
                        uint32_t adapter,
                        void* cb_data,
                        intr_callback_t cb_func);



/*
 * Remove a local data interrupt.
 */
void _nvm_local_intr_put(struct local_intr* intr);



/*
 * Block for a duration while waiting for an interrupt and removes interrupt afterwards.
 * Returns success if length of received data matches expected length.
 */
int _nvm_local_intr_wait(struct local_intr* intr, void* data, uint16_t maxlen, uint32_t timeout);



/*
 * Connect to remote interrupt.
 */
int _nvm_remote_intr_get(struct remote_intr* intr, uint32_t adapter, uint32_t node_id, uint32_t intr_no);



/*
 * Disconnect from remote interrupt.
 */
void _nvm_remote_intr_put(struct remote_intr* intr);



/*
 * Trigger remote interrupt with data.
 */
int _nvm_remote_intr_trigger(const struct remote_intr* intr, void* data, uint16_t len);



/*
 * Connect to remote interrupt, send data, and disconnect.
 * Convenience wrapper combining connect + trigger + disconnect.
 */
int _nvm_remote_intr_fire_and_forget(uint32_t adapter,
                                     uint32_t node_id,
                                     uint32_t intr_no,
                                     void* data,
                                     uint16_t len);

#endif /* _SISCI */
#endif /* __NVM_INTERNAL_DIS_INTERRUPT_H__ */
-------------------------------------------------------------------------------- /src/dis/map.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_DIS_MAP_H__
#define __NVM_INTERNAL_DIS_MAP_H__
#ifdef _SISCI

/* Make sure everything is defined as needed */
#ifndef __DIS_CLUSTER__
#define __DIS_CLUSTER__
#endif

/* Necessary includes */
/* NOTE(review): system header names were lost in extraction (bare "#include");
 * presumably SISCI headers and <stdbool.h> -- restore from upstream. */
#include
#include "dma.h"
#include


/*
 * Virtual address space mapping.
 */
struct va_map
{
    bool        mapped;     // Is segment mapped into virtual address space?
    sci_map_t   md;         // SISCI mapping descriptor
};



/*
 * Local segment descriptor.
 * map.range.remote = false
 */
struct local_segment
{
    // XXX: ctrl reference can be replaced with a new sci_desc_t
    struct controller*  ctrl;       // Controller reference
    uint32_t            adapter;    // DIS adapter number
    sci_local_segment_t segment;    // Local segment reference
    bool                remove;     // Requires remove
    struct va_map       map;        // Mapping descriptor
    struct va_range     range;      // Memory range descriptor
};



/*
 * Remote segment descriptor.
 * map.range.remote = true
 */
struct remote_segment
{
    // XXX: ctrl reference is only necessary for device segments
    struct controller*      ctrl;       // Controller reference
    sci_remote_segment_t    segment;    // Remote segment reference
    bool                    disconnect; // Requires a disconnect
    struct va_map           map;        // Mapping descriptor
    struct va_range         range;      // Memory range descriptor
};


#endif /* _SISCI */
#endif /* __NVM_INTERNAL_DIS_MAP_H__ */
-------------------------------------------------------------------------------- /src/dma.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_DMA_H__
#define __NVM_INTERNAL_DMA_H__

/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include
#include
#include


/* Forward declaration */
struct va_range;



/*
 * Callback type for freeing an address range descriptor.
 * Called after the range is unmapped for the device and virtual address mapping can
 * be released.
 */
typedef void (*va_range_free_t)(struct va_range* va);



/*
 * Virtual address range descriptor.
 * This structure describes a custom address range mapped in userspace.
 */
struct va_range
{
    bool            remote;     // Indicates if this is remote memory
    volatile void*  vaddr;      // Virtual address of mapped address range
    size_t          page_size;  // Alignment of mapping (page size)
    size_t          n_pages;    // Number of pages for address range
};


/* Brace-initializer helper; field order must match struct va_range above. */
#define VA_RANGE_INIT(remote, vaddr, page_size, n_pages) \
    (struct va_range) {(remote), (vaddr), (page_size), (n_pages)}


/*
 * Map address range for a controller and create and initialize a DMA handle.
 */
int _nvm_dma_init(nvm_dma_t** handle,
                  const nvm_ctrl_t* ctrl,
                  struct va_range* va,
                  va_range_free_t release);



/*
 * Get the internal virtual address range from a handle.
 */
const struct va_range* _nvm_dma_va(const nvm_dma_t* handle);


#endif /* __NVM_INTERNAL_DMA_H__ */
-------------------------------------------------------------------------------- /src/dprintf.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_DPRINTF_H__
#define __NVM_INTERNAL_DPRINTF_H__

#ifndef NDEBUG

/* NOTE(review): header names lost in extraction; presumably <stdio.h>,
 * <stdarg.h> and friends -- restore from upstream. */
#include
#include
#include
#include

/* Debug printf: prefixes the message with the calling function's name and
 * writes it to stderr. Entirely compiled out in NDEBUG builds (see below). */
static void _nvm_dprintf(const char* func, const char* format, ...)
{
    va_list args;
    va_start(args, format);
    fprintf(stderr, "[%s] ", func);
    vfprintf(stderr, format, args);
    va_end(args);
}

#define dprintf(...) _nvm_dprintf(__func__, __VA_ARGS__)

#define _nvm_strerror(status) nvm_strerror(status)
#define _SCIGetErrorString(err) SCIGetErrorString(err)

#endif /* ! NDEBUG */



/* If no debug print, just swallow message */
#ifndef dprintf
#define dprintf(...)
33 | #endif 34 | 35 | 36 | 37 | /* If no debug print, don't lookup completions */ 38 | #ifndef _nvm_strerror 39 | #define _nvm_strerror(status) 40 | #define _SCIGetErrorString(err) 41 | #endif 42 | 43 | 44 | #endif /* __NVM_INTERNAL_DPRINTF_H__ */ 45 | -------------------------------------------------------------------------------- /src/error.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | static const char* generic_status[] = 11 | { 12 | "Success", 13 | "Invalid command opcode", 14 | "Invalid field in command", 15 | "Command ID conflict", 16 | "Data transfer error", 17 | "Commands aborted due to power loss notification", 18 | "Internal error", 19 | "Command abort requested", 20 | "Command aborted due to SQ deletion", 21 | "Command aborted due to failed fused command", 22 | "Command aborted due to missing fused command", 23 | "Invalid namespace or format", 24 | "Command sequence error", 25 | "Invalid SGL segment descriptor", 26 | "Invalid number of SQL descriptors", 27 | "Data SGL length invalid", 28 | "Metadata SGL length invalid", 29 | "SGL descriptor type invalid", 30 | "Invalid use of controller memory buffer", 31 | "PRP offset invalid", 32 | "Atomic write unit exceeded", 33 | "Operation denied", 34 | "SGL offset invalid", 35 | "Unknown/reserved", 36 | "Host identifier inconsistent format", 37 | "Keep alive timer expired", 38 | "Keep alive timer invalid", 39 | "Command aborted due to preempt and abort", 40 | "Sanitize failed", 41 | "Sanitize in progress", 42 | "SGL data block granularity invalid", 43 | "Command not supported for queue in CMB" 44 | }; 45 | 46 | 47 | 48 | static const char* generic_status_nvm_commands[] = 49 | { 50 | "LBA out of range", 51 | "Capacity exceeded", 52 | "Namespace not ready", 53 | "Reservation conflict", 54 | "Format in progress" 55 | }; 56 | 57 | 58 | 59 | static const char* 
command_specific_status[] = 60 | { 61 | "Completion queue invalid", 62 | "Invalid queue identifier", 63 | "Invalid queue size", 64 | "Abort command limit exceeded", 65 | "Unknown/reserved", 66 | "Asynchronous event request limit exceeded", 67 | "Invalid firmware slot", 68 | "Invalid firmware image", 69 | "Invalid interrupt vector", 70 | "Invalid log page", 71 | "Invalid format", 72 | "Firmware activation requires conventional reset", 73 | "Invalid queue deletion", 74 | "Feature identifier not saveable", 75 | "Feature not changeable", 76 | "Feature not namespace specific", 77 | "Firmware activation requires NVM subsystem reset", 78 | "Firmware activation requires reset", 79 | "Firmware activation requires maximum time violation", 80 | "Firmware activation prohibited", 81 | "Overlapping range", 82 | "Namespace insufficient capacity", 83 | "Namespace identifier unavailable", 84 | "Unknown/reserved", 85 | "Namespace already attached", 86 | "Namespace is private", 87 | "Namespace not attached", 88 | "Thin provisioning not supported", 89 | "Controller list invalid", 90 | "Device self-test in progress", 91 | "Boot partition write prohibited", 92 | "Invalid controller identifier", 93 | "Invalid secondary controller state", 94 | "Invalid number of controller resources", 95 | "Invalid resource identifier" 96 | }; 97 | 98 | 99 | 100 | static const char* command_specific_status_nvm_commands[] = 101 | { 102 | "Conflicting attributes", 103 | "Invalid protection information", 104 | "Attempted write to read only range" 105 | }; 106 | 107 | 108 | 109 | static const char* media_and_data_integrity_nvm_commands[] = 110 | { 111 | "Write fault", 112 | "Unrecovered read error", 113 | "End-to-end guard check error", 114 | "End-to-end application tag check error", 115 | "End-to-end reference tag check error", 116 | "Compare failure", 117 | "Access denied", 118 | "Deallocated or unwritten logical block" 119 | }; 120 | 121 | 122 | 123 | static const char* lookup_string(uint8_t 
status_code_type, uint8_t status_code) 124 | { 125 | switch (status_code_type) 126 | { 127 | case 0x00: // Generic command status 128 | if (status_code < 0x20) 129 | { 130 | return generic_status[status_code]; 131 | } 132 | else if (0x80 <= status_code && status_code <= 0x84) 133 | { 134 | return generic_status_nvm_commands[status_code - 0x80]; 135 | } 136 | return "Unknown generic error"; 137 | 138 | case 0x01: // Command specific status 139 | if (status_code < 0x23) 140 | { 141 | return command_specific_status[status_code]; 142 | } 143 | else if (0x80 <= status_code && status_code <= 0x82) 144 | { 145 | return command_specific_status_nvm_commands[status_code - 0x80]; 146 | } 147 | return "Unknown command specific error"; 148 | 149 | case 0x02: // Media and data integrity errors 150 | if (0x80 <= status_code && status_code <= 0x87) 151 | { 152 | return media_and_data_integrity_nvm_commands[status_code - 0x80]; 153 | } 154 | return "Unknown media or data integrity error"; 155 | 156 | default: 157 | return "Unknown status code type"; 158 | } 159 | } 160 | 161 | 162 | 163 | const char* nvm_strerror(int status) 164 | { 165 | int err; 166 | uint8_t sct; 167 | uint8_t sc; 168 | 169 | err = NVM_ERR_UNPACK_ERRNO(status); 170 | sct = NVM_ERR_UNPACK_SCT(status); 171 | sc = NVM_ERR_UNPACK_SC(status); 172 | 173 | if (sct != 0 || sc != 0) 174 | { 175 | printf("sct: %x\tsc: %x\n", sct, sc); 176 | return lookup_string(sct, sc); 177 | 178 | } 179 | 180 | return strerror(err); 181 | } 182 | 183 | -------------------------------------------------------------------------------- /src/lib_ctrl.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_CTRL_H__ 2 | #define __NVM_INTERNAL_CTRL_H__ 3 | 4 | #include 5 | #include "mutex.h" 6 | #include "lib_util.h" 7 | 8 | 9 | /* 10 | * Device handle. 11 | */ 12 | struct device; 13 | 14 | 15 | 16 | /* 17 | * Forward declaration of a virtual memory address range. 
 */
struct va_range;



/*
 * Device reference operations.
 * Backend-specific function table (ioctl, SmartIO, ...) installed at
 * controller-init time.
 */
struct device_ops
{
    /*
     * Release device reference (called when refcount is 0)
     * This should also unmap MLBAR/BAR0 of the device.
     */
    void (*release_device)(struct device* dev, volatile void* mm_ptr, size_t mm_size);


    /*
     * Map an address range for the device.
     * On success the caller-supplied ioaddrs array is filled with the
     * bus/IO addresses of the mapped pages.
     */
    int (*map_range)(const struct device* dev, const struct va_range* va, uint64_t* ioaddrs);


    /*
     * Unmap an address range for the device.
     */
    void (*unmap_range)(const struct device* dev, const struct va_range* va);
};



/*
 * Controller device type.
 * Indicates how the controller handle was initialized.
 */
enum device_type
{
    DEVICE_TYPE_UNKNOWN = 0x00,     /* Device is mapped manually by the user */
    DEVICE_TYPE_IOCTL   = 0x01,     /* Device is mapped through UNIX file descriptor */
    DEVICE_TYPE_SMARTIO = 0x02,     /* Device is mapped by SISCI SmartIO API */
};



/*
 * Internal controller handle.
 * Used to reference count the device handle.
 * Reference counting is handled by the get and put functions.
 */
struct controller
{
    struct mutex        lock;       /* Ensure exclusive access */
    uint32_t            count;      /* Reference count */
    enum device_type    type;       /* Controller device type */
    struct device*      device;     /* Device handle */
    struct device_ops   ops;        /* Device operations */
    nvm_ctrl_t          handle;     /* User's handle (embedded; see _nvm_ctrl_type) */
};


/*
 * Helper function to initialize the controller handle by reading
 * the appropriate registers from the controller BAR.
81 | */ 82 | int _nvm_ctrl_init(nvm_ctrl_t** handle, /* User's handle */ 83 | struct device* dev, /* Device handle */ 84 | const struct device_ops* ops, /* Device handle operations */ 85 | enum device_type type, /* Device type */ 86 | volatile void* mm_ptr, /* Memory-mapped pointer */ 87 | size_t mm_size); /* Size of memory-map */ 88 | 89 | 90 | 91 | /* 92 | * Increase controller reference count. 93 | */ 94 | struct controller* _nvm_ctrl_get(const nvm_ctrl_t* handle); 95 | 96 | 97 | 98 | /* 99 | * Decrease controller reference count. 100 | */ 101 | void _nvm_ctrl_put(struct controller* ctrl); 102 | 103 | 104 | 105 | /* 106 | * Convenience macro to get the controller type. 107 | */ 108 | #define _nvm_ctrl_type(ctrl) _nvm_container_of(ctrl, struct controller, handle)->type 109 | 110 | #endif /* __NVM_INTERNAL_CTRL_H__ */ 111 | -------------------------------------------------------------------------------- /src/lib_util.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_UTIL_H__ 2 | #define __NVM_INTERNAL_UTIL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #if defined( __unix__ ) 9 | #include 10 | #include 11 | #endif 12 | 13 | #ifndef NDEBUG 14 | #include 15 | #include 16 | #include "dprintf.h" 17 | #endif 18 | 19 | 20 | /* Get the containing struct */ 21 | #if defined( __clang__ ) || defined( __GNUC__ ) 22 | #define _nvm_container_of(ptr, type, member) ({ \ 23 | const typeof( ((type *) 0)->member )* __mptr = (ptr); \ 24 | (type *) (((unsigned char*) __mptr) - offsetof(type, member)); }) 25 | #else 26 | #define _nvm_container_of(ptr, type, member) \ 27 | ((type *) (((unsigned char*) (ptr)) - ((unsigned char*) (&((type *) 0)->member)))) 28 | #endif 29 | 30 | 31 | /* Get minimum of two values */ 32 | #define _MIN(a, b) ( (a) <= (b) ? (a) : (b) ) 33 | 34 | 35 | /* Get the maximum of two values */ 36 | #define _MAX(a, b) ( (a) > (b) ? 
(a) : (b) ) 37 | 38 | 39 | 40 | /* Calculate the base-2 logarithm of a number n */ 41 | static inline uint32_t _nvm_b2log(uint32_t n) 42 | { 43 | uint32_t count = 0; 44 | 45 | while (n > 0) 46 | { 47 | ++count; 48 | n >>= 1; 49 | } 50 | 51 | return count - 1; 52 | } 53 | 54 | 55 | #if defined( __unix__ ) 56 | /* Delay the minimum of one millisecond and a time remainder */ 57 | static inline uint64_t _nvm_delay_remain(uint64_t remaining_nanoseconds) 58 | { 59 | struct timespec ts; 60 | 61 | if (remaining_nanoseconds == 0) 62 | { 63 | return 0; 64 | } 65 | 66 | ts.tv_sec = 0; 67 | ts.tv_nsec = _MIN(1000000UL, remaining_nanoseconds); 68 | 69 | clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL); 70 | 71 | remaining_nanoseconds -= _MIN(1000000UL, remaining_nanoseconds); 72 | return remaining_nanoseconds; 73 | } 74 | #endif 75 | 76 | 77 | #if defined( __unix__ ) 78 | /* Get the system page size */ 79 | static inline size_t _nvm_host_page_size() 80 | { 81 | long page_size = sysconf(_SC_PAGESIZE); 82 | 83 | #ifndef NDEBUG 84 | if (page_size < 0) 85 | { 86 | dprintf("Failed to look up system page size: %s\n", strerror(errno)); 87 | return 0; 88 | } 89 | #endif 90 | 91 | return page_size; 92 | } 93 | #else 94 | #define _nvm_host_page_size() 0x1000 95 | #endif 96 | 97 | 98 | #endif /* __NVM_INTERNAL_UTIL_H__ */ 99 | -------------------------------------------------------------------------------- /src/linux/device.cpp: -------------------------------------------------------------------------------- 1 | #ifndef __linux__ 2 | #error "Must compile for Linux" 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "linux/map.h" 20 | #include "linux/ioctl.h" 21 | #include "lib_ctrl.h" 22 | #include "dprintf.h" 23 | 24 | 25 | 26 | /* 27 | * Device descriptor 28 | */ 29 | struct device 30 | { 31 | int fd; /* ioctl file 
descriptor */ 32 | }; 33 | 34 | 35 | 36 | /* 37 | * Unmap controller memory and close file descriptor. 38 | */ 39 | static void release_device(struct device* dev, volatile void* mm_ptr, size_t mm_size) 40 | { 41 | munmap((void*) mm_ptr, mm_size); 42 | close(dev->fd); 43 | free(dev); 44 | } 45 | 46 | 47 | 48 | /* 49 | * Call kernel module ioctl and map memory for DMA. 50 | */ 51 | static int ioctl_map(const struct device* dev, const struct va_range* va, uint64_t* ioaddrs) 52 | { 53 | const struct ioctl_mapping* m = _nvm_container_of(va, struct ioctl_mapping, range); 54 | enum nvm_ioctl_type type; 55 | 56 | switch (m->type) 57 | { 58 | case MAP_TYPE_API: 59 | case MAP_TYPE_HOST: 60 | type = NVM_MAP_HOST_MEMORY; 61 | break; 62 | 63 | #ifdef _CUDA 64 | case MAP_TYPE_CUDA: 65 | type = NVM_MAP_DEVICE_MEMORY; 66 | break; 67 | #endif 68 | default: 69 | dprintf("Unknown memory type in map for device"); 70 | return EINVAL; 71 | } 72 | 73 | struct nvm_ioctl_map request = { 74 | .vaddr_start = (uintptr_t) m->buffer, 75 | .n_pages = va->n_pages, 76 | .ioaddrs = ioaddrs 77 | }; 78 | 79 | int err = ioctl(dev->fd, type, &request); 80 | if (err < 0) 81 | { 82 | dprintf("Page mapping kernel request failed (ptr=%p, n_pages=%zu): %s\n", 83 | m->buffer, va->n_pages, strerror(errno)); 84 | return errno; 85 | } 86 | 87 | return 0; 88 | } 89 | 90 | 91 | 92 | /* 93 | * Call kernel module ioctl and unmap memory. 
94 | */ 95 | static void ioctl_unmap(const struct device* dev, const struct va_range* va) 96 | { 97 | const struct ioctl_mapping* m = _nvm_container_of(va, struct ioctl_mapping, range); 98 | uint64_t addr = (uintptr_t) m->buffer; 99 | 100 | 101 | int err = ioctl(dev->fd, NVM_UNMAP_MEMORY, &addr); 102 | if (err < 0) 103 | { 104 | dprintf("Page unmapping kernel request failed: %s\n", strerror(errno)); 105 | } 106 | } 107 | 108 | 109 | 110 | int nvm_ctrl_init(nvm_ctrl_t** ctrl, int filedes) 111 | { 112 | int err; 113 | struct device* dev; 114 | const struct device_ops ops = { 115 | .release_device = &release_device, 116 | .map_range = &ioctl_map, 117 | .unmap_range = &ioctl_unmap, 118 | }; 119 | 120 | *ctrl = NULL; 121 | dev = (struct device*) malloc(sizeof(struct device)); 122 | if (dev == NULL) 123 | { 124 | dprintf("Failed to allocate device handle: %s\n", strerror(errno)); 125 | return ENOMEM; 126 | } 127 | 128 | dev->fd = dup(filedes); 129 | if (dev->fd < 0) 130 | { 131 | free(dev); 132 | dprintf("Could not duplicate file descriptor: %s\n", strerror(errno)); 133 | return errno; 134 | } 135 | 136 | err = fcntl(dev->fd, F_SETFD, O_RDWR); 137 | if (err == -1) 138 | { 139 | close(dev->fd); 140 | free(dev); 141 | dprintf("Failed to set file descriptor control: %s\n", strerror(errno)); 142 | return errno; 143 | } 144 | 145 | const size_t mm_size = NVM_CTRL_MEM_MINSIZE; 146 | void* mm_ptr = mmap(NULL, mm_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE|MAP_LOCKED, dev->fd, 0); 147 | if (mm_ptr == NULL) 148 | { 149 | close(dev->fd); 150 | free(dev); 151 | dprintf("Failed to map device memory: %s\n", strerror(errno)); 152 | return errno; 153 | } 154 | 155 | err = _nvm_ctrl_init(ctrl, dev, &ops, DEVICE_TYPE_IOCTL, mm_ptr, mm_size); 156 | if (err != 0) 157 | { 158 | release_device(dev, mm_ptr, mm_size); 159 | return err; 160 | } 161 | 162 | return 0; 163 | } 164 | 165 | -------------------------------------------------------------------------------- /src/linux/dma.cpp: 
-------------------------------------------------------------------------------- 1 | #ifndef __linux__ 2 | #error "Must compile for Linux" 3 | #endif 4 | 5 | #ifdef _CUDA 6 | #ifndef __CUDA__ 7 | #define __CUDA__ 8 | #endif 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "lib_util.h" 21 | #include "lib_ctrl.h" 22 | #include "dma.h" 23 | #include "linux/map.h" 24 | #include "dprintf.h" 25 | 26 | 27 | 28 | static void remove_mapping_descriptor(struct ioctl_mapping* md) 29 | { 30 | if (md->type == MAP_TYPE_API) 31 | { 32 | free((void*) md->buffer); 33 | } 34 | 35 | free(md); 36 | } 37 | 38 | 39 | 40 | static void release_mapping_descriptor(struct va_range* va) 41 | { 42 | remove_mapping_descriptor(_nvm_container_of(va, struct ioctl_mapping, range)); 43 | } 44 | 45 | 46 | 47 | static int create_mapping_descriptor(struct ioctl_mapping** handle, size_t page_size, enum mapping_type type, void* buffer, size_t size) 48 | { 49 | size_t n_pages = NVM_PAGE_ALIGN(size, page_size) / page_size; 50 | if (n_pages == 0) 51 | { 52 | return EINVAL; 53 | } 54 | 55 | struct ioctl_mapping* md = (struct ioctl_mapping*) malloc(sizeof(struct ioctl_mapping)); 56 | if (md == NULL) 57 | { 58 | dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno)); 59 | return errno; 60 | } 61 | 62 | md->type = type; 63 | md->buffer = buffer; 64 | md->range.remote = false; 65 | md->range.vaddr = (volatile void*) buffer; 66 | md->range.page_size = page_size; 67 | md->range.n_pages = n_pages; 68 | 69 | *handle = md; 70 | return 0; 71 | } 72 | 73 | 74 | 75 | int nvm_dma_create(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, size_t size) 76 | { 77 | void* buffer; 78 | struct ioctl_mapping* md; 79 | 80 | size = NVM_CTRL_ALIGN(ctrl, size); 81 | if (size == 0) 82 | { 83 | return EINVAL; 84 | } 85 | 86 | *handle = NULL; 87 | if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL) 88 | { 89 | return EBADF; 90 
| } 91 | 92 | int err = posix_memalign(&buffer, ctrl->page_size, size); 93 | if (err != 0) 94 | { 95 | dprintf("Failed to allocate page-aligned memory buffer: %s\n", strerror(err)); 96 | return err; 97 | } 98 | 99 | err = create_mapping_descriptor(&md, ctrl->page_size, MAP_TYPE_API, buffer, size); 100 | if (err != 0) 101 | { 102 | free(buffer); 103 | return err; 104 | } 105 | 106 | err = _nvm_dma_init(handle, ctrl, &md->range, &release_mapping_descriptor); 107 | if (err != 0) 108 | { 109 | remove_mapping_descriptor(md); 110 | return err; 111 | } 112 | 113 | return 0; 114 | } 115 | 116 | 117 | 118 | int nvm_dma_map_host(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* vaddr, size_t size) 119 | { 120 | struct ioctl_mapping* md; 121 | *handle = NULL; 122 | 123 | size = NVM_CTRL_ALIGN(ctrl, size); 124 | if (size == 0) 125 | { 126 | return EINVAL; 127 | } 128 | 129 | if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL) 130 | { 131 | return EBADF; 132 | } 133 | 134 | int err = create_mapping_descriptor(&md, ctrl->page_size, MAP_TYPE_HOST, vaddr, size); 135 | if (err != 0) 136 | { 137 | return err; 138 | } 139 | 140 | err = _nvm_dma_init(handle, ctrl, &md->range, &release_mapping_descriptor); 141 | if (err != 0) 142 | { 143 | remove_mapping_descriptor(md); 144 | return err; 145 | } 146 | 147 | return 0; 148 | } 149 | 150 | 151 | 152 | #ifdef _CUDA 153 | int nvm_dma_map_device(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* devptr, size_t size) 154 | { 155 | struct ioctl_mapping* md; 156 | *handle = NULL; 157 | 158 | if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL) 159 | { 160 | return EBADF; 161 | } 162 | 163 | int err = create_mapping_descriptor(&md, 1ULL << 16, MAP_TYPE_CUDA, devptr, size); 164 | if (err != 0) 165 | { 166 | return err; 167 | } 168 | 169 | err = _nvm_dma_init(handle, ctrl, &md->range, &release_mapping_descriptor); 170 | if (err != 0) 171 | { 172 | remove_mapping_descriptor(md); 173 | return err; 174 | } 175 | 176 | return 0; 177 | } 178 | #endif 179 | 
-------------------------------------------------------------------------------- /src/linux/ioctl.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_LINUX_IOCTL_H__ 2 | #define __NVM_INTERNAL_LINUX_IOCTL_H__ 3 | #ifdef __linux__ 4 | 5 | #include 6 | #include 7 | 8 | #define NVM_IOCTL_TYPE 0x80 9 | 10 | 11 | 12 | /* Memory map request */ 13 | struct nvm_ioctl_map 14 | { 15 | uint64_t vaddr_start; 16 | size_t n_pages; 17 | uint64_t* ioaddrs; 18 | }; 19 | 20 | 21 | 22 | /* Supported operations */ 23 | enum nvm_ioctl_type 24 | { 25 | NVM_MAP_HOST_MEMORY = _IOW(NVM_IOCTL_TYPE, 1, struct nvm_ioctl_map), 26 | #ifdef _CUDA 27 | NVM_MAP_DEVICE_MEMORY = _IOW(NVM_IOCTL_TYPE, 2, struct nvm_ioctl_map), 28 | #endif 29 | NVM_UNMAP_MEMORY = _IOW(NVM_IOCTL_TYPE, 3, uint64_t) 30 | }; 31 | 32 | 33 | #endif /* __linux__ */ 34 | #endif /* __NVM_INTERNAL_LINUX_IOCTL_H__ */ 35 | -------------------------------------------------------------------------------- /src/linux/map.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_LINUX_MAP_H__ 2 | #define __NVM_INTERNAL_LINUX_MAP_H__ 3 | #ifdef __linux__ 4 | 5 | #include "linux/ioctl.h" 6 | #include "dma.h" 7 | 8 | 9 | /* 10 | * What kind of memory are we mapping. 
11 | */ 12 | enum mapping_type 13 | { 14 | MAP_TYPE_CUDA = 0x1, // CUDA device memory 15 | MAP_TYPE_HOST = 0x2, // Host memory (RAM) 16 | MAP_TYPE_API = 0x4 // Allocated by the API (RAM) 17 | }; 18 | 19 | 20 | 21 | /* 22 | * Mapping container 23 | */ 24 | struct ioctl_mapping 25 | { 26 | enum mapping_type type; // What kind of memory 27 | void* buffer; 28 | struct va_range range; // Memory range descriptor 29 | }; 30 | 31 | 32 | #endif /* __linux__ */ 33 | #endif /* __NVM_INTERNAL_LINUX_MAP_H__ */ 34 | -------------------------------------------------------------------------------- /src/mutex.cpp: -------------------------------------------------------------------------------- 1 | #ifdef __unix__ 2 | #include 3 | #include 4 | #endif 5 | 6 | #include "mutex.h" 7 | #include "dprintf.h" 8 | 9 | 10 | 11 | #ifdef __unix__ 12 | int _nvm_mutex_init(struct mutex* mtx) 13 | { 14 | int err; 15 | 16 | err = pthread_mutex_init(&mtx->mutex, NULL); 17 | if (err != 0) 18 | { 19 | dprintf("Failed to initialize mutex: %s\n", strerror(err)); 20 | return err; 21 | } 22 | 23 | return 0; 24 | } 25 | #endif 26 | 27 | 28 | 29 | #ifdef __unix__ 30 | int _nvm_mutex_free(struct mutex* mtx) 31 | { 32 | return pthread_mutex_destroy(&mtx->mutex); 33 | } 34 | #endif 35 | 36 | 37 | 38 | #ifdef __unix__ 39 | int _nvm_mutex_lock(struct mutex* mtx) 40 | { 41 | pthread_mutex_lock(&mtx->mutex); 42 | return 0; 43 | } 44 | #endif 45 | 46 | 47 | 48 | #ifdef __unix__ 49 | void _nvm_mutex_unlock(struct mutex* mtx) 50 | { 51 | pthread_mutex_unlock(&mtx->mutex); 52 | } 53 | #endif 54 | 55 | -------------------------------------------------------------------------------- /src/mutex.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_MUTEX_H__ 2 | #define __NVM_INTERNAL_MUTEX_H__ 3 | 4 | /* Forward declaration */ 5 | struct mutex; 6 | 7 | 8 | /* 9 | * We currently only support OSes that have pthreads 10 | */ 11 | #if defined( __unix__ ) 12 | #include 
#else
#error "OS is not supported"
#endif



/*
 * We don't want another level of indirection by
 * hiding implementation and using pointers, so
 * we expose the struct definition here.
 */
#if defined( __unix__ )
struct mutex
{
    pthread_mutex_t mutex;
};
#endif



/*
 * Initialize mutex handle.
 */
int _nvm_mutex_init(struct mutex* mtx);



/*
 * Destroy mutex handle.
 */
int _nvm_mutex_free(struct mutex* mtx);



/*
 * Enter critical section.
 */
int _nvm_mutex_lock(struct mutex* mtx);



/*
 * Leave critical section.
 */
void _nvm_mutex_unlock(struct mutex* mtx);



#endif /* __NVM_INTERNAL_MUTEX_H__ */
-------------------------------------------------------------------------------- /src/queue.cpp: --------------------------------------------------------------------------------
/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include
#include
#include
#include
#include
#include "regs.h"
#include "lib_util.h"
#include


/*
 * Initialize a queue descriptor.
 * cq selects completion-queue vs submission-queue entry size and doorbell
 * register. Returns EINVAL if qs is out of range (< 2, > 0x10000, or above
 * the controller's max_qs); 0 on success.
 */
int nvm_queue_clear(nvm_queue_t* queue, const nvm_ctrl_t* ctrl, bool cq, uint16_t no, uint32_t qs,
                    bool local, volatile void* vaddr, uint64_t ioaddr)
{
    if (qs < 2 || qs > 0x10000 || qs > ctrl->max_qs)
    {
        return EINVAL;
    }

    queue->no = no;
    queue->qs = qs;
    queue->es = cq ? sizeof(nvm_cpl_t) : sizeof(nvm_cmd_t);
    queue->head = 0;
    queue->tail = 0;
    queue->last = 0;
    queue->phase = 1;       // NVMe phase tag starts at 1 for a fresh queue
    queue->local = !!local; // normalize to 0/1
    queue->head_lock = 0;
    queue->tail_lock = 0;
    // queue->head_copy = 0;
    // queue->tail_copy = 0;
    queue->in_ticket = 0;
    queue->cid_ticket = 0;

    // Doorbell address is derived from queue number and the controller's
    // doorbell stride (see SQ_DBL/CQ_DBL in regs.h).
    queue->db = (cq ? CQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd) : SQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd));
    queue->vaddr = vaddr;
    queue->ioaddr = ioaddr;

    return 0;
}



/*
 * Reset the queue's software state (pointers and phase) without touching
 * its identity, size, or doorbell mapping.
 */
void nvm_queue_reset(nvm_queue_t* queue)
{
    queue->head = 0;
    queue->tail = 0;
    queue->last = 0;
    queue->phase = 1;
}



/*
 * Dequeue a completion, polling until one arrives or the timeout
 * (milliseconds) expires. Returns NULL on timeout.
 */
nvm_cpl_t* nvm_cq_dequeue_block(nvm_queue_t* cq, uint64_t timeout)
{
    uint64_t nsecs = timeout * 1000000UL;   // milliseconds -> nanoseconds
    nvm_cpl_t* cpl = nvm_cq_dequeue(cq);

    while (cpl == NULL && nsecs > 0)
    {
        nsecs = _nvm_delay_remain(nsecs);   // sleeps at most 1 ms per iteration
        cpl = nvm_cq_dequeue(cq);
    }

    return cpl;
}

-------------------------------------------------------------------------------- /src/regs.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_REGS_H__
#define __NVM_INTERNAL_REGS_H__

/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include


/* Controller registers (offsets per the NVMe controller register map) */
#define CAP(p)          _REG(p, 0x0000, 64)     // Controller Capabilities
#define VER(p)          _REG(p, 0x0008, 32)     // NVM Express version
#define CC(p)           _REG(p, 0x0014, 32)     // Controller Configuration
#define CSTS(p)         _REG(p, 0x001c, 32)     // Controller Status
#define AQA(p)          _REG(p, 0x0024, 32)     // Admin Queue Attributes
#define ASQ(p)          _REG(p, 0x0028, 64)     // Admin Submission Queue Base Address
#define ACQ(p)          _REG(p, 0x0030, 64)     // Admin Completion Queue Base Address


/* Read bit fields */
#define CAP$MPSMAX(p)   _RB(*CAP(p), 55, 52)    // Memory Page Size Maximum
#define CAP$MPSMIN(p)   _RB(*CAP(p), 51, 48)    // Memory Page Size Minimum
#define CAP$DSTRD(p)    _RB(*CAP(p), 35, 32)    // Doorbell Stride
#define CAP$TO(p)       _RB(*CAP(p), 31, 24)    // Timeout
#define CAP$CQR(p)      _RB(*CAP(p), 16, 16)    // Contiguous Queues Required
#define CAP$MQES(p)     _RB(*CAP(p), 15, 0)     // Maximum Queue Entries Supported

#define CSTS$RDY(p)     _RB(*CSTS(p), 0, 0)
// Ready indicator


/* Write bit fields */
#define CC$IOCQES(v)    _WB(v, 23, 20)          // IO Completion Queue Entry Size
#define CC$IOSQES(v)    _WB(v, 19, 16)          // IO Submission Queue Entry Size
#define CC$MPS(v)       _WB(v, 10, 7)           // Memory Page Size
/* NOTE(review): the expansion hard-codes 0 and ignores v -- intentional while
 * only the NVM command set (CSS=0) is supported, but the unused parameter is
 * easy to misread; confirm before relying on CC$CSS with a non-zero argument. */
#define CC$CSS(v)       _WB(0, 3, 1)            // IO Command Set Selected (0=NVM Command Set)
#define CC$EN(v)        _WB(v, 0, 0)            // Enable

#define AQA$ACQS(v)     _WB(v, 27, 16)          // Admin Completion Queue Size
#define AQA$ASQS(v)     _WB(v, 11, 0)           // Admin Submission Queue Size


/* SQ doorbell register offset (queue y, doorbell stride dstrd) */
#define SQ_DBL(p, y, dstrd)    \
    ((volatile uint32_t*) (((volatile unsigned char*) (p)) + 0x1000 + ((2*(y)) * (4 << (dstrd)))) )


/* CQ doorbell register offset (queue y, doorbell stride dstrd) */
#define CQ_DBL(p, y, dstrd)    \
    ((volatile uint32_t*) (((volatile unsigned char*) (p)) + 0x1000 + ((2*(y) + 1) * (4 << (dstrd)))) )

#endif /* __NVM_INTERNAL_REGS_H__ */
-------------------------------------------------------------------------------- /src/rpc.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_RPC_H__
#define __NVM_INTERNAL_RPC_H__

/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include
#include
#include
#include


/* Forward declaration */
struct nvm_admin_reference;



/*
 * Callback to release a local binding handle.
 */
typedef void (*rpc_free_handle_t)(uint32_t key, void* data);



/*
 * Callback to release a remote binding reference.
 */
typedef void (*rpc_free_binding_t)(void* data);



/*
 * RPC client-side stub definition.
 *
 * Should perform the following actions.
 * - marshal command
 * - send command to remote host
 * - wait for completion (or timeout)
 * - unmarshal completion and return status
 */
typedef int (*rpc_stub_t)(void*, nvm_cmd_t*, nvm_cpl_t*);



/*
 * Allocate a reference wrapper and increase controller reference.
 */
int _nvm_ref_get(nvm_aq_ref* handle, const nvm_ctrl_t* ctrl);



/*
 * Free reference wrapper and decrease controller reference.
 */
void _nvm_ref_put(nvm_aq_ref ref);



/*
 * Insert binding handle to server's list of handles.
 * If key is already found, this function will fail.
 */
int _nvm_rpc_handle_insert(nvm_aq_ref ref, uint32_t key, void* data, rpc_free_handle_t release);



/*
 * Remove local binding handle.
 * This function will call the release callback.
 */
void _nvm_rpc_handle_remove(nvm_aq_ref ref, uint32_t key);



/*
 * Bind reference to remote handle.
 */
int _nvm_rpc_bind(nvm_aq_ref ref, void* data, rpc_free_binding_t release, rpc_stub_t stub);



/*
 * Execute a local admin command.
 */
int _nvm_local_admin(nvm_aq_ref ref, const nvm_cmd_t* cmd, nvm_cpl_t* cpl);



#endif /* __NVM_INTERNAL_RPC_H__ */
--------------------------------------------------------------------------------