├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── asplosaoe ├── README.md ├── build_benchmark.log ├── build_kernel_modules.log ├── build_libnvm.log ├── cmake.log ├── nvm_array_bench_1_intel.log ├── nvm_array_bench_1_sam.log ├── nvm_array_bench_2_sam.log ├── nvm_array_bench_two_asymetrical_ssds.log ├── nvm_bfs_bench_1_intel.log ├── nvm_bfs_bench_1_sam.log ├── nvm_bfs_bench_2_sam.log ├── nvm_block_bench_1_intel.log ├── nvm_block_bench_1_sam.log ├── nvm_block_bench_2_sam.log ├── nvm_cache_bench.log ├── nvm_cc_bench_1_intel.log ├── nvm_cc_bench_1_sam.log ├── nvm_cc_bench_2_sam.log └── nvm_pattern_bench.log ├── benchmarks ├── array │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── bfs │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_bfs.sh │ └── settings.h ├── block │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── settings.h │ └── test.sh ├── cache │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── cc │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_cc.sh │ ├── scan.cu │ └── settings.h ├── iodepth-block │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── settings.h │ └── test.sh ├── pagerank │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_pg.sh │ └── settings.h ├── pattern │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── scan.cu │ ├── settings.h │ ├── zip │ ├── zip.h │ └── zip_test.cpp ├── readwrite │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── reduction │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── settings.h ├── scan │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── scan.cu │ └── settings.h ├── sssp │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── run_sssp.sh │ └── settings.h ├── sssp_float │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ └── 
settings.h └── vectoradd │ ├── CMakeLists.txt │ ├── benchmark.sh │ ├── main.cu │ ├── scan.cu │ └── settings.h ├── deprecated ├── README.md.old ├── examples │ ├── README.md │ ├── identify │ │ ├── CMakeLists.txt │ │ ├── common.c │ │ ├── common.h │ │ ├── module.c │ │ ├── smartio.c │ │ └── userspace.c │ ├── integrity │ │ ├── CMakeLists.txt │ │ ├── integrity.c │ │ ├── integrity.h │ │ ├── transfer.c │ │ └── util.c │ ├── read-blocks │ │ ├── CMakeLists.txt │ │ ├── args.c │ │ ├── args.h │ │ ├── module.c │ │ ├── read.c │ │ ├── read.h │ │ └── smartio.c │ └── rpc │ │ ├── CMakeLists.txt │ │ ├── rpc_dd.c │ │ ├── rpc_flush.c │ │ ├── rpc_identify.c │ │ ├── rpc_server.c │ │ ├── segment.c │ │ ├── segment.h │ │ ├── util.c │ │ └── util.h └── fio │ └── fio_plugin.c ├── include ├── bafs_ptr.h ├── buffer.h ├── ctrl.h ├── event.h ├── host_util.h ├── nvm_admin.h ├── nvm_aq.h ├── nvm_cmd.h ├── nvm_ctrl.h ├── nvm_dma.h ├── nvm_error.h ├── nvm_io.h ├── nvm_parallel_queue.h ├── nvm_queue.h ├── nvm_rpc.h ├── nvm_types.h ├── nvm_util.h ├── page_cache.h ├── queue.h └── util.h ├── module ├── Makefile.in ├── ctrl.c ├── ctrl.h ├── list.c ├── list.h ├── map.c ├── map.h └── pci.c ├── scripts ├── bfs_run_emogi_nvme_frontier.sh ├── bfs_run_nvme.sh ├── bfs_run_nvme_scaling.sh ├── cc_run_nvme.sh ├── cc_run_nvme_scaling.sh ├── extrach.sh ├── fw_show_route ├── fw_user_routing ├── identify_hba.sh ├── run_bfs.sh ├── run_cc.sh ├── run_emogi.sh ├── sssp_run_nvme.sh ├── unbind.sh └── write_emogi_graph_nvme.sh └── src ├── admin.cpp ├── ctrl.cpp ├── dis ├── device.c ├── device.h ├── dma.c ├── interrupt.c ├── interrupt.h ├── map.h └── rpc.c ├── dma.cpp ├── dma.h ├── dprintf.h ├── error.cpp ├── lib_ctrl.h ├── lib_util.h ├── linux ├── device.cpp ├── dma.cpp ├── ioctl.h └── map.h ├── mutex.cpp ├── mutex.h ├── queue.cpp ├── regs.h ├── rpc.cpp └── rpc.h /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a 
report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Screenshots** 20 | If applicable, add screenshots to help explain your problem. 21 | 22 | **Machine Setup (please complete the following information):** 23 | - OS 24 | - NVIDIA Driver, CUDA Versions, GPU name 25 | - SSD used 26 | 27 | **Additional context** 28 | Add any other context about the problem here. Add as much detail as possible so we can help you faster. This is a systems project; knowing about your system setup is critical to understanding the problem. 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # vim backup files 2 | .*.swp 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | 8 | # Library builds 9 | *.so 10 | 11 | # Intermediate files 12 | *.mod 13 | *.mod.c 14 | .*.cmd 15 | 16 | # Kernel symbols 17 | Module.symvers 18 | modules.order 19 | .tmp_versions/* 20 | 21 | # CUDA stuff 22 | *.i 23 | *.ii 24 | *.gpu 25 | *.ptx 26 | *.cubin 27 | *.fatbin 28 | 29 | # Profiling stuff 30 | *.prof 31 | *.nvprof 32 | 33 | # Ignore build directory 34 | build/* 35 | 36 | # Ignore cscope files 37 | cscope.out 38 | cscope.in.out 39 | cscope.po.out 40 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "include/freestanding"] 2 | path = include/freestanding 3 | url = https://github.com/ogiroux/freestanding 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributions are welcome! 2 | 3 | We believe in open-source code development and hence all contributions that help to grow and build BaM are most welcome. Please note that BaM is in an early stage of development and is rapidly iterated on to support more functionality as required. However, the goals of the BaM system will remain intact while the codebase evolves over time. To this end, we request developers who are interested in contributing to frequently update their copy of the codebase. 4 | 5 | ## Reporting bugs and asking questions 6 | 7 | Please open a GitHub issue for posting questions, issues, or feedback. We try to respond as early as possible. 8 | 9 | ## To Contribute 10 | 11 | Please create a pull request for all contributions. 
We currently have not enabled a CI/CD pipeline on the BaM codebase, so all code goes through a manual review process. If anyone is interested in building a CI/CD pipeline, your contributions are most welcome. 12 | 13 | ### PR process 14 | Each PR will have an assignee who will engage with the contributor to merge the PR. Please actively ping assignees after you have addressed the requested comments. 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Jonas Markussen 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | -------------------------------------------------------------------------------- /asplosaoe/build_kernel_modules.log: -------------------------------------------------------------------------------- 1 | make -j 2 | make -C /lib/modules/5.8.0-63-generic/build M=/home/vsm2/work/bam/build2/module modules 3 | make[1]: Entering directory '/usr/src/linux-headers-5.8.0-63-generic' 4 | CC [M] /home/vsm2/work/bam/build2/module/pci.o 5 | CC [M] /home/vsm2/work/bam/build2/module/list.o 6 | CC [M] /home/vsm2/work/bam/build2/module/ctrl.o 7 | CC [M] /home/vsm2/work/bam/build2/module/map.o 8 | LD [M] /home/vsm2/work/bam/build2/module/libnvm.o 9 | MODPOST /home/vsm2/work/bam/build2/module/Module.symvers 10 | CC [M] /home/vsm2/work/bam/build2/module/libnvm.mod.o 11 | LD [M] /home/vsm2/work/bam/build2/module/libnvm.ko 12 | make[1]: Leaving directory '/usr/src/linux-headers-5.8.0-63-generic' 13 | 14 | -------------------------------------------------------------------------------- /asplosaoe/cmake.log: -------------------------------------------------------------------------------- 1 | $ cmake .. 
2 | -- The CUDA compiler identification is NVIDIA 11.6.0 3 | -- The C compiler identification is GNU 9.4.0 4 | -- The CXX compiler identification is GNU 9.4.0 5 | -- Check for working CUDA compiler: /usr/local/cuda-11.6-nightly/bin/nvcc 6 | -- Check for working CUDA compiler: /usr/local/cuda-11.6-nightly/bin/nvcc -- works 7 | -- Detecting CUDA compiler ABI info 8 | -- Detecting CUDA compiler ABI info - done 9 | -- Check for working C compiler: /usr/bin/cc 10 | -- Check for working C compiler: /usr/bin/cc -- works 11 | -- Detecting C compiler ABI info 12 | -- Detecting C compiler ABI info - done 13 | -- Detecting C compile features 14 | -- Detecting C compile features - done 15 | -- Check for working CXX compiler: /usr/bin/c++ 16 | -- Check for working CXX compiler: /usr/bin/c++ -- works 17 | -- Detecting CXX compiler ABI info 18 | -- Detecting CXX compiler ABI info - done 19 | -- Detecting CXX compile features 20 | -- Detecting CXX compile features - done 21 | -- Looking for pthread.h 22 | -- Looking for pthread.h - found 23 | -- Performing Test CMAKE_HAVE_LIBC_PTHREAD 24 | -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed 25 | -- Check if compiler accepts -pthread 26 | -- Check if compiler accepts -pthread - yes 27 | -- Found Threads: TRUE 28 | -- Found CUDA: /usr/local/cuda-11.6-nightly (found suitable version "11.6", minimum required is "8.0") 29 | -- Using NVIDIA driver found in /usr/src/nvidia-470.141.03 30 | -- Not building FIO 31 | -- Configuring libnvm without SmartIO 32 | -- Configuring kernel module with CUDA 33 | -- Found CUDA: /usr/local/cuda-11.6-nightly (found suitable version "11.6", minimum required is "10.0") 34 | -- Found CUDA: /usr/local/cuda-11.6-nightly (found suitable version "11.6", minimum required is "8.0") 35 | -- Configuring done 36 | -- Generating done 37 | -- Build files have been written to: /home/vsm2/work/bam/build 38 | 39 | -------------------------------------------------------------------------------- 
/asplosaoe/nvm_array_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single Intel Optane SSD wih 512B cacheline size (page_size) 2 | sudo ./bin/nvm-array-bench --threads=$((1024*1024)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=512 --gpu=0 --n_ctrls=1 --num_queues=128 --random=false -S 1 3 | SQs: 135 CQs: 135 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7fc41c010000 58020410000 7 | HEREN 8 | Cond1 9 | 100000 8 8 20000 10 | Finish Making Page Cache 11 | finished creating cache 12 | finished creating range 13 | atlaunch kernel 14 | 0000:6E:00.0 15 | #READ IOs: 16384 #Accesses:1048576 #Misses:524288 Miss Rate:0.5 #Hits: 524288 Hit Rate:0.5 CLSize:512 16 | ********************************* 17 | Elapsed Time: 3572.74 Number of Read Ops: 1048576 Data Size (bytes): 8388608 18 | Read Ops/sec: 2.93494e+08 Effective Bandwidth(GB/S): 2.1867 19 | 20 | -------------------------------------------------------------------------------- /asplosaoe/nvm_array_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-array-bench --threads=$((1024*1024*128)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=4096 --gpu=0 --n_ctrls=1 --num_queues=128 --random=false 3 | SQs: 129 CQs: 129 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f3752010000 3f020410000 7 | HEREN 8 | Cond1 9 | 100000 8 1 100000 10 | Finish Making Page Cache 11 | finished creating cache 12 | finished creating range 13 | atlaunch kernel 14 | 0000:BA:00.0 15 | #READ IOs: 262144 #Accesses:134217728 #Misses:8388608 Miss Rate:0.0625 #Hits: 125829120 Hit Rate:0.9375 CLSize:4096 16 | ********************************* 17 | Elapsed Time: 525437 Number of Read Ops: 134217728 Data Size (bytes): 1073741824 18 | Read Ops/sec: 
2.5544e+08 Effective Bandwidth(GB/S): 1.90318 19 | -------------------------------------------------------------------------------- /asplosaoe/nvm_array_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-array-bench --threads=$((1024*1024*128)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=4096 --gpu=0 --n_ctrls=2 --num_queues=128 --random=false 3 | SQs: 129 CQs: 129 n_qps: 128 4 | SQs: 129 CQs: 129 n_qps: 128 5 | n_ranges_bits: 6 6 | n_ranges_mask: 63 7 | pages_dma: 0x7f890e010000 3f040410000 8 | HEREN 9 | Cond1 10 | 100000 8 1 100000 11 | Finish Making Page Cache 12 | finished creating cache 13 | finished creating range 14 | atlaunch kernel 15 | 0000:BA:00.0 16 | #READ IOs: 262144 #Accesses:134217728 #Misses:8388608 Miss Rate:0.0625 #Hits: 125829120 Hit Rate:0.9375 CLSize:4096 17 | ********************************* 18 | Elapsed Time: 385223 Number of Read Ops: 134217728 Data Size (bytes): 1073741824 19 | Read Ops/sec: 3.48416e+08 Effective Bandwidth(GB/S): 2.5959 20 | 21 | -------------------------------------------------------------------------------- /asplosaoe/nvm_array_bench_two_asymetrical_ssds.log: -------------------------------------------------------------------------------- 1 | sudo ./bin/nvm-array-bench --threads=$((1024*1024)) --blk_size=64 --reqs=1 --pages=$((1024*1024)) --queue_depth=1024 --page_size=512 --gpu=0 --n_ctrls=2 --num_queues=128 --random=false 2 | SQs: 129 CQs: 129 n_qps: 128 3 | SQs: 135 CQs: 135 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f87bc010000 58040410000 7 | HEREN 8 | Cond1 9 | 100000 8 8 20000 10 | Finish Making Page Cache 11 | finished creating cache 12 | finished creating range 13 | atlaunch kernel 14 | 0000:6E:00.0 15 | #READ IOs: 16384 #Accesses:1048576 #Misses:524288 Miss Rate:0.5 #Hits: 524288 Hit Rate:0.5 CLSize:512 16 | 
********************************* 17 | Elapsed Time: 13853.7 Number of Read Ops: 1048576 Data Size (bytes): 8388608 18 | Read Ops/sec: 7.56893e+07 Effective Bandwidth(GB/S): 0.563929 19 | 20 | -------------------------------------------------------------------------------- /asplosaoe/nvm_bfs_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single Intel Optane SSD wih 4KB cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-bfs-bench -f /home/vsm2/bafsdata/MOLIERE_2016.bel -l 240518168576 --impl_type 20 --memalloc 6 --src 13229860 --n_ctrls 1 -p 4096 --gpu 0 --threads 128 -C 8 -M $((8*1024*1024*1024)) -S 1 3 | /home/vsm2/bafsdata/MOLIERE_2016.bel 4 | File /home/vsm2/bafsdata/MOLIERE_2016.bel 5 | Vertex: 30239687, Edge: 6677301366 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 tile_size:4096 8 | SQs: 135 CQs: 135 n_qps: 128 9 | Controllers Created 10 | GPUID: 0000:6E:00.0 11 | Initialization done. 12 | n_ranges_bits: 6 13 | n_ranges_mask: 63 14 | pages_dma: 0x7f0b7a010000 58020410000 15 | HEREN 16 | Cond1 17 | 200000 8 1 200000 18 | Finish Making Page Cache 19 | Page cache initialized 20 | run 0: src 13229860, iteration 15, time 11719.943359 ms 21 | #READ IOs: 16634480 #Accesses:632341981 #Misses:296080469 Miss Rate:0.468228 #Hits: 336261512 Hit Rate:0.531772 CLSize:4096 22 | ********************************* 23 | 24 | BFS-0 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 11719.943359 ms 25 | run 1: src 13229860, iteration 15, time 11738.077148 ms 26 | #READ IOs: 16632603 #Accesses:632341981 #Misses:295874787 Miss Rate:0.467903 #Hits: 336467194 Hit Rate:0.532097 CLSize:4096 27 | ********************************* 28 | 29 | BFS-1 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 11738.077148 ms 30 | 31 | 
-------------------------------------------------------------------------------- /asplosaoe/nvm_bfs_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-bfs-bench -f /home/vsm2/bafsdata/MOLIERE_2016.bel -l 240518168576 --impl_type 20 --memalloc 6 --src 13229860 --n_ctrls 1 -p 4096 --gpu 0 --threads 128 -C 8 -M $((8*1024*1024*1024)) 3 | /home/vsm2/bafsdata/MOLIERE_2016.bel 4 | File /home/vsm2/bafsdata/MOLIERE_2016.bel 5 | Vertex: 30239687, Edge: 6677301366 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 tile_size:4096 8 | SQs: 129 CQs: 129 n_qps: 128 9 | Controllers Created 10 | GPUID: 0000:6E:00.0 11 | Initialization done. 12 | n_ranges_bits: 6 13 | n_ranges_mask: 63 14 | pages_dma: 0x7f2d9a010000 58020410000 15 | HEREN 16 | Cond1 17 | 200000 8 1 200000 18 | Finish Making Page Cache 19 | Page cache initialized 20 | run 0: src 13229860, iteration 15, time 30770.189453 ms 21 | #READ IOs: 16634497 #Accesses:632341981 #Misses:295996204 Miss Rate:0.468095 #Hits: 336345777 Hit Rate:0.531905 CLSize:4096 22 | ********************************* 23 | 24 | BFS-0 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 30770.189453 ms 25 | run 1: src 13229860, iteration 15, time 28861.617188 ms 26 | #READ IOs: 16632578 #Accesses:632341981 #Misses:296246135 Miss Rate:0.46849 #Hits: 336095846 Hit Rate:0.53151 CLSize:4096 27 | ********************************* 28 | 29 | BFS-1 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 28861.617188 ms 30 | 31 | -------------------------------------------------------------------------------- /asplosaoe/nvm_bfs_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB 
cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-bfs-bench -f /home/vsm2/bafsdata/MOLIERE_2016.bel -l 240518168576 --impl_type 20 --memalloc 6 --src 13229860 --n_ctrls 2 -p 4096 --gpu 1 --threads 128 -C 8 -M $((8*1024*1024*1024)) 3 | /home/vsm2/bafsdata/MOLIERE_2016.bel 4 | File /home/vsm2/bafsdata/MOLIERE_2016.bel 5 | Vertex: 30239687, Edge: 6677301366 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 tile_size:4096 8 | SQs: 129 CQs: 129 n_qps: 128 9 | SQs: 129 CQs: 129 n_qps: 128 10 | Controllers Created 11 | GPUID: 0000:A6:00.0 12 | Initialization done. 13 | n_ranges_bits: 6 14 | n_ranges_mask: 63 15 | pages_dma: 0x7f5a66010000 48040410000 16 | HEREN 17 | Cond1 18 | 200000 8 1 200000 19 | Finish Making Page Cache 20 | Page cache initialized 21 | run 0: src 13229860, iteration 15, time 18059.087891 ms 22 | #READ IOs: 16634530 #Accesses:632341981 #Misses:295866151 Miss Rate:0.467889 #Hits: 336475830 Hit Rate:0.532111 CLSize:4096 23 | ********************************* 24 | 25 | BFS-0 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 18059.087891 ms 26 | run 1: src 13229860, iteration 15, time 15257.948242 ms 27 | #READ IOs: 16632591 #Accesses:632341981 #Misses:296056548 Miss Rate:0.468191 #Hits: 336285433 Hit Rate:0.531809 CLSize:4096 28 | ********************************* 29 | 30 | BFS-1 Graph:/home/vsm2/bafsdata/MOLIERE_2016.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 1 Coarse: 8 AvgTime 15257.948242 ms 31 | -------------------------------------------------------------------------------- /asplosaoe/nvm_block_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single Intel Optane SSD wih 512B cacheline size (page_size) 2 | sudo ./bin/nvm-block-bench --threads=262144 --blk_size=64 --reqs=1 --pages=262144 --queue_depth=1024 --page_size=512 --num_blks=2097152 --gpu=0 --n_ctrls=1 --num_queues=128 
--random=true -S 1 3 | SQs: 135 CQs: 135 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f8afc010000 58020410000 7 | HEREN 8 | Cond1 9 | 40000 8 8 8000 10 | Finish Making Page Cache 11 | finished creating cache 12 | 0000:6E:00.0 13 | atlaunch kernel 14 | Elapsed Time: 53459.9 Number of Ops: 262144 Data Size (bytes): 134217728 15 | Ops/sec: 4.90356e+06 Effective Bandwidth(GB/S): 2.3382 16 | 17 | 18 | -------------------------------------------------------------------------------- /asplosaoe/nvm_block_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-block-bench --threads=262144 --blk_size=64 --reqs=1 --pages=262144 --queue_depth=1024 --page_size=4096 --num_blks=2097152 --gpu=0 --n_ctrls=1 --num_queues=128 --random=true 3 | SQs: 129 CQs: 129 n_qps: 128 4 | n_ranges_bits: 6 5 | n_ranges_mask: 63 6 | pages_dma: 0x7f09e4010000 3f020410000 7 | HEREN 8 | Cond1 9 | 40000 8 1 40000 10 | Finish Making Page Cache 11 | finished creating cache 12 | 0000:BA:00.0 13 | atlaunch kernel 14 | Elapsed Time: 328288 Number of Ops: 262144 Data Size (bytes): 1073741824 15 | Ops/sec: 798519 Effective Bandwidth(GB/S): 3.04611 16 | 17 | -------------------------------------------------------------------------------- /asplosaoe/nvm_block_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB cacheline size (page_size) 2 | sudo ./bin/nvm-block-bench --threads=262144 --blk_size=64 --reqs=1 --pages=262144 --queue_depth=1024 --page_size=4096 --num_blks=2097152 --gpu=0 --n_ctrls=2 --num_queues=128 --random=true 3 | SQs: 129 CQs: 129 n_qps: 128 4 | SQs: 129 CQs: 129 n_qps: 128 5 | n_ranges_bits: 6 6 | n_ranges_mask: 63 7 | pages_dma: 0x7fd0ca010000 3f040410000 8 | HEREN 9 | Cond1 10 | 40000 8 1 40000 11 | Finish Making Page Cache 12 | finished 
creating cache 13 | 0000:BA:00.0 14 | atlaunch kernel 15 | Elapsed Time: 172061 Number of Ops: 262144 Data Size (bytes): 1073741824 16 | Ops/sec: 1.52355e+06 Effective Bandwidth(GB/S): 5.81188 17 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cache_bench.log: -------------------------------------------------------------------------------- 1 | 2 | 3 | sudo ./bin/nvm-cache-bench -k 1 -p 2097152 -P 4096 -n 1 -t 1048576 -b 64 -d 1024 -q 135 -T 1 -e 8589934592 --gpu=0 -r 0 4 | SQs: 135 CQs: 135 n_qps: 135 5 | n_ranges_bits: 6 6 | n_ranges_mask: 63 7 | pages_dma: 0x7f6022010000 58022010000 8 | HEREN 9 | Cond1 10 | 200000 8 1 200000 11 | Finish Making Page Cache 12 | finished creating cache 13 | finished creating range 14 | n_elems_per_page: 200 15 | atlaunch kernel 16 | 0000:6E:00.0 17 | #READ IOs: 32768 #Accesses:1048576 #Misses:1048576 Miss Rate:1 #Hits: 0 Hit Rate:0 CLSize:4096 18 | ********************************* 19 | Itr:0 type: 0 Elapsed Time: 21351.4 Number of Read Ops: 16777216 Data Size (bytes): 134217728Read Ops/sec: 7.85766e+08 Effective Bandwidth(GB/S): 5.85441 20 | ID:0 type:0 n_warps:32768 n_pages_per_warp: 1 n_elems_per_page:512 ios: 16777216 IOPs: 785765697.813752 data:134217728 bandwidth: 5.854411 GBps time: 21351.423264 21 | #READ IOs: 0 #Accesses:1048576 #Misses:0 Miss Rate:0 #Hits: 1048576 Hit Rate:1 CLSize:4096 22 | ********************************* 23 | Itr:1 type: 0 Elapsed Time: 738.304 Number of Read Ops: 16777216 Data Size (bytes): 134217728Read Ops/sec: 2.2724e+10 Effective Bandwidth(GB/S): 169.307 24 | ID:1 type:0 n_warps:32768 n_pages_per_warp: 1 n_elems_per_page:512 ios: 16777216 IOPs: 22723993868.146492 data:134217728 bandwidth: 169.306948 GBps time: 738.304019 25 | 26 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cc_bench_1_intel.log: -------------------------------------------------------------------------------- 1 | # With Single 
Intel Optane SSD wih 4KB cacheline size (page_size) and 8GB BaM Cache. 2 | sudo ./bin/nvm-cc-bench -f /home/vsm2/bafsdata/GAP-kron.bel -l 0 --impl_type 20 --memalloc 6 --src 58720242 --n_ctrls 1 -p 4096 --gpu 0 --threads 128 -M $((8*1024*1024*1024)) -P 128 -C 8 -S 1 3 | /home/vsm2/bafsdata/GAP-kron.bel 4 | File /home/vsm2/bafsdata/GAP-kron.bel 5 | Vertex: 134217726, Edge: 4223264644 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 8 | SQs: 135 CQs: 135 n_qps: 128 9 | Controllers Created 10 | Initialization done 11 | n_ranges_bits: 6 12 | n_ranges_mask: 63 13 | pages_dma: 0x7fb9d8010000 58020410000 14 | HEREN 15 | Cond1 16 | 200000 8 1 200000 17 | Finish Making Page Cache 18 | Page cache initialized 19 | Hash Stride: 128 Coarse: 8 20 | total cc iterations: 4 21 | total components: 71164263 22 | total time: 10821.237305 ms 23 | #READ IOs: 16524764 #Accesses:1270205174 #Misses:354511917 Miss Rate:0.279098 #Hits: 915693257 Hit Rate:0.720902 CLSize:4096 24 | ********************************* 25 | 26 | CC 0 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 10821.237305 ms 27 | Hash Stride: 128 Coarse: 8 28 | total cc iterations: 4 29 | total components: 71164263 30 | total time: 10806.666992 ms 31 | #READ IOs: 16502729 #Accesses:1270205148 #Misses:353968164 Miss Rate:0.27867 #Hits: 916236984 Hit Rate:0.72133 CLSize:4096 32 | ********************************* 33 | 34 | CC 1 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 10806.666992 ms 35 | 36 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cc_bench_1_sam.log: -------------------------------------------------------------------------------- 1 | # With Single Samsung 980 pro SSD with 4KB cacheline size (page_size) and 8GB BaM Cache. 
2 | sudo ./bin/nvm-cc-bench -f /home/vsm2/bafsdata/GAP-kron.bel -l 0 --impl_type 20 --memalloc 6 --src 58720242 --n_ctrls 1 -p 4096 --gpu 1 --threads 128 -M $((8*1024*1024*1024)) -P 128 -C 8 3 | /home/vsm2/bafsdata/GAP-kron.bel 4 | File /home/vsm2/bafsdata/GAP-kron.bel 5 | Vertex: 134217726, Edge: 4223264644 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 8 | SQs: 129 CQs: 129 n_qps: 128 9 | Controllers Created 10 | Initialization done 11 | n_ranges_bits: 6 12 | n_ranges_mask: 63 13 | pages_dma: 0x7fbcd8010000 48020410000 14 | HEREN 15 | Cond1 16 | 200000 8 1 200000 17 | Finish Making Page Cache 18 | Page cache initialized 19 | total cc iterations: 4 20 | total components: 71164263 21 | total time: 25175.816406 ms 22 | #READ IOs: 16524749 #Accesses:1270205169 #Misses:354066640 Miss Rate:0.278748 #Hits: 916138529 Hit Rate:0.721252 CLSize:4096 23 | ********************************* 24 | 25 | CC 0 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 25175.816406 ms 26 | total cc iterations: 4 27 | total components: 71164263 28 | total time: 25073.535156 ms 29 | #READ IOs: 16502707 #Accesses:1270205134 #Misses:353845039 Miss Rate:0.278573 #Hits: 916360095 Hit Rate:0.721427 CLSize:4096 30 | ********************************* 31 | 32 | CC 1 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 1 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 25073.535156 ms 33 | 34 | 35 | -------------------------------------------------------------------------------- /asplosaoe/nvm_cc_bench_2_sam.log: -------------------------------------------------------------------------------- 1 | # With Two Samsung 980 pro SSD with 4KB cacheline size (page_size) and 8GB BaM Cache. 
2 | sudo ./bin/nvm-cc-bench -f /home/vsm2/bafsdata/GAP-kron.bel -l 0 --impl_type 20 --memalloc 6 --src 58720242 --n_ctrls 2 -p 4096 --gpu 1 --threads 128 -M $((8*1024*1024*1024)) -P 128 -C 8 3 | /home/vsm2/bafsdata/GAP-kron.bel 4 | File /home/vsm2/bafsdata/GAP-kron.bel 5 | Vertex: 134217726, Edge: 4223264644 6 | Allocation finished 7 | page size: 4096, pc_entries: 2097152 8 | SQs: 129 CQs: 129 n_qps: 128 9 | SQs: 129 CQs: 129 n_qps: 128 10 | Controllers Created 11 | Initialization done 12 | n_ranges_bits: 6 13 | n_ranges_mask: 63 14 | pages_dma: 0x7f8164010000 48040410000 15 | HEREN 16 | Cond1 17 | 200000 8 1 200000 18 | Finish Making Page Cache 19 | Page cache initialized 20 | total cc iterations: 4 21 | total components: 71164263 22 | total time: 18494.630859 ms 23 | #READ IOs: 16524763 #Accesses:1270205171 #Misses:354401603 Miss Rate:0.279011 #Hits: 915803568 Hit Rate:0.720989 CLSize:4096 24 | ********************************* 25 | 26 | CC 0 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 18494.630859 ms 27 | total cc iterations: 4 28 | total components: 71164263 29 | total time: 16683.892578 ms 30 | #READ IOs: 16502832 #Accesses:1270205224 #Misses:353816610 Miss Rate:0.278551 #Hits: 916388614 Hit Rate:0.721449 CLSize:4096 31 | ********************************* 32 | 33 | CC 1 Graph:/home/vsm2/bafsdata/GAP-kron.bel Impl: 20 SSD: 2 CL: 4096 Cache: 8589934592 Stride: 128 Coarse: 8 TotalTime 16683.892578 ms 34 | -------------------------------------------------------------------------------- /asplosaoe/nvm_pattern_bench.log: -------------------------------------------------------------------------------- 1 | 2 | sudo ./bin/nvm-pattern-bench --input_a=/home/vsm2/bafsdata/GAP-kron.bel --memalloc=6 --threads=4194304 --n_elems=137438953472 --impl_type=3 --blk_size=128 --queue_depth=1024 --num_queues=135 --page_size=4096 --n_ctrls=1 3 | A: /home/vsm2/bafsdata/GAP-kron.bel.dst 4 | Total elements: 
137438953472 5 | n_warps: 131072 numblocks:65536 6 | page size: 4096, pc_entries: 2097152 7 | Allocation finished 8 | SQs: 135 CQs: 135 n_qps: 135 9 | Controllers Created 10 | Initialization done 11 | n_ranges_bits: 6 12 | n_ranges_mask: 63 13 | pages_dma: 0x7efc30010000 58022010000 14 | HEREN 15 | Cond1 16 | 200000 8 1 200000 17 | Finish Making Page Cache 18 | Page cache initialized 19 | val in gpu: 1663758813 20 | #READ IOs: 131072 #Accesses:4194304 #Misses:4194304 Miss Rate:1 #Hits: 0 Hit Rate:0 CLSize:4096 21 | ********************************* 22 | P:0 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 797566000.672973 data:536870912 bandwidth: 5.942330 GBps avgiops: inf avgbandwidth: inf 23 | val in gpu: 915100374 24 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 25 | ********************************* 26 | P:1 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 49799391758.371330 data:536870912 bandwidth: 371.034382 GBps avgiops: 49799391758.371330 avgbandwidth: 371.034382 27 | val in gpu: 1624408235 28 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 29 | ********************************* 30 | P:2 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50451116401.548798 data:536870912 bandwidth: 375.890109 GBps avgiops: 50123133430.321938 avgbandwidth: 373.446445 31 | val in gpu: 2059900224 32 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 33 | ********************************* 34 | P:3 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50334868706.078766 data:536870912 bandwidth: 375.023996 GBps avgiops: 50193512158.904495 avgbandwidth: 373.970808 35 | val in gpu: 1631163370 36 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 37 
| ********************************* 38 | P:4 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 49989322239.155258 data:536870912 bandwidth: 372.449474 GBps avgiops: 50142308454.389557 avgbandwidth: 373.589310 39 | val in gpu: 1097009717 40 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 41 | ********************************* 42 | P:5 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 49610899066.537041 data:536870912 bandwidth: 369.630002 GBps avgiops: 50035116879.830650 avgbandwidth: 372.790671 43 | val in gpu: 1091665284 44 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 45 | ********************************* 46 | P:6 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50257667401.372787 data:536870912 bandwidth: 374.448802 GBps avgiops: 50072073142.439964 avgbandwidth: 373.066017 47 | val in gpu: 344527228 48 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 49 | ********************************* 50 | P:7 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50373558369.068062 data:536870912 bandwidth: 375.312257 GBps avgiops: 50114918776.908516 avgbandwidth: 373.385241 51 | val in gpu: 774151880 52 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 53 | ********************************* 54 | P:8 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50373558369.068062 data:536870912 bandwidth: 375.312257 GBps avgiops: 50147105619.755478 avgbandwidth: 373.625052 55 | val in gpu: 310116501 56 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 57 | ********************************* 58 | P:9 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 
ios: 67108864 IOPs: 50027482318.761513 data:536870912 bandwidth: 372.733789 GBps avgiops: 50133783914.451157 avgbandwidth: 373.525798 59 | val in gpu: 1850572170 60 | #READ IOs: 0 #Accesses:4194304 #Misses:0 Miss Rate:0 #Hits: 4194304 Hit Rate:1 CLSize:4096 61 | ********************************* 62 | P:10 Impl: 3 SSD: 1 n_warps:131072 n_pages_per_warp: 1 n_elems_per_page:512 ios: 67108864 IOPs: 50296238429.055405 data:536870912 bandwidth: 374.736178 GBps avgiops: 50149982572.562210 avgbandwidth: 373.646487 63 | 64 | -------------------------------------------------------------------------------- /benchmarks/array/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (array-benchmark-module array-bench "main.cu;") 12 | 13 | make_benchmark_choice (array-benchmark array-benchmark-smartio array-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/array/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/bfs/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (bfs-benchmark-module bfs-bench "main.cu;") 12 | 13 | make_benchmark_choice (bfs-benchmark bfs-benchmark-smartio bfs-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/bfs/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/bfs/run_bfs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | if [ $# -ne 3 ] 5 | then 6 | echo Usage $0 numssd gpuid tbsize && exit 1 7 | fi 8 | 9 | 10 | #Initialize set of files are taken from EMOGI and graphBIG. 
11 | 12 | NUMDATASET=6 13 | declare -a GraphFileArray=( 14 | "/home/vsm2/bafsdata/GAP-kron.bel" 15 | "/home/vsm2/bafsdata/GAP-urand.bel" 16 | "/home/vsm2/bafsdata/com-Friendster.bel" 17 | "/home/vsm2/bafsdata/MOLIERE_2016.bel" 18 | "/home/vsm2/bafsdata/uk-2007-05.bel" 19 | "/home/vsm2/bafsdata/sk-2005.bel" 20 | ) 21 | declare -a GraphFileOffset=( 22 | "$((1024*1024*1024*0))" 23 | "$((1024*1024*1024*64))" 24 | "$((1024*1024*1024*160))" 25 | "$((1024*1024*1024*224))" 26 | "$((1024*1024*1024*320))" 27 | "$((1024*1024*1024*384))" 28 | ) 29 | 30 | #echo "${GraphFileArray[5]} offset is ${GraphFileOffset[5]}" 31 | 32 | declare -a GraphRootNode=( 33 | "58720242" 34 | "58720256" 35 | "28703654" 36 | "13229860" 37 | "46329738" 38 | "37977096" 39 | ) 40 | 41 | 42 | 43 | 44 | CTRL=$1 45 | MEMTYPE=6 #BAFS_DIRECT 46 | GPU=$2 47 | TB=128 48 | 49 | for ((gfid=0; gfid zipf(300); 24 | * 25 | * for (int i = 0; i < 100; i++) 26 | * printf("draw %d %d\n", i, zipf(gen)); 27 | */ 28 | 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | 37 | template 38 | class zipf_distribution 39 | { 40 | public: 41 | typedef IntType result_type; 42 | 43 | static_assert(std::numeric_limits::is_integer, ""); 44 | static_assert(!std::numeric_limits::is_integer, ""); 45 | 46 | /// zipf_distribution(N, s, q) 47 | /// Zipf distribution for `N` items, in the range `[1,N]` inclusive. 48 | /// The distribution follows the power-law 1/(n+q)^s with exponent 49 | /// `s` and Hurwicz q-deformation `q`. 50 | zipf_distribution(const IntType n=std::numeric_limits::max(), 51 | const RealType s=1.0, 52 | const RealType q=0.0) 53 | : n(n) 54 | , _s(s) 55 | , _q(q) 56 | , oms(1.0-s) 57 | , spole(abs(oms) < epsilon) 58 | , rvs(spole ? 
0.0 : 1.0/oms) 59 | , H_x1(H(1.5) - h(1.0)) 60 | , H_n(H(n + 0.5)) 61 | , cut(1.0 - H_inv(H(1.5) - h(1.0))) 62 | , dist(H_x1, H_n) 63 | { 64 | if (-0.5 >= q) 65 | throw std::runtime_error("Range error: Parameter q must be greater than -0.5!"); 66 | } 67 | void reset() {} 68 | 69 | IntType operator()(std::mt19937& rng) 70 | { 71 | while (true) 72 | { 73 | const RealType u = dist(rng); 74 | const RealType x = H_inv(u); 75 | const IntType k = std::round(x); 76 | if (k - x <= cut) return k; 77 | if (u >= H(k + 0.5) - h(k)) 78 | return k; 79 | } 80 | } 81 | 82 | /// Returns the parameter the distribution was constructed with. 83 | RealType s() const { return _s; } 84 | /// Returns the Hurwicz q-deformation parameter. 85 | RealType q() const { return _q; } 86 | /// Returns the minimum value potentially generated by the distribution. 87 | result_type min() const { return 1; } 88 | /// Returns the maximum value potentially generated by the distribution. 89 | result_type max() const { return n; } 90 | 91 | 92 | private: 93 | IntType n; ///< Number of elements 94 | RealType _s; ///< Exponent 95 | RealType _q; ///< Deformation 96 | RealType oms; ///< 1-s 97 | bool spole; ///< true if s near 1.0 98 | RealType rvs; ///< 1/(1-s) 99 | RealType H_x1; ///< H(x_1) 100 | RealType H_n; ///< H(n) 101 | RealType cut; ///< rejection cut 102 | std::uniform_real_distribution dist; ///< [H(x_1), H(n)] 103 | 104 | // This provides 16 decimal places of precision, 105 | // i.e. good to (epsilon)^4 / 24 per expanions log, exp below. 
106 | static constexpr RealType epsilon = 2e-5; 107 | 108 | /** (exp(x) - 1) / x */ 109 | static double 110 | expxm1bx(const double x) 111 | { 112 | if (std::abs(x) > epsilon) 113 | return std::expm1(x) / x; 114 | return (1.0 + x/2.0 * (1.0 + x/3.0 * (1.0 + x/4.0))); 115 | } 116 | 117 | /** log(1 + x) / x */ 118 | static RealType 119 | log1pxbx(const RealType x) 120 | { 121 | if (std::abs(x) > epsilon) 122 | return std::log1p(x) / x; 123 | return 1.0 - x * ((1/2.0) - x * ((1/3.0) - x * (1/4.0))); 124 | } 125 | /** 126 | * The hat function h(x) = 1/(x+q)^s 127 | */ 128 | const RealType h(const RealType x) 129 | { 130 | return std::pow(x + _q, -_s); 131 | } 132 | 133 | /** 134 | * H(x) is an integral of h(x). 135 | * H(x) = [(x+q)^(1-s) - (1+q)^(1-s)] / (1-s) 136 | * and if s==1 then 137 | * H(x) = log(x+q) - log(1+q) 138 | * 139 | * Note that the numerator is one less than in the paper 140 | * order to work with all s. Unfortunately, the naive 141 | * implementation of the above hits numerical underflow 142 | * when q is larger than 10 or so, so we split into 143 | * different regimes. 144 | * 145 | * When q != 0, we shift back to what the paper defined: 146 | 147 | * H(x) = (x+q)^{1-s} / (1-s) 148 | * and for q != 0 and also s==1, use 149 | * H(x) = [exp{(1-s) log(x+q)} - 1] / (1-s) 150 | */ 151 | const RealType H(const RealType x) 152 | { 153 | if (not spole) 154 | return std::pow(x + _q, oms) / oms; 155 | 156 | const RealType log_xpq = std::log(x + _q); 157 | return log_xpq * expxm1bx(oms * log_xpq); 158 | } 159 | 160 | /** 161 | * The inverse function of H(x). 162 | * H^{-1}(y) = [(1-s)y + (1+q)^{1-s}]^{1/(1-s)} - q 163 | * Same convergence issues as above; two regimes. 
164 | * 165 | * For s far away from 1.0 use the paper version 166 | * H^{-1}(y) = -q + (y(1-s))^{1/(1-s)} 167 | */ 168 | const RealType H_inv(const RealType y) 169 | { 170 | if (not spole) 171 | return std::pow(y * oms, rvs) - _q; 172 | 173 | return std::exp(y * log1pxbx(oms * y)) - _q; 174 | } 175 | }; 176 | 177 | #endif 178 | -------------------------------------------------------------------------------- /benchmarks/pattern/zip_test.cpp: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Zipf (Zeta) random distribution. 4 | * 5 | * Implementation taken from drobilla's May 24, 2017 answer to 6 | * https://stackoverflow.com/questions/9983239/how-to-generate-zipf-distributed-numbers-efficiently 7 | * 8 | * That code is referenced with this: 9 | * "Rejection-inversion to generate variates from monotone discrete 10 | * distributions", Wolfgang Hörmann and Gerhard Derflinger 11 | * ACM TOMACS 6.3 (1996): 169-184 12 | * 13 | * Note that the Hörmann & Derflinger paper, and the stackoverflow 14 | * code base incorrectly names the paramater as `q`, when they mean `s`. 15 | * Thier `q` has nothing to do with the q-series. The names in the code 16 | * below conform to conventions. 17 | * 18 | * Example usage: 19 | * 20 | * std::random_device rd; 21 | * std::mt19937 gen(rd()); 22 | * zipf_distribution<> zipf(300); 23 | * 24 | * for (int i = 0; i < 100; i++) 25 | * printf("draw %d %d\n", i, zipf(gen)); 26 | */ 27 | 28 | 29 | #include 30 | #include 31 | 32 | 33 | int main(){ 34 | 35 | std::random_device rd; 36 | std::mt19937 gen(rd()); 37 | zipf_distribution zipf(100, 0.5); //number of unique keys 38 | 39 | for (int i = 0; i < 10000; i++) //number of values to draw from the unique keys. 
40 | printf("i: %d val: %llu\n", i, zipf(gen)); 41 | } 42 | -------------------------------------------------------------------------------- /benchmarks/readwrite/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.10) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 10.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (readwrite-benchmark-module readwrite-bench "main.cu;") 12 | 13 | make_benchmark_choice (readwrite-benchmark readwrite-benchmark-smartio readwrite-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/readwrite/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=512 6 | R=1 7 | B=1024 8 | G=2 9 | R=true 10 | A=2 11 | RT=50 12 | for C in 1 2 3 4 5 6 7 13 | do 14 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 15 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 16 | do 17 | echo "------------------ $T Threads ------------------" 18 | ../../build/bin/nvm-block-bench --threads=$T --blk_size=$B --reqs=1 --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G --access_type=$A --ratio=$RT --num_blks=$B --random=$R | grep "Ops" 19 | done 20 | 21 | done 22 | -------------------------------------------------------------------------------- /benchmarks/reduction/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (reduction-benchmark-module reduction-bench "main.cu;") 12 | 13 | make_benchmark_choice (reduction-benchmark 
reduction-benchmark-smartio reduction-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/reduction/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/scan/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (scan-benchmark-module scan-bench "main.cu;") 12 | 13 | make_benchmark_choice (scan-benchmark scan-benchmark-smartio scan-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/scan/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P 
--n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/sssp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-benchmarks) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | find_package (CUDA 8.0 REQUIRED) 8 | 9 | 10 | 11 | make_benchmark (sssp-benchmark-module sssp-bench "main.cu;") 12 | 13 | make_benchmark_choice (sssp-benchmark sssp-benchmark-smartio sssp-benchmark-module) 14 | -------------------------------------------------------------------------------- /benchmarks/sssp/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eux 3 | 4 | 5 | P=4096 6 | R=1 7 | B=1024 8 | G=2 9 | for C in 1 2 3 4 5 6 7 10 | do 11 | echo "++++++++++++++++++ $C Controllers ++++++++++++++++++" 12 | for T in 1024 2048 4096 8192 16384 32768 65536 131072 262144 13 | do 14 | echo "------------------ $T Threads ------------------" 15 | ../../build/bin/nvm-cuda-bench --threads=$T --blk_size=$B --reqs=$R --pages=$T --queue_depth=1024 --num_queues=128 --page_size=$P --n_ctrls=$C --gpu=$G | grep "IO" 16 | done 17 | 18 | done 19 | -------------------------------------------------------------------------------- /benchmarks/sssp/run_sssp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | if [ $# -ne 3 ] 5 | then 6 | echo Usage $0 numssd gpuid tbsize && exit 1 7 | fi 8 | 9 | 10 | NUMDATASET=5 #1 dataset MOLIERE is a floating type. Will be used separately later. 
11 | declare -a GraphFileArray=( 12 | "/home/vsm2/bafsdata/GAP-kron.bel" 13 | "/home/vsm2/bafsdata/GAP-urand.bel" 14 | "/home/vsm2/bafsdata/com-Friendster.bel" 15 | "/home/vsm2/bafsdata/uk-2007-05.bel" 16 | "/home/vsm2/bafsdata/sk-2005.bel" 17 | "/home/vsm2/bafsdata/MOLIERE_2016.bel" 18 | ) 19 | declare -a GraphFileOffset=( 20 | "$((1024*1024*1024*0))" 21 | "$((1024*1024*1024*64))" 22 | "$((1024*1024*1024*160))" 23 | "$((1024*1024*1024*320))" 24 | "$((1024*1024*1024*384))" 25 | "$((1024*1024*1024*224))" 26 | ) 27 | 28 | declare -a GraphWeightOffset=( 29 | "$((1024*1024*1024*32))" 30 | "$((1024*1024*1024*128))" 31 | "$((1024*1024*1024*192))" 32 | "$((1024*1024*1024*352))" 33 | "$((1024*1024*1024*416))" 34 | "$((1024*1024*1024*288))" 35 | ) 36 | 37 | 38 | declare -a GraphRootNode=( 39 | "58720242" 40 | "58720256" 41 | "28703654" 42 | "46329738" 43 | "37977096" 44 | "13229860" 45 | ) 46 | #echo "${GraphFileArray[5]} offset is ${GraphFileOffset[5]}" 47 | 48 | 49 | CTRL=$1 50 | MEMTYPE=6 #BAFS_DIRECT 51 | GPU=$2 52 | TB=128 53 | 54 | for ((gfid=0; gfid 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "common.h" 9 | 10 | 11 | 12 | /* 13 | * Print controller information. 
14 | */ 15 | static void print_ctrl_info(FILE* fp, const struct nvm_ctrl_info* info, uint16_t n_cqs, uint16_t n_sqs) 16 | { 17 | unsigned char vendor[4]; 18 | memcpy(vendor, &info->pci_vendor, sizeof(vendor)); 19 | 20 | char serial[21]; 21 | memset(serial, 0, 21); 22 | memcpy(serial, info->serial_no, 20); 23 | 24 | char model[41]; 25 | memset(model, 0, 41); 26 | memcpy(model, info->model_no, 40); 27 | 28 | char revision[9]; 29 | memset(revision, 0, 9); 30 | memcpy(revision, info->firmware, 8); 31 | 32 | fprintf(fp, "------------- Controller information -------------\n"); 33 | fprintf(fp, "PCI Vendor ID : %x %x\n", vendor[0], vendor[1]); 34 | fprintf(fp, "PCI Subsystem Vendor ID : %x %x\n", vendor[2], vendor[3]); 35 | fprintf(fp, "NVM Express version : %u.%u.%u\n", 36 | info->nvme_version >> 16, (info->nvme_version >> 8) & 0xff, info->nvme_version & 0xff); 37 | fprintf(fp, "Controller page size : %zu B (0x%zx)\n", info->page_size, info->page_size); 38 | fprintf(fp, "Max queue entries : %u\n", info->max_entries); 39 | fprintf(fp, "Serial Number : %s\n", serial); 40 | fprintf(fp, "Model Number : %s\n", model); 41 | fprintf(fp, "Firmware revision : %s\n", revision); 42 | fprintf(fp, "Max data transfer size : %zu B (%zu KiB)\n", info->max_data_size, info->max_data_size >> 10); 43 | fprintf(fp, "Max outstanding commands: %zu\n", info->max_out_cmds); 44 | fprintf(fp, "Max number of namespaces: %zu\n", info->max_n_ns); 45 | fprintf(fp, "Current number of CQs : %u\n", n_cqs); 46 | fprintf(fp, "Current number of SQs : %u\n", n_sqs); 47 | fprintf(fp, "--------------------------------------------------\n"); 48 | } 49 | 50 | 51 | /* 52 | * Print namespace information. 
53 | */ 54 | static void print_ns_info(FILE* fp, const struct nvm_ns_info* info) 55 | { 56 | fprintf(fp, "------------- Namespace information -------------\n"); 57 | fprintf(fp, "Namespace identifier : %x\n", info->ns_id); 58 | fprintf(fp, "Logical block size : %zu bytes\n", info->lba_data_size); 59 | fprintf(fp, "Namespace size : %zu blocks\n", info->size); 60 | fprintf(fp, "Namespace capacity : %zu blocks\n", info->capacity); 61 | fprintf(fp, "--------------------------------------------------\n"); 62 | } 63 | 64 | 65 | 66 | nvm_aq_ref reset_ctrl(const nvm_ctrl_t* ctrl, const nvm_dma_t* dma_window) 67 | { 68 | int status; 69 | nvm_aq_ref admin; 70 | 71 | if (dma_window->n_ioaddrs < 2) 72 | { 73 | return NULL; 74 | } 75 | memset((void*) dma_window->vaddr, 0, dma_window->page_size * 2); 76 | 77 | fprintf(stderr, "Resetting controller and setting up admin queues...\n"); 78 | status = nvm_aq_create(&admin, ctrl, dma_window); 79 | if (status != 0) 80 | { 81 | fprintf(stderr, "Failed to reset controller: %s\n", strerror(status)); 82 | return NULL; 83 | } 84 | 85 | fprintf(stderr, "Admin queues OK\n"); 86 | return admin; 87 | } 88 | 89 | 90 | 91 | int identify_ctrl(nvm_aq_ref admin, void* ptr, uint64_t ioaddr) 92 | { 93 | int status; 94 | uint16_t n_cqs = 0; 95 | uint16_t n_sqs = 0; 96 | struct nvm_ctrl_info info; 97 | 98 | fprintf(stderr, "Getting number of queues...\n"); 99 | status = nvm_admin_get_num_queues(admin, &n_cqs, &n_sqs); 100 | if (status != 0) 101 | { 102 | fprintf(stderr, "Failed to get number of queues\n"); 103 | return status; 104 | } 105 | 106 | fprintf(stderr, "Identifying controller...\n"); 107 | status = nvm_admin_ctrl_info(admin, &info, ptr, ioaddr); 108 | if (!nvm_ok(status)) 109 | { 110 | fprintf(stderr, "Failed to identify controller: %s\n", nvm_strerror(status)); 111 | return status; 112 | } 113 | 114 | print_ctrl_info(stdout, &info, n_cqs, n_sqs); 115 | return 0; 116 | } 117 | 118 | 119 | 120 | int identify_ns(nvm_aq_ref admin, uint32_t 
nvm_namespace, void* ptr, uint64_t ioaddr) 121 | { 122 | int status; 123 | struct nvm_ns_info info; 124 | 125 | status = nvm_admin_ns_info(admin, &info, nvm_namespace, ptr, ioaddr); 126 | if (status != 0) 127 | { 128 | fprintf(stderr, "Failed to identify namespace: %s\n", strerror(status)); 129 | return status; 130 | } 131 | 132 | print_ns_info(stdout, &info); 133 | return 0; 134 | } 135 | 136 | -------------------------------------------------------------------------------- /deprecated/examples/identify/common.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_IDENTIFY_H__ 2 | #define __LIBNVM_SAMPLES_IDENTIFY_H__ 3 | 4 | #include 5 | #include 6 | 7 | 8 | /* 9 | * Reset controller and take ownership of admin queues. 10 | * DMA window must be at least 2 pages. 11 | * Caller must manually destroy the admin reference. 12 | */ 13 | nvm_aq_ref reset_ctrl(const nvm_ctrl_t* ctrl, const nvm_dma_t* dma_window); 14 | 15 | 16 | /* 17 | * Identify controller and print information. 18 | */ 19 | int identify_ctrl(nvm_aq_ref admin, void* ptr, uint64_t ioaddr); 20 | 21 | 22 | /* 23 | * Identify namespace and print information. 
24 | */ 25 | int identify_ns(nvm_aq_ref admin, uint32_t nvm_namespace, void* ptr, uint64_t ioaddr); 26 | 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /deprecated/examples/identify/module.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "common.h" 19 | 20 | 21 | 22 | static void parse_args(int argc, char** argv, char** device, uint32_t* ns_id); 23 | 24 | 25 | 26 | static int open_fd(const char* path) 27 | { 28 | int fd; 29 | 30 | fd = open(path, O_RDWR|O_NONBLOCK); 31 | if (fd < 0) 32 | { 33 | fprintf(stderr, "Failed to open descriptor: %s\n", strerror(errno)); 34 | return -1; 35 | } 36 | 37 | return fd; 38 | } 39 | 40 | 41 | int main(int argc, char** argv) 42 | { 43 | int status; 44 | nvm_ctrl_t* ctrl; 45 | nvm_dma_t* window = NULL; 46 | nvm_aq_ref admin = NULL; 47 | uint32_t nvm_namespace; 48 | void* memory; 49 | 50 | long page_size = sysconf(_SC_PAGESIZE); 51 | 52 | char* path = NULL; 53 | parse_args(argc, argv, &path, &nvm_namespace); 54 | 55 | int fd = open_fd(path); 56 | if (fd < 0) 57 | { 58 | exit(1); 59 | } 60 | 61 | status = nvm_ctrl_init(&ctrl, fd); 62 | if (status != 0) 63 | { 64 | close(fd); 65 | fprintf(stderr, "Failed to get controller reference: %s\n", strerror(status)); 66 | exit(1); 67 | } 68 | 69 | close(fd); 70 | 71 | status = posix_memalign(&memory, ctrl->page_size, 3 * page_size); 72 | if (status != 0) 73 | { 74 | fprintf(stderr, "Failed to allocate page-aligned memory: %s\n", strerror(status)); 75 | nvm_ctrl_free(ctrl); 76 | exit(2); 77 | } 78 | 79 | status = nvm_dma_map_host(&window, ctrl, memory, 3 * page_size); 80 | if (status != 0) 81 | { 82 | free(memory); 83 | nvm_ctrl_free(ctrl); 84 | exit(1); 85 | } 86 | 
87 | admin = reset_ctrl(ctrl, window); 88 | if (admin == NULL) 89 | { 90 | status = 1; 91 | goto leave; 92 | } 93 | 94 | status = identify_ctrl(admin, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 95 | if (status != 0) 96 | { 97 | goto leave; 98 | } 99 | 100 | if (nvm_namespace != 0) 101 | { 102 | status = identify_ns(admin, nvm_namespace, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 103 | } 104 | 105 | leave: 106 | nvm_aq_destroy(admin); 107 | nvm_dma_unmap(window); 108 | free(memory); 109 | nvm_ctrl_free(ctrl); 110 | 111 | fprintf(stderr, "Goodbye!\n"); 112 | exit(status); 113 | } 114 | 115 | 116 | static void give_usage(const char* name) 117 | { 118 | fprintf(stderr, "Usage: %s --ctrl=\n", name); 119 | } 120 | 121 | 122 | static void show_help(const char* name) 123 | { 124 | give_usage(name); 125 | fprintf(stderr, "\nCreate a manager and run an IDENTIFY CONTROLLER NVM admin command.\n\n" 126 | " --ctrl Path to controller device (/dev/libnvmXXX).\n" 127 | " --ns Show information about NVM namespace.\n" 128 | " --help Show this information.\n\n"); 129 | } 130 | 131 | 132 | static void parse_args(int argc, char** argv, char** dev, uint32_t* ns_id) 133 | { 134 | static struct option opts[] = { 135 | { "help", no_argument, NULL, 'h' }, 136 | { "ctrl", required_argument, NULL, 'c' }, 137 | { "ns", required_argument, NULL, 'n' }, 138 | { NULL, 0, NULL, 0 } 139 | }; 140 | 141 | int opt; 142 | int idx; 143 | char* endptr = NULL; 144 | 145 | *dev = NULL; 146 | *ns_id = 0; 147 | 148 | while ((opt = getopt_long(argc, argv, ":hc:n:", opts, &idx)) != -1) 149 | { 150 | switch (opt) 151 | { 152 | case '?': // unknown option 153 | fprintf(stderr, "Unknown option: `%s'\n", argv[optind - 1]); 154 | give_usage(argv[0]); 155 | exit('?'); 156 | 157 | case ':': // missing option argument 158 | fprintf(stderr, "Missing argument for option: `%s'\n", argv[optind - 1]); 159 | give_usage(argv[0]); 160 | exit(':'); 161 | 162 | case 'c': // device identifier 163 | *dev = optarg; 164 | 
break; 165 | 166 | case 'n': // namespace identifier 167 | *ns_id = strtoul(optarg, &endptr, 0); 168 | if (endptr == NULL || *endptr != '\0') 169 | { 170 | fprintf(stderr, "Invalid NVM namespace"); 171 | give_usage(argv[0]); 172 | exit('n'); 173 | } 174 | break; 175 | 176 | case 'h': 177 | show_help(argv[0]); 178 | exit(0); 179 | } 180 | } 181 | 182 | if (*dev == NULL) 183 | { 184 | fprintf(stderr, "Controller is not set!\n"); 185 | give_usage(argv[0]); 186 | exit('c'); 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /deprecated/examples/identify/smartio.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "common.h" 19 | 20 | 21 | /* 22 | * Command line arguments. 
23 | */ 24 | struct cl_args 25 | { 26 | uint64_t dev_id; 27 | uint32_t namespace_id; 28 | }; 29 | 30 | 31 | static void parse_args(int argc, char** argv, struct cl_args* args); 32 | 33 | 34 | int main(int argc, char** argv) 35 | { 36 | sci_error_t err; 37 | struct cl_args args; 38 | 39 | parse_args(argc, argv, &args); 40 | 41 | SCIInitialize(0, &err); 42 | 43 | nvm_ctrl_t* ctrl; 44 | int status = nvm_dis_ctrl_init(&ctrl, args.dev_id); 45 | if (status != 0) 46 | { 47 | fprintf(stderr, "Failed to initialize controller reference: %s\n", strerror(status)); 48 | exit(status); 49 | } 50 | 51 | nvm_dma_t* window; 52 | status = nvm_dis_dma_create(&window, ctrl, 3 * 0x1000, 0); 53 | if (status != 0) 54 | { 55 | nvm_ctrl_free(ctrl); 56 | fprintf(stderr, "Failed to create local segment: %s\n", strerror(status)); 57 | exit(status); 58 | } 59 | memset(window->vaddr, 0, 3 * 0x1000); 60 | 61 | nvm_aq_ref aq; 62 | aq = reset_ctrl(ctrl, window); 63 | if (aq == NULL) 64 | { 65 | status = 1; 66 | goto leave; 67 | } 68 | 69 | status = identify_ctrl(aq, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 70 | if (status != 0) 71 | { 72 | goto leave; 73 | } 74 | 75 | if (args.namespace_id != 0) 76 | { 77 | status = identify_ns(aq, args.namespace_id, NVM_DMA_OFFSET(window, 2), window->ioaddrs[2]); 78 | } 79 | 80 | leave: 81 | nvm_aq_destroy(aq); 82 | nvm_dma_unmap(window); 83 | nvm_ctrl_free(ctrl); 84 | SCITerminate(); 85 | 86 | fprintf(stderr, "Goodbye!\n"); 87 | exit(status); 88 | } 89 | 90 | 91 | static int parse_u64(const char* str, uint64_t* num, int base) 92 | { 93 | char* endptr = NULL; 94 | uint64_t ul = strtoul(str, &endptr, base); 95 | 96 | if (endptr == NULL || *endptr != '\0') 97 | { 98 | return EINVAL; 99 | } 100 | 101 | *num = ul; 102 | return 0; 103 | } 104 | 105 | 106 | static int parse_u32(const char* str, uint32_t* num, int base) 107 | { 108 | int status; 109 | uint64_t ul; 110 | 111 | status = parse_u64(str, &ul, base); 112 | 113 | if (status != 0 || ul > UINT_MAX) 114 | { 
115 | return EINVAL; 116 | } 117 | 118 | *num = (uint32_t) ul; 119 | return status; 120 | } 121 | 122 | 123 | static void give_usage(const char* name) 124 | { 125 | fprintf(stderr, "Usage: %s --ctrl= [--adapter=] [--id=]\n", name); 126 | } 127 | 128 | 129 | static void show_help(const char* name) 130 | { 131 | give_usage(name); 132 | fprintf(stderr, "\nCreate a manager and run an IDENTIFY CONTROLLER NVM admin command.\n\n" 133 | " --ctrl SmartIO device identifier (fabric device id).\n" 134 | " --ns Show information about NVM namespace.\n" 135 | " --help Show this information.\n\n"); 136 | } 137 | 138 | 139 | 140 | 141 | static void parse_args(int argc, char** argv, struct cl_args* args) 142 | { 143 | static struct option opts[] = { 144 | { "help", no_argument, NULL, 'h' }, 145 | { "ctrl", required_argument, NULL, 'c' }, 146 | { "ns", required_argument, NULL, 'n' }, 147 | { "segment", required_argument, NULL, 's' }, 148 | { NULL, 0, NULL, 0 } 149 | }; 150 | 151 | int opt; 152 | int idx; 153 | 154 | bool dev_set = false; 155 | args->dev_id = 0; 156 | args->namespace_id = 0; 157 | 158 | while ((opt = getopt_long(argc, argv, ":hc:n:", opts, &idx)) != -1) 159 | { 160 | switch (opt) 161 | { 162 | case '?': // unknown option 163 | fprintf(stderr, "Unknown option: `%s'\n", argv[optind - 1]); 164 | give_usage(argv[0]); 165 | exit('?'); 166 | 167 | case ':': // missing option argument 168 | fprintf(stderr, "Missing argument for option: `%s'\n", argv[optind - 1]); 169 | give_usage(argv[0]); 170 | exit(':'); 171 | 172 | case 'c': // device identifier 173 | dev_set = true; 174 | if (parse_u64(optarg, &args->dev_id, 16) != 0) 175 | { 176 | give_usage(argv[0]); 177 | exit('c'); 178 | } 179 | break; 180 | 181 | case 'n': 182 | if (parse_u32(optarg, &args->namespace_id, 0) != 0) 183 | { 184 | give_usage(argv[0]); 185 | exit('n'); 186 | } 187 | break; 188 | 189 | case 'h': 190 | show_help(argv[0]); 191 | exit(0); 192 | } 193 | } 194 | 195 | if (!dev_set) 196 | { 197 | 
fprintf(stderr, "Device ID is not set!\n"); 198 | give_usage(argv[0]); 199 | exit('c'); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /deprecated/examples/integrity/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-samples) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | make_sample (integrity-smartio integrity-util "integrity.c;transfer.c;util.c") 8 | set_multithread (integrity-smartio) 9 | set_sisci (integrity-smartio) 10 | 11 | 12 | make_sample (integrity-module integrity-util "integrity.c;transfer.c;util.c") 13 | set_multithread (integrity-module) 14 | 15 | 16 | make_samples_choice (integrity-util integrity-smartio integrity-module) 17 | add_custom_target (integrity DEPENDS integrity-util) 18 | -------------------------------------------------------------------------------- /deprecated/examples/integrity/integrity.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_INTEGRITY_H__ 2 | #define __LIBNVM_SAMPLES_INTEGRITY_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | /* Memory descriptor */ 10 | struct buffer 11 | { 12 | void* buffer; 13 | nvm_dma_t* dma; 14 | }; 15 | 16 | 17 | /* Queue descriptor */ 18 | struct queue 19 | { 20 | struct buffer qmem; 21 | nvm_queue_t queue; 22 | size_t counter; 23 | }; 24 | 25 | 26 | /* Disk descriptor */ 27 | struct disk 28 | { 29 | size_t page_size; 30 | size_t max_data_size; 31 | uint32_t ns_id; 32 | size_t block_size; 33 | }; 34 | 35 | 36 | int create_buffer(struct buffer* b, nvm_aq_ref, size_t size); 37 | 38 | 39 | void remove_buffer(struct buffer* b); 40 | 41 | 42 | 43 | int create_queue(struct queue* q, nvm_aq_ref ref, const struct queue* cq, uint16_t qno); 44 | 45 | 46 | void remove_queue(struct queue* q); 47 | 48 | 49 | 50 | int disk_write(const 
struct disk* disk, struct buffer* buffer, struct queue* queues, uint16_t n_queues, FILE* fp, off_t size); 51 | 52 | int disk_read(const struct disk* disk, struct buffer* buffer, struct queue* queues, uint16_t n_queues, FILE* fp, off_t size); 53 | 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /deprecated/examples/integrity/util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "integrity.h" 12 | 13 | 14 | int create_buffer(struct buffer* b, nvm_aq_ref ref, size_t size) 15 | { 16 | int status; 17 | 18 | const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); 19 | 20 | #ifdef __DIS_CLUSTER__ 21 | b->buffer = NULL; 22 | status = nvm_dis_dma_create(&b->dma, ctrl, size, 0); 23 | #else 24 | status = posix_memalign(&b->buffer, ctrl->page_size, size); 25 | if (status != 0) 26 | { 27 | fprintf(stderr, "Failed to allocate memory: %s\n", strerror(status)); 28 | return status; 29 | } 30 | 31 | status = nvm_dma_map_host(&b->dma, ctrl, b->buffer, size); 32 | #endif 33 | if (!nvm_ok(status)) 34 | { 35 | free(b->buffer); 36 | fprintf(stderr, "Failed to create local segment: %s\n", nvm_strerror(status)); 37 | return status; 38 | } 39 | 40 | memset(b->dma->vaddr, 0, b->dma->page_size * b->dma->n_ioaddrs); 41 | 42 | return 0; 43 | } 44 | 45 | 46 | void remove_buffer(struct buffer* b) 47 | { 48 | nvm_dma_unmap(b->dma); 49 | free(b->buffer); 50 | } 51 | 52 | 53 | int create_queue(struct queue* q, nvm_aq_ref ref, const struct queue* cq, uint16_t qno) 54 | { 55 | int status; 56 | 57 | const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); 58 | 59 | size_t prp_lists = 0; 60 | if (cq != NULL) 61 | { 62 | size_t n_entries = ctrl->page_size / sizeof(nvm_cmd_t); 63 | prp_lists = n_entries <= ctrl->max_qs ? 
n_entries : ctrl->max_qs; 64 | } 65 | 66 | status = create_buffer(&q->qmem, ref, prp_lists * ctrl->page_size + ctrl->page_size); 67 | if (!nvm_ok(status)) 68 | { 69 | return status; 70 | } 71 | 72 | if (cq == NULL) 73 | { 74 | status = nvm_admin_cq_create(ref, &q->queue, qno, q->qmem.dma, 0, NVM_CQ_SIZE(ctrl, 1)); 75 | } 76 | else 77 | { 78 | status = nvm_admin_sq_create(ref, &q->queue, &cq->queue, qno, q->qmem.dma, 0, NVM_SQ_SIZE(ctrl, 1)); 79 | } 80 | 81 | if (!nvm_ok(status)) 82 | { 83 | remove_buffer(&q->qmem); 84 | fprintf(stderr, "Failed to create queue: %s\n", nvm_strerror(status)); 85 | return status; 86 | } 87 | 88 | q->counter = 0; 89 | return 0; 90 | } 91 | 92 | 93 | void remove_queue(struct queue* q) 94 | { 95 | remove_buffer(&q->qmem); 96 | } 97 | 98 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-samples) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | make_sample (read-blocks-module read-blocks "module.c;args.c;read.c") 8 | set_multithread (read-blocks-module) 9 | 10 | make_sample (read-blocks-smartio read-blocks "smartio.c;args.c;read.c") 11 | set_multithread (read-blocks-smartio) 12 | set_sisci (read-blocks-smartio) 13 | 14 | make_samples_choice (read-blocks read-blocks-smartio read-blocks-module) 15 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/args.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_READ_BLOCKS_OPTIONS_H__ 2 | #define __LIBNVM_SAMPLES_READ_BLOCKS_OPTIONS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | struct options 11 | { 12 | #ifdef __DIS_CLUSTER__ 13 | uint64_t controller_id; 14 | #else 15 | const char* 
controller_path; 16 | #endif 17 | size_t queue_size; 18 | size_t chunk_size; 19 | uint32_t namespace_id; 20 | size_t num_blocks; 21 | size_t offset; 22 | FILE* output; 23 | FILE* input; 24 | bool ascii; 25 | bool identify; 26 | }; 27 | 28 | 29 | void parse_options(int argc, char** argv, struct options* options); 30 | 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/module.c: -------------------------------------------------------------------------------- 1 | #include "args.h" 2 | #include "read.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | 19 | static int prepare_and_read(nvm_aq_ref ref, const struct disk_info* disk, const struct options* args) 20 | { 21 | int status = 0; 22 | 23 | const size_t qs = args->queue_size; 24 | void* buffer_ptr = NULL; 25 | nvm_dma_t* buffer = NULL; 26 | void* queue_ptr = NULL; 27 | nvm_dma_t* sq_mem = NULL; 28 | nvm_dma_t* cq_mem = NULL; 29 | size_t n_prp_lists = qs; 30 | struct queue_pair queues; 31 | 32 | const nvm_ctrl_t* ctrl = nvm_ctrl_from_aq_ref(ref); 33 | 34 | const size_t buffer_blocks = args->chunk_size <= args->num_blocks ? 
args->chunk_size : args->num_blocks; 35 | status = posix_memalign(&buffer_ptr, disk->page_size, NVM_CTRL_ALIGN(ctrl, buffer_blocks * disk->block_size)); 36 | if (status != 0) 37 | { 38 | fprintf(stderr, "Failed to allocate memory buffer: %s\n", strerror(status)); 39 | goto leave; 40 | } 41 | 42 | status = posix_memalign(&queue_ptr, disk->page_size, 43 | NVM_SQ_PAGES(disk, qs) * disk->page_size + disk->page_size * (n_prp_lists + 2)); 44 | if (status != 0) 45 | { 46 | fprintf(stderr, "Failed to allocate queue memory: %s\n", strerror(status)); 47 | goto leave; 48 | } 49 | 50 | status = nvm_dma_map_host(&sq_mem, ctrl, NVM_PTR_OFFSET(queue_ptr, disk->page_size, 1), 51 | NVM_SQ_PAGES(disk, qs) * disk->page_size + disk->page_size * (n_prp_lists + 1)); 52 | if (!nvm_ok(status)) 53 | { 54 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 55 | goto leave; 56 | } 57 | 58 | status = nvm_dma_map_host(&cq_mem, ctrl, queue_ptr, disk->page_size); 59 | if (!nvm_ok(status)) 60 | { 61 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 62 | goto leave; 63 | } 64 | 65 | status = nvm_dma_map_host(&buffer, ctrl, buffer_ptr, buffer_blocks * disk->block_size); 66 | if (!nvm_ok(status)) 67 | { 68 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 69 | goto leave; 70 | } 71 | 72 | status = create_queue_pair(ref, &queues, cq_mem, sq_mem, qs); 73 | if (status != 0) 74 | { 75 | goto leave; 76 | } 77 | 78 | if (args->input != NULL) 79 | { 80 | status = write_blocks(disk, &queues, buffer, args); 81 | if (status != 0) 82 | { 83 | goto leave; 84 | } 85 | } 86 | 87 | status = read_and_dump(disk, &queues, buffer, args); 88 | 89 | leave: 90 | nvm_dma_unmap(buffer); 91 | nvm_dma_unmap(sq_mem); 92 | nvm_dma_unmap(cq_mem); 93 | free(buffer_ptr); 94 | free(queue_ptr); 95 | return status; 96 | } 97 | 98 | 99 | 100 | int main(int argc, char** argv) 101 | { 102 | int status; 103 | int fd; 104 | 105 | 
struct disk_info disk; 106 | 107 | nvm_ctrl_t* ctrl = NULL; 108 | void* aq_ptr = NULL; 109 | nvm_dma_t* aq_mem = NULL; 110 | nvm_aq_ref aq_ref = NULL; 111 | 112 | struct options args; 113 | 114 | // Parse arguments from command line 115 | parse_options(argc, argv, &args); 116 | 117 | // Get controller reference 118 | fd = open(args.controller_path, O_RDWR | O_NONBLOCK); 119 | if (fd < 0) 120 | { 121 | fprintf(stderr, "Failed to open file descriptor: %s\n", strerror(errno)); 122 | exit(1); 123 | } 124 | 125 | status = nvm_ctrl_init(&ctrl, fd); 126 | if (!nvm_ok(status)) 127 | { 128 | fprintf(stderr, "Failed to initialize controller reference: %s\n", nvm_strerror(status)); 129 | goto leave; 130 | } 131 | 132 | // Create admin queue pair + page for identify commands 133 | status = posix_memalign(&aq_ptr, ctrl->page_size, ctrl->page_size * 3); 134 | if (status != 0) 135 | { 136 | fprintf(stderr, "Failed to allocate queue memory: %s\n", strerror(status)); 137 | goto leave; 138 | } 139 | 140 | status = nvm_dma_map_host(&aq_mem, ctrl, aq_ptr, ctrl->page_size * 3); 141 | if (!nvm_ok(status)) 142 | { 143 | fprintf(stderr, "Failed to map memory for controller: %s\n", nvm_strerror(status)); 144 | goto leave; 145 | } 146 | 147 | // Reset controller and set admin queues 148 | status = nvm_aq_create(&aq_ref, ctrl, aq_mem); 149 | if (!nvm_ok(status)) 150 | { 151 | fprintf(stderr, "Failed to reset controller: %s\n", nvm_strerror(status)); 152 | goto leave; 153 | } 154 | 155 | // Identify controller and namespace 156 | status = get_disk_info(aq_ref, &disk, args.namespace_id, NVM_DMA_OFFSET(aq_mem, 2), aq_mem->ioaddrs[2], args.identify); 157 | if (status != 0) 158 | { 159 | goto leave; 160 | } 161 | 162 | status = prepare_and_read(aq_ref, &disk, &args); 163 | 164 | leave: 165 | if (args.input != NULL) 166 | { 167 | fclose(args.input); 168 | } 169 | 170 | if (args.output != NULL) 171 | { 172 | fprintf(stderr, "Flushing output file...\n"); 173 | fclose(args.output); 174 | } 175 | 176 
| fprintf(stderr, "Done\n"); 177 | 178 | nvm_aq_destroy(aq_ref); 179 | nvm_dma_unmap(aq_mem); 180 | free(aq_ptr); 181 | nvm_ctrl_free(ctrl); 182 | close(fd); 183 | exit(status); 184 | } 185 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/read.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_SAMPLES_READ_BLOCKS_READ_H__ 2 | #define __LIBNVM_SAMPLES_READ_BLOCKS_READ_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "args.h" 8 | 9 | 10 | /* 11 | * Information about controller and namespace. 12 | */ 13 | struct disk_info 14 | { 15 | uint32_t ns_id; 16 | size_t max_data_size; 17 | size_t page_size; 18 | size_t block_size; 19 | }; 20 | 21 | 22 | 23 | struct queue_pair 24 | { 25 | nvm_dma_t* sq_mem; 26 | nvm_dma_t* cq_mem; 27 | nvm_queue_t sq; 28 | nvm_queue_t cq; 29 | bool stop; 30 | size_t num_cpls; 31 | }; 32 | 33 | 34 | 35 | int get_disk_info(nvm_aq_ref ref, struct disk_info* info, uint32_t ns_id, void* ptr, uint64_t ioaddr, bool show); 36 | 37 | 38 | int create_queue_pair(nvm_aq_ref ref, struct queue_pair* qp, nvm_dma_t* cq_mem, nvm_dma_t* sq_mem, size_t sqs); 39 | 40 | 41 | int read_and_dump(const struct disk_info* disk, struct queue_pair* qp, const nvm_dma_t* buffer, const struct options* args); 42 | 43 | 44 | int write_blocks(const struct disk_info* disk, struct queue_pair* qp, const nvm_dma_t* buffer, const struct options* args); 45 | 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /deprecated/examples/read-blocks/smartio.c: -------------------------------------------------------------------------------- 1 | #include "args.h" 2 | #include "read.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | 18 | int main(int argc, char** argv) 19 | { 20 | 
int status; 21 | sci_error_t err; 22 | 23 | struct disk_info info; 24 | struct queue_pair queues; 25 | 26 | nvm_ctrl_t* ctrl = NULL; 27 | nvm_dma_t* aq_mem = NULL; 28 | nvm_aq_ref aq_ref = NULL; 29 | nvm_dma_t* buffer = NULL; 30 | nvm_dma_t* cq_mem = NULL; 31 | nvm_dma_t* sq_mem = NULL; 32 | 33 | struct options args; 34 | 35 | parse_options(argc, argv, &args); 36 | 37 | SCIInitialize(0, &err); 38 | if (err != SCI_ERR_OK) 39 | { 40 | fprintf(stderr, "Something went wrong: %s\n", SCIGetErrorString(err)); 41 | exit(1); 42 | } 43 | 44 | // Get controller reference 45 | status = nvm_dis_ctrl_init(&ctrl, args.controller_id); 46 | if (!nvm_ok(status)) 47 | { 48 | fprintf(stderr, "Failed to get controller reference: %s\n", nvm_strerror(status)); 49 | goto leave; 50 | } 51 | 52 | // Create admin queue pair + page for identify commands 53 | status = nvm_dis_dma_create(&aq_mem, ctrl, ctrl->page_size * 3, 0); 54 | if (!nvm_ok(status)) 55 | { 56 | fprintf(stderr, "Failed to create queue memory: %s\n", nvm_strerror(status)); 57 | goto leave; 58 | } 59 | 60 | // Reset controller and set admin queue pair 61 | status = nvm_aq_create(&aq_ref, ctrl, aq_mem); 62 | if (!nvm_ok(status)) 63 | { 64 | fprintf(stderr, "Failed to reset controller: %s\n", nvm_strerror(status)); 65 | goto leave; 66 | } 67 | 68 | // Identify controller and namespace 69 | status = get_disk_info(aq_ref, &info, args.namespace_id, NVM_DMA_OFFSET(aq_mem, 2), aq_mem->ioaddrs[2], args.identify); 70 | if (status != 0) 71 | { 72 | goto leave; 73 | } 74 | 75 | // Create data buffer 76 | size_t buffer_size = (args.chunk_size <= args.num_blocks ? 
args.chunk_size : args.num_blocks) * info.block_size; 77 | 78 | status = nvm_dis_dma_create(&buffer, ctrl, buffer_size, 0); 79 | if (!nvm_ok(status)) 80 | { 81 | fprintf(stderr, "Failed to create data buffer: %s\n", nvm_strerror(status)); 82 | goto leave; 83 | } 84 | 85 | // Create memory for completion queue 86 | status = nvm_dis_dma_create(&cq_mem, ctrl, ctrl->page_size, SCI_MEMACCESS_HOST_READ | SCI_MEMACCESS_DEVICE_WRITE); 87 | if (!nvm_ok(status)) 88 | { 89 | fprintf(stderr, "Failed to create completion queue memory: %s\n", nvm_strerror(status)); 90 | goto leave; 91 | } 92 | 93 | // Create memory for submission queue and PRP lists 94 | size_t n_prp_lists = args.queue_size; 95 | 96 | status = nvm_dis_dma_create(&sq_mem, ctrl, 97 | NVM_SQ_PAGES(ctrl, args.queue_size) * ctrl->page_size + ctrl->page_size * (n_prp_lists + 1), 98 | SCI_MEMACCESS_HOST_WRITE | SCI_MEMACCESS_DEVICE_READ); 99 | if (!nvm_ok(status)) 100 | { 101 | goto leave; 102 | } 103 | 104 | // Create queues 105 | status = create_queue_pair(aq_ref, &queues, cq_mem, sq_mem, args.queue_size); 106 | if (!nvm_ok(status)) 107 | { 108 | goto leave; 109 | } 110 | 111 | if (args.input != NULL) 112 | { 113 | status = write_blocks(&info, &queues, buffer, &args); 114 | if (status != 0) 115 | { 116 | goto leave; 117 | } 118 | } 119 | 120 | status = read_and_dump(&info, &queues, buffer, &args); 121 | 122 | 123 | leave: 124 | if (args.input != NULL) 125 | { 126 | fclose(args.input); 127 | } 128 | 129 | if (args.output != NULL) 130 | { 131 | fprintf(stderr, "Flushing output file...\n"); 132 | fclose(args.output); 133 | } 134 | fprintf(stderr, "Done\n"); 135 | nvm_dma_unmap(cq_mem); 136 | nvm_dma_unmap(sq_mem); 137 | nvm_dma_unmap(buffer); 138 | nvm_aq_destroy(aq_ref); 139 | nvm_dma_unmap(aq_mem); 140 | nvm_ctrl_free(ctrl); 141 | SCITerminate(); 142 | 143 | if (status != 0) 144 | { 145 | fprintf(stderr, "%d\n", status); 146 | } 147 | 148 | exit(status); 149 | } 150 | 
-------------------------------------------------------------------------------- /deprecated/examples/rpc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.1) 2 | project (libnvm-samples) 3 | 4 | set (THREADS_PREFER_PTHREAD_FLAG ON) 5 | find_package (Threads REQUIRED) 6 | 7 | make_sample (rpc-server rpc-server "rpc_server.c;util.c;segment.c") 8 | set_sisci (rpc-server) 9 | 10 | make_sample (rpc-identify rpc-identify "rpc_identify.c;util.c;segment.c") 11 | set_sisci (rpc-identify) 12 | 13 | make_sample (rpc-dd rpc-dd "rpc_dd.c;util.c;segment.c") 14 | set_sisci (rpc-dd) 15 | 16 | make_sample (rpc-flush rpc-flush "rpc_flush.c;util.c;segment.c") 17 | set_sisci (rpc-flush) 18 | 19 | add_custom_target(rpc DEPENDS rpc-server rpc-identify rpc-dd rpc-flush) 20 | add_custom_target(rpc-clients DEPENDS rpc-identify rpc-dd rpc-flush) 21 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/rpc_flush.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "segment.h" 20 | #include "util.h" 21 | #include 22 | 23 | 24 | static int flush(nvm_queue_t* cq, nvm_queue_t* sq, uint32_t ns) 25 | { 26 | nvm_cmd_t cmd; 27 | memset(&cmd, 0, sizeof(cmd)); 28 | nvm_cpl_t* cpl; 29 | 30 | nvm_cmd_header(&cmd, NVM_DEFAULT_CID(sq), NVM_IO_FLUSH, ns); 31 | 32 | nvm_cmd_t* ptr = nvm_sq_enqueue(sq); 33 | if (ptr == NULL) 34 | { 35 | return ENOMEM; 36 | } 37 | 38 | *ptr = cmd; 39 | 40 | nvm_sq_submit(sq); 41 | 42 | while ((cpl = nvm_cq_dequeue(cq)) == NULL) 43 | { 44 | //usleep(1); 45 | } 46 | 47 | nvm_sq_update(sq); 48 | nvm_cq_update(cq); 49 | 50 | int status = NVM_ERR_STATUS(cpl); 
51 | fprintf(stderr, "%s\n", nvm_strerror(status)); 52 | 53 | return status; 54 | } 55 | 56 | 57 | int main(int argc, char** argv) 58 | { 59 | nvm_ctrl_t* ctrl; 60 | nvm_aq_ref rpc; 61 | sci_error_t scierr; 62 | struct segment segment; 63 | nvm_dma_t* dma; 64 | nvm_queue_t cq; 65 | nvm_queue_t sq; 66 | 67 | SCIInitialize(0, &scierr); 68 | if (scierr != SCI_ERR_OK) 69 | { 70 | exit(1); 71 | } 72 | 73 | int err = nvm_dis_ctrl_init(&ctrl, 0xc0c00); 74 | 75 | err = nvm_dis_rpc_bind(&rpc, ctrl, 0); 76 | 77 | err = segment_create(&segment, 123, 0x2000); 78 | 79 | err = dma_create(&dma, ctrl, &segment, 0); 80 | 81 | err = nvm_admin_cq_create(rpc, &cq, 1, dma, 0, 2); 82 | 83 | err = nvm_admin_sq_create(rpc, &sq, &cq, 1, dma, 1, 2); 84 | 85 | fprintf(stderr, "OK\n"); 86 | 87 | for (int i = 0; i < 10; ++i) 88 | { 89 | flush(&cq, &sq, 1); 90 | } 91 | 92 | nvm_admin_sq_delete(rpc, &sq, &cq); 93 | nvm_admin_cq_delete(rpc, &cq); 94 | dma_remove(dma, &segment, 0); 95 | segment_remove(&segment); 96 | nvm_rpc_unbind(rpc); 97 | nvm_ctrl_free(ctrl); 98 | 99 | 100 | SCITerminate(); 101 | return 0; 102 | } 103 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/segment.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "segment.h" 11 | 12 | 13 | int segment_create(struct segment* segment, uint32_t segment_id, size_t size) 14 | { 15 | sci_error_t err; 16 | sci_error_t status; 17 | 18 | SCIOpen(&segment->sd, 0, &err); 19 | if (err != SCI_ERR_OK) 20 | { 21 | return EIO; 22 | } 23 | 24 | SCICreateSegment(segment->sd, &segment->segment, segment_id, size, NULL, NULL, SCI_FLAG_AUTO_ID, &err); 25 | if (err != SCI_ERR_OK) 26 | { 27 | SCIClose(segment->sd, 0, &status); 28 | 29 | if (err == SCI_ERR_SEGMENTID_USED) 30 | { 31 | return EEXIST; 32 | } 33 | 34 | return ENOSPC; 35 | } 
36 | 37 | segment->id = SCIGetLocalSegmentId(segment->segment); 38 | segment->size = size; 39 | return 0; 40 | } 41 | 42 | 43 | void segment_remove(struct segment* segment) 44 | { 45 | sci_error_t err; 46 | 47 | do 48 | { 49 | SCIRemoveSegment(segment->segment, 0, &err); 50 | } 51 | while (err == SCI_ERR_BUSY); 52 | 53 | SCIClose(segment->sd, 0, &err); 54 | } 55 | 56 | 57 | int dma_create(nvm_dma_t** window, const nvm_ctrl_t* ctrl, struct segment* segment, uint32_t adapter) 58 | { 59 | sci_error_t err; 60 | 61 | SCIPrepareSegment(segment->segment, adapter, 0, &err); 62 | if (err != SCI_ERR_OK) 63 | { 64 | return ENOSPC; 65 | } 66 | 67 | SCISetSegmentAvailable(segment->segment, adapter, 0, &err); 68 | if (err != SCI_ERR_OK) 69 | { 70 | return EIO; 71 | } 72 | 73 | int status = nvm_dis_dma_map_local(window, ctrl, adapter, segment->segment, true); 74 | if (status != 0) 75 | { 76 | do 77 | { 78 | SCISetSegmentUnavailable(segment->segment, adapter, 0, &err); 79 | } 80 | while (err == SCI_ERR_BUSY); 81 | 82 | return status; 83 | } 84 | 85 | return 0; 86 | } 87 | 88 | 89 | void dma_remove(nvm_dma_t* window, struct segment* segment, uint32_t adapter) 90 | { 91 | sci_error_t err; 92 | 93 | nvm_dma_unmap(window); 94 | 95 | do 96 | { 97 | SCISetSegmentUnavailable(segment->segment, adapter, 0, &err); 98 | } 99 | while (err == SCI_ERR_BUSY); 100 | } 101 | 102 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/segment.h: -------------------------------------------------------------------------------- 1 | #ifndef __DIS_NVM_EXAMPLES_SEGMENT_H__ 2 | #define __DIS_NVM_EXAMPLES_SEGMENT_H__ 3 | #ifdef __DIS_CLUSTER__ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | /* 12 | * Wrapper for SISCI segment descriptors. 
13 | */ 14 | struct segment 15 | { 16 | uint32_t id; // Segment ID 17 | sci_desc_t sd; // SISCI virtual device 18 | sci_local_segment_t segment; // Local segment descriptor 19 | size_t size; // Size of segment 20 | }; 21 | 22 | 23 | 24 | /* 25 | * Create a local segment and initialize the wrapper struct. 26 | */ 27 | int segment_create(struct segment* segment, uint32_t segment_id, size_t size); 28 | 29 | 30 | /* 31 | * Remove the local segment. 32 | */ 33 | void segment_remove(struct segment* segment); 34 | 35 | 36 | 37 | /* 38 | * Create a DMA window for the segment. 39 | */ 40 | int dma_create(nvm_dma_t** dma_window, const nvm_ctrl_t* ctrl, struct segment* segment, uint32_t dis_adapter); 41 | 42 | 43 | 44 | /* 45 | * Destroy the DMA window. 46 | */ 47 | void dma_remove(nvm_dma_t* dma_window, struct segment* segment, uint32_t dis_adapter); 48 | 49 | 50 | #endif // __DIS_CLUSTER__ 51 | #endif // __DIS_NVM_EXAMPLES_SEGMENT_H__ 52 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/util.c: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | 13 | int parse_u64(const char* str, uint64_t* num, int base) 14 | { 15 | char* endptr = NULL; 16 | uint64_t ul = strtoul(str, &endptr, base); 17 | 18 | if (endptr == NULL || *endptr != '\0') 19 | { 20 | return EINVAL; 21 | } 22 | 23 | *num = ul; 24 | return 0; 25 | } 26 | 27 | 28 | int parse_u32(const char* str, uint32_t* num, int base) 29 | { 30 | int status; 31 | uint64_t ul; 32 | 33 | status = parse_u64(str, &ul, base); 34 | 35 | if (status != 0 || ul > UINT_MAX) 36 | { 37 | return EINVAL; 38 | } 39 | 40 | *num = (uint32_t) ul; 41 | return status; 42 | } 43 | 44 | 45 | int parse_u16(const char* str, uint16_t* num, int base) 46 | { 47 | int status; 48 | uint64_t ul; 49 | 50 | status = 
parse_u64(str, &ul, base); 51 | 52 | if (status != 0 || ul > 0xffff) 53 | { 54 | return EINVAL; 55 | } 56 | 57 | *num = (uint16_t) ul; 58 | return status; 59 | } 60 | 61 | 62 | 63 | void print_ctrl_info(FILE* fp, const struct nvm_ctrl_info* info) 64 | { 65 | unsigned char vendor[4]; 66 | memcpy(vendor, &info->pci_vendor, sizeof(vendor)); 67 | 68 | char serial[21]; 69 | memset(serial, 0, 21); 70 | memcpy(serial, info->serial_no, 20); 71 | 72 | char model[41]; 73 | memset(model, 0, 41); 74 | memcpy(model, info->model_no, 40); 75 | 76 | char revision[9]; 77 | memset(revision, 0, 9); 78 | memcpy(revision, info->firmware, 8); 79 | 80 | fprintf(fp, "------------- Controller information -------------\n"); 81 | fprintf(fp, "PCI Vendor ID : %x %x\n", vendor[0], vendor[1]); 82 | fprintf(fp, "PCI Subsystem Vendor ID : %x %x\n", vendor[2], vendor[3]); 83 | fprintf(fp, "NVM Express version : %u.%u.%u\n", 84 | info->nvme_version >> 16, (info->nvme_version >> 8) & 0xff, info->nvme_version & 0xff); 85 | fprintf(fp, "Controller page size : %zu\n", info->page_size); 86 | fprintf(fp, "Max queue entries : %u\n", info->max_entries); 87 | fprintf(fp, "Serial Number : %s\n", serial); 88 | fprintf(fp, "Model Number : %s\n", model); 89 | fprintf(fp, "Firmware revision : %s\n", revision); 90 | fprintf(fp, "Max data transfer size : %zu bytes (%zu KiB)\n", info->max_data_size, info->max_data_size >> 10); 91 | fprintf(fp, "Max outstanding commands: %zu\n", info->max_out_cmds); 92 | fprintf(fp, "Max number of namespaces: %zu\n", info->max_n_ns); 93 | fprintf(fp, "--------------------------------------------------\n"); 94 | } 95 | 96 | 97 | void print_ns_info(FILE* fp, const struct nvm_ns_info* info) 98 | { 99 | fprintf(fp, "------------- Namespace information -------------\n"); 100 | fprintf(fp, "Namespace identifier : %x\n", info->ns_id); 101 | fprintf(fp, "Logical block size : %zu bytes\n", info->lba_data_size); 102 | fprintf(fp, "Namespace size : %zu blocks (%zu MiB)\n", info->size, 
info->size >> 20); 103 | fprintf(fp, "Namespace capacity : %zu blocks (%zu MiB)\n", info->capacity, info->capacity >> 20); 104 | fprintf(fp, "--------------------------------------------------\n"); 105 | } 106 | 107 | -------------------------------------------------------------------------------- /deprecated/examples/rpc/util.h: -------------------------------------------------------------------------------- 1 | #ifndef __DISNVM_EXAMPLES_UTIL_H__ 2 | #define __DISNVM_EXAMPLES_UTIL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | /* 11 | * Parse an uint64_t from a string. 12 | */ 13 | int parse_u64(const char* str, uint64_t* number, int base); 14 | 15 | 16 | /* 17 | * Parse an uint32_t from a string. 18 | */ 19 | int parse_u32(const char* str, uint32_t* number, int base); 20 | 21 | 22 | /* 23 | * Parse an uint16_t from a string. 24 | */ 25 | int parse_u16(const char* str, uint16_t* number, int base); 26 | 27 | 28 | /* 29 | * Pretty print controller information. 30 | */ 31 | void print_ctrl_info(FILE* fp, const struct nvm_ctrl_info* info); 32 | 33 | 34 | /* 35 | * Pretty print namespace information. 
36 | */ 37 | void print_ns_info(FILE* fp, const struct nvm_ns_info* info); 38 | 39 | #endif // __DISNVM_EXAMPLES_UTIL_H__ 40 | -------------------------------------------------------------------------------- /include/bafs_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef __BAFS_PTR_H__ 2 | #define __BAFS_PTR_H__ 3 | 4 | #ifndef __device__ 5 | #define __device__ 6 | #endif 7 | #ifndef __host__ 8 | #define __host__ 9 | #endif 10 | #ifndef __forceinline__ 11 | #define __forceinline__ inline 12 | #endif 13 | 14 | #include "page_cache.h" 15 | #include 16 | 17 | template 18 | class bafs_ptr { 19 | private: 20 | array_t* h_pData; 21 | array_d_t* pData; 22 | uint64_t start_idx; 23 | public: 24 | __host__ 25 | void print_stats() const { 26 | if (h_pData) 27 | h_pData->print_reset_stats(); 28 | } 29 | __host__ __device__ bafs_ptr(): 30 | h_pData(NULL), pData(NULL),start_idx(0){ 31 | } 32 | // __host__ __device__ bafs_ptr(array_d_t* const pValue): 33 | // h_pData(NULL), pData(pValue),start_idx(0){ 34 | // } 35 | 36 | __host__ __device__ bafs_ptr(array_d_t* const pValue, const uint64_t start_off): 37 | h_pData(NULL), pData(pValue),start_idx(start_off){ 38 | } 39 | 40 | __host__ __device__ bafs_ptr(array_t* const pValue): 41 | h_pData(pValue), pData(pValue->d_array_ptr),start_idx(0){ 42 | 43 | } 44 | 45 | __host__ __device__ bafs_ptr(array_t* const pValue, const uint64_t start_off): 46 | h_pData(pValue), pData(pValue->d_array_ptr),start_idx(start_off){ 47 | } 48 | 49 | __host__ __device__ ~bafs_ptr(){} 50 | 51 | __host__ __device__ bafs_ptr(const bafs_ptr &var){ 52 | h_pData = var.h_pData; 53 | pData = var.pData; 54 | start_idx = var.start_idx; 55 | } 56 | 57 | __device__ T operator*(){ 58 | return (*pData)[start_idx]; 59 | } 60 | 61 | __host__ __device__ bafs_ptr& operator=(const bafs_ptr& obj) { 62 | if(*this == obj) 63 | return *this; 64 | else{ 65 | this->h_pData = obj.h_pData; 66 | this->pData = obj.pData; 67 | 
this->start_idx = obj.start_idx; 68 | } 69 | return *this; 70 | } 71 | 72 | template 73 | friend __host__ __device__ bool operator==(const bafs_ptr& lhs, const bafs_ptr& rhs); 74 | 75 | // template 76 | // friend __host__ __device__ bool operator==(bafs_ptr* lhs, const bafs_ptr& rhs); 77 | 78 | __host__ __device__ void operator()(const uint64_t i, const T val) { 79 | (*pData)(i, val); 80 | } 81 | __host__ __device__ T operator[](const uint64_t i) { 82 | return (*pData)[start_idx+i]; 83 | } 84 | 85 | __host__ __device__ const T operator[](const uint64_t i) const { 86 | return (*pData)[start_idx+i]; 87 | } 88 | 89 | __host__ __device__ bafs_ptr operator+(const uint64_t i){ 90 | uint64_t new_start_idx = this->start_idx+i; 91 | return bafs_ptr(this->pData, new_start_idx); 92 | } 93 | __host__ __device__ bafs_ptr operator-(const uint64_t i){ 94 | uint64_t new_start_idx = this->start_idx-i; 95 | return bafs_ptr(this->pData, new_start_idx); 96 | } 97 | //posfix operator 98 | __host__ __device__ bafs_ptr operator++(int){ 99 | bafs_ptr cpy = *this; 100 | this->start_idx += 1; 101 | return cpy; 102 | } 103 | //prefix operator 104 | __host__ __device__ bafs_ptr& operator++(){ 105 | this->start_idx += 1; 106 | return *this; 107 | } 108 | 109 | //posfix operator 110 | __host__ __device__ bafs_ptr operator--(int){ 111 | bafs_ptr cpy = *this; 112 | this->start_idx -= 1; 113 | return cpy; 114 | } 115 | //prefix operator 116 | __host__ __device__ bafs_ptr& operator--(){ 117 | this->start_idx -= 1; 118 | return *this; 119 | } 120 | 121 | __host__ __device__ void memcpy_to_array_aligned(const uint64_t src_idx, const uint64_t count, T* dest) const { 122 | pData->memcpy(src_idx, count, dest); 123 | } 124 | }; 125 | 126 | 127 | 128 | template 129 | __host__ __device__ 130 | bool operator==(const bafs_ptr& lhs, const bafs_ptr& rhs){ 131 | return (lhs.pData == rhs.pData && lhs.start_idx == rhs.start_idx && lhs.h_pData == rhs.h_pData); 132 | } 133 | 134 | // template 135 | // __host__ 
__device__ 136 | // bool operator==(bafs_ptr* lhs, const bafs_ptr& rhs){ 137 | // return (lhs->pData == rhs.pData && lhs->start_idx == rhs.start_idx); 138 | // } 139 | 140 | 141 | //#ifndef __CUDACC__ 142 | //#undef __device__ 143 | //#undef __host__ 144 | //#undef __forceinline__ 145 | //#endif 146 | 147 | #endif //__BAFS_PTR_H__ 148 | -------------------------------------------------------------------------------- /include/event.h: -------------------------------------------------------------------------------- 1 | #ifndef __BENCHMARK_EVENT_H__ 2 | #define __BENCHMARK_EVENT_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include "cuda.h" 9 | #include 10 | #include 11 | 12 | 13 | struct Event 14 | { 15 | cudaEvent_t event; 16 | 17 | inline Event(cudaStream_t stream = 0) 18 | { 19 | auto err = cudaEventCreateWithFlags(&event, cudaEventDefault); 20 | if (err != cudaSuccess) 21 | { 22 | throw std::runtime_error(std::string("Failed to create event: ") + cudaGetErrorString(err)); 23 | } 24 | 25 | err = cudaEventRecord(event, stream); 26 | if (err != cudaSuccess) 27 | { 28 | throw std::runtime_error(std::string("Failed to record event on stream: ") + cudaGetErrorString(err)); 29 | } 30 | 31 | } 32 | 33 | 34 | inline ~Event() 35 | { 36 | cudaEventDestroy(event); 37 | } 38 | 39 | 40 | inline double operator-(const Event& other) const 41 | { 42 | float msecs = 0; 43 | auto err = cudaEventElapsedTime(&msecs, other.event, event); 44 | if (err != cudaSuccess) 45 | { 46 | throw std::runtime_error(std::string("Could not calculate elapsed time: ") + cudaGetErrorString(err)); 47 | } 48 | 49 | return ((double) msecs) * 1e3; 50 | } 51 | }; 52 | 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /include/host_util.h: -------------------------------------------------------------------------------- 1 | #ifndef __HOST_UTIL_H_ 2 | #define __HOST_UTIL_H_ 3 | 4 | #ifndef __device__ 5 
| #define __device__ 6 | #endif 7 | #ifndef __host__ 8 | #define __host__ 9 | #endif 10 | #ifndef __forceinline__ 11 | #define __forceinline__ inline 12 | #endif 13 | 14 | #include 15 | 16 | #ifndef __CUDACC__ 17 | 18 | template 19 | inline __host__ 20 | void __nanosleep(T ns) { 21 | struct timespec time1,time2; 22 | time1.tv_sec = 0; 23 | time2.tv_nsec = ns; 24 | nanosleep(&time1, &time2); 25 | } 26 | 27 | template 28 | inline __host__ 29 | T __activemask() { 30 | T var; 31 | (void) var; 32 | return (T)1; 33 | } 34 | 35 | template 36 | inline __host__ 37 | int __popc(T v) { 38 | if (sizeof(T) == 4) 39 | return __builtin_popcount((unsigned)v); 40 | if (sizeof(T) == 8) 41 | return __builtin_popcountll((unsigned long long)v); 42 | return 0; 43 | 44 | } 45 | 46 | template 47 | inline __host__ 48 | int __ffs(T v) { 49 | if (sizeof(T) == 4) 50 | return __builtin_ffs((int)v); 51 | if (sizeof(T) == 8) 52 | return __builtin_ffsll((long long)v); 53 | return 0; 54 | 55 | } 56 | 57 | template 58 | inline __host__ 59 | void __syncwarp(T mask) { 60 | (void) mask; 61 | return; 62 | } 63 | 64 | template 65 | inline __host__ 66 | T __shfl_sync(unsigned mask, T var, int srcLane, int width=32) { 67 | (void) mask; 68 | (void) srcLane; 69 | (void) width; 70 | return var; 71 | } 72 | 73 | 74 | template 75 | inline __host__ 76 | unsigned int __match_any_sync(unsigned mask, T var) { 77 | (void) mask; 78 | (void) var; 79 | return 1; 80 | } 81 | 82 | #endif 83 | 84 | //#ifndef __CUDACC__ 85 | //#undef __device__ 86 | //#undef __host__ 87 | //#undef __forceinline__ 88 | //#endif 89 | 90 | #endif // __HOST_UTIL_H_ 91 | -------------------------------------------------------------------------------- /include/nvm_admin.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_ADMIN_H__ 2 | #define __NVM_ADMIN_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 
#include 12 | 13 | 14 | 15 | /* 16 | * Get controller information. 17 | */ 18 | int nvm_admin_ctrl_info(nvm_aq_ref ref, // AQ pair reference 19 | struct nvm_ctrl_info* info, // Controller information structure 20 | void* buffer, // Temporary buffer (must be at least 4 KB) 21 | uint64_t ioaddr); // Bus address of buffer as seen by the controller 22 | 23 | 24 | 25 | /* 26 | * Get namespace information. 27 | */ 28 | int nvm_admin_ns_info(nvm_aq_ref ref, // AQ pair reference 29 | struct nvm_ns_info* info, // NVM namespace information 30 | uint32_t ns_id, // Namespace identifier 31 | void* buffer, // Temporary buffer (must be at least 4 KB) 32 | uint64_t ioaddr); // Bus address of buffer as seen by controller 33 | 34 | 35 | 36 | /* 37 | * Make controller allocate and reserve queues. 38 | */ 39 | int nvm_admin_set_num_queues(nvm_aq_ref ref, uint16_t n_cqs, uint16_t n_sqs); 40 | 41 | 42 | /* 43 | * Retrieve the number of allocated queues. 44 | */ 45 | int nvm_admin_get_num_queues(nvm_aq_ref ref, uint16_t* n_cqs, uint16_t* n_sqs); 46 | 47 | 48 | /* 49 | * Make controller allocate number of queues before issuing them. 50 | */ 51 | int nvm_admin_request_num_queues(nvm_aq_ref ref, uint16_t* n_cqs, uint16_t* n_sqs); 52 | 53 | 54 | /* 55 | * Create IO completion queue (CQ) 56 | * Caller must set queue memory to zero manually. 57 | * 58 | * If number of queue entries (qs) exceeds a page, 59 | * DMA memory must be contiguous. 60 | * 61 | * If qs is 0, the API will use one page for queue memory. 62 | */ 63 | int nvm_admin_cq_create(nvm_aq_ref ref, // AQ pair reference 64 | nvm_queue_t* cq, // CQ descriptor 65 | uint16_t id, // Queue identifier 66 | const nvm_dma_t* dma, // Queue memory handle 67 | size_t page_offset, // Number of pages to offset into the handle 68 | size_t qs, // Queue size/depth 69 | bool need_prp = false); // non-contiguous queue 70 | 71 | /* 72 | * Delete IO completion queue (CQ) 73 | * After calling this, the queue is no longer used and must be recreated. 
74 | * All associated submission queues must be deleted first. 75 | */ 76 | int nvm_admin_cq_delete(nvm_aq_ref ref, nvm_queue_t* cq); 77 | 78 | 79 | 80 | /* 81 | * Create IO submission queue (SQ) 82 | * Caller must set queue memory to zero manually. 83 | * 84 | * If number of queue entries (qs) exceeds a page, 85 | * DMA memory must be contiguous. 86 | * 87 | * If qs is 0, the API will use one page for queue memory. 88 | */ 89 | int nvm_admin_sq_create(nvm_aq_ref ref, // AQ pair reference 90 | nvm_queue_t* sq, // SQ descriptor 91 | const nvm_queue_t* cq, // Descriptor to paired CQ 92 | uint16_t id, // Queue identifier 93 | const nvm_dma_t* dma, // Queue memory handle 94 | size_t page_offset, // Number of pages to offset into the handle 95 | size_t qs, // Number of pages to use 96 | bool need_prp = false); // non-contiguous queue 97 | 98 | 99 | 100 | /* 101 | * Delete IO submission queue (SQ) 102 | * After calling this, the queue is no longer used and must be recreated. 103 | */ 104 | int nvm_admin_sq_delete(nvm_aq_ref ref, 105 | nvm_queue_t* sq, 106 | const nvm_queue_t* cq); 107 | 108 | 109 | /* 110 | * Get log page. 111 | */ 112 | int nvm_admin_get_log_page(nvm_aq_ref ref, 113 | uint32_t ns_id, 114 | void* ptr, 115 | uint64_t ioaddr, 116 | uint8_t log_id, 117 | uint64_t log_offset); 118 | 119 | 120 | #endif /* #ifdef __NVM_ADMIN_H__ */ 121 | -------------------------------------------------------------------------------- /include/nvm_aq.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_AQ_H__ 2 | #define __NVM_AQ_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | 15 | /* 16 | * Create admin queue pair 17 | * 18 | * Take exclusive ownership of an NVM controller. This function resets the 19 | * controller and configures NVM admin queues. 
20 | * 21 | * Returns a reference handle that can be used for admin RPC calls. 22 | */ 23 | int nvm_aq_create(nvm_aq_ref* ref, 24 | const nvm_ctrl_t* ctrl, 25 | const nvm_dma_t* dma_window); 26 | 27 | 28 | /* 29 | * Destroy admin queues and references. 30 | * 31 | * Send NVM abort command to controller and deallocate admin queues. 32 | * 33 | * After calling this function, all admin queue references are invalid. 34 | * This also means that remote references will no longer be valid. 35 | * 36 | * This function will also work for unbinding remote references. 37 | */ 38 | void nvm_aq_destroy(nvm_aq_ref ref); 39 | 40 | 41 | 42 | //int nvm_tcp_rpc_enable(nvm_aq_ref ref, uint16_t port, nvm_rpc_cb_t filter, void* data); 43 | //int nvm_tcp_rpc_disable(nvm_aq_ref ref, uint16_t port); 44 | 45 | 46 | 47 | #ifdef __DIS_CLUSTER__ 48 | 49 | 50 | /* 51 | * Callback function invoked whenever a remote NVM admin command is received. 52 | * Should indicate whether or not a remote admin command is accepted and can 53 | * be enqueued by using the return value. 54 | * 55 | * The remote command can also be modified if necessary. 56 | */ 57 | typedef bool (*nvm_dis_rpc_cb_t)(nvm_cmd_t* cmd, uint32_t dis_adapter, uint32_t dis_node_id); 58 | 59 | 60 | 61 | /* 62 | * Enable remote admin commands. 63 | * Allows remote processes to relay NVM admin commands to the local process. 64 | */ 65 | int nvm_dis_rpc_enable(nvm_aq_ref ref, // NVM admin queue-pair reference 66 | uint32_t dis_adapter, // Local adapter to enable interrupt on 67 | nvm_dis_rpc_cb_t filter); // Filter callback (can be NULL) 68 | 69 | 70 | 71 | /* 72 | * Disable remote admin commands. 73 | * Stop processing admin commands from remote processes. 
74 | */ 75 | void nvm_dis_rpc_disable(nvm_aq_ref ref, uint32_t dis_adapter); 76 | 77 | #endif /* __DIS_CLUSTER__ */ 78 | 79 | 80 | 81 | 82 | #endif /* #ifdef __NVM_AQ_H__ */ 83 | -------------------------------------------------------------------------------- /include/nvm_ctrl.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_CTRL_H__ 2 | #define __NVM_CTRL_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __DIS_CLUSTER__ 14 | #include 15 | #endif 16 | 17 | 18 | 19 | /* 20 | * Minimum size of mapped controller memory. 21 | */ 22 | #define NVM_CTRL_MEM_MINSIZE 0x2000 23 | 24 | 25 | 26 | #if defined (__unix__) 27 | /* 28 | * Initialize NVM controller handle. 29 | * 30 | * Read from controller registers and initialize controller handle. 31 | * This function should be used when using the kernel module or to manually 32 | * read from sysfs. 33 | * 34 | * Note: fd must be opened with O_RDWR and O_NONBLOCK 35 | */ 36 | int nvm_ctrl_init(nvm_ctrl_t** ctrl, int fd); 37 | #endif 38 | 39 | 40 | 41 | /* 42 | * Initialize NVM controller handle. 43 | * 44 | * Read from controller registers and initialize the controller handle using 45 | * a memory-mapped pointer to the PCI device BAR. 46 | * 47 | * This function should be used when neither SmartIO nor the disnvme kernel 48 | * module are used. 49 | * 50 | * Note: ctrl_mem must be at least NVM_CTRL_MEM_MINSIZE large and mapped 51 | * as IO memory. See arguments for mmap() for more info. 52 | */ 53 | int nvm_raw_ctrl_init(nvm_ctrl_t** ctrl, volatile void* mm_ptr, size_t mm_size); 54 | 55 | 56 | 57 | /* 58 | * Release controller handle. 59 | */ 60 | void nvm_ctrl_free(nvm_ctrl_t* ctrl); 61 | 62 | 63 | 64 | /* 65 | * Reset NVM controller. 66 | * 67 | * The queue memory must be memset to zero and be exactly one page size large. 
68 | * IO addresses must align to the controller page size. 69 | * 70 | * Note: The controller must be unbound from any driver before attempting to 71 | * reset the controller. 72 | * 73 | * Note: This function is implicitly called by the controller manager, so it 74 | * should not be necessary to call it directly. 75 | */ 76 | int nvm_raw_ctrl_reset(const nvm_ctrl_t* ctrl, uint64_t acq_ioaddr, uint64_t asq_ioaddr); 77 | 78 | 79 | 80 | #ifdef __DIS_CLUSTER__ 81 | /* 82 | * Initialize NVM controller handle. 83 | * 84 | * Read from device registers and initialize controller handle. 85 | * This function should be used when SmartIO is being used. 86 | */ 87 | int nvm_dis_ctrl_init(nvm_ctrl_t** ctrl, uint32_t smartio_fdid); 88 | #endif 89 | 90 | 91 | 92 | #ifdef __DIS_CLUSTER__ 93 | int nvm_dis_ctrl_map_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev, uint64_t* ioaddr); 94 | #endif 95 | 96 | 97 | 98 | #ifdef __DIS_CLUSTER__ 99 | void nvm_dis_ctrl_unmap_p2p_device(const nvm_ctrl_t* ctrl, sci_smartio_device_t dev); 100 | #endif 101 | 102 | 103 | #endif /* __NVM_CTRL_H__ */ 104 | -------------------------------------------------------------------------------- /include/nvm_dma.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_DMA_H__ 2 | #define __NVM_DMA_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __DIS_CLUSTER__ 14 | #include 15 | #endif 16 | 17 | 18 | 19 | /* 20 | * Create DMA mapping descriptor from physical/bus addresses. 21 | * 22 | * Create a DMA mapping descriptor, describing a region of memory that is 23 | * accessible for the NVM controller. The caller must supply physical/bus 24 | * addresses of physical memory pages, page size and total number of pages. 
25 | * As the host's page size may differ from the controller's page size (MPS), 26 | * this function will calculate the necessary offsets into the actual memory 27 | * pages. 28 | * 29 | * While virtual memory is assumed to be continuous, the physical pages do not 30 | * need to be contiguous. Physical/bus addresses must be aligned to the 31 | * controller's page size. 32 | * 33 | * Note: vaddr can be NULL. 34 | */ 35 | int nvm_dma_map(nvm_dma_t** map, // Mapping descriptor reference 36 | const nvm_ctrl_t* ctrl, // NVM controller reference 37 | void* vaddr, // Pointer to userspace memory (can be NULL if not required) 38 | size_t page_size, // Physical page size 39 | size_t n_pages, // Number of pages to map 40 | const uint64_t* page_addrs); // List of physical/bus addresses to the pages 41 | 42 | 43 | 44 | /* 45 | * Create DMA mapping descriptor using offsets from a previously 46 | * created DMA descriptor. 47 | */ 48 | int nvm_dma_remap(nvm_dma_t** new_map, const nvm_dma_t* other_map); 49 | 50 | 51 | 52 | /* 53 | * Remove DMA mapping descriptor. 54 | * 55 | * Unmap DMA mappings (if necessary) and remove the descriptor. 56 | * This function destroys the descriptor. 57 | */ 58 | void nvm_dma_unmap(nvm_dma_t* map); 59 | 60 | 61 | 62 | /* 63 | * Create DMA mapping descriptor from virtual address using the kernel module. 64 | * This function is similar to nvm_dma_map, except the user is not required 65 | * to pass physical/bus addresses. 66 | * 67 | * Note: vaddr can not be NULL, and must be aligned to system page size. 68 | */ 69 | int nvm_dma_map_host(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* vaddr, size_t size); 70 | 71 | 72 | 73 | //#if ( defined( __CUDA__ ) || defined( __CUDACC__ ) ) 74 | 75 | /* 76 | * Create DMA mapping descriptor from CUDA device pointer using the kernel 77 | * module. This function is similar to nvm_dma_map_host, except the memory 78 | * pointer must be a valid CUDA device pointer (see manual for 79 | * cudaGetPointerAttributes). 
80 | * 81 | * The controller handle must have been created using the kernel module. 82 | * 83 | * Note: vaddr can not be NULL, and must be aligned to GPU page size. 84 | */ 85 | int nvm_dma_map_device(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* devptr, size_t size); 86 | 87 | //#endif /* __CUDA__ */ 88 | 89 | 90 | 91 | #if defined( __DIS_CLUSTER__ ) 92 | 93 | /* 94 | * Create DMA mapping descriptor from local SISCI segment. 95 | * 96 | * Create DMA mapping descriptor from a local segment handler, and 97 | * reverse-map the segment making it accessible from the controller. 98 | * As segment memory is always continuous and page-aligned, it is not 99 | * necessary to calculate physical memory addresses. However, the user 100 | * should ensure that the mapping size is aligned to a controller 101 | * page-size (MPS). 102 | * 103 | * The controller handle must have been created using SmartIO, and 104 | * the segment must already be prepared on the local adapter. 105 | */ 106 | int nvm_dis_dma_map_local(nvm_dma_t** map, // Mapping descriptor reference 107 | const nvm_ctrl_t* ctrl, // NVM controller handle 108 | uint32_t dis_adapter, // Local DIS adapter segment is prepared on 109 | sci_local_segment_t segment, // Local segment descriptor 110 | bool map_vaddr); // Should function also map segment into local space 111 | 112 | #endif /* __DIS_CLUSTER__ */ 113 | 114 | 115 | 116 | #if defined( __DIS_CLUSTER__ ) 117 | 118 | /* 119 | * Create DMA mapping descriptor from remote SISCI segment. 120 | * 121 | * Create DMA mapping descriptor from a remote segment handler, and 122 | * reverse-map the segment making it accessible from the controller. 123 | * This function is similar to nvm_dis_dma_map_local. 124 | * 125 | * The remote segment must already be connected. 126 | * 127 | * Note: You should generally prefer write combining, except 128 | * for mapped device registers that require fine-grained writes. 
129 | */ 130 | int nvm_dis_dma_map_remote(nvm_dma_t** map, // Mapping descriptor reference 131 | const nvm_ctrl_t* ctrl, // NVM controller handle 132 | sci_remote_segment_t segment,// Remote segment descriptor 133 | bool map_vaddr, // Should function also map segment into local space 134 | bool map_wc); // Should function map with write combining 135 | 136 | #endif /* __DIS_CLUSTER__ */ 137 | 138 | 139 | 140 | #if ( !defined( __CUDA__ ) && !defined( __CUDACC__ ) ) && ( defined (__unix__) ) 141 | /* 142 | * Short-hand function for allocating a page aligned buffer and mapping it 143 | * for the controller. 144 | * 145 | * Note: this function will not work if you are using the CUDA API 146 | */ 147 | int nvm_dma_create(nvm_dma_t** map, const nvm_ctrl_t* ctrl, size_t size); 148 | #endif 149 | 150 | 151 | 152 | #if defined( __DIS_CLUSTER__ ) 153 | /* 154 | * Create device memory segment and map it for the controller. 155 | * Short-hand function for creating a device memory segment. 156 | * If mem_hints is 0, the API will create a local segment instead. 157 | */ 158 | int nvm_dis_dma_create(nvm_dma_t** map, const nvm_ctrl_t* ctrl, size_t size, unsigned int mem_hints); 159 | 160 | #endif /* __DIS_CLUSTER__ */ 161 | 162 | 163 | 164 | #if defined ( __DIS_CLUSTER__ ) 165 | 166 | /* 167 | * Note: This function requires the IOMMU to be enabled. 
168 | */ 169 | int nvm_dis_dma_map_host(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* vaddr, size_t size); 170 | 171 | #endif 172 | 173 | 174 | #if ( ( defined( __CUDA__ ) || defined( __CUDACC__ ) ) && defined( __DIS_CLUSTER__ ) ) 175 | 176 | int nvm_dis_dma_map_device(nvm_dma_t** map, const nvm_ctrl_t* ctrl, void* devptr, size_t size); 177 | 178 | #endif /* __DIS_CLUSTER__ && __CUDA__ */ 179 | 180 | 181 | 182 | 183 | #endif /* __NVM_DMA_H__ */ 184 | -------------------------------------------------------------------------------- /include/nvm_error.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_ERROR_H__ 2 | #define __NVM_ERROR_H__ 3 | 4 | // #ifndef __CUDACC__ 5 | // #define __device__ 6 | // #define __host__ 7 | // #endif 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | 15 | 16 | /* Get the status code type of an NVM completion. */ 17 | #define NVM_ERR_SCT(cpl) ((uint8_t) _RB(*NVM_CPL_STATUS(cpl), 11, 9)) 18 | 19 | 20 | 21 | /* Get the status code of an NVM completion */ 22 | #define NVM_ERR_SC(cpl) ((uint8_t) _RB(*NVM_CPL_STATUS(cpl), 8, 1)) 23 | 24 | 25 | 26 | /* Is do not retry flag set? */ 27 | #define NVM_ERR_DNR(cpl) (!!_RB(*NVM_CPL_STATUS(cpl), 15, 15)) 28 | 29 | 30 | 31 | /* Is there more? (Get log page) */ 32 | #define NVM_ERR_MORE(cpl) (!!_RB(*NVM_CPL_STATUS(cpl), 14, 14)) 33 | 34 | 35 | 36 | /* Extract value from status field from NVM completion */ 37 | #define NVM_ERR_STATUS(cpl) \ 38 | ((int) ( (cpl) != NULL ? -((NVM_ERR_SCT(cpl) << 8) | NVM_ERR_SC(cpl)) : 0 )) 39 | 40 | 41 | /* Convenience macro for checking if an NVM completion indicates success. */ 42 | #define NVM_ERR_OK(cpl) ( !NVM_ERR_SCT(cpl) && !NVM_ERR_SC(cpl) ) 43 | 44 | 45 | 46 | /* Pack errno and NVM completion status into a single status variable */ 47 | #define NVM_ERR_PACK(cpl, err) \ 48 | ((int) ( (err) != 0 ? 
(err) : NVM_ERR_STATUS(cpl) ) ) 49 | 50 | 51 | 52 | /* Extract values from packed status */ 53 | #define NVM_ERR_UNPACK_ERRNO(status) ((status > 0) ? (status) : 0) 54 | #define NVM_ERR_UNPACK_SCT(status) ((status < 0) ? (((-status) >> 8) & 0xff) : 0) 55 | #define NVM_ERR_UNPACK_SC(status) ((status < 0) ? ((-status) & 0xff) : 0) 56 | 57 | 58 | /* Check if everything is okay */ 59 | #define nvm_ok(status) ( !(status) ) 60 | 61 | 62 | 63 | /* 64 | * Get an error string associated with the status code type and status code. 65 | * This function calls strerror() if the packed status is a regular errno. 66 | */ 67 | const char* nvm_strerror(int status); 68 | 69 | 70 | 71 | 72 | #endif /* __NVM_ERROR_H__ */ 73 | -------------------------------------------------------------------------------- /include/nvm_io.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_IO_H__ 2 | #define __NVM_IO_H__ 3 | // #ifndef __CUDACC__ 4 | // #define __device__ 5 | // #define __host__ 6 | // #endif 7 | 8 | //#include "page_cache.h" 9 | 10 | 11 | 12 | 13 | #endif // __NVM_IO_H__ 14 | -------------------------------------------------------------------------------- /include/nvm_rpc.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_RPC_H__ 2 | #define __NVM_RPC_H__ 3 | 4 | // #ifndef __CUDACC__ 5 | // #define __device__ 6 | // #define __host__ 7 | // #endif 8 | 9 | #include 10 | #include 11 | 12 | 13 | //int nvm_tcp_rpc_bind(nvm_aq_ref* ref, const char* hostname, uint16_t port); 14 | 15 | 16 | 17 | #ifdef __DIS_CLUSTER__ 18 | 19 | /* 20 | * Bind admin queue-pair reference to remote handle. 21 | * The user should call the nvm_aq_destroy() to remove binding. 22 | */ 23 | int nvm_dis_rpc_bind(nvm_aq_ref* ref, const nvm_ctrl_t* ctrl, uint32_t adapter); 24 | 25 | #endif 26 | 27 | 28 | 29 | /* 30 | * Unbind admin queue-pair reference. 
31 | * If reference is not bound (i.e., it is local), this function will do nothing. 32 | */ 33 | void nvm_rpc_unbind(nvm_aq_ref ref); 34 | 35 | 36 | 37 | /* 38 | * Relay NVM admin command. 39 | * 40 | * Use a local AQ pair reference to relay a NVM admin command to ASQ and get 41 | * a corresponding completion from the ACQ. This function will block until 42 | * either a timeout occurs or until the command is completed. 43 | * 44 | * Return value: 45 | * - If return value is zero, it indicates success. 46 | * - If return value is positive, it indicates an errno. 47 | * - If return value is negative, it indicates an NVM error. 48 | * 49 | * Use the error handling macros in nvm_error.h 50 | * 51 | * Note: The command can be modified. 52 | */ 53 | int nvm_raw_rpc(nvm_aq_ref ref, nvm_cmd_t* cmd, nvm_cpl_t* cpl); 54 | 55 | 56 | 57 | 58 | #endif /* #ifdef __NVM_RPC_H__ */ 59 | -------------------------------------------------------------------------------- /include/util.h: -------------------------------------------------------------------------------- 1 | #ifndef __UTIL_H__ 2 | #define __UTIL_H__ 3 | 4 | #ifndef __device__ 5 | #define __device__ 6 | #endif 7 | #ifndef __host__ 8 | #define __host__ 9 | #endif 10 | #ifndef __forceinline__ 11 | #define __forceinline__ inline 12 | #endif 13 | 14 | 15 | 16 | #include "cuda.h" 17 | #include "nvm_util.h" 18 | #include "host_util.h" 19 | //#include 20 | #include 21 | 22 | 23 | #define cuda_err_chk(ans) { gpuAssert((ans), __FILE__, __LINE__); } 24 | 25 | #ifndef __CUDACC__ 26 | inline void gpuAssert(int code, const char *file, int line, bool abort=false) 27 | { 28 | if (code != 0) 29 | { 30 | fprintf(stderr,"Assert: %i %s %d\n", code, file, line); 31 | if (abort) exit(1); 32 | } 33 | } 34 | #else 35 | 36 | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=false) 37 | { 38 | if (code != cudaSuccess) 39 | { 40 | fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); 41 | if 
(abort) exit(1); 42 | } 43 | } 44 | #endif 45 | 46 | #define CEIL(X, Y, Z) ((X + Y - 1) >> Z) 47 | 48 | 49 | #ifndef HEXDUMP_COLS 50 | #define HEXDUMP_COLS 16 51 | #endif 52 | inline __device__ void hexdump(void *mem, unsigned int len) 53 | { 54 | unsigned int i; 55 | 56 | for(i = 0; i < len + ((len % HEXDUMP_COLS) ? (HEXDUMP_COLS - len % HEXDUMP_COLS) : 0); i++) 57 | { 58 | /* print offset */ 59 | if(i % HEXDUMP_COLS == 0) 60 | { 61 | printf("\n0x%06x: ", i); 62 | } 63 | 64 | /* print hex data */ 65 | if(i < len) 66 | { 67 | printf("%02x ", 0xFF & ((char*)mem)[i]); 68 | } 69 | else /* end of block, just aligning for ASCII dump */ 70 | { 71 | printf(" "); 72 | } 73 | 74 | /* print ASCII dump */ 75 | // if(i % HEXDUMP_COLS == (HEXDUMP_COLS - 1)) 76 | // { 77 | // for(j = i - (HEXDUMP_COLS - 1); j <= i; j++) 78 | // { 79 | // if(j >= len) /* end of block, not really printing */ 80 | // { 81 | // printf(' '); 82 | // } 83 | // else if(isprint(((char*)mem)[j])) /* printable char */ 84 | // { 85 | // printf(0xFF & ((char*)mem)[j]); 86 | // } 87 | // else /* other char */ 88 | // { 89 | // putchar('.'); 90 | // } 91 | // } 92 | // putchar('\n'); 93 | // } 94 | } 95 | printf("\n"); 96 | } 97 | 98 | template 99 | void __ignore(T &&) 100 | { } 101 | /*warp memcpy, assumes alignment at type T and num is a count in type T*/ 102 | template 103 | inline __device__ 104 | void warp_memcpy(T* dest, const T* src, size_t num) { 105 | #ifndef __CUDACC__ 106 | uint32_t mask = 1; 107 | #else 108 | uint32_t mask = __activemask(); 109 | #endif 110 | uint32_t active_cnt = __popc(mask); 111 | uint32_t lane = lane_id(); 112 | uint32_t prior_mask = mask >> (32 - lane); 113 | uint32_t prior_count = __popc(prior_mask); 114 | 115 | for(size_t i = prior_count; i < num; i+=active_cnt) 116 | dest[i] = src[i]; 117 | } 118 | 119 | //#ifndef __CUDACC__ 120 | //#undef __device__ 121 | //#undef __host__ 122 | //#undef __forceinline__ 123 | //#endif 124 | 125 | #endif // __UTIL_H__ 126 | 
-------------------------------------------------------------------------------- /module/Makefile.in: -------------------------------------------------------------------------------- 1 | 2 | ifneq ($(KERNELRELEASE),) 3 | src := @module_root@ 4 | obj-m := @CMAKE_PROJECT_NAME@.o 5 | @CMAKE_PROJECT_NAME@-objs := pci.o list.o ctrl.o map.o 6 | ccflags-y += @module_ccflags@ 7 | KBUILD_EXTRA_SYMBOLS := @module_symbols@ 8 | else 9 | 10 | .PHONY: default reload unload load clean install 11 | 12 | default: 13 | $(MAKE) -C @KERNEL@ M=@module_output@ modules 14 | 15 | clean: 16 | $(MAKE) -C @KERNEL@ M=@module_output@ clean 17 | 18 | reload: unload load 19 | 20 | unload: 21 | -rmmod @CMAKE_PROJECT_NAME@.ko 22 | 23 | load: 24 | insmod @CMAKE_PROJECT_NAME@.ko max_num_ctrls=64 25 | 26 | install: default 27 | $(MAKE) -C @KERNEL@ M=@module_output@ modules_install 28 | #$(MAKE) INSTALL_MOD_DIR=@CMAKE_PROJECT_NAME@ -C @KERNEL@ M=@module_output@ modules_install 29 | 30 | endif 31 | 32 | -------------------------------------------------------------------------------- /module/ctrl.c: -------------------------------------------------------------------------------- 1 | #include "ctrl.h" 2 | #include "list.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | 12 | struct ctrl* ctrl_get(struct list* list, struct class* cls, struct pci_dev* pdev, int number) 13 | { 14 | struct ctrl* ctrl = NULL; 15 | 16 | ctrl = kmalloc(sizeof(struct ctrl), GFP_KERNEL | GFP_NOWAIT); 17 | if (ctrl == NULL) 18 | { 19 | printk(KERN_CRIT "Failed to allocate controller reference\n"); 20 | return ERR_PTR(-ENOMEM); 21 | } 22 | 23 | list_node_init(&ctrl->list); 24 | 25 | ctrl->pdev = pdev; 26 | ctrl->number = number; 27 | ctrl->rdev = 0; 28 | ctrl->cls = cls; 29 | ctrl->chrdev = NULL; 30 | 31 | snprintf(ctrl->name, sizeof(ctrl->name), "%s%d", KBUILD_MODNAME, ctrl->number); 32 | ctrl->name[sizeof(ctrl->name) - 1] = '\0'; 33 | 34 | list_insert(list, &ctrl->list); 35 | 36 | 
return ctrl; 37 | } 38 | 39 | 40 | 41 | void ctrl_put(struct ctrl* ctrl) 42 | { 43 | if (ctrl != NULL) 44 | { 45 | list_remove(&ctrl->list); 46 | ctrl_chrdev_remove(ctrl); 47 | kfree(ctrl); 48 | } 49 | } 50 | 51 | 52 | 53 | struct ctrl* ctrl_find_by_pci_dev(const struct list* list, const struct pci_dev* pdev) 54 | { 55 | const struct list_node* element = list_next(&list->head); 56 | struct ctrl* ctrl; 57 | 58 | while (element != NULL) 59 | { 60 | ctrl = container_of(element, struct ctrl, list); 61 | 62 | if (ctrl->pdev == pdev) 63 | { 64 | return ctrl; 65 | } 66 | 67 | element = list_next(element); 68 | } 69 | 70 | return NULL; 71 | } 72 | 73 | 74 | 75 | struct ctrl* ctrl_find_by_inode(const struct list* list, const struct inode* inode) 76 | { 77 | const struct list_node* element = list_next(&list->head); 78 | struct ctrl* ctrl; 79 | 80 | while (element != NULL) 81 | { 82 | ctrl = container_of(element, struct ctrl, list); 83 | 84 | if (&ctrl->cdev == inode->i_cdev) 85 | { 86 | return ctrl; 87 | } 88 | 89 | element = list_next(element); 90 | } 91 | 92 | return NULL; 93 | } 94 | 95 | 96 | 97 | int ctrl_chrdev_create(struct ctrl* ctrl, dev_t first, const struct file_operations* fops) 98 | { 99 | int err; 100 | struct device* chrdev = NULL; 101 | 102 | if (ctrl->chrdev != NULL) 103 | { 104 | printk(KERN_WARNING "Character device is already created\n"); 105 | return 0; 106 | } 107 | 108 | ctrl->rdev = MKDEV(MAJOR(first), MINOR(first) + ctrl->number); 109 | 110 | cdev_init(&ctrl->cdev, fops); 111 | err = cdev_add(&ctrl->cdev, ctrl->rdev, 1); 112 | if (err != 0) 113 | { 114 | printk(KERN_ERR "Failed to add cdev\n"); 115 | return err; 116 | } 117 | 118 | chrdev = device_create(ctrl->cls, NULL, ctrl->rdev, NULL, ctrl->name); 119 | if (IS_ERR(chrdev)) 120 | { 121 | cdev_del(&ctrl->cdev); 122 | printk(KERN_ERR "Failed to create character device\n"); 123 | return PTR_ERR(chrdev); 124 | } 125 | 126 | ctrl->chrdev = chrdev; 127 | 128 | printk(KERN_INFO "Character device /dev/%s 
created (%d.%d)\n", 129 | ctrl->name, MAJOR(ctrl->rdev), MINOR(ctrl->rdev)); 130 | 131 | return 0; 132 | } 133 | 134 | 135 | 136 | void ctrl_chrdev_remove(struct ctrl* ctrl) 137 | { 138 | if (ctrl->chrdev != NULL) 139 | { 140 | device_destroy(ctrl->cls, ctrl->rdev); 141 | cdev_del(&ctrl->cdev); 142 | ctrl->chrdev = NULL; 143 | 144 | printk(KERN_DEBUG "Character device /dev/%s removed (%d.%d)\n", 145 | ctrl->name, MAJOR(ctrl->rdev), MINOR(ctrl->rdev)); 146 | } 147 | } 148 | 149 | -------------------------------------------------------------------------------- /module/ctrl.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_HELPER_CTRL_H__ 2 | #define __LIBNVM_HELPER_CTRL_H__ 3 | 4 | #include "list.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | /* 12 | * Represents an NVM controller. 13 | */ 14 | struct ctrl 15 | { 16 | struct list_node list; /* Linked list head */ 17 | struct pci_dev* pdev; /* Reference to physical PCI device */ 18 | char name[64]; /* Character device name */ 19 | int number; /* Controller number */ 20 | dev_t rdev; /* Character device register */ 21 | struct class* cls; /* Character device class */ 22 | struct cdev cdev; /* Character device */ 23 | struct device* chrdev; /* Character device handle */ 24 | }; 25 | 26 | 27 | 28 | /* 29 | * Acquire a controller reference. 30 | */ 31 | struct ctrl* ctrl_get(struct list* list, struct class* cls, struct pci_dev* pdev, int number); 32 | 33 | 34 | 35 | /* 36 | * Release controller reference. 37 | */ 38 | void ctrl_put(struct ctrl* ctrl); 39 | 40 | 41 | 42 | /* 43 | * Find controller device. 44 | */ 45 | struct ctrl* ctrl_find_by_pci_dev(const struct list* list, const struct pci_dev* pdev); 46 | 47 | 48 | 49 | /* 50 | * Find controller reference. 51 | */ 52 | struct ctrl* ctrl_find_by_inode(const struct list* list, const struct inode* inode); 53 | 54 | 55 | 56 | /* 57 | * Create character device and set up file operations. 
58 | */ 59 | int ctrl_chrdev_create(struct ctrl* ctrl, 60 | dev_t first, 61 | const struct file_operations* fops); 62 | 63 | 64 | 65 | /* 66 | * Remove character device. 67 | */ 68 | void ctrl_chrdev_remove(struct ctrl* ctrl); 69 | 70 | 71 | 72 | #endif /* __LIBNVM_HELPER_CTRL_H__ */ 73 | -------------------------------------------------------------------------------- /module/list.c: -------------------------------------------------------------------------------- 1 | #include "list.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | 10 | void list_init(struct list* list) 11 | { 12 | list->head.list = list; 13 | list->head.prev = &list->head; 14 | list->head.next = &list->head; 15 | 16 | spin_lock_init(&list->lock); 17 | } 18 | 19 | 20 | 21 | void list_remove(struct list_node* element) 22 | { 23 | if (likely(element != NULL && element->list != NULL && element != &element->list->head)) 24 | { 25 | spin_lock(&element->list->lock); 26 | element->prev->next = element->next; 27 | element->next->prev = element->prev; 28 | spin_unlock(&element->list->lock); 29 | 30 | element->list = NULL; 31 | element->next = NULL; 32 | element->prev = NULL; 33 | } 34 | } 35 | 36 | 37 | 38 | void list_insert(struct list* list, struct list_node* element) 39 | { 40 | struct list_node* last = NULL; 41 | 42 | spin_lock(&list->lock); 43 | last = list->head.prev; 44 | last->next = element; 45 | 46 | element->list = list; 47 | element->prev = last; 48 | element->next = &list->head; 49 | 50 | list->head.prev = element; 51 | 52 | spin_unlock(&list->lock); 53 | } 54 | 55 | -------------------------------------------------------------------------------- /module/list.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_HELPER_LIST_H__ 2 | #define __LIBNVM_HELPER_LIST_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | /* Forward declaration */ 10 | struct list; 11 | 12 | 13 | /* 14 | * Doubly linked list element. 
15 | */ 16 | struct list_node 17 | { 18 | struct list* list; /* Reference to list */ 19 | struct list_node* next; /* Pointer to next element in list */ 20 | struct list_node* prev; /* Pointer to previous element in list */ 21 | }; 22 | 23 | 24 | /* 25 | * Doubly linked list. 26 | * This implementation expects there always be an empty head. 27 | */ 28 | struct list 29 | { 30 | struct list_node head; /* Start of the list */ 31 | spinlock_t lock; /* Ensure exclusive access to list */ 32 | }; 33 | 34 | 35 | 36 | /* 37 | * Initialize element. 38 | */ 39 | static void __always_inline list_node_init(struct list_node* element) 40 | { 41 | element->list = NULL; 42 | element->next = NULL; 43 | element->prev = NULL; 44 | } 45 | 46 | 47 | 48 | /* 49 | * Get next element in list (if there are any) 50 | */ 51 | #define list_next(current) \ 52 | ( ((current)->next != &(current)->list->head) ? (current)->next : NULL ) 53 | 54 | 55 | 56 | /* 57 | * Initialize list. 58 | */ 59 | void list_init(struct list* list); 60 | 61 | 62 | 63 | /* 64 | * Insert element into list. 65 | */ 66 | void list_insert(struct list* list, struct list_node* element); 67 | 68 | 69 | 70 | /* 71 | * Remove element from list. 72 | */ 73 | void list_remove(struct list_node* element); 74 | 75 | 76 | 77 | #endif /* __LIBNVM_HELPER_LIST_H__ */ 78 | -------------------------------------------------------------------------------- /module/map.h: -------------------------------------------------------------------------------- 1 | #ifndef __LIBNVM_HELPER_MAP_H__ 2 | #define __LIBNVM_HELPER_MAP_H__ 3 | 4 | #include "list.h" 5 | #include 6 | #include 7 | 8 | 9 | /* Forward declaration */ 10 | struct ctrl; 11 | struct map; 12 | 13 | 14 | typedef void (*release)(struct map*); 15 | 16 | 17 | /* 18 | * Describes a range of mapped memory. 
19 | */ 20 | struct map 21 | { 22 | struct list_node list; /* Linked list header */ 23 | struct task_struct* owner; /* Owner of mapping */ 24 | u64 vaddr; /* Starting virtual address */ 25 | struct list* ctrl_list; 26 | struct pci_dev* pdev; /* Reference to physical PCI device */ 27 | unsigned long page_size; /* Logical page size */ 28 | void* data; /* Custom data */ 29 | release release; /* Custom callback for unmapping and releasing memory */ 30 | unsigned long n_addrs; /* Number of mapped pages */ 31 | uint64_t addrs[1]; /* Bus addresses */ 32 | }; 33 | 34 | 35 | 36 | /* 37 | * Lock and map userspace pages for DMA. 38 | */ 39 | struct map* map_userspace(struct list* list, const struct ctrl* ctrl, u64 vaddr, unsigned long n_pages); 40 | 41 | 42 | 43 | /* 44 | * Unmap and release memory. 45 | */ 46 | void unmap_and_release(struct map* map); 47 | 48 | 49 | 50 | #ifdef _CUDA 51 | /* 52 | * Lock and map GPU device memory. 53 | */ 54 | struct map* map_device_memory(struct list* list, const struct ctrl* ctrl, u64 vaddr, unsigned long n_pages, struct list* ctrl_list); 55 | #endif 56 | 57 | 58 | 59 | /* 60 | * Find memory mapping from vaddr and current task 61 | */ 62 | struct map* map_find(const struct list* list, u64 vaddr); 63 | 64 | 65 | #endif /* __LIBNVM_HELPER_MAP_H__ */ 66 | -------------------------------------------------------------------------------- /scripts/bfs_run_emogi_nvme_frontier.sh: -------------------------------------------------------------------------------- 1 | 2 | #echo "===============================================" 3 | #echo "Running NVME GAP-urand with GPU 8 and Page Size 4096" 4 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 1 -p 4096 --gpu 8 --threads 64 5 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 2 -p 4096 --gpu 8 --threads 64 6 | 
#../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 3 -p 4096 --gpu 8 --threads 64 7 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 4 -p 4096 --gpu 8 --threads 64 8 | 9 | echo "===============================================" 10 | echo "Running NVME uk-2007-05 with GPU 8 and Page Size 4096" 11 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 1 -p 4096 --gpu 8 --threads 64 12 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 2 -p 4096 --gpu 8 --threads 64 13 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 3 -p 4096 --gpu 8 --threads 64 14 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 9 --memalloc 6 --repeat 32 --n_ctrls 4 -p 4096 --gpu 8 --threads 64 15 | 16 | 17 | #echo "===============================================" 18 | #echo "Running EMOGI GAP-urand with GPU 0" 19 | #../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 7 --memalloc 2 --repeat 32 --n_ctrls 1 -p 4096 --gpu 0 --threads 64 20 | 21 | echo "===============================================" 22 | echo "Running EMOGI uk-2007-05 with GPU0" 23 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 7 --memalloc 2 --repeat 32 --n_ctrls 1 -p 4096 --gpu 0 --threads 64 24 | -------------------------------------------------------------------------------- /scripts/bfs_run_nvme_scaling.sh: -------------------------------------------------------------------------------- 1 | echo 
"===============================================" 2 | echo "Running GAP-urand with GPU 0 and Page Size 8192" 3 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 0 --threads 64 4 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 0 --threads 64 5 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 0 --threads 64 6 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 0 --threads 64 7 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 0 --threads 64 8 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 0 --threads 64 9 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 0 --threads 64 10 | 11 | 12 | echo "===============================================" 13 | echo "Running GAP-urand with GPU 5 and Page Size 8192" 14 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 5 --threads 64 15 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 5 --threads 64 16 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 5 --threads 64 17 | 
../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 5 --threads 64 18 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 5 --threads 64 19 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 5 --threads 64 20 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/GAP-urand.bel -l $((1024*1024*1024*64)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 5 --threads 64 21 | 22 | 23 | echo "===============================================" 24 | echo "Running uk-2007-05 with GPU 0 and Page Size 8192" 25 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 0 --threads 64 26 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 0 --threads 64 27 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 0 --threads 64 28 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 0 --threads 64 29 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 0 --threads 64 30 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 0 --threads 64 31 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 
--memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 0 --threads 64 32 | 33 | 34 | echo "===============================================" 35 | echo "Running uk-2007-05 with GPU 5 and Page Size 8192" 36 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 1 -p 8192 --gpu 5 --threads 64 37 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 2 -p 8192 --gpu 5 --threads 64 38 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 3 -p 8192 --gpu 5 --threads 64 39 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 4 -p 8192 --gpu 5 --threads 64 40 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 5 -p 8192 --gpu 5 --threads 64 41 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 6 -p 8192 --gpu 5 --threads 64 42 | ../build/bin/nvm-bfs-bench -f /nvme0/graphs/EMOGI/uk-2007-05.bel -l $((1024*1024*1024*320)) --impl_type 3 --memalloc 6 --repeat 32 --n_ctrls 7 -p 8192 --gpu 5 --threads 64 43 | -------------------------------------------------------------------------------- /scripts/extrach.sh: -------------------------------------------------------------------------------- 1 | #set -x 2 | 3 | if [ $# -ne 2 ] 4 | then 5 | echo Usage $0 logfile numssd && exit 1 6 | fi 7 | 8 | logfile=$1 9 | CTRL=$2 10 | 11 | NUMDATASET=6 12 | declare -a GraphFileName=( 13 | "GAP-kron.bel" 14 | "GAP-urand.bel" 15 | "com-Friendster.bel" 16 | "MOLIERE_2016.bel" 17 | "uk-2007-05.bel" 18 | "sk-2005.bel" 19 | "Dummy" 20 | ) 21 | 22 | IMPLSIZE=2 23 | declare -a 
ImplType=( 24 | "3" 25 | "4" 26 | "5" 27 | #"8" 28 | #"9" 29 | ) 30 | 31 | NUMPAGESIZE=3 32 | declare -a PageSize=( 33 | "512" 34 | "4096" 35 | "8192" 36 | ) 37 | 38 | TYPE=Accesses 39 | for((gid=0;gid0) printf "%.2f\n",sum/n}' 52 | done 53 | done 54 | done 55 | done 56 | -------------------------------------------------------------------------------- /scripts/fw_user_routing: -------------------------------------------------------------------------------- 1 | FFFF0800,0814EEEE 2 | FFFF0814,0800EEEE 3 | FFFF0800,0815EEEE 4 | FFFF0815,0800EEEE 5 | FFFF0800,08100510,050c020c,0214EEEE 6 | FFFF0214,020c050c,05100810,0800EEEE 7 | FFFF0800,08100510,050c020c,0215EEEE 8 | FFFF0215,020c050c,05100810,0800EEEE 9 | FFFF0800,080c0400,0414000c,0014EEEE 10 | FFFF0014,000c0414,0400080c,0800EEEE 11 | FFFF0800,080c0400,0414000c,0015EEEE 12 | FFFF0015,000c0414,0400080c,0800EEEE 13 | FFFF0800,08080304,030c060c,0604EEEE 14 | FFFF0604,060c030c,03040808,0800EEEE 15 | FFFF0800,08080304,030c060c,0605EEEE 16 | FFFF0605,060c030c,03040808,0800EEEE 17 | FFFF0800,080c0400,0414000c,0004EEEE 18 | FFFF0004,000c0414,0400080c,0800EEEE 19 | FFFF0800,08100510,050c020c,0200EEEE 20 | FFFF0200,020c050c,05100810,0800EEEE 21 | 22 | #old 23 | FFFF0100,01100310,03080008,0014EEEE 24 | FFFF0014,00080308,03100110,0100EEEE 25 | -------------------------------------------------------------------------------- /scripts/identify_hba.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for a in `lspci -D -d 1000:c010 | cut -d" " -f1` 3 | 4 | do 5 | if [ $(lspci -vv -s $a | egrep -i "Upstream | 00-80-5e" | wc -l) == 2 ]; then 6 | if 7 | [ $(lspci -vv -s $a | egrep -i "Power budget" | wc -l) == 1 ]; then 8 | echo "Falcon_HBA_BUS#" $a 9 | fi 10 | fi 11 | done 12 | 13 | 14 | -------------------------------------------------------------------------------- /scripts/run_bfs.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | set -x 3 | 4 | if [ $# -ne 3 ] 5 | then 6 | echo Usage $0 numssd gpuid tbsize && exit 1 7 | fi 8 | 9 | 10 | #Initialize set of files are taken from EMOGI and graphBIG. 11 | 12 | NUMDATASET=6 13 | declare -a GraphFileArray=( 14 | "/home/vmailthody/data/GAP-kron.bel" 15 | "/home/vmailthody/data/GAP-urand.bel" 16 | "/home/vmailthody/data/com-Friendster.bel" 17 | "/home/vmailthody/data/MOLIERE_2016.bel" 18 | "/home/vmailthody/data/uk-2007-05.bel" 19 | "/home/vmailthody/data/sk-2005.bel" 20 | ) 21 | declare -a GraphFileOffset=( 22 | "$((1024*1024*1024*0))" 23 | "$((1024*1024*1024*64))" 24 | "$((1024*1024*1024*160))" 25 | "$((1024*1024*1024*224))" 26 | "$((1024*1024*1024*320))" 27 | "$((1024*1024*1024*384))" 28 | ) 29 | 30 | 31 | declare -a GraphRootNode=( 32 | "58720242" 33 | "58720256" 34 | "28703654" 35 | "13229860" 36 | "46329738" 37 | "37977096" 38 | ) 39 | 40 | 41 | 42 | 43 | CTRL=$1 44 | MEMTYPE=6 #BAFS_DIRECT 45 | GPU=$2 46 | TB=128 47 | 48 | for ((gfid=0; gfid /sys/bus/pci/devices/$a/driver/unbind; 5 | done 6 | -------------------------------------------------------------------------------- /scripts/write_emogi_graph_nvme.sh: -------------------------------------------------------------------------------- 1 | #make benchmarks -j 2 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-kron.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*0)) -o 1 3 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-kron.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*32)) -o 1 4 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-urand.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*64)) -o 1 5 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/GAP-urand.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 
--num_queues 128 -l $((1024*1024*1024*128)) -o 1 6 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/com-Friendster.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*160)) -o 1 7 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/com-Friendster.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*192)) -o 1 8 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/MOLIERE_2016.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*224)) -o 1 9 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/MOLIERE_2016.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*288)) -o 1 10 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/uk-2007-05.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*320)) -o 1 11 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/uk-2007-05.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*352)) -o 1 12 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/sk-2005.bel.dst -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*384)) -o 1 13 | ./bin/nvm-readwrite-bench -f /home/vmailthody/data/sk-2005.bel.val -p $((1024*1024*2)) -t $((1024*1024*2)) -b 128 -i 16 --queue_depth 4096 --num_queues 128 -l $((1024*1024*1024*416)) -o 1 14 | 15 | -------------------------------------------------------------------------------- /src/dis/device.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_DIS_DEVICE_H__ 2 | #define __NVM_INTERNAL_DIS_DEVICE_H__ 3 | #ifdef _SISCI 4 | 5 | /* Make sure everything is defined as needed */ 6 | #ifndef __DIS_CLUSTER__ 7 | 
#define __DIS_CLUSTER__ 8 | #endif 9 | 10 | /* Necessary includes */ 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "mutex.h" 16 | 17 | 18 | 19 | /* 20 | * Device descriptor. 21 | * 22 | * Holds a reference to a "borrowed" SISCI SmartIO device. 23 | */ 24 | struct device 25 | { 26 | uint32_t fdid; // SISCI SmartIO device identifier (fabric device identifier) 27 | sci_desc_t sd; // SISCI virtual device descriptor 28 | struct mutex lock; // Ensure exclusive access to device 29 | uint32_t counter; // Segment identifier counter 30 | sci_smartio_device_t device; // SmartIO device handle 31 | sci_remote_segment_t segment; // Reference to PCI bar 0 32 | size_t size; // Size of BAR0 33 | volatile void* ptr; // Mapped pointer 34 | sci_map_t md; // SISCI mapping descriptor 35 | }; 36 | 37 | 38 | 39 | /* 40 | * Connect to SmartIO device memory data segment. 41 | */ 42 | int _nvm_device_memory_get(sci_remote_segment_t* segment, 43 | const struct device* dev, 44 | uint32_t id, 45 | unsigned int memtype); 46 | 47 | 48 | 49 | /* 50 | * Disconnect from SmartIO device memory segment. 51 | */ 52 | void _nvm_device_memory_put(sci_remote_segment_t* segment); 53 | 54 | 55 | 56 | /* 57 | * Create local segment. 58 | * If ptr is not NULL, create empty segment and register segment memory. 59 | * If ptr is not NULL and gpu_mem is set, attach it CUDA device memory 60 | */ 61 | int _nvm_local_memory_get(sci_local_segment_t* segment, 62 | uint32_t* adapter, 63 | const struct device* dev, 64 | size_t size, 65 | void* ptr, 66 | bool gpu_mem); 67 | 68 | 69 | 70 | /* 71 | * Remove local segment. 
72 | */ 73 | void _nvm_local_memory_put(sci_local_segment_t* segment); 74 | 75 | 76 | 77 | #endif /* _SISCI */ 78 | #endif /* __NVM_INTERNAL_DIS_DEVICE_H__ */ 79 | -------------------------------------------------------------------------------- /src/dis/interrupt.c: -------------------------------------------------------------------------------- 1 | #ifndef _SISCI 2 | #error "Must compile with SISCI support" 3 | #endif 4 | 5 | #ifndef __DIS_CLUSTER__ 6 | #define __DIS_CLUSTER__ 7 | #endif 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "dis/interrupt.h" 14 | #include "dprintf.h" 15 | #include 16 | #include 17 | #include 18 | 19 | 20 | 21 | /* 22 | * Do some sanity checking and then call supplied callback. 23 | */ 24 | static sci_callback_action_t interrupt_callback(struct local_intr* interrupt, 25 | sci_local_data_interrupt_t intr, 26 | void* data, 27 | uint32_t length, 28 | sci_error_t status) 29 | { 30 | #ifndef NDEBUG 31 | if (status != SCI_ERR_OK) 32 | { 33 | dprintf("Unexpected status in interrupt handler routine: %s\n", _SCIGetErrorString(status)); 34 | return SCI_CALLBACK_CANCEL; 35 | } 36 | 37 | if (intr != interrupt->intr) 38 | { 39 | dprintf("Possible memory corruption\n"); 40 | return SCI_CALLBACK_CANCEL; 41 | } 42 | #endif 43 | 44 | interrupt->callback(interrupt->data, data, length); 45 | 46 | return SCI_CALLBACK_CONTINUE; 47 | } 48 | 49 | 50 | 51 | int _nvm_local_intr_get(struct local_intr* intr, uint32_t adapter, void* cb_data, intr_callback_t cb) 52 | { 53 | sci_error_t err = SCI_ERR_OK; 54 | 55 | // Get local node identifier 56 | SCIGetLocalNodeId(adapter, &intr->node_id, 0, &err); 57 | #ifndef NDEBUG 58 | if (err != SCI_ERR_OK) 59 | { 60 | dprintf("Unexpected error: %s\n", _SCIGetErrorString(err)); 61 | return EIO; 62 | } 63 | #endif 64 | 65 | // Open SISCI descriptor 66 | SCIOpen(&intr->sd, 0, &err); 67 | #ifndef NDEBUG 68 | if (err != SCI_ERR_OK) 69 | { 70 | dprintf("Failed to open SISCI virtual device: %s\n", 
_SCIGetErrorString(err)); 71 | return EIO; 72 | } 73 | #endif 74 | 75 | intr->adapter = adapter; 76 | intr->data = cb_data; 77 | intr->callback = cb; 78 | 79 | uint32_t flags = 0; 80 | void* data = NULL; 81 | sci_cb_data_interrupt_t callback = NULL; 82 | 83 | // Callback was supplied, set up parameters 84 | if (cb != NULL) 85 | { 86 | data = (void*) intr; 87 | callback = (sci_cb_data_interrupt_t) interrupt_callback; 88 | flags |= SCI_FLAG_USE_CALLBACK; 89 | } 90 | 91 | // Create data interrupt 92 | SCICreateDataInterrupt(intr->sd, &intr->intr, adapter, &intr->intr_no, callback, data, flags, &err); 93 | if (err != SCI_ERR_OK) 94 | { 95 | dprintf("Failed to create data interrupt: %s\n", _SCIGetErrorString(err)); 96 | SCIClose(intr->sd, 0, &err); 97 | return ENOSPC; 98 | } 99 | 100 | return 0; 101 | } 102 | 103 | 104 | 105 | void _nvm_local_intr_put(struct local_intr* intr) 106 | { 107 | sci_error_t err = SCI_ERR_OK; 108 | 109 | do 110 | { 111 | SCIRemoveDataInterrupt(intr->intr, 0, &err); 112 | } 113 | while (err == SCI_ERR_BUSY); 114 | 115 | SCIClose(intr->sd, 0, &err); 116 | } 117 | 118 | 119 | 120 | int _nvm_local_intr_wait(struct local_intr* intr, void* data, uint16_t maxlen, uint32_t timeout) 121 | { 122 | sci_error_t err = SCI_ERR_OK; 123 | uint32_t len = maxlen; 124 | 125 | SCIWaitForDataInterrupt(intr->intr, data, &len, timeout, 0, &err); 126 | 127 | switch (err) 128 | { 129 | case SCI_ERR_OK: 130 | return 0; 131 | 132 | case SCI_ERR_TIMEOUT: 133 | return ETIMEDOUT; 134 | 135 | default: 136 | dprintf("Waiting for data interrupt unexpectedly failed: %s\n", _SCIGetErrorString(err)); 137 | return EIO; 138 | } 139 | } 140 | 141 | 142 | 143 | int _nvm_remote_intr_get(struct remote_intr* intr, uint32_t adapter, uint32_t node, uint32_t no) 144 | { 145 | sci_error_t err = SCI_ERR_OK; 146 | 147 | SCIOpen(&intr->sd, 0, &err); 148 | #ifndef NDEBUG 149 | if (err != SCI_ERR_OK) 150 | { 151 | dprintf("Failed to open SISCI virtual device: %s\n", _SCIGetErrorString(err)); 
152 | return EIO; 153 | } 154 | #endif 155 | 156 | SCIConnectDataInterrupt(intr->sd, &intr->intr, node, adapter, no, SCI_INFINITE_TIMEOUT, 0, &err); 157 | if (err != SCI_ERR_OK) 158 | { 159 | SCIClose(intr->sd, 0, &err); 160 | return ECONNREFUSED; 161 | } 162 | 163 | return 0; 164 | } 165 | 166 | 167 | 168 | void _nvm_remote_intr_put(struct remote_intr* intr) 169 | { 170 | sci_error_t err = SCI_ERR_OK; 171 | SCIDisconnectDataInterrupt(intr->intr, 0, &err); 172 | SCIClose(intr->sd, 0, &err); 173 | } 174 | 175 | 176 | 177 | /* 178 | * Trigger remote interrupt with data. 179 | */ 180 | int _nvm_remote_intr_trigger(const struct remote_intr* intr, void* data, uint16_t length) 181 | { 182 | sci_error_t err = SCI_ERR_OK; 183 | 184 | SCITriggerDataInterrupt(intr->intr, data, length, 0, &err); 185 | if (err != SCI_ERR_OK) 186 | { 187 | dprintf("Failed to trigger data interrupt\n"); 188 | return ENOTCONN; 189 | } 190 | 191 | return 0; 192 | } 193 | 194 | 195 | 196 | /* 197 | * Convenience function for easy remote interrupt triggering. 
198 | */ 199 | int _nvm_remote_intr_fire_and_forget(uint32_t adapter, uint32_t node, uint32_t no, void* data, uint16_t len) 200 | { 201 | int status = 0; 202 | struct remote_intr intr; 203 | 204 | status = _nvm_remote_intr_get(&intr, adapter, node, no); 205 | if (status != 0) 206 | { 207 | return status; 208 | } 209 | 210 | status = _nvm_remote_intr_trigger(&intr, data, len); 211 | _nvm_remote_intr_put(&intr); 212 | return status; 213 | } 214 | 215 | -------------------------------------------------------------------------------- /src/dis/interrupt.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_DIS_INTERRUPT_H__ 2 | #define __NVM_INTERNAL_DIS_INTERRUPT_H__ 3 | #ifdef _SISCI 4 | 5 | /* Make sure everything is defined as needed */ 6 | #ifndef __DIS_CLUSTER__ 7 | #define __DIS_CLUSTER__ 8 | #endif 9 | 10 | /* Necessary includes */ 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | 18 | /* Forward declarations */ 19 | struct local_intr; 20 | struct remote_intr; 21 | 22 | 23 | 24 | /* 25 | * Interrupt callback. 26 | */ 27 | typedef void (*intr_callback_t)(void* user_data, void* recv_data, uint16_t length); 28 | 29 | 30 | 31 | /* 32 | * Local interrupt descriptor. 33 | * Data must be free'd manually. 34 | */ 35 | struct local_intr 36 | { 37 | sci_desc_t sd; // SISCI virtual device descriptor 38 | sci_local_data_interrupt_t intr; // SISCI data interrupt handle 39 | uint32_t adapter; // DIS adapter 40 | uint32_t intr_no; // Interrupt number 41 | uint32_t node_id; // DIS node identifier 42 | void* data; // User data 43 | intr_callback_t callback; // Interrupt callback 44 | }; 45 | 46 | 47 | 48 | /* 49 | * Remote interrupt descriptor. 50 | */ 51 | struct remote_intr 52 | { 53 | sci_desc_t sd; // SISCI virtual device descriptor 54 | sci_remote_data_interrupt_t intr; // SISCI data interrupt reference 55 | }; 56 | 57 | 58 | 59 | /* 60 | * Create a local data interrupt. 
 */
int _nvm_local_intr_get(struct local_intr* intr,
                        uint32_t adapter,
                        void* cb_data,
                        intr_callback_t cb_func);



/*
 * Remove a local data interrupt.
 */
void _nvm_local_intr_put(struct local_intr* intr);



/*
 * Block for a duration while waiting for an interrupt and removes interrupt afterwards.
 * Returns success if length of received data matches expected length.
 */
int _nvm_local_intr_wait(struct local_intr* intr, void* data, uint16_t maxlen, uint32_t timeout);



/*
 * Connect to remote interrupt.
 */
int _nvm_remote_intr_get(struct remote_intr* intr, uint32_t adapter, uint32_t node_id, uint32_t intr_no);



/*
 * Disconnect from remote interrupt.
 */
void _nvm_remote_intr_put(struct remote_intr* intr);



/*
 * Trigger remote interrupt with data.
 */
int _nvm_remote_intr_trigger(const struct remote_intr* intr, void* data, uint16_t len);



/*
 * Connect to remote interrupt, send data, and disconnect.
 * Convenience wrapper combining connect + trigger + disconnect.
 */
int _nvm_remote_intr_fire_and_forget(uint32_t adapter,
                                     uint32_t node_id,
                                     uint32_t intr_no,
                                     void* data,
                                     uint16_t len);

#endif /* _SISCI */
#endif /* __NVM_INTERNAL_DIS_INTERRUPT_H__ */
-------------------------------------------------------------------------------- /src/dis/map.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_DIS_MAP_H__
#define __NVM_INTERNAL_DIS_MAP_H__
#ifdef _SISCI

/* Make sure everything is defined as needed */
#ifndef __DIS_CLUSTER__
#define __DIS_CLUSTER__
#endif

/* Necessary includes */
/* NOTE(review): system header names were lost in extraction (bare "#include");
 * presumably SISCI headers and <stdbool.h> -- restore from upstream. */
#include
#include "dma.h"
#include


/*
 * Virtual address space mapping.
 */
struct va_map
{
    bool        mapped;     // Is segment mapped into virtual address space?
    sci_map_t   md;         // SISCI mapping descriptor
};



/*
 * Local segment descriptor.
 * map.range.remote = false
 */
struct local_segment
{
    // XXX: ctrl reference can be replaced with a new sci_desc_t
    struct controller*  ctrl;       // Controller reference
    uint32_t            adapter;    // DIS adapter number
    sci_local_segment_t segment;    // Local segment reference
    bool                remove;     // Requires remove
    struct va_map       map;        // Mapping descriptor
    struct va_range     range;      // Memory range descriptor
};



/*
 * Remote segment descriptor.
 * map.range.remote = true
 */
struct remote_segment
{
    // XXX: ctrl reference is only necessary for device segments
    struct controller*      ctrl;       // Controller reference
    sci_remote_segment_t    segment;    // Remote segment reference
    bool                    disconnect; // Requires a disconnect
    struct va_map           map;        // Mapping descriptor
    struct va_range         range;      // Memory range descriptor
};


#endif /* _SISCI */
#endif /* __NVM_INTERNAL_DIS_MAP_H__ */
-------------------------------------------------------------------------------- /src/dma.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_DMA_H__
#define __NVM_INTERNAL_DMA_H__

/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include
#include
#include


/* Forward declaration */
struct va_range;



/*
 * Callback type for freeing an address range descriptor.
 * Called after the range is unmapped for the device and virtual address mapping can
 * be released.
 */
typedef void (*va_range_free_t)(struct va_range* va);



/*
 * Virtual address range descriptor.
 * This structure describes a custom address range mapped in userspace.
 */
struct va_range
{
    bool            remote;     // Indicates if this is remote memory
    volatile void*  vaddr;      // Virtual address of mapped address range
    size_t          page_size;  // Alignment of mapping (page size)
    size_t          n_pages;    // Number of pages for address range
};


/* Brace-initializer helper; field order must match struct va_range above. */
#define VA_RANGE_INIT(remote, vaddr, page_size, n_pages) \
    (struct va_range) {(remote), (vaddr), (page_size), (n_pages)}


/*
 * Map address range for a controller and create and initialize a DMA handle.
 */
int _nvm_dma_init(nvm_dma_t** handle,
                  const nvm_ctrl_t* ctrl,
                  struct va_range* va,
                  va_range_free_t release);



/*
 * Get the internal virtual address range from a handle.
 */
const struct va_range* _nvm_dma_va(const nvm_dma_t* handle);


#endif /* __NVM_INTERNAL_DMA_H__ */
-------------------------------------------------------------------------------- /src/dprintf.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_DPRINTF_H__
#define __NVM_INTERNAL_DPRINTF_H__

#ifndef NDEBUG

/* NOTE(review): header names lost in extraction; presumably <stdio.h>,
 * <stdarg.h> and friends -- restore from upstream. */
#include
#include
#include
#include

/* Debug printf: prefixes the message with the calling function's name and
 * writes it to stderr. Entirely compiled out in NDEBUG builds (see below). */
static void _nvm_dprintf(const char* func, const char* format, ...)
{
    va_list args;
    va_start(args, format);
    fprintf(stderr, "[%s] ", func);
    vfprintf(stderr, format, args);
    va_end(args);
}

#define dprintf(...) _nvm_dprintf(__func__, __VA_ARGS__)

#define _nvm_strerror(status) nvm_strerror(status)
#define _SCIGetErrorString(err) SCIGetErrorString(err)

#endif /* ! NDEBUG */



/* If no debug print, just swallow message */
#ifndef dprintf
#define dprintf(...)
33 | #endif 34 | 35 | 36 | 37 | /* If no debug print, don't lookup completions */ 38 | #ifndef _nvm_strerror 39 | #define _nvm_strerror(status) 40 | #define _SCIGetErrorString(err) 41 | #endif 42 | 43 | 44 | #endif /* __NVM_INTERNAL_DPRINTF_H__ */ 45 | -------------------------------------------------------------------------------- /src/error.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | static const char* generic_status[] = 11 | { 12 | "Success", 13 | "Invalid command opcode", 14 | "Invalid field in command", 15 | "Command ID conflict", 16 | "Data transfer error", 17 | "Commands aborted due to power loss notification", 18 | "Internal error", 19 | "Command abort requested", 20 | "Command aborted due to SQ deletion", 21 | "Command aborted due to failed fused command", 22 | "Command aborted due to missing fused command", 23 | "Invalid namespace or format", 24 | "Command sequence error", 25 | "Invalid SGL segment descriptor", 26 | "Invalid number of SQL descriptors", 27 | "Data SGL length invalid", 28 | "Metadata SGL length invalid", 29 | "SGL descriptor type invalid", 30 | "Invalid use of controller memory buffer", 31 | "PRP offset invalid", 32 | "Atomic write unit exceeded", 33 | "Operation denied", 34 | "SGL offset invalid", 35 | "Unknown/reserved", 36 | "Host identifier inconsistent format", 37 | "Keep alive timer expired", 38 | "Keep alive timer invalid", 39 | "Command aborted due to preempt and abort", 40 | "Sanitize failed", 41 | "Sanitize in progress", 42 | "SGL data block granularity invalid", 43 | "Command not supported for queue in CMB" 44 | }; 45 | 46 | 47 | 48 | static const char* generic_status_nvm_commands[] = 49 | { 50 | "LBA out of range", 51 | "Capacity exceeded", 52 | "Namespace not ready", 53 | "Reservation conflict", 54 | "Format in progress" 55 | }; 56 | 57 | 58 | 59 | static const char* 
command_specific_status[] = 60 | { 61 | "Completion queue invalid", 62 | "Invalid queue identifier", 63 | "Invalid queue size", 64 | "Abort command limit exceeded", 65 | "Unknown/reserved", 66 | "Asynchronous event request limit exceeded", 67 | "Invalid firmware slot", 68 | "Invalid firmware image", 69 | "Invalid interrupt vector", 70 | "Invalid log page", 71 | "Invalid format", 72 | "Firmware activation requires conventional reset", 73 | "Invalid queue deletion", 74 | "Feature identifier not saveable", 75 | "Feature not changeable", 76 | "Feature not namespace specific", 77 | "Firmware activation requires NVM subsystem reset", 78 | "Firmware activation requires reset", 79 | "Firmware activation requires maximum time violation", 80 | "Firmware activation prohibited", 81 | "Overlapping range", 82 | "Namespace insufficient capacity", 83 | "Namespace identifier unavailable", 84 | "Unknown/reserved", 85 | "Namespace already attached", 86 | "Namespace is private", 87 | "Namespace not attached", 88 | "Thin provisioning not supported", 89 | "Controller list invalid", 90 | "Device self-test in progress", 91 | "Boot partition write prohibited", 92 | "Invalid controller identifier", 93 | "Invalid secondary controller state", 94 | "Invalid number of controller resources", 95 | "Invalid resource identifier" 96 | }; 97 | 98 | 99 | 100 | static const char* command_specific_status_nvm_commands[] = 101 | { 102 | "Conflicting attributes", 103 | "Invalid protection information", 104 | "Attempted write to read only range" 105 | }; 106 | 107 | 108 | 109 | static const char* media_and_data_integrity_nvm_commands[] = 110 | { 111 | "Write fault", 112 | "Unrecovered read error", 113 | "End-to-end guard check error", 114 | "End-to-end application tag check error", 115 | "End-to-end reference tag check error", 116 | "Compare failure", 117 | "Access denied", 118 | "Deallocated or unwritten logical block" 119 | }; 120 | 121 | 122 | 123 | static const char* lookup_string(uint8_t 
status_code_type, uint8_t status_code) 124 | { 125 | switch (status_code_type) 126 | { 127 | case 0x00: // Generic command status 128 | if (status_code < 0x20) 129 | { 130 | return generic_status[status_code]; 131 | } 132 | else if (0x80 <= status_code && status_code <= 0x84) 133 | { 134 | return generic_status_nvm_commands[status_code - 0x80]; 135 | } 136 | return "Unknown generic error"; 137 | 138 | case 0x01: // Command specific status 139 | if (status_code < 0x23) 140 | { 141 | return command_specific_status[status_code]; 142 | } 143 | else if (0x80 <= status_code && status_code <= 0x82) 144 | { 145 | return command_specific_status_nvm_commands[status_code - 0x80]; 146 | } 147 | return "Unknown command specific error"; 148 | 149 | case 0x02: // Media and data integrity errors 150 | if (0x80 <= status_code && status_code <= 0x87) 151 | { 152 | return media_and_data_integrity_nvm_commands[status_code - 0x80]; 153 | } 154 | return "Unknown media or data integrity error"; 155 | 156 | default: 157 | return "Unknown status code type"; 158 | } 159 | } 160 | 161 | 162 | 163 | const char* nvm_strerror(int status) 164 | { 165 | int err; 166 | uint8_t sct; 167 | uint8_t sc; 168 | 169 | err = NVM_ERR_UNPACK_ERRNO(status); 170 | sct = NVM_ERR_UNPACK_SCT(status); 171 | sc = NVM_ERR_UNPACK_SC(status); 172 | 173 | if (sct != 0 || sc != 0) 174 | { 175 | printf("sct: %x\tsc: %x\n", sct, sc); 176 | return lookup_string(sct, sc); 177 | 178 | } 179 | 180 | return strerror(err); 181 | } 182 | 183 | -------------------------------------------------------------------------------- /src/lib_ctrl.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_CTRL_H__ 2 | #define __NVM_INTERNAL_CTRL_H__ 3 | 4 | #include 5 | #include "mutex.h" 6 | #include "lib_util.h" 7 | 8 | 9 | /* 10 | * Device handle. 11 | */ 12 | struct device; 13 | 14 | 15 | 16 | /* 17 | * Forward declaration of a virtual memory address range. 
 */
struct va_range;



/*
 * Device reference operations.
 * Backend-specific function table (ioctl, SmartIO, ...) installed at
 * controller-init time.
 */
struct device_ops
{
    /*
     * Release device reference (called when refcount is 0)
     * This should also unmap MLBAR/BAR0 of the device.
     */
    void (*release_device)(struct device* dev, volatile void* mm_ptr, size_t mm_size);


    /*
     * Map an address range for the device.
     * On success the caller-supplied ioaddrs array is filled with the
     * bus/IO addresses of the mapped pages.
     */
    int (*map_range)(const struct device* dev, const struct va_range* va, uint64_t* ioaddrs);


    /*
     * Unmap an address range for the device.
     */
    void (*unmap_range)(const struct device* dev, const struct va_range* va);
};



/*
 * Controller device type.
 * Indicates how the controller handle was initialized.
 */
enum device_type
{
    DEVICE_TYPE_UNKNOWN = 0x00,     /* Device is mapped manually by the user */
    DEVICE_TYPE_IOCTL   = 0x01,     /* Device is mapped through UNIX file descriptor */
    DEVICE_TYPE_SMARTIO = 0x02,     /* Device is mapped by SISCI SmartIO API */
};



/*
 * Internal controller handle.
 * Used to reference count the device handle.
 * Reference counting is handled by the get and put functions.
 */
struct controller
{
    struct mutex        lock;       /* Ensure exclusive access */
    uint32_t            count;      /* Reference count */
    enum device_type    type;       /* Controller device type */
    struct device*      device;     /* Device handle */
    struct device_ops   ops;        /* Device operations */
    nvm_ctrl_t          handle;     /* User's handle (embedded; see _nvm_ctrl_type) */
};


/*
 * Helper function to initialize the controller handle by reading
 * the appropriate registers from the controller BAR.
81 | */ 82 | int _nvm_ctrl_init(nvm_ctrl_t** handle, /* User's handle */ 83 | struct device* dev, /* Device handle */ 84 | const struct device_ops* ops, /* Device handle operations */ 85 | enum device_type type, /* Device type */ 86 | volatile void* mm_ptr, /* Memory-mapped pointer */ 87 | size_t mm_size); /* Size of memory-map */ 88 | 89 | 90 | 91 | /* 92 | * Increase controller reference count. 93 | */ 94 | struct controller* _nvm_ctrl_get(const nvm_ctrl_t* handle); 95 | 96 | 97 | 98 | /* 99 | * Decrease controller reference count. 100 | */ 101 | void _nvm_ctrl_put(struct controller* ctrl); 102 | 103 | 104 | 105 | /* 106 | * Convenience macro to get the controller type. 107 | */ 108 | #define _nvm_ctrl_type(ctrl) _nvm_container_of(ctrl, struct controller, handle)->type 109 | 110 | #endif /* __NVM_INTERNAL_CTRL_H__ */ 111 | -------------------------------------------------------------------------------- /src/lib_util.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_UTIL_H__ 2 | #define __NVM_INTERNAL_UTIL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #if defined( __unix__ ) 9 | #include 10 | #include 11 | #endif 12 | 13 | #ifndef NDEBUG 14 | #include 15 | #include 16 | #include "dprintf.h" 17 | #endif 18 | 19 | 20 | /* Get the containing struct */ 21 | #if defined( __clang__ ) || defined( __GNUC__ ) 22 | #define _nvm_container_of(ptr, type, member) ({ \ 23 | const typeof( ((type *) 0)->member )* __mptr = (ptr); \ 24 | (type *) (((unsigned char*) __mptr) - offsetof(type, member)); }) 25 | #else 26 | #define _nvm_container_of(ptr, type, member) \ 27 | ((type *) (((unsigned char*) (ptr)) - ((unsigned char*) (&((type *) 0)->member)))) 28 | #endif 29 | 30 | 31 | /* Get minimum of two values */ 32 | #define _MIN(a, b) ( (a) <= (b) ? (a) : (b) ) 33 | 34 | 35 | /* Get the maximum of two values */ 36 | #define _MAX(a, b) ( (a) > (b) ? 
(a) : (b) ) 37 | 38 | 39 | 40 | /* Calculate the base-2 logarithm of a number n */ 41 | static inline uint32_t _nvm_b2log(uint32_t n) 42 | { 43 | uint32_t count = 0; 44 | 45 | while (n > 0) 46 | { 47 | ++count; 48 | n >>= 1; 49 | } 50 | 51 | return count - 1; 52 | } 53 | 54 | 55 | #if defined( __unix__ ) 56 | /* Delay the minimum of one millisecond and a time remainder */ 57 | static inline uint64_t _nvm_delay_remain(uint64_t remaining_nanoseconds) 58 | { 59 | struct timespec ts; 60 | 61 | if (remaining_nanoseconds == 0) 62 | { 63 | return 0; 64 | } 65 | 66 | ts.tv_sec = 0; 67 | ts.tv_nsec = _MIN(1000000UL, remaining_nanoseconds); 68 | 69 | clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL); 70 | 71 | remaining_nanoseconds -= _MIN(1000000UL, remaining_nanoseconds); 72 | return remaining_nanoseconds; 73 | } 74 | #endif 75 | 76 | 77 | #if defined( __unix__ ) 78 | /* Get the system page size */ 79 | static inline size_t _nvm_host_page_size() 80 | { 81 | long page_size = sysconf(_SC_PAGESIZE); 82 | 83 | #ifndef NDEBUG 84 | if (page_size < 0) 85 | { 86 | dprintf("Failed to look up system page size: %s\n", strerror(errno)); 87 | return 0; 88 | } 89 | #endif 90 | 91 | return page_size; 92 | } 93 | #else 94 | #define _nvm_host_page_size() 0x1000 95 | #endif 96 | 97 | 98 | #endif /* __NVM_INTERNAL_UTIL_H__ */ 99 | -------------------------------------------------------------------------------- /src/linux/device.cpp: -------------------------------------------------------------------------------- 1 | #ifndef __linux__ 2 | #error "Must compile for Linux" 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "linux/map.h" 20 | #include "linux/ioctl.h" 21 | #include "lib_ctrl.h" 22 | #include "dprintf.h" 23 | 24 | 25 | 26 | /* 27 | * Device descriptor 28 | */ 29 | struct device 30 | { 31 | int fd; /* ioctl file 
descriptor */ 32 | }; 33 | 34 | 35 | 36 | /* 37 | * Unmap controller memory and close file descriptor. 38 | */ 39 | static void release_device(struct device* dev, volatile void* mm_ptr, size_t mm_size) 40 | { 41 | munmap((void*) mm_ptr, mm_size); 42 | close(dev->fd); 43 | free(dev); 44 | } 45 | 46 | 47 | 48 | /* 49 | * Call kernel module ioctl and map memory for DMA. 50 | */ 51 | static int ioctl_map(const struct device* dev, const struct va_range* va, uint64_t* ioaddrs) 52 | { 53 | const struct ioctl_mapping* m = _nvm_container_of(va, struct ioctl_mapping, range); 54 | enum nvm_ioctl_type type; 55 | 56 | switch (m->type) 57 | { 58 | case MAP_TYPE_API: 59 | case MAP_TYPE_HOST: 60 | type = NVM_MAP_HOST_MEMORY; 61 | break; 62 | 63 | #ifdef _CUDA 64 | case MAP_TYPE_CUDA: 65 | type = NVM_MAP_DEVICE_MEMORY; 66 | break; 67 | #endif 68 | default: 69 | dprintf("Unknown memory type in map for device"); 70 | return EINVAL; 71 | } 72 | 73 | struct nvm_ioctl_map request = { 74 | .vaddr_start = (uintptr_t) m->buffer, 75 | .n_pages = va->n_pages, 76 | .ioaddrs = ioaddrs 77 | }; 78 | 79 | int err = ioctl(dev->fd, type, &request); 80 | if (err < 0) 81 | { 82 | dprintf("Page mapping kernel request failed (ptr=%p, n_pages=%zu): %s\n", 83 | m->buffer, va->n_pages, strerror(errno)); 84 | return errno; 85 | } 86 | 87 | return 0; 88 | } 89 | 90 | 91 | 92 | /* 93 | * Call kernel module ioctl and unmap memory. 
94 | */ 95 | static void ioctl_unmap(const struct device* dev, const struct va_range* va) 96 | { 97 | const struct ioctl_mapping* m = _nvm_container_of(va, struct ioctl_mapping, range); 98 | uint64_t addr = (uintptr_t) m->buffer; 99 | 100 | 101 | int err = ioctl(dev->fd, NVM_UNMAP_MEMORY, &addr); 102 | if (err < 0) 103 | { 104 | dprintf("Page unmapping kernel request failed: %s\n", strerror(errno)); 105 | } 106 | } 107 | 108 | 109 | 110 | int nvm_ctrl_init(nvm_ctrl_t** ctrl, int filedes) 111 | { 112 | int err; 113 | struct device* dev; 114 | const struct device_ops ops = { 115 | .release_device = &release_device, 116 | .map_range = &ioctl_map, 117 | .unmap_range = &ioctl_unmap, 118 | }; 119 | 120 | *ctrl = NULL; 121 | dev = (struct device*) malloc(sizeof(struct device)); 122 | if (dev == NULL) 123 | { 124 | dprintf("Failed to allocate device handle: %s\n", strerror(errno)); 125 | return ENOMEM; 126 | } 127 | 128 | dev->fd = dup(filedes); 129 | if (dev->fd < 0) 130 | { 131 | free(dev); 132 | dprintf("Could not duplicate file descriptor: %s\n", strerror(errno)); 133 | return errno; 134 | } 135 | 136 | err = fcntl(dev->fd, F_SETFD, O_RDWR); 137 | if (err == -1) 138 | { 139 | close(dev->fd); 140 | free(dev); 141 | dprintf("Failed to set file descriptor control: %s\n", strerror(errno)); 142 | return errno; 143 | } 144 | 145 | const size_t mm_size = NVM_CTRL_MEM_MINSIZE; 146 | void* mm_ptr = mmap(NULL, mm_size, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE|MAP_LOCKED, dev->fd, 0); 147 | if (mm_ptr == NULL) 148 | { 149 | close(dev->fd); 150 | free(dev); 151 | dprintf("Failed to map device memory: %s\n", strerror(errno)); 152 | return errno; 153 | } 154 | 155 | err = _nvm_ctrl_init(ctrl, dev, &ops, DEVICE_TYPE_IOCTL, mm_ptr, mm_size); 156 | if (err != 0) 157 | { 158 | release_device(dev, mm_ptr, mm_size); 159 | return err; 160 | } 161 | 162 | return 0; 163 | } 164 | 165 | -------------------------------------------------------------------------------- /src/linux/dma.cpp: 
-------------------------------------------------------------------------------- 1 | #ifndef __linux__ 2 | #error "Must compile for Linux" 3 | #endif 4 | 5 | #ifdef _CUDA 6 | #ifndef __CUDA__ 7 | #define __CUDA__ 8 | #endif 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "lib_util.h" 21 | #include "lib_ctrl.h" 22 | #include "dma.h" 23 | #include "linux/map.h" 24 | #include "dprintf.h" 25 | 26 | 27 | 28 | static void remove_mapping_descriptor(struct ioctl_mapping* md) 29 | { 30 | if (md->type == MAP_TYPE_API) 31 | { 32 | free((void*) md->buffer); 33 | } 34 | 35 | free(md); 36 | } 37 | 38 | 39 | 40 | static void release_mapping_descriptor(struct va_range* va) 41 | { 42 | remove_mapping_descriptor(_nvm_container_of(va, struct ioctl_mapping, range)); 43 | } 44 | 45 | 46 | 47 | static int create_mapping_descriptor(struct ioctl_mapping** handle, size_t page_size, enum mapping_type type, void* buffer, size_t size) 48 | { 49 | size_t n_pages = NVM_PAGE_ALIGN(size, page_size) / page_size; 50 | if (n_pages == 0) 51 | { 52 | return EINVAL; 53 | } 54 | 55 | struct ioctl_mapping* md = (struct ioctl_mapping*) malloc(sizeof(struct ioctl_mapping)); 56 | if (md == NULL) 57 | { 58 | dprintf("Failed to allocate mapping descriptor: %s\n", strerror(errno)); 59 | return errno; 60 | } 61 | 62 | md->type = type; 63 | md->buffer = buffer; 64 | md->range.remote = false; 65 | md->range.vaddr = (volatile void*) buffer; 66 | md->range.page_size = page_size; 67 | md->range.n_pages = n_pages; 68 | 69 | *handle = md; 70 | return 0; 71 | } 72 | 73 | 74 | 75 | int nvm_dma_create(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, size_t size) 76 | { 77 | void* buffer; 78 | struct ioctl_mapping* md; 79 | 80 | size = NVM_CTRL_ALIGN(ctrl, size); 81 | if (size == 0) 82 | { 83 | return EINVAL; 84 | } 85 | 86 | *handle = NULL; 87 | if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL) 88 | { 89 | return EBADF; 90 
| } 91 | 92 | int err = posix_memalign(&buffer, ctrl->page_size, size); 93 | if (err != 0) 94 | { 95 | dprintf("Failed to allocate page-aligned memory buffer: %s\n", strerror(err)); 96 | return err; 97 | } 98 | 99 | err = create_mapping_descriptor(&md, ctrl->page_size, MAP_TYPE_API, buffer, size); 100 | if (err != 0) 101 | { 102 | free(buffer); 103 | return err; 104 | } 105 | 106 | err = _nvm_dma_init(handle, ctrl, &md->range, &release_mapping_descriptor); 107 | if (err != 0) 108 | { 109 | remove_mapping_descriptor(md); 110 | return err; 111 | } 112 | 113 | return 0; 114 | } 115 | 116 | 117 | 118 | int nvm_dma_map_host(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* vaddr, size_t size) 119 | { 120 | struct ioctl_mapping* md; 121 | *handle = NULL; 122 | 123 | size = NVM_CTRL_ALIGN(ctrl, size); 124 | if (size == 0) 125 | { 126 | return EINVAL; 127 | } 128 | 129 | if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL) 130 | { 131 | return EBADF; 132 | } 133 | 134 | int err = create_mapping_descriptor(&md, ctrl->page_size, MAP_TYPE_HOST, vaddr, size); 135 | if (err != 0) 136 | { 137 | return err; 138 | } 139 | 140 | err = _nvm_dma_init(handle, ctrl, &md->range, &release_mapping_descriptor); 141 | if (err != 0) 142 | { 143 | remove_mapping_descriptor(md); 144 | return err; 145 | } 146 | 147 | return 0; 148 | } 149 | 150 | 151 | 152 | #ifdef _CUDA 153 | int nvm_dma_map_device(nvm_dma_t** handle, const nvm_ctrl_t* ctrl, void* devptr, size_t size) 154 | { 155 | struct ioctl_mapping* md; 156 | *handle = NULL; 157 | 158 | if (_nvm_ctrl_type(ctrl) != DEVICE_TYPE_IOCTL) 159 | { 160 | return EBADF; 161 | } 162 | 163 | int err = create_mapping_descriptor(&md, 1ULL << 16, MAP_TYPE_CUDA, devptr, size); 164 | if (err != 0) 165 | { 166 | return err; 167 | } 168 | 169 | err = _nvm_dma_init(handle, ctrl, &md->range, &release_mapping_descriptor); 170 | if (err != 0) 171 | { 172 | remove_mapping_descriptor(md); 173 | return err; 174 | } 175 | 176 | return 0; 177 | } 178 | #endif 179 | 
-------------------------------------------------------------------------------- /src/linux/ioctl.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_LINUX_IOCTL_H__ 2 | #define __NVM_INTERNAL_LINUX_IOCTL_H__ 3 | #ifdef __linux__ 4 | 5 | #include 6 | #include 7 | 8 | #define NVM_IOCTL_TYPE 0x80 9 | 10 | 11 | 12 | /* Memory map request */ 13 | struct nvm_ioctl_map 14 | { 15 | uint64_t vaddr_start; 16 | size_t n_pages; 17 | uint64_t* ioaddrs; 18 | }; 19 | 20 | 21 | 22 | /* Supported operations */ 23 | enum nvm_ioctl_type 24 | { 25 | NVM_MAP_HOST_MEMORY = _IOW(NVM_IOCTL_TYPE, 1, struct nvm_ioctl_map), 26 | #ifdef _CUDA 27 | NVM_MAP_DEVICE_MEMORY = _IOW(NVM_IOCTL_TYPE, 2, struct nvm_ioctl_map), 28 | #endif 29 | NVM_UNMAP_MEMORY = _IOW(NVM_IOCTL_TYPE, 3, uint64_t) 30 | }; 31 | 32 | 33 | #endif /* __linux__ */ 34 | #endif /* __NVM_INTERNAL_LINUX_IOCTL_H__ */ 35 | -------------------------------------------------------------------------------- /src/linux/map.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_LINUX_MAP_H__ 2 | #define __NVM_INTERNAL_LINUX_MAP_H__ 3 | #ifdef __linux__ 4 | 5 | #include "linux/ioctl.h" 6 | #include "dma.h" 7 | 8 | 9 | /* 10 | * What kind of memory are we mapping. 
11 | */ 12 | enum mapping_type 13 | { 14 | MAP_TYPE_CUDA = 0x1, // CUDA device memory 15 | MAP_TYPE_HOST = 0x2, // Host memory (RAM) 16 | MAP_TYPE_API = 0x4 // Allocated by the API (RAM) 17 | }; 18 | 19 | 20 | 21 | /* 22 | * Mapping container 23 | */ 24 | struct ioctl_mapping 25 | { 26 | enum mapping_type type; // What kind of memory 27 | void* buffer; 28 | struct va_range range; // Memory range descriptor 29 | }; 30 | 31 | 32 | #endif /* __linux__ */ 33 | #endif /* __NVM_INTERNAL_LINUX_MAP_H__ */ 34 | -------------------------------------------------------------------------------- /src/mutex.cpp: -------------------------------------------------------------------------------- 1 | #ifdef __unix__ 2 | #include 3 | #include 4 | #endif 5 | 6 | #include "mutex.h" 7 | #include "dprintf.h" 8 | 9 | 10 | 11 | #ifdef __unix__ 12 | int _nvm_mutex_init(struct mutex* mtx) 13 | { 14 | int err; 15 | 16 | err = pthread_mutex_init(&mtx->mutex, NULL); 17 | if (err != 0) 18 | { 19 | dprintf("Failed to initialize mutex: %s\n", strerror(err)); 20 | return err; 21 | } 22 | 23 | return 0; 24 | } 25 | #endif 26 | 27 | 28 | 29 | #ifdef __unix__ 30 | int _nvm_mutex_free(struct mutex* mtx) 31 | { 32 | return pthread_mutex_destroy(&mtx->mutex); 33 | } 34 | #endif 35 | 36 | 37 | 38 | #ifdef __unix__ 39 | int _nvm_mutex_lock(struct mutex* mtx) 40 | { 41 | pthread_mutex_lock(&mtx->mutex); 42 | return 0; 43 | } 44 | #endif 45 | 46 | 47 | 48 | #ifdef __unix__ 49 | void _nvm_mutex_unlock(struct mutex* mtx) 50 | { 51 | pthread_mutex_unlock(&mtx->mutex); 52 | } 53 | #endif 54 | 55 | -------------------------------------------------------------------------------- /src/mutex.h: -------------------------------------------------------------------------------- 1 | #ifndef __NVM_INTERNAL_MUTEX_H__ 2 | #define __NVM_INTERNAL_MUTEX_H__ 3 | 4 | /* Forward declaration */ 5 | struct mutex; 6 | 7 | 8 | /* 9 | * We currently only support OSes that have pthreads 10 | */ 11 | #if defined( __unix__ ) 12 | #include 
#else
#error "OS is not supported"
#endif



/*
 * We don't want another level of indirection by
 * hiding implementation and using pointers, so
 * we expose the struct definition here.
 */
#if defined( __unix__ )
struct mutex
{
    pthread_mutex_t mutex;
};
#endif



/*
 * Initialize mutex handle.
 */
int _nvm_mutex_init(struct mutex* mtx);



/*
 * Destroy mutex handle.
 */
int _nvm_mutex_free(struct mutex* mtx);



/*
 * Enter critical section.
 */
int _nvm_mutex_lock(struct mutex* mtx);



/*
 * Leave critical section.
 */
void _nvm_mutex_unlock(struct mutex* mtx);



#endif /* __NVM_INTERNAL_MUTEX_H__ */
-------------------------------------------------------------------------------- /src/queue.cpp: --------------------------------------------------------------------------------
/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include
#include
#include
#include
#include
#include "regs.h"
#include "lib_util.h"
#include


/*
 * Initialize a queue descriptor.
 * cq selects completion-queue vs submission-queue entry size and doorbell
 * register. Returns EINVAL if qs is out of range (< 2, > 0x10000, or above
 * the controller's max_qs); 0 on success.
 */
int nvm_queue_clear(nvm_queue_t* queue, const nvm_ctrl_t* ctrl, bool cq, uint16_t no, uint32_t qs,
                    bool local, volatile void* vaddr, uint64_t ioaddr)
{
    if (qs < 2 || qs > 0x10000 || qs > ctrl->max_qs)
    {
        return EINVAL;
    }

    queue->no = no;
    queue->qs = qs;
    queue->es = cq ? sizeof(nvm_cpl_t) : sizeof(nvm_cmd_t);
    queue->head = 0;
    queue->tail = 0;
    queue->last = 0;
    queue->phase = 1;       // NVMe phase tag starts at 1 for a fresh queue
    queue->local = !!local; // normalize to 0/1
    queue->head_lock = 0;
    queue->tail_lock = 0;
    // queue->head_copy = 0;
    // queue->tail_copy = 0;
    queue->in_ticket = 0;
    queue->cid_ticket = 0;

    // Doorbell address is derived from queue number and the controller's
    // doorbell stride (see SQ_DBL/CQ_DBL in regs.h).
    queue->db = (cq ? CQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd) : SQ_DBL(ctrl->mm_ptr, queue->no, ctrl->dstrd));
    queue->vaddr = vaddr;
    queue->ioaddr = ioaddr;

    return 0;
}



/*
 * Reset the queue's software state (pointers and phase) without touching
 * its identity, size, or doorbell mapping.
 */
void nvm_queue_reset(nvm_queue_t* queue)
{
    queue->head = 0;
    queue->tail = 0;
    queue->last = 0;
    queue->phase = 1;
}



/*
 * Dequeue a completion, polling until one arrives or the timeout
 * (milliseconds) expires. Returns NULL on timeout.
 */
nvm_cpl_t* nvm_cq_dequeue_block(nvm_queue_t* cq, uint64_t timeout)
{
    uint64_t nsecs = timeout * 1000000UL;   // milliseconds -> nanoseconds
    nvm_cpl_t* cpl = nvm_cq_dequeue(cq);

    while (cpl == NULL && nsecs > 0)
    {
        nsecs = _nvm_delay_remain(nsecs);   // sleeps at most 1 ms per iteration
        cpl = nvm_cq_dequeue(cq);
    }

    return cpl;
}

-------------------------------------------------------------------------------- /src/regs.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_REGS_H__
#define __NVM_INTERNAL_REGS_H__

/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include


/* Controller registers (offsets per the NVMe controller register map) */
#define CAP(p)          _REG(p, 0x0000, 64)     // Controller Capabilities
#define VER(p)          _REG(p, 0x0008, 32)     // NVM Express version
#define CC(p)           _REG(p, 0x0014, 32)     // Controller Configuration
#define CSTS(p)         _REG(p, 0x001c, 32)     // Controller Status
#define AQA(p)          _REG(p, 0x0024, 32)     // Admin Queue Attributes
#define ASQ(p)          _REG(p, 0x0028, 64)     // Admin Submission Queue Base Address
#define ACQ(p)          _REG(p, 0x0030, 64)     // Admin Completion Queue Base Address


/* Read bit fields */
#define CAP$MPSMAX(p)   _RB(*CAP(p), 55, 52)    // Memory Page Size Maximum
#define CAP$MPSMIN(p)   _RB(*CAP(p), 51, 48)    // Memory Page Size Minimum
#define CAP$DSTRD(p)    _RB(*CAP(p), 35, 32)    // Doorbell Stride
#define CAP$TO(p)       _RB(*CAP(p), 31, 24)    // Timeout
#define CAP$CQR(p)      _RB(*CAP(p), 16, 16)    // Contiguous Queues Required
#define CAP$MQES(p)     _RB(*CAP(p), 15, 0)     // Maximum Queue Entries Supported

#define CSTS$RDY(p)     _RB(*CSTS(p), 0, 0)
// Ready indicator


/* Write bit fields */
#define CC$IOCQES(v)    _WB(v, 23, 20)          // IO Completion Queue Entry Size
#define CC$IOSQES(v)    _WB(v, 19, 16)          // IO Submission Queue Entry Size
#define CC$MPS(v)       _WB(v, 10, 7)           // Memory Page Size
/* NOTE(review): the expansion hard-codes 0 and ignores v -- intentional while
 * only the NVM command set (CSS=0) is supported, but the unused parameter is
 * easy to misread; confirm before relying on CC$CSS with a non-zero argument. */
#define CC$CSS(v)       _WB(0, 3, 1)            // IO Command Set Selected (0=NVM Command Set)
#define CC$EN(v)        _WB(v, 0, 0)            // Enable

#define AQA$ACQS(v)     _WB(v, 27, 16)          // Admin Completion Queue Size
#define AQA$ASQS(v)     _WB(v, 11, 0)           // Admin Submission Queue Size


/* SQ doorbell register offset (queue y, doorbell stride dstrd) */
#define SQ_DBL(p, y, dstrd)    \
    ((volatile uint32_t*) (((volatile unsigned char*) (p)) + 0x1000 + ((2*(y)) * (4 << (dstrd)))) )


/* CQ doorbell register offset (queue y, doorbell stride dstrd) */
#define CQ_DBL(p, y, dstrd)    \
    ((volatile uint32_t*) (((volatile unsigned char*) (p)) + 0x1000 + ((2*(y) + 1) * (4 << (dstrd)))) )

#endif /* __NVM_INTERNAL_REGS_H__ */
-------------------------------------------------------------------------------- /src/rpc.h: --------------------------------------------------------------------------------
#ifndef __NVM_INTERNAL_RPC_H__
#define __NVM_INTERNAL_RPC_H__

/* NOTE(review): header names lost in extraction (bare "#include"). */
#include
#include
#include
#include
#include


/* Forward declaration */
struct nvm_admin_reference;



/*
 * Callback to release a local binding handle.
 */
typedef void (*rpc_free_handle_t)(uint32_t key, void* data);



/*
 * Callback to release a remote binding reference.
 */
typedef void (*rpc_free_binding_t)(void* data);



/*
 * RPC client-side stub definition.
 *
 * Should perform the following actions.
 * - marshal command
 * - send command to remote host
 * - wait for completion (or timeout)
 * - unmarshal completion and return status
 */
typedef int (*rpc_stub_t)(void*, nvm_cmd_t*, nvm_cpl_t*);



/*
 * Allocate a reference wrapper and increase controller reference.
 */
int _nvm_ref_get(nvm_aq_ref* handle, const nvm_ctrl_t* ctrl);



/*
 * Free reference wrapper and decrease controller reference.
 */
void _nvm_ref_put(nvm_aq_ref ref);



/*
 * Insert binding handle to server's list of handles.
 * If key is already found, this function will fail.
 */
int _nvm_rpc_handle_insert(nvm_aq_ref ref, uint32_t key, void* data, rpc_free_handle_t release);



/*
 * Remove local binding handle.
 * This function will call the release callback.
 */
void _nvm_rpc_handle_remove(nvm_aq_ref ref, uint32_t key);



/*
 * Bind reference to remote handle.
 */
int _nvm_rpc_bind(nvm_aq_ref ref, void* data, rpc_free_binding_t release, rpc_stub_t stub);



/*
 * Execute a local admin command.
 */
int _nvm_local_admin(nvm_aq_ref ref, const nvm_cmd_t* cmd, nvm_cpl_t* cpl);



#endif /* __NVM_INTERNAL_RPC_H__ */
--------------------------------------------------------------------------------