├── .clang-format ├── include ├── alias_table.cuh ├── kernel.cuh ├── instance.cuh ├── app.cuh ├── roller.cuh └── frontier.cuh ├── old ├── shmem │ ├── alias │ ├── Makefile │ ├── alias_table.cu │ ├── tmp.cu │ ├── util.cu │ ├── util.cuh │ ├── vec.cuh │ └── alias_table.cuh ├── Makefile ├── vec.cuh ├── common.cuh ├── alias.cu └── graph.cuh ├── tools ├── getDegree ├── Makefile ├── getsnapgraph.sh ├── drop-caches.sh ├── getwebgraph.sh ├── getDegree.cu └── gr2npz.cu ├── .gitmodules ├── scripts ├── saint.sh ├── debug.sh ├── data.sh ├── simple_args_parsing.sh ├── numa.sh ├── graphwalker.sh ├── mem_test.sh ├── trw-biased.sh ├── my.sh ├── biased.sh ├── trw-unbiased.sh ├── trans.py ├── trans2.py ├── csaw.sh ├── multiple-gpu.sh ├── fig8_biased.sh ├── table3_unbiased.sh └── test.sh ├── .gitignore ├── src ├── api │ ├── bias_static.cu │ ├── bias_degree.cu │ └── bias_node2vec.cu ├── kernel.cu ├── util.cu ├── createTable.cu ├── online_sample.cu ├── offline_walk.cu └── online_sample_twc.cu ├── Makefile ├── .vscode ├── c_cpp_properties.json ├── tasks.json ├── launch.json └── settings.json ├── cmake └── FindNuma.cmake ├── result ├── nextdoor_unbias.sh ├── nextdoor.sh └── knightking.sh ├── test ├── tmp.cu └── alias_table.cu ├── figs ├── dynamic.sh ├── with_nextdoor.sh ├── offline.sh ├── online.sh ├── unbiased.sh ├── test_driver.sh ├── v100.sh ├── scale.sh └── spec.sh ├── CMakeLists.txt └── README.md /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google -------------------------------------------------------------------------------- /include/alias_table.cuh: -------------------------------------------------------------------------------- 1 | # include "alias_table_new.cuh" -------------------------------------------------------------------------------- /old/shmem/alias: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wpybtw/Skywalker/HEAD/old/shmem/alias -------------------------------------------------------------------------------- /tools/getDegree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wpybtw/Skywalker/HEAD/tools/getDegree -------------------------------------------------------------------------------- /old/Makefile: -------------------------------------------------------------------------------- 1 | all: alias 2 | 3 | 4 | alias: alias.cu alias.cuh #-arch=sm_75 5 | nvcc alias.cu -G -o alias -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "others/C-SAW"] 2 | path = others/C-SAW 3 | url = https://github.com/concept-inversion/C-SAW 4 | [submodule "deps/gflags"] 5 | path = deps/gflags 6 | url = https://github.com/gflags/gflags 7 | -------------------------------------------------------------------------------- /scripts/saint.sh: -------------------------------------------------------------------------------- 1 | cd GraphSAINT 2 | python -m graphsaint.tensorflow_version.train --data_prefix ./data/amazon --train_config ./train_config/table2/amazon_2_rw.yml --gpu -1 3 | 4 | python -m graphsaint.tensorflow_version.train --data_prefix ./data/ppi --train_config ./train_config/table2/ppi2_rw.yml --gpu -1 -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | C-SAW/* 2 | alias-method/* 3 | build/* 4 | bin/* 5 | deps/* 6 | bin_old/* 7 | 8 | perf_result/* 9 | .clangd/* 10 | .VSCodeCounter/* 11 | 12 | *.o 13 | *.cubin 14 | *.fatbin 15 | *.fatbin.* 16 | *.reg.cu 17 | *.ii 18 | *.gpu 19 | *.stub.c 20 | *.module_id 21 | *.cudafe1.* 22 | *.reg.c 23 | *.ptx 24 | *.ncu-rep 25 | -------------------------------------------------------------------------------- /scripts/debug.sh: -------------------------------------------------------------------------------- 1 | cuobjdump -sass ./sampler_high_degree.cubin >dump.txt 2 | 3 | nvdisasm ./sampler_high_degree.cubin -g > dump.disasm 4 | 5 | cuda-gdb ./main_gbuffer lj ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 1 1 2 2 1 6 | 7 | set cuda memcheck on 8 | r lj ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 10 1 2 2 1 -------------------------------------------------------------------------------- /include/kernel.cuh: -------------------------------------------------------------------------------- 1 | #include "sampler.cuh" 2 | #include "sampler_result.cuh" 3 | 4 | __global__ void init_kernel_ptr(Sampler *sampler, bool biasInit); 5 | __global__ void init_kernel_ptr(Walker *sampler, bool biasInit); 6 | // __global__ void initSeed(ResultBase *results, uint *seeds, size_t 7 | // size); 8 | 9 | __device__ bool AddTillSize(uint *size, size_t target_size); 10 | 11 | __global__ void BindResultKernel(Walker *walker); -------------------------------------------------------------------------------- /old/shmem/Makefile: -------------------------------------------------------------------------------- 1 | all: util alias_table alias 2 | 3 | CUFLAG= -G -g -rdc=true -gencode=arch=compute_75,code=sm_75 -std=c++11 4 | 5 | util: util.cuh util.cu 6 | nvcc util.cu $(CUFLAG) -c 7 | 8 | alias_table: alias_table.cuh alias_table.cu 9 | nvcc alias_table.cu $(CUFLAG) -c 10 | 11 | 12 | alias: tmp.cu util.o alias_table.o #-arch=sm_75 13 | nvcc tmp.cu util.o alias_table.o $(CUFLAG) -o alias 14 | 15 | clean: 16 | rm *.o -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | all: getDegree 2 | # util sampler main 3 | 4 | CUFLAG= -I../include -rdc=true -gencode=arch=compute_75,code=sm_75 -std=c++11 -res-usage -lineinfo -Xptxas -v #-keep #-Xptxas -O3,-v 5 | debug: CUFLAG += -G -g 6 | debug: main main_gbuffer main_spliced 7 | 8 | # main: main.cu util.cu sampler.cu 9 | 10 | getDegree: getDegree.cu ../src/util.cu 11 | nvcc getDegree.cu ../src/util.cu $(CUFLAG) -o getDegree 12 | 13 | -------------------------------------------------------------------------------- /src/api/bias_static.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-12-03 16:46:11 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-27 20:41:22 6 | * @FilePath: /sampling/src/api/bias_static.cu 7 | */ 8 | #include "gpu_graph.cuh" 9 | DEFINE_bool(weight, true, "load edge weight from file"); 10 | // DEFINE_bool(bias, true, "biased or unbiased sampling"); 11 | 12 | __device__ float gpu_graph::getBias(edge_t dst, uint src, uint idx) { 13 | return adjwgt[dst]; 14 | } 15 | __device__ void gpu_graph::UpdateWalkerState(uint idx, uint info){} -------------------------------------------------------------------------------- /tools/getsnapgraph.sh: 
-------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-04 22:39:00 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 14:24:09 6 | # @FilePath: /skywalker/scripts/getsnapgraph.sh 7 | ### 8 | 9 | cd ~/data 10 | wget http://data.law.di.unimi.it/webdata/$1/$1.properties 11 | # mv $1.txt $1 12 | # python2 ~/graph/gunrock/tools/associate_weights.py ~/data/$1 13 | 14 | # mv $1.random.weight.mtx $1.w.edge 15 | # ~/graph/Galois/build/tools/graph-convert/graph-convert -edgelist2gr ~/data/$1.w.edge ~/data/$1.w.gr -edgeType=uint32 -------------------------------------------------------------------------------- /src/api/bias_degree.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-12-08 17:22:17 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-27 20:41:05 6 | * @FilePath: /sampling/src/api/bias_degree.cu 7 | */ 8 | 9 | #include "gpu_graph.cuh" 10 | DEFINE_bool(weight, false, "load edge weight from file"); 11 | // DEFINE_bool(bias, false, "biased or unbiased sampling"); 12 | 13 | __device__ float gpu_graph::getBias(uint dst, uint src, uint idx) { 14 | // printf("degree\t"); 15 | return xadj[dst + 1] - xadj[dst]; 16 | } 17 | __device__ void gpu_graph::UpdateWalkerState(uint idx, uint info){} -------------------------------------------------------------------------------- /tools/drop-caches.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-10 22:36:32 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 22:37:02 6 | # @FilePath: /skywalker/tools/drop-caches.sh 7 | ### 8 | # Mark it as executable using chmod a+x drop-caches 9 | # Call it using sudo ./drop-caches 10 | # If you place the script in /usr/local/bin you can call it using sudo drop-caches 11 | 12 | #!/bin/bash 13 | if [[ $(id -u) -ne 0 ]] ; then echo "Please run as root" ; exit 1 ; fi 14 | sync; echo 1 > /proc/sys/vm/drop_caches 15 | sync; echo 2 > /proc/sys/vm/drop_caches 16 | sync; echo 3 > /proc/sys/vm/drop_caches -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: build 2 | debug: debug 3 | 4 | 5 | .PHONY:build debug test 6 | 7 | SRC_DIR:= src 8 | SRC_FILES := $(wildcard $(SRC_DIR)/*.cu) 9 | HEADER_FILES := $(wildcard include/*.cuh) 10 | 11 | build: 12 | -mkdir build;cd build;cmake ..;make -j 13 | 14 | debug: 15 | -mkdir build;cd build;cmake .. 
-DCMAKE_BUILD_TYPE=Debug;make -j 16 | 17 | test: 18 | ./build/skywalker --k 2 --d 2 --ol=1 --input ~/data/lj.w.gr --ngpu=4 --hd=1 --n=400000 19 | ./build/skywalker -bias=1 --ol=1 --ngpu=1 --s --sage --input ~/data/orkut.w.gr -v 20 | ./build/skywalker -bias=1 --ol=0 --ngpu=1 --s --sage --input ~/data/orkut.w.gr -v 21 | 22 | 23 | clean: 24 | cd build;make clean -------------------------------------------------------------------------------- /tools/getwebgraph.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-04 22:39:00 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-04 22:52:19 6 | # @FilePath: /sampling/scripts/downloadgraph.sh 7 | ### 8 | 9 | cd ~/data 10 | 11 | 12 | wget http://data.law.di.unimi.it/webdata/$1/$1.properties 13 | wget http://data.law.di.unimi.it/webdata/$1/$1.graph 14 | 15 | cd webgraph-big-3.5.1 16 | 17 | java -cp "*" it.unimi.dsi.webgraph.ArcListASCIIGraph ../$1 ../$1 18 | 19 | cd .. 20 | python2 ~/graph/gunrock/tools/associate_weights.py ~/data/$1 21 | 22 | mv $1.random.weight.mtx $1.w.edge 23 | ~/graph/Galois/build/tools/graph-convert/graph-convert -edgelist2gr ~/data/$1.w.edge ~/data/$1.w.gr -edgeType=uint32 -------------------------------------------------------------------------------- /scripts/data.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-17 13:30:33 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 14:22:31 6 | # @FilePath: /skywalker/scripts/data.sh 7 | ### 8 | DATA=( sk-2005 friendster) # $orkut uk-union rmat29 web-ClueWeb09) twitter-2010 9 | # lj orkut web-Google uk-2005 10 | 11 | ED=".w.edge" 12 | 13 | EL=".el" 14 | 15 | 16 | # # for c-saw 17 | # for idx in $(seq 1 ${#DATA[*]}) 18 | # do 19 | # ~/graph_project_start/tuple_text_to_binary_csr_mem/text_to_bin.bin ~/data/${DATA[idx-1]}${ED} 0 0 40 20 | # done 21 | 22 | 23 | for idx in $(seq 1 ${#DATA[*]}) 24 | do 25 | ~/sampling/KnightKing/build/bin/gconverter -i ~/data/${DATA[idx-1]}${EL} -o ~/data/${DATA[idx-1]}.uw.data -s unweighted 26 | done -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Linux", 5 | "includePath": [ 6 | // "${workspaceFolder}/**", 7 | "${workspaceFolder}/include/**", 8 | // "${workspaceFolder}/build/deps/gflags/include/gflags", 9 | "${workspaceFolder}/build/deps/gflags/include", 10 | "/usr/local/cuda/include", 11 | "/usr/local/cuda-11.0/targets/x86_64-linux/include" 12 | // "/usr/lib/gcc/x86_64-linux-gnu/7/include" 13 | ], 14 | "defines": [], 15 | "compilerPath": "/usr/bin/clang", // "/usr/local/cuda/bin/nvcc", // 16 | "cStandard": "c11", 17 | "cppStandard": "c++17", 18 | "intelliSenseMode": "gcc-x64", 19 | "compileCommands": "${workspaceFolder}/compile_commands.json" 20 | } 21 | ], 22 | "version": 4 23 | } 24 | -------------------------------------------------------------------------------- /src/api/bias_node2vec.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: Pengyu Wang 3 | * @Date: 2020-12-08 17:22:17 4 | * @LastEditTime: 2020-12-27 20:01:54 5 | * @Description: 6 | * @FilePath: /sampling/src/api/bias_node2vec.cu 7 | */ 8 | 9 | #include 10 | #include "gpu_graph.cuh" 11 | 12 | DEFINE_bool(weight, true, "load edge weight from file"); 13 | // 
DEFINE_bool(bias, true, "biased or unbiased sampling"); 14 | 15 | __device__ float gpu_graph::getBias(edge_t dst, uint src, uint idx) { 16 | // if(LID==0) 17 | // printf("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__); 18 | if (this->result->state[idx].last == dst) { 19 | return adjwgt[dst] / this->result->p; 20 | } else if (CheckConnect(this->result->state[idx].last, dst)) { 21 | // printf("Connect\t"); 22 | return adjwgt[dst]; 23 | } else { 24 | // printf("NotConnect\t"); 25 | return adjwgt[dst] / this->result->q; 26 | } 27 | } 28 | __device__ void gpu_graph::UpdateWalkerState(uint idx, uint info){ 29 | this->result->state[idx].last = info; 30 | } -------------------------------------------------------------------------------- /scripts/simple_args_parsing.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # a simple way to parse shell script arguments 5 | # 6 | # please edit and use to your hearts content 7 | # 8 | 9 | 10 | ENVIRONMENT="dev" 11 | DB_PATH="/data/db" 12 | 13 | function usage() 14 | { 15 | echo "if this was a real script you would see something useful here" 16 | echo "" 17 | echo "./simple_args_parsing.sh" 18 | echo "\t-h --help" 19 | echo "\t--environment=$ENVIRONMENT" 20 | echo "\t--db-path=$DB_PATH" 21 | echo "" 22 | } 23 | 24 | while [ "$1" != "" ]; do 25 | PARAM=`echo $1 | awk -F= '{print $1}'` 26 | VALUE=`echo $1 | awk -F= '{print $2}'` 27 | case $PARAM in 28 | -h | --help) 29 | usage 30 | exit 31 | ;; 32 | --environment) 33 | ENVIRONMENT=$VALUE 34 | ;; 35 | --db-path) 36 | DB_PATH=$VALUE 37 | ;; 38 | *) 39 | echo "ERROR: unknown parameter \"$PARAM\"" 40 | usage 41 | exit 1 42 | ;; 43 | esac 44 | shift 45 | done 46 | 47 | 48 | echo "ENVIRONMENT is $ENVIRONMENT"; 49 | echo "DB_PATH is $DB_PATH"; -------------------------------------------------------------------------------- /scripts/numa.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: https://stackoverflow.com/questions/16056800/multi-gpu-programming-using-cuda-on-a-numa-machine 3 | # @Date: 2020-12-29 18:14:52 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2020-12-29 18:16:18 6 | # @FilePath: /sampling/scripts/numa.sh 7 | ### 8 | #!/bin/bash 9 | #this script will output a listing of each GPU and it's CPU core affinity mask 10 | file="/proc/driver/nvidia/gpus/0000:3d:00.0/information" 11 | if [ ! -e $file ]; then 12 | echo "Unable to locate any GPUs!" 
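# (descriptive comment added for clarity) The existence check above uses the
# hard-coded PCI address 0000:3d:00.0, so the script reports "no GPUs" when
# nothing sits at that exact address; adjust the path for the local machine.
# The else branch below walks /proc/driver/nvidia/gpus/$gpu_num/information and
# prints each GPU index with the CPU core affinity mask read from
# /sys/class/pci_bus/<bus>/cpuaffinity.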
13 | else 14 | gpu_num=0 15 | file="/proc/driver/nvidia/gpus/$gpu_num/information" 16 | if [ "-v" == "$1" ]; then echo "GPU: CPU CORE AFFINITY MASK: PCI:"; fi 17 | while [ -e $file ] 18 | do 19 | line=`grep "Bus Location" $file | { read line; echo $line; }` 20 | pcibdf=${line:14} 21 | pcibd=${line:14:7} 22 | file2="/sys/class/pci_bus/$pcibd/cpuaffinity" 23 | read line2 < $file2 24 | if [ "-v" == "$1" ]; then 25 | echo " $gpu_num $line2 $pcibdf" 26 | else 27 | echo " $gpu_num $line2 " 28 | fi 29 | gpu_num=`expr $gpu_num + 1` 30 | file="/proc/driver/nvidia/gpus/$gpu_num/information" 31 | done 32 | fi -------------------------------------------------------------------------------- /scripts/graphwalker.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-07 19:01:38 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 18:34:44 6 | # @FilePath: /skywalker/scripts/graphwalker.sh 7 | ### 8 | DATA=( lj.w.edge arabic-2005.w.edge uk-2005.w.edge sk-2005 friendster.w.edge) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 9 | NV=( 4847571 22744077 39459923 50636151 124836180) 10 | 11 | # DATA=( uk-2005.w.edge) 12 | # NV=( 39459923 ) 13 | 14 | # grep "00_runtime\|g_loadSubGraph:\|file:" 15 | 16 | ITR=1 17 | 18 | ED=".w.edge" 19 | EXE="./bin/apps/rwdomination" #main_degree 20 | DIR="/home/pywang/sampling/GraphWalker" 21 | 22 | cd $DIR 23 | # ${EXE} file ~/data/${DATA[idx-1]}${ED} firstsource 0 numsources 400000 walkspersource 1 maxwalklength 100 prob 0.0 L 100 N 4847571 24 | echo "-------------------------------------------------------unbias rw 40000 100" 25 | for idx in $(seq 1 ${#DATA[*]}) 26 | do 27 | ./bin/apps/rawrandomwalks file ~/data/${DATA[idx-1]} R 40000 L 100 N ${NV[idx-1]} 28 | done 29 | 30 | # echo "-------------------------------------------------------unbias ppr 40000 100" 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # ./bin/apps/msppr file ~/data/${DATA[idx-1]} firstsource 0 numsources 40000 walkspersource 1 maxwalklength 100 prob 0.15 34 | # done 35 | 36 | 37 | -------------------------------------------------------------------------------- /old/vec.cuh: -------------------------------------------------------------------------------- 1 | 2 | template class Vector_itf{ 3 | public: 4 | Vector(){} 5 | ~Vector(){} 6 | virtual void init(){} 7 | virtual void add(){} 8 | virtual void clean(){} 9 | virtual bool empty(){} 10 | virtual size_t size(){} 11 | virtial T &operator[](int id){} 12 | }; 13 | 14 | 15 | template class Vector { 16 | size_t *size; 17 | size_t *capacity; 18 | T *data = nullptr; 19 | // bool use_self_buffer = false; 20 | // T data[VECTOR_SHMEM_SIZE]; 21 | Vector() {} 22 | __host__ ~Vector() { 23 | if (use_self_buffer && data != nullptr) 24 | cudaFree(data); 25 | } 26 | __host__ Vector(int _capacity) { 27 | cudaMallocManaged(&size, sizeof(size_t)); 28 | cudaMallocManaged(&capacity, sizeof(size_t)); 29 | *capacity = _capacity; 30 | *size=0; 31 | cudaMalloc(&data, _capacity * sizeof(T)); 32 | use_self_buffer = true; 33 | } 34 | __host__ __device__ size_t& size(){ 35 | return *size; 36 | } 37 | __device__ void add(T t) { 38 | size_t old = atomicAdd(size, 1); 39 | if (old < *capacity) 40 | data[old] = t; 41 | else 42 | printf("wtf vector overflow"); 43 | } 44 | __device__ void clean() { *size = 0; } 45 | __device__ bool empty() { 46 | if (*size == 0) 47 | return true; 48 | return false; 49 | } 50 | __device__ T &operator[](int id) { return data[id]; } 51 | }; 
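
The bias callbacks under src/api/ above (bias_static.cu, bias_degree.cu, bias_node2vec.cu) are the per-edge extension point for biased sampling: each file supplies a __device__ gpu_graph::getBias, a gpu_graph::UpdateWalkerState, and its own DEFINE_bool(weight, ...) gflag. A new bias can follow the same shape. The snippet below is only a sketch of a hypothetical uniform bias; it is not part of the repository, the file name is invented, and it assumes gpu_graph declares these two members exactly as the existing api files do.

/* Hypothetical src/api/bias_uniform.cu -- illustration only, not in the repo. */
#include "gpu_graph.cuh"
DEFINE_bool(weight, false, "load edge weight from file");

// Every candidate edge gets the same bias, so the sampler behaves uniformly.
__device__ float gpu_graph::getBias(edge_t dst, uint src, uint idx) {
  return 1.0f;
}
// A memoryless bias has no per-walker state to update between hops.
__device__ void gpu_graph::UpdateWalkerState(uint idx, uint info) {}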
-------------------------------------------------------------------------------- /src/kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-11-25 13:28:14 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-07 16:32:47 6 | * @FilePath: /sampling/src/kernel.cu 7 | */ 8 | #include "gpu_graph.cuh" 9 | #include "kernel.cuh" 10 | 11 | // __global__ void initSeed(ResultBase *results, uint *seeds, size_t size) 12 | // { 13 | // if (TID < size) { 14 | // results[TID].data[0] = seeds[TID]; 15 | // } 16 | // } 17 | __global__ void BindResultKernel(Walker *walker) { 18 | if (TID == 0) walker->BindResult(); 19 | } 20 | 21 | __global__ void init_kernel_ptr(Sampler *sampler, bool biasInit) { 22 | if (TID == 0) { 23 | sampler->result.setAddrOffset(); 24 | if (biasInit) 25 | for (size_t i = 0; i < sampler->result.hop_num; i++) { 26 | sampler->result.high_degrees[i].Init(); 27 | } 28 | } 29 | } 30 | 31 | __global__ void init_kernel_ptr(Walker *sampler, bool biasInit) { 32 | if (TID == 0) { 33 | sampler->result.setAddrOffset(); 34 | if (biasInit) 35 | for (size_t i = 0; i < sampler->result.hop_num; i++) { 36 | sampler->result.high_degrees[i].Init(); 37 | } 38 | } 39 | } 40 | 41 | __device__ bool AddTillSize(uint *size, 42 | size_t target_size) // T *array, T t, 43 | { 44 | uint old = atomicAdd(size, 1); 45 | if (old < target_size) { 46 | return true; 47 | } 48 | return false; 49 | } -------------------------------------------------------------------------------- /scripts/mem_test.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Author: Pengyu Wang 3 | # @Date: 2021-01-15 14:35:15 4 | # @LastEditTime: 2021-01-15 14:38:16 5 | # @Description: 6 | # @FilePath: /skywalker/scripts/mem_test.sh 7 | ### 8 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 9 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 10 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 11 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 12 | 13 | # DATA=( sk-2005 friendster) 14 | # HD=( 4 1 ) 15 | ITR=1 16 | NG=4 17 | 18 | GR=".w.gr" 19 | EXE="./bin/main" #main_degree 20 | SG="--ngpu=1 --s" 21 | 22 | # node2vec always online 23 | # export OMP_PROC_BIND=TRUE 24 | # GOMP_CPU_AFFINITY="0-9 10-19 20-29 30-99" 25 | # OMP_PLACES=cores 26 | # OMP_PROC_BIND=close 27 | # correct one 28 | # OMP_PLACES=cores OMP_PROC_BIND=spread 29 | # --randomweight=1 --weightrange=2 30 | 31 | 32 | 33 | echo "-------------------------------------------------------unbias sample 2 20 40k" 34 | for idx in $(seq 1 ${#DATA[*]}) 35 | do 36 | ./bin/main --input ~/data/${DATA[idx-1]}${GR} --d 2 --k 20 --n 40000 --bias=0 --rw=0 --ngpu=1 --ol=0 --umgraph=1 -v 37 | ./bin/main --input ~/data/${DATA[idx-1]}${GR} --d 2 --k 20 --n 40000 --bias=0 --rw=0 --ngpu=1 --ol=0 --hmgraph=1 -v 38 | ./bin/main --input ~/data/${DATA[idx-1]}${GR} --d 2 --k 20 --n 40000 --bias=0 --rw=0 --ngpu=1 --ol=0 --gmgraph=1 --gmid=1 -v 39 | done 40 | 41 | 42 | -------------------------------------------------------------------------------- /tools/getDegree.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "gpu_graph.cuh" 15 | // #include 
"graph.h" 16 | // #include "sampler.cuh" 17 | 18 | using namespace std; 19 | 20 | int main(int argc, char *argv[]) { 21 | if (argc != 5) { 22 | std::cout << "Input: ./exe " 23 | " <# of samples> " 24 | " <#GPUs>\n"; 25 | exit(0); 26 | } 27 | // 28 | // SampleSize, FrontierSize, NeighborSize 29 | // printf("MPI started\n"); 30 | // int n_blocks = atoi(argv[4]); 31 | // int block_size = atoi(argv[5]); 32 | // int SampleSize = atoi(argv[5]); 33 | // int FrontierSize = atoi(argv[6]); 34 | // int NeighborSize = atoi(argv[7]); 35 | // int Depth = atoi(argv[8]); 36 | // int total_GPU = atoi(argv[9]); 37 | 38 | const char *beg_file = argv[2]; 39 | const char *csr_file = argv[3]; 40 | const char *weight_file = argv[3]; 41 | int node = atoi(argv[4]); 42 | 43 | graph *ginst = 44 | new graph( 45 | beg_file, csr_file, weight_file); 46 | gpu_graph ggraph(ginst); 47 | printf("node %d has degree %d\n", node, ggraph.getDegree_h(node)); 48 | 49 | return 0; 50 | } -------------------------------------------------------------------------------- /cmake/FindNuma.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # (c) Copyright 2016 Hewlett Packard Enterprise Development LP 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Find the numa policy library. 18 | # Output variables: 19 | # NUMA_INCLUDE_DIR : e.g., /usr/include/. 20 | # NUMA_LIBRARY : Library path of numa library 21 | # NUMA_FOUND : True if found. 
22 | FIND_PATH(NUMA_INCLUDE_DIR NAME numa.h 23 | HINTS $ENV{HOME}/local/include /opt/local/include /usr/local/include /usr/include) 24 | 25 | FIND_LIBRARY(NUMA_LIBRARY NAME numa 26 | HINTS $ENV{HOME}/local/lib64 $ENV{HOME}/local/lib /usr/local/lib64 /usr/local/lib /opt/local/lib64 /opt/local/lib /usr/lib64 /usr/lib 27 | ) 28 | 29 | IF (NUMA_INCLUDE_DIR AND NUMA_LIBRARY) 30 | SET(NUMA_FOUND TRUE) 31 | MESSAGE(STATUS "Found numa library: inc=${NUMA_INCLUDE_DIR}, lib=${NUMA_LIBRARY}") 32 | ELSE () 33 | SET(NUMA_FOUND FALSE) 34 | MESSAGE(STATUS "WARNING: Numa library not found.") 35 | MESSAGE(STATUS "Try: 'sudo apt-get install libnuma libnuma-dev' (or sudo yum install numactl numactl-devel)") 36 | ENDIF () -------------------------------------------------------------------------------- /scripts/trw-biased.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-10-21 4 | # @LastEditors: Xu Cheng 5 | # @FilePath: /ThunderRW 6 | ### 7 | DATA=(GG lj OK AB UK SK FS) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 8 | 9 | 10 | echo "biased rw" >>"/home/xucheng/ThunderRW/biased3.csv" 11 | for idx in $(seq 1 ${#DATA[*]}) 12 | do 13 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/biased3.csv" 14 | ./build/random_walk/deepwalk.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -ew -l 100 >>"/home/xucheng/ThunderRW/biased3.csv" 2>&1 15 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 16 | done 17 | 18 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 19 | 20 | echo "biased ppr" >>"/home/xucheng/ThunderRW/biased3.csv" 21 | for idx in $(seq 1 ${#DATA[*]}) 22 | do 23 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/biased3.csv" 24 | ./build/random_walk/ppr.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -sp 0.15 -em 1 -sm 2 -l 100 >>"/home/xucheng/ThunderRW/biased3.csv" 2>&1 25 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 26 | done 27 | 28 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 29 | 30 | echo "biased node2vec" >>"/home/xucheng/ThunderRW/biased3.csv" 31 | for idx in $(seq 1 ${#DATA[*]}) 32 | do 33 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/biased3.csv" 34 | ./build/random_walk/node2vec.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -ew -l 100 >>"/home/xucheng/ThunderRW/biased3.csv" 2>&1 35 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 36 | done 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /old/common.cuh: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_CUH 2 | #define COMMON_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #define BLOCK_SIZE 512 27 | 28 | #define ALPHA 0.85 29 | #define EPSILON 0.01 30 | 31 | #define ACT_TH 0.01 32 | 33 | using std::cout; 34 | using std::endl; 35 | using std::flush; 36 | using std::ifstream; 37 | using std::ofstream; 38 | using std::string; 39 | using std::stringstream; 40 | using std::to_string; 41 | using std::vector; 42 | 43 | using uint = unsigned int; 44 | using ulong = unsigned long; 45 | 46 | using vtx_t = unsigned int; // vertex_num < 4B 47 | using edge_t = unsigned int; // vertex_num < 4B 48 | // using edge_t = unsigned long long int; // vertex_num > 4B 49 | 
using weight_t = unsigned int; 50 | 51 | 52 | const unsigned int INFINIT = std::numeric_limits::max() - 1; 53 | 54 | #define TID_1D (threadIdx.x + blockIdx.x * blockDim.x) 55 | 56 | template 57 | void printD(T *DeviceData, int n) 58 | { 59 | T *tmp = new T[n]; 60 | cudaMemcpy(tmp, DeviceData, n * sizeof(T), cudaMemcpyDeviceToHost); 61 | for (size_t i = 0; i < n; i++) 62 | { 63 | cout << tmp[i] << "\t"; 64 | if (i % 10 == 9) 65 | { 66 | cout << endl; 67 | } 68 | } 69 | } 70 | 71 | #endif -------------------------------------------------------------------------------- /include/instance.cuh: -------------------------------------------------------------------------------- 1 | #include "gpu_graph.cuh" 2 | #include "result.cuh" 3 | // #include "alias_table.cuh" 4 | #include 5 | 6 | // struct sample_result; 7 | // class Sampler; 8 | 9 | template 10 | void printH(T *ptr, int size) { 11 | T *ptrh = new T[size]; 12 | CUDA_RT_CALL(cudaMemcpy(ptrh, ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); 13 | printf("printH: "); 14 | for (size_t i = 0; i < size; i++) { 15 | // printf("%d\t", ptrh[i]); 16 | std::cout << ptrh[i] << "\t"; 17 | } 18 | printf("\n"); 19 | delete ptrh; 20 | } 21 | 22 | class InstanceBase { 23 | public: 24 | gpu_graph ggraph; 25 | 26 | public: 27 | InstanceBase(gpu_graph graph) : ggraph(graph) {} 28 | ~InstanceBase() {} 29 | }; 30 | 31 | class WalkInstance : InstanceBase { 32 | public: 33 | ResultsRW result; 34 | uint num_seed; 35 | 36 | public: 37 | WalkInstance(gpu_graph graph) : InstanceBase(graph) {} 38 | ~WalkInstance() {} 39 | void SetSeed(uint _num_seed, uint _hop_num) { 40 | // printf("%s\t %s :%d\n", __FILE__, __PRETTY_FUNCTION__, __LINE__); 41 | num_seed = _num_seed; 42 | std::random_device rd; 43 | std::mt19937 gen(56); 44 | std::uniform_int_distribution<> dis(1, 10000); // ggraph.vtx_num); 45 | uint *seeds = new uint[num_seed]; 46 | for (int n = 0; n < num_seed; ++n) { 47 | #ifdef check 48 | // seeds[n] = n; 49 | seeds[n] = 1; 50 | // seeds[n] = 339; 51 | #else 52 | // seeds[n] = n; 53 | seeds[n] = dis(gen); 54 | #endif // check 55 | } 56 | result.init(num_seed, _hop_num, seeds); 57 | } 58 | // void Start(); 59 | }; 60 | 61 | void Start(WalkInstance WalkInstance); 62 | void Start_high_degree(WalkInstance WalkInstance); 63 | -------------------------------------------------------------------------------- /old/shmem/alias_table.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | // template 3 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 4 | Vector out) { 5 | 6 | __shared__ alias_table_constructor_shmem tables[WARP_PER_SM]; 7 | alias_table_constructor_shmem *table = &tables[WID]; 8 | // printf("table size %llu\n",table->size); 9 | 10 | table->Init(); 11 | if (LID == 0) { 12 | printf("table large size %llu\n", table->large.capacity); 13 | } 14 | if (TID == 0) { 15 | printf("load\n"); 16 | } 17 | table->load(ids, weights, size); 18 | if (TID == 0) { 19 | printf("construct\n"); 20 | } 21 | table->construct(); 22 | if (TID == 0) { 23 | printf("roll\n"); 24 | } 25 | table->roll_atomic(out, num); 26 | if (LID == 0) { 27 | printf("out: "); 28 | printD(out.data, out.Size()); 29 | } 30 | } 31 | 32 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 33 | int * out) { 34 | 35 | __shared__ alias_table_constructor_shmem tables[WARP_PER_SM]; 36 | alias_table_constructor_shmem *table = &tables[WID]; 37 | // printf("table size %llu\n",table->size); 38 | 39 | 
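  // (descriptive comment added for clarity) This overload differs from the
  // kernel above only in its output: samples are rolled into a plain int*
  // buffer instead of the managed Vector<int>, while the per-warp
  // shared-memory table goes through the same Init / load / construct /
  // roll_atomic sequence.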
table->Init(); 40 | if (LID == 0) { 41 | printf("table large size %llu\n", table->large.capacity); 42 | } 43 | if (TID == 0) { 44 | printf("load\n"); 45 | } 46 | table->load(ids, weights, size); 47 | if (TID == 0) { 48 | printf("construct\n"); 49 | } 50 | table->construct(); 51 | if (TID == 0) { 52 | printf("roll\n"); 53 | } 54 | table->roll_atomic(out, num); 55 | if (LID == 0) { 56 | printf("out: "); 57 | printD(out, num); 58 | } 59 | } -------------------------------------------------------------------------------- /scripts/my.sh: -------------------------------------------------------------------------------- 1 | cd ../src 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-25 16:38:51 5 | # @LastEditors: PengyuWang 6 | # @LastEditTime: 2020-12-06 17:45:38 7 | # @FilePath: /sampling/scripts/my.sh 8 | ### 9 | 10 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 11 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 12 | 13 | ./main_degree --rw=0 --k 2 --d 2 --ol=1 --input ~/data/lj.w.gr 14 | ./main_degree --rw=0 --k 2 --d 2 --ol=1 --input ~/data/orkut.w.gr 15 | 16 | ./main --rw=0 --k 2 --d 2 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/lj.w.gr 17 | ./main --rw=0 --k 2 --d 2 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/orkut.w.gr 18 | 19 | 20 | ./main --rw=0 --k 1 --d 100 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/lj.w.gr 21 | ./main --rw=0 --k 1 --d 100 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/orkut.w.gr 22 | 23 | ./main --rw=0 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 24 | ./main --rw=0 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 25 | ./main --rw=1 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 26 | ./main --rw=1 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 27 | 28 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 29 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 30 | ./main_degree --rw=1 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 31 | ./main_degree --rw=1 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 32 | 33 | 34 | 35 | ./main --rw=1 --ol=0 --k 1 --d 100 --input ~/data/lj.w.gr 36 | ./main --rw=1 --ol=0 --k 1 --d 100 --input ~/data/orkut.w.gr 37 | 38 | ./main --rw=1 --ol=0 --n=4847571 --k 1 --d 100 --input ~/data/lj.w.gr 39 | ./main --rw=1 --ol=0 --n=3072627 --k 1 --d 100 --input ~/data/orkut.w.gr -------------------------------------------------------------------------------- /result/nextdoor_unbias.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-25 16:50:34 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-11 16:30:59 6 | # @FilePath: /skywalker/result/knightking.sh 7 | ### 8 | DATA=(web-Google lj orkut ) # sk-2005 friendster) # twitter-2010 uk-union rmat29 web-ClueWeb09) 9 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) #41652230 10 | 11 | DIR="/home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/randomwalks/" 12 | # DeepWalkSampling 13 | # Node2VecSampling 14 | # PPRSampling 15 | # KHopSampling 16 | 17 | # echo "----------------------biased_walk -------------------" 18 | # for idx in $(seq 1 ${#DATA[*]}) 19 | # do 20 | # echo "------------"${DATA[idx-1]} 21 | # ${DIR}DeepWalkSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 22 | # done 23 | # echo "----------------------ppr biased -------------------" 24 | # for idx in $(seq 1 ${#DATA[*]}) 25 | # do 26 | # echo "------------"${DATA[idx-1]} 27 | # 
${DIR}PPRSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 28 | # done 29 | # echo "---------------------- unweighted node2vec -------------------" 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # echo "------------"${DATA[idx-1]} 33 | # ${DIR}Node2VecSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 34 | # done 35 | echo "----------------------kh 40k-------------------" 36 | for idx in $(seq 1 ${#DATA[*]}) 37 | do 38 | echo "------------"${DATA[idx-1]} 39 | /home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/khop/KHopSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 40 | done 41 | -------------------------------------------------------------------------------- /scripts/biased.sh: -------------------------------------------------------------------------------- 1 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 2 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 3 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 4 | #HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 5 | 6 | # DATA=( sk-2005 friendster) 7 | # HD=( 4 1 ) 8 | ITR=1 9 | NG=4 #8 10 | 11 | GR=".w.gr" 12 | EXE="./bin/main" #main_degree 13 | SG="--ngpu=1 --s" 14 | RW="--rw=1 --k 1 --d 100 " 15 | SP="--rw=0 --k 20 --d 2 " 16 | BATCH="--n=40000 -v" 17 | 18 | ROOT_DIR=$PWD 19 | LOG_FILE=${ROOT_DIR}"/result/table3_unbiased.csv" 20 | 21 | DATA_DIR="/home/xucheng//data" 22 | #DATA_DIR=${ROOT_DIR}"/dataset" 23 | GraphWalker_DIR="/home/pywang/sampling/GraphWalker" 24 | KnightKing_DIR="/home/pywang/sampling/KnightKing" 25 | CSAW_DIR="/home/pywang/sampling/C-SAW" 26 | NEXTDOOR_DIR="/home/pywang/sampling/nextdoor-experiments" 27 | 28 | echo "-------------------------------------------------------Skywalker unbias rw 100" #>>"${LOG_FILE}" 29 | for idx in $(seq 1 ${#DATA[*]}); do 30 | ./bin/main --bias=1 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 ${RW} ${BATCH} #>>"${LOG_FILE}" 31 | done 32 | 33 | echo "-------------------------------------------------------Skywalker unbias ppr 100" #>>"${LOG_FILE}" 34 | for idx in $(seq 1 ${#DATA[*]}); do 35 | ./bin/main --bias=1 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --tp=0.15 ${RW} ${BATCH} #>>"${LOG_FILE}" 36 | done 37 | 38 | echo "-------------------------------------------------------Skywalker unbias node2vec" #>>"${LOG_FILE}" 39 | for idx in $(seq 1 ${#DATA[*]}); do 40 | ./bin/main --bias=1 --ol=0 --buffer --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --node2vec ${BATCH} # >>"${LOG_FILE}" 41 | done -------------------------------------------------------------------------------- /old/shmem/tmp.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | #include 3 | #include 4 | #include 5 | 6 | template __global__ void init_range_d(T *ptr, size_t size) { 7 | if (TID < size) { 8 | ptr[TID] = TID; 9 | } 10 | } 11 | template void init_range(T *ptr, size_t size) { 12 | init_range_d<<>>(ptr, size); 13 | } 14 | template __global__ void init_array_d(T *ptr, size_t size, T v) { 15 | if (TID < size) { 16 | ptr[TID] = v; 17 | } 18 | } 19 | template void init_array(T *ptr, size_t size, T v) { 20 | init_array_d<<>>(ptr, size, v); 21 | } 22 | // todo 23 | /* 24 | 1. prefix sum to normalize 25 | 2. 
26 | */ 27 | #define paster( n ) printf( "var: " #n " = %d\n", n ) 28 | int main(int argc, char const *argv[]) { 29 | 30 | int *buf7; 31 | int size = 40; 32 | 33 | cudaSetDevice(1); 34 | cudaMalloc(&buf7, size / 2 * sizeof(int)); 35 | 36 | int *id_ptr; 37 | float *weight_ptr; 38 | cudaMalloc(&id_ptr, size * sizeof(int)); 39 | cudaMalloc(&weight_ptr, size * sizeof(float)); 40 | init_range(id_ptr, size); 41 | init_array(weight_ptr, size / 8 * 7, 0.5); 42 | init_array(weight_ptr + size / 8 * 7, size - size / 8 * 7, 2.0); 43 | 44 | // P; 45 | // alias_table *table_ptr; 46 | // alias_table table_h; 47 | Vector out; 48 | out.init(40); 49 | paster(SHMEM_PER_WARP); 50 | paster(TMP_PER_ELE); 51 | paster(ELE_PER_WARP); 52 | 53 | shmem_kernel<<<1, 32, 0, 0>>>(id_ptr, weight_ptr, size, size / 2, out); 54 | // printf("size %d\n",sizeof(alias_table_constructor_shmem)); 55 | // printf("size %d %d\n",sizeof(Vector_shmem),ELE_PER_WARP); 56 | P; 57 | usleep(5000); 58 | HERR(cudaDeviceSynchronize()); 59 | HERR(cudaPeekAtLastError()); 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /test/tmp.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | #include 3 | #include 4 | #include 5 | 6 | template __global__ void init_range_d(T *ptr, size_t size) { 7 | if (TID < size) { 8 | ptr[TID] = TID; 9 | } 10 | } 11 | template void init_range(T *ptr, size_t size) { 12 | init_range_d<<>>(ptr, size); 13 | } 14 | template __global__ void init_array_d(T *ptr, size_t size, T v) { 15 | if (TID < size) { 16 | ptr[TID] = v; 17 | } 18 | } 19 | template void init_array(T *ptr, size_t size, T v) { 20 | init_array_d<<>>(ptr, size, v); 21 | } 22 | // todo 23 | /* 24 | 1. prefix sum to normalize 25 | 2. 
26 | */ 27 | #define paster( n ) printf( "var: " #n " = %d\n", n ) 28 | int main(int argc, char const *argv[]) { 29 | 30 | int *buf7; 31 | int size = 40; 32 | 33 | cudaSetDevice(1); 34 | MyCudaMalloc(&buf7, size / 2 * sizeof(int)); 35 | 36 | int *id_ptr; 37 | float *weight_ptr; 38 | MyCudaMalloc(&id_ptr, size * sizeof(int)); 39 | MyCudaMalloc(&weight_ptr, size * sizeof(float)); 40 | init_range(id_ptr, size); 41 | init_array(weight_ptr, size / 8 * 7, 0.5); 42 | init_array(weight_ptr + size / 8 * 7, size - size / 8 * 7, 2.0); 43 | 44 | // P; 45 | // alias_table *table_ptr; 46 | // alias_table table_h; 47 | Vector out; 48 | out.init(40); 49 | paster(SHMEM_PER_WARP); 50 | paster(MEM_PER_ELE); 51 | paster(ELE_PER_WARP); 52 | 53 | shmem_kernel<<<1, 32, 0, 0>>>(id_ptr, weight_ptr, size, size / 2, out); 54 | // printf("size %d\n",sizeof(alias_table_constructor_shmem)); 55 | // printf("size %d %d\n",sizeof(Vector_shmem),ELE_PER_WARP); 56 | P; 57 | usleep(5000); 58 | H_ERR(cudaDeviceSynchronize()); 59 | H_ERR(cudaPeekAtLastError()); 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /figs/dynamic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-17 21:38:38 7 | # @FilePath: /skywalker/figs/unbiased.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n=40000" 25 | OUT='>> ./figs/result/dynamic.csv' 26 | 27 | # --randomweight=1 --weightrange=2 28 | 29 | 30 | # walker 31 | echo "-------------------------------------------------------unbias rw 100" >> ./figs/result/dynamic.csv 32 | for idx in $(seq 1 ${#DATA[*]}) 33 | do 34 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${RW} ${BATCH} >> ./figs/result/dynamic.csv 35 | done 36 | 37 | echo "-------------------------------------------------------unbias rw 100 dynamic" >> ./figs/result/dynamic.csv 38 | for idx in $(seq 1 ${#DATA[*]}) 39 | do 40 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${RW} ${BATCH} --dynamic=1>> ./figs/result/dynamic.csv 41 | done 42 | 43 | # echo "-------------------------------------------------------unbias sp" >> ./figs/result/dynamic.csv 44 | # for idx in $(seq 1 ${#DATA[*]}) 45 | # do 46 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${SP} ${BATCH} >> ./figs/result/dynamic.csv 47 | # done 48 | 49 | -------------------------------------------------------------------------------- /test/alias_table.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | // template 3 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 4 | Vector out) 5 | { 6 | 7 | __shared__ alias_table_constructor_shmem tables[WARP_PER_BLK]; 8 | alias_table_constructor_shmem *table = &tables[WID]; 9 | // printf("table size %llu\n",table->size); 10 | 11 | 
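  // (descriptive comments added for clarity) Rough shape of the sequence
  // below, which builds a per-warp alias table in shared memory and then
  // samples from it:
  //   load()        - copy the candidate ids and weights into the warp's
  //                   shared-memory buffers.
  //   construct()   - normalize the weights and pair under-full ("small")
  //                   slots with over-full ("large") slots, the classic
  //                   alias-table build, so each slot ends up with a
  //                   probability and an alias entry.
  //   roll_atomic() - each thread draws a slot and a uniform number from its
  //                   curand state and keeps either the slot's own id or its
  //                   alias, O(1) work per sample, appending results to `out`
  //                   until `num` samples have been produced.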
table->Init(); 12 | if (LID == 0) 13 | { 14 | printf("table large size %llu\n", table->large.capacity); 15 | } 16 | if (TID == 0) 17 | { 18 | printf("load\n"); 19 | } 20 | table->load(ids, weights, size); 21 | if (TID == 0) 22 | { 23 | printf("construct\n"); 24 | } 25 | table->construct(); 26 | if (TID == 0) 27 | { 28 | printf("roll\n"); 29 | } 30 | curandState state; 31 | curand_init(TID, 0, 0, &state); //(unsigned long long)clock() + 32 | if(num>0) 33 | table->roll_atomic(out, num, &state); 34 | if (LID == 0) 35 | { 36 | printf("out: "); 37 | printD(out.data, out.Size()); 38 | } 39 | } 40 | 41 | // __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 42 | // int * out) { 43 | 44 | // __shared__ alias_table_constructor_shmem tables[WARP_PER_BLK]; 45 | // alias_table_constructor_shmem *table = &tables[WID]; 46 | // // printf("table size %llu\n",table->size); 47 | 48 | // table->Init(); 49 | // if (LID == 0) { 50 | // printf("table large size %llu\n", table->large.capacity); 51 | // } 52 | // if (TID == 0) { 53 | // printf("load\n"); 54 | // } 55 | // table->load(ids, weights, size); 56 | // if (TID == 0) { 57 | // printf("construct\n"); 58 | // } 59 | // table->construct(); 60 | // if (TID == 0) { 61 | // printf("roll\n"); 62 | // } 63 | // table->roll_atomic(out, num); 64 | // if (LID == 0) { 65 | // printf("out: "); 66 | // printD(out, num); 67 | // } 68 | // } -------------------------------------------------------------------------------- /result/nextdoor.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-25 16:50:34 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-11 16:30:59 6 | # @FilePath: /skywalker/result/knightking.sh 7 | ### 8 | DATA=(web-Google lj orkut ) # sk-2005 friendster) # twitter-2010 uk-union rmat29 web-ClueWeb09) 9 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) #41652230 10 | 11 | DIR="/home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/randomwalks/" 12 | # DeepWalkSampling 13 | # Node2VecSampling 14 | # PPRSampling 15 | # KHopSampling 16 | echo "----------------------unbiased_walk -------------------" 17 | for idx in $(seq 1 ${#DATA[*]}) 18 | do 19 | echo "------------"${DATA[idx-1]} 20 | ${DIR}DeepWalkSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 21 | done 22 | 23 | # echo "----------------------unbiased_walk -------------------" 24 | # for idx in $(seq 1 ${#DATA[*]}) 25 | # do 26 | # echo "------------"${DATA[idx-1]} 27 | # ${DIR}DeepWalkSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 28 | # done 29 | # echo "----------------------ppr unbiased -------------------" 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # echo "------------"${DATA[idx-1]} 33 | # ${DIR}PPRSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 34 | # done 35 | # echo "----------------------node2vec -------------------" 36 | # for idx in $(seq 1 ${#DATA[*]}) 37 | # do 38 | # echo "------------"${DATA[idx-1]} 39 | # ${DIR}Node2VecSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 40 | # done 41 | # echo "----------------------kh -------------------" 42 | # for idx in $(seq 1 ${#DATA[*]}) 43 | # do 44 | # echo "------------"${DATA[idx-1]} 45 | # /home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/khop/KHopSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k 
TransitParallel -l 46 | # done 47 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | // { 5 | // "type": "shell", 6 | // "label": "g++ build active file", 7 | // "command": "/usr/bin/g++", 8 | // "args": ["-g", "${file}", "-o", "${fileDirname}/${fileBasenameNoExtension}"], 9 | // "options": { 10 | // "cwd": "/usr/bin" 11 | // }, 12 | // "problemMatcher": ["$gcc"], 13 | // "group": { 14 | // "kind": "build", 15 | // "isDefault": true 16 | // } 17 | // }, 18 | { 19 | "type": "shell", 20 | "label": "CUDA make", 21 | "command": "make", 22 | // "args": ["-g", "${file}", "-o", "${fileDirname}/${fileBasenameNoExtension}"], 23 | // "options": { 24 | // "cwd": "${workspaceFolder}/src" 25 | // }, 26 | "problemMatcher": [ 27 | "$nvcc" 28 | ], 29 | "group": { 30 | "kind": "build", 31 | "isDefault": true 32 | } 33 | }, 34 | // { 35 | // "label": "bandwidthTestMakefile", 36 | // "type": "shell", 37 | // "command": "make", 38 | // "options": { 39 | // "cwd": "/home/bigeye/NVIDIA_CUDA-9.2_Samples/1_Utilities/bandwidthTest" 40 | // }, 41 | // "group": { 42 | // "kind": "build", 43 | // "isDefault": true 44 | // }, 45 | // "presentation": { 46 | // "echo": true, 47 | // "reveal": "always", 48 | // "focus": false, 49 | // "panel": "shared" 50 | // }, 51 | // "args": [ 52 | // "QUIET=0" 53 | // ], 54 | // "problemMatcher": { 55 | // "owner": "cpp", 56 | // "fileLocation": [ 57 | // "absolute" 58 | // ], 59 | // "pattern": { 60 | // "regexp": "^(.*):(\\d+):(\\d+):\\s+(warning|error):\\s+(.*)$", 61 | // "file": 1, 62 | // "line": 2, 63 | // "column": 3, 64 | // "severity": 4, 65 | // "message": 5 66 | // } 67 | // } 68 | // } 69 | ] 70 | } -------------------------------------------------------------------------------- /figs/with_nextdoor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 15:49:20 7 | # @FilePath: /skywalker/figs/offline.sh 8 | ### 9 | DATA=(web-Google lj orkut) # arabic-2005 uk-2005 ) # sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--deepwalk " 23 | SP="--sage " 24 | BATCH="--full -v" 25 | LOG_FILE="with_nextdoor.csv" 26 | 27 | # --randomweight=1 --weightrange=2 28 | 29 | # echo "-------------------------------------------------------offline rw 100" >> ${LOG_FILE} 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # for i in $(seq 1 ${ITR}) 33 | # do 34 | # ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 35 | # done 36 | # done 37 | 38 | # echo "-------------------------------------------------------offline ppr 0.15" >> ${LOG_FILE} 39 | # for idx in $(seq 1 ${#DATA[*]}) 40 | # do 41 | # for i in $(seq 1 ${ITR}) 42 | # do 43 | # ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --tp=0.15 >> ${LOG_FILE} 44 | # done 45 | # done 
46 | 47 | 48 | echo "-------------------------------------------------------unbiased sp 100" >> ${LOG_FILE} 49 | for idx in $(seq 1 ${#DATA[*]}) 50 | do 51 | for i in $(seq 1 ${ITR}) 52 | do 53 | ./bin/main -bias=0 --ol=0 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 54 | done 55 | done 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "CUDA C++: Launch", 9 | "type": "cuda-gdb", 10 | "request": "launch", 11 | "externalConsole": false, 12 | "program": "${workspaceFolder}/build/skywalker", 13 | "args": "-bias=1 --ol=1 --ngpu=1 --s --sage --input /home/pywang/data/lj.w.gr --hd=1 --n=40000 -v --printresult=1 --newsampler=1 --loc=1", 14 | "setupCommands": [ 15 | { 16 | "description": "为 gdb 启用整齐打印", 17 | "text": "-enable-pretty-printing", 18 | // "ignoreFailures": true 19 | } 20 | ] 21 | }, 22 | // { 23 | // "name": "(gdb) 启动", 24 | // "type": "cppdbg", 25 | // "request": "launch", 26 | // "program": "${workspaceFolder}/bin/main", //${fileBasenameNoExtension}.out 27 | // "args": [" --k 1 --d 100 --rw=0 --ol=0 --hd=4 --ngpu 4 --v --n=10"], 28 | // "stopAtEntry": false, 29 | // "cwd": "${workspaceFolder}", 30 | // "environment": [], 31 | // "externalConsole": false, 32 | // "MIMode": "gdb", 33 | // "miDebuggerPath": "gdb", ///usr/local/cuda/bin/cuda-gdb 34 | // "setupCommands": [ 35 | // { 36 | // "description": "为 gdb 启用整齐打印", 37 | // "text": "-enable-pretty-printing", 38 | // "ignoreFailures": true 39 | // } 40 | // ] 41 | // }, 42 | // { 43 | // "name": "cuda-gdb", 44 | // "type": "cuda-gdb", 45 | // "request": "launch", 46 | // "program": "${workspaceFolder}/bin/main", //${fileBasenameNoExtension}.out 47 | // "args": [" -bias=1 --ol=1 --deepwalk -v --ngpu=1 --dw=1 --n=400"] 48 | // } 49 | ] 50 | } -------------------------------------------------------------------------------- /scripts/trw-unbiased.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-10-21 4 | # @LastEditors: Xu Cheng 5 | # @FilePath: /ThunderRW 6 | ### 7 | DATA=(GG lj OK AB UK SK FS) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 8 | 9 | # 10 | # echo "unbiased rw" >>"/home/xucheng/ThunderRW/unbiased.csv" 11 | # for idx in $(seq 1 ${#DATA[*]}) 12 | # do 13 | # echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased.csv" 14 | # ./build/random_walk/deepwalk.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -em 0 -sm 0 -l 100 >>"/home/xucheng/ThunderRW/unbiased.csv" 2>&1 15 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 16 | # done 17 | # 18 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 19 | # 20 | # echo "unbiased ppr" >>"/home/xucheng/ThunderRW/unbiased.csv" 21 | # for idx in $(seq 1 ${#DATA[*]}) 22 | # do 23 | # echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased.csv" 24 | # ./build/random_walk/ppr.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -sp 0.15 -em 0 -sm 0 >>"/home/xucheng/ThunderRW/unbiased.csv" 2>&1 25 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 26 | # done 27 | # 28 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 29 | # 30 | # echo "unbiased node2vec" 
>>"/home/xucheng/ThunderRW/unbiased.csv" 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased.csv" 34 | # ./build/random_walk/node2vec.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -em 0 -sm 0 >>"/home/xucheng/ThunderRW/unbiased.csv" 2>&1 35 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 36 | # done 37 | # 38 | 39 | echo "unbiased rw" >>"/home/xucheng/ThunderRW/unbiased40.csv" 40 | for idx in $(seq 1 ${#DATA[*]}) 41 | do 42 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased40.csv" 43 | ./build/random_walk/deepwalk.out -f sample_dataset/${DATA[idx-1]}/ -n 40 -em 0 -sm 0 -l 100 >>"/home/xucheng/ThunderRW/unbiased40.csv" 2>&1 44 | echo " " >>"/home/xucheng/ThunderRW/unbiased40.csv" 45 | done 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /scripts/trans.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Date: 2020-12-16 11:06:22 4 | LastEditors: PengyuWang 5 | LastEditTime: 2020-12-16 17:15:33 6 | FilePath: /sampling/scripts/trans.py 7 | ''' 8 | # import numpy as np 9 | # import pandas as pd 10 | # import scipy.sparse as ss 11 | 12 | # def read_data_file_as_coo_matrix(filename='edges.txt'): 13 | # "Read data file and return sparse matrix in coordinate format." 14 | 15 | # # if the nodes are integers, use 'dtype = np.uint32' 16 | # data = pd.read_csv(filename, sep = '\t', encoding = 'utf-8') 17 | 18 | # # where 'rows' is node category one and 'cols' node category 2 19 | # rows = data['agn'] # Not a copy, just a reference. 20 | # cols = data['fct'] 21 | 22 | # # crucial third array in python, which can be left out in r 23 | # ones = np.ones(len(rows), np.uint32) 24 | # matrix = ss.coo_matrix((ones, (rows, cols))) 25 | # return matrix 26 | 27 | # def save_csr_matrix(filename, matrix): 28 | # """Save compressed sparse row (csr) matrix to file. 
29 | 30 | # Based on http://stackoverflow.com/a/8980156/232571 31 | 32 | # """ 33 | # assert filename.endswith('.npz') 34 | # attributes = { 35 | # 'data': matrix.data, 36 | # 'indices': matrix.indices, 37 | # 'indptr': matrix.indptr, 38 | # 'shape': matrix.shape, 39 | # } 40 | # np.savez(filename, **attributes) 41 | 42 | # read_data_file_as_coo_matrix() 43 | # read_weighted_edgelist 44 | # G= networkit.graphio.readGraph("/home/pywang/data/lj.w.edge", networkit.Format.EdgeList, separator=" ", continuous=False) 45 | import scipy as sp 46 | import networkx as nx 47 | 48 | 49 | 50 | def save_csr_matrix(filename): 51 | G=nx.read_weighted_edgelist("/home/pywang/data/" + filename+ ".w.edge") 52 | S=nx.to_scipy_sparse_matrix(G) 53 | sp.sparse.save_npz("/home/pywang/data/" + filename+ ".w.npz", S) 54 | 55 | # save_csr_matrix("orkut") 56 | print("uk-2005") 57 | save_csr_matrix("uk-2005") 58 | print("twitter-2010") 59 | save_csr_matrix("twitter-2010") 60 | # print("sk-2005") 61 | # save_csr_matrix("sk-2005") 62 | # print("friendster") 63 | # save_csr_matrix("friendster") -------------------------------------------------------------------------------- /scripts/trans2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Date: 2020-12-16 11:06:22 4 | LastEditors: PengyuWang 5 | LastEditTime: 2020-12-16 19:39:34 6 | FilePath: /sampling/scripts/trans2.py 7 | ''' 8 | import numpy as np 9 | import pandas as pd 10 | import scipy.sparse as ss 11 | 12 | def read_data_file_as_coo_matrix(filename='edges.txt'): 13 | "Read data file and return sparse matrix in coordinate format." 14 | data = pd.read_csv(filename, sep=' ', header=None, dtype=np.uint32) 15 | rows = data[0] # Not a copy, just a reference. 16 | cols = data[1] 17 | ones = np.ones(len(rows), np.uint32) 18 | matrix = ss.coo_matrix((ones, (rows, cols))) 19 | return matrix 20 | 21 | def save_csr_matrix(filename, matrix): 22 | """Save compressed sparse row (csr) matrix to file. 23 | 24 | Based on http://stackoverflow.com/a/8980156/232571 25 | 26 | """ 27 | assert filename.endswith('.npz') 28 | attributes = { 29 | 'data': matrix.data, 30 | 'indices': matrix.indices, 31 | 'indptr': matrix.indptr, 32 | 'shape': matrix.shape, 33 | } 34 | np.savez(filename, **attributes) 35 | 36 | def tx(filename): 37 | "Test data file parsing and matrix serialization." 
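    # (Added note, not in the original script: tx() parses the space-separated edge
    # list into a COO matrix, converts it to CSR, and stores it with save_csr_matrix()
    # above, which writes the data/indices/indptr/shape arrays into a single .npz file.
    # A file saved this way can be restored later; illustrative only, filename assumed:
    #   loader = np.load('edges.npz')
    #   m = ss.csr_matrix((loader['data'], loader['indices'], loader['indptr']),
    #                     shape=tuple(loader['shape']))
    # using the numpy/scipy.sparse imports already present at the top of this script.)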
38 | coo_matrix = read_data_file_as_coo_matrix("/home/pywang/data/" + filename+ ".w.edge") 39 | csr_matrix = coo_matrix.tocsr() 40 | save_csr_matrix("/home/pywang/data/" + filename+ ".w.npz", csr_matrix) 41 | 42 | if __name__ == '__main__': 43 | print("uk-2005") 44 | tx("uk-2005") 45 | # print("twitter-2010") 46 | # tx("twitter-2010") 47 | 48 | # read_data_file_as_coo_matrix() 49 | # read_weighted_edgelist 50 | # G= networkit.graphio.readGraph("/home/pywang/data/lj.w.edge", networkit.Format.EdgeList, separator=" ", continuous=False) 51 | import scipy as sp 52 | import networkx as nx 53 | 54 | 55 | 56 | # def save_csr_matrix(filename): 57 | # G=nx.read_weighted_edgelist("/home/pywang/data/" + filename+ ".w.edge") 58 | # S=nx.to_scipy_sparse_matrix(G) 59 | # sp.sparse.save_npz("/home/pywang/data/" + filename+ ".w.npz", S) 60 | 61 | # save_csr_matrix("orkut") 62 | 63 | 64 | # print("sk-2005") 65 | # save_csr_matrix("sk-2005") 66 | # print("friendster") 67 | # save_csr_matrix("friendster") -------------------------------------------------------------------------------- /tools/gr2npz.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-12-24 14:04:06 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-24 14:22:35 6 | * @FilePath: /sampling/tools/gr2npz.cu 7 | */ 8 | // https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include // for myPyObject.cast>() 15 | #include 16 | #include "graph.cuh" 17 | 18 | 19 | namespace py = pybind11; 20 | 21 | int main() { 22 | py::scoped_interpreter guard{}; 23 | 24 | py::module np = py::module::import("numpy"); 25 | // py::object random = np.attr("random"); 26 | // py::module scipy = py::module::import("scipy.optimize"); 27 | 28 | // Load created module containing f_a(x) = a*x^2 29 | // py::module myModule = py::module::import("MyPythonModule.MyFunctionality"); 30 | 31 | // Create some data for fitting 32 | std::vector xValues(11, 0); 33 | std::vector yValues(11, 0); 34 | for (int i = -5; i < 6; ++i) { 35 | xValues[i + 5] = i; 36 | yValues[i + 5] = i * i; 37 | } 38 | 39 | // Cast data to numpy arrays 40 | py::array_t pyXValues = py::cast(xValues); 41 | py::array_t pyYValues = py::cast(yValues); 42 | 43 | // The return value contains the optimal values and the covariance matrix. 
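// (Hedged sketch, not part of the original file: `retVals` is consumed just below
//  but is never defined in this snippet. With the SciPy import above enabled, it
//  would typically be produced by a curve_fit call along these lines; `f_a` and the
//  `MyPythonModule.MyFunctionality` module are the assumed names from the comments
//  earlier in main():
//    py::module scipy = py::module::import("scipy.optimize");
//    py::object retVals = scipy.attr("curve_fit")(myModule.attr("f_a"), pyXValues, pyYValues);
//  )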
44 | // Get the optimal values 45 | py::object optVals = retVals.attr("__getitem__")(0); 46 | 47 | // Cast return value back to std::vector and show the result 48 | std::vector retValsStd = optVals.cast>(); 49 | std::cout << "Fitted parameter a = " << retValsStd[0] << std::endl; 50 | 51 | return 0; 52 | } 53 | 54 | py::array_t my_fft1d_complex(py::array_t<> input) { 55 | 56 | if (input.ndim() != 1) 57 | throw std::runtime_error("input dim must be 1"); 58 | 59 | vector> in, out; 60 | auto r1 = input.unchecked<1>(); 61 | for (int i = 0; i < input.size(); i++) 62 | { 63 | in.push_back(r1(i)); 64 | } 65 | 66 | fft1d(in, out, in.size()); 67 | 68 | py::array_t> result(out.size()); 69 | auto r2 = result.mutable_unchecked<1>(); 70 | 71 | for (int i = 0; i < out.size(); i++) 72 | { 73 | r2(i) = out[i]; 74 | } 75 | 76 | return result; 77 | 78 | } 79 | -------------------------------------------------------------------------------- /figs/offline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 15:49:20 7 | # @FilePath: /skywalker/figs/offline.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--deepwalk " 23 | SP="--sage " 24 | BATCH="--n 40000 " 25 | LOG_FILE="offline.csv" 26 | 27 | 28 | 29 | 30 | 31 | # --randomweight=1 --weightrange=2 32 | 33 | # echo "-------------------------------------------------------offline rw 100 ${BATCH}" >> ${LOG_FILE} 34 | # for idx in $(seq 1 ${#DATA[*]}) 35 | # do 36 | # for i in $(seq 1 ${ITR}) 37 | # do 38 | # ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 39 | # done 40 | # done 41 | 42 | # echo "-------------------------------------------------------offline ppr 0.15 ${BATCH}" >> ${LOG_FILE} 43 | # for idx in $(seq 1 ${#DATA[*]}) 44 | # do 45 | # for i in $(seq 1 ${ITR}) 46 | # do 47 | # ./bin/main -bias=1 --ol=0 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> ${LOG_FILE} 48 | # done 49 | # done 50 | 51 | 52 | # echo "-------------------------------------------------------offline sp 100${BATCH}" >> ${LOG_FILE} 53 | # for idx in $(seq 1 ${#DATA[*]}) 54 | # do 55 | # for i in $(seq 1 ${ITR}) 56 | # do 57 | # ./bin/main -bias=1 --ol=0 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 58 | # done 59 | # done 60 | 61 | echo "-------------------------------------------------------offline sp 20 20 ${BATCH}" >> ${LOG_FILE} 62 | for idx in $(seq 1 ${#DATA[*]}) 63 | do 64 | for i in $(seq 1 ${ITR}) 65 | do 66 | ./bin/main -bias=1 --ol=0 ${SG} --rw=0 --k=20 --d=2 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 67 | done 68 | done 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /figs/online.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # 
@Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:43:38 7 | # @FilePath: /skywalker/figs/online.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n 40000 " 25 | POLICY="--static=0" 26 | OUTPUT=" online.csv " 27 | 28 | # --randomweight=1 --weightrange=2 29 | 30 | # echo "-------------------------------------------------------online rw 100 ${POLICY} " >> ${OUTPUT} 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # for i in $(seq 1 ${ITR}) 34 | # do 35 | # ./bin/main -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${POLICY} >> ${OUTPUT} 36 | # done 37 | # done 38 | 39 | # echo "-------------------------------------------------------online ppr 0.15" >> online.csv 40 | # for idx in $(seq 1 ${#DATA[*]}) 41 | # do 42 | # for i in $(seq 1 ${ITR}) 43 | # do 44 | # ./bin/main -bias=1 --ol=1 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> online.csv 45 | # done 46 | # done 47 | 48 | echo "-------------------------------------------------------node2vec ${POLICY} " >> ${OUTPUT} 49 | for idx in $(seq 1 ${#DATA[*]}) 50 | do 51 | for i in $(seq 1 ${ITR}) 52 | do 53 | ./bin/node2vec -node2vec ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} ${POLICY} >> ${OUTPUT} 54 | done 55 | done 56 | 57 | # echo "-------------------------------------------------------online sp 100 ${POLICY} " >> ${OUTPUT} 58 | # for idx in $(seq 1 ${#DATA[*]}) 59 | # do 60 | # for i in $(seq 1 ${ITR}) 61 | # do 62 | # ./bin/main -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${POLICY} >> ${OUTPUT} 63 | # done 64 | # done 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /scripts/csaw.sh: -------------------------------------------------------------------------------- 1 | 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-25 16:31:37 5 | # @LastEditors: PengyuWang 6 | # @LastEditTime: 2021-01-11 11:31:12 7 | # @FilePath: /skywalker/scripts/csaw.sh 8 | ### 9 | DATA=( web-Google lj orkut arabic-2005 uk-2005 ) # 10 | # DATA=( uk-2005 sk-2005 friendster) 11 | cd /home/pywang/sampling/C-SAW/non-stream 12 | 13 | # ./sampling.bin wg ~/data/soc-LiveJournal1.txt_beg_pos.bin ~/data/soc-LiveJournal1.txt_csr.bin 100 32 4000 1 2 2 1 14 | # ./sampling.bin wg ~/data/lj.w.edge_beg_pos.bin ~/data/lj.w.edge_csr.bin 100 32 4000 1 1 100 1 15 | # ./sampling.bin wg ~/data/lj.w.edge_beg_pos.bin ~/data/lj.w.edge_csr.bin 100 32 4000 1 2 2 1 16 | 17 | # ./sampling.bin wg ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 4000 1 1 100 1 18 | # ./sampling.bin wg ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 4000 1 2 2 1 19 | 20 | # ./sampling.bin wg ~/data/uk-2005.w.edge_beg_pos.bin ~/data/uk-2005.w.edge_csr.bin 100 32 4000 1 1 100 1 21 | # ./sampling.bin wg ~/data/uk-2005.w.edge_beg_pos.bin ~/data/uk-2005.w.edge_csr.bin 100 32 4000 
1 2 2 1 22 | 23 | echo "----------------------biased walk 4k 64-------------------" 24 | for idx in $(seq 1 ${#DATA[*]}) 25 | do 26 | echo ${DATA[idx-1]} 27 | /home/pywang/sampling/C-SAW/non-stream/sampling.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 1 100 1 28 | done 29 | 30 | # echo "----------------------sampling biased 4k 20 2 64-------------------" 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # echo ${DATA[idx-1]} 34 | # /home/pywang/sampling/C-SAW/non-stream/sampling.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 20 2 1 35 | # done 36 | 37 | # echo "----------------------biased walk 4k 64-------------------" 38 | # for idx in $(seq 1 ${#DATA[*]}) 39 | # do 40 | # echo ${DATA[idx-1]} 41 | # /home/pywang/sampling/C-SAW/streaming/streaming.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 1 100 1 42 | # done 43 | 44 | # echo "----------------------sampling biased 4k 20 2 64-------------------" 45 | # for idx in $(seq 1 ${#DATA[*]}) 46 | # do 47 | # echo ${DATA[idx-1]} 48 | # /home/pywang/sampling/C-SAW/streaming/streaming.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 20 2 1 49 | # done -------------------------------------------------------------------------------- /old/shmem/util.cu: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | 3 | // __device__ char char_atomicCAS(char *addr, char cmp, char val) { 4 | // unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) & 5 | // (0xFFFFFFFFFFFFFFFCULL)); 6 | // unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 7 | // unsigned mask = 0xFFU; 8 | // mask <<= al_offset; 9 | // mask = ~mask; 10 | // unsigned sval = val; 11 | // sval <<= al_offset; 12 | // unsigned old = *al_addr, assumed, setval; 13 | // do { 14 | // assumed = old; 15 | // setval = assumed & mask; 16 | // setval |= sval; 17 | // old = atomicCAS(al_addr, assumed, setval); 18 | // } while (assumed != old); 19 | // return (char)((assumed >> al_offset) & 0xFFU); 20 | // } 21 | 22 | 23 | // template 24 | // __inline__ __device__ T warpPrefixSum(T val, int lane_id) { 25 | // T val_shuffled; 26 | // for (int offset = 1; offset < warpSize; offset *= 2) { 27 | // val_shuffled = __shfl_up(val, offset); 28 | // if (lane_id >= offset) { 29 | // val += val_shuffled; 30 | // } 31 | // } 32 | // return val; 33 | // } 34 | __device__ void active_size(int n=0) { 35 | coalesced_group active = coalesced_threads(); 36 | if (active.thread_rank() == 0) 37 | printf("coalesced_group %d at line %d\n", active.size(),n); 38 | } 39 | template void printH(T *ptr, int size) { 40 | T *ptrh = new T[size]; 41 | HERR(cudaMemcpy(ptrh, ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); 42 | printf("printH: "); 43 | for (size_t i = 0; i < size; i++) { 44 | // printf("%d\t", ptrh[i]); 45 | std::cout << ptrh[i] << "\t"; 46 | } 47 | printf("\n"); 48 | delete ptrh; 49 | } 50 | __device__ void printD(float *ptr, int size) { 51 | printf("printDf: size%d, ", size); 52 | for (size_t i = 0; i < size; i++) { 53 | printf("%f\t", ptr[i]); 54 | } 55 | printf("\n"); 56 | } 57 | __device__ void printD(int *ptr, int size) { 58 | printf("printDi: size%d, ", size); 59 | for (size_t i = 0; i < size; i++) { 60 | printf("%d\t", ptr[i]); 61 | } 62 | printf("\n"); 63 | } 64 | // template __global__ void init_range_d(T *ptr, size_t size) { 
65 | // if (TID < size) { 66 | // ptr[TID] = TID; 67 | // } 68 | // } 69 | // template void init_range(T *ptr, size_t size) { 70 | // init_range_d<<>>(ptr, size); 71 | // } 72 | // template __global__ void init_array_d(T *ptr, size_t size, T v) { 73 | // if (TID < size) { 74 | // ptr[TID] = v; 75 | // } 76 | // } 77 | // template void init_array(T *ptr, size_t size, T v) { 78 | // init_array_d<<>>(ptr, size, v); 79 | // } 80 | -------------------------------------------------------------------------------- /old/shmem/util.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | // #include 4 | // #include 5 | #include 6 | #include 7 | #include 8 | using namespace cooperative_groups; 9 | #include 10 | #include 11 | #include 12 | #define u64 unsigned long long int 13 | #define TID (threadIdx.x + blockIdx.x * blockDim.x) 14 | #define LID (threadIdx.x % 32) 15 | #define WID (threadIdx.x / 32) 16 | #define MIN(x, y) ((x < y) ? x : y) 17 | #define MAX(x, y) ((x > y) ? x : y) 18 | #define P printf("%d\n", __LINE__) 19 | #define HERR(ans) \ 20 | { gpuAssert((ans), __FILE__, __LINE__); } 21 | inline void gpuAssert(cudaError_t code, const char *file, int line, 22 | bool abort = true) { 23 | if (code != cudaSuccess) { 24 | fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, 25 | line); 26 | if (abort) 27 | exit(code); 28 | } 29 | } 30 | __device__ void active_size(int n); 31 | 32 | 33 | // __device__ char char_atomicCAS(char *addr, char cmp, char val) { 34 | // unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) 35 | // & 36 | // (0xFFFFFFFFFFFFFFFCULL)); 37 | // unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 38 | // unsigned mask = 0xFFU; 39 | // mask <<= al_offset; 40 | // mask = ~mask; 41 | // unsigned sval = val; 42 | // sval <<= al_offset; 43 | // unsigned old = *al_addr, assumed, setval; 44 | // do { 45 | // assumed = old; 46 | // setval = assumed & mask; 47 | // setval |= sval; 48 | // old = atomicCAS(al_addr, assumed, setval); 49 | // } while (assumed != old); 50 | // return (char)((assumed >> al_offset) & 0xFFU); 51 | // } 52 | 53 | // template 54 | // __inline__ __device__ T warpPrefixSum(T val, int lane_id) { 55 | // T val_shuffled; 56 | // for (int offset = 1; offset < warpSize; offset *= 2) { 57 | // val_shuffled = __shfl_up(val, offset); 58 | // if (lane_id >= offset) { 59 | // val += val_shuffled; 60 | // } 61 | // } 62 | // return val; 63 | // } 64 | #define FULL_MASK 0xffffffff 65 | 66 | template 67 | __inline__ __device__ T warpReduce(T val, int lane_id) { 68 | // T val_shuffled; 69 | for (int offset = 16; offset > 0; offset /= 2) 70 | val += __shfl_down_sync(FULL_MASK, val, offset); 71 | return val; 72 | } 73 | 74 | template void printH(T *ptr, int size); 75 | __device__ void printD(float *ptr, int size); 76 | __device__ void printD(int *ptr, int size); 77 | // template __global__ void init_range_d(T *ptr, size_t size); 78 | // template void init_range(T *ptr, size_t size); 79 | // template __global__ void init_array_d(T *ptr, size_t size, T v); 80 | // template void init_array(T *ptr, size_t size, T v); 81 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "*.cu": "cpp", 4 | "*.cuh": "cpp", 5 | "*.tcu": "cpp", 6 | "array": "cpp", 7 | "string": "cpp", 8 | "string_view": "cpp", 9 | "*.tcc": 
"cpp", 10 | "atomic": "cpp", 11 | "bitset": "cpp", 12 | "initializer_list": "cpp", 13 | "utility": "cpp", 14 | "memory_resource": "cpp", 15 | "chrono": "cpp", 16 | "algorithm": "cpp", 17 | "cctype": "cpp", 18 | "clocale": "cpp", 19 | "cmath": "cpp", 20 | "cstdarg": "cpp", 21 | "cstddef": "cpp", 22 | "cstdio": "cpp", 23 | "cstdlib": "cpp", 24 | "cstring": "cpp", 25 | "ctime": "cpp", 26 | "cwchar": "cpp", 27 | "cwctype": "cpp", 28 | "strstream": "cpp", 29 | "complex": "cpp", 30 | "cstdint": "cpp", 31 | "deque": "cpp", 32 | "list": "cpp", 33 | "unordered_map": "cpp", 34 | "vector": "cpp", 35 | "exception": "cpp", 36 | "functional": "cpp", 37 | "optional": "cpp", 38 | "ratio": "cpp", 39 | "system_error": "cpp", 40 | "tuple": "cpp", 41 | "type_traits": "cpp", 42 | "fstream": "cpp", 43 | "iomanip": "cpp", 44 | "iosfwd": "cpp", 45 | "iostream": "cpp", 46 | "istream": "cpp", 47 | "limits": "cpp", 48 | "memory": "cpp", 49 | "new": "cpp", 50 | "ostream": "cpp", 51 | "numeric": "cpp", 52 | "sstream": "cpp", 53 | "stdexcept": "cpp", 54 | "streambuf": "cpp", 55 | "thread": "cpp", 56 | "cfenv": "cpp", 57 | "cinttypes": "cpp", 58 | "typeindex": "cpp", 59 | "typeinfo": "cpp", 60 | "iterator": "cpp", 61 | "map": "cpp", 62 | "random": "cpp", 63 | "set": "cpp", 64 | "forward_list": "cpp", 65 | "unordered_set": "cpp", 66 | "cerrno": "cpp", 67 | "condition_variable": "cpp", 68 | "mutex": "cpp", 69 | "regex": "cpp", 70 | "codecvt": "cpp", 71 | "filesystem": "cpp", 72 | "valarray": "cpp", 73 | "variant": "cpp", 74 | "__nullptr": "cpp", 75 | "ios": "cpp", 76 | "locale": "cpp", 77 | "__locale": "cpp", 78 | "__config": "cpp", 79 | "__string": "cpp", 80 | "cassert": "cpp", 81 | "ccomplex": "cpp", 82 | "cfloat": "cpp", 83 | "ciso646": "cpp", 84 | "climits": "cpp", 85 | "queue": "cpp", 86 | "stack": "cpp", 87 | "cstdbool": "cpp", 88 | "__threading_support": "cpp", 89 | "__hash_table": "cpp", 90 | "__split_buffer": "cpp", 91 | "__tree": "cpp", 92 | "*.ipp": "cpp", 93 | "future": "cpp", 94 | "slist": "cpp", 95 | "__bit_reference": "cpp", 96 | "cross_module_gil_utils.cpp": "cuda-cpp", 97 | "charconv": "cpp", 98 | "*.inc": "cpp" 99 | } 100 | } -------------------------------------------------------------------------------- /figs/unbiased.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-17 21:38:38 7 | # @FilePath: /skywalker/figs/unbiased.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n=40000" 25 | OUT='>> unbiased.csv' 26 | 27 | # --randomweight=1 --weightrange=2 28 | 29 | 30 | # echo "-------------------------------------------------------unbias rw 100 full" >> unbiased.csv 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} ${SG} ${RW} -full >> unbiased.csv 34 | # done 35 | # walker 36 | # echo "-------------------------------------------------------unbias rw 100" 
>> unbiased.csv 37 | # for idx in $(seq 1 ${#DATA[*]}) 38 | # do 39 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${RW} ${BATCH} >> unbiased.csv 40 | # done 41 | 42 | # echo "-------------------------------------------------------unbias ppr 100" >> unbiased.csv 43 | # for idx in $(seq 1 ${#DATA[*]}) 44 | # do 45 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --tp=0.15 ${RW} ${BATCH} >> unbiased.csv 46 | # done 47 | 48 | # echo "-------------------------------------------------------unbias node2vec" >> unbiased.csv 49 | # for idx in $(seq 1 ${#DATA[*]}) 50 | # do 51 | # ./bin/main --bias=0 --ol=0 --buffer --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --node2vec ${BATCH} >> unbiased.csv 52 | # done 53 | # echo "-------------------------------------------------------unbias node2vec full" >> unbiased.csv 54 | # for idx in $(seq 1 ${#DATA[*]}) 55 | # do 56 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --node2vec --full >> unbiased.csv 57 | # done 58 | echo "-------------------------------------------------------unbias sage 40k" >> unbiased.csv 59 | for idx in $(seq 1 ${#DATA[*]}) 60 | do 61 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --sage ${BATCH} >> unbiased.csv 62 | done 63 | # echo "-------------------------------------------------------unbias sage" >> unbiased.csv 64 | # for idx in $(seq 1 ${#DATA[*]}) 65 | # do 66 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --sage ${BATCH} >> unbiased.csv 67 | # done 68 | 69 | # echo "-------------------------------------------------------unbias sp" >> unbiased.csv 70 | # for idx in $(seq 1 ${#DATA[*]}) 71 | # do 72 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${SP} ${BATCH} --m=2 >> unbiased2.csv 73 | # done 74 | 75 | # echo "-------------------------------------------------------unbias sp" >> unbiased.csv 76 | # for idx in $(seq 1 ${#DATA[*]}) 77 | # do 78 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${SP} ${BATCH} --m=3>> unbiased3.csv 79 | # done 80 | -------------------------------------------------------------------------------- /old/shmem/vec.cuh: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | 3 | #define SHMEM_SIZE 49152 4 | 5 | #define BLOCK_SIZE 1024 6 | #define WARP_PER_SM (BLOCK_SIZE / 32) 7 | 8 | #define SHMEM_PER_WARP (SHMEM_SIZE / WARP_PER_SM) 9 | 10 | #define TMP_PER_ELE (4 + 4 + 4 + 4 + 1) 11 | 12 | // alignment 13 | #define ELE_PER_WARP (SHMEM_PER_WARP / TMP_PER_ELE - 8) 14 | 15 | template class Vector_itf { 16 | public: 17 | Vector_itf() {} 18 | ~Vector_itf() {} 19 | virtual void init() {} 20 | virtual void add() {} 21 | virtual void clean() {} 22 | virtual bool empty() {} 23 | virtual size_t size() {} 24 | virtual T &operator[](int id) {} 25 | }; 26 | 27 | template struct buf { T data[ELE_PER_WARP]; }; 28 | 29 | 30 | 31 | template struct Vector_shmem { 32 | u64 size = 0; 33 | u64 capacity = ELE_PER_WARP; 34 | T data[ELE_PER_WARP]; 35 | 36 | __device__ void Init(size_t s = 0) { 37 | if (LID == 0) { 38 | capacity = ELE_PER_WARP; 39 | size = s; 40 | } 41 | for (size_t i = LID; i < capacity; i += 32) { 42 | data[i] = 0; 43 | } 44 | } 45 | __device__ u64 &Size() { return size; } 46 | __device__ void Add(T t) { 47 | u64 old = atomicAdd(&size, 1); 48 | if (old < capacity) 49 | data[old] = t; 50 | else 51 | printf("Vector_shmem overflow %llu\n", capacity); 52 | } 53 | __device__ void Clean() { size = 0; } 54 | __device__ bool 
Empty() { 55 | if (size == 0) 56 | return true; 57 | return false; 58 | } 59 | __device__ T &operator[](int id) { return data[id]; } 60 | }; 61 | 62 | // template __global__ void myMemsetKernel(T *ptr, size_t size){ 63 | // for (size_t i = TID; i < size; i+=BLOCK_SIZE) 64 | // { 65 | // ptr[i]= 66 | // } 67 | 68 | // } 69 | 70 | // template void myMemset(T *ptr, size_t size){ 71 | 72 | // } 73 | 74 | template class Vector { 75 | public: 76 | u64 *size; 77 | u64 *capacity; 78 | T *data = nullptr; 79 | bool use_self_buffer = false; 80 | // T data[VECTOR_SHMEM_SIZE]; 81 | 82 | __host__ Vector() {} 83 | __host__ void free() { 84 | if (use_self_buffer && data != nullptr) 85 | cudaFree(data); 86 | } 87 | __device__ __host__ ~Vector() {} 88 | __host__ void init(int _capacity) { 89 | cudaMallocManaged(&size, sizeof(u64)); 90 | cudaMallocManaged(&capacity, sizeof(u64)); 91 | *capacity = _capacity; 92 | *size = 0; 93 | // init_array(capacity,1,_capacity); 94 | // init_array(capacity,1,_capacity); 95 | cudaMalloc(&data, _capacity * sizeof(T)); 96 | use_self_buffer = true; 97 | } 98 | __host__ __device__ u64 &Size() { return *size; } 99 | __device__ void add(T t) { 100 | u64 old = atomicAdd(size, 1); 101 | if (old < *capacity) 102 | data[old] = t; 103 | else 104 | printf("wtf vector overflow"); 105 | } 106 | __device__ void AddTillSize(T t, u64 target_size) { 107 | u64 old = atomicAdd(size, 1); 108 | if (old < *capacity) { 109 | if (old < target_size) 110 | data[old] = t; 111 | } else 112 | printf("wtf vector overflow"); 113 | } 114 | __device__ void clean() { *size = 0; } 115 | __device__ bool empty() { 116 | if (*size == 0) 117 | return true; 118 | return false; 119 | } 120 | __device__ T &operator[](int id) { return data[id]; } 121 | }; -------------------------------------------------------------------------------- /figs/test_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ONLINE=false 4 | BIAS=false 5 | FULL=false 6 | STATIC=false 7 | BUFFER=false 8 | for i in "$@" 9 | do 10 | case $i in 11 | -app=*|--app=*) 12 | APP="${i#*=}" 13 | shift # past argument=value 14 | # ;; 15 | # -s=*|--searchpath=*) 16 | # SEARCHPATH="${i#*=}" 17 | # shift # past argument=value 18 | # ;; 19 | # -l=*|--lib=*) 20 | # LIBPATH="${i#*=}" 21 | # shift # past argument=value 22 | ;; 23 | -online) 24 | ONLINE=true 25 | shift # past argument with no value 26 | ;; 27 | -bias) 28 | BIAS=true 29 | shift # past argument with no value 30 | ;; 31 | -full) 32 | FULL=true 33 | shift # past argument with no value 34 | ;; 35 | -static) 36 | STATIC=true 37 | shift # past argument with no value 38 | ;; 39 | -buffer) 40 | BUFFER=true 41 | shift # past argument with no value 42 | ;; 43 | *) 44 | # unknown option 45 | ;; 46 | esac 47 | done 48 | # echo "APP = ${APP}" 49 | # echo "SEARCH PATH = ${SEARCHPATH}" 50 | # echo "LIBRARY PATH = ${LIBPATH}" 51 | # echo "DEFAULT = ${DEFAULT}" 52 | # echo "Number files in SEARCH PATH with EXTENSION:" $(ls -1 "${SEARCHPATH}"/*."${EXTENSION}" | wc -l) 53 | # if [[ -n $1 ]]; then 54 | # echo "Last line of file specified as non-opt/last argument:" 55 | # tail -1 $1 56 | # fi 57 | echo ${BIN} 58 | BIN="./bin/main" 59 | if [ ${APP} = "node2vec" ] 60 | then 61 | if ${BIAS} 62 | then 63 | BIN="./bin/node2vec" 64 | fi 65 | fi 66 | # echo ${BIN} 67 | # if [ ${APP}="node2vec" ] && ${BIAS} ; then 68 | # BIN="./bin/node2vec" 69 | # else 70 | # BIN="./bin/main" 71 | # fi 72 | 73 | if ${BIAS} ; then 74 | BIN=${BIN}" -bias=1 " 75 | else 76 | 
BIN=${BIN}" -bias=0 " 77 | fi 78 | 79 | if ${ONLINE} ; then 80 | BIN=${BIN}" -ol=1 " 81 | else 82 | BIN=${BIN}" -ol=0 " 83 | fi 84 | 85 | if ${FULL} ; then 86 | BIN=${BIN}" --full " 87 | else 88 | BIN=${BIN}" --n 40000 " 89 | fi 90 | if ${STATIC} ; then 91 | BIN=${BIN}" --static=1 " 92 | else 93 | BIN=${BIN}" --static=0 " 94 | fi 95 | if ${BUFFER} ; then 96 | BIN=${BIN}" --buffer=1 " 97 | else 98 | BIN=${BIN}" --buffer=0 " 99 | fi 100 | 101 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 102 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 103 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 104 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 105 | 106 | # DATA=( sk-2005 friendster) 107 | # HD=( 4 1 ) 108 | ITR=1 109 | NG=4 #8 110 | 111 | GR=".w.gr" 112 | EXE="./bin/main" #main_degree 113 | SG="--ngpu=1 --s" 114 | RW="--deepwalk " 115 | SP="--sage " 116 | # BATCH="--n 40000 " 117 | LOG_FILE="offline.csv" 118 | 119 | # echo "-------------------------------------------------------offline rw 100 ${BATCH}" >> ${LOG_FILE} 120 | echo "-------------------------------------------------------${APP} ${BIN} BIAS=${BIAS} ONLINE=${ONLINE} FULL=${FULL}------------" 121 | for idx in $(seq 1 ${#DATA[*]}) 122 | do 123 | for i in $(seq 1 ${ITR}) 124 | do 125 | ${BIN} ${SG} --${APP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} # >> ${LOG_FILE} 126 | done 127 | done -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(Skywalker CXX ) 2 | cmake_minimum_required ( VERSION 3.17 ) 3 | 4 | set ( CUDA_ENABLE true ) 5 | if ( CUDA_ENABLE ) 6 | enable_language( CUDA ) 7 | list ( APPEND PRJ_COMPILE_OPTIONS -Xcompiler ) 8 | endif() 9 | 10 | find_package ( OpenMP ) 11 | if ( OpenMP_FOUND ) 12 | list ( APPEND PRJ_COMPILE_DEF ENABLE_OPENMP ) 13 | list ( APPEND PRJ_LIBRARIES ${OpenMP_CXX_LIBRARIES} ) 14 | list ( APPEND PRJ_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS} ) 15 | endif () 16 | 17 | # set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 18 | # find_package ( Numa REQUIRED) 19 | 20 | add_subdirectory(deps/gflags) 21 | include_directories(${PROJECT_BINARY_DIR}/deps/gflags/include) 22 | link_directories(${PROJECT_BINARY_DIR}/deps/gflags/lib) 23 | 24 | ############## BUILD ############## 25 | set(EXTRA_LIBS gflags) 26 | 27 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -rdc=true -res-usage -lnuma -fopenmp -Xptxas -v ") 28 | if (CMAKE_BUILD_TYPE STREQUAL "Debug") 29 | message("Debug mode") 30 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -O0 -Xcompiler -ggdb -lnvrt -Woverloaded-virtual") 31 | else() 32 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo -O3 -Xcompiler -DNDEBUG ") 33 | endif() 34 | 35 | # -gencode arch=compute_75,code=sm_75 -std=c++11 36 | 37 | #SET(CUDA_SEPARABLE_COMPILATION ON) 38 | #set(CUDA_VERBOSE_BUILD ON) 39 | 40 | include_directories(include) 41 | aux_source_directory(src DIR_SRCS) 42 | # aux_source_directory(src/util DIR_SRCS) 43 | 44 | if(NOT DEFINED CMAKE_CUDA_STANDARD) 45 | set(CMAKE_CUDA_STANDARD 11) 46 | set(CMAKE_CUDA_STANDARD_REQUIRED ON) 47 | endif() 48 | 49 | if (NOT DEFINED CUDA_SEPARABLE_COMPILATION) 50 | set(CUDA_SEPARABLE_COMPILATION ON) 51 | endif(NOT DEFINED CUDA_SEPARABLE_COMPILATION) 52 | 53 | 54 | add_executable(skywalker 55 | ${DIR_SRCS} 56 | src/api/bias_static.cu 57 | ) 58 | 59 | 60 | 
target_link_libraries(skywalker 61 | ${EXTRA_LIBS} 62 | ${CUDA_LIBRARIES} 63 | ) 64 | if(OpenMP_CXX_FOUND) 65 | target_link_libraries(skywalker OpenMP::OpenMP_CXX) 66 | endif() 67 | set_target_properties(skywalker PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 68 | set_target_properties(skywalker PROPERTIES COMPILE_DEFINITIONS LOCALITY) 69 | 70 | add_executable(skywalker_noloc 71 | ${DIR_SRCS} 72 | src/api/bias_static.cu 73 | ) 74 | target_link_libraries(skywalker_noloc 75 | ${EXTRA_LIBS} 76 | ${CUDA_LIBRARIES} 77 | ) 78 | if(OpenMP_CXX_FOUND) 79 | target_link_libraries(skywalker_noloc OpenMP::OpenMP_CXX) 80 | endif() 81 | set_target_properties(skywalker_noloc PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 82 | 83 | 84 | add_executable(skywalker_node2vec 85 | ${DIR_SRCS} 86 | src/api/bias_node2vec.cu 87 | ) 88 | target_link_libraries(skywalker_node2vec 89 | ${EXTRA_LIBS} 90 | ${CUDA_LIBRARIES} 91 | ) 92 | if(OpenMP_CXX_FOUND) 93 | target_link_libraries(skywalker_node2vec OpenMP::OpenMP_CXX) 94 | endif() 95 | set_target_properties(skywalker_node2vec PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 96 | set_target_properties(skywalker_node2vec PROPERTIES COMPILE_DEFINITIONS LOCALITY) 97 | 98 | set_property(TARGET skywalker PROPERTY CUDA_ARCHITECTURES 75) 99 | set_property(TARGET skywalker skywalker_noloc CUDA_ARCHITECTURES 75) 100 | set_property(TARGET skywalker_node2vec PROPERTY CUDA_ARCHITECTURES 75) 101 | 102 | # add_executable(skywalker_degree 103 | # ${DIR_SRCS} 104 | # src/api/bias_degree.cu 105 | # ) 106 | # target_link_libraries(skywalker_degree 107 | # ${EXTRA_LIBS} 108 | # ${CUDA_LIBRARIES} 109 | # ) 110 | # if(OpenMP_CXX_FOUND) 111 | # target_link_libraries(skywalker_degree OpenMP::OpenMP_CXX) 112 | # endif() 113 | # set_target_properties(skywalker_degree PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 114 | 115 | 116 | -------------------------------------------------------------------------------- /old/alias.cu: -------------------------------------------------------------------------------- 1 | #include "alias.cuh" 2 | #include 3 | #include 4 | #include 5 | 6 | template __global__ void init_array_d(T *ptr, size_t size, T v) { 7 | if (TID < size) { 8 | ptr[TID] = v; 9 | } 10 | } 11 | template void init_array(T *ptr, size_t size, T v) { 12 | init_array_d<<>>(ptr, size, v); 13 | } 14 | template __global__ void init_range_d(T *ptr, size_t size) { 15 | if (TID < size) { 16 | ptr[TID] = TID; 17 | } 18 | } 19 | template void init_range(T *ptr, size_t size) { 20 | init_range_d<<>>(ptr, size); 21 | } 22 | 23 | template 24 | __global__ void init(alias_table *table, T *buf1, T *buf2, T *buf3, 25 | float *buf4, float *buf5, char *buf6, T *buf7, int size, 26 | int size2) { 27 | if (TID == 0) { 28 | printf("init\n"); 29 | table->init_buffer(buf1, buf2, buf3, buf4, buf5, buf6, buf7, size, size2); 30 | } 31 | } 32 | template 33 | __global__ void load(alias_table *table, T *buf1, float *weight, int size) { 34 | // if (TID == 0) { 35 | // for (int i = 0; i < size; i++) { 36 | // printf("%f\t", weight[i]); 37 | // } 38 | // printf("\n"); 39 | // } 40 | if (TID == 0) { 41 | printf("load\n"); 42 | } 43 | table->load(buf1, weight, size); 44 | } 45 | template __global__ void kernel(alias_table *table) { 46 | if (TID == 0) { 47 | printf("kernel\n"); 48 | } 49 | table->normalize(); 50 | if (TID == 0) { 51 | printf("construct\n"); 52 | } 53 | table->construct(); 54 | } 55 | template __global__ void roll(alias_table *table, size_t num) { 56 | if (TID == 0) { 57 | printf("roll\n"); 58 | } 59 | // curandState state; 60 | // 
curand_init(0, TID, 0, &state); 61 | table->roll(&table->result, num); 62 | } 63 | 64 | // todo 65 | /* 66 | 1. prefix sum to normalize 67 | 2. 68 | */ 69 | int main(int argc, char const *argv[]) { 70 | int *buf1, *buf2, *buf3; 71 | float *buf4, *buf5; 72 | char *buf6; 73 | int *buf7; 74 | int size = 4000000; 75 | 76 | cudaSetDevice(1); 77 | cudaMalloc(&buf1, size * sizeof(int)); 78 | cudaMalloc(&buf2, size * sizeof(int)); 79 | cudaMalloc(&buf3, size * sizeof(int)); 80 | cudaMalloc(&buf4, size * sizeof(float)); 81 | cudaMalloc(&buf5, size * sizeof(float)); 82 | cudaMalloc(&buf6, size * sizeof(char)); 83 | cudaMalloc(&buf7, size / 2 * sizeof(int)); 84 | 85 | cudaMemset(buf6, size * sizeof(char), 0); 86 | 87 | int *id_ptr; 88 | float *weight_ptr; 89 | cudaMalloc(&id_ptr, size * sizeof(int)); 90 | cudaMalloc(&weight_ptr, size * sizeof(float)); 91 | init_range(id_ptr, size); 92 | init_array(weight_ptr, size / 8 * 7, 0.5); 93 | init_array(weight_ptr + size / 8 * 7, size - size / 8 * 7, 2.0); 94 | 95 | // printH(weight_ptr, size); 96 | 97 | P; 98 | alias_table *table_ptr; 99 | alias_table table_h; 100 | 101 | P; 102 | cudaMalloc(&table_ptr, 1 * sizeof(alias_table)); 103 | cudaMemcpy(table_ptr, &table_h, 1 * sizeof(alias_table), 104 | cudaMemcpyHostToDevice); 105 | P; 106 | init<<<1, 32, 0, 0>>>(table_ptr, buf1, buf2, buf3, buf4, buf5, buf6, 107 | buf7, size, size / 2); 108 | // table_ptr->init( buf1, buf2, buf3, buf4, buf5, size); 109 | // table_h.init( buf1, buf2, buf3, buf4, buf5, size); 110 | HERR(cudaPeekAtLastError()); 111 | P; 112 | load<<<1, 32, 0, 0>>>(table_ptr, id_ptr, weight_ptr, size); 113 | HERR(cudaPeekAtLastError()); 114 | P; 115 | kernel<<<1, 32, 0, 0>>>(table_ptr); 116 | P; 117 | roll<<<1, 32, 0, 0>>>(table_ptr, size / 2); 118 | P; 119 | usleep(5000); 120 | HERR(cudaDeviceSynchronize()); 121 | HERR(cudaPeekAtLastError()); 122 | return 0; 123 | } 124 | -------------------------------------------------------------------------------- /result/knightking.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-25 16:50:34 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-11 16:30:59 6 | # @FilePath: /skywalker/result/knightking.sh 7 | ### 8 | DATA=( web-Google lj orkut uk-2005 arabic-2005 sk-2005 friendster) # twitter-2010 uk-union rmat29 web-ClueWeb09) 9 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) #41652230 10 | # cd ../KnightKing/build 11 | # DATA=( lj ) # twitter-2010 uk-union rmat29 web-ClueWeb09) 12 | # NV=( 4847571 ) #41652230 13 | # DATA=( web-Google orkut arabic-2005 ) # twitter-2010 uk-union rmat29 web-ClueWeb09) 14 | # NV=(916428 3072627 22744077 ) #41652230 15 | 16 | # echo "----------------------unbiased 4k degree-------------------" 17 | # for idx in $(seq 1 ${#DATA[*]}) 18 | # do 19 | # echo ${DATA[idx-1]} 20 | # ~/sampling/KnightKing/build/bin/deepwalk -w 40000 -l 100 -s unweighted -g ~/data/${DATA[idx-1]}.uw.data -v ${NV[idx-1]} 21 | # done 22 | # echo "----------------------unbiased node2vec-------------------" 23 | # for idx in $(seq 1 ${#DATA[*]}) 24 | # do 25 | # echo ${DATA[idx-1]} 26 | # ~/sampling/KnightKing/build/bin/node2vec -w 40000 -l 100 -s unweighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx-1]}.uw.data -v ${NV[idx-1]} 27 | # done 28 | 29 | # echo "----------------------ppr unbiased 40k 64-------------------" 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # echo ${DATA[idx-1]} 33 | # ~/sampling/KnightKing/build/bin/ppr -w 40000 -s unweighted -t 
0.15 -v ${NV[idx-1]} -g ~/data/${DATA[idx-1]}.uw.data 34 | # done 35 | 36 | 37 | # echo "----------------------simple_walk 4k-------------------" 38 | # for idx in $(seq 1 ${#DATA[*]}) 39 | # do 40 | # echo ${DATA[idx-1]} 41 | # ~/sampling/KnightKing/build/bin/simple_walk -g ~/sampling/KnightKing/build/${DATA[idx-1]}.data -v ${NV[idx-1]} -w 4000 -l 100 42 | # done 43 | 44 | # echo "----------------------online 40k-------------------" 45 | # echo "----------------------biased_walk 4k degree-------------------" 46 | # for idx in $(seq 1 ${#DATA[*]}) 47 | # do 48 | # echo ${DATA[idx-1]} 49 | # ~/sampling/KnightKing/build/bin/biased_walk -w 40000 -l 100 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 50 | # done 51 | # echo "----------------------ppr biased 4k 64-------------------" 52 | # for idx in $(seq 1 ${#DATA[*]}) 53 | # do 54 | # echo ${DATA[idx-1]} 55 | # ~/sampling/KnightKing/build/bin/ppr -s weighted -t 0.15 -w 40000 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 56 | # done 57 | # echo "----------------------node2vec-------------------" 58 | # for idx in $(seq 1 ${#DATA[*]}) 59 | # do 60 | # echo ${DATA[idx-1]} 61 | # ~/sampling/KnightKing/build/bin/node2vec -w 40000 -l 100 -s weighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 62 | # done 63 | 64 | # echo "----------------------biased_walk 40k degree-------------------" 65 | # for idx in $(seq 1 ${#DATA[*]}) 66 | # do 67 | # echo ${DATA[idx-1]} 68 | # ~/sampling/KnightKing/build/bin/biased_walk -w 40000 -l 100 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 69 | # done 70 | 71 | 72 | 73 | # echo "----------------------ppr biased 40k 64-------------------" 74 | # for idx in $(seq 1 ${#DATA[*]}) 75 | # do 76 | # echo ${DATA[idx-1]} 77 | # ~/sampling/KnightKing/build/bin/ppr -s weighted -t 0.15 -w 40000 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 78 | # done 79 | 80 | 81 | # echo "----------------------biased_walk 40k degree-------------------" 82 | # for idx in $(seq 1 ${#DATA[*]}) 83 | # do 84 | # echo ${DATA[idx-1]} 85 | # ~/sampling/KnightKing/build/bin/biased_walk -w 40000 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} -l 100 86 | # done 87 | # -w ${NV[idx-1]} 88 | 89 | echo "----------------------biased node2vec-------------------" 90 | for idx in $(seq 1 ${#DATA[*]}) 91 | do 92 | echo ${DATA[idx-1]} 93 | ~/sampling/KnightKing/build/bin/node2vec -w 40000 -l 100 -s weighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 94 | done 95 | -------------------------------------------------------------------------------- /figs/v100.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:45:05 7 | # @FilePath: /skywalker/figs/scale2.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.7 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | MEM=(--umgraph --umgraph --umgraph --umgraph --umgraph --umgraph --umgraph) 13 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 14 | 15 | # DATA=( sk-2005 friendster) 16 | # HD=( 4 1 ) 17 | ITR=1 18 | NG=8 #8 19 | 20 | 21 | GR=".w.gr" 22 | EXE="./bin/main" #main_degree 23 | SG="--ngpu=1 --s" 24 | RW="--rw=1 --k 1 --d 100 " 25 | SP="--rw=0 --k 20 --d 2 " 26 | BATCH="--n 40000" 27 | 28 | # --randomweight=1 --weightrange=2 29 
| 30 | echo "-------------------------------------------------------unbias sp scale" >> scale.csv 31 | for idx in $(seq 1 ${#DATA[*]}) 32 | do 33 | for i in $(seq 1 ${ITR}) 34 | do 35 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${NG} ${SP} ${BATCH} --m=1 >> scale.csv 36 | done 37 | done 38 | 39 | echo "-------------------------------------------------------unbias rw scale" >> scale.csv 40 | for idx in $(seq 1 ${#DATA[*]}) 41 | do 42 | for i in $(seq 1 ${ITR}) 43 | do 44 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${NG} ${RW} ${BATCH} --m=1 >> scale.csv 45 | done 46 | done 47 | 48 | 49 | # echo "-------------------------------------------------------offline rw 100" >> scale.csv 50 | # for idx in $(seq 1 ${#DATA[*]}) 51 | # do 52 | # for i in $(seq 1 ${ITR}) 53 | # do 54 | # ./bin/main -bias=1 --ol=0 --ngpu=${NG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> scale.csv 55 | # done 56 | # done 57 | 58 | # echo "-------------------------------------------------------offline ppr 0.15" >> scale.csv 59 | # for idx in $(seq 1 ${#DATA[*]}) 60 | # do 61 | # for i in $(seq 1 ${ITR}) 62 | # do 63 | # ./bin/main -bias=1 --ol=0 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG} >> scale.csv 64 | # done 65 | # done 66 | 67 | 68 | # echo "-------------------------------------------------------offline sp 100" >> scale.csv 69 | # for idx in $(seq 1 ${#DATA[*]}) 70 | # do 71 | # for i in $(seq 1 ${ITR}) 72 | # do 73 | # ./bin/main -bias=1 --ol=0 --ngpu=${NG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> scale.csv 74 | # done 75 | # done 76 | 77 | # echo "-------------------------------------------------------online rw 100" >> scale.csv 78 | # for idx in $(seq 1 ${#DATA[*]}) 79 | # do 80 | # for i in $(seq 1 ${ITR}) 81 | # do 82 | # ./bin/main -bias=1 --ol=1 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG} >> scale.csv 83 | # done 84 | # done 85 | 86 | # echo "-------------------------------------------------------online ppr 0.15" >> scale.csv 87 | # for idx in $(seq 1 ${#DATA[*]}) 88 | # do 89 | # for i in $(seq 1 ${ITR}) 90 | # do 91 | # ./bin/main -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG} >> scale.csv 92 | # done 93 | # done 94 | 95 | # echo "-------------------------------------------------------online ppr 0.15" >> scale.csv 96 | # for idx in $(seq 1 ${#DATA[*]}) 97 | # do 98 | # for i in $(seq 1 ${ITR}) 99 | # do 100 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG}>> scale.csv 101 | # done 102 | # done 103 | 104 | # echo "-------------------------------------------------------online sp 100" >> scale.csv 105 | # for idx in $(seq 1 ${#DATA[*]}) 106 | # do 107 | # for i in $(seq 1 ${ITR}) 108 | # do 109 | # ./bin/main -bias=1 --ol=1 ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG}>> scale.csv 110 | # done 111 | # done 112 | 113 | -------------------------------------------------------------------------------- /scripts/multiple-gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:45:34 7 | # @FilePath: /skywalker/figs/scale.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 
sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 0.5 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | 20 | GR=".w.gr" 21 | EXE="./bin/main" #main_degree 22 | SG="--ngpu=1 --s" 23 | RW="--rw=1 --k 1 --d 100 " 24 | SP="--rw=0 --k 20 --d 2 " 25 | BATCH="--n 40000" 26 | 27 | # BATCH="--n 4" 28 | 29 | # --randomweight=1 --weightrange=2 30 | 31 | # echo "-------------------------------------------------------unbias sp scale" >> multiple-gpu.csv 32 | # for idx in $(seq 1 ${#DATA[*]}) 33 | # do 34 | # for i in $(seq 1 ${NG}) 35 | # do 36 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=$i ${SP} ${BATCH} --m=1 >> multiple-gpu.csv 37 | # done 38 | # done 39 | 40 | echo "-------------------------------------------------------table" >> multiple-gpu.csv 41 | for idx in $(seq 1 ${#DATA[*]}) 42 | do 43 | for i in $(seq 1 ${NG}) 44 | do 45 | ./bin/main -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n=0 >> multiple-gpu.csv 46 | done 47 | done 48 | 49 | # echo "-------------------------------------------------------offline rw 100" >> multiple-gpu.csv 50 | # for idx in $(seq 1 ${#DATA[*]}) 51 | # do 52 | # for i in $(seq 1 ${NG}) 53 | # do 54 | # ./bin/main -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 55 | # done 56 | # done 57 | 58 | # echo "-------------------------------------------------------offline ppr 0.15" >> multiple-gpu.csv 59 | # for idx in $(seq 1 ${#DATA[*]}) 60 | # do 61 | # for i in $(seq 1 ${NG}) 62 | # do 63 | # ./bin/main -bias=1 --ol=0 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=$i --s >> multiple-gpu.csv 64 | # done 65 | # done 66 | 67 | 68 | # echo "-------------------------------------------------------offline sp 100" >> multiple-gpu.csv 69 | # for idx in $(seq 1 ${#DATA[*]}) 70 | # do 71 | # for i in $(seq 1 ${NG}) 72 | # do 73 | # ./bin/main -bias=1 --ol=0 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 74 | # done 75 | # done 76 | 77 | 78 | 79 | 80 | 81 | 82 | # echo "-------------------------------------------------------online rw 100" >> multiple-gpu.csv 83 | # for idx in $(seq 1 ${#DATA[*]}) 84 | # do 85 | # for i in $(seq 1 ${NG}) 86 | # do 87 | # ./bin/main -bias=1 --ol=1 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 88 | # done 89 | # done 90 | 91 | # echo "-------------------------------------------------------online ppr 0.15" >> multiple-gpu.csv 92 | # for idx in $(seq 1 ${#DATA[*]}) 93 | # do 94 | # for i in $(seq 1 ${NG}) 95 | # do 96 | # ./bin/main -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=$i --s >> multiple-gpu.csv 97 | # done 98 | # done 99 | 100 | # echo "-------------------------------------------------------online ppr 0.15" >> multiple-gpu.csv 101 | # for idx in $(seq 1 ${#DATA[*]}) 102 | # do 103 | # for i in $(seq 1 ${NG}) 104 | # do 105 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=$i --s >> multiple-gpu.csv 106 | # done 107 | # done 108 | 109 | # echo 
"-------------------------------------------------------online sp 100" >> multiple-gpu.csv 110 | # for idx in $(seq 1 ${#DATA[*]}) 111 | # do 112 | # for i in $(seq 1 ${NG}) 113 | # do 114 | # ./bin/main -bias=1 --ol=1 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 115 | # done 116 | # done 117 | 118 | -------------------------------------------------------------------------------- /scripts/fig8_biased.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 15:49:20 7 | ### 8 | 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | #HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | 17 | ITR=1 18 | NG=4 #8 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--deepwalk " 23 | SP="--sage " 24 | BATCH="--n 40000 -v" 25 | 26 | ROOT_DIR=$PWD 27 | LOG_FILE=${ROOT_DIR}"/result/fig8_biased.csv" 28 | 29 | # DATA_DIR="~/data" 30 | DATA_DIR=${ROOT_DIR}"/dataset" 31 | GraphWalker_DIR="/home/pywang/sampling/GraphWalker" 32 | KnightKing_DIR="/home/pywang/sampling/KnightKing" 33 | CSAW_DIR="/home/pywang/sampling/C-SAW" 34 | 35 | 36 | echo "-------------------------------------------------------Skywalker offline rw 100 ${BATCH}" >>"${LOG_FILE}" 37 | for idx in $(seq 1 ${#DATA[*]}); do 38 | for i in $(seq 1 ${ITR}); do 39 | ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} >>"${LOG_FILE}" 40 | done 41 | done 42 | 43 | echo "-------------------------------------------------------Skywalker offline ppr 0.15 ${BATCH}" >>"${LOG_FILE}" 44 | for idx in $(seq 1 ${#DATA[*]}); do 45 | for i in $(seq 1 ${ITR}); do 46 | ./bin/main -bias=1 --ol=0 ${RW} --tp=0.15 --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} ${SG} >>"${LOG_FILE}" 47 | done 48 | done 49 | 50 | echo "-------------------------------------------------------Skywalker node2vec " >>"${LOG_FILE}" 51 | for idx in $(seq 1 ${#DATA[*]}); do 52 | for i in $(seq 1 ${ITR}); do 53 | # ./bin/node2vec -node2vec ${RW} --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} ${SG} ${POLICY} >>"${LOG_FILE}" 54 | ./bin/main --bias=1 --ol=0 --buffer --input ~/data/${DATA[idx - 1]}${GR} --ngpu 1 --node2vec ${BATCH} >>"${LOG_FILE}" 55 | done 56 | done 57 | # 58 | #echo "-------------------------------------------------------Skywalker offline sp sage ${BATCH}" >>"${LOG_FILE}" 59 | #for idx in $(seq 1 ${#DATA[*]}); do 60 | # for i in $(seq 1 ${ITR}); do 61 | # ./bin/main -bias=1 --ol=0 ${SG} --rw=0 --sage --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} >>"${LOG_FILE}" 62 | # done 63 | #done 64 | # 65 | #echo "-------------------Runtime of C-SAW need to be scale by 10 due to 4k as batch size. 
And scale by sampled edges ratio-------------------" >>"${LOG_FILE}" 66 | # 67 | #echo "----------------------C-SAW biased walk 4k 64-------------------" >>"${LOG_FILE}" 68 | #for idx in $(seq 1 ${#DATA[*]}); do 69 | # echo "------------"${DATA[idx - 1]} 70 | # $CSAW_DIR/non-stream/sampling.bin wg ~/data/${DATA[idx - 1]}.w.edge_beg_pos.bin ~/data/${DATA[idx - 1]}.w.edge_csr.bin 100 32 4000 1 1 100 1 >>"${LOG_FILE}" 71 | #done 72 | # 73 | #echo "----------------------C-SAW sampling biased 4k 20 2 64-------------------" >>"${LOG_FILE}" 74 | #for idx in $(seq 1 ${#DATA[*]}); do 75 | # echo "------------"${DATA[idx - 1]} 76 | # $CSAW_DIR/non-stream/sampling.bin wg ~/data/${DATA[idx - 1]}.w.edge_beg_pos.bin ~/data/${DATA[idx - 1]}.w.edge_csr.bin 100 32 4000 1 20 2 1 >>"${LOG_FILE}" 77 | #done 78 | # 79 | #echo "----------------------KnightKing biased_walk -------------------" >>"${LOG_FILE}" 80 | #for idx in $(seq 1 ${#DATA[*]}); do 81 | # echo "------------"${DATA[idx - 1]} 82 | # $KnightKing_DIR/build/bin/biased_walk -w 40000 -g ~/data/${DATA[idx - 1]}.data -v ${NV[idx - 1]} -l 100 >>"${LOG_FILE}" 83 | #done 84 | # 85 | #echo "----------------------KnightKing biased node2vec-------------------" >>"${LOG_FILE}" 86 | #for idx in $(seq 1 ${#DATA[*]}); do 87 | # echo "------------"${DATA[idx - 1]} 88 | # $KnightKing_DIR/build/bin/node2vec -w 40000 -l 100 -s weighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx - 1]}.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 89 | #done 90 | # 91 | #echo "----------------------KnightKing ppr biased -------------------" >>"${LOG_FILE}" 92 | #for idx in $(seq 1 ${#DATA[*]}); do 93 | # echo "------------"${DATA[idx - 1]} 94 | # $KnightKing_DIR/build/bin/ppr -s weighted -t 0.15 -w 40000 -g ~/data/${DATA[idx - 1]}.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 95 | #done 96 | -------------------------------------------------------------------------------- /figs/scale.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:45:34 7 | # @FilePath: /skywalker/figs/scale.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 0.5 1 ) 16 | ITR=1 17 | # NG=4 #8 18 | NG=(1 2 4) 19 | 20 | 21 | GR=".w.gr" 22 | EXE="./build/skywalker_multi --csv " #main_degree 23 | SG="--ngpu=1 --s" 24 | RW="--rw=1 --k 1 --d 100 " 25 | SP="--rw=0 --k 20 --d 2 " 26 | BATCH="--n 40000" 27 | 28 | # BATCH="--n 4" 29 | 30 | # --randomweight=1 --weightrange=2 31 | 32 | echo "-------------------------------------------------------unbias sp scale" >> scale.csv 33 | for idx in $(seq 1 ${#DATA[*]}) 34 | do 35 | for i in "${NG[@]}" 36 | do 37 | ./build/skywalker_multi --csv --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=$i ${SP} --n $(( $i * 40000 )) >> scale.csv 38 | done 39 | done 40 | 41 | echo "-------------------------------------------------------unbias rw scale" >> scale.csv 42 | for idx in $(seq 1 ${#DATA[*]}) 43 | do 44 | for i in "${NG[@]}" 45 | do 46 | ./build/skywalker_multi --csv --deepwalk --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=$i ${SP} --n $(( $i * 40000 )) >> scale.csv 47 | done 48 | done 49 | 
exit 0 50 | 51 | # echo "-------------------------------------------------------table" >> scale.csv 52 | # for idx in $(seq 1 ${#DATA[*]}) 53 | # do 54 | # for i in "${NG[@]}" 55 | # do 56 | # ./build/skywalker_multi --csv -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n=0 >> scale.csv 57 | # done 58 | # done 59 | # exit 0 60 | 61 | 62 | # echo "-------------------------------------------------------offline rw 100" >> scale.csv 63 | # for idx in $(seq 1 ${#DATA[*]}) 64 | # do 65 | # for i in "${NG[@]}" 66 | # do 67 | # ./build/skywalker_multi --csv -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 68 | # done 69 | # done 70 | 71 | 72 | 73 | 74 | echo "-------------------------------------------------------offline sp 100" >> scale.csv 75 | for idx in $(seq 1 ${#DATA[*]}) 76 | do 77 | for i in "${NG[@]}" 78 | do 79 | ./build/skywalker_multi --csv -bias=1 --ol=0 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 80 | done 81 | done 82 | 83 | 84 | 85 | 86 | 87 | 88 | echo "-------------------------------------------------------online rw 100" >> scale.csv 89 | for idx in $(seq 1 ${#DATA[*]}) 90 | do 91 | for i in "${NG[@]}" 92 | do 93 | ./build/skywalker_multi --csv -bias=1 --ol=1 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 94 | done 95 | done 96 | 97 | 98 | 99 | 100 | 101 | echo "-------------------------------------------------------online sp 100" >> scale.csv 102 | for idx in $(seq 1 ${#DATA[*]}) 103 | do 104 | for i in "${NG[@]}" 105 | do 106 | ./build/skywalker_multi --csv -bias=1 --ol=1 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 107 | done 108 | done 109 | 110 | # echo "-------------------------------------------------------online ppr 0.15" >> scale.csv 111 | # for idx in $(seq 1 ${#DATA[*]}) 112 | # do 113 | # for i in "${NG[@]}" 114 | # do 115 | # ./build/skywalker_multi --csv -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) --ngpu=$i --s >> scale.csv 116 | # done 117 | # done 118 | 119 | # echo "-------------------------------------------------------offline ppr 0.15" >> scale.csv 120 | # for idx in $(seq 1 ${#DATA[*]}) 121 | # do 122 | # for i in "${NG[@]}" 123 | # do 124 | # ./build/skywalker_multi --csv -bias=1 --ol=0 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) --ngpu=$i --s >> scale.csv 125 | # done 126 | # done 127 | 128 | 129 | # echo "-------------------------------------------------------online node2vec 0.15" >> scale.csv 130 | # for idx in $(seq 1 ${#DATA[*]}) 131 | # do 132 | # for i in "${NG[@]}" 133 | # do 134 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) --ngpu=$i --s >> scale.csv 135 | # done 136 | # done -------------------------------------------------------------------------------- /scripts/table3_unbiased.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-17 21:38:38 7 | # @FilePath: /skywalker/figs/unbiased.sh 8 | ### 9 | 10 | # using sample dataset 11 | DATA=(lj) 12 | HD=(0.5) 13 | NV=(4847571) 14 | 15 | 
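# DATA lists the dataset basenames, NV the matching vertex counts (passed to the
# KnightKing and GraphWalker baselines below), and HD per-dataset values that other
# scripts forward via --hd (not used in this script). Only the small lj sample
# configuration is active here; the full dataset lists below are kept commented out.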
16 | # DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 17 | # HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 18 | # NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 19 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 20 | 21 | # DATA=( sk-2005 friendster) 22 | # HD=( 4 1 ) 23 | ITR=1 24 | NG=4 #8 25 | 26 | GR=".w.gr" 27 | EXE="./bin/main" #main_degree 28 | SG="--ngpu=1 --s" 29 | RW="--rw=1 --k 1 --d 100 " 30 | SP="--rw=0 --k 20 --d 2 " 31 | BATCH="--n=40000 -v" 32 | 33 | ROOT_DIR=$PWD 34 | LOG_FILE=${ROOT_DIR}"/result/table3_unbiased.csv" 35 | 36 | # DATA_DIR="~/data" 37 | DATA_DIR=${ROOT_DIR}"/dataset" 38 | GraphWalker_DIR="/home/pywang/sampling/GraphWalker" 39 | KnightKing_DIR="/home/pywang/sampling/KnightKing" 40 | CSAW_DIR="/home/pywang/sampling/C-SAW" 41 | NEXTDOOR_DIR="/home/pywang/sampling/nextdoor-experiments" 42 | 43 | echo "-------------------------------------------------------Skywalker unbias rw 100" >>"${LOG_FILE}" 44 | for idx in $(seq 1 ${#DATA[*]}); do 45 | ./bin/main --bias=0 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 ${RW} ${BATCH} >>"${LOG_FILE}" 46 | done 47 | 48 | echo "-------------------------------------------------------Skywalker unbias ppr 100" >>"${LOG_FILE}" 49 | for idx in $(seq 1 ${#DATA[*]}); do 50 | ./bin/main --bias=0 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --tp=0.15 ${RW} ${BATCH} >>"${LOG_FILE}" 51 | done 52 | 53 | echo "-------------------------------------------------------Skywalker unbias node2vec" >>"${LOG_FILE}" 54 | for idx in $(seq 1 ${#DATA[*]}); do 55 | ./bin/main --bias=0 --ol=0 --buffer --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --node2vec ${BATCH} >>"${LOG_FILE}" 56 | done 57 | 58 | echo "-------------------------------------------------------Skywalker unbias sage 40k" >>"${LOG_FILE}" 59 | for idx in $(seq 1 ${#DATA[*]}); do 60 | ./bin/main --bias=0 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --sage ${BATCH} >>"${LOG_FILE}" 61 | done 62 | 63 | echo "----------------------KnightKing unbiased 40k degree-------------------" >>"${LOG_FILE}" 64 | for idx in $(seq 1 ${#DATA[*]}); do 65 | echo ${DATA[idx - 1]} >>"${LOG_FILE}" 66 | $KnightKing_DIR/build/bin/deepwalk -w 40000 -l 100 -s unweighted -g $DATA_DIR/${DATA[idx - 1]}.uw.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 67 | done 68 | echo "----------------------KnightKing unbiased node2vec-------------------" >>"${LOG_FILE}" 69 | for idx in $(seq 1 ${#DATA[*]}); do 70 | echo ${DATA[idx - 1]} >>"${LOG_FILE}" 71 | $KnightKing_DIR/build/bin/node2vec -w 40000 -l 100 -s unweighted -p 2.0 -q 0.5 -g $DATA_DIR/${DATA[idx - 1]}.uw.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 72 | done 73 | 74 | echo "----------------------KnightKing ppr unbiased ------------------" >>"${LOG_FILE}" 75 | for idx in $(seq 1 ${#DATA[*]}); do 76 | echo ${DATA[idx - 1]} >>"${LOG_FILE}" 77 | $KnightKing_DIR/build/bin/ppr -w 40000 -s unweighted -t 0.15 -v ${NV[idx - 1]} -g $DATA_DIR/${DATA[idx - 1]}.uw.data >>"${LOG_FILE}" 78 | done 79 | 80 | echo "----------------------nextdoor node2vec -------------------" >>"${LOG_FILE}" 81 | for idx in $(seq 1 ${#DATA[*]}); do 82 | echo "------------"${DATA[idx - 1]} >>"${LOG_FILE}" 83 | $NEXTDOOR_DIR/NextDoor/src/apps/randomwalks/Node2VecSampling -g $DATA_DIR/${DATA[idx - 1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l >>"${LOG_FILE}" 84 | done 85 | echo "----------------------nextdoor kh sample-------------------" >>"${LOG_FILE}" 86 | for idx in 
$(seq 1 ${#DATA[*]}); do 87 | echo "------------"${DATA[idx - 1]} >>"${LOG_FILE}" 88 | $NEXTDOOR_DIR/NextDoor/src/apps/khop/KHopSampling -g $DATA_DIR/${DATA[idx - 1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l >>"${LOG_FILE}" 89 | done 90 | 91 | ED=".w.edge" 92 | EXE="./bin/apps/rwdomination" #main_degree 93 | 94 | cd $GraphWalker_DIR 95 | echo "------------skipping web-Google and orkut for GraphWalker due to internal errors" 96 | echo "-------------------------------------------------------GraphWalker unbias rw 40000 100" >>"${LOG_FILE}" 97 | for idx in $(seq 1 ${#DATA[*]}); do 98 | if [ ${DATA[idx - 1]} != "web-Google" -a ${DATA[idx - 1]} != "orkut" ] 99 | then 100 | ./bin/apps/rawrandomwalks file ~/data/${DATA[idx - 1]}.w.edge R 40000 L 100 N ${NV[idx - 1]} >>"${LOG_FILE}" 101 | fi 102 | done 103 | 104 | echo "-------------------------------------------------------GraphWalker unbias ppr 40000 100" >>"${LOG_FILE}" 105 | for idx in $(seq 1 ${#DATA[*]}); do 106 | if [ ${DATA[idx - 1]} != "web-Google" -a ${DATA[idx - 1]} != "orkut" ] 107 | then 108 | ./bin/apps/msppr file ~/data/${DATA[idx - 1]}.w.edge firstsource 0 numsources 40000 walkspersource 1 maxwalklength 100 prob 0.15 >>"${LOG_FILE}" 109 | fi 110 | done 111 | -------------------------------------------------------------------------------- /include/app.cuh: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | #include "kernel.cuh" 3 | #include "roller.cuh" 4 | #include "sampler.cuh" 5 | #include "sampler_result.cuh" 6 | #include "util.cuh" 7 | 8 | // #include 9 | // #include 10 | #include 11 | 12 | DECLARE_bool(debug); 13 | DECLARE_bool(v); 14 | DECLARE_double(tp); 15 | DECLARE_bool(printresult); 16 | DECLARE_int32(m); 17 | DECLARE_bool(peritr); 18 | 19 | DECLARE_bool(static); 20 | DECLARE_bool(buffer); 21 | DECLARE_bool(loc); 22 | template 23 | struct duplicate_checker { 24 | T sampled[length]; 25 | int size = 0; 26 | __device__ bool check(T input) { 27 | for (size_t i = 0; i < size; i++) { 28 | if (sampled[i] == input) return false; 29 | } 30 | sampled[size] = input; 31 | size++; 32 | return true; 33 | } 34 | }; 35 | 36 | template 37 | struct matrixBuffer { 38 | T data[blockSize * tileSize]; 39 | uint *ptr_per_thread[blockSize]; 40 | int length[blockSize]; 41 | uint mainLength[blockSize / 42 | 32]; // each warp maintains one length; what is this used for? 43 | uint outItr[blockSize / 32]; // indicates the output location when flushing 44 | // multiple times 45 | 46 | uint tileLen; 47 | 48 | __device__ void Init() { 49 | // if (!LID) printf("line:%d function:%s \n", __LINE__, __FUNCTION__); 50 | length[LTID] = 0; 51 | ptr_per_thread[LTID] = nullptr; 52 | if (LID == 0) { 53 | tileLen = tileSize; 54 | mainLength[WID] = 0; 55 | outItr[WID] = 0; 56 | } 57 | } 58 | // deprecated due to error?
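// Flush drains the per-thread tiles cooperatively: each active lane first
// publishes its destination pointer, then the coalesced group walks all 32
// thread slots of the warp and copies every slot's buffered entries to global
// memory at ptr + outItr (+1 in Flush, since the sample result begins with the
// root id), so consecutive lanes write to consecutive addresses.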
59 | __device__ void Flush(uint *ptr, uint itr, coalesced_group &active) { 60 | // if (!LID) printf("行号:%d 函数名:%s \n", __LINE__, __FUNCTION__); 61 | // coalesced_group active = coalesced_threads(); 62 | // printf("active.size() %u\n",active.size()); 63 | // if (active.thread_rank() == 0) mainLength[WID]++; 64 | uint active_size = active.size(); 65 | uint rank = active.thread_rank(); 66 | ptr_per_thread[LTID] = ptr; 67 | active.sync(); 68 | for (size_t i = WID * 32; i < WID * 32 + 32; 69 | i++) { // loop over threads in warp 70 | active.sync(); 71 | // if (i == 2) printf("adding rank %u length[i] %u\n",rank,length[i]); 72 | for (size_t j = rank; j < length[i]; 73 | j += active_size) { // loop over data // active.size() 74 | // if (i == 2) printf("add for 2\n"); 75 | if (ptr_per_thread[i] != nullptr) 76 | *(ptr_per_thread[i] + outItr[WID] + j + 1) = data[i * tileSize + j]; 77 | // plus 1 as the sampleResult start with root id 78 | // if(idx_i==0) printf("add %u to idx\n",graph->getOutNode(src_id, 79 | // candidate)); 80 | // if (i == 2) printf("add0 %u to idx\n", data[i * tileSize + j]); 81 | } 82 | } 83 | } 84 | __device__ void Flush2(uint *ptr, coalesced_group &active) { 85 | // if (!LID) printf("行号:%d 函数名:%s \n", __LINE__, __FUNCTION__); 86 | // coalesced_group active = coalesced_threads(); 87 | // if (active.size() != 32) printf("active.size() %u\n", active.size()); 88 | // if (active.thread_rank() == 0) mainLength[WID]++; 89 | int active_size = active.size(); 90 | int rank = active.thread_rank(); 91 | ptr_per_thread[LTID] = ptr; 92 | active.sync(); 93 | for (size_t i = WID * 32; i < WID * 32 + 32; 94 | i++) { // loop over threads in warp 95 | active.sync(); 96 | for (size_t j = rank; j < length[i]; 97 | j += active_size) { // loop over data // active.size() 98 | if (ptr_per_thread[i] != nullptr) 99 | *(ptr_per_thread[i] + outItr[WID] + j) = data[i * tileSize + j]; 100 | // if(i==0) printf("add %u to idx\n",data[i * tileSize + j]); 101 | } 102 | } 103 | } 104 | __device__ void CheckFlush(uint *ptr, uint itr, coalesced_group &active) { 105 | if (active.thread_rank() == 0) mainLength[WID]++; 106 | active.sync(); 107 | // printf("active.sync() %u itr %u \n", active.thread_rank(), itr); 108 | 109 | if (mainLength[WID] >= tileSize) { 110 | active.sync(); 111 | ptr_per_thread[LTID] = ptr; 112 | for (size_t i = WID * 32; i < WID * 32 + 32; 113 | i++) { // loop over threads in warp 114 | active.sync(); 115 | for (size_t j = active.thread_rank(); j < length[i]; // loop over data 116 | j += active.size()) { 117 | *(ptr_per_thread[i] + outItr[WID] + j + 1) = data[i * tileSize + j]; 118 | // if (i == 2) printf("add %u to idx\n", data[i * tileSize + j]); 119 | } 120 | if (active.thread_rank() == 0) length[i] = 0; 121 | } 122 | // active.sync(); 123 | if (active.thread_rank() == 0) { 124 | mainLength[WID] = 0; 125 | outItr[WID] += tileSize; 126 | } 127 | } 128 | } 129 | __device__ void Finish() { length[LTID] = 0; } 130 | 131 | /** 132 | * @description: set data in buffer for each thread 133 | * @param {*} 134 | * @return {*} 135 | */ 136 | __forceinline__ __device__ void Set(uint v) { 137 | data[LTID * tileSize + length[LTID]] = v; 138 | // length[LTID]=length[LTID]+1; 139 | atomicAdd(length + LTID, 1); 140 | // if(length[LTID]>=tileSize) // better to manually flush in case of 141 | // divergence 142 | } 143 | __device__ void CollectiveSet(uint id, uint v) { 144 | coalesced_group local = coalesced_threads(); 145 | data[id * tileSize + length[id] + local.thread_rank()] = v; 146 | if 
(local.thread_rank() == 0) length[id] += local.size(); 147 | // if(length[LTID]>=tileSize) // better to manually flush in case of 148 | // divergence 149 | } 150 | }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Skywalker 2 | 3 | This is the repo for Skywalker, an efficient alias-method-based graph sampling and random walk framework on GPUs. 4 | 5 | ## Introduction 6 | 7 | Graph sampling and random walk operations, which capture the structural properties of graphs, play an important role today because we cannot directly run computing-intensive 8 | algorithms on large-scale graphs. Existing system frameworks for these tasks are not only spatially and temporally inefficient, but many also lead to biased results. This paper presents Skywalker, a high-throughput, quality-preserving random walk and sampling framework based on GPUs. Skywalker makes three key contributions: first, it takes the first step to realize efficient biased sampling with the alias method on a GPU. Second, it introduces well-crafted load-balancing techniques to effectively utilize the massive parallelism of GPUs. Third, it accelerates alias table construction and reduces the GPU memory requirement with an efficient memory management scheme. We show that Skywalker 9 | greatly outperforms the state-of-the-art CPU-based and GPU-based baselines in a wide spectrum of workload scenarios. 10 | 11 | For details, please first refer to our 2021 PACT paper ["Skywalker: Efficient Alias-Method-Based Graph Sampling and Random Walk on GPUs"](https://ieeexplore.ieee.org/document/9563020) by Pengyu Wang, Chao Li, Jing Wang, Taolei Wang, Lu Zhang, Jingwen Leng, Quan Chen, and Minyi Guo. If you have any questions, please feel free to contact us. 12 | 13 | Beyond the contributions mentioned above, we further extended the framework to a multi-GPU version and made a series of optimizations. The new work, named Skywalker+, has been submitted to TPDS. 14 | 15 | ## Setup 16 | ``` 17 | git clone https://github.com/wpybtw/skywalker_artifact --recursive 18 | ``` 19 | 20 | Note that CMake is not fully set up yet. We use CMake to build gflags and then run make: 21 | ``` 22 | cd build 23 | cmake .. 24 | make -j 25 | cd .. 26 | make 27 | ``` 28 | 29 | ## Dataset 30 | When evaluating Skywalker, we use 7 commonly used graph datasets: web-Google, LiveJournal, Orkut, Arabic-2005, UK-2005, Friendster, and SK-2005. The datasets can be downloaded from [SNAP](http://snap.stanford.edu/data/index.html) and [Webgraph](http://law.di.unimi.it/datasets.php). You can also run Skywalker on your own datasets, as long as they are prepared as described in the Preprocessing section. 31 | 32 | 33 | ## Preprocessing 34 | Skywalker uses the [Galois](https://iss.oden.utexas.edu/?p=projects/galois) graph format (.gr) as its input. Other formats such as edge lists (from [SNAP](http://snap.stanford.edu/data/index.html)) or Matrix Market files can be converted to it with Galois' graph-convert tool. Compressed graphs such as [Webgraph](http://law.di.unimi.it/datasets.php) datasets need to be uncompressed first.
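The evaluation scripts under ```./scripts``` and ```./figs``` assume per-dataset files following the naming below. This listing is collected from the scripts themselves; the ```~/data``` location and the suffixes are only the defaults used there, so adjust them to your own setup:
```
~/data/<name>.w.gr                                           # weighted Galois graph, passed to ./bin/main via --input
~/data/<name>.w.edge                                         # plain edge list, used by the GraphWalker baseline
~/data/<name>.data and <name>.uw.data                        # (un)weighted edge lists, used by KnightKing and NextDoor
~/data/<name>.w.edge_beg_pos.bin and <name>.w.edge_csr.bin   # CSR binaries, used by C-SAW
```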
35 | Here is an example of converting a SNAP edge list: 36 | ``` 37 | wget http://snap.stanford.edu/data/wiki-Vote.txt.gz 38 | gzip -d wiki-Vote.txt.gz 39 | $GALOIS_PATH/build/tools/graph-convert/graph-convert -edgelist2gr ~/data/wiki-Vote.txt ~/data/wiki-Vote.gr 40 | ``` 41 | ## Execution 42 | We implemented four different algorithms in Skywalker, namely DeepWalk, PPR, Node2vec, and neighbour sampling, all based on the alias method. We support both online and offline sampling, i.e., constructing the alias table either on the fly or for all vertices of a graph at once as a preprocessing step. The source files are placed under the ```./src``` and ```./include``` folders. Skywalker is configured through gflags, and the default values are defined in ```main.cu``` in the ```src``` folder. You can check what the different flags mean from their annotations and change the configuration simply by editing the command line. Here are several examples demonstrating how to invoke Skywalker: 43 | 44 | Execute DeepWalk: 45 | ``` 46 | ./bin/main --deepwalk --bias=1 --ol=0 --buffer --input ~/data/friendster.w.gr -v -n 40000 47 | ``` 48 | Here, ```--deepwalk``` selects the algorithm to execute. ```--bias=1 --ol=0``` means we run biased DeepWalk in offline mode. ```--buffer``` enables the GPU buffer to improve performance, ```--input``` sets the input graph, ```-v``` prints more information, and ```-n``` sets the batch size. 49 | 50 | Execute node2vec: 51 | ``` 52 | ./bin/main --gmgraph --bias=1 --ol=0 --buffer --input ~/data/friendster.w.gr --ngpu 4 --node2vec -n 40000 53 | ``` 54 | Besides the basic settings of Skywalker, Skywalker+ enables more options: you can choose which kind of memory to use and how many GPUs to run on in multi-GPU mode. 55 | 56 | If you just want to verify the results in our paper, you can simply use the scripts in the ```./scripts``` folder, which contains all the scripts we used to generate the evaluation data. As long as you have stored and preprocessed the datasets correctly, all you need to change are the paths in the scripts, and the results will automatically be stored in CSV format. 57 | 58 | Run a script: 59 | ``` 60 | bash ./scripts/biased.sh 61 | ``` 62 | 63 | Note that we also run several other graph sampling and random walk frameworks for comparison. Their configurations are included in our scripts, but you still need to build those frameworks and set their paths yourself, otherwise some scripts will fail. 64 | 65 | 66 | ## Details 67 | ```--newsampler``` indicates using Sampler_new for a correct result layout. Is it only for unbiased and offline sampling? 68 | 69 | ## Contributors 70 | Our team has been working on related technologies since 2017. Thank you to everyone for contributing to this project.
71 | 72 | Correspondence to: 73 | - [Pengyu Wang](wpybtw@sjtu.edu.cn) (wpybtw@sjtu.edu.cn) 74 | - [Cheng Xu](jerryxu@sjtu.edu.cn) (jerryxu@sjtu.edu.cn) 75 | - [Chao Li](lichao@cs.sjtu.edu.cn) (lichao@cs.sjtu.edu.cn) 76 | - [Jieping Ye](yejieping.ye@alibaba-inc.com)(yejieping.ye@alibaba-inc.com) 77 | - [Jing Wang](jing618@sjtu.edu.cn) (jing618@sjtu.edu.cn) 78 | - [Taolei Wang](sjtuwtl@sjtu.edu.cn) (sjtuwtl@sjtu.edu.cn) 79 | - [Lu Zhang](luzhang@sjtu.edu.cn) (luzhang@sjtu.edu.cn) 80 | - [Yue Wu](matthew.wy@alibaba-inc.com)(matthew.wy@alibaba-inc.com) 81 | - [Jingwen Leng](leng-jw@cs.sjtu.edu.cn) (leng-jw@cs.sjtu.edu.cn) 82 | - [Quan Chen](chen-quan@cs.sjtu.edu.cn) (chen-quan@cs.sjtu.edu.cn) 83 | - [Minyi Guo](guo-my@cs.sjtu.edu.cn) (guo-my@cs.sjtu.edu.cn) 84 | 85 | -------------------------------------------------------------------------------- /src/util.cu: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | 3 | // __device__ char char_atomicCAS(char *addr, char cmp, char val) { 4 | // unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) 5 | // & 6 | // (0xFFFFFFFFFFFFFFFCULL)); 7 | // unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 8 | // unsigned mask = 0xFFU; 9 | // mask <<= al_offset; 10 | // mask = ~mask; 11 | // unsigned sval = val; 12 | // sval <<= al_offset; 13 | // unsigned old = *al_addr, assumed, setval; 14 | // do { 15 | // assumed = old; 16 | // setval = assumed & mask; 17 | // setval |= sval; 18 | // old = atomicCAS(al_addr, assumed, setval); 19 | // } while (assumed != old); 20 | // return (char)((assumed >> al_offset) & 0xFFU); 21 | // } 22 | 23 | // template 24 | // __inline__ __device__ T warpPrefixSum(T val, int lane_id) { 25 | // T val_shuffled; 26 | // for (int offset = 1; offset < warpSize; offset *= 2) { 27 | // val_shuffled = __shfl_up(val, offset); 28 | // if (lane_id >= offset) { 29 | // val += val_shuffled; 30 | // } 31 | // } 32 | // return val; 33 | // } 34 | 35 | double wtime() { 36 | double time[2]; 37 | struct timeval time1; 38 | gettimeofday(&time1, NULL); 39 | 40 | time[0] = time1.tv_sec; 41 | time[1] = time1.tv_usec; 42 | 43 | return time[0] + time[1] * 1.0e-6; 44 | } 45 | __device__ void __conv() { coalesced_group active = coalesced_threads(); } 46 | __device__ void active_size(int n = 0) { 47 | coalesced_group active = coalesced_threads(); 48 | if (active.thread_rank() == 0) 49 | printf("TBID: %d WID: %d coalesced_group %llu at line %d\n", BID, WID, 50 | active.size(), n); 51 | } 52 | __device__ int active_size2(char *txt, int n = 0) { 53 | coalesced_group active = coalesced_threads(); 54 | if (active.thread_rank() == 0) 55 | printf("%s coalesced_group %llu at line %d\n", txt, active.size(), n); 56 | } 57 | template 58 | void printH(T *ptr, int size) { 59 | T *ptrh = new T[size]; 60 | CUDA_RT_CALL(cudaMemcpy(ptrh, ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); 61 | printf("printH: "); 62 | for (size_t i = 0; i < size; i++) { 63 | // printf("%d\t", ptrh[i]); 64 | std::cout << ptrh[i] << "\t"; 65 | } 66 | printf("\n"); 67 | delete ptrh; 68 | } 69 | 70 | // https://forums.developer.nvidia.com/t/how-can-i-use-atomicsub-for-floats-and-doubles/64340/5 71 | // __device__ double my_atomicSub(double *address, double val) { 72 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 73 | // unsigned long long int old = *address_as_ull, assumed; 74 | // do { 75 | // assumed = old; 76 | // old = atomicCAS( 77 | // address_as_ull, assumed, 78 | // 
__double_as_longlong(__longlong_as_double(assumed) - 79 | // val)); // Note: uses integer comparison to avoid 80 | // // hang in case of NaN (since NaN != NaN) 81 | // } while (assumed != old); 82 | // return __longlong_as_double(old); 83 | // } 84 | 85 | // https://forums.developer.nvidia.com/t/how-can-i-use-atomicsub-for-floats-and-doubles/64340/5 86 | __device__ float my_atomicSub(float *address, float val) { 87 | int *address_as_int = (int *)address; 88 | int old = *address_as_int, assumed; 89 | do { 90 | assumed = old; 91 | old = atomicCAS( 92 | address_as_int, assumed, 93 | __float_as_int(__int_as_float(assumed) - 94 | val)); // Note: uses integer comparison to avoid hang in 95 | // case of NaN (since NaN != NaN) 96 | } while (assumed != old); 97 | return __int_as_float(old); 98 | } 99 | 100 | // __device__ long long my_atomicSub(long long *address, long long val) { 101 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 102 | // unsigned long long int old = *address_as_ull, assumed; 103 | // do { 104 | // assumed = old; 105 | // old = atomicCAS(address_as_ull, assumed, 106 | // ((assumed)-val)); // Note: uses integer comparison to avoid 107 | // // hang in case of NaN (since NaN != NaN) 108 | // } while (assumed != old); 109 | // return (old); 110 | // } 111 | 112 | // __device__ unsigned long long my_atomicSub(unsigned long long *address, 113 | // unsigned long long val) { 114 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 115 | // unsigned long long int old = *address_as_ull, assumed; 116 | // do { 117 | // assumed = old; 118 | // old = atomicCAS(address_as_ull, assumed, ((assumed)-val)); 119 | // } while (assumed != old); 120 | // return (old); 121 | // } 122 | 123 | // __device__ long long my_atomicAdd(long long *address, long long val) { 124 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 125 | // unsigned long long int old = *address_as_ull, assumed; 126 | // do { 127 | // assumed = old; 128 | // old = atomicCAS(address_as_ull, assumed, ((assumed) + val)); 129 | // } while (assumed != old); 130 | // return (old); 131 | // } 132 | 133 | template <> 134 | __device__ void printD(float *ptr, size_t size) { 135 | printf("printDf: size %llu: ", (u64)size); 136 | for (size_t i = 0; i < size; i++) { 137 | printf("%f\t", ptr[i]); 138 | } 139 | printf("\n"); 140 | } 141 | template <> 142 | __device__ void printD(int *ptr, size_t size) { 143 | printf("printDf: size %llu: ", (u64)size); 144 | for (size_t i = 0; i < size; i++) { 145 | printf("%d\t", ptr[i]); 146 | } 147 | printf("\n"); 148 | } 149 | 150 | template <> 151 | __device__ void printD(uint *ptr, size_t size) { 152 | printf("printDf: size %llu: ", (u64)size); 153 | for (size_t i = 0; i < size; i++) { 154 | printf("%u\t", ptr[i]); 155 | } 156 | printf("\n"); 157 | } 158 | 159 | // template __global__ void init_range_d(T *ptr, size_t size) { 160 | // if (TID < size) { 161 | // ptr[TID] = TID; 162 | // } 163 | // } 164 | // template void init_range(T *ptr, size_t size) { 165 | // init_range_d<<>>(ptr, size); 166 | // } 167 | // template __global__ void init_array_d(T *ptr, size_t size, T v) 168 | // { 169 | // if (TID < size) { 170 | // ptr[TID] = v; 171 | // } 172 | // } 173 | // template void init_array(T *ptr, size_t size, T v) { 174 | // init_array_d<<>>(ptr, size, v); 175 | // } 176 | -------------------------------------------------------------------------------- /figs/spec.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:43:38 7 | # @FilePath: /skywalker/figs/online.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 1 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n 40000" 25 | 26 | # --randomweight=1 --weightrange=2 27 | 28 | 29 | # echo "------------------------------table construction compare-----------------" >> spec.csv 30 | 31 | # echo "-------------------------------------------------------main_nospec" >> spec.csv 32 | # for idx in $(seq 1 ${#DATA[*]}) 33 | # do 34 | # for i in $(seq 1 ${ITR}) 35 | # do 36 | # ./bin/main_nospec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 37 | # done 38 | # done 39 | # echo "-------------------------------------------------------main_spec" >> spec.csv 40 | # for idx in $(seq 1 ${#DATA[*]}) 41 | # do 42 | # for i in $(seq 1 ${ITR}) 43 | # do 44 | # ./bin/main_spec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 45 | # done 46 | # done 47 | # echo "-------------------------------------------------------main_degree_nospec table" >> spec.csv 48 | # for idx in $(seq 1 ${#DATA[*]}) 49 | # do 50 | # for i in $(seq 1 ${ITR}) 51 | # do 52 | # ./bin/main_degree_nospec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 53 | # done 54 | # done 55 | # echo "-------------------------------------------------------main_degree_spec table" >> spec.csv 56 | # for idx in $(seq 1 ${#DATA[*]}) 57 | # do 58 | # for i in $(seq 1 ${ITR}) 59 | # do 60 | # ./bin/main_degree_spec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 61 | # done 62 | # done 63 | 64 | 65 | # echo "-------------------------------------------------------main_nospec sp 100" >> spec.csv 66 | # for idx in $(seq 1 ${#DATA[*]}) 67 | # do 68 | # for i in $(seq 1 ${ITR}) 69 | # do 70 | # ./bin/main_nospec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 71 | # done 72 | # done 73 | # echo "-------------------------------------------------------main_degree_nospec sp 100" >> spec.csv 74 | # for idx in $(seq 1 ${#DATA[*]}) 75 | # do 76 | # for i in $(seq 1 ${ITR}) 77 | # do 78 | # ./bin/main_degree_nospec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 79 | # done 80 | # done 81 | # echo "-------------------------------------------------------main_spec sp 100" >> spec.csv 82 | # for idx in $(seq 1 ${#DATA[*]}) 83 | # do 84 | # for i in $(seq 1 ${ITR}) 85 | # do 86 | # ./bin/main_spec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 87 | # done 88 | # done 89 | 90 | # echo "-------------------------------------------------------main_degree_spec sp 100" >> spec.csv 91 | # for idx in 
$(seq 1 ${#DATA[*]}) 92 | # do 93 | # for i in $(seq 1 ${ITR}) 94 | # do 95 | # ./bin/main_degree_spec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 96 | # done 97 | # done 98 | 99 | 100 | echo "-------------------------------------------------------main_nospec rw 100" >> spec.csv 101 | for idx in $(seq 1 ${#DATA[*]}) 102 | do 103 | for i in $(seq 1 ${ITR}) 104 | do 105 | ./bin/main_nospec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 106 | done 107 | done 108 | echo "-------------------------------------------------------main_spec rw 100" >> spec.csv 109 | for idx in $(seq 1 ${#DATA[*]}) 110 | do 111 | for i in $(seq 1 ${ITR}) 112 | do 113 | ./bin/main_spec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 114 | done 115 | done 116 | echo "-------------------------------------------------------main_degree_nospec rw 100" >> spec.csv 117 | for idx in $(seq 1 ${#DATA[*]}) 118 | do 119 | for i in $(seq 1 ${ITR}) 120 | do 121 | ./bin/main_degree_nospec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 122 | done 123 | done 124 | echo "-------------------------------------------------------main_degree_spec rw 100" >> spec.csv 125 | for idx in $(seq 1 ${#DATA[*]}) 126 | do 127 | for i in $(seq 1 ${ITR}) 128 | do 129 | ./bin/main_degree_spec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 130 | done 131 | done 132 | 133 | 134 | 135 | # echo "-------------------------------------------------------online ppr 0.15" >> spec.csv 136 | # for idx in $(seq 1 ${#DATA[*]}) 137 | # do 138 | # for i in $(seq 1 ${ITR}) 139 | # do 140 | # ./bin/main -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> spec.csv 141 | # done 142 | # done 143 | 144 | # echo "-------------------------------------------------------online node2vec 0.15" >> spec.csv 145 | # for idx in $(seq 1 ${#DATA[*]}) 146 | # do 147 | # for i in $(seq 1 ${ITR}) 148 | # do 149 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> spec.csv 150 | # done 151 | # done 152 | 153 | 154 | 155 | # echo "------------------------------------------------------- spec ---------------------------------------------------------" >> spec.csv 156 | # echo "-------------------------------------------------------online rw 100" >> spec.csv 157 | # for idx in $(seq 1 ${#DATA[*]}) 158 | # do 159 | # for i in $(seq 1 ${ITR}) 160 | # do 161 | # ./bin/main_spec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 162 | # done 163 | # done 164 | 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /old/graph.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _GRAPH_CUH 2 | #define _GRAPH_CUH 3 | 4 | // #include "app.cuh" 5 | #include "common.cuh" 6 | // #include "intrinsics.cuh" 7 | // #include "job.cuh" 8 | // #include "print.cuh" 9 | // #include "timer.cuh" 10 | // #include "worklist.cuh" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | using 
namespace intrinsics; 31 | DECLARE_bool(map); 32 | 33 | template void PrintResults(T *results, uint n); 34 | 35 | class Graph { 36 | public: 37 | // Input input; 38 | // vector tasks; 39 | // ulong memRequest; 40 | // ulong memRequest_d; 41 | string graphFilePath; 42 | int napp; 43 | void *gr_ptr; 44 | edge_t *map_xadj; 45 | vtx_t *map_adjncy; 46 | weight_t *map_adjwgt; 47 | size_t filesize; 48 | 49 | bool hasZeroID; 50 | uint64_t numNode; 51 | uint64_t numEdge; 52 | // std::vector edges; 53 | std::vector weights; 54 | uint64_t sizeEdgeTy; 55 | 56 | // graph 57 | // vtx_t *vwgt_d,*vwgt ; 58 | edge_t *xadj, *xadj_d; 59 | vtx_t *adjncy, *adjncy_d; 60 | weight_t *adjwgt, *adjwgt_d; 61 | uint *inDegree; 62 | uint *outDegree; 63 | bool weighted; 64 | bool withWeight; 65 | 66 | 67 | // scheduler-specific 68 | // uint assigned_sm; 69 | // int device = 0; 70 | // uint64_t gmem_used = 0; 71 | // uint64_t um_used = 0; 72 | 73 | Graph(); 74 | ~Graph() { 75 | // if (!FLAGS_map) { 76 | H_ERR(cudaFree(xadj)); 77 | H_ERR(cudaFree(adjncy)); 78 | H_ERR(cudaFree(adjwgt)); 79 | // }else{ 80 | // munmap(gr_ptr, filesize); 81 | // } 82 | } 83 | 84 | 85 | void Load(); 86 | void Map(); 87 | 88 | void Process_mmap(cudaStream_t &stream); 89 | void Prepare(cudaStream_t &stream); 90 | void Transfer(cudaStream_t &stream, bool large = false); 91 | void Process(cudaStream_t &stream); 92 | void Cleanup(cudaStream_t &stream); 93 | 94 | void Process_single(cudaStream_t &stream); 95 | void Process_one_by_one(cudaStream_t &stream, bool large_graph = false); 96 | void Process_multi_thread(); 97 | void Set_Mem_Policy(cudaStream_t &stream, bool needWeight = false); 98 | void gk_fclose(FILE *fp) { fclose(fp); } 99 | 100 | FILE *gk_fopen(const char *fname, const char *mode, const char *msg) { 101 | FILE *fp; 102 | char errmsg[8192]; 103 | fp = fopen(fname, mode); 104 | if (fp != NULL) 105 | return fp; 106 | sprintf(errmsg, "file: %s, mode: %s, [%s]", fname, mode, msg); 107 | perror(errmsg); 108 | printf("Failed on gk_fopen()\n"); 109 | return NULL; 110 | } 111 | 112 | void ReadGraphGRHead() { 113 | FILE *fpin; 114 | bool readew; 115 | fpin = gk_fopen(graphFilePath.data(), "r", "ReadGraphGR: Graph"); 116 | size_t read; 117 | uint64_t x[4]; 118 | if (fread(x, sizeof(uint64_t), 4, fpin) != 4) { 119 | printf("Unable to read header\n"); 120 | } 121 | if (x[0] != 1) /* version */ 122 | printf("Unknown file version\n"); 123 | sizeEdgeTy = x[1]; 124 | // uint64_t sizeEdgeTy = le64toh(x[1]); 125 | numNode = x[2]; 126 | numEdge = x[3]; 127 | weighted = (bool)sizeEdgeTy; 128 | gk_fclose(fpin); 129 | } 130 | 131 | void ReadGraphGR() { 132 | // uint *vsize; 133 | FILE *fpin; 134 | bool readew; 135 | cout<, 9 | BufferType::SHMEM>; //, AliasTableStorePolicy::STORE 10 | WCTable *tables = (WCTable *)buffer; 11 | WCTable *table = &tables[WID]; 12 | 13 | bool not_all_zero = 14 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 15 | ggraph->getDegree(node_id), current_itr, node_id); 16 | if (not_all_zero) { 17 | table->construct(); 18 | table->SaveAliasTable(ggraph); 19 | if (LID == 0) ggraph->SetValid(node_id); 20 | } 21 | table->Clean(); 22 | } 23 | 24 | __device__ void ConstructBlockCentic(Sampler *sampler, sample_result &result, 25 | gpu_graph *ggraph, curandState state, 26 | int current_itr, int node_id, void *buffer, 27 | Vector_pack2 *vector_packs) { 28 | using BCTable = 29 | alias_table_constructor_shmem; 31 | BCTable *tables = (BCTable *)buffer; 32 | BCTable *table = &tables[0]; 33 | table->loadGlobalBuffer(vector_packs); 34 | 
__syncthreads(); 35 | bool not_all_zero = 36 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 37 | ggraph->getDegree(node_id), current_itr, node_id); 38 | __syncthreads(); 39 | if (not_all_zero) { 40 | table->constructBC(); 41 | table->SaveAliasTable(ggraph); 42 | if (LTID == 0) ggraph->SetValid(node_id); 43 | } 44 | __syncthreads(); 45 | table->Clean(); 46 | } 47 | 48 | __global__ void ConstructAliasTableKernel(Sampler *sampler, 49 | Vector_pack2 *vector_pack) { 50 | sample_result &result = sampler->result; 51 | gpu_graph *ggraph = &sampler->ggraph; 52 | Vector_pack2 *vector_packs = &vector_pack[BID]; 53 | using WCTable = alias_table_constructor_shmem< 54 | uint, thread_block_tile<32>, 55 | BufferType::SHMEM>; //, AliasTableStorePolicy::STORE 56 | __shared__ WCTable table[WARP_PER_BLK]; 57 | void *buffer = &table[0]; 58 | curandState state; 59 | curand_init(TID, 0, 0, &state); 60 | 61 | __shared__ uint current_itr; 62 | if (threadIdx.x == 0) current_itr = 0; 63 | __syncthreads(); 64 | 65 | Vector_gmem *high_degrees = &sampler->result.high_degrees[0]; 66 | 67 | sample_job job; 68 | __threadfence_block(); 69 | if (LID == 0) job = result.requireOneJob(current_itr); 70 | __syncwarp(FULL_WARP_MASK); 71 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 72 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 73 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 74 | __syncwarp(FULL_WARP_MASK); 75 | while (job.val) { 76 | if (ggraph->getDegree(job.node_id) < ELE_PER_WARP) { 77 | ConstructWarpCentic(sampler, result, ggraph, state, current_itr, job.idx, 78 | job.node_id, buffer); 79 | } else { 80 | if (LID == 0) result.AddHighDegree(current_itr, job.node_id); 81 | } 82 | __syncwarp(FULL_WARP_MASK); 83 | if (LID == 0) job = result.requireOneJob(current_itr); 84 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 85 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 86 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 87 | } 88 | __syncthreads(); 89 | __shared__ sample_job high_degree_job; 90 | if (LTID == 0) { 91 | job = result.requireOneHighDegreeJob(current_itr); 92 | high_degree_job.val = job.val; 93 | high_degree_job.node_id = job.node_id; 94 | } 95 | __syncthreads(); 96 | while (high_degree_job.val) { 97 | ConstructBlockCentic(sampler, result, ggraph, state, current_itr, 98 | high_degree_job.node_id, buffer, 99 | vector_packs); // buffer_pointer 100 | // __syncthreads(); 101 | if (LTID == 0) { 102 | job = result.requireOneHighDegreeJob(current_itr); 103 | high_degree_job.val = job.val; 104 | high_degree_job.node_id = job.node_id; 105 | } 106 | __syncthreads(); 107 | } 108 | } 109 | __global__ void PrintTable(Sampler *sampler) { 110 | if (TID == 0) { 111 | printf("\nprob:\n"); 112 | printD(sampler->prob_array, 100); 113 | printf("\nalias:\n"); 114 | printD(sampler->alias_array, 100); 115 | } 116 | } 117 | 118 | // todo offset 119 | float ConstructTable(Sampler &sampler, uint ngpu, uint index) { 120 | LOG("%s\n", __FUNCTION__); 121 | int device; 122 | cudaDeviceProp prop; 123 | cudaGetDevice(&device); 124 | cudaGetDeviceProperties(&prop, device); 125 | int n_sm = prop.multiProcessorCount; 126 | 127 | sampler.AllocateAliasTablePartial(ngpu, index); 128 | 129 | // paster(sizeof(alias_table_constructor_shmem, 130 | // BufferType::SHMEM>)); 131 | // paster(sizeof( 132 | // alias_table_constructor_shmem)); 134 | 135 | Sampler *sampler_ptr; 136 | MyCudaMalloc(&sampler_ptr, sizeof(Sampler)); 137 | CUDA_RT_CALL(cudaMemcpy(sampler_ptr, &sampler, sizeof(Sampler), 
138 | cudaMemcpyHostToDevice)); 139 | double start_time, total_time; 140 | init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr,true); 141 | 142 | // allocate global buffer 143 | int block_num = n_sm * FLAGS_m; 144 | int gbuff_size = sampler.ggraph.MaxDegree; 145 | 146 | LOG("alllocate GMEM buffer %d MB\n", 147 | block_num * gbuff_size * MEM_PER_ELE / 1024 / 1024); 148 | 149 | Vector_pack2 *vector_pack_h = new Vector_pack2[block_num]; 150 | for (size_t i = 0; i < block_num; i++) { 151 | vector_pack_h[i].Allocate(gbuff_size, index); 152 | } 153 | CUDA_RT_CALL(cudaDeviceSynchronize()); 154 | Vector_pack2 *vector_packs; 155 | CUDA_RT_CALL( 156 | MyCudaMalloc(&vector_packs, sizeof(Vector_pack2) * block_num)); 157 | CUDA_RT_CALL(cudaMemcpy(vector_packs, vector_pack_h, 158 | sizeof(Vector_pack2) * block_num, 159 | cudaMemcpyHostToDevice)); 160 | 161 | // Global_buffer 162 | CUDA_RT_CALL(cudaDeviceSynchronize()); 163 | start_time = wtime(); 164 | #ifdef check 165 | ConstructAliasTableKernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr, vector_packs); 166 | #else 167 | ConstructAliasTableKernel<<>>(sampler_ptr, 168 | vector_packs); 169 | #endif 170 | CUDA_RT_CALL(cudaDeviceSynchronize()); 171 | // CUDA_RT_CALL(cudaPeekAtLastError()); 172 | total_time = wtime() - start_time; 173 | LOG("Construct table time:\t%.6f\n", total_time); 174 | // paster(FLAGS_hmgraph); 175 | if ((FLAGS_weight || FLAGS_randomweight) && (!FLAGS_hmgraph)) { 176 | CUDA_RT_CALL(cudaFree(sampler.ggraph.adjwgt)); 177 | } 178 | return total_time; 179 | } 180 | -------------------------------------------------------------------------------- /src/online_sample.cu: -------------------------------------------------------------------------------- 1 | #include "app.cuh" 2 | 3 | static __device__ void SampleWarpCentic(sample_result &result, 4 | gpu_graph *ggraph, curandState state, 5 | int current_itr, int idx, int node_id, 6 | void *buffer) { 7 | alias_table_constructor_shmem> *tables = 8 | (alias_table_constructor_shmem> *)buffer; 9 | alias_table_constructor_shmem> *table = 10 | &tables[WID]; 11 | bool not_all_zero = 12 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 13 | ggraph->getDegree(node_id), current_itr, node_id); 14 | if (not_all_zero) { 15 | table->construct(); 16 | table->roll_atomic(&state, result); 17 | } 18 | table->Clean(); 19 | } 20 | 21 | static __device__ void SampleBlockCentic(sample_result &result, 22 | gpu_graph *ggraph, curandState state, 23 | int current_itr, int node_id, 24 | void *buffer, 25 | Vector_pack *vector_packs) { 26 | alias_table_constructor_shmem *tables = 27 | (alias_table_constructor_shmem *) 28 | buffer; 29 | alias_table_constructor_shmem *table = 30 | &tables[0]; 31 | table->loadGlobalBuffer(vector_packs); 32 | __syncthreads(); 33 | bool not_all_zero = 34 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 35 | ggraph->getDegree(node_id), current_itr, node_id); 36 | __syncthreads(); 37 | if (not_all_zero) { 38 | table->constructBC(); 39 | uint target_size = 40 | MIN(ggraph->getDegree(node_id), result.hops[current_itr + 1]); 41 | table->roll_atomic(target_size, &state, result); 42 | } 43 | __syncthreads(); 44 | table->Clean(); 45 | } 46 | 47 | __global__ void sample_kernel(Sampler *sampler, 48 | Vector_pack *vector_pack) { 49 | sample_result &result = sampler->result; 50 | gpu_graph *ggraph = &sampler->ggraph; 51 | Vector_pack *vector_packs = &vector_pack[BID]; 52 | __shared__ alias_table_constructor_shmem> 53 | table[WARP_PER_BLK]; 54 | void *buffer = &table[0]; 55 | curandState state; 
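// Sampling proceeds hop by hop: warps repeatedly fetch jobs from the frontier;
// vertices whose degree fits a warp's shared-memory table (< ELE_PER_WARP) are
// sampled warp-centrically, while larger ones are queued as high-degree jobs
// and later processed by the whole block using the global-memory vector buffers.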
56 | curand_init(TID, 0, 0, &state); 57 | 58 | __shared__ uint current_itr; 59 | if (threadIdx.x == 0) current_itr = 0; 60 | __syncthreads(); 61 | for (; current_itr < result.hop_num - 1;) // for 2-hop, hop_num=3 62 | { 63 | // Vector_gmem *high_degrees = 64 | // &sampler->result.high_degrees[current_itr]; 65 | sample_job job; 66 | __threadfence_block(); 67 | if (LID == 0) job = result.requireOneJob(current_itr); 68 | __syncwarp(FULL_WARP_MASK); 69 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 70 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 71 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 72 | __syncwarp(FULL_WARP_MASK); 73 | while (job.val) { 74 | if (ggraph->getDegree(job.node_id) < ELE_PER_WARP) { 75 | SampleWarpCentic(result, ggraph, state, current_itr, job.idx, 76 | job.node_id, buffer); 77 | } else { 78 | #ifdef skip8k 79 | if (LID == 0 && ggraph->getDegree(job.node_id) < 8000) 80 | #else 81 | if (LID == 0) 82 | #endif // skip8k 83 | result.AddHighDegree(current_itr, job.node_id); 84 | } 85 | __syncwarp(FULL_WARP_MASK); 86 | if (LID == 0) job = result.requireOneJob(current_itr); 87 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 88 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 89 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 90 | } 91 | __syncthreads(); 92 | __shared__ sample_job high_degree_job; 93 | if (LTID == 0) { 94 | job = result.requireOneHighDegreeJob(current_itr); 95 | high_degree_job.val = job.val; 96 | high_degree_job.node_id = job.node_id; 97 | } 98 | __syncthreads(); 99 | while (high_degree_job.val) { 100 | SampleBlockCentic(result, ggraph, state, current_itr, 101 | high_degree_job.node_id, buffer, 102 | vector_packs); // buffer_pointer 103 | __syncthreads(); 104 | if (LTID == 0) { 105 | job = result.requireOneHighDegreeJob(current_itr); 106 | high_degree_job.val = job.val; 107 | high_degree_job.node_id = job.node_id; 108 | } 109 | __syncthreads(); 110 | } 111 | __syncthreads(); 112 | if (threadIdx.x == 0) { 113 | // while (!result.checkFinish(current_itr)) 114 | // { 115 | // printf("waiting "); 116 | // } 117 | result.NextItr(current_itr); 118 | } 119 | __syncthreads(); 120 | } 121 | } 122 | 123 | static __global__ void print_result(Sampler *sampler) { 124 | sampler->result.PrintResult(); 125 | } 126 | 127 | #include "date.h" 128 | // void Start_high_degree(Sampler sampler) 129 | float OnlineGBSample(Sampler &sampler) { 130 | // orkut max degree 932101 131 | 132 | LOG("%s\n", __FUNCTION__); 133 | #ifdef skip8k 134 | LOG("skipping 8k\n"); 135 | #endif // skip8k 136 | 137 | int device; 138 | cudaDeviceProp prop; 139 | cudaGetDevice(&device); 140 | cudaGetDeviceProperties(&prop, device); 141 | int n_sm = prop.multiProcessorCount; 142 | 143 | Sampler *sampler_ptr; 144 | MyCudaMalloc(&sampler_ptr, sizeof(Sampler)); 145 | CUDA_RT_CALL(cudaMemcpy(sampler_ptr, &sampler, sizeof(Sampler), 146 | cudaMemcpyHostToDevice)); 147 | double start_time, total_time; 148 | init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr, true); 149 | 150 | // allocate global buffer 151 | int block_num = n_sm * FLAGS_m; 152 | int gbuff_size = sampler.ggraph.MaxDegree; 153 | 154 | LOG("alllocate GMEM buffer %d MB\n", 155 | block_num * gbuff_size * MEM_PER_ELE / 1024 / 1024); 156 | 157 | Vector_pack *vector_pack_h = new Vector_pack[block_num]; 158 | for (size_t i = 0; i < block_num; i++) { 159 | vector_pack_h[i].Allocate(gbuff_size, sampler.device_id); 160 | } 161 | CUDA_RT_CALL(cudaDeviceSynchronize()); 162 | #pragma omp barrier 163 | Vector_pack 
*vector_packs; 164 | CUDA_RT_CALL( 165 | MyCudaMalloc(&vector_packs, sizeof(Vector_pack) * block_num)); 166 | CUDA_RT_CALL(cudaMemcpy(vector_packs, vector_pack_h, 167 | sizeof(Vector_pack) * block_num, 168 | cudaMemcpyHostToDevice)); 169 | 170 | // Global_buffer 171 | CUDA_RT_CALL(cudaDeviceSynchronize()); 172 | start_time = wtime(); 173 | #ifdef check 174 | sample_kernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr, vector_packs); 175 | #else 176 | // using namespace date; 177 | // using namespace std::chrono; 178 | // std::cout << "start: " << system_clock::now() << '\n'; 179 | sample_kernel<<>>(sampler_ptr, vector_packs); 180 | #endif 181 | CUDA_RT_CALL(cudaDeviceSynchronize()); 182 | // std::cout << "end: " << system_clock::now() << '\n'; 183 | // CUDA_RT_CALL(cudaPeekAtLastError()); 184 | total_time = wtime() - start_time; 185 | #pragma omp barrier 186 | LOG("Device %d sampling time:\t%.2f ms ratio:\t %.1f MSEPS\n", 187 | omp_get_thread_num(), total_time * 1000, 188 | static_cast(sampler.result.GetSampledNumber() / total_time / 189 | 1000000)); 190 | sampler.sampled_edges = sampler.result.GetSampledNumber(); 191 | LOG("sampled_edges %d\n", sampler.sampled_edges); 192 | if (FLAGS_printresult) print_result<<<1, 32, 0, 0>>>(sampler_ptr); 193 | CUDA_RT_CALL(cudaDeviceSynchronize()); 194 | return total_time; 195 | } 196 | -------------------------------------------------------------------------------- /include/roller.cuh: -------------------------------------------------------------------------------- 1 | // #include "gpu_graph.cuh" 2 | // #include "kernel.cuh" 3 | // #include "sampler_result.cuh" 4 | // #include "util.cuh" 5 | // #include "vec.cuh" 6 | 7 | // template struct alias_table_roller_shmem; 8 | 9 | // template struct alias_table_roller_shmem { 10 | // uint size; 11 | // uint current_itr; 12 | // gpu_graph *ggraph; 13 | // int src_id; 14 | // uint src_degree; 15 | 16 | // Vector_virtual alias; 17 | // Vector_virtual prob; 18 | // // Vector_shmem 20 | // // selected; 21 | // // Vector_gmem selected_high_degree; 22 | 23 | // // __device__ bool loadGlobalBuffer(Vector_pack_short *pack) { 24 | // // if (LID == 0) { 25 | // // selected_high_degree = pack->selected; 26 | // // } 27 | // // } 28 | 29 | // __device__ bool SetVirtualVector(gpu_graph *graph) { 30 | // alias.Construt(graph->alias_array + graph->xadj[src_id], 31 | // graph->getDegree((uint)src_id)); 32 | // prob.Construt(graph->prob_array + graph->xadj[src_id], 33 | // graph->getDegree((uint)src_id)); 34 | // } 35 | 36 | // __host__ __device__ uint Size() { return size; } 37 | // __device__ void loadFromGraph(T *_ids, gpu_graph *graph, int _size, 38 | // uint _current_itr, int _src_id) { 39 | // ggraph = graph; 40 | // current_itr = _current_itr; 41 | // size = _size; 42 | // // ids = _ids; 43 | // src_id = _src_id; 44 | // src_degree = graph->getDegree((uint)_src_id); 45 | // // weights = _weights; 46 | // SetVirtualVector(graph); 47 | // Init(src_degree); 48 | // } 49 | // __device__ void Init(uint sz) { 50 | // alias.Init(sz); 51 | // prob.Init(sz); 52 | // } 53 | // __device__ void roll_atomic(T *array, curandState *local_state, 54 | // sample_result result) { 55 | // uint target_size = result.hops[current_itr + 1]; 56 | // if ((target_size > 0) && (target_size < src_degree)) { 57 | // // int itr = 0; 58 | // for (size_t i = 0; i < target_size; i++) { 59 | // int col = (int)floor(curand_uniform(local_state) * size); 60 | // float p = curand_uniform(local_state); 61 | // uint candidate; 62 | // if (p < prob[col]) 63 | // 
candidate = col; 64 | // else 65 | // candidate = alias[col]; 66 | // result.AddActive(current_itr, array, 67 | // ggraph->getOutNode(src_id, candidate)); 68 | // } 69 | // } else if (target_size >= src_degree) { 70 | // for (size_t i = 0; i < src_degree; i++) { 71 | // result.AddActive(current_itr, array, ggraph->getOutNode(src_id, i)); 72 | // } 73 | // } 74 | // } 75 | // }; 76 | 77 | // template struct alias_table_roller_shmem { 78 | // uint size; 79 | // // float weight_sum; 80 | // // T *ids; 81 | // // float *weights; 82 | // uint current_itr; 83 | // gpu_graph *ggraph; 84 | // int src_id; 85 | // uint src_degree; 86 | 87 | // Vector_virtual alias; 88 | // Vector_virtual prob; 89 | // Vector_shmem 90 | // selected; 91 | // Vector_gmem selected_high_degree; 92 | 93 | // __device__ void loadGlobalBuffer(Vector_pack_short *pack) { 94 | // if (LID == 0) { 95 | // selected_high_degree = pack->selected; 96 | // } 97 | // } 98 | 99 | // __device__ bool SetVirtualVector(gpu_graph *graph) { 100 | // alias.Construt(graph->alias_array + graph->xadj[src_id], 101 | // graph->getDegree((uint)src_id)); 102 | // prob.Construt(graph->prob_array + graph->xadj[src_id], 103 | // graph->getDegree((uint)src_id)); 104 | // } 105 | 106 | // __host__ __device__ uint Size() { return size; } 107 | // __device__ void loadFromGraph(T *_ids, gpu_graph *graph, int _size, 108 | // uint _current_itr, int _src_id) { 109 | // if (LID == 0) { 110 | // ggraph = graph; 111 | // current_itr = _current_itr; 112 | // size = _size; 113 | // // ids = _ids; 114 | // src_id = _src_id; 115 | // src_degree = graph->getDegree((uint)_src_id); 116 | // // weights = _weights; 117 | // SetVirtualVector(graph); 118 | // Init(src_degree); 119 | // } 120 | 121 | // __syncwarp(FULL_WARP_MASK); 122 | // active_size(__LINE__); 123 | // } 124 | // __device__ void Init(uint sz) { 125 | // alias.Init(sz); 126 | // prob.Init(sz); 127 | // selected.Init(sz); 128 | // selected_high_degree.Init(sz); 129 | // } 130 | // __device__ void Clean() { 131 | // // if (LID == 0) { 132 | // // alias.Clean(); 133 | // // prob.Clean(); 134 | // selected.Clean(); 135 | // // } 136 | // selected_high_degree.CleanWC(); 137 | // // selected_high_degree.CleanDataWC(); //! 
todo using GMEM per warp 138 | // } 139 | // __device__ void roll_atomic(T *array, curandState *state, 140 | // sample_result result) { 141 | // coalesced_group active = coalesced_threads(); 142 | // active.sync(); 143 | // active_size(__LINE__); 144 | // // if (LID == 0) { 145 | // // printf("%s \n", __FUNCTION__); 146 | // // } 147 | // // __syncwarp(FULL_WARP_MASK); 148 | // active.sync(); 149 | // // curandState state; 150 | // // paster(current_itr); 151 | // uint target_size = result.hops[current_itr + 1]; 152 | // if ((target_size > 0) && (target_size < src_degree)) { 153 | // int itr = 0; 154 | // __shared__ uint sizes[WARP_PER_BLK]; 155 | // uint *local_size = sizes + WID; 156 | // if (LID == 0) 157 | // *local_size = 0; 158 | // // __syncwarp(FULL_WARP_MASK); 159 | // // if (LID == 0) { 160 | // // paster(*local_size); 161 | // // paster(target_size); 162 | // // } 163 | // // __syncwarp(FULL_WARP_MASK); 164 | // active.sync(); 165 | // active_size(__LINE__); 166 | // while (*local_size < target_size) { 167 | // active_size(__LINE__); 168 | // for (size_t i = *local_size + LID; 169 | // i < 32 * (target_size / 32 + 1); // 32 * (target_size / 32 + 1) 170 | // i += 32) { 171 | // active_size(__LINE__); 172 | // roll_once(array, local_size, state, target_size, result); 173 | // } 174 | // // __syncwarp(FULL_WARP_MASK); 175 | // active.sync(); 176 | // itr++; 177 | // if (itr > 10) { 178 | // break; 179 | // } 180 | // } 181 | // active.sync(); 182 | // } else if (target_size >= src_degree) { 183 | // for (size_t i = LID; i < src_degree; i += 32) { 184 | // result.AddActive(current_itr, array, ggraph->getOutNode(src_id, i)); 185 | // } 186 | // } 187 | // } 188 | 189 | // __device__ void roll_once(T *array, uint *local_size, 190 | // curandState *local_state, size_t target_size, 191 | // sample_result result) { 192 | // if (LID == 0) 193 | // printf("%s \n", __FUNCTION__); 194 | // int col = (int)floor(curand_uniform(local_state) * size); 195 | // float p = curand_uniform(local_state); 196 | // uint candidate; 197 | // if (p < prob[col]) 198 | // candidate = col; 199 | // else 200 | // candidate = alias[col]; 201 | // unsigned short int updated = true; 202 | // // if (src_degree <= ELE_PER_WARP) 203 | // // updated = atomicCAS(&selected[candidate], (unsigned short int)0, 204 | // // (unsigned short int)1); 205 | // // else { 206 | // // updated = atomicCAS(&selected_high_degree[candidate], 207 | // // (unsigned short int)0, (unsigned short int)1); 208 | // // } 209 | // if (!updated) { 210 | // if (AddTillSize(local_size, target_size)) { 211 | // result.AddActive(current_itr, array, 212 | // ggraph->getOutNode(src_id, candidate)); 213 | // } 214 | // // return true; 215 | // } 216 | // // else 217 | // // return false; 218 | // } 219 | // }; -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-17 13:39:45 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-12 20:16:14 6 | # @FilePath: /skywalker/scripts/test.sh 7 | ### 8 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 9 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 10 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 11 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 12 | 13 | # DATA=( sk-2005 friendster) 14 | # HD=( 4 1 ) 15 
| ITR=1 16 | NG=4 17 | 18 | GR=".w.gr" 19 | EXE="./bin/main" #main_degree 20 | SG="--ngpu=1 --s" 21 | 22 | # node2vec always online 23 | # export OMP_PROC_BIND=TRUE 24 | # GOMP_CPU_AFFINITY="0-9 10-19 20-29 30-99" 25 | # OMP_PLACES=cores 26 | # OMP_PROC_BIND=close 27 | 28 | # correct one 29 | # OMP_PLACES=cores OMP_PROC_BIND=spread 30 | 31 | # --randomweight=1 --weightrange=2 32 | 33 | 34 | # walker 35 | # echo "-------------------------------------------------------unbias rw 100" 36 | # for idx in $(seq 1 ${#DATA[*]}) 37 | # do 38 | # ./bin/main --rw=1 --k 1 --d 100 --ol=1 --bias=0 --input ~/data/${DATA[idx-1]}${GR} -v --ngpu 1 --full --umresult 1 --umbuf 1 39 | # done 40 | 41 | # walker 42 | # echo "-------------------------------------------------------offline ppr 0.15 4k" 43 | # for idx in $(seq 1 ${#DATA[*]}) 44 | # do 45 | # ./bin/main --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} -bias=0 --rw=1 --n=40000 --k 1 --d 100 --tp=0.15 --ngpu 1 --umgraph=0 --umresult=0 --umbuf=0 --weight=0 46 | # done 47 | 48 | 49 | # echo "-------------------------------------------------------online layer sampling 4k 100" 50 | # for idx in $(seq 1 ${#DATA[*]}) 51 | # do 52 | # ./bin/main --rw=0 --k 1 --d 100 --ol=1 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 53 | # done 54 | 55 | 56 | # echo "-------------------------------------------------------online walkload 4k---------------------" 57 | # echo "-------------------------------------------------------online walk 4k 100" 58 | # for idx in $(seq 1 ${#DATA[*]}) 59 | # do 60 | # for i in $(seq 1 ${ITR}) 61 | # do 62 | # ./bin/main --k 1 --d 100 --rw=1 --ol=1 --n=4000 --input ~/data/${DATA[idx-1]}${GR} ${SG} -v 63 | # done 64 | # done 65 | 66 | # echo "-------------------------------------------------------online ppr 0.15" 67 | # for idx in $(seq 1 ${#DATA[*]}) 68 | # do 69 | # ./bin/main -bias=1 --rw=1 --ol=1 --n=4000 --k 1 --d 100 --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} ${SG} --hd=${HD[idx-1]} 70 | # done 71 | 72 | # echo "-------------------------------------------------------online sample 4k 20,2" 73 | # for idx in $(seq 1 ${#DATA[*]}) 74 | # do 75 | # for i in $(seq 1 ${ITR}) 76 | # do 77 | # ./bin/main --k 20 --d 2 --ol=1 --n=4000 --input ~/data/${DATA[idx-1]}${GR} ${SG} --hd=${HD[idx-1]} 78 | # done 79 | # done 80 | 81 | # echo "-------------------------------------------------------online node2vec 4000" 82 | # for idx in $(seq 1 ${#DATA[*]}) 83 | # do 84 | # ./bin/node2vec --node2vec --ol=1 --bias=1 --d 100 --n=4000 --ngpu=4 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 85 | # done 86 | 87 | # echo "-------------------------------------------------------online sage 4k 25,10" 88 | # for idx in $(seq 1 ${#DATA[*]}) 89 | # do 90 | # ./bin/main --sage=1 --ol=1 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 91 | # done 92 | 93 | 94 | # ---------------------- 95 | 96 | # echo "-------------------------------------------------------offline table" 97 | # for idx in $(seq 1 ${#DATA[*]}) 98 | # do 99 | # for i in $(seq 1 ${ITR}) 100 | # do 101 | # ./bin/main --ol=0 --ngpu=1 --s --rw=1 --k=1 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 102 | # done 103 | # done 104 | 105 | 106 | # echo "---------------------------------scale ------------------------------" 107 | 108 | # echo "-------------------------------------------------------online node2vec" 109 | # for idx in $(seq 1 ${#DATA[*]}) 110 | # do 111 | # for i in $(seq 1 ${NG}) 112 | # do 113 | # ./bin/node2vec --node2vec --ol=1 --bias=1 --d 100 --n=400000 --input 
~/data/${DATA[idx-1]}${GR} --ngpu=$i --s --hd=${HD[idx-1]} 114 | # done 115 | # done 116 | 117 | 118 | 119 | # echo "-------------------------------------------------------unbias rw 100" 120 | # for idx in $(seq 1 ${#DATA[*]}) 121 | # do 122 | # ./bin/main --rw=1 --k 1 --d 100 --ol=1 --bias=0 --input ~/data/${DATA[idx-1]}${GR} -v --ngpu 1 --full --umresult 1 --umbuf 1 123 | # done 124 | 125 | # echo "-------------------------------------------------------unbias rw 100" 126 | # for idx in $(seq 1 ${#DATA[*]}) 127 | # do 128 | # for i in $(seq 1 ${NG}) 129 | # do 130 | # ./bin/main --rw=1 --k 1 --d 100 --bias=0 --ol=0 --n=400000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 131 | # done 132 | # done 133 | 134 | # echo "-------------------------------------------------------offline ppr 0.15" 135 | # for idx in $(seq 1 ${#DATA[*]}) 136 | # do 137 | # for i in $(seq 1 ${NG}) 138 | # do 139 | # ./bin/main -bias=1 --rw=1 --ol=0 --n=400000 --k 1 --d 100 --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 140 | # done 141 | # done 142 | 143 | # echo "comparing with csaw" 144 | # echo "-------------------------------------------------------offline walk 100" 145 | # for idx in $(seq 1 ${#DATA[*]}) 146 | # do 147 | # for i in $(seq 1 ${NG}) 148 | # do 149 | # ./bin/main --k 1 --d 100 --rw=1 --ol=0 --n=400000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 150 | # done 151 | # done 152 | 153 | # echo "-------------------------------------------------------offline sample 20,2" 154 | # for idx in $(seq 1 ${#DATA[*]}) 155 | # do 156 | # for i in $(seq 1 ${NG}) 157 | # do 158 | # ./bin/main --k 20 --d 2 --ol=0 --n=400000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 159 | # done 160 | # done 161 | 162 | # echo "-------------------------------------------------------offline sample 40k 2,2" 163 | # for idx in $(seq 1 ${#DATA[*]}) 164 | # do 165 | # for i in $(seq 1 ${NG}) 166 | # do 167 | # ./bin/main --k 2 --d 2 --ol=0 --n=40000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 168 | # done 169 | # done 170 | 171 | # echo "-------------------------------------------------------offline sample 40k 2,2" 172 | # for idx in $(seq 1 ${#DATA[*]}) 173 | # do 174 | # ./bin/main --k 2 --d 2 --ol=0 --n=40000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=4 --bias=1 175 | # done 176 | 177 | # echo "-------------------------------------------------------offline rw |V| 100. 
no weight" 178 | # for idx in $(seq 1 ${#DATA[*]}) 179 | # do 180 | # ./bin/main --rw=1 --k 1 --d 100 --ol=1 --bias=0 --full --input ~/data/${DATA[idx-1]}${GR} 181 | # done 182 | 183 | 184 | # echo "-------------------------------------------------------offline sample |V| 2,2" 185 | # for idx in $(seq 1 ${#DATA[*]}) 186 | # do 187 | # ./bin/main --rw=0 --k 2 --d 2 --ol=0 --input ~/data/${DATA[idx-1]}${GR} 188 | # done 189 | 190 | # echo "-------------------------------------------------------offline rw 4k 100" 191 | # for idx in $(seq 1 ${#DATA[*]}) 192 | # do 193 | # for ng in $(seq 1 4) 194 | # do 195 | # ./bin/main --rw=1 --k 1 --d 100 --ol=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${ng} --hd=${HD[idx-1]} --n=40000 196 | # done 197 | # done 198 | 199 | # echo "-------------------------------------------------------offline 4k 10 10 " 200 | # for idx in $(seq 1 ${#DATA[*]}) 201 | # do 202 | # for ng in $(seq 1 4) 203 | # do 204 | # ./bin/main --rw=0 --k 10 --d 2 --ol=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${ng} --hd=${HD[idx-1]} --n=4000 205 | # done 206 | # done 207 | 208 | 209 | # /////////////////// 210 | # |V| hard 211 | # echo "-------------------------------------------------------offline rw |V| 100" 212 | # for idx in $(seq 1 ${#DATA[*]}) 213 | # do 214 | # ./bin/main --rw=1 --k 1 --d 100 --ol=0 --full --input ~/data/${DATA[idx-1]}${GR} 215 | # done 216 | 217 | 218 | idx=1 219 | # echo "-------------------------------------------------------online walk for table time" 220 | # for i in $(seq 1 10) 221 | # do 222 | # val=`expr ${NV[${idx}-1]} / 100 \* ${i} \* 1` 223 | # echo "----------${val}" 224 | # ./bin/main --k 1 --d 100 --rw=1 --ol=1 --n=${val} --input ~/data/${DATA[idx-1]}${GR} ${SG} 225 | # done 226 | 227 | echo "-------------------------------------------------------offline walk for table time" 228 | for i in $(seq 1 10) 229 | do 230 | val=`expr ${NV[$idx-1]} / 100 \* ${i} \* 1` 231 | echo "----------${val}" 232 | ./bin/main --k 1 --d 100 --rw=1 --ol=0 --n=${val} --input ~/data/${DATA[idx-1]}${GR} ${SG} 233 | done 234 | -------------------------------------------------------------------------------- /src/offline_walk.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: just perform RW 3 | * @Date: 2020-11-30 14:30:06 4 | * @LastEditors: Pengyu Wang 5 | * @LastEditTime: 2022-03-03 22:46:47 6 | * @FilePath: /skywalker/src/offline_walk.cu 7 | */ 8 | #include "app.cuh" 9 | 10 | __global__ void sample_kernel_static_buffer(Walker *walker) { 11 | Jobs_result &result = walker->result; 12 | gpu_graph *graph = &walker->ggraph; 13 | curandState state; 14 | curand_init(TID, 0, 0, &state); 15 | __shared__ matrixBuffer buffer; 16 | buffer.Init(); 17 | 18 | size_t idx_i = TID; 19 | if (idx_i < result.size) { 20 | result.length[idx_i] = result.hop_num - 1; 21 | uint src_id; 22 | // bool alive = true; 23 | coalesced_group warp = coalesced_threads(); 24 | for (uint current_itr = 0; current_itr < result.hop_num - 1; 25 | current_itr++) { 26 | // coalesced_group active = coalesced_threads(); 27 | if (result.alive[idx_i] != 0) { 28 | Vector_virtual alias; 29 | Vector_virtual prob; 30 | src_id = current_itr == 0 ? 
result.GetData(current_itr, idx_i) : src_id; 31 | uint src_degree = graph->getDegree((uint)src_id); 32 | alias.Construt( 33 | graph->alias_array + graph->xadj[src_id] - graph->local_vtx_offset, 34 | src_degree); 35 | prob.Construt( 36 | graph->prob_array + graph->xadj[src_id] - graph->local_vtx_offset, 37 | src_degree); 38 | alias.Init(src_degree); 39 | prob.Init(src_degree); 40 | const uint target_size = 1; 41 | 42 | if (target_size < src_degree) { 43 | int col = (int)floor(curand_uniform(&state) * src_degree); 44 | float p = curand_uniform(&state); 45 | uint candidate; 46 | if (p < prob[col]) 47 | candidate = col; 48 | else 49 | candidate = alias[col]; 50 | uint next_src = graph->getOutNode(src_id, candidate); 51 | // if (idx_i == 1) printf("%u adding1 %u \n", idx_i, next_src); 52 | buffer.Set(next_src); 53 | src_id = next_src; 54 | } else if (src_degree == 0) { 55 | result.alive[idx_i] = 0; 56 | result.length[idx_i] = current_itr; 57 | // buffer.Finish(); 58 | // return; 59 | } else { 60 | uint next_src = graph->getOutNode(src_id, 0); 61 | buffer.Set(next_src); 62 | src_id = next_src; 63 | // if (idx_i == 1) printf("%u adding %u \n", idx_i, next_src); 64 | } 65 | } 66 | warp.sync(); 67 | buffer.CheckFlush(result.data + result.hop_num * idx_i, current_itr, 68 | warp); 69 | } 70 | warp.sync(); 71 | buffer.Flush(result.data + result.hop_num * idx_i, 0, warp); 72 | } 73 | } 74 | // 48 kb , 404 per sampler 75 | __global__ void sample_kernel_static(Walker *walker) { 76 | Jobs_result &result = walker->result; 77 | gpu_graph *graph = &walker->ggraph; 78 | curandState state; 79 | curand_init(TID, 0, 0, &state); 80 | 81 | size_t idx_i = TID; 82 | if (idx_i < result.size) { 83 | result.length[idx_i] = result.hop_num - 1; 84 | for (uint current_itr = 0; current_itr < result.hop_num - 1; 85 | current_itr++) { 86 | if (result.alive[idx_i] != 0) { 87 | Vector_virtual alias; 88 | Vector_virtual prob; 89 | uint src_id = result.GetData(current_itr, idx_i); 90 | uint src_degree = graph->getDegree((uint)src_id); 91 | alias.Construt( 92 | graph->alias_array + graph->xadj[src_id] - graph->local_vtx_offset, 93 | src_degree); 94 | prob.Construt( 95 | graph->prob_array + graph->xadj[src_id] - graph->local_vtx_offset, 96 | src_degree); 97 | alias.Init(src_degree); 98 | prob.Init(src_degree); 99 | const uint target_size = 1; 100 | if (target_size < src_degree) { 101 | // int itr = 0; 102 | // for (size_t i = 0; i < target_size; i++) { 103 | int col = (int)floor(curand_uniform(&state) * src_degree); 104 | float p = curand_uniform(&state); 105 | uint candidate; 106 | if (p < prob[col]) 107 | candidate = col; 108 | else 109 | candidate = alias[col]; 110 | *result.GetDataPtr(current_itr + 1, idx_i) = 111 | graph->getOutNode(src_id, candidate); 112 | // } 113 | } else if (src_degree == 0) { 114 | result.alive[idx_i] = 0; 115 | result.length[idx_i] = current_itr; 116 | break; 117 | } else { 118 | *result.GetDataPtr(current_itr + 1, idx_i) = 119 | graph->getOutNode(src_id, 0); 120 | } 121 | } 122 | } 123 | } 124 | } 125 | 126 | __global__ void sample_kernel(Walker *walker) { 127 | Jobs_result &result = walker->result; 128 | gpu_graph *graph = &walker->ggraph; 129 | curandState state; 130 | curand_init(TID, 0, 0, &state); 131 | 132 | for (size_t idx_i = TID; idx_i < result.size; 133 | idx_i += gridDim.x * blockDim.x) { 134 | result.length[idx_i] = result.hop_num - 1; 135 | for (uint current_itr = 0; current_itr < result.hop_num - 1; 136 | current_itr++) { 137 | if (result.alive[idx_i] != 0) { 138 | Vector_virtual alias; 
139 | Vector_virtual prob; 140 | uint src_id = result.GetData(current_itr, idx_i); 141 | uint src_degree = graph->getDegree((uint)src_id); 142 | alias.Construt( 143 | graph->alias_array + graph->xadj[src_id] - graph->local_vtx_offset, 144 | src_degree); 145 | prob.Construt( 146 | graph->prob_array + graph->xadj[src_id] - graph->local_vtx_offset, 147 | src_degree); 148 | alias.Init(src_degree); 149 | prob.Init(src_degree); 150 | const uint target_size = 1; 151 | if (target_size < src_degree) { 152 | // int itr = 0; 153 | // for (size_t i = 0; i < target_size; i++) { 154 | int col = (int)floor(curand_uniform(&state) * src_degree); 155 | float p = curand_uniform(&state); 156 | uint candidate; 157 | if (p < prob[col]) 158 | candidate = col; 159 | else 160 | candidate = alias[col]; 161 | *result.GetDataPtr(current_itr + 1, idx_i) = 162 | graph->getOutNode(src_id, candidate); 163 | // } 164 | } else if (src_degree == 0) { 165 | result.alive[idx_i] = 0; 166 | result.length[idx_i] = current_itr; 167 | break; 168 | } else { 169 | *result.GetDataPtr(current_itr + 1, idx_i) = 170 | graph->getOutNode(src_id, 0); 171 | } 172 | } 173 | } 174 | } 175 | } 176 | 177 | static __global__ void print_result(Walker *walker) { 178 | walker->result.PrintResult(); 179 | } 180 | 181 | float OfflineWalk(Walker &walker) { 182 | LOG("%s\n", __FUNCTION__); 183 | int device; 184 | cudaDeviceProp prop; 185 | cudaGetDevice(&device); 186 | cudaGetDeviceProperties(&prop, device); 187 | int n_sm = prop.multiProcessorCount; 188 | 189 | Walker *sampler_ptr; 190 | MyCudaMalloc(&sampler_ptr, sizeof(Walker)); 191 | CUDA_RT_CALL( 192 | cudaMemcpy(sampler_ptr, &walker, sizeof(Walker), cudaMemcpyHostToDevice)); 193 | double start_time, total_time; 194 | // init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr,true); 195 | BindResultKernel<<<1, 32, 0, 0>>>(sampler_ptr); 196 | // allocate global buffer 197 | int block_num = n_sm * FLAGS_m; 198 | CUDA_RT_CALL(cudaDeviceSynchronize()); 199 | CUDA_RT_CALL(cudaPeekAtLastError()); 200 | start_time = wtime(); 201 | #ifdef check 202 | sample_kernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr); 203 | #else 204 | if (FLAGS_static) { 205 | if (FLAGS_buffer) 206 | // sample_kernel_static_buffer<<<1, 32, 0, 0>>>(sampler_ptr); 207 | sample_kernel_static_buffer<<>>(sampler_ptr); 209 | else 210 | sample_kernel_static<<>>(sampler_ptr); 212 | } 213 | 214 | else 215 | sample_kernel<<>>(sampler_ptr); 216 | #endif 217 | CUDA_RT_CALL(cudaDeviceSynchronize()); 218 | // CUDA_RT_CALL(cudaPeekAtLastError()); 219 | total_time = wtime() - start_time; 220 | #pragma omp barrier 221 | LOG("Device %d sampling time:\t%.6f ratio:\t %.2f MSEPS\n", 222 | omp_get_thread_num(), total_time, 223 | static_cast(walker.result.GetSampledNumber() / total_time / 224 | 1000000)); 225 | walker.sampled_edges = walker.result.GetSampledNumber(); 226 | LOG("sampled_edges %d\n", walker.sampled_edges); 227 | if (FLAGS_printresult) print_result<<<1, 32, 0, 0>>>(sampler_ptr); 228 | CUDA_RT_CALL(cudaDeviceSynchronize()); 229 | return total_time; 230 | } 231 | -------------------------------------------------------------------------------- /old/shmem/alias_table.cuh: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | #include "vec.cuh" 3 | 4 | #define verbose 5 | 6 | template struct alias_table; 7 | 8 | __global__ void load_id_weight(); 9 | inline __device__ char char_atomicCAS(char *addr, char cmp, char val) { 10 | unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) & 11 | 
(0xFFFFFFFFFFFFFFFCULL)); 12 | unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 13 | unsigned mask = 0xFFU; 14 | mask <<= al_offset; 15 | mask = ~mask; 16 | unsigned sval = val; 17 | sval <<= al_offset; 18 | unsigned old = *al_addr, assumed, setval; 19 | do { 20 | assumed = old; 21 | setval = assumed & mask; 22 | setval |= sval; 23 | old = atomicCAS(al_addr, assumed, setval); 24 | } while (assumed != old); 25 | return (char)((assumed >> al_offset) & 0xFFU); 26 | } 27 | 28 | template 29 | __device__ void AddTillSize(T *array, uint32_t *size, T t, u64 target_size) { 30 | u64 old = atomicAdd(size, 1); 31 | if (old < target_size) { 32 | array[old] = t; 33 | } else 34 | printf("wtf vector overflow"); 35 | } 36 | 37 | template struct alias_table_constructor_shmem { 38 | 39 | // u64 degree; 40 | u64 size; 41 | float weight_sum; 42 | T *ids; 43 | float *weights; 44 | 45 | Vector_shmem large; 46 | Vector_shmem small; 47 | Vector_shmem alias; 48 | Vector_shmem prob; 49 | 50 | // to roll 51 | Vector_shmem selected; 52 | // Vector_shmem result; 53 | 54 | // __host__ __device__ u64 &Degree() { return degree; } 55 | __host__ __device__ u64 &Size() { return size; } 56 | __device__ void load(T *_ids, float *_weights, size_t _size) { 57 | if (LID == 0) { 58 | size = _size; 59 | ids = _ids; 60 | weights = _weights; 61 | } 62 | float local_sum = 0.0, tmp; 63 | for (size_t i = LID; i < size; i += 32) { 64 | local_sum += _weights[i]; 65 | } 66 | tmp = warpReduce(local_sum, LID); 67 | // #ifdef verbose 68 | // if (LID == 0) { 69 | // weight_sum = tmp; 70 | // printf("sum: %f\n", tmp); 71 | // } 72 | // #endif // verbose 73 | normalize(); 74 | } 75 | __device__ void Init() { 76 | large.Init(); 77 | small.Init(); 78 | alias.Init(Size()); 79 | prob.Init(Size()); 80 | selected.Init(); 81 | } 82 | __device__ void normalize() { 83 | float scale = size / weight_sum; 84 | for (size_t i = LID; i < size; i += 32) { 85 | prob[i] = weights[i] * scale; 86 | } 87 | } 88 | __device__ void Clean() { 89 | if (LID == 0) { 90 | large.Clean(); 91 | small.Clean(); 92 | alias.Clean(); 93 | prob.Clean(); 94 | selected.Clean(); 95 | } 96 | } 97 | __device__ void roll_atomic(Vector v, int count) { 98 | curandState state; 99 | int itr = 1; 100 | while (v.Size() < count) { 101 | for (size_t i = v.Size() + LID; i < count; i += 32) { 102 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 103 | roll_once(v, state, count); 104 | } 105 | // break; 106 | itr++; 107 | if (itr > 10) 108 | break; 109 | // if (LID == 0) 110 | // printf("v.Size() %d count %d\n", v.Size(), count); 111 | } 112 | if (LID == 0) { 113 | printf("itr: %d till done\n", itr); 114 | } 115 | } 116 | __device__ void roll_atomic(T *array, int count) { 117 | curandState state; 118 | int itr = 1; 119 | __shared__ uint32_t sizes[WARP_PER_SM]; 120 | uint32_t *local_size = &sizes[WID]; 121 | if (LID == 0) 122 | *local_size = 0; 123 | while (*local_size < count) { 124 | for (size_t i = *local_size + LID; i < count; i += 32) { 125 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 126 | roll_once(array, local_size, state, count); 127 | } 128 | itr++; 129 | if (itr > 10) 130 | break; 131 | } 132 | if (LID == 0) { 133 | printf("itr: %d till done\n", itr); 134 | } 135 | } 136 | 137 | __device__ void roll(Vector v, int count, size_t target_size) { 138 | curandState state; 139 | for (size_t i = LID; i < count; i += 32) { 140 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 141 | bool suc = roll_once(v, state); 142 | int itr = 
1; 143 | while (!suc) { 144 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 145 | // suc = roll_once(v, state); 146 | suc = roll_once(v, state, count); 147 | itr++; 148 | if (itr > 100) 149 | return; 150 | } 151 | // if (LID==0) 152 | // { 153 | // printf("itr: %d till done\n",itr); 154 | // } 155 | } 156 | } 157 | __device__ bool roll_once(T *array, uint32_t *local_size, 158 | curandState local_state, size_t target_size) { 159 | 160 | int col = (int)floor(curand_uniform(&local_state) * size); 161 | float p = curand_uniform(&local_state); 162 | // printf("tid %d col %d p %f\n", LID, col, p); 163 | int candidate; 164 | if (p < prob[col]) { 165 | candidate = col; 166 | } else { 167 | candidate = alias[col]; 168 | } 169 | char updated = char_atomicCAS(&selected[candidate], 0, 1); 170 | if (!updated) { 171 | // v.add(candidate); 172 | AddTillSize(array, local_size, candidate, target_size); 173 | // printf("tid %d suc sampled %d\n",LID, candidate); 174 | return true; 175 | } else 176 | return false; 177 | } 178 | __device__ bool roll_once(Vector v, curandState local_state, 179 | size_t target_size) { 180 | 181 | int col = (int)floor(curand_uniform(&local_state) * size); 182 | float p = curand_uniform(&local_state); 183 | // printf("tid %d col %d p %f\n", LID, col, p); 184 | int candidate; 185 | if (p < prob[col]) { 186 | candidate = col; 187 | } else { 188 | candidate = alias[col]; 189 | } 190 | char updated = char_atomicCAS(&selected[candidate], 0, 1); 191 | if (!updated) { 192 | // v.add(candidate); 193 | v.AddTillSize(candidate, target_size); 194 | // printf("tid %d suc sampled %d\n",LID, candidate); 195 | return true; 196 | } else 197 | return false; 198 | } 199 | __device__ void construct() { 200 | for (size_t i = LID; i < size; i += 32) { 201 | if (prob[i] > 1) 202 | large.Add(i); 203 | else 204 | small.Add(i); 205 | } 206 | active_size(__LINE__); 207 | if (LID == 0) { 208 | printf("large: "); 209 | printD(large.data, large.size); 210 | printf("small: "); 211 | printD(small.data, small.size); 212 | printf("prob: "); 213 | printD(prob.data, prob.size); 214 | printf("alias: "); 215 | printD(alias.data, alias.size); 216 | } 217 | int itr = 0; 218 | if (LID == 0) { 219 | prob.size = size; 220 | alias.size = size; 221 | } 222 | while (!small.Empty() && !large.Empty()) { 223 | 224 | int old_small_id = small.size - LID - 1; 225 | int old_small_size = small.size; 226 | // printf("old_small_id %d\n", old_small_id); 227 | if (old_small_id >= 0) { 228 | active_size(__LINE__); 229 | if (LID == 0) { 230 | small.size -= MIN(small.size, 32); 231 | } 232 | T smallV = small[old_small_id]; 233 | int res = old_small_id % large.size; 234 | // bool holder = (old_small_id / large.size == 0); 235 | bool holder = (LID < MIN(large.size, 32)) ? true : false; 236 | 237 | T largeV = large[large.size - res - 1]; //large.size cloud error 238 | // printf("lid %d largeV %d smallV %d holder %d\n", LID, largeV, 239 | // smallV, 240 | // holder); 241 | if (LID == 0) { 242 | large.size -= MIN(large.size, old_small_size); 243 | // printf("large.size %d min %d\n", large.size, 244 | // MIN(large.size, old_small_size)); 245 | } 246 | // todo how to ensure holder alwasy success?? 
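// Descriptive note on the step below: each participating lane pairs one entry
// from the small worklist with one from the large worklist. The large entry
// absorbs the small entry's deficit, i.e. prob[largeV] is decreased by
// (1 - prob[smallV]) via atomicAdd. If the updated prob[largeV] stays
// non-negative, the pairing is committed (alias[smallV] = largeV) and the
// holder lane re-queues largeV into small/large according to its new value;
// otherwise the subtraction is rolled back and smallV is pushed back to be
// retried in a later round.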
247 | float old; 248 | if (holder) 249 | old = atomicAdd(&prob[largeV], prob[smallV] - 1.0); 250 | if (!holder) 251 | old = atomicAdd(&prob[largeV], prob[smallV] - 1.0); 252 | if (old + prob[smallV] - 1.0 >= 0) { 253 | // printf("old - 1 + prob[smallV] %f\n ", old - 1.0 + prob[smallV]); 254 | // prob[smallV] = weights[smallV]; 255 | alias[smallV] = largeV; 256 | if (holder) { 257 | if (prob[largeV] < 1) 258 | small.Add(largeV); 259 | else if (prob[largeV] > 1) { 260 | // printf("add back %d %f\n", largeV, prob[largeV]); 261 | large.Add(largeV); 262 | } 263 | } 264 | } else { 265 | atomicAdd(&prob[largeV], 1 - prob[smallV]); 266 | small.Add(smallV); 267 | } 268 | } 269 | // if (LID == 0) { 270 | // printf("itr: %d\n", itr++); 271 | // printf("large: "); 272 | // printD(large.data, large.size); 273 | // printf("small: "); 274 | // printD(small.data, small.size); 275 | // printf("prob: "); 276 | // printD(prob.data, prob.size); 277 | // printf("alias: "); 278 | // printD(alias.data, alias.size); 279 | // } 280 | // if (itr == 5) 281 | // return; 282 | } 283 | } 284 | }; 285 | 286 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 287 | Vector out); 288 | -------------------------------------------------------------------------------- /include/frontier.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * @Date: 2022-03-10 14:04:55 3 | * @LastEditors: Pengyu Wang 4 | * @Description: 5 | * @FilePath: /skywalker/include/frontier.cuh 6 | * @LastEditTime: 2022-04-11 14:25:22 7 | */ 8 | 9 | #pragma once 10 | 11 | #include "vec.cuh" 12 | #define ADD_FRONTIER 1 13 | 14 | // #define LOCALITY 1 15 | 16 | #ifdef ADD_FRONTIER 17 | template 18 | struct sampleJob { 19 | uint instance_idx; 20 | uint offset; 21 | // uint itr; 22 | T src_id; 23 | int itr; 24 | bool val; 25 | }; 26 | 27 | template 28 | static __global__ void InitSampleFrontier(sampleJob *data, uint *seed, 29 | uint size) { 30 | if (TID < size) { 31 | sampleJob tmp = {TID, 0, seed[TID], 0, true}; 32 | data[TID] = tmp; 33 | } 34 | } 35 | template 36 | static __global__ void InitLocalitySampleFrontier(sampleJob **data, 37 | uint *seed, uint size, 38 | uint vtx_per_bucket, 39 | int *sizes) { 40 | if (TID < size) { 41 | sampleJob tmp = {TID, 0, seed[TID], 0, true}; 42 | uint bucket_idx = seed[TID] / vtx_per_bucket; 43 | size_t old = atomicAdd(&sizes[bucket_idx], 1); 44 | // assert(old < capacity[itr]); //change to ring buffer? 
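// Descriptive note: initial jobs are scattered into buckets keyed by
// source-vertex range (seed[TID] / vtx_per_bucket), so every bucket only
// references vertices from one contiguous ID range; requireOneJob() later
// drains one focused bucket at a time, which is the locality this frontier
// is named for.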
45 | data[bucket_idx][old] = tmp; 46 | } 47 | } 48 | template 49 | struct LocalitySampleFrontier { 50 | // sampleJob *data[bucket_num]; 51 | sampleJob **data, **data_h; 52 | int capacity; 53 | uint vtx_per_bucket; 54 | int *sizes; 55 | int *floor; 56 | int *focus; 57 | uint _bucket_num; 58 | uint size_per_bucket; 59 | bool finish; 60 | // int hop_num = depth; 61 | LocalitySampleFrontier() {} 62 | void Allocate(size_t _size, uint num_vtx) { 63 | _bucket_num = bucket_num; 64 | 65 | vtx_per_bucket = num_vtx / bucket_num + 1; 66 | 67 | assert(num_vtx != 0); 68 | assert(vtx_per_bucket != 0); 69 | capacity = _size; 70 | // CUDA_RT_CALL(MyCudaMalloc(&seed, capacity * sizeof(T))); 71 | uint length = 1; 72 | size_per_bucket = 73 | capacity * 26; // / bucket_num, hard to tell the buffer size 74 | // paster(size_per_bucket); 75 | // paster(bucket_num); 76 | data_h = new sampleJob *[bucket_num]; 77 | CUDA_RT_CALL(MyCudaMalloc(&data, bucket_num * sizeof(sampleJob *))); 78 | 79 | // printf("%s:%d %s for %d\n", __FILE__, __LINE__, __FUNCTION__, 0); 80 | for (size_t i = 0; i < bucket_num; i++) { 81 | // capacity[0] *= hops[i]; 82 | CUDA_RT_CALL( 83 | MyCudaMalloc(&data_h[i], size_per_bucket * sizeof(sampleJob))); 84 | } 85 | LOG(" frontier overhead %d MB\n ", 86 | bucket_num * size_per_bucket * sizeof(sampleJob) / 1024 / 1024); 87 | CUDA_RT_CALL(MyCudaMalloc(&sizes, bucket_num * sizeof(int))); 88 | CUDA_RT_CALL(MyCudaMalloc(&floor, bucket_num * sizeof(int))); 89 | CUDA_RT_CALL(MyCudaMalloc(&focus, sizeof(int))); 90 | 91 | CUDA_RT_CALL(cudaMemcpy(data, data_h, bucket_num * sizeof(sampleJob *), 92 | cudaMemcpyHostToDevice)); 93 | // printf("%s:%d %s for %d\n", __FILE__, __LINE__, __FUNCTION__, 0); 94 | } 95 | __host__ void Free() { 96 | CUDA_RT_CALL(cudaFree(data)); 97 | for (size_t i = 0; i < bucket_num; i++) CUDA_RT_CALL(cudaFree(data_h[i])); 98 | CUDA_RT_CALL(cudaFree(sizes)); 99 | CUDA_RT_CALL(cudaFree(floor)); 100 | CUDA_RT_CALL(cudaFree(focus)); 101 | } 102 | __host__ void Init(uint *seed, uint size) { 103 | InitLocalitySampleFrontier 104 | <<>>(data, seed, size, vtx_per_bucket, sizes); 105 | // int tmp = size; 106 | // CUDA_RT_CALL(cudaMemset(sizes, 0, bucket_num * sizeof(int))); 107 | CUDA_RT_CALL(cudaMemset(floor, 0, bucket_num * sizeof(int))); 108 | CUDA_RT_CALL(cudaMemset(focus, 0, sizeof(int))); 109 | // CUDA_RT_CALL( 110 | // cudaMemcpy(&sizes[0], &tmp, sizeof(int), cudaMemcpyHostToDevice)); 111 | } 112 | // __device__ void CheckActive(uint itr) {} 113 | __forceinline__ __device__ void Add(uint instance_idx, uint offset, uint itr, 114 | T src_id) { 115 | assert(vtx_per_bucket != 0); 116 | uint bucket_idx = src_id / (vtx_per_bucket); 117 | 118 | size_t old = atomicAdd(&sizes[bucket_idx], 1); 119 | assert(old < size_per_bucket); // change to ring buffer? 
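// Descriptive note: the per-bucket buffers are sized heuristically in
// Allocate() (size_per_bucket = capacity * 26), so exceeding that bound trips
// the assert above instead of wrapping around (hence the "ring buffer" todo).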
120 | 121 | sampleJob tmp = {instance_idx, offset, src_id, itr, true}; 122 | data[bucket_idx][old] = tmp; 123 | } 124 | // __device__ void Reset(uint itr) { size[itr % 3] = 0; } 125 | __device__ int Size(uint bucket_idx) { return sizes[bucket_idx]; } 126 | 127 | __device__ sampleJob Get(uint bucket_idx, uint idx) { 128 | return data[bucket_idx][idx]; 129 | } 130 | 131 | __forceinline__ __device__ bool checkFocus(int idx) { 132 | if (floor[idx] < sizes[idx]) { 133 | // if (!LTID) 134 | // printf(" idx %d floor[idx] %d sizes[idx] %d focus %d \n", idx, 135 | // floor[idx], sizes[idx], *focus); 136 | return true; 137 | } else 138 | return false; 139 | } 140 | __device__ void printSize() { 141 | if (!TID) { 142 | // printf("frontier size:\n"); 143 | for (int i = 0; i < bucket_num; i++) { 144 | // if (sizes[i] != floor[i]) 145 | printf(" frontier depth %d size %d floor %d\n", i, sizes[i], 146 | floor[i]); 147 | } 148 | } 149 | } 150 | 151 | __forceinline__ __device__ bool needWork() { 152 | // if (!LTID) printf(" block %d checking\n", blockIdx.x); 153 | 154 | for (int i = 0; i < bucket_num; i++) { 155 | if (checkFocus(i)) return true; 156 | } 157 | return false; 158 | // } else 159 | } 160 | __forceinline__ __device__ void nextFocus(int current_focus) { 161 | for (size_t i = 1; i < bucket_num; i++) { 162 | int tmp = (current_focus + 1) % bucket_num; 163 | if (checkFocus(tmp)) { 164 | // CAS? 165 | int old = atomicCAS(focus, current_focus, tmp); 166 | // return tmp; 167 | } 168 | } 169 | } 170 | 171 | __forceinline__ __device__ sampleJob requireOneJobFromBucket( 172 | int bucket_idx) { 173 | int old = atomicAdd(&floor[bucket_idx], 1); 174 | // int old = atomicAggInc(&floor[bucket_idx]); 175 | if (old < sizes[bucket_idx]) { 176 | return data[bucket_idx][old]; 177 | } else { 178 | atomicSub(&floor[bucket_idx], 1); 179 | sampleJob tmp = {0, 0, 0, 0, false}; 180 | return tmp; 181 | } 182 | } 183 | __forceinline__ __device__ sampleJob requireOneJob() { 184 | // printf("not implemented\n"); 185 | int current_focus = *focus; 186 | if (!checkFocus(current_focus)) { 187 | nextFocus(current_focus); 188 | } 189 | current_focus = *focus; 190 | return requireOneJobFromBucket(current_focus); 191 | } 192 | }; 193 | 194 | template 195 | struct SampleFrontier { 196 | sampleJob *data[depth]; 197 | int capacity[depth]; 198 | int *sizes; 199 | int *floor; 200 | int hop_num = depth; 201 | // T *seed; 202 | 203 | void Allocate(size_t _size, uint *hops, uint num_vtx = 0) { 204 | capacity[0] = _size; 205 | // CUDA_RT_CALL(MyCudaMalloc(&seed, capacity * sizeof(T))); 206 | uint length = 1; 207 | u64 l = 0; 208 | for (size_t i = 0; i < depth; i++) { 209 | // capacity[0] *= hops[i]; 210 | CUDA_RT_CALL(MyCudaMalloc(&data[i], capacity[i] * sizeof(sampleJob))); 211 | if (i + 1 < depth) capacity[i + 1] = capacity[i] * hops[i + 1]; 212 | l += capacity[i] * sizeof(sampleJob); 213 | } 214 | CUDA_RT_CALL(MyCudaMalloc(&sizes, depth * sizeof(int))); 215 | CUDA_RT_CALL(MyCudaMalloc(&floor, depth * sizeof(int))); 216 | // printf("%s:%d %s for %d\n", __FILE__, __LINE__, __FUNCTION__, 0); 217 | LOG(" frontier overhead %d MB\n ", l / 1024 / 1024); 218 | } 219 | __device__ void printSize() { 220 | if (!TID) { 221 | printf("frontier size:\n"); 222 | for (size_t i = 0; i < depth; i++) { 223 | printf(" depth %d size %d floor %d\n", i, sizes[i], floor[i]); 224 | } 225 | } 226 | } 227 | __host__ void Init(uint *seed, uint size, uint vtx_per_bucket = 0) { 228 | InitSampleFrontier<<>>(data[0], seed, size); 229 | int tmp = size; 230 | 
CUDA_RT_CALL(cudaMemset(sizes, 0, depth * sizeof(int))); 231 | CUDA_RT_CALL(cudaMemset(floor, 0, depth * sizeof(int))); 232 | CUDA_RT_CALL( 233 | cudaMemcpy(&sizes[0], &tmp, sizeof(int), cudaMemcpyHostToDevice)); 234 | } 235 | // __device__ void CheckActive(uint itr) {} 236 | __device__ void Add(uint instance_idx, uint offset, uint itr, T src_id) { 237 | size_t old = atomicAdd(&sizes[itr], 1); 238 | #ifndef NDEBUG 239 | if (old >= capacity[itr]) 240 | printf("%s:%d %s vec overflow capacity %u loc %llu\n", __FILE__, __LINE__, 241 | __FUNCTION__, capacity[itr], (unsigned long long)old); 242 | #endif 243 | assert(old < capacity[itr]); 244 | sampleJob tmp = {instance_idx, offset, src_id, 0, true}; 245 | data[itr][old] = tmp; 246 | } 247 | // __device__ void Reset(uint itr) { size[itr % 3] = 0; } 248 | __device__ int Size(uint itr) { return sizes[itr]; } 249 | __device__ sampleJob Get(uint itr, uint idx) { return data[itr][idx]; } 250 | __device__ sampleJob requireOneJob(uint itr) { 251 | int old = atomicAggInc(&floor[itr]); 252 | // size_t old = atomicAdd(&floor[itr], 1); 253 | if (old < sizes[itr]) { 254 | return data[itr][old]; 255 | } else { 256 | atomicSub(&floor[itr], 1); 257 | sampleJob tmp = {0, 0, 0, 0, false}; 258 | return tmp; 259 | } 260 | } 261 | }; 262 | 263 | #endif -------------------------------------------------------------------------------- /src/online_sample_twc.cu: -------------------------------------------------------------------------------- 1 | #include "app.cuh" 2 | 3 | using block_table = 4 | alias_table_constructor_shmem; 5 | using warp_table = alias_table_constructor_shmem>; 6 | using subwarp_table = 7 | alias_table_constructor_shmem, 8 | BufferType::SHMEM, 9 | AliasTableStorePolicy::NONE>; 10 | 11 | static __device__ void SampleSubwarpCentic(sample_result &result, 12 | gpu_graph *ggraph, curandState state, 13 | int current_itr, int idx, 14 | int node_id, void *buffer) { 15 | subwarp_table *tables = (subwarp_table *)buffer; 16 | subwarp_table *table = &tables[SWID]; 17 | bool not_all_zero = 18 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 19 | ggraph->getDegree(node_id), current_itr, node_id); 20 | if (not_all_zero) { 21 | table->construct(); 22 | table->roll_atomic(&state, result); 23 | } 24 | table->Clean(); 25 | } 26 | 27 | static __device__ void SampleWarpCentic(sample_result &result, 28 | gpu_graph *ggraph, curandState state, 29 | int current_itr, int idx, int node_id, 30 | void *buffer) { 31 | // subwarp_table *tables = (subwarp_table *)buffer; 32 | warp_table *tables = 33 | (warp_table *)((void *)buffer + 34 | WID * WARP_SIZE / SUBWARP_SIZE * sizeof(subwarp_table)); 35 | warp_table *table = &tables[0]; 36 | bool not_all_zero = 37 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 38 | ggraph->getDegree(node_id), current_itr, node_id); 39 | if (not_all_zero) { 40 | table->construct(); 41 | table->roll_atomic(&state, result); 42 | } 43 | table->Clean(); 44 | } 45 | 46 | static __device__ void SampleBlockCentic(sample_result &result, 47 | gpu_graph *ggraph, curandState state, 48 | int current_itr, int node_id, 49 | void *buffer, 50 | Vector_pack *vector_packs) { 51 | block_table *tables = (block_table *)buffer; 52 | block_table *table = &tables[0]; 53 | table->loadGlobalBuffer(vector_packs); 54 | __syncthreads(); 55 | bool not_all_zero = 56 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 57 | ggraph->getDegree(node_id), current_itr, node_id); 58 | __syncthreads(); 59 | if (not_all_zero) { 60 | table->constructBC(); 
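// Descriptive note: block-centric path for high-degree vertices. The alias
// table is built cooperatively by the whole thread block in the per-block
// global-memory buffer (vector_packs) loaded above; the draw count below is
// capped at the vertex degree before rolling.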
61 | uint target_size = 62 | MIN(ggraph->getDegree(node_id), result.hops[current_itr + 1]); 63 | table->roll_atomic(target_size, &state, result); 64 | } 65 | __syncthreads(); 66 | table->Clean(); 67 | } 68 | 69 | // template 70 | // struct worker; 71 | 72 | static __global__ void sample_kernel(Sampler *sampler, 73 | Vector_pack *vector_pack) { 74 | sample_result &result = sampler->result; 75 | gpu_graph *ggraph = &sampler->ggraph; 76 | Vector_pack *vector_packs = &vector_pack[BID]; 77 | __shared__ subwarp_table table[SUBWARP_PER_BLK]; 78 | 79 | void *buffer = &table[0]; 80 | curandState state; 81 | curand_init(TID, 0, 0, &state); 82 | 83 | thread_block tb = this_thread_block(); 84 | auto warp = tiled_partition<32>(tb); 85 | auto subwarp = tiled_partition<4>(warp); 86 | 87 | __shared__ uint current_itr; 88 | if (threadIdx.x == 0) current_itr = 0; 89 | __syncthreads(); 90 | for (; current_itr < result.hop_num - 1;) // for 2-hop, hop_num=3 91 | { 92 | sample_job job; 93 | __threadfence_block(); 94 | 95 | if (subwarp.thread_rank() == 0) { 96 | job = result.requireOneJob(current_itr); 97 | } 98 | subwarp.sync(); 99 | job.idx = subwarp.shfl(job.idx, 0); 100 | job.val = subwarp.shfl(job.val, 0); 101 | job.node_id = subwarp.shfl(job.node_id, 0); 102 | subwarp.sync(); 103 | while (job.val) { 104 | subwarp.sync(); 105 | if (ggraph->getDegree(job.node_id) < ELE_PER_SUBWARP) { 106 | SampleSubwarpCentic(result, ggraph, state, current_itr, job.idx, 107 | job.node_id, buffer); 108 | } else if (ggraph->getDegree(job.node_id) < ELE_PER_WARP) { 109 | if (subwarp.thread_rank() == 0) { 110 | result.AddMidDegree(current_itr, job.node_id); 111 | } 112 | } else { 113 | #ifdef skip8k 114 | if (subwarp.thread_rank() == 0 && ggraph->getDegree(job.node_id) < 8000) 115 | #else 116 | if (subwarp.thread_rank() == 0) 117 | #endif // skip8k 118 | { 119 | result.AddHighDegree(current_itr, job.node_id); 120 | } 121 | } 122 | subwarp.sync(); 123 | if (subwarp.thread_rank() == 0) job = result.requireOneJob(current_itr); 124 | job.idx = subwarp.shfl(job.idx, 0); 125 | job.val = subwarp.shfl(job.val, 0); 126 | job.node_id = subwarp.shfl(job.node_id, 0); 127 | subwarp.sync(); 128 | } 129 | 130 | // warp process 131 | warp.sync(); 132 | // __syncwarp(FULL_WARP_MASK); 133 | if (warp.thread_rank() == 0) 134 | job = result.requireOneMidDegreeJob(current_itr); 135 | warp.sync(); 136 | job.idx = warp.shfl(job.idx, 0); 137 | job.val = warp.shfl(job.val, 0); 138 | job.node_id = warp.shfl(job.node_id, 0); 139 | warp.sync(); 140 | while (job.val) { 141 | SampleWarpCentic(result, ggraph, state, current_itr, job.idx, job.node_id, 142 | buffer); 143 | warp.sync(); 144 | if (warp.thread_rank() == 0) 145 | job = result.requireOneMidDegreeJob(current_itr); 146 | job.idx = warp.shfl(job.idx, 0); 147 | job.val = warp.shfl(job.val, 0); 148 | job.node_id = warp.shfl(job.node_id, 0); 149 | } 150 | 151 | // block process 152 | __syncthreads(); // cannot reach?? 
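// Descriptive note: block-level tier of this kernel's degree-based scheduling.
// Sub-warps handled low-degree vertices (< ELE_PER_SUBWARP) directly and
// deferred larger ones via AddMidDegree/AddHighDegree; warps have just drained
// the mid-degree queue. Now the whole block takes high-degree jobs one at a
// time and processes each with SampleBlockCentic.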
153 | __shared__ sample_job high_degree_job; 154 | if (LTID == 0) { 155 | job = result.requireOneHighDegreeJob(current_itr); 156 | high_degree_job.val = job.val; 157 | high_degree_job.node_id = job.node_id; 158 | } 159 | __syncthreads(); 160 | while (high_degree_job.val) { 161 | SampleBlockCentic(result, ggraph, state, current_itr, 162 | high_degree_job.node_id, buffer, 163 | vector_packs); // buffer_pointer 164 | __syncthreads(); 165 | if (LTID == 0) { 166 | job = result.requireOneHighDegreeJob(current_itr); 167 | high_degree_job.val = job.val; 168 | high_degree_job.node_id = job.node_id; 169 | } 170 | __syncthreads(); 171 | } 172 | __syncthreads(); 173 | if (threadIdx.x == 0) { 174 | result.NextItr(current_itr); 175 | } 176 | __syncthreads(); 177 | } 178 | } 179 | 180 | static __global__ void print_result(Sampler *sampler) { 181 | sampler->result.PrintResult(); 182 | } 183 | 184 | // void Start_high_degree(Sampler sampler) 185 | float OnlineGBSampleTWC(Sampler &sampler) { 186 | // orkut max degree 932101 187 | 188 | LOG("%s\n", __FUNCTION__); 189 | #ifdef skip8k 190 | LOG("skipping 8k\n"); 191 | #endif // skip8k 192 | // paster( 193 | // sizeof(alias_table_constructor_shmem>) 195 | // * 196 | // BLOCK_SIZE / SUBWARP_SIZE); 197 | // paster(sizeof(warp_table) * WARP_PER_BLK); 198 | 199 | int device; 200 | cudaDeviceProp prop; 201 | cudaGetDevice(&device); 202 | cudaGetDeviceProperties(&prop, device); 203 | int n_sm = prop.multiProcessorCount; 204 | 205 | Sampler *sampler_ptr; 206 | MyCudaMalloc(&sampler_ptr, sizeof(Sampler)); 207 | CUDA_RT_CALL(cudaMemcpy(sampler_ptr, &sampler, sizeof(Sampler), 208 | cudaMemcpyHostToDevice)); 209 | double start_time, total_time; 210 | init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr, true); 211 | 212 | // allocate global buffer 213 | int block_num = n_sm * FLAGS_m; 214 | int gbuff_size = sampler.ggraph.MaxDegree; 215 | 216 | LOG("alllocate GMEM buffer %d MB\n", 217 | block_num * gbuff_size * MEM_PER_ELE / 1024 / 1024); 218 | 219 | Vector_pack *vector_pack_h = new Vector_pack[block_num]; 220 | for (size_t i = 0; i < block_num; i++) { 221 | vector_pack_h[i].Allocate(gbuff_size, sampler.device_id); 222 | } 223 | CUDA_RT_CALL(cudaDeviceSynchronize()); 224 | #pragma omp barrier 225 | Vector_pack *vector_packs; 226 | CUDA_RT_CALL( 227 | MyCudaMalloc(&vector_packs, sizeof(Vector_pack) * block_num)); 228 | CUDA_RT_CALL(cudaMemcpy(vector_packs, vector_pack_h, 229 | sizeof(Vector_pack) * block_num, 230 | cudaMemcpyHostToDevice)); 231 | 232 | // Global_buffer 233 | CUDA_RT_CALL(cudaDeviceSynchronize()); 234 | start_time = wtime(); 235 | if (FLAGS_debug) 236 | sample_kernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr, vector_packs); 237 | else 238 | sample_kernel<<>>(sampler_ptr, vector_packs); 239 | 240 | CUDA_RT_CALL(cudaDeviceSynchronize()); 241 | // CUDA_RT_CALL(cudaPeekAtLastError()); 242 | total_time = wtime() - start_time; 243 | #pragma omp barrier 244 | LOG("Device %d sampling time:\t%.2f ms ratio:\t %.1f MSEPS\n", 245 | omp_get_thread_num(), total_time * 1000, 246 | static_cast(sampler.result.GetSampledNumber() / total_time / 247 | 1000000)); 248 | sampler.sampled_edges = sampler.result.GetSampledNumber(); 249 | LOG("sampled_edges %d\n", sampler.sampled_edges); 250 | if (FLAGS_printresult) print_result<<<1, 32, 0, 0>>>(sampler_ptr); 251 | CUDA_RT_CALL(cudaDeviceSynchronize()); 252 | return total_time; 253 | } 254 | --------------------------------------------------------------------------------