├── .clang-format ├── include ├── alias_table.cuh ├── kernel.cuh ├── instance.cuh ├── app.cuh ├── roller.cuh └── frontier.cuh ├── old ├── shmem │ ├── alias │ ├── Makefile │ ├── alias_table.cu │ ├── tmp.cu │ ├── util.cu │ ├── util.cuh │ ├── vec.cuh │ └── alias_table.cuh ├── Makefile ├── vec.cuh ├── common.cuh ├── alias.cu └── graph.cuh ├── tools ├── getDegree ├── Makefile ├── getsnapgraph.sh ├── drop-caches.sh ├── getwebgraph.sh ├── getDegree.cu └── gr2npz.cu ├── .gitmodules ├── scripts ├── saint.sh ├── debug.sh ├── data.sh ├── simple_args_parsing.sh ├── numa.sh ├── graphwalker.sh ├── mem_test.sh ├── trw-biased.sh ├── my.sh ├── biased.sh ├── trw-unbiased.sh ├── trans.py ├── trans2.py ├── csaw.sh ├── multiple-gpu.sh ├── fig8_biased.sh ├── table3_unbiased.sh └── test.sh ├── .gitignore ├── src ├── api │ ├── bias_static.cu │ ├── bias_degree.cu │ └── bias_node2vec.cu ├── kernel.cu ├── util.cu ├── createTable.cu ├── online_sample.cu ├── offline_walk.cu └── online_sample_twc.cu ├── Makefile ├── .vscode ├── c_cpp_properties.json ├── tasks.json ├── launch.json └── settings.json ├── cmake └── FindNuma.cmake ├── result ├── nextdoor_unbias.sh ├── nextdoor.sh └── knightking.sh ├── test ├── tmp.cu └── alias_table.cu ├── figs ├── dynamic.sh ├── with_nextdoor.sh ├── offline.sh ├── online.sh ├── unbiased.sh ├── test_driver.sh ├── v100.sh ├── scale.sh └── spec.sh ├── CMakeLists.txt └── README.md /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google -------------------------------------------------------------------------------- /include/alias_table.cuh: -------------------------------------------------------------------------------- 1 | # include "alias_table_new.cuh" -------------------------------------------------------------------------------- /old/shmem/alias: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wpybtw/Skywalker/HEAD/old/shmem/alias -------------------------------------------------------------------------------- /tools/getDegree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wpybtw/Skywalker/HEAD/tools/getDegree -------------------------------------------------------------------------------- /old/Makefile: -------------------------------------------------------------------------------- 1 | all: alias 2 | 3 | 4 | alias: alias.cu alias.cuh #-arch=sm_75 5 | nvcc alias.cu -G -o alias -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "others/C-SAW"] 2 | path = others/C-SAW 3 | url = https://github.com/concept-inversion/C-SAW 4 | [submodule "deps/gflags"] 5 | path = deps/gflags 6 | url = https://github.com/gflags/gflags 7 | -------------------------------------------------------------------------------- /scripts/saint.sh: -------------------------------------------------------------------------------- 1 | cd GraphSAINT 2 | python -m graphsaint.tensorflow_version.train --data_prefix ./data/amazon --train_config ./train_config/table2/amazon_2_rw.yml --gpu -1 3 | 4 | python -m graphsaint.tensorflow_version.train --data_prefix ./data/ppi --train_config ./train_config/table2/ppi2_rw.yml --gpu -1 -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | C-SAW/* 2 | alias-method/* 3 | build/* 4 | bin/* 5 | deps/* 6 | bin_old/* 7 | 8 | perf_result/* 9 | .clangd/* 10 | .VSCodeCounter/* 11 | 12 | *.o 13 | *.cubin 14 | *.fatbin 15 | *.fatbin.* 16 | *.reg.cu 17 | *.ii 18 | *.gpu 19 | *.stub.c 20 | *.module_id 21 | *.cudafe1.* 22 | *.reg.c 23 | *.ptx 24 | *.ncu-rep 25 | -------------------------------------------------------------------------------- /scripts/debug.sh: -------------------------------------------------------------------------------- 1 | cuobjdump -sass ./sampler_high_degree.cubin >dump.txt 2 | 3 | nvdisasm ./sampler_high_degree.cubin -g > dump.disasm 4 | 5 | cuda-gdb ./main_gbuffer lj ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 1 1 2 2 1 6 | 7 | set cuda memcheck on 8 | r lj ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 10 1 2 2 1 -------------------------------------------------------------------------------- /include/kernel.cuh: -------------------------------------------------------------------------------- 1 | #include "sampler.cuh" 2 | #include "sampler_result.cuh" 3 | 4 | __global__ void init_kernel_ptr(Sampler *sampler, bool biasInit); 5 | __global__ void init_kernel_ptr(Walker *sampler, bool biasInit); 6 | // __global__ void initSeed(ResultBase *results, uint *seeds, size_t 7 | // size); 8 | 9 | __device__ bool AddTillSize(uint *size, size_t target_size); 10 | 11 | __global__ void BindResultKernel(Walker *walker); -------------------------------------------------------------------------------- /old/shmem/Makefile: -------------------------------------------------------------------------------- 1 | all: util alias_table alias 2 | 3 | CUFLAG= -G -g -rdc=true -gencode=arch=compute_75,code=sm_75 -std=c++11 4 | 5 | util: util.cuh util.cu 6 | nvcc util.cu $(CUFLAG) -c 7 | 8 | alias_table: alias_table.cuh alias_table.cu 9 | nvcc alias_table.cu $(CUFLAG) -c 10 | 11 | 12 | alias: tmp.cu util.o alias_table.o #-arch=sm_75 13 | nvcc tmp.cu util.o alias_table.o $(CUFLAG) -o alias 14 | 15 | clean: 16 | rm *.o -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | all: getDegree 2 | # util sampler main 3 | 4 | CUFLAG= -I../include -rdc=true -gencode=arch=compute_75,code=sm_75 -std=c++11 -res-usage -lineinfo -Xptxas -v #-keep #-Xptxas -O3,-v 5 | debug: CUFLAG += -G -g 6 | debug: main main_gbuffer main_spliced 7 | 8 | # main: main.cu util.cu sampler.cu 9 | 10 | getDegree: getDegree.cu ../src/util.cu 11 | nvcc getDegree.cu ../src/util.cu $(CUFLAG) -o getDegree 12 | 13 | -------------------------------------------------------------------------------- /src/api/bias_static.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-12-03 16:46:11 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-27 20:41:22 6 | * @FilePath: /sampling/src/api/bias_static.cu 7 | */ 8 | #include "gpu_graph.cuh" 9 | DEFINE_bool(weight, true, "load edge weight from file"); 10 | // DEFINE_bool(bias, true, "biased or unbiased sampling"); 11 | 12 | __device__ float gpu_graph::getBias(edge_t dst, uint src, uint idx) { 13 | return adjwgt[dst]; 14 | } 15 | __device__ void gpu_graph::UpdateWalkerState(uint idx, uint info){} -------------------------------------------------------------------------------- /tools/getsnapgraph.sh: 
-------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-04 22:39:00 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 14:24:09 6 | # @FilePath: /skywalker/scripts/getsnapgraph.sh 7 | ### 8 | 9 | cd ~/data 10 | wget http://data.law.di.unimi.it/webdata/$1/$1.properties 11 | # mv $1.txt $1 12 | # python2 ~/graph/gunrock/tools/associate_weights.py ~/data/$1 13 | 14 | # mv $1.random.weight.mtx $1.w.edge 15 | # ~/graph/Galois/build/tools/graph-convert/graph-convert -edgelist2gr ~/data/$1.w.edge ~/data/$1.w.gr -edgeType=uint32 -------------------------------------------------------------------------------- /src/api/bias_degree.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-12-08 17:22:17 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-27 20:41:05 6 | * @FilePath: /sampling/src/api/bias_degree.cu 7 | */ 8 | 9 | #include "gpu_graph.cuh" 10 | DEFINE_bool(weight, false, "load edge weight from file"); 11 | // DEFINE_bool(bias, false, "biased or unbiased sampling"); 12 | 13 | __device__ float gpu_graph::getBias(uint dst, uint src, uint idx) { 14 | // printf("degree\t"); 15 | return xadj[dst + 1] - xadj[dst]; 16 | } 17 | __device__ void gpu_graph::UpdateWalkerState(uint idx, uint info){} -------------------------------------------------------------------------------- /tools/drop-caches.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-10 22:36:32 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 22:37:02 6 | # @FilePath: /skywalker/tools/drop-caches.sh 7 | ### 8 | # Mark it as executable using chmod a+x drop-caches 9 | # Call it using sudo ./drop-caches 10 | # If you place the script in /usr/local/bin you can call it using sudo drop-caches 11 | 12 | #!/bin/bash 13 | if [[ $(id -u) -ne 0 ]] ; then echo "Please run as root" ; exit 1 ; fi 14 | sync; echo 1 > /proc/sys/vm/drop_caches 15 | sync; echo 2 > /proc/sys/vm/drop_caches 16 | sync; echo 3 > /proc/sys/vm/drop_caches -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: build 2 | debug: debug 3 | 4 | 5 | .PHONY:build debug test 6 | 7 | SRC_DIR:= src 8 | SRC_FILES := $(wildcard $(SRC_DIR)/*.cu) 9 | HEADER_FILES := $(wildcard include/*.cuh) 10 | 11 | build: 12 | -mkdir build;cd build;cmake ..;make -j 13 | 14 | debug: 15 | -mkdir build;cd build;cmake .. 
-DCMAKE_BUILD_TYPE=Debug;make -j 16 | 17 | test: 18 | ./build/skywalker --k 2 --d 2 --ol=1 --input ~/data/lj.w.gr --ngpu=4 --hd=1 --n=400000 19 | ./build/skywalker -bias=1 --ol=1 --ngpu=1 --s --sage --input ~/data/orkut.w.gr -v 20 | ./build/skywalker -bias=1 --ol=0 --ngpu=1 --s --sage --input ~/data/orkut.w.gr -v 21 | 22 | 23 | clean: 24 | cd build;make clean -------------------------------------------------------------------------------- /tools/getwebgraph.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-04 22:39:00 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-04 22:52:19 6 | # @FilePath: /sampling/scripts/downloadgraph.sh 7 | ### 8 | 9 | cd ~/data 10 | 11 | 12 | wget http://data.law.di.unimi.it/webdata/$1/$1.properties 13 | wget http://data.law.di.unimi.it/webdata/$1/$1.graph 14 | 15 | cd webgraph-big-3.5.1 16 | 17 | java -cp "*" it.unimi.dsi.webgraph.ArcListASCIIGraph ../$1 ../$1 18 | 19 | cd .. 20 | python2 ~/graph/gunrock/tools/associate_weights.py ~/data/$1 21 | 22 | mv $1.random.weight.mtx $1.w.edge 23 | ~/graph/Galois/build/tools/graph-convert/graph-convert -edgelist2gr ~/data/$1.w.edge ~/data/$1.w.gr -edgeType=uint32 -------------------------------------------------------------------------------- /scripts/data.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-17 13:30:33 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 14:22:31 6 | # @FilePath: /skywalker/scripts/data.sh 7 | ### 8 | DATA=( sk-2005 friendster) # $orkut uk-union rmat29 web-ClueWeb09) twitter-2010 9 | # lj orkut web-Google uk-2005 10 | 11 | ED=".w.edge" 12 | 13 | EL=".el" 14 | 15 | 16 | # # for c-saw 17 | # for idx in $(seq 1 ${#DATA[*]}) 18 | # do 19 | # ~/graph_project_start/tuple_text_to_binary_csr_mem/text_to_bin.bin ~/data/${DATA[idx-1]}${ED} 0 0 40 20 | # done 21 | 22 | 23 | for idx in $(seq 1 ${#DATA[*]}) 24 | do 25 | ~/sampling/KnightKing/build/bin/gconverter -i ~/data/${DATA[idx-1]}${EL} -o ~/data/${DATA[idx-1]}.uw.data -s unweighted 26 | done -------------------------------------------------------------------------------- /.vscode/c_cpp_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "Linux", 5 | "includePath": [ 6 | // "${workspaceFolder}/**", 7 | "${workspaceFolder}/include/**", 8 | // "${workspaceFolder}/build/deps/gflags/include/gflags", 9 | "${workspaceFolder}/build/deps/gflags/include", 10 | "/usr/local/cuda/include", 11 | "/usr/local/cuda-11.0/targets/x86_64-linux/include" 12 | // "/usr/lib/gcc/x86_64-linux-gnu/7/include" 13 | ], 14 | "defines": [], 15 | "compilerPath": "/usr/bin/clang", // "/usr/local/cuda/bin/nvcc", // 16 | "cStandard": "c11", 17 | "cppStandard": "c++17", 18 | "intelliSenseMode": "gcc-x64", 19 | "compileCommands": "${workspaceFolder}/compile_commands.json" 20 | } 21 | ], 22 | "version": 4 23 | } 24 | -------------------------------------------------------------------------------- /src/api/bias_node2vec.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Author: Pengyu Wang 3 | * @Date: 2020-12-08 17:22:17 4 | * @LastEditTime: 2020-12-27 20:01:54 5 | * @Description: 6 | * @FilePath: /sampling/src/api/bias_node2vec.cu 7 | */ 8 | 9 | #include 10 | #include "gpu_graph.cuh" 11 | 12 | DEFINE_bool(weight, true, "load edge weight from file"); 13 | // 
DEFINE_bool(bias, true, "biased or unbiased sampling"); 14 | 15 | __device__ float gpu_graph::getBias(edge_t dst, uint src, uint idx) { 16 | // if(LID==0) 17 | // printf("%s:%d %s\n", __FILE__, __LINE__, __FUNCTION__); 18 | if (this->result->state[idx].last == dst) { 19 | return adjwgt[dst] / this->result->p; 20 | } else if (CheckConnect(this->result->state[idx].last, dst)) { 21 | // printf("Connect\t"); 22 | return adjwgt[dst]; 23 | } else { 24 | // printf("NotConnect\t"); 25 | return adjwgt[dst] / this->result->q; 26 | } 27 | } 28 | __device__ void gpu_graph::UpdateWalkerState(uint idx, uint info){ 29 | this->result->state[idx].last = info; 30 | } -------------------------------------------------------------------------------- /scripts/simple_args_parsing.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # a simple way to parse shell script arguments 5 | # 6 | # please edit and use to your hearts content 7 | # 8 | 9 | 10 | ENVIRONMENT="dev" 11 | DB_PATH="/data/db" 12 | 13 | function usage() 14 | { 15 | echo "if this was a real script you would see something useful here" 16 | echo "" 17 | echo "./simple_args_parsing.sh" 18 | echo "\t-h --help" 19 | echo "\t--environment=$ENVIRONMENT" 20 | echo "\t--db-path=$DB_PATH" 21 | echo "" 22 | } 23 | 24 | while [ "$1" != "" ]; do 25 | PARAM=`echo $1 | awk -F= '{print $1}'` 26 | VALUE=`echo $1 | awk -F= '{print $2}'` 27 | case $PARAM in 28 | -h | --help) 29 | usage 30 | exit 31 | ;; 32 | --environment) 33 | ENVIRONMENT=$VALUE 34 | ;; 35 | --db-path) 36 | DB_PATH=$VALUE 37 | ;; 38 | *) 39 | echo "ERROR: unknown parameter \"$PARAM\"" 40 | usage 41 | exit 1 42 | ;; 43 | esac 44 | shift 45 | done 46 | 47 | 48 | echo "ENVIRONMENT is $ENVIRONMENT"; 49 | echo "DB_PATH is $DB_PATH"; -------------------------------------------------------------------------------- /scripts/numa.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: https://stackoverflow.com/questions/16056800/multi-gpu-programming-using-cuda-on-a-numa-machine 3 | # @Date: 2020-12-29 18:14:52 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2020-12-29 18:16:18 6 | # @FilePath: /sampling/scripts/numa.sh 7 | ### 8 | #!/bin/bash 9 | #this script will output a listing of each GPU and it's CPU core affinity mask 10 | file="/proc/driver/nvidia/gpus/0000:3d:00.0/information" 11 | if [ ! -e $file ]; then 12 | echo "Unable to locate any GPUs!" 
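# (descriptive comment added for clarity) The existence check above uses the
# hard-coded PCI address 0000:3d:00.0, so the script reports "no GPUs" when
# nothing sits at that exact address; adjust the path for the local machine.
# The else branch below walks /proc/driver/nvidia/gpus/$gpu_num/information and
# prints each GPU index with the CPU core affinity mask read from
# /sys/class/pci_bus/<bus>/cpuaffinity.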
13 | else 14 | gpu_num=0 15 | file="/proc/driver/nvidia/gpus/$gpu_num/information" 16 | if [ "-v" == "$1" ]; then echo "GPU: CPU CORE AFFINITY MASK: PCI:"; fi 17 | while [ -e $file ] 18 | do 19 | line=`grep "Bus Location" $file | { read line; echo $line; }` 20 | pcibdf=${line:14} 21 | pcibd=${line:14:7} 22 | file2="/sys/class/pci_bus/$pcibd/cpuaffinity" 23 | read line2 < $file2 24 | if [ "-v" == "$1" ]; then 25 | echo " $gpu_num $line2 $pcibdf" 26 | else 27 | echo " $gpu_num $line2 " 28 | fi 29 | gpu_num=`expr $gpu_num + 1` 30 | file="/proc/driver/nvidia/gpus/$gpu_num/information" 31 | done 32 | fi -------------------------------------------------------------------------------- /scripts/graphwalker.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-01-07 19:01:38 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-10 18:34:44 6 | # @FilePath: /skywalker/scripts/graphwalker.sh 7 | ### 8 | DATA=( lj.w.edge arabic-2005.w.edge uk-2005.w.edge sk-2005 friendster.w.edge) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 9 | NV=( 4847571 22744077 39459923 50636151 124836180) 10 | 11 | # DATA=( uk-2005.w.edge) 12 | # NV=( 39459923 ) 13 | 14 | # grep "00_runtime\|g_loadSubGraph:\|file:" 15 | 16 | ITR=1 17 | 18 | ED=".w.edge" 19 | EXE="./bin/apps/rwdomination" #main_degree 20 | DIR="/home/pywang/sampling/GraphWalker" 21 | 22 | cd $DIR 23 | # ${EXE} file ~/data/${DATA[idx-1]}${ED} firstsource 0 numsources 400000 walkspersource 1 maxwalklength 100 prob 0.0 L 100 N 4847571 24 | echo "-------------------------------------------------------unbias rw 40000 100" 25 | for idx in $(seq 1 ${#DATA[*]}) 26 | do 27 | ./bin/apps/rawrandomwalks file ~/data/${DATA[idx-1]} R 40000 L 100 N ${NV[idx-1]} 28 | done 29 | 30 | # echo "-------------------------------------------------------unbias ppr 40000 100" 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # ./bin/apps/msppr file ~/data/${DATA[idx-1]} firstsource 0 numsources 40000 walkspersource 1 maxwalklength 100 prob 0.15 34 | # done 35 | 36 | 37 | -------------------------------------------------------------------------------- /old/vec.cuh: -------------------------------------------------------------------------------- 1 | 2 | template class Vector_itf{ 3 | public: 4 | Vector(){} 5 | ~Vector(){} 6 | virtual void init(){} 7 | virtual void add(){} 8 | virtual void clean(){} 9 | virtual bool empty(){} 10 | virtual size_t size(){} 11 | virtial T &operator[](int id){} 12 | }; 13 | 14 | 15 | template class Vector { 16 | size_t *size; 17 | size_t *capacity; 18 | T *data = nullptr; 19 | // bool use_self_buffer = false; 20 | // T data[VECTOR_SHMEM_SIZE]; 21 | Vector() {} 22 | __host__ ~Vector() { 23 | if (use_self_buffer && data != nullptr) 24 | cudaFree(data); 25 | } 26 | __host__ Vector(int _capacity) { 27 | cudaMallocManaged(&size, sizeof(size_t)); 28 | cudaMallocManaged(&capacity, sizeof(size_t)); 29 | *capacity = _capacity; 30 | *size=0; 31 | cudaMalloc(&data, _capacity * sizeof(T)); 32 | use_self_buffer = true; 33 | } 34 | __host__ __device__ size_t& size(){ 35 | return *size; 36 | } 37 | __device__ void add(T t) { 38 | size_t old = atomicAdd(size, 1); 39 | if (old < *capacity) 40 | data[old] = t; 41 | else 42 | printf("wtf vector overflow"); 43 | } 44 | __device__ void clean() { *size = 0; } 45 | __device__ bool empty() { 46 | if (*size == 0) 47 | return true; 48 | return false; 49 | } 50 | __device__ T &operator[](int id) { return data[id]; } 51 | }; 
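
The bias callbacks under src/api/ above (bias_static.cu, bias_degree.cu, bias_node2vec.cu) are the per-edge extension point for biased sampling: each file supplies a __device__ gpu_graph::getBias, a gpu_graph::UpdateWalkerState, and its own DEFINE_bool(weight, ...) gflag. A new bias can follow the same shape. The snippet below is only a sketch of a hypothetical uniform bias; it is not part of the repository, the file name is invented, and it assumes gpu_graph declares these two members exactly as the existing api files do.

/* Hypothetical src/api/bias_uniform.cu -- illustration only, not in the repo. */
#include "gpu_graph.cuh"
DEFINE_bool(weight, false, "load edge weight from file");

// Every candidate edge gets the same bias, so the sampler behaves uniformly.
__device__ float gpu_graph::getBias(edge_t dst, uint src, uint idx) {
  return 1.0f;
}
// A memoryless bias has no per-walker state to update between hops.
__device__ void gpu_graph::UpdateWalkerState(uint idx, uint info) {}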
-------------------------------------------------------------------------------- /src/kernel.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-11-25 13:28:14 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-07 16:32:47 6 | * @FilePath: /sampling/src/kernel.cu 7 | */ 8 | #include "gpu_graph.cuh" 9 | #include "kernel.cuh" 10 | 11 | // __global__ void initSeed(ResultBase *results, uint *seeds, size_t size) 12 | // { 13 | // if (TID < size) { 14 | // results[TID].data[0] = seeds[TID]; 15 | // } 16 | // } 17 | __global__ void BindResultKernel(Walker *walker) { 18 | if (TID == 0) walker->BindResult(); 19 | } 20 | 21 | __global__ void init_kernel_ptr(Sampler *sampler, bool biasInit) { 22 | if (TID == 0) { 23 | sampler->result.setAddrOffset(); 24 | if (biasInit) 25 | for (size_t i = 0; i < sampler->result.hop_num; i++) { 26 | sampler->result.high_degrees[i].Init(); 27 | } 28 | } 29 | } 30 | 31 | __global__ void init_kernel_ptr(Walker *sampler, bool biasInit) { 32 | if (TID == 0) { 33 | sampler->result.setAddrOffset(); 34 | if (biasInit) 35 | for (size_t i = 0; i < sampler->result.hop_num; i++) { 36 | sampler->result.high_degrees[i].Init(); 37 | } 38 | } 39 | } 40 | 41 | __device__ bool AddTillSize(uint *size, 42 | size_t target_size) // T *array, T t, 43 | { 44 | uint old = atomicAdd(size, 1); 45 | if (old < target_size) { 46 | return true; 47 | } 48 | return false; 49 | } -------------------------------------------------------------------------------- /scripts/mem_test.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Author: Pengyu Wang 3 | # @Date: 2021-01-15 14:35:15 4 | # @LastEditTime: 2021-01-15 14:38:16 5 | # @Description: 6 | # @FilePath: /skywalker/scripts/mem_test.sh 7 | ### 8 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 9 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 10 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 11 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 12 | 13 | # DATA=( sk-2005 friendster) 14 | # HD=( 4 1 ) 15 | ITR=1 16 | NG=4 17 | 18 | GR=".w.gr" 19 | EXE="./bin/main" #main_degree 20 | SG="--ngpu=1 --s" 21 | 22 | # node2vec always online 23 | # export OMP_PROC_BIND=TRUE 24 | # GOMP_CPU_AFFINITY="0-9 10-19 20-29 30-99" 25 | # OMP_PLACES=cores 26 | # OMP_PROC_BIND=close 27 | # correct one 28 | # OMP_PLACES=cores OMP_PROC_BIND=spread 29 | # --randomweight=1 --weightrange=2 30 | 31 | 32 | 33 | echo "-------------------------------------------------------unbias sample 2 20 40k" 34 | for idx in $(seq 1 ${#DATA[*]}) 35 | do 36 | ./bin/main --input ~/data/${DATA[idx-1]}${GR} --d 2 --k 20 --n 40000 --bias=0 --rw=0 --ngpu=1 --ol=0 --umgraph=1 -v 37 | ./bin/main --input ~/data/${DATA[idx-1]}${GR} --d 2 --k 20 --n 40000 --bias=0 --rw=0 --ngpu=1 --ol=0 --hmgraph=1 -v 38 | ./bin/main --input ~/data/${DATA[idx-1]}${GR} --d 2 --k 20 --n 40000 --bias=0 --rw=0 --ngpu=1 --ol=0 --gmgraph=1 --gmid=1 -v 39 | done 40 | 41 | 42 | -------------------------------------------------------------------------------- /tools/getDegree.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "gpu_graph.cuh" 15 | // #include 
"graph.h" 16 | // #include "sampler.cuh" 17 | 18 | using namespace std; 19 | 20 | int main(int argc, char *argv[]) { 21 | if (argc != 5) { 22 | std::cout << "Input: ./exe " 23 | " <# of samples> " 24 | " <#GPUs>\n"; 25 | exit(0); 26 | } 27 | // 28 | // SampleSize, FrontierSize, NeighborSize 29 | // printf("MPI started\n"); 30 | // int n_blocks = atoi(argv[4]); 31 | // int block_size = atoi(argv[5]); 32 | // int SampleSize = atoi(argv[5]); 33 | // int FrontierSize = atoi(argv[6]); 34 | // int NeighborSize = atoi(argv[7]); 35 | // int Depth = atoi(argv[8]); 36 | // int total_GPU = atoi(argv[9]); 37 | 38 | const char *beg_file = argv[2]; 39 | const char *csr_file = argv[3]; 40 | const char *weight_file = argv[3]; 41 | int node = atoi(argv[4]); 42 | 43 | graph *ginst = 44 | new graph( 45 | beg_file, csr_file, weight_file); 46 | gpu_graph ggraph(ginst); 47 | printf("node %d has degree %d\n", node, ggraph.getDegree_h(node)); 48 | 49 | return 0; 50 | } -------------------------------------------------------------------------------- /cmake/FindNuma.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # (c) Copyright 2016 Hewlett Packard Enterprise Development LP 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Find the numa policy library. 18 | # Output variables: 19 | # NUMA_INCLUDE_DIR : e.g., /usr/include/. 20 | # NUMA_LIBRARY : Library path of numa library 21 | # NUMA_FOUND : True if found. 
22 | FIND_PATH(NUMA_INCLUDE_DIR NAME numa.h 23 | HINTS $ENV{HOME}/local/include /opt/local/include /usr/local/include /usr/include) 24 | 25 | FIND_LIBRARY(NUMA_LIBRARY NAME numa 26 | HINTS $ENV{HOME}/local/lib64 $ENV{HOME}/local/lib /usr/local/lib64 /usr/local/lib /opt/local/lib64 /opt/local/lib /usr/lib64 /usr/lib 27 | ) 28 | 29 | IF (NUMA_INCLUDE_DIR AND NUMA_LIBRARY) 30 | SET(NUMA_FOUND TRUE) 31 | MESSAGE(STATUS "Found numa library: inc=${NUMA_INCLUDE_DIR}, lib=${NUMA_LIBRARY}") 32 | ELSE () 33 | SET(NUMA_FOUND FALSE) 34 | MESSAGE(STATUS "WARNING: Numa library not found.") 35 | MESSAGE(STATUS "Try: 'sudo apt-get install libnuma libnuma-dev' (or sudo yum install numactl numactl-devel)") 36 | ENDIF () -------------------------------------------------------------------------------- /scripts/trw-biased.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-10-21 4 | # @LastEditors: Xu Cheng 5 | # @FilePath: /ThunderRW 6 | ### 7 | DATA=(GG lj OK AB UK SK FS) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 8 | 9 | 10 | echo "biased rw" >>"/home/xucheng/ThunderRW/biased3.csv" 11 | for idx in $(seq 1 ${#DATA[*]}) 12 | do 13 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/biased3.csv" 14 | ./build/random_walk/deepwalk.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -ew -l 100 >>"/home/xucheng/ThunderRW/biased3.csv" 2>&1 15 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 16 | done 17 | 18 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 19 | 20 | echo "biased ppr" >>"/home/xucheng/ThunderRW/biased3.csv" 21 | for idx in $(seq 1 ${#DATA[*]}) 22 | do 23 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/biased3.csv" 24 | ./build/random_walk/ppr.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -sp 0.15 -em 1 -sm 2 -l 100 >>"/home/xucheng/ThunderRW/biased3.csv" 2>&1 25 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 26 | done 27 | 28 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 29 | 30 | echo "biased node2vec" >>"/home/xucheng/ThunderRW/biased3.csv" 31 | for idx in $(seq 1 ${#DATA[*]}) 32 | do 33 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/biased3.csv" 34 | ./build/random_walk/node2vec.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -ew -l 100 >>"/home/xucheng/ThunderRW/biased3.csv" 2>&1 35 | echo " " >>"/home/xucheng/ThunderRW/biased3.csv" 36 | done 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /old/common.cuh: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_CUH 2 | #define COMMON_CUH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #define BLOCK_SIZE 512 27 | 28 | #define ALPHA 0.85 29 | #define EPSILON 0.01 30 | 31 | #define ACT_TH 0.01 32 | 33 | using std::cout; 34 | using std::endl; 35 | using std::flush; 36 | using std::ifstream; 37 | using std::ofstream; 38 | using std::string; 39 | using std::stringstream; 40 | using std::to_string; 41 | using std::vector; 42 | 43 | using uint = unsigned int; 44 | using ulong = unsigned long; 45 | 46 | using vtx_t = unsigned int; // vertex_num < 4B 47 | using edge_t = unsigned int; // vertex_num < 4B 48 | // using edge_t = unsigned long long int; // vertex_num > 4B 49 | 
using weight_t = unsigned int; 50 | 51 | 52 | const unsigned int INFINIT = std::numeric_limits::max() - 1; 53 | 54 | #define TID_1D (threadIdx.x + blockIdx.x * blockDim.x) 55 | 56 | template 57 | void printD(T *DeviceData, int n) 58 | { 59 | T *tmp = new T[n]; 60 | cudaMemcpy(tmp, DeviceData, n * sizeof(T), cudaMemcpyDeviceToHost); 61 | for (size_t i = 0; i < n; i++) 62 | { 63 | cout << tmp[i] << "\t"; 64 | if (i % 10 == 9) 65 | { 66 | cout << endl; 67 | } 68 | } 69 | } 70 | 71 | #endif -------------------------------------------------------------------------------- /include/instance.cuh: -------------------------------------------------------------------------------- 1 | #include "gpu_graph.cuh" 2 | #include "result.cuh" 3 | // #include "alias_table.cuh" 4 | #include 5 | 6 | // struct sample_result; 7 | // class Sampler; 8 | 9 | template 10 | void printH(T *ptr, int size) { 11 | T *ptrh = new T[size]; 12 | CUDA_RT_CALL(cudaMemcpy(ptrh, ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); 13 | printf("printH: "); 14 | for (size_t i = 0; i < size; i++) { 15 | // printf("%d\t", ptrh[i]); 16 | std::cout << ptrh[i] << "\t"; 17 | } 18 | printf("\n"); 19 | delete ptrh; 20 | } 21 | 22 | class InstanceBase { 23 | public: 24 | gpu_graph ggraph; 25 | 26 | public: 27 | InstanceBase(gpu_graph graph) : ggraph(graph) {} 28 | ~InstanceBase() {} 29 | }; 30 | 31 | class WalkInstance : InstanceBase { 32 | public: 33 | ResultsRW result; 34 | uint num_seed; 35 | 36 | public: 37 | WalkInstance(gpu_graph graph) : InstanceBase(graph) {} 38 | ~WalkInstance() {} 39 | void SetSeed(uint _num_seed, uint _hop_num) { 40 | // printf("%s\t %s :%d\n", __FILE__, __PRETTY_FUNCTION__, __LINE__); 41 | num_seed = _num_seed; 42 | std::random_device rd; 43 | std::mt19937 gen(56); 44 | std::uniform_int_distribution<> dis(1, 10000); // ggraph.vtx_num); 45 | uint *seeds = new uint[num_seed]; 46 | for (int n = 0; n < num_seed; ++n) { 47 | #ifdef check 48 | // seeds[n] = n; 49 | seeds[n] = 1; 50 | // seeds[n] = 339; 51 | #else 52 | // seeds[n] = n; 53 | seeds[n] = dis(gen); 54 | #endif // check 55 | } 56 | result.init(num_seed, _hop_num, seeds); 57 | } 58 | // void Start(); 59 | }; 60 | 61 | void Start(WalkInstance WalkInstance); 62 | void Start_high_degree(WalkInstance WalkInstance); 63 | -------------------------------------------------------------------------------- /old/shmem/alias_table.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | // template 3 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 4 | Vector out) { 5 | 6 | __shared__ alias_table_constructor_shmem tables[WARP_PER_SM]; 7 | alias_table_constructor_shmem *table = &tables[WID]; 8 | // printf("table size %llu\n",table->size); 9 | 10 | table->Init(); 11 | if (LID == 0) { 12 | printf("table large size %llu\n", table->large.capacity); 13 | } 14 | if (TID == 0) { 15 | printf("load\n"); 16 | } 17 | table->load(ids, weights, size); 18 | if (TID == 0) { 19 | printf("construct\n"); 20 | } 21 | table->construct(); 22 | if (TID == 0) { 23 | printf("roll\n"); 24 | } 25 | table->roll_atomic(out, num); 26 | if (LID == 0) { 27 | printf("out: "); 28 | printD(out.data, out.Size()); 29 | } 30 | } 31 | 32 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 33 | int * out) { 34 | 35 | __shared__ alias_table_constructor_shmem tables[WARP_PER_SM]; 36 | alias_table_constructor_shmem *table = &tables[WID]; 37 | // printf("table size %llu\n",table->size); 38 | 39 | 
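  // (descriptive comment added for clarity) This overload differs from the
  // kernel above only in its output: samples are rolled into a plain int*
  // buffer instead of the managed Vector<int>, while the per-warp
  // shared-memory table goes through the same Init / load / construct /
  // roll_atomic sequence.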
table->Init(); 40 | if (LID == 0) { 41 | printf("table large size %llu\n", table->large.capacity); 42 | } 43 | if (TID == 0) { 44 | printf("load\n"); 45 | } 46 | table->load(ids, weights, size); 47 | if (TID == 0) { 48 | printf("construct\n"); 49 | } 50 | table->construct(); 51 | if (TID == 0) { 52 | printf("roll\n"); 53 | } 54 | table->roll_atomic(out, num); 55 | if (LID == 0) { 56 | printf("out: "); 57 | printD(out, num); 58 | } 59 | } -------------------------------------------------------------------------------- /scripts/my.sh: -------------------------------------------------------------------------------- 1 | cd ../src 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-25 16:38:51 5 | # @LastEditors: PengyuWang 6 | # @LastEditTime: 2020-12-06 17:45:38 7 | # @FilePath: /sampling/scripts/my.sh 8 | ### 9 | 10 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 11 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 12 | 13 | ./main_degree --rw=0 --k 2 --d 2 --ol=1 --input ~/data/lj.w.gr 14 | ./main_degree --rw=0 --k 2 --d 2 --ol=1 --input ~/data/orkut.w.gr 15 | 16 | ./main --rw=0 --k 2 --d 2 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/lj.w.gr 17 | ./main --rw=0 --k 2 --d 2 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/orkut.w.gr 18 | 19 | 20 | ./main --rw=0 --k 1 --d 100 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/lj.w.gr 21 | ./main --rw=0 --k 1 --d 100 --ol=1 --randomweight=1 --weightrange=2 --input ~/data/orkut.w.gr 22 | 23 | ./main --rw=0 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 24 | ./main --rw=0 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 25 | ./main --rw=1 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 26 | ./main --rw=1 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 27 | 28 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 29 | ./main_degree --rw=0 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 30 | ./main_degree --rw=1 --k 1 --d 100 --ol=1 --input ~/data/lj.w.gr 31 | ./main_degree --rw=1 --k 1 --d 100 --ol=1 --input ~/data/orkut.w.gr 32 | 33 | 34 | 35 | ./main --rw=1 --ol=0 --k 1 --d 100 --input ~/data/lj.w.gr 36 | ./main --rw=1 --ol=0 --k 1 --d 100 --input ~/data/orkut.w.gr 37 | 38 | ./main --rw=1 --ol=0 --n=4847571 --k 1 --d 100 --input ~/data/lj.w.gr 39 | ./main --rw=1 --ol=0 --n=3072627 --k 1 --d 100 --input ~/data/orkut.w.gr -------------------------------------------------------------------------------- /result/nextdoor_unbias.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-25 16:50:34 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-11 16:30:59 6 | # @FilePath: /skywalker/result/knightking.sh 7 | ### 8 | DATA=(web-Google lj orkut ) # sk-2005 friendster) # twitter-2010 uk-union rmat29 web-ClueWeb09) 9 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) #41652230 10 | 11 | DIR="/home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/randomwalks/" 12 | # DeepWalkSampling 13 | # Node2VecSampling 14 | # PPRSampling 15 | # KHopSampling 16 | 17 | # echo "----------------------biased_walk -------------------" 18 | # for idx in $(seq 1 ${#DATA[*]}) 19 | # do 20 | # echo "------------"${DATA[idx-1]} 21 | # ${DIR}DeepWalkSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 22 | # done 23 | # echo "----------------------ppr biased -------------------" 24 | # for idx in $(seq 1 ${#DATA[*]}) 25 | # do 26 | # echo "------------"${DATA[idx-1]} 27 | # 
${DIR}PPRSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 28 | # done 29 | # echo "---------------------- unweighted node2vec -------------------" 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # echo "------------"${DATA[idx-1]} 33 | # ${DIR}Node2VecSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 34 | # done 35 | echo "----------------------kh 40k-------------------" 36 | for idx in $(seq 1 ${#DATA[*]}) 37 | do 38 | echo "------------"${DATA[idx-1]} 39 | /home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/khop/KHopSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 40 | done 41 | -------------------------------------------------------------------------------- /scripts/biased.sh: -------------------------------------------------------------------------------- 1 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 2 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 3 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 4 | #HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 5 | 6 | # DATA=( sk-2005 friendster) 7 | # HD=( 4 1 ) 8 | ITR=1 9 | NG=4 #8 10 | 11 | GR=".w.gr" 12 | EXE="./bin/main" #main_degree 13 | SG="--ngpu=1 --s" 14 | RW="--rw=1 --k 1 --d 100 " 15 | SP="--rw=0 --k 20 --d 2 " 16 | BATCH="--n=40000 -v" 17 | 18 | ROOT_DIR=$PWD 19 | LOG_FILE=${ROOT_DIR}"/result/table3_unbiased.csv" 20 | 21 | DATA_DIR="/home/xucheng//data" 22 | #DATA_DIR=${ROOT_DIR}"/dataset" 23 | GraphWalker_DIR="/home/pywang/sampling/GraphWalker" 24 | KnightKing_DIR="/home/pywang/sampling/KnightKing" 25 | CSAW_DIR="/home/pywang/sampling/C-SAW" 26 | NEXTDOOR_DIR="/home/pywang/sampling/nextdoor-experiments" 27 | 28 | echo "-------------------------------------------------------Skywalker unbias rw 100" #>>"${LOG_FILE}" 29 | for idx in $(seq 1 ${#DATA[*]}); do 30 | ./bin/main --bias=1 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 ${RW} ${BATCH} #>>"${LOG_FILE}" 31 | done 32 | 33 | echo "-------------------------------------------------------Skywalker unbias ppr 100" #>>"${LOG_FILE}" 34 | for idx in $(seq 1 ${#DATA[*]}); do 35 | ./bin/main --bias=1 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --tp=0.15 ${RW} ${BATCH} #>>"${LOG_FILE}" 36 | done 37 | 38 | echo "-------------------------------------------------------Skywalker unbias node2vec" #>>"${LOG_FILE}" 39 | for idx in $(seq 1 ${#DATA[*]}); do 40 | ./bin/main --bias=1 --ol=0 --buffer --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --node2vec ${BATCH} # >>"${LOG_FILE}" 41 | done -------------------------------------------------------------------------------- /old/shmem/tmp.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | #include 3 | #include 4 | #include 5 | 6 | template __global__ void init_range_d(T *ptr, size_t size) { 7 | if (TID < size) { 8 | ptr[TID] = TID; 9 | } 10 | } 11 | template void init_range(T *ptr, size_t size) { 12 | init_range_d<<>>(ptr, size); 13 | } 14 | template __global__ void init_array_d(T *ptr, size_t size, T v) { 15 | if (TID < size) { 16 | ptr[TID] = v; 17 | } 18 | } 19 | template void init_array(T *ptr, size_t size, T v) { 20 | init_array_d<<>>(ptr, size, v); 21 | } 22 | // todo 23 | /* 24 | 1. prefix sum to normalize 25 | 2. 
26 | */ 27 | #define paster( n ) printf( "var: " #n " = %d\n", n ) 28 | int main(int argc, char const *argv[]) { 29 | 30 | int *buf7; 31 | int size = 40; 32 | 33 | cudaSetDevice(1); 34 | cudaMalloc(&buf7, size / 2 * sizeof(int)); 35 | 36 | int *id_ptr; 37 | float *weight_ptr; 38 | cudaMalloc(&id_ptr, size * sizeof(int)); 39 | cudaMalloc(&weight_ptr, size * sizeof(float)); 40 | init_range(id_ptr, size); 41 | init_array(weight_ptr, size / 8 * 7, 0.5); 42 | init_array(weight_ptr + size / 8 * 7, size - size / 8 * 7, 2.0); 43 | 44 | // P; 45 | // alias_table *table_ptr; 46 | // alias_table table_h; 47 | Vector out; 48 | out.init(40); 49 | paster(SHMEM_PER_WARP); 50 | paster(TMP_PER_ELE); 51 | paster(ELE_PER_WARP); 52 | 53 | shmem_kernel<<<1, 32, 0, 0>>>(id_ptr, weight_ptr, size, size / 2, out); 54 | // printf("size %d\n",sizeof(alias_table_constructor_shmem)); 55 | // printf("size %d %d\n",sizeof(Vector_shmem),ELE_PER_WARP); 56 | P; 57 | usleep(5000); 58 | HERR(cudaDeviceSynchronize()); 59 | HERR(cudaPeekAtLastError()); 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /test/tmp.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | #include 3 | #include 4 | #include 5 | 6 | template __global__ void init_range_d(T *ptr, size_t size) { 7 | if (TID < size) { 8 | ptr[TID] = TID; 9 | } 10 | } 11 | template void init_range(T *ptr, size_t size) { 12 | init_range_d<<>>(ptr, size); 13 | } 14 | template __global__ void init_array_d(T *ptr, size_t size, T v) { 15 | if (TID < size) { 16 | ptr[TID] = v; 17 | } 18 | } 19 | template void init_array(T *ptr, size_t size, T v) { 20 | init_array_d<<>>(ptr, size, v); 21 | } 22 | // todo 23 | /* 24 | 1. prefix sum to normalize 25 | 2. 
26 | */ 27 | #define paster( n ) printf( "var: " #n " = %d\n", n ) 28 | int main(int argc, char const *argv[]) { 29 | 30 | int *buf7; 31 | int size = 40; 32 | 33 | cudaSetDevice(1); 34 | MyCudaMalloc(&buf7, size / 2 * sizeof(int)); 35 | 36 | int *id_ptr; 37 | float *weight_ptr; 38 | MyCudaMalloc(&id_ptr, size * sizeof(int)); 39 | MyCudaMalloc(&weight_ptr, size * sizeof(float)); 40 | init_range(id_ptr, size); 41 | init_array(weight_ptr, size / 8 * 7, 0.5); 42 | init_array(weight_ptr + size / 8 * 7, size - size / 8 * 7, 2.0); 43 | 44 | // P; 45 | // alias_table *table_ptr; 46 | // alias_table table_h; 47 | Vector out; 48 | out.init(40); 49 | paster(SHMEM_PER_WARP); 50 | paster(MEM_PER_ELE); 51 | paster(ELE_PER_WARP); 52 | 53 | shmem_kernel<<<1, 32, 0, 0>>>(id_ptr, weight_ptr, size, size / 2, out); 54 | // printf("size %d\n",sizeof(alias_table_constructor_shmem)); 55 | // printf("size %d %d\n",sizeof(Vector_shmem),ELE_PER_WARP); 56 | P; 57 | usleep(5000); 58 | H_ERR(cudaDeviceSynchronize()); 59 | H_ERR(cudaPeekAtLastError()); 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /figs/dynamic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-17 21:38:38 7 | # @FilePath: /skywalker/figs/unbiased.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n=40000" 25 | OUT='>> ./figs/result/dynamic.csv' 26 | 27 | # --randomweight=1 --weightrange=2 28 | 29 | 30 | # walker 31 | echo "-------------------------------------------------------unbias rw 100" >> ./figs/result/dynamic.csv 32 | for idx in $(seq 1 ${#DATA[*]}) 33 | do 34 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${RW} ${BATCH} >> ./figs/result/dynamic.csv 35 | done 36 | 37 | echo "-------------------------------------------------------unbias rw 100 dynamic" >> ./figs/result/dynamic.csv 38 | for idx in $(seq 1 ${#DATA[*]}) 39 | do 40 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${RW} ${BATCH} --dynamic=1>> ./figs/result/dynamic.csv 41 | done 42 | 43 | # echo "-------------------------------------------------------unbias sp" >> ./figs/result/dynamic.csv 44 | # for idx in $(seq 1 ${#DATA[*]}) 45 | # do 46 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${SP} ${BATCH} >> ./figs/result/dynamic.csv 47 | # done 48 | 49 | -------------------------------------------------------------------------------- /test/alias_table.cu: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | // template 3 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 4 | Vector out) 5 | { 6 | 7 | __shared__ alias_table_constructor_shmem tables[WARP_PER_BLK]; 8 | alias_table_constructor_shmem *table = &tables[WID]; 9 | // printf("table size %llu\n",table->size); 10 | 11 | 
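  // (descriptive comments added for clarity) Rough shape of the sequence
  // below, which builds a per-warp alias table in shared memory and then
  // samples from it:
  //   load()        - copy the candidate ids and weights into the warp's
  //                   shared-memory buffers.
  //   construct()   - normalize the weights and pair under-full ("small")
  //                   slots with over-full ("large") slots, the classic
  //                   alias-table build, so each slot ends up with a
  //                   probability and an alias entry.
  //   roll_atomic() - each thread draws a slot and a uniform number from its
  //                   curand state and keeps either the slot's own id or its
  //                   alias, O(1) work per sample, appending results to `out`
  //                   until `num` samples have been produced.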
table->Init(); 12 | if (LID == 0) 13 | { 14 | printf("table large size %llu\n", table->large.capacity); 15 | } 16 | if (TID == 0) 17 | { 18 | printf("load\n"); 19 | } 20 | table->load(ids, weights, size); 21 | if (TID == 0) 22 | { 23 | printf("construct\n"); 24 | } 25 | table->construct(); 26 | if (TID == 0) 27 | { 28 | printf("roll\n"); 29 | } 30 | curandState state; 31 | curand_init(TID, 0, 0, &state); //(unsigned long long)clock() + 32 | if(num>0) 33 | table->roll_atomic(out, num, &state); 34 | if (LID == 0) 35 | { 36 | printf("out: "); 37 | printD(out.data, out.Size()); 38 | } 39 | } 40 | 41 | // __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 42 | // int * out) { 43 | 44 | // __shared__ alias_table_constructor_shmem tables[WARP_PER_BLK]; 45 | // alias_table_constructor_shmem *table = &tables[WID]; 46 | // // printf("table size %llu\n",table->size); 47 | 48 | // table->Init(); 49 | // if (LID == 0) { 50 | // printf("table large size %llu\n", table->large.capacity); 51 | // } 52 | // if (TID == 0) { 53 | // printf("load\n"); 54 | // } 55 | // table->load(ids, weights, size); 56 | // if (TID == 0) { 57 | // printf("construct\n"); 58 | // } 59 | // table->construct(); 60 | // if (TID == 0) { 61 | // printf("roll\n"); 62 | // } 63 | // table->roll_atomic(out, num); 64 | // if (LID == 0) { 65 | // printf("out: "); 66 | // printD(out, num); 67 | // } 68 | // } -------------------------------------------------------------------------------- /result/nextdoor.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-25 16:50:34 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-11 16:30:59 6 | # @FilePath: /skywalker/result/knightking.sh 7 | ### 8 | DATA=(web-Google lj orkut ) # sk-2005 friendster) # twitter-2010 uk-union rmat29 web-ClueWeb09) 9 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) #41652230 10 | 11 | DIR="/home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/randomwalks/" 12 | # DeepWalkSampling 13 | # Node2VecSampling 14 | # PPRSampling 15 | # KHopSampling 16 | echo "----------------------unbiased_walk -------------------" 17 | for idx in $(seq 1 ${#DATA[*]}) 18 | do 19 | echo "------------"${DATA[idx-1]} 20 | ${DIR}DeepWalkSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 21 | done 22 | 23 | # echo "----------------------unbiased_walk -------------------" 24 | # for idx in $(seq 1 ${#DATA[*]}) 25 | # do 26 | # echo "------------"${DATA[idx-1]} 27 | # ${DIR}DeepWalkSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 28 | # done 29 | # echo "----------------------ppr unbiased -------------------" 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # echo "------------"${DATA[idx-1]} 33 | # ${DIR}PPRSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 34 | # done 35 | # echo "----------------------node2vec -------------------" 36 | # for idx in $(seq 1 ${#DATA[*]}) 37 | # do 38 | # echo "------------"${DATA[idx-1]} 39 | # ${DIR}Node2VecSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l 40 | # done 41 | # echo "----------------------kh -------------------" 42 | # for idx in $(seq 1 ${#DATA[*]}) 43 | # do 44 | # echo "------------"${DATA[idx-1]} 45 | # /home/pywang/sampling/nextdoor-experiments/NextDoor/src/apps/khop/KHopSampling -g ~/data/${DATA[idx-1]}.data -t edge-list -f binary -n 1 -k 
TransitParallel -l 46 | # done 47 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | // { 5 | // "type": "shell", 6 | // "label": "g++ build active file", 7 | // "command": "/usr/bin/g++", 8 | // "args": ["-g", "${file}", "-o", "${fileDirname}/${fileBasenameNoExtension}"], 9 | // "options": { 10 | // "cwd": "/usr/bin" 11 | // }, 12 | // "problemMatcher": ["$gcc"], 13 | // "group": { 14 | // "kind": "build", 15 | // "isDefault": true 16 | // } 17 | // }, 18 | { 19 | "type": "shell", 20 | "label": "CUDA make", 21 | "command": "make", 22 | // "args": ["-g", "${file}", "-o", "${fileDirname}/${fileBasenameNoExtension}"], 23 | // "options": { 24 | // "cwd": "${workspaceFolder}/src" 25 | // }, 26 | "problemMatcher": [ 27 | "$nvcc" 28 | ], 29 | "group": { 30 | "kind": "build", 31 | "isDefault": true 32 | } 33 | }, 34 | // { 35 | // "label": "bandwidthTestMakefile", 36 | // "type": "shell", 37 | // "command": "make", 38 | // "options": { 39 | // "cwd": "/home/bigeye/NVIDIA_CUDA-9.2_Samples/1_Utilities/bandwidthTest" 40 | // }, 41 | // "group": { 42 | // "kind": "build", 43 | // "isDefault": true 44 | // }, 45 | // "presentation": { 46 | // "echo": true, 47 | // "reveal": "always", 48 | // "focus": false, 49 | // "panel": "shared" 50 | // }, 51 | // "args": [ 52 | // "QUIET=0" 53 | // ], 54 | // "problemMatcher": { 55 | // "owner": "cpp", 56 | // "fileLocation": [ 57 | // "absolute" 58 | // ], 59 | // "pattern": { 60 | // "regexp": "^(.*):(\\d+):(\\d+):\\s+(warning|error):\\s+(.*)$", 61 | // "file": 1, 62 | // "line": 2, 63 | // "column": 3, 64 | // "severity": 4, 65 | // "message": 5 66 | // } 67 | // } 68 | // } 69 | ] 70 | } -------------------------------------------------------------------------------- /figs/with_nextdoor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 15:49:20 7 | # @FilePath: /skywalker/figs/offline.sh 8 | ### 9 | DATA=(web-Google lj orkut) # arabic-2005 uk-2005 ) # sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--deepwalk " 23 | SP="--sage " 24 | BATCH="--full -v" 25 | LOG_FILE="with_nextdoor.csv" 26 | 27 | # --randomweight=1 --weightrange=2 28 | 29 | # echo "-------------------------------------------------------offline rw 100" >> ${LOG_FILE} 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # for i in $(seq 1 ${ITR}) 33 | # do 34 | # ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 35 | # done 36 | # done 37 | 38 | # echo "-------------------------------------------------------offline ppr 0.15" >> ${LOG_FILE} 39 | # for idx in $(seq 1 ${#DATA[*]}) 40 | # do 41 | # for i in $(seq 1 ${ITR}) 42 | # do 43 | # ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --tp=0.15 >> ${LOG_FILE} 44 | # done 45 | # done 
46 | 47 | 48 | echo "-------------------------------------------------------unbiased sp 100" >> ${LOG_FILE} 49 | for idx in $(seq 1 ${#DATA[*]}) 50 | do 51 | for i in $(seq 1 ${ITR}) 52 | do 53 | ./bin/main -bias=0 --ol=0 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 54 | done 55 | done 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // 使用 IntelliSense 了解相关属性。 3 | // 悬停以查看现有属性的描述。 4 | // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "CUDA C++: Launch", 9 | "type": "cuda-gdb", 10 | "request": "launch", 11 | "externalConsole": false, 12 | "program": "${workspaceFolder}/build/skywalker", 13 | "args": "-bias=1 --ol=1 --ngpu=1 --s --sage --input /home/pywang/data/lj.w.gr --hd=1 --n=40000 -v --printresult=1 --newsampler=1 --loc=1", 14 | "setupCommands": [ 15 | { 16 | "description": "为 gdb 启用整齐打印", 17 | "text": "-enable-pretty-printing", 18 | // "ignoreFailures": true 19 | } 20 | ] 21 | }, 22 | // { 23 | // "name": "(gdb) 启动", 24 | // "type": "cppdbg", 25 | // "request": "launch", 26 | // "program": "${workspaceFolder}/bin/main", //${fileBasenameNoExtension}.out 27 | // "args": [" --k 1 --d 100 --rw=0 --ol=0 --hd=4 --ngpu 4 --v --n=10"], 28 | // "stopAtEntry": false, 29 | // "cwd": "${workspaceFolder}", 30 | // "environment": [], 31 | // "externalConsole": false, 32 | // "MIMode": "gdb", 33 | // "miDebuggerPath": "gdb", ///usr/local/cuda/bin/cuda-gdb 34 | // "setupCommands": [ 35 | // { 36 | // "description": "为 gdb 启用整齐打印", 37 | // "text": "-enable-pretty-printing", 38 | // "ignoreFailures": true 39 | // } 40 | // ] 41 | // }, 42 | // { 43 | // "name": "cuda-gdb", 44 | // "type": "cuda-gdb", 45 | // "request": "launch", 46 | // "program": "${workspaceFolder}/bin/main", //${fileBasenameNoExtension}.out 47 | // "args": [" -bias=1 --ol=1 --deepwalk -v --ngpu=1 --dw=1 --n=400"] 48 | // } 49 | ] 50 | } -------------------------------------------------------------------------------- /scripts/trw-unbiased.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2021-10-21 4 | # @LastEditors: Xu Cheng 5 | # @FilePath: /ThunderRW 6 | ### 7 | DATA=(GG lj OK AB UK SK FS) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 8 | 9 | # 10 | # echo "unbiased rw" >>"/home/xucheng/ThunderRW/unbiased.csv" 11 | # for idx in $(seq 1 ${#DATA[*]}) 12 | # do 13 | # echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased.csv" 14 | # ./build/random_walk/deepwalk.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -em 0 -sm 0 -l 100 >>"/home/xucheng/ThunderRW/unbiased.csv" 2>&1 15 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 16 | # done 17 | # 18 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 19 | # 20 | # echo "unbiased ppr" >>"/home/xucheng/ThunderRW/unbiased.csv" 21 | # for idx in $(seq 1 ${#DATA[*]}) 22 | # do 23 | # echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased.csv" 24 | # ./build/random_walk/ppr.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -sp 0.15 -em 0 -sm 0 >>"/home/xucheng/ThunderRW/unbiased.csv" 2>&1 25 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 26 | # done 27 | # 28 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 29 | # 30 | # echo "unbiased node2vec" 
>>"/home/xucheng/ThunderRW/unbiased.csv" 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased.csv" 34 | # ./build/random_walk/node2vec.out -f sample_dataset/${DATA[idx-1]}/ -n 20 -em 0 -sm 0 >>"/home/xucheng/ThunderRW/unbiased.csv" 2>&1 35 | # echo " " >>"/home/xucheng/ThunderRW/unbiased.csv" 36 | # done 37 | # 38 | 39 | echo "unbiased rw" >>"/home/xucheng/ThunderRW/unbiased40.csv" 40 | for idx in $(seq 1 ${#DATA[*]}) 41 | do 42 | echo " result of ${DATA[idx-1]} " >>"/home/xucheng/ThunderRW/unbiased40.csv" 43 | ./build/random_walk/deepwalk.out -f sample_dataset/${DATA[idx-1]}/ -n 40 -em 0 -sm 0 -l 100 >>"/home/xucheng/ThunderRW/unbiased40.csv" 2>&1 44 | echo " " >>"/home/xucheng/ThunderRW/unbiased40.csv" 45 | done 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /scripts/trans.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Date: 2020-12-16 11:06:22 4 | LastEditors: PengyuWang 5 | LastEditTime: 2020-12-16 17:15:33 6 | FilePath: /sampling/scripts/trans.py 7 | ''' 8 | # import numpy as np 9 | # import pandas as pd 10 | # import scipy.sparse as ss 11 | 12 | # def read_data_file_as_coo_matrix(filename='edges.txt'): 13 | # "Read data file and return sparse matrix in coordinate format." 14 | 15 | # # if the nodes are integers, use 'dtype = np.uint32' 16 | # data = pd.read_csv(filename, sep = '\t', encoding = 'utf-8') 17 | 18 | # # where 'rows' is node category one and 'cols' node category 2 19 | # rows = data['agn'] # Not a copy, just a reference. 20 | # cols = data['fct'] 21 | 22 | # # crucial third array in python, which can be left out in r 23 | # ones = np.ones(len(rows), np.uint32) 24 | # matrix = ss.coo_matrix((ones, (rows, cols))) 25 | # return matrix 26 | 27 | # def save_csr_matrix(filename, matrix): 28 | # """Save compressed sparse row (csr) matrix to file. 
29 | 30 | # Based on http://stackoverflow.com/a/8980156/232571 31 | 32 | # """ 33 | # assert filename.endswith('.npz') 34 | # attributes = { 35 | # 'data': matrix.data, 36 | # 'indices': matrix.indices, 37 | # 'indptr': matrix.indptr, 38 | # 'shape': matrix.shape, 39 | # } 40 | # np.savez(filename, **attributes) 41 | 42 | # read_data_file_as_coo_matrix() 43 | # read_weighted_edgelist 44 | # G= networkit.graphio.readGraph("/home/pywang/data/lj.w.edge", networkit.Format.EdgeList, separator=" ", continuous=False) 45 | import scipy as sp 46 | import networkx as nx 47 | 48 | 49 | 50 | def save_csr_matrix(filename): 51 | G=nx.read_weighted_edgelist("/home/pywang/data/" + filename+ ".w.edge") 52 | S=nx.to_scipy_sparse_matrix(G) 53 | sp.sparse.save_npz("/home/pywang/data/" + filename+ ".w.npz", S) 54 | 55 | # save_csr_matrix("orkut") 56 | print("uk-2005") 57 | save_csr_matrix("uk-2005") 58 | print("twitter-2010") 59 | save_csr_matrix("twitter-2010") 60 | # print("sk-2005") 61 | # save_csr_matrix("sk-2005") 62 | # print("friendster") 63 | # save_csr_matrix("friendster") -------------------------------------------------------------------------------- /scripts/trans2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Description: 3 | Date: 2020-12-16 11:06:22 4 | LastEditors: PengyuWang 5 | LastEditTime: 2020-12-16 19:39:34 6 | FilePath: /sampling/scripts/trans2.py 7 | ''' 8 | import numpy as np 9 | import pandas as pd 10 | import scipy.sparse as ss 11 | 12 | def read_data_file_as_coo_matrix(filename='edges.txt'): 13 | "Read data file and return sparse matrix in coordinate format." 14 | data = pd.read_csv(filename, sep=' ', header=None, dtype=np.uint32) 15 | rows = data[0] # Not a copy, just a reference. 16 | cols = data[1] 17 | ones = np.ones(len(rows), np.uint32) 18 | matrix = ss.coo_matrix((ones, (rows, cols))) 19 | return matrix 20 | 21 | def save_csr_matrix(filename, matrix): 22 | """Save compressed sparse row (csr) matrix to file. 23 | 24 | Based on http://stackoverflow.com/a/8980156/232571 25 | 26 | """ 27 | assert filename.endswith('.npz') 28 | attributes = { 29 | 'data': matrix.data, 30 | 'indices': matrix.indices, 31 | 'indptr': matrix.indptr, 32 | 'shape': matrix.shape, 33 | } 34 | np.savez(filename, **attributes) 35 | 36 | def tx(filename): 37 | "Test data file parsing and matrix serialization." 
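    # (Added note, not in the original script: tx() parses the space-separated edge
    # list into a COO matrix, converts it to CSR, and stores it with save_csr_matrix()
    # above, which writes the data/indices/indptr/shape arrays into a single .npz file.
    # A file saved this way can be restored later; illustrative only, filename assumed:
    #   loader = np.load('edges.npz')
    #   m = ss.csr_matrix((loader['data'], loader['indices'], loader['indptr']),
    #                     shape=tuple(loader['shape']))
    # using the numpy/scipy.sparse imports already present at the top of this script.)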
38 | coo_matrix = read_data_file_as_coo_matrix("/home/pywang/data/" + filename+ ".w.edge") 39 | csr_matrix = coo_matrix.tocsr() 40 | save_csr_matrix("/home/pywang/data/" + filename+ ".w.npz", csr_matrix) 41 | 42 | if __name__ == '__main__': 43 | print("uk-2005") 44 | tx("uk-2005") 45 | # print("twitter-2010") 46 | # tx("twitter-2010") 47 | 48 | # read_data_file_as_coo_matrix() 49 | # read_weighted_edgelist 50 | # G= networkit.graphio.readGraph("/home/pywang/data/lj.w.edge", networkit.Format.EdgeList, separator=" ", continuous=False) 51 | import scipy as sp 52 | import networkx as nx 53 | 54 | 55 | 56 | # def save_csr_matrix(filename): 57 | # G=nx.read_weighted_edgelist("/home/pywang/data/" + filename+ ".w.edge") 58 | # S=nx.to_scipy_sparse_matrix(G) 59 | # sp.sparse.save_npz("/home/pywang/data/" + filename+ ".w.npz", S) 60 | 61 | # save_csr_matrix("orkut") 62 | 63 | 64 | # print("sk-2005") 65 | # save_csr_matrix("sk-2005") 66 | # print("friendster") 67 | # save_csr_matrix("friendster") -------------------------------------------------------------------------------- /tools/gr2npz.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: 3 | * @Date: 2020-12-24 14:04:06 4 | * @LastEditors: PengyuWang 5 | * @LastEditTime: 2020-12-24 14:22:35 6 | * @FilePath: /sampling/tools/gr2npz.cu 7 | */ 8 | // https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include // for myPyObject.cast>() 15 | #include 16 | #include "graph.cuh" 17 | 18 | 19 | namespace py = pybind11; 20 | 21 | int main() { 22 | py::scoped_interpreter guard{}; 23 | 24 | py::module np = py::module::import("numpy"); 25 | // py::object random = np.attr("random"); 26 | // py::module scipy = py::module::import("scipy.optimize"); 27 | 28 | // Load created module containing f_a(x) = a*x^2 29 | // py::module myModule = py::module::import("MyPythonModule.MyFunctionality"); 30 | 31 | // Create some data for fitting 32 | std::vector xValues(11, 0); 33 | std::vector yValues(11, 0); 34 | for (int i = -5; i < 6; ++i) { 35 | xValues[i + 5] = i; 36 | yValues[i + 5] = i * i; 37 | } 38 | 39 | // Cast data to numpy arrays 40 | py::array_t pyXValues = py::cast(xValues); 41 | py::array_t pyYValues = py::cast(yValues); 42 | 43 | // The return value contains the optimal values and the covariance matrix. 
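// (Hedged sketch, not part of the original file: `retVals` is consumed just below
//  but is never defined in this snippet. With the SciPy import above enabled, it
//  would typically be produced by a curve_fit call along these lines; `f_a` and the
//  `MyPythonModule.MyFunctionality` module are the assumed names from the comments
//  earlier in main():
//    py::module scipy = py::module::import("scipy.optimize");
//    py::object retVals = scipy.attr("curve_fit")(myModule.attr("f_a"), pyXValues, pyYValues);
//  )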
44 | // Get the optimal values 45 | py::object optVals = retVals.attr("__getitem__")(0); 46 | 47 | // Cast return value back to std::vector and show the result 48 | std::vector retValsStd = optVals.cast>(); 49 | std::cout << "Fitted parameter a = " << retValsStd[0] << std::endl; 50 | 51 | return 0; 52 | } 53 | 54 | py::array_t my_fft1d_complex(py::array_t<> input) { 55 | 56 | if (input.ndim() != 1) 57 | throw std::runtime_error("input dim must be 1"); 58 | 59 | vector> in, out; 60 | auto r1 = input.unchecked<1>(); 61 | for (int i = 0; i < input.size(); i++) 62 | { 63 | in.push_back(r1(i)); 64 | } 65 | 66 | fft1d(in, out, in.size()); 67 | 68 | py::array_t> result(out.size()); 69 | auto r2 = result.mutable_unchecked<1>(); 70 | 71 | for (int i = 0; i < out.size(); i++) 72 | { 73 | r2(i) = out[i]; 74 | } 75 | 76 | return result; 77 | 78 | } 79 | -------------------------------------------------------------------------------- /figs/offline.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 15:49:20 7 | # @FilePath: /skywalker/figs/offline.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--deepwalk " 23 | SP="--sage " 24 | BATCH="--n 40000 " 25 | LOG_FILE="offline.csv" 26 | 27 | 28 | 29 | 30 | 31 | # --randomweight=1 --weightrange=2 32 | 33 | # echo "-------------------------------------------------------offline rw 100 ${BATCH}" >> ${LOG_FILE} 34 | # for idx in $(seq 1 ${#DATA[*]}) 35 | # do 36 | # for i in $(seq 1 ${ITR}) 37 | # do 38 | # ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 39 | # done 40 | # done 41 | 42 | # echo "-------------------------------------------------------offline ppr 0.15 ${BATCH}" >> ${LOG_FILE} 43 | # for idx in $(seq 1 ${#DATA[*]}) 44 | # do 45 | # for i in $(seq 1 ${ITR}) 46 | # do 47 | # ./bin/main -bias=1 --ol=0 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> ${LOG_FILE} 48 | # done 49 | # done 50 | 51 | 52 | # echo "-------------------------------------------------------offline sp 100${BATCH}" >> ${LOG_FILE} 53 | # for idx in $(seq 1 ${#DATA[*]}) 54 | # do 55 | # for i in $(seq 1 ${ITR}) 56 | # do 57 | # ./bin/main -bias=1 --ol=0 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 58 | # done 59 | # done 60 | 61 | echo "-------------------------------------------------------offline sp 20 20 ${BATCH}" >> ${LOG_FILE} 62 | for idx in $(seq 1 ${#DATA[*]}) 63 | do 64 | for i in $(seq 1 ${ITR}) 65 | do 66 | ./bin/main -bias=1 --ol=0 ${SG} --rw=0 --k=20 --d=2 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> ${LOG_FILE} 67 | done 68 | done 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /figs/online.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # 
@Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:43:38 7 | # @FilePath: /skywalker/figs/online.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n 40000 " 25 | POLICY="--static=0" 26 | OUTPUT=" online.csv " 27 | 28 | # --randomweight=1 --weightrange=2 29 | 30 | # echo "-------------------------------------------------------online rw 100 ${POLICY} " >> ${OUTPUT} 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # for i in $(seq 1 ${ITR}) 34 | # do 35 | # ./bin/main -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${POLICY} >> ${OUTPUT} 36 | # done 37 | # done 38 | 39 | # echo "-------------------------------------------------------online ppr 0.15" >> online.csv 40 | # for idx in $(seq 1 ${#DATA[*]}) 41 | # do 42 | # for i in $(seq 1 ${ITR}) 43 | # do 44 | # ./bin/main -bias=1 --ol=1 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> online.csv 45 | # done 46 | # done 47 | 48 | echo "-------------------------------------------------------node2vec ${POLICY} " >> ${OUTPUT} 49 | for idx in $(seq 1 ${#DATA[*]}) 50 | do 51 | for i in $(seq 1 ${ITR}) 52 | do 53 | ./bin/node2vec -node2vec ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} ${POLICY} >> ${OUTPUT} 54 | done 55 | done 56 | 57 | # echo "-------------------------------------------------------online sp 100 ${POLICY} " >> ${OUTPUT} 58 | # for idx in $(seq 1 ${#DATA[*]}) 59 | # do 60 | # for i in $(seq 1 ${ITR}) 61 | # do 62 | # ./bin/main -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${POLICY} >> ${OUTPUT} 63 | # done 64 | # done 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /scripts/csaw.sh: -------------------------------------------------------------------------------- 1 | 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-25 16:31:37 5 | # @LastEditors: PengyuWang 6 | # @LastEditTime: 2021-01-11 11:31:12 7 | # @FilePath: /skywalker/scripts/csaw.sh 8 | ### 9 | DATA=( web-Google lj orkut arabic-2005 uk-2005 ) # 10 | # DATA=( uk-2005 sk-2005 friendster) 11 | cd /home/pywang/sampling/C-SAW/non-stream 12 | 13 | # ./sampling.bin wg ~/data/soc-LiveJournal1.txt_beg_pos.bin ~/data/soc-LiveJournal1.txt_csr.bin 100 32 4000 1 2 2 1 14 | # ./sampling.bin wg ~/data/lj.w.edge_beg_pos.bin ~/data/lj.w.edge_csr.bin 100 32 4000 1 1 100 1 15 | # ./sampling.bin wg ~/data/lj.w.edge_beg_pos.bin ~/data/lj.w.edge_csr.bin 100 32 4000 1 2 2 1 16 | 17 | # ./sampling.bin wg ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 4000 1 1 100 1 18 | # ./sampling.bin wg ~/data/orkut.w.edge_beg_pos.bin ~/data/orkut.w.edge_csr.bin 100 32 4000 1 2 2 1 19 | 20 | # ./sampling.bin wg ~/data/uk-2005.w.edge_beg_pos.bin ~/data/uk-2005.w.edge_csr.bin 100 32 4000 1 1 100 1 21 | # ./sampling.bin wg ~/data/uk-2005.w.edge_beg_pos.bin ~/data/uk-2005.w.edge_csr.bin 100 32 4000 
1 2 2 1 22 | 23 | echo "----------------------biased walk 4k 64-------------------" 24 | for idx in $(seq 1 ${#DATA[*]}) 25 | do 26 | echo ${DATA[idx-1]} 27 | /home/pywang/sampling/C-SAW/non-stream/sampling.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 1 100 1 28 | done 29 | 30 | # echo "----------------------sampling biased 4k 20 2 64-------------------" 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # echo ${DATA[idx-1]} 34 | # /home/pywang/sampling/C-SAW/non-stream/sampling.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 20 2 1 35 | # done 36 | 37 | # echo "----------------------biased walk 4k 64-------------------" 38 | # for idx in $(seq 1 ${#DATA[*]}) 39 | # do 40 | # echo ${DATA[idx-1]} 41 | # /home/pywang/sampling/C-SAW/streaming/streaming.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 1 100 1 42 | # done 43 | 44 | # echo "----------------------sampling biased 4k 20 2 64-------------------" 45 | # for idx in $(seq 1 ${#DATA[*]}) 46 | # do 47 | # echo ${DATA[idx-1]} 48 | # /home/pywang/sampling/C-SAW/streaming/streaming.bin wg ~/data/${DATA[idx-1]}.w.edge_beg_pos.bin ~/data/${DATA[idx-1]}.w.edge_csr.bin 100 32 4000 1 20 2 1 49 | # done -------------------------------------------------------------------------------- /old/shmem/util.cu: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | 3 | // __device__ char char_atomicCAS(char *addr, char cmp, char val) { 4 | // unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) & 5 | // (0xFFFFFFFFFFFFFFFCULL)); 6 | // unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 7 | // unsigned mask = 0xFFU; 8 | // mask <<= al_offset; 9 | // mask = ~mask; 10 | // unsigned sval = val; 11 | // sval <<= al_offset; 12 | // unsigned old = *al_addr, assumed, setval; 13 | // do { 14 | // assumed = old; 15 | // setval = assumed & mask; 16 | // setval |= sval; 17 | // old = atomicCAS(al_addr, assumed, setval); 18 | // } while (assumed != old); 19 | // return (char)((assumed >> al_offset) & 0xFFU); 20 | // } 21 | 22 | 23 | // template 24 | // __inline__ __device__ T warpPrefixSum(T val, int lane_id) { 25 | // T val_shuffled; 26 | // for (int offset = 1; offset < warpSize; offset *= 2) { 27 | // val_shuffled = __shfl_up(val, offset); 28 | // if (lane_id >= offset) { 29 | // val += val_shuffled; 30 | // } 31 | // } 32 | // return val; 33 | // } 34 | __device__ void active_size(int n=0) { 35 | coalesced_group active = coalesced_threads(); 36 | if (active.thread_rank() == 0) 37 | printf("coalesced_group %d at line %d\n", active.size(),n); 38 | } 39 | template void printH(T *ptr, int size) { 40 | T *ptrh = new T[size]; 41 | HERR(cudaMemcpy(ptrh, ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); 42 | printf("printH: "); 43 | for (size_t i = 0; i < size; i++) { 44 | // printf("%d\t", ptrh[i]); 45 | std::cout << ptrh[i] << "\t"; 46 | } 47 | printf("\n"); 48 | delete ptrh; 49 | } 50 | __device__ void printD(float *ptr, int size) { 51 | printf("printDf: size%d, ", size); 52 | for (size_t i = 0; i < size; i++) { 53 | printf("%f\t", ptr[i]); 54 | } 55 | printf("\n"); 56 | } 57 | __device__ void printD(int *ptr, int size) { 58 | printf("printDi: size%d, ", size); 59 | for (size_t i = 0; i < size; i++) { 60 | printf("%d\t", ptr[i]); 61 | } 62 | printf("\n"); 63 | } 64 | // template __global__ void init_range_d(T *ptr, size_t size) { 
65 | // if (TID < size) { 66 | // ptr[TID] = TID; 67 | // } 68 | // } 69 | // template void init_range(T *ptr, size_t size) { 70 | // init_range_d<<>>(ptr, size); 71 | // } 72 | // template __global__ void init_array_d(T *ptr, size_t size, T v) { 73 | // if (TID < size) { 74 | // ptr[TID] = v; 75 | // } 76 | // } 77 | // template void init_array(T *ptr, size_t size, T v) { 78 | // init_array_d<<>>(ptr, size, v); 79 | // } 80 | -------------------------------------------------------------------------------- /old/shmem/util.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | // #include 4 | // #include 5 | #include 6 | #include 7 | #include 8 | using namespace cooperative_groups; 9 | #include 10 | #include 11 | #include 12 | #define u64 unsigned long long int 13 | #define TID (threadIdx.x + blockIdx.x * blockDim.x) 14 | #define LID (threadIdx.x % 32) 15 | #define WID (threadIdx.x / 32) 16 | #define MIN(x, y) ((x < y) ? x : y) 17 | #define MAX(x, y) ((x > y) ? x : y) 18 | #define P printf("%d\n", __LINE__) 19 | #define HERR(ans) \ 20 | { gpuAssert((ans), __FILE__, __LINE__); } 21 | inline void gpuAssert(cudaError_t code, const char *file, int line, 22 | bool abort = true) { 23 | if (code != cudaSuccess) { 24 | fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, 25 | line); 26 | if (abort) 27 | exit(code); 28 | } 29 | } 30 | __device__ void active_size(int n); 31 | 32 | 33 | // __device__ char char_atomicCAS(char *addr, char cmp, char val) { 34 | // unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) 35 | // & 36 | // (0xFFFFFFFFFFFFFFFCULL)); 37 | // unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 38 | // unsigned mask = 0xFFU; 39 | // mask <<= al_offset; 40 | // mask = ~mask; 41 | // unsigned sval = val; 42 | // sval <<= al_offset; 43 | // unsigned old = *al_addr, assumed, setval; 44 | // do { 45 | // assumed = old; 46 | // setval = assumed & mask; 47 | // setval |= sval; 48 | // old = atomicCAS(al_addr, assumed, setval); 49 | // } while (assumed != old); 50 | // return (char)((assumed >> al_offset) & 0xFFU); 51 | // } 52 | 53 | // template 54 | // __inline__ __device__ T warpPrefixSum(T val, int lane_id) { 55 | // T val_shuffled; 56 | // for (int offset = 1; offset < warpSize; offset *= 2) { 57 | // val_shuffled = __shfl_up(val, offset); 58 | // if (lane_id >= offset) { 59 | // val += val_shuffled; 60 | // } 61 | // } 62 | // return val; 63 | // } 64 | #define FULL_MASK 0xffffffff 65 | 66 | template 67 | __inline__ __device__ T warpReduce(T val, int lane_id) { 68 | // T val_shuffled; 69 | for (int offset = 16; offset > 0; offset /= 2) 70 | val += __shfl_down_sync(FULL_MASK, val, offset); 71 | return val; 72 | } 73 | 74 | template void printH(T *ptr, int size); 75 | __device__ void printD(float *ptr, int size); 76 | __device__ void printD(int *ptr, int size); 77 | // template __global__ void init_range_d(T *ptr, size_t size); 78 | // template void init_range(T *ptr, size_t size); 79 | // template __global__ void init_array_d(T *ptr, size_t size, T v); 80 | // template void init_array(T *ptr, size_t size, T v); 81 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "*.cu": "cpp", 4 | "*.cuh": "cpp", 5 | "*.tcu": "cpp", 6 | "array": "cpp", 7 | "string": "cpp", 8 | "string_view": "cpp", 9 | "*.tcc": 
"cpp", 10 | "atomic": "cpp", 11 | "bitset": "cpp", 12 | "initializer_list": "cpp", 13 | "utility": "cpp", 14 | "memory_resource": "cpp", 15 | "chrono": "cpp", 16 | "algorithm": "cpp", 17 | "cctype": "cpp", 18 | "clocale": "cpp", 19 | "cmath": "cpp", 20 | "cstdarg": "cpp", 21 | "cstddef": "cpp", 22 | "cstdio": "cpp", 23 | "cstdlib": "cpp", 24 | "cstring": "cpp", 25 | "ctime": "cpp", 26 | "cwchar": "cpp", 27 | "cwctype": "cpp", 28 | "strstream": "cpp", 29 | "complex": "cpp", 30 | "cstdint": "cpp", 31 | "deque": "cpp", 32 | "list": "cpp", 33 | "unordered_map": "cpp", 34 | "vector": "cpp", 35 | "exception": "cpp", 36 | "functional": "cpp", 37 | "optional": "cpp", 38 | "ratio": "cpp", 39 | "system_error": "cpp", 40 | "tuple": "cpp", 41 | "type_traits": "cpp", 42 | "fstream": "cpp", 43 | "iomanip": "cpp", 44 | "iosfwd": "cpp", 45 | "iostream": "cpp", 46 | "istream": "cpp", 47 | "limits": "cpp", 48 | "memory": "cpp", 49 | "new": "cpp", 50 | "ostream": "cpp", 51 | "numeric": "cpp", 52 | "sstream": "cpp", 53 | "stdexcept": "cpp", 54 | "streambuf": "cpp", 55 | "thread": "cpp", 56 | "cfenv": "cpp", 57 | "cinttypes": "cpp", 58 | "typeindex": "cpp", 59 | "typeinfo": "cpp", 60 | "iterator": "cpp", 61 | "map": "cpp", 62 | "random": "cpp", 63 | "set": "cpp", 64 | "forward_list": "cpp", 65 | "unordered_set": "cpp", 66 | "cerrno": "cpp", 67 | "condition_variable": "cpp", 68 | "mutex": "cpp", 69 | "regex": "cpp", 70 | "codecvt": "cpp", 71 | "filesystem": "cpp", 72 | "valarray": "cpp", 73 | "variant": "cpp", 74 | "__nullptr": "cpp", 75 | "ios": "cpp", 76 | "locale": "cpp", 77 | "__locale": "cpp", 78 | "__config": "cpp", 79 | "__string": "cpp", 80 | "cassert": "cpp", 81 | "ccomplex": "cpp", 82 | "cfloat": "cpp", 83 | "ciso646": "cpp", 84 | "climits": "cpp", 85 | "queue": "cpp", 86 | "stack": "cpp", 87 | "cstdbool": "cpp", 88 | "__threading_support": "cpp", 89 | "__hash_table": "cpp", 90 | "__split_buffer": "cpp", 91 | "__tree": "cpp", 92 | "*.ipp": "cpp", 93 | "future": "cpp", 94 | "slist": "cpp", 95 | "__bit_reference": "cpp", 96 | "cross_module_gil_utils.cpp": "cuda-cpp", 97 | "charconv": "cpp", 98 | "*.inc": "cpp" 99 | } 100 | } -------------------------------------------------------------------------------- /figs/unbiased.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-17 21:38:38 7 | # @FilePath: /skywalker/figs/unbiased.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n=40000" 25 | OUT='>> unbiased.csv' 26 | 27 | # --randomweight=1 --weightrange=2 28 | 29 | 30 | # echo "-------------------------------------------------------unbias rw 100 full" >> unbiased.csv 31 | # for idx in $(seq 1 ${#DATA[*]}) 32 | # do 33 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} ${SG} ${RW} -full >> unbiased.csv 34 | # done 35 | # walker 36 | # echo "-------------------------------------------------------unbias rw 100" 
>> unbiased.csv 37 | # for idx in $(seq 1 ${#DATA[*]}) 38 | # do 39 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${RW} ${BATCH} >> unbiased.csv 40 | # done 41 | 42 | # echo "-------------------------------------------------------unbias ppr 100" >> unbiased.csv 43 | # for idx in $(seq 1 ${#DATA[*]}) 44 | # do 45 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --tp=0.15 ${RW} ${BATCH} >> unbiased.csv 46 | # done 47 | 48 | # echo "-------------------------------------------------------unbias node2vec" >> unbiased.csv 49 | # for idx in $(seq 1 ${#DATA[*]}) 50 | # do 51 | # ./bin/main --bias=0 --ol=0 --buffer --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --node2vec ${BATCH} >> unbiased.csv 52 | # done 53 | # echo "-------------------------------------------------------unbias node2vec full" >> unbiased.csv 54 | # for idx in $(seq 1 ${#DATA[*]}) 55 | # do 56 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --node2vec --full >> unbiased.csv 57 | # done 58 | echo "-------------------------------------------------------unbias sage 40k" >> unbiased.csv 59 | for idx in $(seq 1 ${#DATA[*]}) 60 | do 61 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --sage ${BATCH} >> unbiased.csv 62 | done 63 | # echo "-------------------------------------------------------unbias sage" >> unbiased.csv 64 | # for idx in $(seq 1 ${#DATA[*]}) 65 | # do 66 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 --sage ${BATCH} >> unbiased.csv 67 | # done 68 | 69 | # echo "-------------------------------------------------------unbias sp" >> unbiased.csv 70 | # for idx in $(seq 1 ${#DATA[*]}) 71 | # do 72 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${SP} ${BATCH} --m=2 >> unbiased2.csv 73 | # done 74 | 75 | # echo "-------------------------------------------------------unbias sp" >> unbiased.csv 76 | # for idx in $(seq 1 ${#DATA[*]}) 77 | # do 78 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu 1 ${SP} ${BATCH} --m=3>> unbiased3.csv 79 | # done 80 | -------------------------------------------------------------------------------- /old/shmem/vec.cuh: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | 3 | #define SHMEM_SIZE 49152 4 | 5 | #define BLOCK_SIZE 1024 6 | #define WARP_PER_SM (BLOCK_SIZE / 32) 7 | 8 | #define SHMEM_PER_WARP (SHMEM_SIZE / WARP_PER_SM) 9 | 10 | #define TMP_PER_ELE (4 + 4 + 4 + 4 + 1) 11 | 12 | // alignment 13 | #define ELE_PER_WARP (SHMEM_PER_WARP / TMP_PER_ELE - 8) 14 | 15 | template class Vector_itf { 16 | public: 17 | Vector_itf() {} 18 | ~Vector_itf() {} 19 | virtual void init() {} 20 | virtual void add() {} 21 | virtual void clean() {} 22 | virtual bool empty() {} 23 | virtual size_t size() {} 24 | virtual T &operator[](int id) {} 25 | }; 26 | 27 | template struct buf { T data[ELE_PER_WARP]; }; 28 | 29 | 30 | 31 | template struct Vector_shmem { 32 | u64 size = 0; 33 | u64 capacity = ELE_PER_WARP; 34 | T data[ELE_PER_WARP]; 35 | 36 | __device__ void Init(size_t s = 0) { 37 | if (LID == 0) { 38 | capacity = ELE_PER_WARP; 39 | size = s; 40 | } 41 | for (size_t i = LID; i < capacity; i += 32) { 42 | data[i] = 0; 43 | } 44 | } 45 | __device__ u64 &Size() { return size; } 46 | __device__ void Add(T t) { 47 | u64 old = atomicAdd(&size, 1); 48 | if (old < capacity) 49 | data[old] = t; 50 | else 51 | printf("Vector_shmem overflow %llu\n", capacity); 52 | } 53 | __device__ void Clean() { size = 0; } 54 | __device__ bool 
Empty() { 55 | if (size == 0) 56 | return true; 57 | return false; 58 | } 59 | __device__ T &operator[](int id) { return data[id]; } 60 | }; 61 | 62 | // template __global__ void myMemsetKernel(T *ptr, size_t size){ 63 | // for (size_t i = TID; i < size; i+=BLOCK_SIZE) 64 | // { 65 | // ptr[i]= 66 | // } 67 | 68 | // } 69 | 70 | // template void myMemset(T *ptr, size_t size){ 71 | 72 | // } 73 | 74 | template class Vector { 75 | public: 76 | u64 *size; 77 | u64 *capacity; 78 | T *data = nullptr; 79 | bool use_self_buffer = false; 80 | // T data[VECTOR_SHMEM_SIZE]; 81 | 82 | __host__ Vector() {} 83 | __host__ void free() { 84 | if (use_self_buffer && data != nullptr) 85 | cudaFree(data); 86 | } 87 | __device__ __host__ ~Vector() {} 88 | __host__ void init(int _capacity) { 89 | cudaMallocManaged(&size, sizeof(u64)); 90 | cudaMallocManaged(&capacity, sizeof(u64)); 91 | *capacity = _capacity; 92 | *size = 0; 93 | // init_array(capacity,1,_capacity); 94 | // init_array(capacity,1,_capacity); 95 | cudaMalloc(&data, _capacity * sizeof(T)); 96 | use_self_buffer = true; 97 | } 98 | __host__ __device__ u64 &Size() { return *size; } 99 | __device__ void add(T t) { 100 | u64 old = atomicAdd(size, 1); 101 | if (old < *capacity) 102 | data[old] = t; 103 | else 104 | printf("wtf vector overflow"); 105 | } 106 | __device__ void AddTillSize(T t, u64 target_size) { 107 | u64 old = atomicAdd(size, 1); 108 | if (old < *capacity) { 109 | if (old < target_size) 110 | data[old] = t; 111 | } else 112 | printf("wtf vector overflow"); 113 | } 114 | __device__ void clean() { *size = 0; } 115 | __device__ bool empty() { 116 | if (*size == 0) 117 | return true; 118 | return false; 119 | } 120 | __device__ T &operator[](int id) { return data[id]; } 121 | }; -------------------------------------------------------------------------------- /figs/test_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ONLINE=false 4 | BIAS=false 5 | FULL=false 6 | STATIC=false 7 | BUFFER=false 8 | for i in "$@" 9 | do 10 | case $i in 11 | -app=*|--app=*) 12 | APP="${i#*=}" 13 | shift # past argument=value 14 | # ;; 15 | # -s=*|--searchpath=*) 16 | # SEARCHPATH="${i#*=}" 17 | # shift # past argument=value 18 | # ;; 19 | # -l=*|--lib=*) 20 | # LIBPATH="${i#*=}" 21 | # shift # past argument=value 22 | ;; 23 | -online) 24 | ONLINE=true 25 | shift # past argument with no value 26 | ;; 27 | -bias) 28 | BIAS=true 29 | shift # past argument with no value 30 | ;; 31 | -full) 32 | FULL=true 33 | shift # past argument with no value 34 | ;; 35 | -static) 36 | STATIC=true 37 | shift # past argument with no value 38 | ;; 39 | -buffer) 40 | BUFFER=true 41 | shift # past argument with no value 42 | ;; 43 | *) 44 | # unknown option 45 | ;; 46 | esac 47 | done 48 | # echo "APP = ${APP}" 49 | # echo "SEARCH PATH = ${SEARCHPATH}" 50 | # echo "LIBRARY PATH = ${LIBPATH}" 51 | # echo "DEFAULT = ${DEFAULT}" 52 | # echo "Number files in SEARCH PATH with EXTENSION:" $(ls -1 "${SEARCHPATH}"/*."${EXTENSION}" | wc -l) 53 | # if [[ -n $1 ]]; then 54 | # echo "Last line of file specified as non-opt/last argument:" 55 | # tail -1 $1 56 | # fi 57 | echo ${BIN} 58 | BIN="./bin/main" 59 | if [ ${APP} = "node2vec" ] 60 | then 61 | if ${BIAS} 62 | then 63 | BIN="./bin/node2vec" 64 | fi 65 | fi 66 | # echo ${BIN} 67 | # if [ ${APP}="node2vec" ] && ${BIAS} ; then 68 | # BIN="./bin/node2vec" 69 | # else 70 | # BIN="./bin/main" 71 | # fi 72 | 73 | if ${BIAS} ; then 74 | BIN=${BIN}" -bias=1 " 75 | else 76 | 
BIN=${BIN}" -bias=0 " 77 | fi 78 | 79 | if ${ONLINE} ; then 80 | BIN=${BIN}" -ol=1 " 81 | else 82 | BIN=${BIN}" -ol=0 " 83 | fi 84 | 85 | if ${FULL} ; then 86 | BIN=${BIN}" --full " 87 | else 88 | BIN=${BIN}" --n 40000 " 89 | fi 90 | if ${STATIC} ; then 91 | BIN=${BIN}" --static=1 " 92 | else 93 | BIN=${BIN}" --static=0 " 94 | fi 95 | if ${BUFFER} ; then 96 | BIN=${BIN}" --buffer=1 " 97 | else 98 | BIN=${BIN}" --buffer=0 " 99 | fi 100 | 101 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 102 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 103 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 104 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 105 | 106 | # DATA=( sk-2005 friendster) 107 | # HD=( 4 1 ) 108 | ITR=1 109 | NG=4 #8 110 | 111 | GR=".w.gr" 112 | EXE="./bin/main" #main_degree 113 | SG="--ngpu=1 --s" 114 | RW="--deepwalk " 115 | SP="--sage " 116 | # BATCH="--n 40000 " 117 | LOG_FILE="offline.csv" 118 | 119 | # echo "-------------------------------------------------------offline rw 100 ${BATCH}" >> ${LOG_FILE} 120 | echo "-------------------------------------------------------${APP} ${BIN} BIAS=${BIAS} ONLINE=${ONLINE} FULL=${FULL}------------" 121 | for idx in $(seq 1 ${#DATA[*]}) 122 | do 123 | for i in $(seq 1 ${ITR}) 124 | do 125 | ${BIN} ${SG} --${APP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} # >> ${LOG_FILE} 126 | done 127 | done -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(Skywalker CXX ) 2 | cmake_minimum_required ( VERSION 3.17 ) 3 | 4 | set ( CUDA_ENABLE true ) 5 | if ( CUDA_ENABLE ) 6 | enable_language( CUDA ) 7 | list ( APPEND PRJ_COMPILE_OPTIONS -Xcompiler ) 8 | endif() 9 | 10 | find_package ( OpenMP ) 11 | if ( OpenMP_FOUND ) 12 | list ( APPEND PRJ_COMPILE_DEF ENABLE_OPENMP ) 13 | list ( APPEND PRJ_LIBRARIES ${OpenMP_CXX_LIBRARIES} ) 14 | list ( APPEND PRJ_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS} ) 15 | endif () 16 | 17 | # set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 18 | # find_package ( Numa REQUIRED) 19 | 20 | add_subdirectory(deps/gflags) 21 | include_directories(${PROJECT_BINARY_DIR}/deps/gflags/include) 22 | link_directories(${PROJECT_BINARY_DIR}/deps/gflags/lib) 23 | 24 | ############## BUILD ############## 25 | set(EXTRA_LIBS gflags) 26 | 27 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -rdc=true -res-usage -lnuma -fopenmp -Xptxas -v ") 28 | if (CMAKE_BUILD_TYPE STREQUAL "Debug") 29 | message("Debug mode") 30 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G -O0 -Xcompiler -ggdb -lnvrt -Woverloaded-virtual") 31 | else() 32 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo -O3 -Xcompiler -DNDEBUG ") 33 | endif() 34 | 35 | # -gencode arch=compute_75,code=sm_75 -std=c++11 36 | 37 | #SET(CUDA_SEPARABLE_COMPILATION ON) 38 | #set(CUDA_VERBOSE_BUILD ON) 39 | 40 | include_directories(include) 41 | aux_source_directory(src DIR_SRCS) 42 | # aux_source_directory(src/util DIR_SRCS) 43 | 44 | if(NOT DEFINED CMAKE_CUDA_STANDARD) 45 | set(CMAKE_CUDA_STANDARD 11) 46 | set(CMAKE_CUDA_STANDARD_REQUIRED ON) 47 | endif() 48 | 49 | if (NOT DEFINED CUDA_SEPARABLE_COMPILATION) 50 | set(CUDA_SEPARABLE_COMPILATION ON) 51 | endif(NOT DEFINED CUDA_SEPARABLE_COMPILATION) 52 | 53 | 54 | add_executable(skywalker 55 | ${DIR_SRCS} 56 | src/api/bias_static.cu 57 | ) 58 | 59 | 60 | 
target_link_libraries(skywalker 61 | ${EXTRA_LIBS} 62 | ${CUDA_LIBRARIES} 63 | ) 64 | if(OpenMP_CXX_FOUND) 65 | target_link_libraries(skywalker OpenMP::OpenMP_CXX) 66 | endif() 67 | set_target_properties(skywalker PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 68 | set_target_properties(skywalker PROPERTIES COMPILE_DEFINITIONS LOCALITY) 69 | 70 | add_executable(skywalker_noloc 71 | ${DIR_SRCS} 72 | src/api/bias_static.cu 73 | ) 74 | target_link_libraries(skywalker_noloc 75 | ${EXTRA_LIBS} 76 | ${CUDA_LIBRARIES} 77 | ) 78 | if(OpenMP_CXX_FOUND) 79 | target_link_libraries(skywalker_noloc OpenMP::OpenMP_CXX) 80 | endif() 81 | set_target_properties(skywalker_noloc PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 82 | 83 | 84 | add_executable(skywalker_node2vec 85 | ${DIR_SRCS} 86 | src/api/bias_node2vec.cu 87 | ) 88 | target_link_libraries(skywalker_node2vec 89 | ${EXTRA_LIBS} 90 | ${CUDA_LIBRARIES} 91 | ) 92 | if(OpenMP_CXX_FOUND) 93 | target_link_libraries(skywalker_node2vec OpenMP::OpenMP_CXX) 94 | endif() 95 | set_target_properties(skywalker_node2vec PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 96 | set_target_properties(skywalker_node2vec PROPERTIES COMPILE_DEFINITIONS LOCALITY) 97 | 98 | set_property(TARGET skywalker PROPERTY CUDA_ARCHITECTURES 75) 99 | set_property(TARGET skywalker skywalker_noloc CUDA_ARCHITECTURES 75) 100 | set_property(TARGET skywalker_node2vec PROPERTY CUDA_ARCHITECTURES 75) 101 | 102 | # add_executable(skywalker_degree 103 | # ${DIR_SRCS} 104 | # src/api/bias_degree.cu 105 | # ) 106 | # target_link_libraries(skywalker_degree 107 | # ${EXTRA_LIBS} 108 | # ${CUDA_LIBRARIES} 109 | # ) 110 | # if(OpenMP_CXX_FOUND) 111 | # target_link_libraries(skywalker_degree OpenMP::OpenMP_CXX) 112 | # endif() 113 | # set_target_properties(skywalker_degree PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) 114 | 115 | 116 | -------------------------------------------------------------------------------- /old/alias.cu: -------------------------------------------------------------------------------- 1 | #include "alias.cuh" 2 | #include 3 | #include 4 | #include 5 | 6 | template __global__ void init_array_d(T *ptr, size_t size, T v) { 7 | if (TID < size) { 8 | ptr[TID] = v; 9 | } 10 | } 11 | template void init_array(T *ptr, size_t size, T v) { 12 | init_array_d<<>>(ptr, size, v); 13 | } 14 | template __global__ void init_range_d(T *ptr, size_t size) { 15 | if (TID < size) { 16 | ptr[TID] = TID; 17 | } 18 | } 19 | template void init_range(T *ptr, size_t size) { 20 | init_range_d<<>>(ptr, size); 21 | } 22 | 23 | template 24 | __global__ void init(alias_table *table, T *buf1, T *buf2, T *buf3, 25 | float *buf4, float *buf5, char *buf6, T *buf7, int size, 26 | int size2) { 27 | if (TID == 0) { 28 | printf("init\n"); 29 | table->init_buffer(buf1, buf2, buf3, buf4, buf5, buf6, buf7, size, size2); 30 | } 31 | } 32 | template 33 | __global__ void load(alias_table *table, T *buf1, float *weight, int size) { 34 | // if (TID == 0) { 35 | // for (int i = 0; i < size; i++) { 36 | // printf("%f\t", weight[i]); 37 | // } 38 | // printf("\n"); 39 | // } 40 | if (TID == 0) { 41 | printf("load\n"); 42 | } 43 | table->load(buf1, weight, size); 44 | } 45 | template __global__ void kernel(alias_table *table) { 46 | if (TID == 0) { 47 | printf("kernel\n"); 48 | } 49 | table->normalize(); 50 | if (TID == 0) { 51 | printf("construct\n"); 52 | } 53 | table->construct(); 54 | } 55 | template __global__ void roll(alias_table *table, size_t num) { 56 | if (TID == 0) { 57 | printf("roll\n"); 58 | } 59 | // curandState state; 60 | // 
curand_init(0, TID, 0, &state); 61 | table->roll(&table->result, num); 62 | } 63 | 64 | // todo 65 | /* 66 | 1. prefix sum to normalize 67 | 2. 68 | */ 69 | int main(int argc, char const *argv[]) { 70 | int *buf1, *buf2, *buf3; 71 | float *buf4, *buf5; 72 | char *buf6; 73 | int *buf7; 74 | int size = 4000000; 75 | 76 | cudaSetDevice(1); 77 | cudaMalloc(&buf1, size * sizeof(int)); 78 | cudaMalloc(&buf2, size * sizeof(int)); 79 | cudaMalloc(&buf3, size * sizeof(int)); 80 | cudaMalloc(&buf4, size * sizeof(float)); 81 | cudaMalloc(&buf5, size * sizeof(float)); 82 | cudaMalloc(&buf6, size * sizeof(char)); 83 | cudaMalloc(&buf7, size / 2 * sizeof(int)); 84 | 85 | cudaMemset(buf6, size * sizeof(char), 0); 86 | 87 | int *id_ptr; 88 | float *weight_ptr; 89 | cudaMalloc(&id_ptr, size * sizeof(int)); 90 | cudaMalloc(&weight_ptr, size * sizeof(float)); 91 | init_range(id_ptr, size); 92 | init_array(weight_ptr, size / 8 * 7, 0.5); 93 | init_array(weight_ptr + size / 8 * 7, size - size / 8 * 7, 2.0); 94 | 95 | // printH(weight_ptr, size); 96 | 97 | P; 98 | alias_table *table_ptr; 99 | alias_table table_h; 100 | 101 | P; 102 | cudaMalloc(&table_ptr, 1 * sizeof(alias_table)); 103 | cudaMemcpy(table_ptr, &table_h, 1 * sizeof(alias_table), 104 | cudaMemcpyHostToDevice); 105 | P; 106 | init<<<1, 32, 0, 0>>>(table_ptr, buf1, buf2, buf3, buf4, buf5, buf6, 107 | buf7, size, size / 2); 108 | // table_ptr->init( buf1, buf2, buf3, buf4, buf5, size); 109 | // table_h.init( buf1, buf2, buf3, buf4, buf5, size); 110 | HERR(cudaPeekAtLastError()); 111 | P; 112 | load<<<1, 32, 0, 0>>>(table_ptr, id_ptr, weight_ptr, size); 113 | HERR(cudaPeekAtLastError()); 114 | P; 115 | kernel<<<1, 32, 0, 0>>>(table_ptr); 116 | P; 117 | roll<<<1, 32, 0, 0>>>(table_ptr, size / 2); 118 | P; 119 | usleep(5000); 120 | HERR(cudaDeviceSynchronize()); 121 | HERR(cudaPeekAtLastError()); 122 | return 0; 123 | } 124 | -------------------------------------------------------------------------------- /result/knightking.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-25 16:50:34 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-11 16:30:59 6 | # @FilePath: /skywalker/result/knightking.sh 7 | ### 8 | DATA=( web-Google lj orkut uk-2005 arabic-2005 sk-2005 friendster) # twitter-2010 uk-union rmat29 web-ClueWeb09) 9 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) #41652230 10 | # cd ../KnightKing/build 11 | # DATA=( lj ) # twitter-2010 uk-union rmat29 web-ClueWeb09) 12 | # NV=( 4847571 ) #41652230 13 | # DATA=( web-Google orkut arabic-2005 ) # twitter-2010 uk-union rmat29 web-ClueWeb09) 14 | # NV=(916428 3072627 22744077 ) #41652230 15 | 16 | # echo "----------------------unbiased 4k degree-------------------" 17 | # for idx in $(seq 1 ${#DATA[*]}) 18 | # do 19 | # echo ${DATA[idx-1]} 20 | # ~/sampling/KnightKing/build/bin/deepwalk -w 40000 -l 100 -s unweighted -g ~/data/${DATA[idx-1]}.uw.data -v ${NV[idx-1]} 21 | # done 22 | # echo "----------------------unbiased node2vec-------------------" 23 | # for idx in $(seq 1 ${#DATA[*]}) 24 | # do 25 | # echo ${DATA[idx-1]} 26 | # ~/sampling/KnightKing/build/bin/node2vec -w 40000 -l 100 -s unweighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx-1]}.uw.data -v ${NV[idx-1]} 27 | # done 28 | 29 | # echo "----------------------ppr unbiased 40k 64-------------------" 30 | # for idx in $(seq 1 ${#DATA[*]}) 31 | # do 32 | # echo ${DATA[idx-1]} 33 | # ~/sampling/KnightKing/build/bin/ppr -w 40000 -s unweighted -t 
0.15 -v ${NV[idx-1]} -g ~/data/${DATA[idx-1]}.uw.data 34 | # done 35 | 36 | 37 | # echo "----------------------simple_walk 4k-------------------" 38 | # for idx in $(seq 1 ${#DATA[*]}) 39 | # do 40 | # echo ${DATA[idx-1]} 41 | # ~/sampling/KnightKing/build/bin/simple_walk -g ~/sampling/KnightKing/build/${DATA[idx-1]}.data -v ${NV[idx-1]} -w 4000 -l 100 42 | # done 43 | 44 | # echo "----------------------online 40k-------------------" 45 | # echo "----------------------biased_walk 4k degree-------------------" 46 | # for idx in $(seq 1 ${#DATA[*]}) 47 | # do 48 | # echo ${DATA[idx-1]} 49 | # ~/sampling/KnightKing/build/bin/biased_walk -w 40000 -l 100 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 50 | # done 51 | # echo "----------------------ppr biased 4k 64-------------------" 52 | # for idx in $(seq 1 ${#DATA[*]}) 53 | # do 54 | # echo ${DATA[idx-1]} 55 | # ~/sampling/KnightKing/build/bin/ppr -s weighted -t 0.15 -w 40000 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 56 | # done 57 | # echo "----------------------node2vec-------------------" 58 | # for idx in $(seq 1 ${#DATA[*]}) 59 | # do 60 | # echo ${DATA[idx-1]} 61 | # ~/sampling/KnightKing/build/bin/node2vec -w 40000 -l 100 -s weighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 62 | # done 63 | 64 | # echo "----------------------biased_walk 40k degree-------------------" 65 | # for idx in $(seq 1 ${#DATA[*]}) 66 | # do 67 | # echo ${DATA[idx-1]} 68 | # ~/sampling/KnightKing/build/bin/biased_walk -w 40000 -l 100 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 69 | # done 70 | 71 | 72 | 73 | # echo "----------------------ppr biased 40k 64-------------------" 74 | # for idx in $(seq 1 ${#DATA[*]}) 75 | # do 76 | # echo ${DATA[idx-1]} 77 | # ~/sampling/KnightKing/build/bin/ppr -s weighted -t 0.15 -w 40000 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 78 | # done 79 | 80 | 81 | # echo "----------------------biased_walk 40k degree-------------------" 82 | # for idx in $(seq 1 ${#DATA[*]}) 83 | # do 84 | # echo ${DATA[idx-1]} 85 | # ~/sampling/KnightKing/build/bin/biased_walk -w 40000 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} -l 100 86 | # done 87 | # -w ${NV[idx-1]} 88 | 89 | echo "----------------------biased node2vec-------------------" 90 | for idx in $(seq 1 ${#DATA[*]}) 91 | do 92 | echo ${DATA[idx-1]} 93 | ~/sampling/KnightKing/build/bin/node2vec -w 40000 -l 100 -s weighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx-1]}.data -v ${NV[idx-1]} 94 | done 95 | -------------------------------------------------------------------------------- /figs/v100.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:45:05 7 | # @FilePath: /skywalker/figs/scale2.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.7 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | MEM=(--umgraph --umgraph --umgraph --umgraph --umgraph --umgraph --umgraph) 13 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 14 | 15 | # DATA=( sk-2005 friendster) 16 | # HD=( 4 1 ) 17 | ITR=1 18 | NG=8 #8 19 | 20 | 21 | GR=".w.gr" 22 | EXE="./bin/main" #main_degree 23 | SG="--ngpu=1 --s" 24 | RW="--rw=1 --k 1 --d 100 " 25 | SP="--rw=0 --k 20 --d 2 " 26 | BATCH="--n 40000" 27 | 28 | # --randomweight=1 --weightrange=2 29 
| 30 | echo "-------------------------------------------------------unbias sp scale" >> scale.csv 31 | for idx in $(seq 1 ${#DATA[*]}) 32 | do 33 | for i in $(seq 1 ${ITR}) 34 | do 35 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${NG} ${SP} ${BATCH} --m=1 >> scale.csv 36 | done 37 | done 38 | 39 | echo "-------------------------------------------------------unbias rw scale" >> scale.csv 40 | for idx in $(seq 1 ${#DATA[*]}) 41 | do 42 | for i in $(seq 1 ${ITR}) 43 | do 44 | ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${NG} ${RW} ${BATCH} --m=1 >> scale.csv 45 | done 46 | done 47 | 48 | 49 | # echo "-------------------------------------------------------offline rw 100" >> scale.csv 50 | # for idx in $(seq 1 ${#DATA[*]}) 51 | # do 52 | # for i in $(seq 1 ${ITR}) 53 | # do 54 | # ./bin/main -bias=1 --ol=0 --ngpu=${NG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> scale.csv 55 | # done 56 | # done 57 | 58 | # echo "-------------------------------------------------------offline ppr 0.15" >> scale.csv 59 | # for idx in $(seq 1 ${#DATA[*]}) 60 | # do 61 | # for i in $(seq 1 ${ITR}) 62 | # do 63 | # ./bin/main -bias=1 --ol=0 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG} >> scale.csv 64 | # done 65 | # done 66 | 67 | 68 | # echo "-------------------------------------------------------offline sp 100" >> scale.csv 69 | # for idx in $(seq 1 ${#DATA[*]}) 70 | # do 71 | # for i in $(seq 1 ${ITR}) 72 | # do 73 | # ./bin/main -bias=1 --ol=0 --ngpu=${NG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> scale.csv 74 | # done 75 | # done 76 | 77 | # echo "-------------------------------------------------------online rw 100" >> scale.csv 78 | # for idx in $(seq 1 ${#DATA[*]}) 79 | # do 80 | # for i in $(seq 1 ${ITR}) 81 | # do 82 | # ./bin/main -bias=1 --ol=1 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG} >> scale.csv 83 | # done 84 | # done 85 | 86 | # echo "-------------------------------------------------------online ppr 0.15" >> scale.csv 87 | # for idx in $(seq 1 ${#DATA[*]}) 88 | # do 89 | # for i in $(seq 1 ${ITR}) 90 | # do 91 | # ./bin/main -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG} >> scale.csv 92 | # done 93 | # done 94 | 95 | # echo "-------------------------------------------------------online ppr 0.15" >> scale.csv 96 | # for idx in $(seq 1 ${#DATA[*]}) 97 | # do 98 | # for i in $(seq 1 ${ITR}) 99 | # do 100 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG}>> scale.csv 101 | # done 102 | # done 103 | 104 | # echo "-------------------------------------------------------online sp 100" >> scale.csv 105 | # for idx in $(seq 1 ${#DATA[*]}) 106 | # do 107 | # for i in $(seq 1 ${ITR}) 108 | # do 109 | # ./bin/main -bias=1 --ol=1 ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=${NG}>> scale.csv 110 | # done 111 | # done 112 | 113 | -------------------------------------------------------------------------------- /scripts/multiple-gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:45:34 7 | # @FilePath: /skywalker/figs/scale.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 
sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 0.5 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | 20 | GR=".w.gr" 21 | EXE="./bin/main" #main_degree 22 | SG="--ngpu=1 --s" 23 | RW="--rw=1 --k 1 --d 100 " 24 | SP="--rw=0 --k 20 --d 2 " 25 | BATCH="--n 40000" 26 | 27 | # BATCH="--n 4" 28 | 29 | # --randomweight=1 --weightrange=2 30 | 31 | # echo "-------------------------------------------------------unbias sp scale" >> multiple-gpu.csv 32 | # for idx in $(seq 1 ${#DATA[*]}) 33 | # do 34 | # for i in $(seq 1 ${NG}) 35 | # do 36 | # ./bin/main --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=$i ${SP} ${BATCH} --m=1 >> multiple-gpu.csv 37 | # done 38 | # done 39 | 40 | echo "-------------------------------------------------------table" >> multiple-gpu.csv 41 | for idx in $(seq 1 ${#DATA[*]}) 42 | do 43 | for i in $(seq 1 ${NG}) 44 | do 45 | ./bin/main -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n=0 >> multiple-gpu.csv 46 | done 47 | done 48 | 49 | # echo "-------------------------------------------------------offline rw 100" >> multiple-gpu.csv 50 | # for idx in $(seq 1 ${#DATA[*]}) 51 | # do 52 | # for i in $(seq 1 ${NG}) 53 | # do 54 | # ./bin/main -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 55 | # done 56 | # done 57 | 58 | # echo "-------------------------------------------------------offline ppr 0.15" >> multiple-gpu.csv 59 | # for idx in $(seq 1 ${#DATA[*]}) 60 | # do 61 | # for i in $(seq 1 ${NG}) 62 | # do 63 | # ./bin/main -bias=1 --ol=0 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=$i --s >> multiple-gpu.csv 64 | # done 65 | # done 66 | 67 | 68 | # echo "-------------------------------------------------------offline sp 100" >> multiple-gpu.csv 69 | # for idx in $(seq 1 ${#DATA[*]}) 70 | # do 71 | # for i in $(seq 1 ${NG}) 72 | # do 73 | # ./bin/main -bias=1 --ol=0 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 74 | # done 75 | # done 76 | 77 | 78 | 79 | 80 | 81 | 82 | # echo "-------------------------------------------------------online rw 100" >> multiple-gpu.csv 83 | # for idx in $(seq 1 ${#DATA[*]}) 84 | # do 85 | # for i in $(seq 1 ${NG}) 86 | # do 87 | # ./bin/main -bias=1 --ol=1 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 88 | # done 89 | # done 90 | 91 | # echo "-------------------------------------------------------online ppr 0.15" >> multiple-gpu.csv 92 | # for idx in $(seq 1 ${#DATA[*]}) 93 | # do 94 | # for i in $(seq 1 ${NG}) 95 | # do 96 | # ./bin/main -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=$i --s >> multiple-gpu.csv 97 | # done 98 | # done 99 | 100 | # echo "-------------------------------------------------------online ppr 0.15" >> multiple-gpu.csv 101 | # for idx in $(seq 1 ${#DATA[*]}) 102 | # do 103 | # for i in $(seq 1 ${NG}) 104 | # do 105 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} --ngpu=$i --s >> multiple-gpu.csv 106 | # done 107 | # done 108 | 109 | # echo 
"-------------------------------------------------------online sp 100" >> multiple-gpu.csv 110 | # for idx in $(seq 1 ${#DATA[*]}) 111 | # do 112 | # for i in $(seq 1 ${NG}) 113 | # do 114 | # ./bin/main -bias=1 --ol=1 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> multiple-gpu.csv 115 | # done 116 | # done 117 | 118 | -------------------------------------------------------------------------------- /scripts/fig8_biased.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 15:49:20 7 | ### 8 | 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | #HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | 17 | ITR=1 18 | NG=4 #8 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--deepwalk " 23 | SP="--sage " 24 | BATCH="--n 40000 -v" 25 | 26 | ROOT_DIR=$PWD 27 | LOG_FILE=${ROOT_DIR}"/result/fig8_biased.csv" 28 | 29 | # DATA_DIR="~/data" 30 | DATA_DIR=${ROOT_DIR}"/dataset" 31 | GraphWalker_DIR="/home/pywang/sampling/GraphWalker" 32 | KnightKing_DIR="/home/pywang/sampling/KnightKing" 33 | CSAW_DIR="/home/pywang/sampling/C-SAW" 34 | 35 | 36 | echo "-------------------------------------------------------Skywalker offline rw 100 ${BATCH}" >>"${LOG_FILE}" 37 | for idx in $(seq 1 ${#DATA[*]}); do 38 | for i in $(seq 1 ${ITR}); do 39 | ./bin/main -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} >>"${LOG_FILE}" 40 | done 41 | done 42 | 43 | echo "-------------------------------------------------------Skywalker offline ppr 0.15 ${BATCH}" >>"${LOG_FILE}" 44 | for idx in $(seq 1 ${#DATA[*]}); do 45 | for i in $(seq 1 ${ITR}); do 46 | ./bin/main -bias=1 --ol=0 ${RW} --tp=0.15 --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} ${SG} >>"${LOG_FILE}" 47 | done 48 | done 49 | 50 | echo "-------------------------------------------------------Skywalker node2vec " >>"${LOG_FILE}" 51 | for idx in $(seq 1 ${#DATA[*]}); do 52 | for i in $(seq 1 ${ITR}); do 53 | # ./bin/node2vec -node2vec ${RW} --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} ${SG} ${POLICY} >>"${LOG_FILE}" 54 | ./bin/main --bias=1 --ol=0 --buffer --input ~/data/${DATA[idx - 1]}${GR} --ngpu 1 --node2vec ${BATCH} >>"${LOG_FILE}" 55 | done 56 | done 57 | # 58 | #echo "-------------------------------------------------------Skywalker offline sp sage ${BATCH}" >>"${LOG_FILE}" 59 | #for idx in $(seq 1 ${#DATA[*]}); do 60 | # for i in $(seq 1 ${ITR}); do 61 | # ./bin/main -bias=1 --ol=0 ${SG} --rw=0 --sage --input ~/data/${DATA[idx - 1]}${GR} --hd=${HD[idx - 1]} ${BATCH} >>"${LOG_FILE}" 62 | # done 63 | #done 64 | # 65 | #echo "-------------------Runtime of C-SAW need to be scale by 10 due to 4k as batch size. 
And scale by sampled edges ratio-------------------" >>"${LOG_FILE}" 66 | # 67 | #echo "----------------------C-SAW biased walk 4k 64-------------------" >>"${LOG_FILE}" 68 | #for idx in $(seq 1 ${#DATA[*]}); do 69 | # echo "------------"${DATA[idx - 1]} 70 | # $CSAW_DIR/non-stream/sampling.bin wg ~/data/${DATA[idx - 1]}.w.edge_beg_pos.bin ~/data/${DATA[idx - 1]}.w.edge_csr.bin 100 32 4000 1 1 100 1 >>"${LOG_FILE}" 71 | #done 72 | # 73 | #echo "----------------------C-SAW sampling biased 4k 20 2 64-------------------" >>"${LOG_FILE}" 74 | #for idx in $(seq 1 ${#DATA[*]}); do 75 | # echo "------------"${DATA[idx - 1]} 76 | # $CSAW_DIR/non-stream/sampling.bin wg ~/data/${DATA[idx - 1]}.w.edge_beg_pos.bin ~/data/${DATA[idx - 1]}.w.edge_csr.bin 100 32 4000 1 20 2 1 >>"${LOG_FILE}" 77 | #done 78 | # 79 | #echo "----------------------KnightKing biased_walk -------------------" >>"${LOG_FILE}" 80 | #for idx in $(seq 1 ${#DATA[*]}); do 81 | # echo "------------"${DATA[idx - 1]} 82 | # $KnightKing_DIR/build/bin/biased_walk -w 40000 -g ~/data/${DATA[idx - 1]}.data -v ${NV[idx - 1]} -l 100 >>"${LOG_FILE}" 83 | #done 84 | # 85 | #echo "----------------------KnightKing biased node2vec-------------------" >>"${LOG_FILE}" 86 | #for idx in $(seq 1 ${#DATA[*]}); do 87 | # echo "------------"${DATA[idx - 1]} 88 | # $KnightKing_DIR/build/bin/node2vec -w 40000 -l 100 -s weighted -p 2.0 -q 0.5 -g ~/data/${DATA[idx - 1]}.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 89 | #done 90 | # 91 | #echo "----------------------KnightKing ppr biased -------------------" >>"${LOG_FILE}" 92 | #for idx in $(seq 1 ${#DATA[*]}); do 93 | # echo "------------"${DATA[idx - 1]} 94 | # $KnightKing_DIR/build/bin/ppr -s weighted -t 0.15 -w 40000 -g ~/data/${DATA[idx - 1]}.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 95 | #done 96 | -------------------------------------------------------------------------------- /figs/scale.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:45:34 7 | # @FilePath: /skywalker/figs/scale.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 0.5 1 ) 16 | ITR=1 17 | # NG=4 #8 18 | NG=(1 2 4) 19 | 20 | 21 | GR=".w.gr" 22 | EXE="./build/skywalker_multi --csv " #main_degree 23 | SG="--ngpu=1 --s" 24 | RW="--rw=1 --k 1 --d 100 " 25 | SP="--rw=0 --k 20 --d 2 " 26 | BATCH="--n 40000" 27 | 28 | # BATCH="--n 4" 29 | 30 | # --randomweight=1 --weightrange=2 31 | 32 | echo "-------------------------------------------------------unbias sp scale" >> scale.csv 33 | for idx in $(seq 1 ${#DATA[*]}) 34 | do 35 | for i in "${NG[@]}" 36 | do 37 | ./build/skywalker_multi --csv --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=$i ${SP} --n $(( $i * 40000 )) >> scale.csv 38 | done 39 | done 40 | 41 | echo "-------------------------------------------------------unbias rw scale" >> scale.csv 42 | for idx in $(seq 1 ${#DATA[*]}) 43 | do 44 | for i in "${NG[@]}" 45 | do 46 | ./build/skywalker_multi --csv --deepwalk --bias=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=$i ${SP} --n $(( $i * 40000 )) >> scale.csv 47 | done 48 | done 49 | 
exit 0 50 | 51 | # echo "-------------------------------------------------------table" >> scale.csv 52 | # for idx in $(seq 1 ${#DATA[*]}) 53 | # do 54 | # for i in "${NG[@]}" 55 | # do 56 | # ./build/skywalker_multi --csv -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n=0 >> scale.csv 57 | # done 58 | # done 59 | # exit 0 60 | 61 | 62 | # echo "-------------------------------------------------------offline rw 100" >> scale.csv 63 | # for idx in $(seq 1 ${#DATA[*]}) 64 | # do 65 | # for i in "${NG[@]}" 66 | # do 67 | # ./build/skywalker_multi --csv -bias=1 --ol=0 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 68 | # done 69 | # done 70 | 71 | 72 | 73 | 74 | echo "-------------------------------------------------------offline sp 100" >> scale.csv 75 | for idx in $(seq 1 ${#DATA[*]}) 76 | do 77 | for i in "${NG[@]}" 78 | do 79 | ./build/skywalker_multi --csv -bias=1 --ol=0 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 80 | done 81 | done 82 | 83 | 84 | 85 | 86 | 87 | 88 | echo "-------------------------------------------------------online rw 100" >> scale.csv 89 | for idx in $(seq 1 ${#DATA[*]}) 90 | do 91 | for i in "${NG[@]}" 92 | do 93 | ./build/skywalker_multi --csv -bias=1 --ol=1 --ngpu=$i --s ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 94 | done 95 | done 96 | 97 | 98 | 99 | 100 | 101 | echo "-------------------------------------------------------online sp 100" >> scale.csv 102 | for idx in $(seq 1 ${#DATA[*]}) 103 | do 104 | for i in "${NG[@]}" 105 | do 106 | ./build/skywalker_multi --csv -bias=1 --ol=1 --ngpu=$i --s ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) >> scale.csv 107 | done 108 | done 109 | 110 | # echo "-------------------------------------------------------online ppr 0.15" >> scale.csv 111 | # for idx in $(seq 1 ${#DATA[*]}) 112 | # do 113 | # for i in "${NG[@]}" 114 | # do 115 | # ./build/skywalker_multi --csv -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) --ngpu=$i --s >> scale.csv 116 | # done 117 | # done 118 | 119 | # echo "-------------------------------------------------------offline ppr 0.15" >> scale.csv 120 | # for idx in $(seq 1 ${#DATA[*]}) 121 | # do 122 | # for i in "${NG[@]}" 123 | # do 124 | # ./build/skywalker_multi --csv -bias=1 --ol=0 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) --ngpu=$i --s >> scale.csv 125 | # done 126 | # done 127 | 128 | 129 | # echo "-------------------------------------------------------online node2vec 0.15" >> scale.csv 130 | # for idx in $(seq 1 ${#DATA[*]}) 131 | # do 132 | # for i in "${NG[@]}" 133 | # do 134 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n $(( $i * 40000 )) --ngpu=$i --s >> scale.csv 135 | # done 136 | # done -------------------------------------------------------------------------------- /scripts/table3_unbiased.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-17 21:38:38 7 | # @FilePath: /skywalker/figs/unbiased.sh 8 | ### 9 | 10 | # using sample dataset 11 | DATA=(lj) 12 | HD=(0.5) 13 | NV=(4847571) 14 | 15 | 
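# DATA lists the dataset basenames, NV the matching vertex counts (passed to the
# KnightKing and GraphWalker baselines below), and HD per-dataset values that other
# scripts forward via --hd (not used in this script). Only the small lj sample
# configuration is active here; the full dataset lists below are kept commented out.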
16 | # DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 17 | # HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 18 | # NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 19 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 20 | 21 | # DATA=( sk-2005 friendster) 22 | # HD=( 4 1 ) 23 | ITR=1 24 | NG=4 #8 25 | 26 | GR=".w.gr" 27 | EXE="./bin/main" #main_degree 28 | SG="--ngpu=1 --s" 29 | RW="--rw=1 --k 1 --d 100 " 30 | SP="--rw=0 --k 20 --d 2 " 31 | BATCH="--n=40000 -v" 32 | 33 | ROOT_DIR=$PWD 34 | LOG_FILE=${ROOT_DIR}"/result/table3_unbiased.csv" 35 | 36 | # DATA_DIR="~/data" 37 | DATA_DIR=${ROOT_DIR}"/dataset" 38 | GraphWalker_DIR="/home/pywang/sampling/GraphWalker" 39 | KnightKing_DIR="/home/pywang/sampling/KnightKing" 40 | CSAW_DIR="/home/pywang/sampling/C-SAW" 41 | NEXTDOOR_DIR="/home/pywang/sampling/nextdoor-experiments" 42 | 43 | echo "-------------------------------------------------------Skywalker unbias rw 100" >>"${LOG_FILE}" 44 | for idx in $(seq 1 ${#DATA[*]}); do 45 | ./bin/main --bias=0 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 ${RW} ${BATCH} >>"${LOG_FILE}" 46 | done 47 | 48 | echo "-------------------------------------------------------Skywalker unbias ppr 100" >>"${LOG_FILE}" 49 | for idx in $(seq 1 ${#DATA[*]}); do 50 | ./bin/main --bias=0 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --tp=0.15 ${RW} ${BATCH} >>"${LOG_FILE}" 51 | done 52 | 53 | echo "-------------------------------------------------------Skywalker unbias node2vec" >>"${LOG_FILE}" 54 | for idx in $(seq 1 ${#DATA[*]}); do 55 | ./bin/main --bias=0 --ol=0 --buffer --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --node2vec ${BATCH} >>"${LOG_FILE}" 56 | done 57 | 58 | echo "-------------------------------------------------------Skywalker unbias sage 40k" >>"${LOG_FILE}" 59 | for idx in $(seq 1 ${#DATA[*]}); do 60 | ./bin/main --bias=0 --input $DATA_DIR/${DATA[idx - 1]}${GR} --ngpu 1 --sage ${BATCH} >>"${LOG_FILE}" 61 | done 62 | 63 | echo "----------------------KnightKing unbiased 40k degree-------------------" >>"${LOG_FILE}" 64 | for idx in $(seq 1 ${#DATA[*]}); do 65 | echo ${DATA[idx - 1]} >>"${LOG_FILE}" 66 | $KnightKing_DIR/build/bin/deepwalk -w 40000 -l 100 -s unweighted -g $DATA_DIR/${DATA[idx - 1]}.uw.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 67 | done 68 | echo "----------------------KnightKing unbiased node2vec-------------------" >>"${LOG_FILE}" 69 | for idx in $(seq 1 ${#DATA[*]}); do 70 | echo ${DATA[idx - 1]} >>"${LOG_FILE}" 71 | $KnightKing_DIR/build/bin/node2vec -w 40000 -l 100 -s unweighted -p 2.0 -q 0.5 -g $DATA_DIR/${DATA[idx - 1]}.uw.data -v ${NV[idx - 1]} >>"${LOG_FILE}" 72 | done 73 | 74 | echo "----------------------KnightKing ppr unbiased ------------------" >>"${LOG_FILE}" 75 | for idx in $(seq 1 ${#DATA[*]}); do 76 | echo ${DATA[idx - 1]} >>"${LOG_FILE}" 77 | $KnightKing_DIR/build/bin/ppr -w 40000 -s unweighted -t 0.15 -v ${NV[idx - 1]} -g $DATA_DIR/${DATA[idx - 1]}.uw.data >>"${LOG_FILE}" 78 | done 79 | 80 | echo "----------------------nextdoor node2vec -------------------" >>"${LOG_FILE}" 81 | for idx in $(seq 1 ${#DATA[*]}); do 82 | echo "------------"${DATA[idx - 1]} >>"${LOG_FILE}" 83 | $NEXTDOOR_DIR/NextDoor/src/apps/randomwalks/Node2VecSampling -g $DATA_DIR/${DATA[idx - 1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l >>"${LOG_FILE}" 84 | done 85 | echo "----------------------nextdoor kh sample-------------------" >>"${LOG_FILE}" 86 | for idx in 
$(seq 1 ${#DATA[*]}); do 87 | echo "------------"${DATA[idx - 1]} >>"${LOG_FILE}" 88 | $NEXTDOOR_DIR/NextDoor/src/apps/khop/KHopSampling -g $DATA_DIR/${DATA[idx - 1]}.data -t edge-list -f binary -n 1 -k TransitParallel -l >>"${LOG_FILE}" 89 | done 90 | 91 | ED=".w.edge" 92 | EXE="./bin/apps/rwdomination" #main_degree 93 | 94 | cd $GraphWalker_DIR 95 | echo "------------skipping web-Google and orkut for GraphWalker due to internal errors" 96 | echo "-------------------------------------------------------GraphWalker unbias rw 40000 100" >>"${LOG_FILE}" 97 | for idx in $(seq 1 ${#DATA[*]}); do 98 | if [ ${DATA[idx - 1]} != "web-Google" -a ${DATA[idx - 1]} != "orkut" ] 99 | then 100 | ./bin/apps/rawrandomwalks file ~/data/${DATA[idx - 1]}.w.edge R 40000 L 100 N ${NV[idx - 1]} >>"${LOG_FILE}" 101 | fi 102 | done 103 | 104 | echo "-------------------------------------------------------GraphWalker unbias ppr 40000 100" >>"${LOG_FILE}" 105 | for idx in $(seq 1 ${#DATA[*]}); do 106 | if [ ${DATA[idx - 1]} != "web-Google" -a ${DATA[idx - 1]} != "orkut" ] 107 | then 108 | ./bin/apps/msppr file ~/data/${DATA[idx - 1]}.w.edge firstsource 0 numsources 40000 walkspersource 1 maxwalklength 100 prob 0.15 >>"${LOG_FILE}" 109 | fi 110 | done 111 | -------------------------------------------------------------------------------- /include/app.cuh: -------------------------------------------------------------------------------- 1 | #include "alias_table.cuh" 2 | #include "kernel.cuh" 3 | #include "roller.cuh" 4 | #include "sampler.cuh" 5 | #include "sampler_result.cuh" 6 | #include "util.cuh" 7 | 8 | // #include 9 | // #include 10 | #include 11 | 12 | DECLARE_bool(debug); 13 | DECLARE_bool(v); 14 | DECLARE_double(tp); 15 | DECLARE_bool(printresult); 16 | DECLARE_int32(m); 17 | DECLARE_bool(peritr); 18 | 19 | DECLARE_bool(static); 20 | DECLARE_bool(buffer); 21 | DECLARE_bool(loc); 22 | template 23 | struct duplicate_checker { 24 | T sampled[length]; 25 | int size = 0; 26 | __device__ bool check(T input) { 27 | for (size_t i = 0; i < size; i++) { 28 | if (sampled[i] == input) return false; 29 | } 30 | sampled[size] = input; 31 | size++; 32 | return true; 33 | } 34 | }; 35 | 36 | template 37 | struct matrixBuffer { 38 | T data[blockSize * tileSize]; 39 | uint *ptr_per_thread[blockSize]; 40 | int length[blockSize]; 41 | uint mainLength[blockSize / 42 | 32]; // each warp maintains one length; what is this used for? 43 | uint outItr[blockSize / 32]; // indicates the output location when flushing 44 | // multiple times 45 | 46 | uint tileLen; 47 | 48 | __device__ void Init() { 49 | // if (!LID) printf("line:%d function:%s \n", __LINE__, __FUNCTION__); 50 | length[LTID] = 0; 51 | ptr_per_thread[LTID] = nullptr; 52 | if (LID == 0) { 53 | tileLen = tileSize; 54 | mainLength[WID] = 0; 55 | outItr[WID] = 0; 56 | } 57 | } 58 | // deprecated due to error?
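// Flush drains the per-thread tiles cooperatively: each active lane first
// publishes its destination pointer, then the coalesced group walks all 32
// thread slots of the warp and copies every slot's buffered entries to global
// memory at ptr + outItr (+1 in Flush, since the sample result begins with the
// root id), so consecutive lanes write to consecutive addresses.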
59 | __device__ void Flush(uint *ptr, uint itr, coalesced_group &active) { 60 | // if (!LID) printf("行号:%d 函数名:%s \n", __LINE__, __FUNCTION__); 61 | // coalesced_group active = coalesced_threads(); 62 | // printf("active.size() %u\n",active.size()); 63 | // if (active.thread_rank() == 0) mainLength[WID]++; 64 | uint active_size = active.size(); 65 | uint rank = active.thread_rank(); 66 | ptr_per_thread[LTID] = ptr; 67 | active.sync(); 68 | for (size_t i = WID * 32; i < WID * 32 + 32; 69 | i++) { // loop over threads in warp 70 | active.sync(); 71 | // if (i == 2) printf("adding rank %u length[i] %u\n",rank,length[i]); 72 | for (size_t j = rank; j < length[i]; 73 | j += active_size) { // loop over data // active.size() 74 | // if (i == 2) printf("add for 2\n"); 75 | if (ptr_per_thread[i] != nullptr) 76 | *(ptr_per_thread[i] + outItr[WID] + j + 1) = data[i * tileSize + j]; 77 | // plus 1 as the sampleResult start with root id 78 | // if(idx_i==0) printf("add %u to idx\n",graph->getOutNode(src_id, 79 | // candidate)); 80 | // if (i == 2) printf("add0 %u to idx\n", data[i * tileSize + j]); 81 | } 82 | } 83 | } 84 | __device__ void Flush2(uint *ptr, coalesced_group &active) { 85 | // if (!LID) printf("行号:%d 函数名:%s \n", __LINE__, __FUNCTION__); 86 | // coalesced_group active = coalesced_threads(); 87 | // if (active.size() != 32) printf("active.size() %u\n", active.size()); 88 | // if (active.thread_rank() == 0) mainLength[WID]++; 89 | int active_size = active.size(); 90 | int rank = active.thread_rank(); 91 | ptr_per_thread[LTID] = ptr; 92 | active.sync(); 93 | for (size_t i = WID * 32; i < WID * 32 + 32; 94 | i++) { // loop over threads in warp 95 | active.sync(); 96 | for (size_t j = rank; j < length[i]; 97 | j += active_size) { // loop over data // active.size() 98 | if (ptr_per_thread[i] != nullptr) 99 | *(ptr_per_thread[i] + outItr[WID] + j) = data[i * tileSize + j]; 100 | // if(i==0) printf("add %u to idx\n",data[i * tileSize + j]); 101 | } 102 | } 103 | } 104 | __device__ void CheckFlush(uint *ptr, uint itr, coalesced_group &active) { 105 | if (active.thread_rank() == 0) mainLength[WID]++; 106 | active.sync(); 107 | // printf("active.sync() %u itr %u \n", active.thread_rank(), itr); 108 | 109 | if (mainLength[WID] >= tileSize) { 110 | active.sync(); 111 | ptr_per_thread[LTID] = ptr; 112 | for (size_t i = WID * 32; i < WID * 32 + 32; 113 | i++) { // loop over threads in warp 114 | active.sync(); 115 | for (size_t j = active.thread_rank(); j < length[i]; // loop over data 116 | j += active.size()) { 117 | *(ptr_per_thread[i] + outItr[WID] + j + 1) = data[i * tileSize + j]; 118 | // if (i == 2) printf("add %u to idx\n", data[i * tileSize + j]); 119 | } 120 | if (active.thread_rank() == 0) length[i] = 0; 121 | } 122 | // active.sync(); 123 | if (active.thread_rank() == 0) { 124 | mainLength[WID] = 0; 125 | outItr[WID] += tileSize; 126 | } 127 | } 128 | } 129 | __device__ void Finish() { length[LTID] = 0; } 130 | 131 | /** 132 | * @description: set data in buffer for each thread 133 | * @param {*} 134 | * @return {*} 135 | */ 136 | __forceinline__ __device__ void Set(uint v) { 137 | data[LTID * tileSize + length[LTID]] = v; 138 | // length[LTID]=length[LTID]+1; 139 | atomicAdd(length + LTID, 1); 140 | // if(length[LTID]>=tileSize) // better to manually flush in case of 141 | // divergence 142 | } 143 | __device__ void CollectiveSet(uint id, uint v) { 144 | coalesced_group local = coalesced_threads(); 145 | data[id * tileSize + length[id] + local.thread_rank()] = v; 146 | if 
(local.thread_rank() == 0) length[id] += local.size(); 147 | // if(length[LTID]>=tileSize) // better to manually flush in case of 148 | // divergence 149 | } 150 | }; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Skywalker 2 | 3 | This is the repo for Skywalker, an efficient alias-method-based graph sampling and random walk framework on GPUs. 4 | 5 | ## Introduction 6 | 7 | Graph sampling and random walk operations, which capture the structural properties of graphs, play an important role today because we cannot directly run computing-intensive 8 | algorithms on large-scale graphs. Existing system frameworks for these tasks are not only spatially and temporally inefficient, but many also lead to biased results. This paper presents Skywalker, a high-throughput, quality-preserving random walk and sampling framework based on GPUs. Skywalker makes three key contributions: first, it takes the first step to realize efficient biased sampling with the alias method on a GPU. Second, it introduces well-crafted load-balancing techniques to effectively utilize the massive parallelism of GPUs. Third, it accelerates alias table construction and reduces the GPU memory requirement with an efficient memory management scheme. We show that Skywalker 9 | greatly outperforms the state-of-the-art CPU-based and GPU-based baselines in a wide spectrum of workload scenarios. 10 | 11 | For details, please first refer to our 2021 PACT paper ["Skywalker: Efficient Alias-Method-Based Graph Sampling and Random Walk on GPUs"](https://ieeexplore.ieee.org/document/9563020) by Pengyu Wang, Chao Li, Jing Wang, Taolei Wang, Lu Zhang, Jingwen Leng, Quan Chen, and Minyi Guo. If you have any questions, please feel free to contact us. 12 | 13 | Beyond the contributions mentioned above, we further extended the framework to a multi-GPU version and made a series of optimizations. The new work, named Skywalker+, has been submitted to TPDS. 14 | 15 | ## Setup 16 | ``` 17 | git clone https://github.com/wpybtw/skywalker_artifact --recursive 18 | ``` 19 | 20 | Note that CMake is not fully set up yet. We use CMake to build gflags and then run make: 21 | ``` 22 | cd build 23 | cmake .. 24 | make -j 25 | cd .. 26 | make 27 | ``` 28 | 29 | ## Dataset 30 | When evaluating Skywalker, we use 7 commonly used graph datasets: web-Google, LiveJournal, Orkut, Arabic-2005, UK-2005, Friendster, and SK-2005. The datasets can be downloaded from [SNAP](http://snap.stanford.edu/data/index.html) and [Webgraph](http://law.di.unimi.it/datasets.php). You can also run Skywalker on your own datasets, as long as they are prepared as described in the Preprocessing section. 31 | 32 | 33 | ## Preprocessing 34 | Skywalker uses the [Galois](https://iss.oden.utexas.edu/?p=projects/galois) graph format (.gr) as its input. Other formats such as edge lists (from [SNAP](http://snap.stanford.edu/data/index.html)) or Matrix Market files can be converted to it with Galois' graph-convert tool. Compressed graphs such as [Webgraph](http://law.di.unimi.it/datasets.php) datasets need to be uncompressed first.
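The evaluation scripts under ```./scripts``` and ```./figs``` assume per-dataset files following the naming below. This listing is collected from the scripts themselves; the ```~/data``` location and the suffixes are only the defaults used there, so adjust them to your own setup:
```
~/data/<name>.w.gr                                           # weighted Galois graph, passed to ./bin/main via --input
~/data/<name>.w.edge                                         # plain edge list, used by the GraphWalker baseline
~/data/<name>.data and <name>.uw.data                        # (un)weighted edge lists, used by KnightKing and NextDoor
~/data/<name>.w.edge_beg_pos.bin and <name>.w.edge_csr.bin   # CSR binaries, used by C-SAW
```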
35 | Here is an example of converting a SNAP edge list: 36 | ``` 37 | wget http://snap.stanford.edu/data/wiki-Vote.txt.gz 38 | gzip -d wiki-Vote.txt.gz 39 | $GALOIS_PATH/build/tools/graph-convert/graph-convert -edgelist2gr ~/data/wiki-Vote.txt ~/data/wiki-Vote.gr 40 | ``` 41 | ## Execution 42 | We implemented four different algorithms in Skywalker, namely DeepWalk, PPR, Node2vec, and neighbour sampling, all based on the alias method. We support both online and offline sampling, i.e., constructing the alias table either on the fly or for all vertices of a graph at once as a preprocessing step. The source files are placed under the ```./src``` and ```./include``` folders. Skywalker is configured through gflags, and the default values are defined in ```main.cu``` in the ```src``` folder. You can check what the different flags mean from their annotations and change the configuration simply by editing the command line. Here are several examples demonstrating how to invoke Skywalker: 43 | 44 | Execute DeepWalk: 45 | ``` 46 | ./bin/main --deepwalk --bias=1 --ol=0 --buffer --input ~/data/friendster.w.gr -v -n 40000 47 | ``` 48 | Here, ```--deepwalk``` selects the algorithm to execute. ```--bias=1 --ol=0``` means we run biased DeepWalk in offline mode. ```--buffer``` enables the GPU buffer to improve performance, ```--input``` sets the input graph, ```-v``` prints more information, and ```-n``` sets the batch size. 49 | 50 | Execute node2vec: 51 | ``` 52 | ./bin/main --gmgraph --bias=1 --ol=0 --buffer --input ~/data/friendster.w.gr --ngpu 4 --node2vec -n 40000 53 | ``` 54 | Besides the basic settings of Skywalker, Skywalker+ enables more options: you can choose which kind of memory to use and how many GPUs to run on in multi-GPU mode. 55 | 56 | If you just want to verify the results in our paper, you can simply use the scripts in the ```./scripts``` folder, which contains all the scripts we used to generate the evaluation data. As long as you have stored and preprocessed the datasets correctly, all you need to change are the paths in the scripts, and the results will automatically be stored in CSV format. 57 | 58 | Run a script: 59 | ``` 60 | bash ./scripts/biased.sh 61 | ``` 62 | 63 | Note that we also run several other graph sampling and random walk frameworks for comparison. Their configurations are included in our scripts, but you still need to build those frameworks and set their paths yourself, otherwise some scripts will fail. 64 | 65 | 66 | ## Details 67 | ```--newsampler``` indicates using Sampler_new for a correct result layout. Is it only for unbiased and offline sampling? 68 | 69 | ## Contributors 70 | Our team has been working on related technologies since 2017. Thank you to everyone for contributing to this project.
71 | 72 | Correspondence to: 73 | - [Pengyu Wang](wpybtw@sjtu.edu.cn) (wpybtw@sjtu.edu.cn) 74 | - [Cheng Xu](jerryxu@sjtu.edu.cn) (jerryxu@sjtu.edu.cn) 75 | - [Chao Li](lichao@cs.sjtu.edu.cn) (lichao@cs.sjtu.edu.cn) 76 | - [Jieping Ye](yejieping.ye@alibaba-inc.com)(yejieping.ye@alibaba-inc.com) 77 | - [Jing Wang](jing618@sjtu.edu.cn) (jing618@sjtu.edu.cn) 78 | - [Taolei Wang](sjtuwtl@sjtu.edu.cn) (sjtuwtl@sjtu.edu.cn) 79 | - [Lu Zhang](luzhang@sjtu.edu.cn) (luzhang@sjtu.edu.cn) 80 | - [Yue Wu](matthew.wy@alibaba-inc.com)(matthew.wy@alibaba-inc.com) 81 | - [Jingwen Leng](leng-jw@cs.sjtu.edu.cn) (leng-jw@cs.sjtu.edu.cn) 82 | - [Quan Chen](chen-quan@cs.sjtu.edu.cn) (chen-quan@cs.sjtu.edu.cn) 83 | - [Minyi Guo](guo-my@cs.sjtu.edu.cn) (guo-my@cs.sjtu.edu.cn) 84 | 85 | -------------------------------------------------------------------------------- /src/util.cu: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | 3 | // __device__ char char_atomicCAS(char *addr, char cmp, char val) { 4 | // unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) 5 | // & 6 | // (0xFFFFFFFFFFFFFFFCULL)); 7 | // unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 8 | // unsigned mask = 0xFFU; 9 | // mask <<= al_offset; 10 | // mask = ~mask; 11 | // unsigned sval = val; 12 | // sval <<= al_offset; 13 | // unsigned old = *al_addr, assumed, setval; 14 | // do { 15 | // assumed = old; 16 | // setval = assumed & mask; 17 | // setval |= sval; 18 | // old = atomicCAS(al_addr, assumed, setval); 19 | // } while (assumed != old); 20 | // return (char)((assumed >> al_offset) & 0xFFU); 21 | // } 22 | 23 | // template 24 | // __inline__ __device__ T warpPrefixSum(T val, int lane_id) { 25 | // T val_shuffled; 26 | // for (int offset = 1; offset < warpSize; offset *= 2) { 27 | // val_shuffled = __shfl_up(val, offset); 28 | // if (lane_id >= offset) { 29 | // val += val_shuffled; 30 | // } 31 | // } 32 | // return val; 33 | // } 34 | 35 | double wtime() { 36 | double time[2]; 37 | struct timeval time1; 38 | gettimeofday(&time1, NULL); 39 | 40 | time[0] = time1.tv_sec; 41 | time[1] = time1.tv_usec; 42 | 43 | return time[0] + time[1] * 1.0e-6; 44 | } 45 | __device__ void __conv() { coalesced_group active = coalesced_threads(); } 46 | __device__ void active_size(int n = 0) { 47 | coalesced_group active = coalesced_threads(); 48 | if (active.thread_rank() == 0) 49 | printf("TBID: %d WID: %d coalesced_group %llu at line %d\n", BID, WID, 50 | active.size(), n); 51 | } 52 | __device__ int active_size2(char *txt, int n = 0) { 53 | coalesced_group active = coalesced_threads(); 54 | if (active.thread_rank() == 0) 55 | printf("%s coalesced_group %llu at line %d\n", txt, active.size(), n); 56 | } 57 | template 58 | void printH(T *ptr, int size) { 59 | T *ptrh = new T[size]; 60 | CUDA_RT_CALL(cudaMemcpy(ptrh, ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); 61 | printf("printH: "); 62 | for (size_t i = 0; i < size; i++) { 63 | // printf("%d\t", ptrh[i]); 64 | std::cout << ptrh[i] << "\t"; 65 | } 66 | printf("\n"); 67 | delete ptrh; 68 | } 69 | 70 | // https://forums.developer.nvidia.com/t/how-can-i-use-atomicsub-for-floats-and-doubles/64340/5 71 | // __device__ double my_atomicSub(double *address, double val) { 72 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 73 | // unsigned long long int old = *address_as_ull, assumed; 74 | // do { 75 | // assumed = old; 76 | // old = atomicCAS( 77 | // address_as_ull, assumed, 78 | // 
__double_as_longlong(__longlong_as_double(assumed) - 79 | // val)); // Note: uses integer comparison to avoid 80 | // // hang in case of NaN (since NaN != NaN) 81 | // } while (assumed != old); 82 | // return __longlong_as_double(old); 83 | // } 84 | 85 | // https://forums.developer.nvidia.com/t/how-can-i-use-atomicsub-for-floats-and-doubles/64340/5 86 | __device__ float my_atomicSub(float *address, float val) { 87 | int *address_as_int = (int *)address; 88 | int old = *address_as_int, assumed; 89 | do { 90 | assumed = old; 91 | old = atomicCAS( 92 | address_as_int, assumed, 93 | __float_as_int(__int_as_float(assumed) - 94 | val)); // Note: uses integer comparison to avoid hang in 95 | // case of NaN (since NaN != NaN) 96 | } while (assumed != old); 97 | return __int_as_float(old); 98 | } 99 | 100 | // __device__ long long my_atomicSub(long long *address, long long val) { 101 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 102 | // unsigned long long int old = *address_as_ull, assumed; 103 | // do { 104 | // assumed = old; 105 | // old = atomicCAS(address_as_ull, assumed, 106 | // ((assumed)-val)); // Note: uses integer comparison to avoid 107 | // // hang in case of NaN (since NaN != NaN) 108 | // } while (assumed != old); 109 | // return (old); 110 | // } 111 | 112 | // __device__ unsigned long long my_atomicSub(unsigned long long *address, 113 | // unsigned long long val) { 114 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 115 | // unsigned long long int old = *address_as_ull, assumed; 116 | // do { 117 | // assumed = old; 118 | // old = atomicCAS(address_as_ull, assumed, ((assumed)-val)); 119 | // } while (assumed != old); 120 | // return (old); 121 | // } 122 | 123 | // __device__ long long my_atomicAdd(long long *address, long long val) { 124 | // unsigned long long int *address_as_ull = (unsigned long long int *)address; 125 | // unsigned long long int old = *address_as_ull, assumed; 126 | // do { 127 | // assumed = old; 128 | // old = atomicCAS(address_as_ull, assumed, ((assumed) + val)); 129 | // } while (assumed != old); 130 | // return (old); 131 | // } 132 | 133 | template <> 134 | __device__ void printD(float *ptr, size_t size) { 135 | printf("printDf: size %llu: ", (u64)size); 136 | for (size_t i = 0; i < size; i++) { 137 | printf("%f\t", ptr[i]); 138 | } 139 | printf("\n"); 140 | } 141 | template <> 142 | __device__ void printD(int *ptr, size_t size) { 143 | printf("printDf: size %llu: ", (u64)size); 144 | for (size_t i = 0; i < size; i++) { 145 | printf("%d\t", ptr[i]); 146 | } 147 | printf("\n"); 148 | } 149 | 150 | template <> 151 | __device__ void printD(uint *ptr, size_t size) { 152 | printf("printDf: size %llu: ", (u64)size); 153 | for (size_t i = 0; i < size; i++) { 154 | printf("%u\t", ptr[i]); 155 | } 156 | printf("\n"); 157 | } 158 | 159 | // template __global__ void init_range_d(T *ptr, size_t size) { 160 | // if (TID < size) { 161 | // ptr[TID] = TID; 162 | // } 163 | // } 164 | // template void init_range(T *ptr, size_t size) { 165 | // init_range_d<<>>(ptr, size); 166 | // } 167 | // template __global__ void init_array_d(T *ptr, size_t size, T v) 168 | // { 169 | // if (TID < size) { 170 | // ptr[TID] = v; 171 | // } 172 | // } 173 | // template void init_array(T *ptr, size_t size, T v) { 174 | // init_array_d<<>>(ptr, size, v); 175 | // } 176 | -------------------------------------------------------------------------------- /figs/spec.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | ### 3 | # @Description: 4 | # @Date: 2020-11-17 13:39:45 5 | # @LastEditors: Pengyu Wang 6 | # @LastEditTime: 2021-01-15 16:43:38 7 | # @FilePath: /skywalker/figs/online.sh 8 | ### 9 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 10 | HD=(0.25 0.5 1 0.25 0.25 1 1) # uk-union rmat29 web-ClueWeb09) 11 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 12 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 13 | 14 | # DATA=( sk-2005 friendster) 15 | # HD=( 4 1 ) 16 | ITR=1 17 | NG=4 #8 18 | 19 | GR=".w.gr" 20 | EXE="./bin/main" #main_degree 21 | SG="--ngpu=1 --s" 22 | RW="--rw=1 --k 1 --d 100 " 23 | SP="--rw=0 --k 20 --d 2 " 24 | BATCH="--n 40000" 25 | 26 | # --randomweight=1 --weightrange=2 27 | 28 | 29 | # echo "------------------------------table construction compare-----------------" >> spec.csv 30 | 31 | # echo "-------------------------------------------------------main_nospec" >> spec.csv 32 | # for idx in $(seq 1 ${#DATA[*]}) 33 | # do 34 | # for i in $(seq 1 ${ITR}) 35 | # do 36 | # ./bin/main_nospec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 37 | # done 38 | # done 39 | # echo "-------------------------------------------------------main_spec" >> spec.csv 40 | # for idx in $(seq 1 ${#DATA[*]}) 41 | # do 42 | # for i in $(seq 1 ${ITR}) 43 | # do 44 | # ./bin/main_spec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 45 | # done 46 | # done 47 | # echo "-------------------------------------------------------main_degree_nospec table" >> spec.csv 48 | # for idx in $(seq 1 ${#DATA[*]}) 49 | # do 50 | # for i in $(seq 1 ${ITR}) 51 | # do 52 | # ./bin/main_degree_nospec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 53 | # done 54 | # done 55 | # echo "-------------------------------------------------------main_degree_spec table" >> spec.csv 56 | # for idx in $(seq 1 ${#DATA[*]}) 57 | # do 58 | # for i in $(seq 1 ${ITR}) 59 | # do 60 | # ./bin/main_degree_spec -bias=1 --ol=0 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --n 10 >> spec.csv 61 | # done 62 | # done 63 | 64 | 65 | # echo "-------------------------------------------------------main_nospec sp 100" >> spec.csv 66 | # for idx in $(seq 1 ${#DATA[*]}) 67 | # do 68 | # for i in $(seq 1 ${ITR}) 69 | # do 70 | # ./bin/main_nospec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 71 | # done 72 | # done 73 | # echo "-------------------------------------------------------main_degree_nospec sp 100" >> spec.csv 74 | # for idx in $(seq 1 ${#DATA[*]}) 75 | # do 76 | # for i in $(seq 1 ${ITR}) 77 | # do 78 | # ./bin/main_degree_nospec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 79 | # done 80 | # done 81 | # echo "-------------------------------------------------------main_spec sp 100" >> spec.csv 82 | # for idx in $(seq 1 ${#DATA[*]}) 83 | # do 84 | # for i in $(seq 1 ${ITR}) 85 | # do 86 | # ./bin/main_spec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 87 | # done 88 | # done 89 | 90 | # echo "-------------------------------------------------------main_degree_spec sp 100" >> spec.csv 91 | # for idx in 
$(seq 1 ${#DATA[*]}) 92 | # do 93 | # for i in $(seq 1 ${ITR}) 94 | # do 95 | # ./bin/main_degree_spec -bias=1 --ol=1 ${SG} ${SP} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 96 | # done 97 | # done 98 | 99 | 100 | echo "-------------------------------------------------------main_nospec rw 100" >> spec.csv 101 | for idx in $(seq 1 ${#DATA[*]}) 102 | do 103 | for i in $(seq 1 ${ITR}) 104 | do 105 | ./bin/main_nospec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 106 | done 107 | done 108 | echo "-------------------------------------------------------main_spec rw 100" >> spec.csv 109 | for idx in $(seq 1 ${#DATA[*]}) 110 | do 111 | for i in $(seq 1 ${ITR}) 112 | do 113 | ./bin/main_spec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 114 | done 115 | done 116 | echo "-------------------------------------------------------main_degree_nospec rw 100" >> spec.csv 117 | for idx in $(seq 1 ${#DATA[*]}) 118 | do 119 | for i in $(seq 1 ${ITR}) 120 | do 121 | ./bin/main_degree_nospec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 122 | done 123 | done 124 | echo "-------------------------------------------------------main_degree_spec rw 100" >> spec.csv 125 | for idx in $(seq 1 ${#DATA[*]}) 126 | do 127 | for i in $(seq 1 ${ITR}) 128 | do 129 | ./bin/main_degree_spec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 130 | done 131 | done 132 | 133 | 134 | 135 | # echo "-------------------------------------------------------online ppr 0.15" >> spec.csv 136 | # for idx in $(seq 1 ${#DATA[*]}) 137 | # do 138 | # for i in $(seq 1 ${ITR}) 139 | # do 140 | # ./bin/main -bias=1 --ol=1 --n=40000 ${RW} --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> spec.csv 141 | # done 142 | # done 143 | 144 | # echo "-------------------------------------------------------online node2vec 0.15" >> spec.csv 145 | # for idx in $(seq 1 ${#DATA[*]}) 146 | # do 147 | # for i in $(seq 1 ${ITR}) 148 | # do 149 | # ./bin/node2vec -node2vec --n=40000 ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} ${SG} >> spec.csv 150 | # done 151 | # done 152 | 153 | 154 | 155 | # echo "------------------------------------------------------- spec ---------------------------------------------------------" >> spec.csv 156 | # echo "-------------------------------------------------------online rw 100" >> spec.csv 157 | # for idx in $(seq 1 ${#DATA[*]}) 158 | # do 159 | # for i in $(seq 1 ${ITR}) 160 | # do 161 | # ./bin/main_spec -bias=1 --ol=1 ${SG} ${RW} --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} ${BATCH} >> spec.csv 162 | # done 163 | # done 164 | 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /old/graph.cuh: -------------------------------------------------------------------------------- 1 | #ifndef _GRAPH_CUH 2 | #define _GRAPH_CUH 3 | 4 | // #include "app.cuh" 5 | #include "common.cuh" 6 | // #include "intrinsics.cuh" 7 | // #include "job.cuh" 8 | // #include "print.cuh" 9 | // #include "timer.cuh" 10 | // #include "worklist.cuh" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | #include 29 | #include 30 | using 
namespace intrinsics; 31 | DECLARE_bool(map); 32 | 33 | template void PrintResults(T *results, uint n); 34 | 35 | class Graph { 36 | public: 37 | // Input input; 38 | // vector tasks; 39 | // ulong memRequest; 40 | // ulong memRequest_d; 41 | string graphFilePath; 42 | int napp; 43 | void *gr_ptr; 44 | edge_t *map_xadj; 45 | vtx_t *map_adjncy; 46 | weight_t *map_adjwgt; 47 | size_t filesize; 48 | 49 | bool hasZeroID; 50 | uint64_t numNode; 51 | uint64_t numEdge; 52 | // std::vector edges; 53 | std::vector weights; 54 | uint64_t sizeEdgeTy; 55 | 56 | // graph 57 | // vtx_t *vwgt_d,*vwgt ; 58 | edge_t *xadj, *xadj_d; 59 | vtx_t *adjncy, *adjncy_d; 60 | weight_t *adjwgt, *adjwgt_d; 61 | uint *inDegree; 62 | uint *outDegree; 63 | bool weighted; 64 | bool withWeight; 65 | 66 | 67 | // scheduler-specific 68 | // uint assigned_sm; 69 | // int device = 0; 70 | // uint64_t gmem_used = 0; 71 | // uint64_t um_used = 0; 72 | 73 | Graph(); 74 | ~Graph() { 75 | // if (!FLAGS_map) { 76 | H_ERR(cudaFree(xadj)); 77 | H_ERR(cudaFree(adjncy)); 78 | H_ERR(cudaFree(adjwgt)); 79 | // }else{ 80 | // munmap(gr_ptr, filesize); 81 | // } 82 | } 83 | 84 | 85 | void Load(); 86 | void Map(); 87 | 88 | void Process_mmap(cudaStream_t &stream); 89 | void Prepare(cudaStream_t &stream); 90 | void Transfer(cudaStream_t &stream, bool large = false); 91 | void Process(cudaStream_t &stream); 92 | void Cleanup(cudaStream_t &stream); 93 | 94 | void Process_single(cudaStream_t &stream); 95 | void Process_one_by_one(cudaStream_t &stream, bool large_graph = false); 96 | void Process_multi_thread(); 97 | void Set_Mem_Policy(cudaStream_t &stream, bool needWeight = false); 98 | void gk_fclose(FILE *fp) { fclose(fp); } 99 | 100 | FILE *gk_fopen(const char *fname, const char *mode, const char *msg) { 101 | FILE *fp; 102 | char errmsg[8192]; 103 | fp = fopen(fname, mode); 104 | if (fp != NULL) 105 | return fp; 106 | sprintf(errmsg, "file: %s, mode: %s, [%s]", fname, mode, msg); 107 | perror(errmsg); 108 | printf("Failed on gk_fopen()\n"); 109 | return NULL; 110 | } 111 | 112 | void ReadGraphGRHead() { 113 | FILE *fpin; 114 | bool readew; 115 | fpin = gk_fopen(graphFilePath.data(), "r", "ReadGraphGR: Graph"); 116 | size_t read; 117 | uint64_t x[4]; 118 | if (fread(x, sizeof(uint64_t), 4, fpin) != 4) { 119 | printf("Unable to read header\n"); 120 | } 121 | if (x[0] != 1) /* version */ 122 | printf("Unknown file version\n"); 123 | sizeEdgeTy = x[1]; 124 | // uint64_t sizeEdgeTy = le64toh(x[1]); 125 | numNode = x[2]; 126 | numEdge = x[3]; 127 | weighted = (bool)sizeEdgeTy; 128 | gk_fclose(fpin); 129 | } 130 | 131 | void ReadGraphGR() { 132 | // uint *vsize; 133 | FILE *fpin; 134 | bool readew; 135 | cout<, 9 | BufferType::SHMEM>; //, AliasTableStorePolicy::STORE 10 | WCTable *tables = (WCTable *)buffer; 11 | WCTable *table = &tables[WID]; 12 | 13 | bool not_all_zero = 14 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 15 | ggraph->getDegree(node_id), current_itr, node_id); 16 | if (not_all_zero) { 17 | table->construct(); 18 | table->SaveAliasTable(ggraph); 19 | if (LID == 0) ggraph->SetValid(node_id); 20 | } 21 | table->Clean(); 22 | } 23 | 24 | __device__ void ConstructBlockCentic(Sampler *sampler, sample_result &result, 25 | gpu_graph *ggraph, curandState state, 26 | int current_itr, int node_id, void *buffer, 27 | Vector_pack2 *vector_packs) { 28 | using BCTable = 29 | alias_table_constructor_shmem; 31 | BCTable *tables = (BCTable *)buffer; 32 | BCTable *table = &tables[0]; 33 | table->loadGlobalBuffer(vector_packs); 34 | 
__syncthreads(); 35 | bool not_all_zero = 36 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 37 | ggraph->getDegree(node_id), current_itr, node_id); 38 | __syncthreads(); 39 | if (not_all_zero) { 40 | table->constructBC(); 41 | table->SaveAliasTable(ggraph); 42 | if (LTID == 0) ggraph->SetValid(node_id); 43 | } 44 | __syncthreads(); 45 | table->Clean(); 46 | } 47 | 48 | __global__ void ConstructAliasTableKernel(Sampler *sampler, 49 | Vector_pack2 *vector_pack) { 50 | sample_result &result = sampler->result; 51 | gpu_graph *ggraph = &sampler->ggraph; 52 | Vector_pack2 *vector_packs = &vector_pack[BID]; 53 | using WCTable = alias_table_constructor_shmem< 54 | uint, thread_block_tile<32>, 55 | BufferType::SHMEM>; //, AliasTableStorePolicy::STORE 56 | __shared__ WCTable table[WARP_PER_BLK]; 57 | void *buffer = &table[0]; 58 | curandState state; 59 | curand_init(TID, 0, 0, &state); 60 | 61 | __shared__ uint current_itr; 62 | if (threadIdx.x == 0) current_itr = 0; 63 | __syncthreads(); 64 | 65 | Vector_gmem *high_degrees = &sampler->result.high_degrees[0]; 66 | 67 | sample_job job; 68 | __threadfence_block(); 69 | if (LID == 0) job = result.requireOneJob(current_itr); 70 | __syncwarp(FULL_WARP_MASK); 71 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 72 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 73 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 74 | __syncwarp(FULL_WARP_MASK); 75 | while (job.val) { 76 | if (ggraph->getDegree(job.node_id) < ELE_PER_WARP) { 77 | ConstructWarpCentic(sampler, result, ggraph, state, current_itr, job.idx, 78 | job.node_id, buffer); 79 | } else { 80 | if (LID == 0) result.AddHighDegree(current_itr, job.node_id); 81 | } 82 | __syncwarp(FULL_WARP_MASK); 83 | if (LID == 0) job = result.requireOneJob(current_itr); 84 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 85 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 86 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 87 | } 88 | __syncthreads(); 89 | __shared__ sample_job high_degree_job; 90 | if (LTID == 0) { 91 | job = result.requireOneHighDegreeJob(current_itr); 92 | high_degree_job.val = job.val; 93 | high_degree_job.node_id = job.node_id; 94 | } 95 | __syncthreads(); 96 | while (high_degree_job.val) { 97 | ConstructBlockCentic(sampler, result, ggraph, state, current_itr, 98 | high_degree_job.node_id, buffer, 99 | vector_packs); // buffer_pointer 100 | // __syncthreads(); 101 | if (LTID == 0) { 102 | job = result.requireOneHighDegreeJob(current_itr); 103 | high_degree_job.val = job.val; 104 | high_degree_job.node_id = job.node_id; 105 | } 106 | __syncthreads(); 107 | } 108 | } 109 | __global__ void PrintTable(Sampler *sampler) { 110 | if (TID == 0) { 111 | printf("\nprob:\n"); 112 | printD(sampler->prob_array, 100); 113 | printf("\nalias:\n"); 114 | printD(sampler->alias_array, 100); 115 | } 116 | } 117 | 118 | // todo offset 119 | float ConstructTable(Sampler &sampler, uint ngpu, uint index) { 120 | LOG("%s\n", __FUNCTION__); 121 | int device; 122 | cudaDeviceProp prop; 123 | cudaGetDevice(&device); 124 | cudaGetDeviceProperties(&prop, device); 125 | int n_sm = prop.multiProcessorCount; 126 | 127 | sampler.AllocateAliasTablePartial(ngpu, index); 128 | 129 | // paster(sizeof(alias_table_constructor_shmem, 130 | // BufferType::SHMEM>)); 131 | // paster(sizeof( 132 | // alias_table_constructor_shmem)); 134 | 135 | Sampler *sampler_ptr; 136 | MyCudaMalloc(&sampler_ptr, sizeof(Sampler)); 137 | CUDA_RT_CALL(cudaMemcpy(sampler_ptr, &sampler, sizeof(Sampler), 
138 | cudaMemcpyHostToDevice)); 139 | double start_time, total_time; 140 | init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr,true); 141 | 142 | // allocate global buffer 143 | int block_num = n_sm * FLAGS_m; 144 | int gbuff_size = sampler.ggraph.MaxDegree; 145 | 146 | LOG("alllocate GMEM buffer %d MB\n", 147 | block_num * gbuff_size * MEM_PER_ELE / 1024 / 1024); 148 | 149 | Vector_pack2 *vector_pack_h = new Vector_pack2[block_num]; 150 | for (size_t i = 0; i < block_num; i++) { 151 | vector_pack_h[i].Allocate(gbuff_size, index); 152 | } 153 | CUDA_RT_CALL(cudaDeviceSynchronize()); 154 | Vector_pack2 *vector_packs; 155 | CUDA_RT_CALL( 156 | MyCudaMalloc(&vector_packs, sizeof(Vector_pack2) * block_num)); 157 | CUDA_RT_CALL(cudaMemcpy(vector_packs, vector_pack_h, 158 | sizeof(Vector_pack2) * block_num, 159 | cudaMemcpyHostToDevice)); 160 | 161 | // Global_buffer 162 | CUDA_RT_CALL(cudaDeviceSynchronize()); 163 | start_time = wtime(); 164 | #ifdef check 165 | ConstructAliasTableKernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr, vector_packs); 166 | #else 167 | ConstructAliasTableKernel<<>>(sampler_ptr, 168 | vector_packs); 169 | #endif 170 | CUDA_RT_CALL(cudaDeviceSynchronize()); 171 | // CUDA_RT_CALL(cudaPeekAtLastError()); 172 | total_time = wtime() - start_time; 173 | LOG("Construct table time:\t%.6f\n", total_time); 174 | // paster(FLAGS_hmgraph); 175 | if ((FLAGS_weight || FLAGS_randomweight) && (!FLAGS_hmgraph)) { 176 | CUDA_RT_CALL(cudaFree(sampler.ggraph.adjwgt)); 177 | } 178 | return total_time; 179 | } 180 | -------------------------------------------------------------------------------- /src/online_sample.cu: -------------------------------------------------------------------------------- 1 | #include "app.cuh" 2 | 3 | static __device__ void SampleWarpCentic(sample_result &result, 4 | gpu_graph *ggraph, curandState state, 5 | int current_itr, int idx, int node_id, 6 | void *buffer) { 7 | alias_table_constructor_shmem> *tables = 8 | (alias_table_constructor_shmem> *)buffer; 9 | alias_table_constructor_shmem> *table = 10 | &tables[WID]; 11 | bool not_all_zero = 12 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 13 | ggraph->getDegree(node_id), current_itr, node_id); 14 | if (not_all_zero) { 15 | table->construct(); 16 | table->roll_atomic(&state, result); 17 | } 18 | table->Clean(); 19 | } 20 | 21 | static __device__ void SampleBlockCentic(sample_result &result, 22 | gpu_graph *ggraph, curandState state, 23 | int current_itr, int node_id, 24 | void *buffer, 25 | Vector_pack *vector_packs) { 26 | alias_table_constructor_shmem *tables = 27 | (alias_table_constructor_shmem *) 28 | buffer; 29 | alias_table_constructor_shmem *table = 30 | &tables[0]; 31 | table->loadGlobalBuffer(vector_packs); 32 | __syncthreads(); 33 | bool not_all_zero = 34 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 35 | ggraph->getDegree(node_id), current_itr, node_id); 36 | __syncthreads(); 37 | if (not_all_zero) { 38 | table->constructBC(); 39 | uint target_size = 40 | MIN(ggraph->getDegree(node_id), result.hops[current_itr + 1]); 41 | table->roll_atomic(target_size, &state, result); 42 | } 43 | __syncthreads(); 44 | table->Clean(); 45 | } 46 | 47 | __global__ void sample_kernel(Sampler *sampler, 48 | Vector_pack *vector_pack) { 49 | sample_result &result = sampler->result; 50 | gpu_graph *ggraph = &sampler->ggraph; 51 | Vector_pack *vector_packs = &vector_pack[BID]; 52 | __shared__ alias_table_constructor_shmem> 53 | table[WARP_PER_BLK]; 54 | void *buffer = &table[0]; 55 | curandState state; 
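// Sampling proceeds hop by hop: warps repeatedly fetch jobs from the frontier;
// vertices whose degree fits a warp's shared-memory table (< ELE_PER_WARP) are
// sampled warp-centrically, while larger ones are queued as high-degree jobs
// and later processed by the whole block using the global-memory vector buffers.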
56 | curand_init(TID, 0, 0, &state); 57 | 58 | __shared__ uint current_itr; 59 | if (threadIdx.x == 0) current_itr = 0; 60 | __syncthreads(); 61 | for (; current_itr < result.hop_num - 1;) // for 2-hop, hop_num=3 62 | { 63 | // Vector_gmem *high_degrees = 64 | // &sampler->result.high_degrees[current_itr]; 65 | sample_job job; 66 | __threadfence_block(); 67 | if (LID == 0) job = result.requireOneJob(current_itr); 68 | __syncwarp(FULL_WARP_MASK); 69 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 70 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 71 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 72 | __syncwarp(FULL_WARP_MASK); 73 | while (job.val) { 74 | if (ggraph->getDegree(job.node_id) < ELE_PER_WARP) { 75 | SampleWarpCentic(result, ggraph, state, current_itr, job.idx, 76 | job.node_id, buffer); 77 | } else { 78 | #ifdef skip8k 79 | if (LID == 0 && ggraph->getDegree(job.node_id) < 8000) 80 | #else 81 | if (LID == 0) 82 | #endif // skip8k 83 | result.AddHighDegree(current_itr, job.node_id); 84 | } 85 | __syncwarp(FULL_WARP_MASK); 86 | if (LID == 0) job = result.requireOneJob(current_itr); 87 | job.idx = __shfl_sync(FULL_WARP_MASK, job.idx, 0); 88 | job.val = __shfl_sync(FULL_WARP_MASK, job.val, 0); 89 | job.node_id = __shfl_sync(FULL_WARP_MASK, job.node_id, 0); 90 | } 91 | __syncthreads(); 92 | __shared__ sample_job high_degree_job; 93 | if (LTID == 0) { 94 | job = result.requireOneHighDegreeJob(current_itr); 95 | high_degree_job.val = job.val; 96 | high_degree_job.node_id = job.node_id; 97 | } 98 | __syncthreads(); 99 | while (high_degree_job.val) { 100 | SampleBlockCentic(result, ggraph, state, current_itr, 101 | high_degree_job.node_id, buffer, 102 | vector_packs); // buffer_pointer 103 | __syncthreads(); 104 | if (LTID == 0) { 105 | job = result.requireOneHighDegreeJob(current_itr); 106 | high_degree_job.val = job.val; 107 | high_degree_job.node_id = job.node_id; 108 | } 109 | __syncthreads(); 110 | } 111 | __syncthreads(); 112 | if (threadIdx.x == 0) { 113 | // while (!result.checkFinish(current_itr)) 114 | // { 115 | // printf("waiting "); 116 | // } 117 | result.NextItr(current_itr); 118 | } 119 | __syncthreads(); 120 | } 121 | } 122 | 123 | static __global__ void print_result(Sampler *sampler) { 124 | sampler->result.PrintResult(); 125 | } 126 | 127 | #include "date.h" 128 | // void Start_high_degree(Sampler sampler) 129 | float OnlineGBSample(Sampler &sampler) { 130 | // orkut max degree 932101 131 | 132 | LOG("%s\n", __FUNCTION__); 133 | #ifdef skip8k 134 | LOG("skipping 8k\n"); 135 | #endif // skip8k 136 | 137 | int device; 138 | cudaDeviceProp prop; 139 | cudaGetDevice(&device); 140 | cudaGetDeviceProperties(&prop, device); 141 | int n_sm = prop.multiProcessorCount; 142 | 143 | Sampler *sampler_ptr; 144 | MyCudaMalloc(&sampler_ptr, sizeof(Sampler)); 145 | CUDA_RT_CALL(cudaMemcpy(sampler_ptr, &sampler, sizeof(Sampler), 146 | cudaMemcpyHostToDevice)); 147 | double start_time, total_time; 148 | init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr, true); 149 | 150 | // allocate global buffer 151 | int block_num = n_sm * FLAGS_m; 152 | int gbuff_size = sampler.ggraph.MaxDegree; 153 | 154 | LOG("alllocate GMEM buffer %d MB\n", 155 | block_num * gbuff_size * MEM_PER_ELE / 1024 / 1024); 156 | 157 | Vector_pack *vector_pack_h = new Vector_pack[block_num]; 158 | for (size_t i = 0; i < block_num; i++) { 159 | vector_pack_h[i].Allocate(gbuff_size, sampler.device_id); 160 | } 161 | CUDA_RT_CALL(cudaDeviceSynchronize()); 162 | #pragma omp barrier 163 | Vector_pack 
*vector_packs; 164 | CUDA_RT_CALL( 165 | MyCudaMalloc(&vector_packs, sizeof(Vector_pack) * block_num)); 166 | CUDA_RT_CALL(cudaMemcpy(vector_packs, vector_pack_h, 167 | sizeof(Vector_pack) * block_num, 168 | cudaMemcpyHostToDevice)); 169 | 170 | // Global_buffer 171 | CUDA_RT_CALL(cudaDeviceSynchronize()); 172 | start_time = wtime(); 173 | #ifdef check 174 | sample_kernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr, vector_packs); 175 | #else 176 | // using namespace date; 177 | // using namespace std::chrono; 178 | // std::cout << "start: " << system_clock::now() << '\n'; 179 | sample_kernel<<>>(sampler_ptr, vector_packs); 180 | #endif 181 | CUDA_RT_CALL(cudaDeviceSynchronize()); 182 | // std::cout << "end: " << system_clock::now() << '\n'; 183 | // CUDA_RT_CALL(cudaPeekAtLastError()); 184 | total_time = wtime() - start_time; 185 | #pragma omp barrier 186 | LOG("Device %d sampling time:\t%.2f ms ratio:\t %.1f MSEPS\n", 187 | omp_get_thread_num(), total_time * 1000, 188 | static_cast(sampler.result.GetSampledNumber() / total_time / 189 | 1000000)); 190 | sampler.sampled_edges = sampler.result.GetSampledNumber(); 191 | LOG("sampled_edges %d\n", sampler.sampled_edges); 192 | if (FLAGS_printresult) print_result<<<1, 32, 0, 0>>>(sampler_ptr); 193 | CUDA_RT_CALL(cudaDeviceSynchronize()); 194 | return total_time; 195 | } 196 | -------------------------------------------------------------------------------- /include/roller.cuh: -------------------------------------------------------------------------------- 1 | // #include "gpu_graph.cuh" 2 | // #include "kernel.cuh" 3 | // #include "sampler_result.cuh" 4 | // #include "util.cuh" 5 | // #include "vec.cuh" 6 | 7 | // template struct alias_table_roller_shmem; 8 | 9 | // template struct alias_table_roller_shmem { 10 | // uint size; 11 | // uint current_itr; 12 | // gpu_graph *ggraph; 13 | // int src_id; 14 | // uint src_degree; 15 | 16 | // Vector_virtual alias; 17 | // Vector_virtual prob; 18 | // // Vector_shmem 20 | // // selected; 21 | // // Vector_gmem selected_high_degree; 22 | 23 | // // __device__ bool loadGlobalBuffer(Vector_pack_short *pack) { 24 | // // if (LID == 0) { 25 | // // selected_high_degree = pack->selected; 26 | // // } 27 | // // } 28 | 29 | // __device__ bool SetVirtualVector(gpu_graph *graph) { 30 | // alias.Construt(graph->alias_array + graph->xadj[src_id], 31 | // graph->getDegree((uint)src_id)); 32 | // prob.Construt(graph->prob_array + graph->xadj[src_id], 33 | // graph->getDegree((uint)src_id)); 34 | // } 35 | 36 | // __host__ __device__ uint Size() { return size; } 37 | // __device__ void loadFromGraph(T *_ids, gpu_graph *graph, int _size, 38 | // uint _current_itr, int _src_id) { 39 | // ggraph = graph; 40 | // current_itr = _current_itr; 41 | // size = _size; 42 | // // ids = _ids; 43 | // src_id = _src_id; 44 | // src_degree = graph->getDegree((uint)_src_id); 45 | // // weights = _weights; 46 | // SetVirtualVector(graph); 47 | // Init(src_degree); 48 | // } 49 | // __device__ void Init(uint sz) { 50 | // alias.Init(sz); 51 | // prob.Init(sz); 52 | // } 53 | // __device__ void roll_atomic(T *array, curandState *local_state, 54 | // sample_result result) { 55 | // uint target_size = result.hops[current_itr + 1]; 56 | // if ((target_size > 0) && (target_size < src_degree)) { 57 | // // int itr = 0; 58 | // for (size_t i = 0; i < target_size; i++) { 59 | // int col = (int)floor(curand_uniform(local_state) * size); 60 | // float p = curand_uniform(local_state); 61 | // uint candidate; 62 | // if (p < prob[col]) 63 | // 
candidate = col; 64 | // else 65 | // candidate = alias[col]; 66 | // result.AddActive(current_itr, array, 67 | // ggraph->getOutNode(src_id, candidate)); 68 | // } 69 | // } else if (target_size >= src_degree) { 70 | // for (size_t i = 0; i < src_degree; i++) { 71 | // result.AddActive(current_itr, array, ggraph->getOutNode(src_id, i)); 72 | // } 73 | // } 74 | // } 75 | // }; 76 | 77 | // template struct alias_table_roller_shmem { 78 | // uint size; 79 | // // float weight_sum; 80 | // // T *ids; 81 | // // float *weights; 82 | // uint current_itr; 83 | // gpu_graph *ggraph; 84 | // int src_id; 85 | // uint src_degree; 86 | 87 | // Vector_virtual alias; 88 | // Vector_virtual prob; 89 | // Vector_shmem 90 | // selected; 91 | // Vector_gmem selected_high_degree; 92 | 93 | // __device__ void loadGlobalBuffer(Vector_pack_short *pack) { 94 | // if (LID == 0) { 95 | // selected_high_degree = pack->selected; 96 | // } 97 | // } 98 | 99 | // __device__ bool SetVirtualVector(gpu_graph *graph) { 100 | // alias.Construt(graph->alias_array + graph->xadj[src_id], 101 | // graph->getDegree((uint)src_id)); 102 | // prob.Construt(graph->prob_array + graph->xadj[src_id], 103 | // graph->getDegree((uint)src_id)); 104 | // } 105 | 106 | // __host__ __device__ uint Size() { return size; } 107 | // __device__ void loadFromGraph(T *_ids, gpu_graph *graph, int _size, 108 | // uint _current_itr, int _src_id) { 109 | // if (LID == 0) { 110 | // ggraph = graph; 111 | // current_itr = _current_itr; 112 | // size = _size; 113 | // // ids = _ids; 114 | // src_id = _src_id; 115 | // src_degree = graph->getDegree((uint)_src_id); 116 | // // weights = _weights; 117 | // SetVirtualVector(graph); 118 | // Init(src_degree); 119 | // } 120 | 121 | // __syncwarp(FULL_WARP_MASK); 122 | // active_size(__LINE__); 123 | // } 124 | // __device__ void Init(uint sz) { 125 | // alias.Init(sz); 126 | // prob.Init(sz); 127 | // selected.Init(sz); 128 | // selected_high_degree.Init(sz); 129 | // } 130 | // __device__ void Clean() { 131 | // // if (LID == 0) { 132 | // // alias.Clean(); 133 | // // prob.Clean(); 134 | // selected.Clean(); 135 | // // } 136 | // selected_high_degree.CleanWC(); 137 | // // selected_high_degree.CleanDataWC(); //! 
todo using GMEM per warp 138 | // } 139 | // __device__ void roll_atomic(T *array, curandState *state, 140 | // sample_result result) { 141 | // coalesced_group active = coalesced_threads(); 142 | // active.sync(); 143 | // active_size(__LINE__); 144 | // // if (LID == 0) { 145 | // // printf("%s \n", __FUNCTION__); 146 | // // } 147 | // // __syncwarp(FULL_WARP_MASK); 148 | // active.sync(); 149 | // // curandState state; 150 | // // paster(current_itr); 151 | // uint target_size = result.hops[current_itr + 1]; 152 | // if ((target_size > 0) && (target_size < src_degree)) { 153 | // int itr = 0; 154 | // __shared__ uint sizes[WARP_PER_BLK]; 155 | // uint *local_size = sizes + WID; 156 | // if (LID == 0) 157 | // *local_size = 0; 158 | // // __syncwarp(FULL_WARP_MASK); 159 | // // if (LID == 0) { 160 | // // paster(*local_size); 161 | // // paster(target_size); 162 | // // } 163 | // // __syncwarp(FULL_WARP_MASK); 164 | // active.sync(); 165 | // active_size(__LINE__); 166 | // while (*local_size < target_size) { 167 | // active_size(__LINE__); 168 | // for (size_t i = *local_size + LID; 169 | // i < 32 * (target_size / 32 + 1); // 32 * (target_size / 32 + 1) 170 | // i += 32) { 171 | // active_size(__LINE__); 172 | // roll_once(array, local_size, state, target_size, result); 173 | // } 174 | // // __syncwarp(FULL_WARP_MASK); 175 | // active.sync(); 176 | // itr++; 177 | // if (itr > 10) { 178 | // break; 179 | // } 180 | // } 181 | // active.sync(); 182 | // } else if (target_size >= src_degree) { 183 | // for (size_t i = LID; i < src_degree; i += 32) { 184 | // result.AddActive(current_itr, array, ggraph->getOutNode(src_id, i)); 185 | // } 186 | // } 187 | // } 188 | 189 | // __device__ void roll_once(T *array, uint *local_size, 190 | // curandState *local_state, size_t target_size, 191 | // sample_result result) { 192 | // if (LID == 0) 193 | // printf("%s \n", __FUNCTION__); 194 | // int col = (int)floor(curand_uniform(local_state) * size); 195 | // float p = curand_uniform(local_state); 196 | // uint candidate; 197 | // if (p < prob[col]) 198 | // candidate = col; 199 | // else 200 | // candidate = alias[col]; 201 | // unsigned short int updated = true; 202 | // // if (src_degree <= ELE_PER_WARP) 203 | // // updated = atomicCAS(&selected[candidate], (unsigned short int)0, 204 | // // (unsigned short int)1); 205 | // // else { 206 | // // updated = atomicCAS(&selected_high_degree[candidate], 207 | // // (unsigned short int)0, (unsigned short int)1); 208 | // // } 209 | // if (!updated) { 210 | // if (AddTillSize(local_size, target_size)) { 211 | // result.AddActive(current_itr, array, 212 | // ggraph->getOutNode(src_id, candidate)); 213 | // } 214 | // // return true; 215 | // } 216 | // // else 217 | // // return false; 218 | // } 219 | // }; -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | ### 2 | # @Description: 3 | # @Date: 2020-11-17 13:39:45 4 | # @LastEditors: PengyuWang 5 | # @LastEditTime: 2021-01-12 20:16:14 6 | # @FilePath: /skywalker/scripts/test.sh 7 | ### 8 | DATA=(web-Google lj orkut arabic-2005 uk-2005 sk-2005 friendster) # uk-union rmat29 web-ClueWeb09) eu-2015-host-nat twitter-2010 9 | HD=(0.25 0.5 1 0.25 0.25 0.5 1) # uk-union rmat29 web-ClueWeb09) 10 | NV=(916428 4847571 3072627 39459923 22744077 50636151 124836180) 11 | # HD=(4 2 1 4 4 2 1) # uk-union rmat29 web-ClueWeb09) 12 | 13 | # DATA=( sk-2005 friendster) 14 | # HD=( 4 1 ) 15 
| ITR=1 16 | NG=4 17 | 18 | GR=".w.gr" 19 | EXE="./bin/main" #main_degree 20 | SG="--ngpu=1 --s" 21 | 22 | # node2vec always online 23 | # export OMP_PROC_BIND=TRUE 24 | # GOMP_CPU_AFFINITY="0-9 10-19 20-29 30-99" 25 | # OMP_PLACES=cores 26 | # OMP_PROC_BIND=close 27 | 28 | # correct one 29 | # OMP_PLACES=cores OMP_PROC_BIND=spread 30 | 31 | # --randomweight=1 --weightrange=2 32 | 33 | 34 | # walker 35 | # echo "-------------------------------------------------------unbias rw 100" 36 | # for idx in $(seq 1 ${#DATA[*]}) 37 | # do 38 | # ./bin/main --rw=1 --k 1 --d 100 --ol=1 --bias=0 --input ~/data/${DATA[idx-1]}${GR} -v --ngpu 1 --full --umresult 1 --umbuf 1 39 | # done 40 | 41 | # walker 42 | # echo "-------------------------------------------------------offline ppr 0.15 4k" 43 | # for idx in $(seq 1 ${#DATA[*]}) 44 | # do 45 | # ./bin/main --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} -bias=0 --rw=1 --n=40000 --k 1 --d 100 --tp=0.15 --ngpu 1 --umgraph=0 --umresult=0 --umbuf=0 --weight=0 46 | # done 47 | 48 | 49 | # echo "-------------------------------------------------------online layer sampling 4k 100" 50 | # for idx in $(seq 1 ${#DATA[*]}) 51 | # do 52 | # ./bin/main --rw=0 --k 1 --d 100 --ol=1 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 53 | # done 54 | 55 | 56 | # echo "-------------------------------------------------------online walkload 4k---------------------" 57 | # echo "-------------------------------------------------------online walk 4k 100" 58 | # for idx in $(seq 1 ${#DATA[*]}) 59 | # do 60 | # for i in $(seq 1 ${ITR}) 61 | # do 62 | # ./bin/main --k 1 --d 100 --rw=1 --ol=1 --n=4000 --input ~/data/${DATA[idx-1]}${GR} ${SG} -v 63 | # done 64 | # done 65 | 66 | # echo "-------------------------------------------------------online ppr 0.15" 67 | # for idx in $(seq 1 ${#DATA[*]}) 68 | # do 69 | # ./bin/main -bias=1 --rw=1 --ol=1 --n=4000 --k 1 --d 100 --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} ${SG} --hd=${HD[idx-1]} 70 | # done 71 | 72 | # echo "-------------------------------------------------------online sample 4k 20,2" 73 | # for idx in $(seq 1 ${#DATA[*]}) 74 | # do 75 | # for i in $(seq 1 ${ITR}) 76 | # do 77 | # ./bin/main --k 20 --d 2 --ol=1 --n=4000 --input ~/data/${DATA[idx-1]}${GR} ${SG} --hd=${HD[idx-1]} 78 | # done 79 | # done 80 | 81 | # echo "-------------------------------------------------------online node2vec 4000" 82 | # for idx in $(seq 1 ${#DATA[*]}) 83 | # do 84 | # ./bin/node2vec --node2vec --ol=1 --bias=1 --d 100 --n=4000 --ngpu=4 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 85 | # done 86 | 87 | # echo "-------------------------------------------------------online sage 4k 25,10" 88 | # for idx in $(seq 1 ${#DATA[*]}) 89 | # do 90 | # ./bin/main --sage=1 --ol=1 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 91 | # done 92 | 93 | 94 | # ---------------------- 95 | 96 | # echo "-------------------------------------------------------offline table" 97 | # for idx in $(seq 1 ${#DATA[*]}) 98 | # do 99 | # for i in $(seq 1 ${ITR}) 100 | # do 101 | # ./bin/main --ol=0 --ngpu=1 --s --rw=1 --k=1 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} 102 | # done 103 | # done 104 | 105 | 106 | # echo "---------------------------------scale ------------------------------" 107 | 108 | # echo "-------------------------------------------------------online node2vec" 109 | # for idx in $(seq 1 ${#DATA[*]}) 110 | # do 111 | # for i in $(seq 1 ${NG}) 112 | # do 113 | # ./bin/node2vec --node2vec --ol=1 --bias=1 --d 100 --n=400000 --input 
~/data/${DATA[idx-1]}${GR} --ngpu=$i --s --hd=${HD[idx-1]} 114 | # done 115 | # done 116 | 117 | 118 | 119 | # echo "-------------------------------------------------------unbias rw 100" 120 | # for idx in $(seq 1 ${#DATA[*]}) 121 | # do 122 | # ./bin/main --rw=1 --k 1 --d 100 --ol=1 --bias=0 --input ~/data/${DATA[idx-1]}${GR} -v --ngpu 1 --full --umresult 1 --umbuf 1 123 | # done 124 | 125 | # echo "-------------------------------------------------------unbias rw 100" 126 | # for idx in $(seq 1 ${#DATA[*]}) 127 | # do 128 | # for i in $(seq 1 ${NG}) 129 | # do 130 | # ./bin/main --rw=1 --k 1 --d 100 --bias=0 --ol=0 --n=400000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 131 | # done 132 | # done 133 | 134 | # echo "-------------------------------------------------------offline ppr 0.15" 135 | # for idx in $(seq 1 ${#DATA[*]}) 136 | # do 137 | # for i in $(seq 1 ${NG}) 138 | # do 139 | # ./bin/main -bias=1 --rw=1 --ol=0 --n=400000 --k 1 --d 100 --tp=0.15 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 140 | # done 141 | # done 142 | 143 | # echo "comparing with csaw" 144 | # echo "-------------------------------------------------------offline walk 100" 145 | # for idx in $(seq 1 ${#DATA[*]}) 146 | # do 147 | # for i in $(seq 1 ${NG}) 148 | # do 149 | # ./bin/main --k 1 --d 100 --rw=1 --ol=0 --n=400000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 150 | # done 151 | # done 152 | 153 | # echo "-------------------------------------------------------offline sample 20,2" 154 | # for idx in $(seq 1 ${#DATA[*]}) 155 | # do 156 | # for i in $(seq 1 ${NG}) 157 | # do 158 | # ./bin/main --k 20 --d 2 --ol=0 --n=400000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 159 | # done 160 | # done 161 | 162 | # echo "-------------------------------------------------------offline sample 40k 2,2" 163 | # for idx in $(seq 1 ${#DATA[*]}) 164 | # do 165 | # for i in $(seq 1 ${NG}) 166 | # do 167 | # ./bin/main --k 2 --d 2 --ol=0 --n=40000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=$i --s 168 | # done 169 | # done 170 | 171 | # echo "-------------------------------------------------------offline sample 40k 2,2" 172 | # for idx in $(seq 1 ${#DATA[*]}) 173 | # do 174 | # ./bin/main --k 2 --d 2 --ol=0 --n=40000 --input ~/data/${DATA[idx-1]}${GR} --hd=${HD[idx-1]} --ngpu=4 --bias=1 175 | # done 176 | 177 | # echo "-------------------------------------------------------offline rw |V| 100. 
no weight" 178 | # for idx in $(seq 1 ${#DATA[*]}) 179 | # do 180 | # ./bin/main --rw=1 --k 1 --d 100 --ol=1 --bias=0 --full --input ~/data/${DATA[idx-1]}${GR} 181 | # done 182 | 183 | 184 | # echo "-------------------------------------------------------offline sample |V| 2,2" 185 | # for idx in $(seq 1 ${#DATA[*]}) 186 | # do 187 | # ./bin/main --rw=0 --k 2 --d 2 --ol=0 --input ~/data/${DATA[idx-1]}${GR} 188 | # done 189 | 190 | # echo "-------------------------------------------------------offline rw 4k 100" 191 | # for idx in $(seq 1 ${#DATA[*]}) 192 | # do 193 | # for ng in $(seq 1 4) 194 | # do 195 | # ./bin/main --rw=1 --k 1 --d 100 --ol=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${ng} --hd=${HD[idx-1]} --n=40000 196 | # done 197 | # done 198 | 199 | # echo "-------------------------------------------------------offline 4k 10 10 " 200 | # for idx in $(seq 1 ${#DATA[*]}) 201 | # do 202 | # for ng in $(seq 1 4) 203 | # do 204 | # ./bin/main --rw=0 --k 10 --d 2 --ol=0 --input ~/data/${DATA[idx-1]}${GR} --ngpu=${ng} --hd=${HD[idx-1]} --n=4000 205 | # done 206 | # done 207 | 208 | 209 | # /////////////////// 210 | # |V| hard 211 | # echo "-------------------------------------------------------offline rw |V| 100" 212 | # for idx in $(seq 1 ${#DATA[*]}) 213 | # do 214 | # ./bin/main --rw=1 --k 1 --d 100 --ol=0 --full --input ~/data/${DATA[idx-1]}${GR} 215 | # done 216 | 217 | 218 | idx=1 219 | # echo "-------------------------------------------------------online walk for table time" 220 | # for i in $(seq 1 10) 221 | # do 222 | # val=`expr ${NV[${idx}-1]} / 100 \* ${i} \* 1` 223 | # echo "----------${val}" 224 | # ./bin/main --k 1 --d 100 --rw=1 --ol=1 --n=${val} --input ~/data/${DATA[idx-1]}${GR} ${SG} 225 | # done 226 | 227 | echo "-------------------------------------------------------offline walk for table time" 228 | for i in $(seq 1 10) 229 | do 230 | val=`expr ${NV[$idx-1]} / 100 \* ${i} \* 1` 231 | echo "----------${val}" 232 | ./bin/main --k 1 --d 100 --rw=1 --ol=0 --n=${val} --input ~/data/${DATA[idx-1]}${GR} ${SG} 233 | done 234 | -------------------------------------------------------------------------------- /src/offline_walk.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * @Description: just perform RW 3 | * @Date: 2020-11-30 14:30:06 4 | * @LastEditors: Pengyu Wang 5 | * @LastEditTime: 2022-03-03 22:46:47 6 | * @FilePath: /skywalker/src/offline_walk.cu 7 | */ 8 | #include "app.cuh" 9 | 10 | __global__ void sample_kernel_static_buffer(Walker *walker) { 11 | Jobs_result &result = walker->result; 12 | gpu_graph *graph = &walker->ggraph; 13 | curandState state; 14 | curand_init(TID, 0, 0, &state); 15 | __shared__ matrixBuffer buffer; 16 | buffer.Init(); 17 | 18 | size_t idx_i = TID; 19 | if (idx_i < result.size) { 20 | result.length[idx_i] = result.hop_num - 1; 21 | uint src_id; 22 | // bool alive = true; 23 | coalesced_group warp = coalesced_threads(); 24 | for (uint current_itr = 0; current_itr < result.hop_num - 1; 25 | current_itr++) { 26 | // coalesced_group active = coalesced_threads(); 27 | if (result.alive[idx_i] != 0) { 28 | Vector_virtual alias; 29 | Vector_virtual prob; 30 | src_id = current_itr == 0 ? 
result.GetData(current_itr, idx_i) : src_id; 31 | uint src_degree = graph->getDegree((uint)src_id); 32 | alias.Construt( 33 | graph->alias_array + graph->xadj[src_id] - graph->local_vtx_offset, 34 | src_degree); 35 | prob.Construt( 36 | graph->prob_array + graph->xadj[src_id] - graph->local_vtx_offset, 37 | src_degree); 38 | alias.Init(src_degree); 39 | prob.Init(src_degree); 40 | const uint target_size = 1; 41 | 42 | if (target_size < src_degree) { 43 | int col = (int)floor(curand_uniform(&state) * src_degree); 44 | float p = curand_uniform(&state); 45 | uint candidate; 46 | if (p < prob[col]) 47 | candidate = col; 48 | else 49 | candidate = alias[col]; 50 | uint next_src = graph->getOutNode(src_id, candidate); 51 | // if (idx_i == 1) printf("%u adding1 %u \n", idx_i, next_src); 52 | buffer.Set(next_src); 53 | src_id = next_src; 54 | } else if (src_degree == 0) { 55 | result.alive[idx_i] = 0; 56 | result.length[idx_i] = current_itr; 57 | // buffer.Finish(); 58 | // return; 59 | } else { 60 | uint next_src = graph->getOutNode(src_id, 0); 61 | buffer.Set(next_src); 62 | src_id = next_src; 63 | // if (idx_i == 1) printf("%u adding %u \n", idx_i, next_src); 64 | } 65 | } 66 | warp.sync(); 67 | buffer.CheckFlush(result.data + result.hop_num * idx_i, current_itr, 68 | warp); 69 | } 70 | warp.sync(); 71 | buffer.Flush(result.data + result.hop_num * idx_i, 0, warp); 72 | } 73 | } 74 | // 48 kb , 404 per sampler 75 | __global__ void sample_kernel_static(Walker *walker) { 76 | Jobs_result &result = walker->result; 77 | gpu_graph *graph = &walker->ggraph; 78 | curandState state; 79 | curand_init(TID, 0, 0, &state); 80 | 81 | size_t idx_i = TID; 82 | if (idx_i < result.size) { 83 | result.length[idx_i] = result.hop_num - 1; 84 | for (uint current_itr = 0; current_itr < result.hop_num - 1; 85 | current_itr++) { 86 | if (result.alive[idx_i] != 0) { 87 | Vector_virtual alias; 88 | Vector_virtual prob; 89 | uint src_id = result.GetData(current_itr, idx_i); 90 | uint src_degree = graph->getDegree((uint)src_id); 91 | alias.Construt( 92 | graph->alias_array + graph->xadj[src_id] - graph->local_vtx_offset, 93 | src_degree); 94 | prob.Construt( 95 | graph->prob_array + graph->xadj[src_id] - graph->local_vtx_offset, 96 | src_degree); 97 | alias.Init(src_degree); 98 | prob.Init(src_degree); 99 | const uint target_size = 1; 100 | if (target_size < src_degree) { 101 | // int itr = 0; 102 | // for (size_t i = 0; i < target_size; i++) { 103 | int col = (int)floor(curand_uniform(&state) * src_degree); 104 | float p = curand_uniform(&state); 105 | uint candidate; 106 | if (p < prob[col]) 107 | candidate = col; 108 | else 109 | candidate = alias[col]; 110 | *result.GetDataPtr(current_itr + 1, idx_i) = 111 | graph->getOutNode(src_id, candidate); 112 | // } 113 | } else if (src_degree == 0) { 114 | result.alive[idx_i] = 0; 115 | result.length[idx_i] = current_itr; 116 | break; 117 | } else { 118 | *result.GetDataPtr(current_itr + 1, idx_i) = 119 | graph->getOutNode(src_id, 0); 120 | } 121 | } 122 | } 123 | } 124 | } 125 | 126 | __global__ void sample_kernel(Walker *walker) { 127 | Jobs_result &result = walker->result; 128 | gpu_graph *graph = &walker->ggraph; 129 | curandState state; 130 | curand_init(TID, 0, 0, &state); 131 | 132 | for (size_t idx_i = TID; idx_i < result.size; 133 | idx_i += gridDim.x * blockDim.x) { 134 | result.length[idx_i] = result.hop_num - 1; 135 | for (uint current_itr = 0; current_itr < result.hop_num - 1; 136 | current_itr++) { 137 | if (result.alive[idx_i] != 0) { 138 | Vector_virtual alias; 
139 | Vector_virtual prob; 140 | uint src_id = result.GetData(current_itr, idx_i); 141 | uint src_degree = graph->getDegree((uint)src_id); 142 | alias.Construt( 143 | graph->alias_array + graph->xadj[src_id] - graph->local_vtx_offset, 144 | src_degree); 145 | prob.Construt( 146 | graph->prob_array + graph->xadj[src_id] - graph->local_vtx_offset, 147 | src_degree); 148 | alias.Init(src_degree); 149 | prob.Init(src_degree); 150 | const uint target_size = 1; 151 | if (target_size < src_degree) { 152 | // int itr = 0; 153 | // for (size_t i = 0; i < target_size; i++) { 154 | int col = (int)floor(curand_uniform(&state) * src_degree); 155 | float p = curand_uniform(&state); 156 | uint candidate; 157 | if (p < prob[col]) 158 | candidate = col; 159 | else 160 | candidate = alias[col]; 161 | *result.GetDataPtr(current_itr + 1, idx_i) = 162 | graph->getOutNode(src_id, candidate); 163 | // } 164 | } else if (src_degree == 0) { 165 | result.alive[idx_i] = 0; 166 | result.length[idx_i] = current_itr; 167 | break; 168 | } else { 169 | *result.GetDataPtr(current_itr + 1, idx_i) = 170 | graph->getOutNode(src_id, 0); 171 | } 172 | } 173 | } 174 | } 175 | } 176 | 177 | static __global__ void print_result(Walker *walker) { 178 | walker->result.PrintResult(); 179 | } 180 | 181 | float OfflineWalk(Walker &walker) { 182 | LOG("%s\n", __FUNCTION__); 183 | int device; 184 | cudaDeviceProp prop; 185 | cudaGetDevice(&device); 186 | cudaGetDeviceProperties(&prop, device); 187 | int n_sm = prop.multiProcessorCount; 188 | 189 | Walker *sampler_ptr; 190 | MyCudaMalloc(&sampler_ptr, sizeof(Walker)); 191 | CUDA_RT_CALL( 192 | cudaMemcpy(sampler_ptr, &walker, sizeof(Walker), cudaMemcpyHostToDevice)); 193 | double start_time, total_time; 194 | // init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr,true); 195 | BindResultKernel<<<1, 32, 0, 0>>>(sampler_ptr); 196 | // allocate global buffer 197 | int block_num = n_sm * FLAGS_m; 198 | CUDA_RT_CALL(cudaDeviceSynchronize()); 199 | CUDA_RT_CALL(cudaPeekAtLastError()); 200 | start_time = wtime(); 201 | #ifdef check 202 | sample_kernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr); 203 | #else 204 | if (FLAGS_static) { 205 | if (FLAGS_buffer) 206 | // sample_kernel_static_buffer<<<1, 32, 0, 0>>>(sampler_ptr); 207 | sample_kernel_static_buffer<<>>(sampler_ptr); 209 | else 210 | sample_kernel_static<<>>(sampler_ptr); 212 | } 213 | 214 | else 215 | sample_kernel<<>>(sampler_ptr); 216 | #endif 217 | CUDA_RT_CALL(cudaDeviceSynchronize()); 218 | // CUDA_RT_CALL(cudaPeekAtLastError()); 219 | total_time = wtime() - start_time; 220 | #pragma omp barrier 221 | LOG("Device %d sampling time:\t%.6f ratio:\t %.2f MSEPS\n", 222 | omp_get_thread_num(), total_time, 223 | static_cast(walker.result.GetSampledNumber() / total_time / 224 | 1000000)); 225 | walker.sampled_edges = walker.result.GetSampledNumber(); 226 | LOG("sampled_edges %d\n", walker.sampled_edges); 227 | if (FLAGS_printresult) print_result<<<1, 32, 0, 0>>>(sampler_ptr); 228 | CUDA_RT_CALL(cudaDeviceSynchronize()); 229 | return total_time; 230 | } 231 | -------------------------------------------------------------------------------- /old/shmem/alias_table.cuh: -------------------------------------------------------------------------------- 1 | #include "util.cuh" 2 | #include "vec.cuh" 3 | 4 | #define verbose 5 | 6 | template struct alias_table; 7 | 8 | __global__ void load_id_weight(); 9 | inline __device__ char char_atomicCAS(char *addr, char cmp, char val) { 10 | unsigned *al_addr = reinterpret_cast(((unsigned long long)addr) & 11 | 
(0xFFFFFFFFFFFFFFFCULL)); 12 | unsigned al_offset = ((unsigned)(((unsigned long long)addr) & 3)) * 8; 13 | unsigned mask = 0xFFU; 14 | mask <<= al_offset; 15 | mask = ~mask; 16 | unsigned sval = val; 17 | sval <<= al_offset; 18 | unsigned old = *al_addr, assumed, setval; 19 | do { 20 | assumed = old; 21 | setval = assumed & mask; 22 | setval |= sval; 23 | old = atomicCAS(al_addr, assumed, setval); 24 | } while (assumed != old); 25 | return (char)((assumed >> al_offset) & 0xFFU); 26 | } 27 | 28 | template 29 | __device__ void AddTillSize(T *array, uint32_t *size, T t, u64 target_size) { 30 | u64 old = atomicAdd(size, 1); 31 | if (old < target_size) { 32 | array[old] = t; 33 | } else 34 | printf("wtf vector overflow"); 35 | } 36 | 37 | template struct alias_table_constructor_shmem { 38 | 39 | // u64 degree; 40 | u64 size; 41 | float weight_sum; 42 | T *ids; 43 | float *weights; 44 | 45 | Vector_shmem large; 46 | Vector_shmem small; 47 | Vector_shmem alias; 48 | Vector_shmem prob; 49 | 50 | // to roll 51 | Vector_shmem selected; 52 | // Vector_shmem result; 53 | 54 | // __host__ __device__ u64 &Degree() { return degree; } 55 | __host__ __device__ u64 &Size() { return size; } 56 | __device__ void load(T *_ids, float *_weights, size_t _size) { 57 | if (LID == 0) { 58 | size = _size; 59 | ids = _ids; 60 | weights = _weights; 61 | } 62 | float local_sum = 0.0, tmp; 63 | for (size_t i = LID; i < size; i += 32) { 64 | local_sum += _weights[i]; 65 | } 66 | tmp = warpReduce(local_sum, LID); 67 | // #ifdef verbose 68 | // if (LID == 0) { 69 | // weight_sum = tmp; 70 | // printf("sum: %f\n", tmp); 71 | // } 72 | // #endif // verbose 73 | normalize(); 74 | } 75 | __device__ void Init() { 76 | large.Init(); 77 | small.Init(); 78 | alias.Init(Size()); 79 | prob.Init(Size()); 80 | selected.Init(); 81 | } 82 | __device__ void normalize() { 83 | float scale = size / weight_sum; 84 | for (size_t i = LID; i < size; i += 32) { 85 | prob[i] = weights[i] * scale; 86 | } 87 | } 88 | __device__ void Clean() { 89 | if (LID == 0) { 90 | large.Clean(); 91 | small.Clean(); 92 | alias.Clean(); 93 | prob.Clean(); 94 | selected.Clean(); 95 | } 96 | } 97 | __device__ void roll_atomic(Vector v, int count) { 98 | curandState state; 99 | int itr = 1; 100 | while (v.Size() < count) { 101 | for (size_t i = v.Size() + LID; i < count; i += 32) { 102 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 103 | roll_once(v, state, count); 104 | } 105 | // break; 106 | itr++; 107 | if (itr > 10) 108 | break; 109 | // if (LID == 0) 110 | // printf("v.Size() %d count %d\n", v.Size(), count); 111 | } 112 | if (LID == 0) { 113 | printf("itr: %d till done\n", itr); 114 | } 115 | } 116 | __device__ void roll_atomic(T *array, int count) { 117 | curandState state; 118 | int itr = 1; 119 | __shared__ uint32_t sizes[WARP_PER_SM]; 120 | uint32_t *local_size = &sizes[WID]; 121 | if (LID == 0) 122 | *local_size = 0; 123 | while (*local_size < count) { 124 | for (size_t i = *local_size + LID; i < count; i += 32) { 125 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 126 | roll_once(array, local_size, state, count); 127 | } 128 | itr++; 129 | if (itr > 10) 130 | break; 131 | } 132 | if (LID == 0) { 133 | printf("itr: %d till done\n", itr); 134 | } 135 | } 136 | 137 | __device__ void roll(Vector v, int count, size_t target_size) { 138 | curandState state; 139 | for (size_t i = LID; i < count; i += 32) { 140 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 141 | bool suc = roll_once(v, state); 142 | int itr = 
1; 143 | while (!suc) { 144 | curand_init((unsigned long long)clock() + TID, 0, 0, &state); 145 | // suc = roll_once(v, state); 146 | suc = roll_once(v, state, count); 147 | itr++; 148 | if (itr > 100) 149 | return; 150 | } 151 | // if (LID==0) 152 | // { 153 | // printf("itr: %d till done\n",itr); 154 | // } 155 | } 156 | } 157 | __device__ bool roll_once(T *array, uint32_t *local_size, 158 | curandState local_state, size_t target_size) { 159 | 160 | int col = (int)floor(curand_uniform(&local_state) * size); 161 | float p = curand_uniform(&local_state); 162 | // printf("tid %d col %d p %f\n", LID, col, p); 163 | int candidate; 164 | if (p < prob[col]) { 165 | candidate = col; 166 | } else { 167 | candidate = alias[col]; 168 | } 169 | char updated = char_atomicCAS(&selected[candidate], 0, 1); 170 | if (!updated) { 171 | // v.add(candidate); 172 | AddTillSize(array, local_size, candidate, target_size); 173 | // printf("tid %d suc sampled %d\n",LID, candidate); 174 | return true; 175 | } else 176 | return false; 177 | } 178 | __device__ bool roll_once(Vector v, curandState local_state, 179 | size_t target_size) { 180 | 181 | int col = (int)floor(curand_uniform(&local_state) * size); 182 | float p = curand_uniform(&local_state); 183 | // printf("tid %d col %d p %f\n", LID, col, p); 184 | int candidate; 185 | if (p < prob[col]) { 186 | candidate = col; 187 | } else { 188 | candidate = alias[col]; 189 | } 190 | char updated = char_atomicCAS(&selected[candidate], 0, 1); 191 | if (!updated) { 192 | // v.add(candidate); 193 | v.AddTillSize(candidate, target_size); 194 | // printf("tid %d suc sampled %d\n",LID, candidate); 195 | return true; 196 | } else 197 | return false; 198 | } 199 | __device__ void construct() { 200 | for (size_t i = LID; i < size; i += 32) { 201 | if (prob[i] > 1) 202 | large.Add(i); 203 | else 204 | small.Add(i); 205 | } 206 | active_size(__LINE__); 207 | if (LID == 0) { 208 | printf("large: "); 209 | printD(large.data, large.size); 210 | printf("small: "); 211 | printD(small.data, small.size); 212 | printf("prob: "); 213 | printD(prob.data, prob.size); 214 | printf("alias: "); 215 | printD(alias.data, alias.size); 216 | } 217 | int itr = 0; 218 | if (LID == 0) { 219 | prob.size = size; 220 | alias.size = size; 221 | } 222 | while (!small.Empty() && !large.Empty()) { 223 | 224 | int old_small_id = small.size - LID - 1; 225 | int old_small_size = small.size; 226 | // printf("old_small_id %d\n", old_small_id); 227 | if (old_small_id >= 0) { 228 | active_size(__LINE__); 229 | if (LID == 0) { 230 | small.size -= MIN(small.size, 32); 231 | } 232 | T smallV = small[old_small_id]; 233 | int res = old_small_id % large.size; 234 | // bool holder = (old_small_id / large.size == 0); 235 | bool holder = (LID < MIN(large.size, 32)) ? true : false; 236 | 237 | T largeV = large[large.size - res - 1]; //large.size cloud error 238 | // printf("lid %d largeV %d smallV %d holder %d\n", LID, largeV, 239 | // smallV, 240 | // holder); 241 | if (LID == 0) { 242 | large.size -= MIN(large.size, old_small_size); 243 | // printf("large.size %d min %d\n", large.size, 244 | // MIN(large.size, old_small_size)); 245 | } 246 | // todo how to ensure holder alwasy success?? 
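// Descriptive note on the step below: each participating lane pairs one entry
// from the small worklist with one from the large worklist. The large entry
// absorbs the small entry's deficit, i.e. prob[largeV] is decreased by
// (1 - prob[smallV]) via atomicAdd. If the updated prob[largeV] stays
// non-negative, the pairing is committed (alias[smallV] = largeV) and the
// holder lane re-queues largeV into small/large according to its new value;
// otherwise the subtraction is rolled back and smallV is pushed back to be
// retried in a later round.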
247 | float old; 248 | if (holder) 249 | old = atomicAdd(&prob[largeV], prob[smallV] - 1.0); 250 | if (!holder) 251 | old = atomicAdd(&prob[largeV], prob[smallV] - 1.0); 252 | if (old + prob[smallV] - 1.0 >= 0) { 253 | // printf("old - 1 + prob[smallV] %f\n ", old - 1.0 + prob[smallV]); 254 | // prob[smallV] = weights[smallV]; 255 | alias[smallV] = largeV; 256 | if (holder) { 257 | if (prob[largeV] < 1) 258 | small.Add(largeV); 259 | else if (prob[largeV] > 1) { 260 | // printf("add back %d %f\n", largeV, prob[largeV]); 261 | large.Add(largeV); 262 | } 263 | } 264 | } else { 265 | atomicAdd(&prob[largeV], 1 - prob[smallV]); 266 | small.Add(smallV); 267 | } 268 | } 269 | // if (LID == 0) { 270 | // printf("itr: %d\n", itr++); 271 | // printf("large: "); 272 | // printD(large.data, large.size); 273 | // printf("small: "); 274 | // printD(small.data, small.size); 275 | // printf("prob: "); 276 | // printD(prob.data, prob.size); 277 | // printf("alias: "); 278 | // printD(alias.data, alias.size); 279 | // } 280 | // if (itr == 5) 281 | // return; 282 | } 283 | } 284 | }; 285 | 286 | __global__ void shmem_kernel(int *ids, float *weights, size_t size, size_t num, 287 | Vector out); 288 | -------------------------------------------------------------------------------- /include/frontier.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * @Date: 2022-03-10 14:04:55 3 | * @LastEditors: Pengyu Wang 4 | * @Description: 5 | * @FilePath: /skywalker/include/frontier.cuh 6 | * @LastEditTime: 2022-04-11 14:25:22 7 | */ 8 | 9 | #pragma once 10 | 11 | #include "vec.cuh" 12 | #define ADD_FRONTIER 1 13 | 14 | // #define LOCALITY 1 15 | 16 | #ifdef ADD_FRONTIER 17 | template 18 | struct sampleJob { 19 | uint instance_idx; 20 | uint offset; 21 | // uint itr; 22 | T src_id; 23 | int itr; 24 | bool val; 25 | }; 26 | 27 | template 28 | static __global__ void InitSampleFrontier(sampleJob *data, uint *seed, 29 | uint size) { 30 | if (TID < size) { 31 | sampleJob tmp = {TID, 0, seed[TID], 0, true}; 32 | data[TID] = tmp; 33 | } 34 | } 35 | template 36 | static __global__ void InitLocalitySampleFrontier(sampleJob **data, 37 | uint *seed, uint size, 38 | uint vtx_per_bucket, 39 | int *sizes) { 40 | if (TID < size) { 41 | sampleJob tmp = {TID, 0, seed[TID], 0, true}; 42 | uint bucket_idx = seed[TID] / vtx_per_bucket; 43 | size_t old = atomicAdd(&sizes[bucket_idx], 1); 44 | // assert(old < capacity[itr]); //change to ring buffer? 
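// Descriptive note: initial jobs are scattered into buckets keyed by
// source-vertex range (seed[TID] / vtx_per_bucket), so every bucket only
// references vertices from one contiguous ID range; requireOneJob() later
// drains one focused bucket at a time, which is the locality this frontier
// is named for.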
45 | data[bucket_idx][old] = tmp; 46 | } 47 | } 48 | template 49 | struct LocalitySampleFrontier { 50 | // sampleJob *data[bucket_num]; 51 | sampleJob **data, **data_h; 52 | int capacity; 53 | uint vtx_per_bucket; 54 | int *sizes; 55 | int *floor; 56 | int *focus; 57 | uint _bucket_num; 58 | uint size_per_bucket; 59 | bool finish; 60 | // int hop_num = depth; 61 | LocalitySampleFrontier() {} 62 | void Allocate(size_t _size, uint num_vtx) { 63 | _bucket_num = bucket_num; 64 | 65 | vtx_per_bucket = num_vtx / bucket_num + 1; 66 | 67 | assert(num_vtx != 0); 68 | assert(vtx_per_bucket != 0); 69 | capacity = _size; 70 | // CUDA_RT_CALL(MyCudaMalloc(&seed, capacity * sizeof(T))); 71 | uint length = 1; 72 | size_per_bucket = 73 | capacity * 26; // / bucket_num, hard to tell the buffer size 74 | // paster(size_per_bucket); 75 | // paster(bucket_num); 76 | data_h = new sampleJob *[bucket_num]; 77 | CUDA_RT_CALL(MyCudaMalloc(&data, bucket_num * sizeof(sampleJob *))); 78 | 79 | // printf("%s:%d %s for %d\n", __FILE__, __LINE__, __FUNCTION__, 0); 80 | for (size_t i = 0; i < bucket_num; i++) { 81 | // capacity[0] *= hops[i]; 82 | CUDA_RT_CALL( 83 | MyCudaMalloc(&data_h[i], size_per_bucket * sizeof(sampleJob))); 84 | } 85 | LOG(" frontier overhead %d MB\n ", 86 | bucket_num * size_per_bucket * sizeof(sampleJob) / 1024 / 1024); 87 | CUDA_RT_CALL(MyCudaMalloc(&sizes, bucket_num * sizeof(int))); 88 | CUDA_RT_CALL(MyCudaMalloc(&floor, bucket_num * sizeof(int))); 89 | CUDA_RT_CALL(MyCudaMalloc(&focus, sizeof(int))); 90 | 91 | CUDA_RT_CALL(cudaMemcpy(data, data_h, bucket_num * sizeof(sampleJob *), 92 | cudaMemcpyHostToDevice)); 93 | // printf("%s:%d %s for %d\n", __FILE__, __LINE__, __FUNCTION__, 0); 94 | } 95 | __host__ void Free() { 96 | CUDA_RT_CALL(cudaFree(data)); 97 | for (size_t i = 0; i < bucket_num; i++) CUDA_RT_CALL(cudaFree(data_h[i])); 98 | CUDA_RT_CALL(cudaFree(sizes)); 99 | CUDA_RT_CALL(cudaFree(floor)); 100 | CUDA_RT_CALL(cudaFree(focus)); 101 | } 102 | __host__ void Init(uint *seed, uint size) { 103 | InitLocalitySampleFrontier 104 | <<>>(data, seed, size, vtx_per_bucket, sizes); 105 | // int tmp = size; 106 | // CUDA_RT_CALL(cudaMemset(sizes, 0, bucket_num * sizeof(int))); 107 | CUDA_RT_CALL(cudaMemset(floor, 0, bucket_num * sizeof(int))); 108 | CUDA_RT_CALL(cudaMemset(focus, 0, sizeof(int))); 109 | // CUDA_RT_CALL( 110 | // cudaMemcpy(&sizes[0], &tmp, sizeof(int), cudaMemcpyHostToDevice)); 111 | } 112 | // __device__ void CheckActive(uint itr) {} 113 | __forceinline__ __device__ void Add(uint instance_idx, uint offset, uint itr, 114 | T src_id) { 115 | assert(vtx_per_bucket != 0); 116 | uint bucket_idx = src_id / (vtx_per_bucket); 117 | 118 | size_t old = atomicAdd(&sizes[bucket_idx], 1); 119 | assert(old < size_per_bucket); // change to ring buffer? 
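// Descriptive note: the per-bucket buffers are sized heuristically in
// Allocate() (size_per_bucket = capacity * 26), so exceeding that bound trips
// the assert above instead of wrapping around (hence the "ring buffer" todo).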
120 | 121 | sampleJob tmp = {instance_idx, offset, src_id, itr, true}; 122 | data[bucket_idx][old] = tmp; 123 | } 124 | // __device__ void Reset(uint itr) { size[itr % 3] = 0; } 125 | __device__ int Size(uint bucket_idx) { return sizes[bucket_idx]; } 126 | 127 | __device__ sampleJob Get(uint bucket_idx, uint idx) { 128 | return data[bucket_idx][idx]; 129 | } 130 | 131 | __forceinline__ __device__ bool checkFocus(int idx) { 132 | if (floor[idx] < sizes[idx]) { 133 | // if (!LTID) 134 | // printf(" idx %d floor[idx] %d sizes[idx] %d focus %d \n", idx, 135 | // floor[idx], sizes[idx], *focus); 136 | return true; 137 | } else 138 | return false; 139 | } 140 | __device__ void printSize() { 141 | if (!TID) { 142 | // printf("frontier size:\n"); 143 | for (int i = 0; i < bucket_num; i++) { 144 | // if (sizes[i] != floor[i]) 145 | printf(" frontier depth %d size %d floor %d\n", i, sizes[i], 146 | floor[i]); 147 | } 148 | } 149 | } 150 | 151 | __forceinline__ __device__ bool needWork() { 152 | // if (!LTID) printf(" block %d checking\n", blockIdx.x); 153 | 154 | for (int i = 0; i < bucket_num; i++) { 155 | if (checkFocus(i)) return true; 156 | } 157 | return false; 158 | // } else 159 | } 160 | __forceinline__ __device__ void nextFocus(int current_focus) { 161 | for (size_t i = 1; i < bucket_num; i++) { 162 | int tmp = (current_focus + 1) % bucket_num; 163 | if (checkFocus(tmp)) { 164 | // CAS? 165 | int old = atomicCAS(focus, current_focus, tmp); 166 | // return tmp; 167 | } 168 | } 169 | } 170 | 171 | __forceinline__ __device__ sampleJob requireOneJobFromBucket( 172 | int bucket_idx) { 173 | int old = atomicAdd(&floor[bucket_idx], 1); 174 | // int old = atomicAggInc(&floor[bucket_idx]); 175 | if (old < sizes[bucket_idx]) { 176 | return data[bucket_idx][old]; 177 | } else { 178 | atomicSub(&floor[bucket_idx], 1); 179 | sampleJob tmp = {0, 0, 0, 0, false}; 180 | return tmp; 181 | } 182 | } 183 | __forceinline__ __device__ sampleJob requireOneJob() { 184 | // printf("not implemented\n"); 185 | int current_focus = *focus; 186 | if (!checkFocus(current_focus)) { 187 | nextFocus(current_focus); 188 | } 189 | current_focus = *focus; 190 | return requireOneJobFromBucket(current_focus); 191 | } 192 | }; 193 | 194 | template 195 | struct SampleFrontier { 196 | sampleJob *data[depth]; 197 | int capacity[depth]; 198 | int *sizes; 199 | int *floor; 200 | int hop_num = depth; 201 | // T *seed; 202 | 203 | void Allocate(size_t _size, uint *hops, uint num_vtx = 0) { 204 | capacity[0] = _size; 205 | // CUDA_RT_CALL(MyCudaMalloc(&seed, capacity * sizeof(T))); 206 | uint length = 1; 207 | u64 l = 0; 208 | for (size_t i = 0; i < depth; i++) { 209 | // capacity[0] *= hops[i]; 210 | CUDA_RT_CALL(MyCudaMalloc(&data[i], capacity[i] * sizeof(sampleJob))); 211 | if (i + 1 < depth) capacity[i + 1] = capacity[i] * hops[i + 1]; 212 | l += capacity[i] * sizeof(sampleJob); 213 | } 214 | CUDA_RT_CALL(MyCudaMalloc(&sizes, depth * sizeof(int))); 215 | CUDA_RT_CALL(MyCudaMalloc(&floor, depth * sizeof(int))); 216 | // printf("%s:%d %s for %d\n", __FILE__, __LINE__, __FUNCTION__, 0); 217 | LOG(" frontier overhead %d MB\n ", l / 1024 / 1024); 218 | } 219 | __device__ void printSize() { 220 | if (!TID) { 221 | printf("frontier size:\n"); 222 | for (size_t i = 0; i < depth; i++) { 223 | printf(" depth %d size %d floor %d\n", i, sizes[i], floor[i]); 224 | } 225 | } 226 | } 227 | __host__ void Init(uint *seed, uint size, uint vtx_per_bucket = 0) { 228 | InitSampleFrontier<<>>(data[0], seed, size); 229 | int tmp = size; 230 | 
CUDA_RT_CALL(cudaMemset(sizes, 0, depth * sizeof(int))); 231 | CUDA_RT_CALL(cudaMemset(floor, 0, depth * sizeof(int))); 232 | CUDA_RT_CALL( 233 | cudaMemcpy(&sizes[0], &tmp, sizeof(int), cudaMemcpyHostToDevice)); 234 | } 235 | // __device__ void CheckActive(uint itr) {} 236 | __device__ void Add(uint instance_idx, uint offset, uint itr, T src_id) { 237 | size_t old = atomicAdd(&sizes[itr], 1); 238 | #ifndef NDEBUG 239 | if (old >= capacity[itr]) 240 | printf("%s:%d %s vec overflow capacity %u loc %llu\n", __FILE__, __LINE__, 241 | __FUNCTION__, capacity[itr], (unsigned long long)old); 242 | #endif 243 | assert(old < capacity[itr]); 244 | sampleJob tmp = {instance_idx, offset, src_id, 0, true}; 245 | data[itr][old] = tmp; 246 | } 247 | // __device__ void Reset(uint itr) { size[itr % 3] = 0; } 248 | __device__ int Size(uint itr) { return sizes[itr]; } 249 | __device__ sampleJob Get(uint itr, uint idx) { return data[itr][idx]; } 250 | __device__ sampleJob requireOneJob(uint itr) { 251 | int old = atomicAggInc(&floor[itr]); 252 | // size_t old = atomicAdd(&floor[itr], 1); 253 | if (old < sizes[itr]) { 254 | return data[itr][old]; 255 | } else { 256 | atomicSub(&floor[itr], 1); 257 | sampleJob tmp = {0, 0, 0, 0, false}; 258 | return tmp; 259 | } 260 | } 261 | }; 262 | 263 | #endif -------------------------------------------------------------------------------- /src/online_sample_twc.cu: -------------------------------------------------------------------------------- 1 | #include "app.cuh" 2 | 3 | using block_table = 4 | alias_table_constructor_shmem; 5 | using warp_table = alias_table_constructor_shmem>; 6 | using subwarp_table = 7 | alias_table_constructor_shmem, 8 | BufferType::SHMEM, 9 | AliasTableStorePolicy::NONE>; 10 | 11 | static __device__ void SampleSubwarpCentic(sample_result &result, 12 | gpu_graph *ggraph, curandState state, 13 | int current_itr, int idx, 14 | int node_id, void *buffer) { 15 | subwarp_table *tables = (subwarp_table *)buffer; 16 | subwarp_table *table = &tables[SWID]; 17 | bool not_all_zero = 18 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 19 | ggraph->getDegree(node_id), current_itr, node_id); 20 | if (not_all_zero) { 21 | table->construct(); 22 | table->roll_atomic(&state, result); 23 | } 24 | table->Clean(); 25 | } 26 | 27 | static __device__ void SampleWarpCentic(sample_result &result, 28 | gpu_graph *ggraph, curandState state, 29 | int current_itr, int idx, int node_id, 30 | void *buffer) { 31 | // subwarp_table *tables = (subwarp_table *)buffer; 32 | warp_table *tables = 33 | (warp_table *)((void *)buffer + 34 | WID * WARP_SIZE / SUBWARP_SIZE * sizeof(subwarp_table)); 35 | warp_table *table = &tables[0]; 36 | bool not_all_zero = 37 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 38 | ggraph->getDegree(node_id), current_itr, node_id); 39 | if (not_all_zero) { 40 | table->construct(); 41 | table->roll_atomic(&state, result); 42 | } 43 | table->Clean(); 44 | } 45 | 46 | static __device__ void SampleBlockCentic(sample_result &result, 47 | gpu_graph *ggraph, curandState state, 48 | int current_itr, int node_id, 49 | void *buffer, 50 | Vector_pack *vector_packs) { 51 | block_table *tables = (block_table *)buffer; 52 | block_table *table = &tables[0]; 53 | table->loadGlobalBuffer(vector_packs); 54 | __syncthreads(); 55 | bool not_all_zero = 56 | table->loadFromGraph(ggraph->getNeighborPtr(node_id), ggraph, 57 | ggraph->getDegree(node_id), current_itr, node_id); 58 | __syncthreads(); 59 | if (not_all_zero) { 60 | table->constructBC(); 
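// Descriptive note: block-centric path for high-degree vertices. The alias
// table is built cooperatively by the whole thread block in the per-block
// global-memory buffer (vector_packs) loaded above; the draw count below is
// capped at the vertex degree before rolling.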
61 | uint target_size = 62 | MIN(ggraph->getDegree(node_id), result.hops[current_itr + 1]); 63 | table->roll_atomic(target_size, &state, result); 64 | } 65 | __syncthreads(); 66 | table->Clean(); 67 | } 68 | 69 | // template 70 | // struct worker; 71 | 72 | static __global__ void sample_kernel(Sampler *sampler, 73 | Vector_pack *vector_pack) { 74 | sample_result &result = sampler->result; 75 | gpu_graph *ggraph = &sampler->ggraph; 76 | Vector_pack *vector_packs = &vector_pack[BID]; 77 | __shared__ subwarp_table table[SUBWARP_PER_BLK]; 78 | 79 | void *buffer = &table[0]; 80 | curandState state; 81 | curand_init(TID, 0, 0, &state); 82 | 83 | thread_block tb = this_thread_block(); 84 | auto warp = tiled_partition<32>(tb); 85 | auto subwarp = tiled_partition<4>(warp); 86 | 87 | __shared__ uint current_itr; 88 | if (threadIdx.x == 0) current_itr = 0; 89 | __syncthreads(); 90 | for (; current_itr < result.hop_num - 1;) // for 2-hop, hop_num=3 91 | { 92 | sample_job job; 93 | __threadfence_block(); 94 | 95 | if (subwarp.thread_rank() == 0) { 96 | job = result.requireOneJob(current_itr); 97 | } 98 | subwarp.sync(); 99 | job.idx = subwarp.shfl(job.idx, 0); 100 | job.val = subwarp.shfl(job.val, 0); 101 | job.node_id = subwarp.shfl(job.node_id, 0); 102 | subwarp.sync(); 103 | while (job.val) { 104 | subwarp.sync(); 105 | if (ggraph->getDegree(job.node_id) < ELE_PER_SUBWARP) { 106 | SampleSubwarpCentic(result, ggraph, state, current_itr, job.idx, 107 | job.node_id, buffer); 108 | } else if (ggraph->getDegree(job.node_id) < ELE_PER_WARP) { 109 | if (subwarp.thread_rank() == 0) { 110 | result.AddMidDegree(current_itr, job.node_id); 111 | } 112 | } else { 113 | #ifdef skip8k 114 | if (subwarp.thread_rank() == 0 && ggraph->getDegree(job.node_id) < 8000) 115 | #else 116 | if (subwarp.thread_rank() == 0) 117 | #endif // skip8k 118 | { 119 | result.AddHighDegree(current_itr, job.node_id); 120 | } 121 | } 122 | subwarp.sync(); 123 | if (subwarp.thread_rank() == 0) job = result.requireOneJob(current_itr); 124 | job.idx = subwarp.shfl(job.idx, 0); 125 | job.val = subwarp.shfl(job.val, 0); 126 | job.node_id = subwarp.shfl(job.node_id, 0); 127 | subwarp.sync(); 128 | } 129 | 130 | // warp process 131 | warp.sync(); 132 | // __syncwarp(FULL_WARP_MASK); 133 | if (warp.thread_rank() == 0) 134 | job = result.requireOneMidDegreeJob(current_itr); 135 | warp.sync(); 136 | job.idx = warp.shfl(job.idx, 0); 137 | job.val = warp.shfl(job.val, 0); 138 | job.node_id = warp.shfl(job.node_id, 0); 139 | warp.sync(); 140 | while (job.val) { 141 | SampleWarpCentic(result, ggraph, state, current_itr, job.idx, job.node_id, 142 | buffer); 143 | warp.sync(); 144 | if (warp.thread_rank() == 0) 145 | job = result.requireOneMidDegreeJob(current_itr); 146 | job.idx = warp.shfl(job.idx, 0); 147 | job.val = warp.shfl(job.val, 0); 148 | job.node_id = warp.shfl(job.node_id, 0); 149 | } 150 | 151 | // block process 152 | __syncthreads(); // cannot reach?? 
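// Descriptive note: block-level tier of this kernel's degree-based scheduling.
// Sub-warps handled low-degree vertices (< ELE_PER_SUBWARP) directly and
// deferred larger ones via AddMidDegree/AddHighDegree; warps have just drained
// the mid-degree queue. Now the whole block takes high-degree jobs one at a
// time and processes each with SampleBlockCentic.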
153 | __shared__ sample_job high_degree_job; 154 | if (LTID == 0) { 155 | job = result.requireOneHighDegreeJob(current_itr); 156 | high_degree_job.val = job.val; 157 | high_degree_job.node_id = job.node_id; 158 | } 159 | __syncthreads(); 160 | while (high_degree_job.val) { 161 | SampleBlockCentic(result, ggraph, state, current_itr, 162 | high_degree_job.node_id, buffer, 163 | vector_packs); // buffer_pointer 164 | __syncthreads(); 165 | if (LTID == 0) { 166 | job = result.requireOneHighDegreeJob(current_itr); 167 | high_degree_job.val = job.val; 168 | high_degree_job.node_id = job.node_id; 169 | } 170 | __syncthreads(); 171 | } 172 | __syncthreads(); 173 | if (threadIdx.x == 0) { 174 | result.NextItr(current_itr); 175 | } 176 | __syncthreads(); 177 | } 178 | } 179 | 180 | static __global__ void print_result(Sampler *sampler) { 181 | sampler->result.PrintResult(); 182 | } 183 | 184 | // void Start_high_degree(Sampler sampler) 185 | float OnlineGBSampleTWC(Sampler &sampler) { 186 | // orkut max degree 932101 187 | 188 | LOG("%s\n", __FUNCTION__); 189 | #ifdef skip8k 190 | LOG("skipping 8k\n"); 191 | #endif // skip8k 192 | // paster( 193 | // sizeof(alias_table_constructor_shmem>) 195 | // * 196 | // BLOCK_SIZE / SUBWARP_SIZE); 197 | // paster(sizeof(warp_table) * WARP_PER_BLK); 198 | 199 | int device; 200 | cudaDeviceProp prop; 201 | cudaGetDevice(&device); 202 | cudaGetDeviceProperties(&prop, device); 203 | int n_sm = prop.multiProcessorCount; 204 | 205 | Sampler *sampler_ptr; 206 | MyCudaMalloc(&sampler_ptr, sizeof(Sampler)); 207 | CUDA_RT_CALL(cudaMemcpy(sampler_ptr, &sampler, sizeof(Sampler), 208 | cudaMemcpyHostToDevice)); 209 | double start_time, total_time; 210 | init_kernel_ptr<<<1, 32, 0, 0>>>(sampler_ptr, true); 211 | 212 | // allocate global buffer 213 | int block_num = n_sm * FLAGS_m; 214 | int gbuff_size = sampler.ggraph.MaxDegree; 215 | 216 | LOG("alllocate GMEM buffer %d MB\n", 217 | block_num * gbuff_size * MEM_PER_ELE / 1024 / 1024); 218 | 219 | Vector_pack *vector_pack_h = new Vector_pack[block_num]; 220 | for (size_t i = 0; i < block_num; i++) { 221 | vector_pack_h[i].Allocate(gbuff_size, sampler.device_id); 222 | } 223 | CUDA_RT_CALL(cudaDeviceSynchronize()); 224 | #pragma omp barrier 225 | Vector_pack *vector_packs; 226 | CUDA_RT_CALL( 227 | MyCudaMalloc(&vector_packs, sizeof(Vector_pack) * block_num)); 228 | CUDA_RT_CALL(cudaMemcpy(vector_packs, vector_pack_h, 229 | sizeof(Vector_pack) * block_num, 230 | cudaMemcpyHostToDevice)); 231 | 232 | // Global_buffer 233 | CUDA_RT_CALL(cudaDeviceSynchronize()); 234 | start_time = wtime(); 235 | if (FLAGS_debug) 236 | sample_kernel<<<1, BLOCK_SIZE, 0, 0>>>(sampler_ptr, vector_packs); 237 | else 238 | sample_kernel<<>>(sampler_ptr, vector_packs); 239 | 240 | CUDA_RT_CALL(cudaDeviceSynchronize()); 241 | // CUDA_RT_CALL(cudaPeekAtLastError()); 242 | total_time = wtime() - start_time; 243 | #pragma omp barrier 244 | LOG("Device %d sampling time:\t%.2f ms ratio:\t %.1f MSEPS\n", 245 | omp_get_thread_num(), total_time * 1000, 246 | static_cast(sampler.result.GetSampledNumber() / total_time / 247 | 1000000)); 248 | sampler.sampled_edges = sampler.result.GetSampledNumber(); 249 | LOG("sampled_edges %d\n", sampler.sampled_edges); 250 | if (FLAGS_printresult) print_result<<<1, 32, 0, 0>>>(sampler_ptr); 251 | CUDA_RT_CALL(cudaDeviceSynchronize()); 252 | return total_time; 253 | } 254 | --------------------------------------------------------------------------------