├── .dockerignore ├── .gitignore ├── .gitmodules ├── 3rdparty ├── dgl.patch ├── dgl_install.sh ├── pyg_install.sh └── readme.md ├── Dockerfile ├── LICENSE ├── README.md ├── build.sh ├── datagen ├── README.md ├── papers100M.py ├── products.py ├── twitter.sh └── uk-2006-05.sh ├── docker ├── .condarc ├── pip.conf └── sources.list ├── example ├── auto_runner │ ├── common.py │ ├── hete │ │ └── run_hete.sh │ ├── run_dgl.py │ ├── run_pyg.py │ ├── run_samgraph.py │ ├── run_sgnn.py │ ├── run_sgnn_dgl.py │ ├── single │ │ └── run_single.sh │ └── switch │ │ └── run_switch.sh ├── dgl │ ├── common_config.py │ ├── multi_gpu │ │ ├── async │ │ │ ├── common_config.py │ │ │ ├── train_accuracy.py │ │ │ └── train_graphsage.py │ │ ├── common_config.py │ │ ├── train_accuracy.py │ │ ├── train_gcn.py │ │ ├── train_graphsage.py │ │ └── train_pinsage.py │ ├── train_accuracy.py │ ├── train_gat.py │ ├── train_gcn.py │ ├── train_graphsage.py │ └── train_pinsage.py ├── env_options.sh ├── pyg │ ├── common_config.py │ ├── multi_gpu │ │ ├── common_config.py │ │ ├── train_gcn.py │ │ └── train_graphsage.py │ ├── train_gcn.py │ └── train_graphsage.py ├── runner.py ├── runner_helper.py ├── samgraph │ ├── balance_switcher │ │ ├── common_config.py │ │ ├── train_pinsage.py │ │ └── train_pinsage_no_switch_async.py │ ├── common_config.py │ ├── multi_gpu │ │ ├── async │ │ │ ├── common_config.py │ │ │ ├── train_accuracy.py │ │ │ └── train_graphsage.py │ │ ├── common_config.py │ │ ├── train_accuracy.py │ │ ├── train_gcn.py │ │ ├── train_graphsage.py │ │ └── train_pinsage.py │ ├── sgnn │ │ ├── common_config.py │ │ ├── train_accuracy.py │ │ ├── train_gcn.py │ │ ├── train_graphsage.py │ │ └── train_pinsage.py │ ├── sgnn_dgl │ │ ├── common_config.py │ │ ├── train_gcn.py │ │ ├── train_graphsage.py │ │ └── train_pinsage.py │ ├── train_gat.py │ ├── train_gcn.py │ ├── train_graphsage.py │ └── train_pinsage.py └── train_accuracy.py ├── exp ├── Makefile ├── README.md ├── common │ ├── common_parser.py │ ├── 
runner_helper.py │ └── runner_helper2.py ├── fig10 │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig11a │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig11b │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig11c │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig12 │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig13 │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig14a │ ├── README.md │ ├── logtable_def.py │ ├── run.py │ └── scale-gcn.plt ├── fig14b │ ├── README.md │ ├── logtable_def.py │ ├── run.py │ └── scale-gcn.plt ├── fig15 │ ├── README.md │ ├── logtable_def.py │ ├── run.py │ └── scale-break.plt ├── fig16a │ ├── README.md │ ├── fig16a.plt │ ├── parse_acc.py │ └── run.sh ├── fig17a │ ├── README.md │ ├── fig17a.plt │ └── run.sh ├── fig17b │ ├── README.md │ ├── fig17b.plt │ └── run.sh ├── fig4a │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig4b │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig5a │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── fig5b │ ├── parser.py │ ├── plot.plt │ ├── readme.md │ └── runner.py ├── table1 │ ├── README.md │ ├── logtable_def.py │ └── run.py ├── table2 │ ├── parser.py │ ├── readme.md │ └── runner.py ├── table4 │ ├── README.md │ ├── logtable_def.py │ └── run.py ├── table5 │ ├── README.md │ ├── logtable_def.py │ └── run.py └── table6 │ ├── parser.py │ ├── readme.md │ ├── runner.py │ └── runner.sh ├── samgraph.exp ├── samgraph.lds ├── samgraph ├── __init__.py ├── common │ ├── __init__.py │ ├── common.cc │ ├── common.h │ ├── constant.cc │ ├── constant.h │ ├── cpu │ │ ├── cpu_common.h │ │ ├── cpu_device.cc │ │ ├── cpu_device.h │ │ ├── cpu_engine.cc │ │ ├── cpu_engine.h │ │ ├── cpu_extraction.cc │ │ ├── cpu_function.h │ │ ├── cpu_hashtable.h │ │ ├── cpu_hashtable0.cc │ │ ├── cpu_hashtable0.h │ │ ├── cpu_hashtable1.cc │ │ ├── cpu_hashtable1.h │ │ ├── 
cpu_hashtable2.cc │ │ ├── cpu_hashtable2.h │ │ ├── cpu_loops.cc │ │ ├── cpu_loops.h │ │ ├── cpu_loops_arch0.cc │ │ ├── cpu_random.cc │ │ ├── cpu_sampling_khop0.cc │ │ ├── cpu_sampling_khop1.cc │ │ ├── cpu_sampling_khop2.cc │ │ ├── cpu_sampling_random_walk.cc │ │ ├── cpu_sampling_weighted_khop.cc │ │ ├── cpu_sanity_check.cc │ │ ├── cpu_shuffler.cc │ │ ├── cpu_shuffler.h │ │ ├── mmap_cpu_device.cc │ │ └── mmap_cpu_device.h │ ├── cuda │ │ ├── cuda_cache.cu │ │ ├── cuda_cache_manager.h │ │ ├── cuda_cache_manager_device.cu │ │ ├── cuda_cache_manager_host.cc │ │ ├── cuda_common.h │ │ ├── cuda_device.cc │ │ ├── cuda_device.h │ │ ├── cuda_engine.cc │ │ ├── cuda_engine.h │ │ ├── cuda_extract_neighbour.cu │ │ ├── cuda_extraction.cu │ │ ├── cuda_frequency_hashmap.cu │ │ ├── cuda_frequency_hashmap.h │ │ ├── cuda_function.h │ │ ├── cuda_hashtable.cu │ │ ├── cuda_hashtable.h │ │ ├── cuda_loops.cc │ │ ├── cuda_loops.h │ │ ├── cuda_loops_arch1.cc │ │ ├── cuda_loops_arch2.cc │ │ ├── cuda_loops_arch3.cc │ │ ├── cuda_loops_arch4.cc │ │ ├── cuda_loops_arch7.cc │ │ ├── cuda_mapping.cu │ │ ├── cuda_random_states.cu │ │ ├── cuda_random_states.h │ │ ├── cuda_sampling_khop0.cu │ │ ├── cuda_sampling_khop1.cu │ │ ├── cuda_sampling_khop2.cu │ │ ├── cuda_sampling_random_walk.cu │ │ ├── cuda_sampling_weighted_khop.cu │ │ ├── cuda_sampling_weighted_khop_hash_dedup.cu │ │ ├── cuda_sampling_weighted_khop_prefix.cu │ │ ├── cuda_sanity_check.cu │ │ ├── cuda_shuffler.cc │ │ ├── cuda_shuffler.h │ │ ├── cuda_utils.h │ │ ├── pre_sampler.cc │ │ └── pre_sampler.h │ ├── device.cc │ ├── device.h │ ├── dist │ │ ├── dist_cache_manager.h │ │ ├── dist_cache_manager_device.cu │ │ ├── dist_cache_manager_host.cc │ │ ├── dist_engine.cc │ │ ├── dist_engine.h │ │ ├── dist_loops.cc │ │ ├── dist_loops.h │ │ ├── dist_loops_arch5.cc │ │ ├── dist_loops_arch6.cc │ │ ├── dist_shuffler.cc │ │ ├── dist_shuffler.h │ │ ├── dist_shuffler_aligned.cc │ │ ├── dist_shuffler_aligned.h │ │ ├── pre_sampler.cc │ │ └── pre_sampler.h │ 
├── engine.cc │ ├── engine.h │ ├── function.h │ ├── graph_pool.cc │ ├── graph_pool.h │ ├── logging.cc │ ├── logging.h │ ├── memory_queue.cc │ ├── memory_queue.h │ ├── operation.cc │ ├── operation.h │ ├── profiler.cc │ ├── profiler.h │ ├── run_config.cc │ ├── run_config.h │ ├── task_queue.cc │ ├── task_queue.h │ ├── timer.h │ ├── workspace_pool.cc │ └── workspace_pool.h └── torch │ ├── __init__.py │ ├── adapter.cc │ ├── adapter.h │ └── adapter.py ├── setup.py ├── tests ├── CMakeLists.txt ├── Makefile ├── build.sh ├── cuda_class_test.cu ├── device_query_test.cc ├── memcpy_test.cc ├── memory_race_test.cu └── test_common │ ├── common.h │ └── timer.h └── utility ├── data-process ├── CMakeLists.txt ├── common │ ├── graph_loader.cc │ ├── graph_loader.h │ ├── options.cc │ ├── options.h │ └── utils.h ├── dataset │ ├── comfriendster.ipynb │ ├── comfriendster_coo_generator.cc │ ├── comfriendster_csr_generator.cc │ ├── papers100M.ipynb │ ├── products.ipynb │ ├── reddit.ipynb │ ├── reddit.py │ └── twitter.cc └── toolkit │ ├── bandwidth │ ├── mem_bandwidth.cc │ └── memcpy_test.cc │ ├── cache │ ├── cache_by_degree.cc │ ├── cache_by_degree_hop.cc │ ├── cache_by_fake_optimal.cc │ ├── cache_by_heuristic.cc │ └── cache_by_random.cc │ ├── degree │ └── degree_info.cc │ ├── generator │ ├── 32to64.cc │ ├── coo_to_dataset.cc │ └── nodeset_generator.cc │ ├── load │ └── load_mem.cc │ ├── memory │ └── memory.py │ ├── property │ ├── csr_checker.cc │ └── graph_property.cc │ ├── train_graph_size │ └── train_graph_size.cc │ └── weight │ ├── create_alias_table.cc │ └── create_prob_prefix_table.cc ├── fastgraph ├── fastgraph │ ├── __init__.py │ ├── dataset_loader.py │ └── meta_reader.py └── setup.py ├── fg_install.sh └── webgraph ├── Makefile ├── pom.xml └── src └── main └── java └── ipads └── samgraph └── webgraph └── WebgraphDecoder.java /.dockerignore: -------------------------------------------------------------------------------- 1 | **/.git* 2 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # vscode 2 | .vscode 3 | *.gz 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | /dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | bin/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | # pycharm 112 | .idea 113 | 114 | # mac 115 | .DS_Store 
116 | 117 | # for development 118 | scripts/ 119 | exps/ 120 | 121 | # language server 122 | .ccls-cache/ 123 | .ccls 124 | .clangd 125 | compile_commands.json 126 | 127 | # tests 128 | tests/*.o 129 | tests/*.out 130 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "3rdparty/cub"] 2 | path = 3rdparty/cub 3 | url = https://github.com/NVIDIA/cub.git 4 | [submodule "3rdparty/parallel-hashmap"] 5 | path = 3rdparty/parallel-hashmap 6 | url = https://github.com/greg7mdp/parallel-hashmap.git 7 | [submodule "3rdparty/dgl"] 8 | path = 3rdparty/dgl 9 | url = https://github.com/dmlc/dgl.git 10 | [submodule "3rdparty/CLI11"] 11 | path = 3rdparty/CLI11 12 | url = https://github.com/CLIUtils/CLI11.git 13 | -------------------------------------------------------------------------------- /3rdparty/dgl_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HERE="$(dirname $(readlink -f $0))" 4 | 5 | echo $HERE 6 | 7 | pushd "$HERE/dgl" 8 | 9 | git apply ../dgl.patch # patching for dataset loading 10 | 11 | export CUDNN_LIBRARY=$CONDA_PREFIX/lib 12 | export CUDNN_LIBRARY_PATH=$CONDA_PREFIX/lib 13 | export CUDNN_ROOT=$CONDA_PREFIX 14 | export CUDNN_INCLUDE_DIR=$CONDA_PREFIX/include 15 | export CUDNN_INCLUDE_PATH=$CONDA_PREFIX/include 16 | cmake -S . 
-B build -DUSE_CUDA=ON -DBUILD_TORCH=ON -DCMAKE_BUILD_TYPE=Release 17 | 18 | pushd build 19 | make -j 20 | popd 21 | 22 | pushd python 23 | python setup.py install 24 | popd 25 | 26 | popd 27 | -------------------------------------------------------------------------------- /3rdparty/pyg_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # FORCE_CUDA=1 pip install --no-cache-dir --verbose torch-scatter==2.0.8 \ 4 | pip install torch-scatter==2.0.7 -f https://data.pyg.org/whl/torch-1.7.0+cu101.html \ 5 | && pip install torch-sparse==0.6.12 -f https://data.pyg.org/whl/torch-1.7.0+cu101.html \ 6 | && pip install torch-geometric==2.0.1 \ 7 | && pip install torch-cluster==1.5.9 -f https://data.pyg.org/whl/torch-1.7.0+cu101.html \ 8 | && pip install torch-spline-conv==1.2.1 -f https://data.pyg.org/whl/torch-1.7.0+cu101.html 9 | -------------------------------------------------------------------------------- /3rdparty/readme.md: -------------------------------------------------------------------------------- 1 | # Build & Install DGL 2 | 3 | First apply our patch: 4 | ```bash 5 | cd 3rdparty/dgl 6 | git apply ../dgl.patch 7 | ``` 8 | 9 | Then build dgl: 10 | ```bash 11 | export CUDNN_LIBRARY=$CONDA_PREFIX/lib 12 | export CUDNN_LIBRARY_PATH=$CONDA_PREFIX/lib 13 | export CUDNN_ROOT=$CONDA_PREFIX 14 | export CUDNN_INCLUDE_DIR=$CONDA_PREFIX/include 15 | export CUDNN_INCLUDE_PATH=$CONDA_PREFIX/include 16 | cmake -S . -B build -DUSE_CUDA=ON -DBUILD_TORCH=ON -DCMAKE_BUILD_TYPE=Release 17 | pushd build 18 | make -j 19 | popd build 20 | ``` 21 | 22 | If you want to specify path of cuda toolkit or you do not have `/usr/local/cuda`, then you must pass `-DCUDA_TOOLKIT_ROOT_DIR=` to cmake above. 
23 | 24 | Lastly, install dgl: 25 | ```bash 26 | pushd python 27 | python setup.py install 28 | popd 29 | ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 2 | 3 | # change software source 4 | COPY ./docker/sources.list /etc/apt/sources.list 5 | COPY ./docker/.condarc /root/.condarc 6 | COPY ./docker/pip.conf /root/.pip/pip.conf 7 | 8 | # apt software 9 | RUN apt-get update && apt-get install -y wget gnuplot git\ 10 | && apt-get clean \ 11 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 12 | 13 | # miniconda 14 | WORKDIR /app 15 | ENV PATH="/miniconda3/bin:$PATH" 16 | # installation 17 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda-latest.sh \ 18 | && bash ./miniconda-latest.sh -b -p /miniconda3 \ 19 | && ln -s /miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh \ 20 | && echo ". /miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc \ 21 | && find /miniconda3/ -follow -type f -name '*.a' -delete \ 22 | && find /miniconda3/ -follow -type f -name '*.js.map' -delete \ 23 | && conda clean -afy 24 | # create environment 25 | RUN conda create -n fgnn_env cmake cudnn==7.6.5 python==3.8 \ 26 | pytorch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 cudatoolkit=10.1 -c pytorch -y \ 27 | && conda clean -afy \ 28 | && echo "conda activate fgnn_env" >> ~/.bashrc 29 | 30 | # Make RUN commands use the new environment: 31 | SHELL ["conda", "run", "--no-capture-output", "-n", "fgnn_env", "/bin/bash", "-c"] 32 | 33 | WORKDIR /app/source 34 | COPY . 
./fgnn 35 | # install dgl 36 | RUN pip install 'numpy>=1.14.0' 'scipy>=1.1.0' 'networkx>=2.1' 'requests>=2.19.0' \ 37 | && bash ./fgnn/3rdparty/dgl_install.sh 38 | # install fastgraph 39 | RUN bash ./fgnn/utility/fg_install.sh 40 | # install pyg 41 | RUN bash ./fgnn/3rdparty/pyg_install.sh 42 | 43 | # install fgnn 44 | RUN pushd ./fgnn \ 45 | && bash ./build.sh \ 46 | && rm -rf build \ 47 | && rm -rf 3rdparty/dgl/build \ 48 | && popd \ 49 | && echo "ulimit -l unlimited" >> ~/.bashrc 50 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## ./build.sh [-c] 3 | ## -c for clean build 4 | export MAX_JOBS=40 5 | if [ "$1" == '-c' ]; then 6 | echo "Cleaning..." 7 | python3 setup.py clean -q | \ 8 | grep -v '^g\+\+' | \ 9 | grep -v '^[a-zA-Z\-]*ed ' | \ 10 | grep -v '^[a-zA-Z\-]*ing ' 11 | fi 12 | echo "Building..." 13 | python3 setup.py build -q | \ 14 | grep -v -e '^\[[0-9]*/[0-9]*\]' | \ 15 | grep -v 'valid for C/ObjC but not for C++' | \ 16 | grep -v '^g\+\+' | \ 17 | grep -v '^[a-zA-Z\-]*ed ' | \ 18 | grep -v '^[a-zA-Z\-]*ing ' 19 | echo "Installing..." 20 | python3 setup.py install -q | \ 21 | grep -v -e '^\[[0-9]*/[0-9]*\]' | \ 22 | grep -v '^g\+\+' | \ 23 | grep -v '^[a-zA-Z\-]*ed ' | \ 24 | grep -v '^[a-zA-Z\-]*ing ' 25 | echo "Done." 26 | -------------------------------------------------------------------------------- /datagen/twitter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | RAW_DATA_DIR='/graph-learning/data-raw' 3 | TW_RAW_DATA_DIR="${RAW_DATA_DIR}/twitter" 4 | OUTPUT_DATA_DIR='/graph-learning/samgraph/twitter' 5 | 6 | download(){ 7 | mkdir -p ${TW_RAW_DATA_DIR} 8 | if [ ! 
-e "${TW_RAW_DATA_DIR}/twitter-2010.graph" ]; then 9 | pushd ${TW_RAW_DATA_DIR} 10 | wget http://data.law.di.unimi.it/webdata/twitter-2010/twitter-2010.graph 11 | wget http://data.law.di.unimi.it/webdata/twitter-2010/twitter-2010.properties 12 | popd 13 | elif [ ! -e "${TW_RAW_DATA_DIR}/twitter-2010.properties" ]; then 14 | pushd ${TW_RAW_DATA_DIR} 15 | wget http://data.law.di.unimi.it/webdata/twitter-2010/twitter-2010.properties 16 | popd 17 | else 18 | echo "Binary file already downloaded." 19 | fi 20 | } 21 | 22 | generate_coo(){ 23 | download 24 | if [ ! -e "${TW_RAW_DATA_DIR}/coo.bin" ]; then 25 | java -cp ../utility/webgraph/target/webgraph-0.1-SNAPSHOT.jar it.unimi.dsi.webgraph.BVGraph -o -O -L "${TW_RAW_DATA_DIR}/twitter-2010" 26 | java -cp ../utility/webgraph/target/webgraph-0.1-SNAPSHOT.jar ipads.samgraph.webgraph.WebgraphDecoder "${TW_RAW_DATA_DIR}/twitter-2010" 27 | mv ${TW_RAW_DATA_DIR}/twitter-2010_coo.bin ${TW_RAW_DATA_DIR}/coo.bin 28 | else 29 | echo "COO already generated." 30 | fi 31 | } 32 | 33 | generate_coo 34 | mkdir -p ${OUTPUT_DATA_DIR} 35 | cat << EOF > ${OUTPUT_DATA_DIR}/meta.txt 36 | NUM_NODE 41652230 37 | NUM_EDGE 1468365182 38 | FEAT_DIM 256 39 | NUM_CLASS 150 40 | NUM_TRAIN_SET 416500 41 | NUM_VALID_SET 100000 42 | NUM_TEST_SET 200000 43 | EOF 44 | 45 | ../utility/data-process/build/coo-to-dataset -g twitter 46 | -------------------------------------------------------------------------------- /datagen/uk-2006-05.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | RAW_DATA_DIR='/graph-learning/data-raw' 3 | UK_RAW_DATA_DIR="${RAW_DATA_DIR}/uk-2006-05" 4 | OUTPUT_DATA_DIR='/graph-learning/samgraph/uk-2006-05' 5 | 6 | download(){ 7 | mkdir -p ${UK_RAW_DATA_DIR} 8 | if [ ! 
-e "${UK_RAW_DATA_DIR}/uk-2006-05.graph" ]; then 9 | pushd ${UK_RAW_DATA_DIR} 10 | wget http://data.law.di.unimi.it/webdata/uk-2006-05/uk-2006-05.graph 11 | wget http://data.law.di.unimi.it/webdata/uk-2006-05/uk-2006-05.properties 12 | popd 13 | elif [ ! -e "${UK_RAW_DATA_DIR}/uk-2006-05.properties" ]; then 14 | pushd ${UK_RAW_DATA_DIR} 15 | wget http://data.law.di.unimi.it/webdata/uk-2006-05/uk-2006-05.properties 16 | popd 17 | else 18 | echo "Binary file already downloaded." 19 | fi 20 | } 21 | 22 | generate_coo(){ 23 | download 24 | if [ ! -e "${UK_RAW_DATA_DIR}/coo.bin" ]; then 25 | java -cp ../utility/webgraph/target/webgraph-0.1-SNAPSHOT.jar it.unimi.dsi.webgraph.BVGraph -o -O -L "${UK_RAW_DATA_DIR}/uk-2006-05" 26 | java -cp ../utility/webgraph/target/webgraph-0.1-SNAPSHOT.jar ipads.samgraph.webgraph.WebgraphDecoder "${UK_RAW_DATA_DIR}/uk-2006-05" 27 | mv ${UK_RAW_DATA_DIR}/uk-2006-05_coo.bin ${UK_RAW_DATA_DIR}/coo.bin 28 | else 29 | echo "COO already generated." 30 | fi 31 | } 32 | 33 | generate_coo 34 | mkdir -p ${OUTPUT_DATA_DIR} 35 | cat << EOF > ${OUTPUT_DATA_DIR}/meta.txt 36 | NUM_NODE 77741046 37 | NUM_EDGE 2965197340 38 | FEAT_DIM 256 39 | NUM_CLASS 150 40 | NUM_TRAIN_SET 1000000 41 | NUM_VALID_SET 200000 42 | NUM_TEST_SET 100000 43 | EOF 44 | 45 | ../utility/data-process/build/coo-to-dataset -g uk-2006-05 46 | -------------------------------------------------------------------------------- /docker/.condarc: -------------------------------------------------------------------------------- 1 | channels: 2 | - defaults 3 | show_channel_urls: true 4 | default_channels: 5 | - http://mirrors.bfsu.edu.cn/anaconda/pkgs/main 6 | - http://mirrors.bfsu.edu.cn/anaconda/pkgs/r 7 | - http://mirrors.bfsu.edu.cn/anaconda/pkgs/msys2 8 | custom_channels: 9 | conda-forge: http://mirrors.bfsu.edu.cn/anaconda/cloud 10 | msys2: http://mirrors.bfsu.edu.cn/anaconda/cloud 11 | bioconda: http://mirrors.bfsu.edu.cn/anaconda/cloud 12 | menpo: 
http://mirrors.bfsu.edu.cn/anaconda/cloud 13 | pytorch: http://mirrors.bfsu.edu.cn/anaconda/cloud 14 | simpleitk: http://mirrors.bfsu.edu.cn/anaconda/cloud 15 | -------------------------------------------------------------------------------- /docker/pip.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | index-url = https://mirrors.aliyun.com/pypi/simple/ 3 | 4 | [install] 5 | trusted-host=mirrors.aliyun.com 6 | -------------------------------------------------------------------------------- /docker/sources.list: -------------------------------------------------------------------------------- 1 | deb http://mirrors.aliyun.com/ubuntu/ bionic main restricted universe multiverse 2 | deb-src http://mirrors.aliyun.com/ubuntu/ bionic main restricted universe multiverse 3 | 4 | deb http://mirrors.aliyun.com/ubuntu/ bionic-security main restricted universe multiverse 5 | deb-src http://mirrors.aliyun.com/ubuntu/ bionic-security main restricted universe multiverse 6 | 7 | deb http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted universe multiverse 8 | deb-src http://mirrors.aliyun.com/ubuntu/ bionic-updates main restricted universe multiverse 9 | 10 | deb http://mirrors.aliyun.com/ubuntu/ bionic-proposed main restricted universe multiverse 11 | deb-src http://mirrors.aliyun.com/ubuntu/ bionic-proposed main restricted universe multiverse 12 | 13 | deb http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse 14 | deb-src http://mirrors.aliyun.com/ubuntu/ bionic-backports main restricted universe multiverse 15 | -------------------------------------------------------------------------------- /example/auto_runner/single/run_single.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dgl_dir=../../dgl/ 3 | sam_dir=../../samgraph/multi_gpu/ 4 | 5 | TIME_STAMPS=$(date "+%Y-%m-%d_%H-%M-%S") 6 | log_dir=./run_logs/single/${TIME_STAMPS} 7 | 
num_epoch=10 8 | 9 | mkdir -p $log_dir 10 | 11 | #dgl 12 | 13 | # python ${dgl_dir}/train_gcn.py --dataset products --pipelining --num-epoch ${num_epoch} --use-gpu-sampling 2>&1 | tee ${log_dir}/dgl_gcn_products_single.log 14 | # python ${dgl_dir}/train_gcn.py --dataset papers100M --pipelining --num-epoch ${num_epoch} --use-gpu-sampling 2>&1 | tee ${log_dir}/dgl_gcn_paper_single.log 15 | # python ${dgl_dir}/train_gcn.py --dataset twitter --pipelining --num-epoch ${num_epoch} --use-gpu-sampling 2>&1 | tee ${log_dir}/dgl_gcn_twitter_single.log 16 | # 17 | # python ${dgl_dir}/train_graphsage.py --dataset products --pipelining --num-epoch ${num_epoch} --use-gpu-sampling 2>&1 | tee ${log_dir}/dgl_graphsage_products_single.log 18 | # python ${dgl_dir}/train_graphsage.py --dataset papers100M --pipelining --num-epoch ${num_epoch} --use-gpu-sampling 2>&1 | tee ${log_dir}/dgl_graphsage_papers_single.log 19 | # python ${dgl_dir}/train_graphsage.py --dataset twitter --pipelining --num-epoch ${num_epoch} --use-gpu-sampling 2>&1 | tee ${log_dir}/dgl_graphsage_twitter_single.log 20 | 21 | # multi_gpu for single gpu run 22 | python ${sam_dir}/train_gcn.py --dataset products --single-gpu --num-epoch ${num_epoch} --cache-percentage 1.0 2>&1 | tee ${log_dir}/sam_gcn_products_single.log 23 | python ${sam_dir}/train_gcn.py --dataset papers100M --single-gpu --num-epoch ${num_epoch} --cache-percentage 0.03 2>&1 | tee ${log_dir}/sam_gcn_papers_single.log 24 | 25 | # a special case, use pytorch overlap can get best performance with cache Pct. 
0.04 26 | python ${sam_dir}/train_gcn.py --dataset twitter --single-gpu --num-epoch ${num_epoch} --cache-percentage 0.0 2>&1 | tee ${log_dir}/sam_gcn_twitter_single.log 27 | 28 | python ${sam_dir}/train_graphsage.py --dataset products --single-gpu --num-epoch ${num_epoch} --cache-percentage 1.0 2>&1 | tee ${log_dir}/sam_graphsage_products_single.log 29 | python ${sam_dir}/train_graphsage.py --dataset papers100M --single-gpu --num-epoch ${num_epoch} --cache-percentage 0.07 2>&1 | tee ${log_dir}/sam_graphsage_papers_single.log 30 | python ${sam_dir}/train_graphsage.py --dataset twitter --single-gpu --num-epoch ${num_epoch} --cache-percentage 0.13 2>&1 | tee ${log_dir}/sam_graphsage_twitter_single.log 31 | 32 | python ${sam_dir}/train_pinsage.py --dataset products --single-gpu --num-epoch ${num_epoch} --cache-percentage 1.0 2>&1 | tee ${log_dir}/sam_pinsage_products_single.log 33 | python ${sam_dir}/train_pinsage.py --dataset papers100M --single-gpu --num-epoch ${num_epoch} --cache-percentage 0.03 2>&1 | tee ${log_dir}/sam_pinsage_papers_single.log 34 | python ${sam_dir}/train_pinsage.py --dataset twitter --single-gpu --num-epoch ${num_epoch} --cache-percentage 0.03 2>&1 | tee ${log_dir}/sam_pinsage_twitter_single.log 35 | -------------------------------------------------------------------------------- /example/dgl/common_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | def event_sync(): 5 | event = torch.cuda.Event(blocking=True) 6 | event.record() 7 | event.synchronize() 8 | 9 | def get_default_timeout(): 10 | # In seconds 11 | return 600 12 | 13 | def wait_and_join(processes): 14 | ret = os.waitpid(-1, 0) 15 | if os.WEXITSTATUS(ret[1]) != 0: 16 | print("Detect pid {:} error exit".format(ret[0])) 17 | for p in processes: 18 | p.kill() 19 | 20 | for p in processes: 21 | p.join() -------------------------------------------------------------------------------- 
/example/dgl/multi_gpu/async/common_config.py: -------------------------------------------------------------------------------- 1 | ../../common_config.py -------------------------------------------------------------------------------- /example/dgl/multi_gpu/async/train_accuracy.py: -------------------------------------------------------------------------------- 1 | ../../../train_accuracy.py -------------------------------------------------------------------------------- /example/dgl/multi_gpu/common_config.py: -------------------------------------------------------------------------------- 1 | ../common_config.py -------------------------------------------------------------------------------- /example/dgl/multi_gpu/train_accuracy.py: -------------------------------------------------------------------------------- 1 | ../../train_accuracy.py -------------------------------------------------------------------------------- /example/dgl/train_accuracy.py: -------------------------------------------------------------------------------- 1 | ../train_accuracy.py -------------------------------------------------------------------------------- /example/env_options.sh: -------------------------------------------------------------------------------- 1 | # usage: source env_options.sh 2 | 3 | export SAMGRAPH_LOG_LEVEL=info 4 | export SAMGRAPH_LOG_HIDE_TIME=1 5 | 6 | export SAMGRAPH_PROFILE_LEVEL=3 7 | 8 | export SAMGRAPH_LOG_NODE_ACCESS=OFF 9 | export SAMGRAPH_SANITY_CHECK=OFF 10 | export SAMGRAPH_PROFILE_CUDA=OFF -------------------------------------------------------------------------------- /example/pyg/multi_gpu/common_config.py: -------------------------------------------------------------------------------- 1 | ../common_config.py -------------------------------------------------------------------------------- /example/runner.py: -------------------------------------------------------------------------------- 1 | from runner_helper import Arch, RunConfig, ConfigList, 
App, Dataset, CachePolicy, TMP_LOG_DIR, run_in_list, SampleType, percent_gen 2 | import os 3 | 4 | def tmp_call_back(cfg: RunConfig): 5 | os.system(f"grep -A 4 'average' \"{cfg.get_log_fname()}.log\"") 6 | 7 | if __name__ == '__main__': 8 | from sys import argv 9 | do_mock = False 10 | durable_log = True 11 | for arg in argv[1:]: 12 | if arg == '-m' or arg == '--mock': 13 | do_mock = True 14 | elif arg == '-i' or arg == '--interactive': 15 | durable_log = False 16 | 17 | run_in_list(ConfigList() 18 | .select('app', [ 19 | App.gcn, 20 | # App.graphsage, 21 | # App.pinsage, 22 | ]).select('dataset', [ 23 | # Dataset.reddit, 24 | # Dataset.products, 25 | Dataset.papers100M, 26 | # Dataset.friendster, 27 | ]).select('cache_policy', [ 28 | CachePolicy.no_cache, 29 | # CachePolicy.cache_by_degree, 30 | # CachePolicy.cache_by_heuristic, 31 | # CachePolicy.dynamic_cache, 32 | ]).select('pipeline', [ 33 | False, 34 | # True, 35 | ]) 36 | # .override_arch(Arch.arch0) 37 | .override('logdir', [TMP_LOG_DIR]) 38 | .override('dataset', [ 39 | Dataset.papers100M_300, 40 | ]).override('sample_type', [ 41 | # SampleType.kKHop1, 42 | # SampleType.kWeightedKHop, 43 | SampleType.kDefaultForApp, 44 | ]).override('cache_policy', [ 45 | # CachePolicy.cache_by_degree, 46 | CachePolicy.cache_by_heuristic, 47 | # CachePolicy.cache_by_presample, 48 | # CachePolicy.cache_by_degree_hop, 49 | # CachePolicy.cache_by_presample_static, 50 | # CachePolicy.cache_by_fake_optimal, 51 | # CachePolicy.cache_by_presample_1, 52 | # CachePolicy.cache_by_presample_2, 53 | ]).override('batch_size',[ 54 | # 1000, 55 | 8000, 56 | ]).override('cache_percent', [ 57 | # 0.0, 58 | 0.01,0.02,0.03,0.04,0.05,0.10,0.15,0.20,0.25,0.30,0.35,0.40,0.45,0.50, 59 | # 0.55, 0.60, 60 | # 1, 61 | ]) 62 | .conf_list 63 | ,do_mock 64 | ,durable_log 65 | # , tmp_call_back 66 | ) 67 | 68 | -------------------------------------------------------------------------------- /example/samgraph/balance_switcher/common_config.py: 
-------------------------------------------------------------------------------- 1 | ../common_config.py -------------------------------------------------------------------------------- /example/samgraph/multi_gpu/async/common_config.py: -------------------------------------------------------------------------------- 1 | ../../common_config.py -------------------------------------------------------------------------------- /example/samgraph/multi_gpu/async/train_accuracy.py: -------------------------------------------------------------------------------- 1 | ../../../train_accuracy.py -------------------------------------------------------------------------------- /example/samgraph/multi_gpu/common_config.py: -------------------------------------------------------------------------------- 1 | ../common_config.py -------------------------------------------------------------------------------- /example/samgraph/multi_gpu/train_accuracy.py: -------------------------------------------------------------------------------- 1 | ../../train_accuracy.py -------------------------------------------------------------------------------- /example/samgraph/sgnn/common_config.py: -------------------------------------------------------------------------------- 1 | ../common_config.py -------------------------------------------------------------------------------- /example/samgraph/sgnn/train_accuracy.py: -------------------------------------------------------------------------------- 1 | ../../train_accuracy.py -------------------------------------------------------------------------------- /example/samgraph/sgnn_dgl/common_config.py: -------------------------------------------------------------------------------- 1 | ../common_config.py -------------------------------------------------------------------------------- /exp/Makefile: -------------------------------------------------------------------------------- 1 | run_list_1=fig4a.run fig4b.run fig5a.run fig5b.run fig10.run 
fig11a.run fig11b.run fig11c.run fig12.run fig13.run table2.run 2 | run_list_2=table1.run table4.run table5.run fig14a.run fig14b.run fig15.run 3 | run_list_3=fig16a.run fig17a.run fig17b.run 4 | clean_list_1=$(patsubst %.run,%.clean,$(run_list_1)) 5 | 6 | all: $(run_list_1) $(run_list_2) $(run_list_3) 7 | 8 | # target looks like "fig4a.run", and "$(patsubst %.run,%,$@)" gives "fig4a" 9 | $(run_list_1): 10 | cd $(patsubst %.run,%,$@); if [ ! -e "run-logs/run.fin" ]; then python3 runner.py && touch run-logs/run.fin ; fi 11 | cd $(patsubst %.run,%,$@); python3 parser.py; if [ -e "plot.plt" ]; then gnuplot plot.plt ; fi 12 | 13 | $(run_list_2): 14 | cd $(patsubst %.run,%,$@); python3 run.py 15 | 16 | $(run_list_3): 17 | cd $(patsubst %.run,%,$@); bash run.sh 18 | 19 | $(clean_list_1): 20 | rm -rf $(patsubst %.clean,%,$@)/run-logs 21 | 22 | clean: $(clean_list_1) 23 | 24 | .PHONY: clean 25 | -------------------------------------------------------------------------------- /exp/fig10/parser.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.getcwd()+'/../common') 3 | from common_parser import * 4 | from runner import cfg_list_collector 5 | 6 | selected_col = ['cache_policy', 'cache_percentage'] 7 | selected_col += ['dataset_short', 'sample_type', 'app'] 8 | selected_col += ['hit_percent', 'optimal_hit_percent'] 9 | 10 | if __name__ == '__main__': 11 | with open(f'data.dat', 'w') as f: 12 | BenchInstance.print_dat([BenchInstance().init_from_cfg(cfg) for cfg in cfg_list_collector.conf_list], f,selected_col) -------------------------------------------------------------------------------- /exp/fig10/plot.plt: -------------------------------------------------------------------------------- 1 | outputfname = "fig10.eps" 2 | dat_file='data.dat' 3 | fit_policy='degree' 4 | 5 | # col numbers 6 | col_cache_policy = 1 7 | col_cache_percent = 2 8 | col_dataset = 3 9 | col_sample_type = 4 10 | col_app = 5 11 
| col_hit_percent = 6 12 | col_optimal_hit_percent= 7 13 | 14 | set datafile sep '\t' 15 | 16 | set terminal postscript "Helvetica,16" eps enhance color dl 2 17 | set style data histogram 18 | 19 | set style histogram clustered gap 2 20 | set style fill solid border -2 21 | set pointsize 1 22 | set size 0.8,0.5 23 | set boxwidth 0.5 relative 24 | # set no zeroaxis 25 | 26 | 27 | 28 | set tics font ",14" scale 0.5 29 | 30 | set rmargin 2 31 | set lmargin 5.5 32 | set tmargin 1.5 33 | set bmargin 2.5 34 | 35 | set output outputfname 36 | 37 | #### magic to filter expected data entry from dat file 38 | format_str=" python runner.py 22 | > python parser.py 23 | > gnuplot plot.plt 24 | ``` 25 | 26 | There are serveral command line arguments for `runner.py`: 27 | 28 | - `-m`, `--mock`: Show the run command for each test case but not actually run it 29 | - `-i`, `--interactive`: run these tests with output printed to terminal, rather than redirec to log directory. 30 | 31 | The number of epochs to run is set to 3 for fast reproduce. You may change line containing `.override('epoch', [3])` to change the numer of epochs. 32 | 33 | 34 | ## Output Example 35 | 36 | `python runner.py` will redirect all logs to `run-logs` directory. 
An short example of the `data.dat` looks like this: 37 | ```sh 38 | > cat data.dat 39 | cache_policy cache_percentage dataset_short sample_type app hit_percent optimal_hit_percent 40 | random 0.0 PA kKHop2 gcn 0.000 0.000 41 | random 5.0 PA kKHop2 gcn 5.000 97.164 42 | random 10.0 PA kKHop2 gcn 9.960 100.000 43 | random 15.0 PA kKHop2 gcn 14.940 100.000 44 | random 20.0 PA kKHop2 gcn 19.960 100.000 45 | random 25.0 PA kKHop2 gcn 24.960 100.000 46 | random 30.0 PA kKHop2 gcn 29.960 100.000 47 | degree 0.0 PA kKHop2 gcn 0.000 0.000 48 | degree 5.0 PA kKHop2 gcn 28.930 97.164 49 | degree 10.0 PA kKHop2 gcn 49.570 100.000 50 | degree 15.0 PA kKHop2 gcn 63.960 100.000 51 | degree 20.0 PA kKHop2 gcn 75.110 100.000 52 | degree 25.0 PA kKHop2 gcn 82.900 100.000 53 | degree 30.0 PA kKHop2 gcn 88.400 100.000 54 | presample_1 0.0 PA kKHop2 gcn 0.000 0.000 55 | presample_1 5.0 PA kKHop2 gcn 96.020 97.164 56 | presample_1 10.0 PA kKHop2 gcn 98.290 100.000 57 | presample_1 15.0 PA kKHop2 gcn 98.510 100.000 58 | presample_1 20.0 PA kKHop2 gcn 98.520 100.000 59 | presample_1 25.0 PA kKHop2 gcn 98.530 100.000 60 | presample_1 30.0 PA kKHop2 gcn 98.690 100.000 61 | 62 | ``` 63 | 64 | ## FAQ -------------------------------------------------------------------------------- /exp/fig11b/runner.py: -------------------------------------------------------------------------------- 1 | import os, sys, copy 2 | sys.path.append(os.getcwd()+'/../common') 3 | from runner_helper import Arch, RunConfig, ConfigList, App, Dataset, CachePolicy, run_in_list, SampleType, percent_gen 4 | 5 | do_mock = False 6 | durable_log = True 7 | 8 | def copy_optimal(cfg: RunConfig): 9 | os.system(f"rm -f \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"") 10 | os.system(f"mv node_access_optimal_cache_hit* \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"") 11 | os.system(f"rm -f node_access_optimal_cache_*") 12 | 13 | cur_common_base = (ConfigList() 14 | .override('app', [App.gcn]) 15 | .override('sample_type', 
[SampleType.kKHop2]) 16 | .override('dataset', [Dataset.papers100M]) 17 | .override('cache_policy', [CachePolicy.cache_by_degree]) 18 | .override('copy_job', [1]) 19 | .override('sample_job', [1]) 20 | .override('pipeline', [False]) 21 | .override('epoch', [1]) 22 | .override('logdir', ['run-logs',]) 23 | .override('profile_level', [3]) 24 | .override('log_level', ['error']) 25 | .override('multi_gpu', [True])) 26 | 27 | cfg_list_collector = ConfigList.Empty() 28 | cfg_list_collector.concat(cur_common_base.copy() 29 | .override('cache_percent', percent_gen(0, 30, 1)) 30 | .override('cache_policy', [ 31 | CachePolicy.cache_by_random, 32 | CachePolicy.cache_by_degree, 33 | CachePolicy.cache_by_presample_1,]) 34 | ) 35 | 36 | if __name__ == '__main__': 37 | from sys import argv 38 | for arg in argv[1:]: 39 | if arg == '-m' or arg == '--mock': 40 | do_mock = True 41 | elif arg == '-i' or arg == '--interactive': 42 | durable_log = False 43 | 44 | run_in_list(cfg_list_collector.conf_list, do_mock, durable_log) 45 | # optimal results requires a seperate run 46 | cur_common_base.override('arch', [Arch.arch3]).override('multi_gpu', [False]).override('report_optimal', [1]).override('cache_percent', [0]) 47 | run_in_list(cur_common_base.conf_list, do_mock, durable_log, copy_optimal) 48 | 49 | -------------------------------------------------------------------------------- /exp/fig11c/parser.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.getcwd()+'/../common') 3 | from common_parser import * 4 | from runner import cfg_list_collector 5 | import pandas 6 | 7 | selected_col = ['cache_policy', 'cache_percentage'] 8 | selected_col += ['dataset_short', 'sample_type', 'app'] 9 | selected_col += ['hit_percent', 'optimal_hit_percent', 'batch_feat_nbytes', 'batch_miss_nbytes'] 10 | 11 | if __name__ == '__main__': 12 | with open(f'data.dat', 'w') as f: 13 | 
BenchInstance.print_dat([BenchInstance().init_from_cfg(cfg) for cfg in cfg_list_collector.conf_list], f,selected_col) 14 | 15 | with open(f'data.dat', 'r') as f: 16 | table = pandas.read_csv(f, sep='\t') 17 | # by fixing cache size to 5GB, we may calculate performance under different dimension by varying cache rate 18 | # 54228 / 128 * new_dim * cache_rate/100 = 5000 19 | # thus we have: cache_rate = 5000 * 128 / cache_rate / 54228 * 100 20 | table['dim'] = 5120 * 128 / table['cache_percentage'] / 54228 * 100 21 | table['new_batch_feat_GB'] = table['batch_feat_nbytes'] / 128 * table['dim'] / 1024/1024/1024 22 | table['new_batch_miss_GB'] = (100 - table['hit_percent'])/100 * table['new_batch_feat_GB'] 23 | 24 | with open(f'data.dat', 'w') as f: 25 | table.to_csv(f, sep='\t', index=None) -------------------------------------------------------------------------------- /exp/fig11c/plot.plt: -------------------------------------------------------------------------------- 1 | outputfname = "fig11c.eps" 2 | dat_file='data.dat' 3 | 4 | # col numbers 5 | col_cache_policy=1 6 | col_cache_percent=2 7 | col_dataset=3 8 | col_hit_percent=6 9 | col_optimal_hit_percent=7 10 | col_dim=10 11 | col_feat_GB=11 12 | col_miss_GB=12 13 | 14 | # cache_policy=1 15 | # cache_percentage=2 16 | # dataset_short=3 17 | # sample_type=4 18 | # app=5 19 | # hit_percent=6 20 | # optimal_hit_percent=7 21 | # batch_feat_nbytes=8 22 | # batch_miss_nbytes=9 23 | # dim=10 24 | # new_batch_feat_GB=11 25 | # new_batch_miss_GB=12 26 | 27 | set fit logfile '/dev/null' 28 | set fit quiet 29 | set datafile sep '\t' 30 | 31 | set terminal postscript "Helvetica,16" eps enhance color dl 2 32 | set pointsize 1 33 | set size 0.4,0.5 34 | set zeroaxis 35 | 36 | set tics font ",14" scale 0.5 37 | 38 | set rmargin 2 39 | set lmargin 5.5 40 | set tmargin 1.5 41 | set bmargin 2.5 42 | 43 | set output outputfname 44 | 45 | #### magic to filter expected data entry from dat file 46 | format_str=" python run.py 27 | ``` 
28 | 29 | 30 | 31 | There are several command line arguments: 32 | 33 | - `--num-epoch`: Number of epochs to run per test case. The default value is set to 3 for fast run. In the paper, we set it to 10. 34 | - `--mock`: Show the run command for each test case but not actually run it. 35 | - `--rerun-tests` Rerun the most recently tests. Sometimes not all the test cases run successfully(e.g. cache percentage is too large and leads to OOM). You can adjust the configurations and rerun the tests again. The `--rerun-tests` option only reruns those failed test cases. 36 | 37 | 38 | 39 | ```sh 40 | > python run.py --help 41 | usage: Table 1 Runner [-h] [--num-epoch NUM_EPOCH] [--mock] [--rerun-tests] 42 | 43 | optional arguments: 44 | -h, --help show this help message and exit 45 | --num-epoch NUM_EPOCH 46 | Number of epochs to run per test case 47 | --mock Show the run command for each test case but not actually run it 48 | --rerun-tests Rerun the most recently tests 49 | ``` 50 | 51 | 52 | 53 | 54 | 55 | ## Output Example 56 | 57 | `python run.py` will create a new folder(e.g. `output_2022-01-29_20-10-39`) as result. 58 | 59 | `python run.py --rerun-tests` does not create a new folder and reuse the last created folder. 60 | 61 | ```sh 62 | > tree output_2022-01-29_20-10-39 -L 1 63 | output_2022-01-29_20-10-39 64 | ├── fig14a.eps # Output figure 65 | ├── fig14a-full.res # Output data with comments 66 | ├── fig14a.res # Output data 67 | ├── logs_dgl 68 | └── logs_fgnn 69 | 70 | 2 directories, 3 files 71 | ``` 72 | 73 | 74 | 75 | ```sh 76 | > cat output_2022-01-29_20-10-39/fig14a.res 77 | "GPUs" "DGL" "SGNN" "1S" "2S" "3S" 78 | 1 18.45 10.02 - - - 79 | 2 9.85 6.94 4.10 - - 80 | 3 7.15 5.15 2.16 4.20 - 81 | 4 6.01 4.18 1.48 2.15 4.15 82 | ... 
83 | ``` 84 | 85 | 86 | 87 | 88 | 89 | ## FAQ 90 | -------------------------------------------------------------------------------- /exp/fig14a/scale-gcn.plt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | 3 | reset 4 | set output outfile 5 | 6 | set terminal postscript "Helvetica,16" eps enhance color dl 2 7 | set pointsize 1 8 | set size 0.4,0.4 9 | set zeroaxis 10 | 11 | set tics font ",14" scale 0.5 12 | 13 | # set rmargin 2 #2 14 | # set lmargin 5 #5.5 15 | # set tmargin 0.5 #1.5 16 | # set bmargin 1 #2.5 17 | 18 | set rmargin 2 19 | set lmargin 5.5 20 | set tmargin 1.5 21 | set bmargin 2.5 22 | 23 | ### Key 24 | set key inside right Right top enhanced nobox 25 | set key samplen 1.5 spacing 1.5 height 0.2 font ',11' noopaque #maxrow 3 #at 7.8, 15 26 | 27 | 28 | ### X-axis 29 | set xrange [1:8] 30 | set xtics 1,1,8 31 | set xlabel "Number of GPUs" offset 0,0.7 32 | set xtics nomirror offset -0.2,0.3 33 | 34 | ## Y-axis 35 | set yrange [0:16] 36 | set ytics 0,4,16 37 | set ylabel "Epoch Time (sec)" offset 1.5,0 38 | set ytics offset 0.5,0 #format "%.1f" #nomirror 39 | 40 | 41 | plot resfile u ($1):($2) t "DGL" w lp lt 1 lw 3 pt 4 ps 1.5 lc rgb '#c00000', \ 42 | resfile u ($1):($3) t "T_{SOTA}" w lp lt 1 lw 3 pt 3 ps 1.5 lc rgb '#ff9900', \ 43 | resfile u ($1):($4) t "FGNN/1S" w lp lt 1 lw 3 pt 6 ps 1.5 lc rgb '#008800', \ 44 | resfile u ($1):($5) t "FGNN/2S" w lp lt 1 lw 3 pt 8 ps 1.5 lc rgb '#00bb00', \ 45 | resfile u ($1):($6) t "FGNN/3S" w lp lt 1 lw 3 pt 2 ps 1.5 lc rgb '#00dd00' 46 | -------------------------------------------------------------------------------- /exp/fig14b/README.md: -------------------------------------------------------------------------------- 1 | # Figure 14b: GCN Scalability Test 2 | 3 | The goal of this experiment is to show the scalability performance of DGL, SGNN and FGNN on GCN model. 4 | 5 | Dataset: twitter 6 | 7 | - `run.py` is the runner script. 
8 | - `logtable_def.py` defines log parsing rules. 9 | 10 | 11 | 12 | ## Hardware Requirements 13 | 14 | - Paper's configurations: **8x16GB** NVIDIA V100 GPUs, **2x24** cores Intel 8163 CPU 15 | - For other hardware configurations, you may need to modify the ①Number of GPU. ②Number of CPU threads ③Number of vertex (in percentage, 0<=pct. <=1) to be cached. 16 | - **DGL:** Modify `L66-L67(#GPU)` in `run.py`. 17 | - **FGNN:** Modify `L108(#CPU threads), L118-L153(#GPU, #Cache percentage)` in `run.py`. 18 | - **SGNN:** Modify `L187(#Cache percentage), L190(#GPU)` in `run.py`. 19 | 20 | 21 | 22 | ## Run Command 23 | 24 | 25 | ```sh 26 | > python run.py 27 | ``` 28 | 29 | 30 | 31 | There are several command line arguments: 32 | 33 | - `--num-epoch`: Number of epochs to run per test case. The default value is set to 3 for fast run. In the paper, we set it to 10. 34 | - `--mock`: Show the run command for each test case but not actually run it. 35 | - `--rerun-tests` Rerun the most recently tests. Sometimes not all the test cases run successfully(e.g. cache percentage is too large and leads to OOM). You can adjust the configurations and rerun the tests again. The `--rerun-tests` option only reruns those failed test cases. 36 | 37 | 38 | 39 | ```sh 40 | > python run.py --help 41 | usage: Table 1 Runner [-h] [--num-epoch NUM_EPOCH] [--mock] [--rerun-tests] 42 | 43 | optional arguments: 44 | -h, --help show this help message and exit 45 | --num-epoch NUM_EPOCH 46 | Number of epochs to run per test case 47 | --mock Show the run command for each test case but not actually run it 48 | --rerun-tests Rerun the most recently tests 49 | ``` 50 | 51 | 52 | 53 | 54 | 55 | ## Output Example 56 | 57 | `python run.py` will create a new folder(e.g. `output_2022-01-29_20-10-39`) as result. 58 | 59 | `python run.py --rerun-tests` does not create a new folder and reuse the last created folder. 
60 | 61 | ```sh 62 | > tree output_2022-01-29_20-10-39 -L 1 63 | output_2022-01-29_20-10-39 64 | ├── fig14b.eps # Output figure 65 | ├── fig14b-full.res # Output data with comments 66 | ├── fig14b.res # Output data 67 | ├── logs_dgl 68 | ├── logs_fgnn 69 | └── logs_sgnn 70 | 71 | 3 directories, 3 files 72 | ``` 73 | 74 | 75 | 76 | ```sh 77 | > cat output_2022-01-29_20-10-39/fig14b.res 78 | "GPUs" "DGL" "SGNN" "1S" "2S" "3S" 79 | 1 11.86 5.47 - - - 80 | 2 7.07 4.26 1.69 - - 81 | 3 5.65 3.20 0.95 1.70 - 82 | 4 4.88 2.65 0.71 0.97 1.69 83 | ... 84 | ``` 85 | 86 | 87 | 88 | 89 | 90 | ## FAQ 91 | -------------------------------------------------------------------------------- /exp/fig14b/scale-gcn.plt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | 3 | reset 4 | set output outfile 5 | 6 | set terminal postscript "Helvetica,16" eps enhance color dl 2 7 | set pointsize 1 8 | set size 0.4,0.4 9 | set zeroaxis 10 | 11 | set tics font ",14" scale 0.5 12 | 13 | # set rmargin 2 #2 14 | # set lmargin 5 #5.5 15 | # set tmargin 0.5 #1.5 16 | # set bmargin 1 #2.5 17 | 18 | set rmargin 2 19 | set lmargin 5.5 20 | set tmargin 1.5 21 | set bmargin 2.5 22 | 23 | ### Key 24 | set key inside right Right top enhanced nobox 25 | set key samplen 1.5 spacing 1.5 height 0.2 font ',11' noopaque #maxrow 3 #at 7.8, 15 26 | 27 | 28 | ### X-axis 29 | set xrange [1:8] 30 | set xtics 1,1,8 31 | set xlabel "Number of GPUs" offset 0,0.7 32 | set xtics nomirror offset -0.2,0.3 33 | 34 | ## Y-axis 35 | set yrange [0:16] 36 | set ytics 0,4,16 37 | set ylabel "Epoch Time (sec)" offset 1.5,0 38 | set ytics offset 0.5,0 #format "%.1f" #nomirror 39 | 40 | 41 | plot resfile u ($1):($2) t "DGL" w lp lt 1 lw 3 pt 4 ps 1.5 lc rgb '#c00000', \ 42 | resfile u ($1):($3) t "T_{SOTA}" w lp lt 1 lw 3 pt 3 ps 1.5 lc rgb '#ff9900', \ 43 | resfile u ($1):($4) t "FGNN/1S" w lp lt 1 lw 3 pt 6 ps 1.5 lc rgb '#008800', \ 44 | resfile u ($1):($5) t "FGNN/2S" w 
lp lt 1 lw 3 pt 8 ps 1.5 lc rgb '#00bb00', \ 45 | resfile u ($1):($6) t "FGNN/3S" w lp lt 1 lw 3 pt 2 ps 1.5 lc rgb '#00dd00' 46 | -------------------------------------------------------------------------------- /exp/fig15/scale-break.plt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | 3 | reset 4 | set output outfile 5 | set terminal postscript "Helvetica,16" eps enhance color dl 2 6 | 7 | set pointsize 1 8 | set size 0.8,0.4 9 | set zeroaxis 10 | 11 | set tics font ",14" scale 0.5 12 | 13 | # set rmargin 0 #2 14 | # set lmargin 5 #5.5 15 | # set tmargin 0.5 #1.5 16 | # set bmargin 1 #2.5 17 | 18 | 19 | set style data histogram 20 | set style histogram clustered gap 2 21 | set style fill solid border -1 22 | set boxwidth 0.6 relative 23 | 24 | 25 | ### Key 26 | set key inside right Right top enhanced nobox 27 | set key samplen 1.5 spacing 1.5 height 0.2 width 0 font ',11' #maxrows 1 at graph 0.02, graph 0.975 noopaque 28 | 29 | 30 | ## Y-axis 31 | set ylabel "Runtime (sec)" offset 1.,0 32 | set yrange [0:5] 33 | set ytics 0,1,5 34 | set ytics offset 0.5,0 #format "%.1f" #nomirror 35 | 36 | 37 | ### X-axis 38 | #set xlabel "Number of GPUs" offset 0,0.7 39 | set xrange [0:21] 40 | #set xtics 1,1,8 41 | set xtics nomirror offset -0.2,0.3 rotate by -90 42 | 43 | set arrow from 0, graph -0.3 to 0, graph 0.0 nohead lt 1 lw 2 lc "#000000" front 44 | set arrow from 8, graph -0.3 to 8, graph 1.0 nohead lt 1 lw 2 lc "#000000" front 45 | set arrow from 15, graph -0.3 to 15, graph 1.0 nohead lt 1 lw 2 lc "#000000" front 46 | set arrow from 21, graph -0.3 to 21, graph 0.0 nohead lt 1 lw 2 lc "#000000" front 47 | 48 | set datafile missing "-" 49 | 50 | 51 | plot resfile using ($2):xticlabels(1) t "Sample" w histogram lc rgb "#ff9900",\ 52 | resfile using ($3):xticlabels(1) t "Extract" w histogram lc rgb "#c00000", \ 53 | resfile using ($4):xticlabels(1) t "Train" w histogram lc rgb "#0000ee", \ 54 | resfile using 
($5):xticlabels(1) t "FGNN" w lp lt 1 lw 3 pt 6 ps 1.5 lc rgb '#000000', \ 55 | 56 | ##008800 57 | -------------------------------------------------------------------------------- /exp/fig16a/README.md: -------------------------------------------------------------------------------- 1 | # Figure 16a: Convergence Test 2 | 3 | The goal of this experiment is to give a comparison of training time for GraphSAGE to the same accuracy target between GNNLab, TSOTA and DGL on papers100M. 4 | 5 | - `run.sh` is the runner script. 6 | 7 | ## Hardware Requirements 8 | 9 | - Paper's configurations: **8x16GB** NVIDIA V100 GPUs, **2x24** cores Intel 8163 CPU 10 | - For other hardware configurations, you may need to modify the ①Total Number of Epochs ②Number of GPU. ③Number of vertex(in percentage, 0<=pct. <=1) to be cached. 11 | - **DGL**: Modify the arguments `num-epoch`, `devices` of L19 in run.sh. 12 | - **GNNLab**: Modify the arguments `num-epoch`, `num-sample-worker` and `num-train-worker`, `cache-percentage` of L20 in run.sh. 13 | - **TSOTA**: Modify the arguments `num-epoch`, `num-worker` and `cache-percentage` of L21 in run.sh. 14 | 15 | ## Run Command 16 | 17 | 18 | ```sh 19 | > bash run.sh 20 | ``` 21 | 22 | ## Output Example 23 | 24 | `bash run.sh` will create a new folder(e.g. `run_logs/acc_test/one/2022-01-29_15-28-45/`) to store log files. 25 | 26 | ```sh 27 | > tree -L 4 . 28 | . 29 | ├── acc_one.res # the results of all tests 30 | ├── fig16a.plt # drawing script of fig16a 31 | ├── fig16a.eps # fig16a 32 | ├── parse_acc.py # the script to parse the log files 33 | ├── run_logs 34 | │   └── acc_test 35 | │   └── one 36 | │   └── 2022-01-29_15-28-45 # running log files 37 | └── run.sh # the main running script 38 | ``` 39 | 40 | 41 | 42 | ```sh 43 | > cat acc_one.res 44 | system dataset batch_size time acc 45 | dgl papers 8000 2.10 1.11 46 | dgl papers 8000 7.08 20.86 47 | dgl papers 8000 11.85 33.28 48 | # ... 
49 | dgl papers 8000 780.10 56.27 50 | dgl papers 8000 783.71 56.55 51 | fgnn papers 8000 1.86 4.19 52 | fgnn papers 8000 2.10 25.99 53 | fgnn papers 8000 2.17 26.37 54 | # ... 55 | 56 | ``` 57 | 58 | 59 | 60 | 61 | ## FAQ 62 | -------------------------------------------------------------------------------- /exp/fig16a/parse_acc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding: UTF-8 -*- 3 | import argparse 4 | import re 5 | 6 | def parse_args(): 7 | argparser = argparse.ArgumentParser('Acc Timeline Parser') 8 | argparser.add_argument('-f', '--file', type=str, 9 | help='the log file path to parse') 10 | argparser.add_argument('--system', choices=['dgl', 'fgnn', 'sgnn'], 11 | type=str, help='the system name of this test, like dgl/fgnn/sgnn') 12 | argparser.add_argument('--dataset', choices=['papers', 'products'], 13 | type=str, help='the dataset of this test') 14 | argparser.add_argument('--batch-size', type=int, 15 | help='the batch size of this test') 16 | ret = vars(argparser.parse_args()) 17 | if (ret['file'] == None): 18 | argparser.error('Add --file argument') 19 | if (ret['system'] == None): 20 | argparser.error('Add --system argument') 21 | if (ret['dataset'] == None): 22 | argparser.error('Add --dataset argument') 23 | if (ret['batch_size'] == None): 24 | argparser.error('Add --batch-size argument') 25 | return ret 26 | 27 | def parse_data(file_name, system, dataset, batch_size, 28 | pattern = r'^Valid Acc: (.+)\% \| .* \| Time Cost: (.+)'): 29 | with open(file_name, 'r') as file: 30 | for line in file: 31 | m = re.match(pattern, line) 32 | if m: 33 | # print('{} {}'.format(m.group(1), m.group(2))) 34 | # system(like dgl) dataset batch_size time acc 35 | print('{}\t{}\t{}\t{}\t{}'.format( 36 | system, dataset, batch_size, m.group(2), m.group(1))) 37 | 38 | 39 | if __name__ == '__main__': 40 | arguments = parse_args() 41 | file_name = arguments['file'] 42 | system = 
arguments['system'] 43 | dataset = arguments['dataset'] 44 | batch_size = arguments['batch_size'] 45 | parse_data(file_name, system, dataset, batch_size) 46 | -------------------------------------------------------------------------------- /exp/fig16a/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | dgl_dir=../../example/dgl/multi_gpu/ 3 | sam_dir=../../example/samgraph/multi_gpu/ 4 | sgnn_dir=../../example/samgraph/sgnn/ 5 | 6 | TIME_STAMPS=$(date "+%Y-%m-%d_%H-%M-%S") 7 | log_dir=./run-logs/acc_test/one/${TIME_STAMPS} 8 | 9 | # TODO: need change these configs 10 | dgl_devices="0 1 2 3 4 5 6 7" 11 | num_sgnn_worker=8 12 | 13 | num_sam_sampler=2 14 | num_sam_trainer=6 15 | 16 | mkdir -p $log_dir 17 | 18 | # papers100M acc: 56% 19 | python ${dgl_dir}/train_graphsage.py --dataset papers100M --pipelining --report-acc 151 --num-epoch 200 --use-gpu-sampling --devices ${dgl_devices} > ${log_dir}/dgl_papers.log 2> ${log_dir}/dgl_papers.err.log 20 | python ${sam_dir}/train_graphsage.py --dataset papers100M --cache-percentage 0.20 --pipeline --report-acc 151 --num-epoch 200 --num-sample-worker ${num_sam_sampler} --num-train-worker ${num_sam_trainer} > ${log_dir}/sam_papers.log 2> ${log_dir}/sam_papers.err.log 21 | python ${sgnn_dir}/train_graphsage.py --dataset papers100M --pipeline --cache-policy degree --cache-percentage 0.11 --report-acc 151 --num-epoch 200 --num-worker ${num_sgnn_worker} > ${log_dir}/sgnn_papers.log 2> ${log_dir}/sgnn_papers.err.log 22 | 23 | num_sam_sampler=4 24 | num_sam_trainer=4 25 | 26 | # products acc: 91% 27 | # python ${dgl_dir}/train_graphsage.py --dataset products --pipelining --report-acc 25 --num-epoch 200 --use-gpu-sampling --devices ${dgl_devices} > ${log_dir}/dgl_products.log 2> ${log_dir}/dgl_products.err.log 28 | # python ${sam_dir}/train_graphsage.py --dataset products --cache-percentage 1.0 --pipeline --report-acc 25 --num-epoch 200 --num-sample-worker ${num_sam_sampler} 
--num-train-worker ${num_sam_trainer} > ${log_dir}/sam_products.log 2> ${log_dir}/sam_products.err.log 29 | # python ${sgnn_dir}/train_graphsage.py --dataset products --pipeline --cache-policy degree --cache-percentage 1.0 --report-acc 25 --num-epoch 200 --num-worker ${num_sgnn_worker} > ${log_dir}/sgnn_products.log 2> ${log_dir}/sgnn_products.err.log 30 | 31 | 32 | # parse data 33 | touch acc_one.res 34 | echo -e "system\tdataset\tbatch_size\ttime\tacc" >> acc_one.res 35 | python ./parse_acc.py -f ${log_dir}/dgl_papers.log --system dgl --dataset papers --batch-size 8000 >> acc_one.res 36 | python ./parse_acc.py -f ${log_dir}/sam_papers.log --system fgnn --dataset papers --batch-size 8000 >> acc_one.res 37 | python ./parse_acc.py -f ${log_dir}/sgnn_papers.log --system sgnn --dataset papers --batch-size 8000 >> acc_one.res 38 | 39 | # python ./parse_acc.py -f ${log_dir}/dgl_products.log --system dgl --dataset products --batch-size 8000 >> acc_one.res 40 | # python ./parse_acc.py -f ${log_dir}/sam_products.log --system fgnn --dataset products --batch-size 8000 >> acc_one.res 41 | # python ./parse_acc.py -f ${log_dir}/sgnn_products.log --system sgnn --dataset products --batch-size 8000 >> acc_one.res 42 | 43 | # gnuplot 44 | gnuplot ./fig16a.plt 45 | -------------------------------------------------------------------------------- /exp/fig17a/README.md: -------------------------------------------------------------------------------- 1 | # Figure 17a: Dynamic Switching Test 2 | 3 | The goal of this experiment is to get the runtime of one epoch in GNNLab w/ and w/o dynamic switching for training PinSAGE on Papers100M. 4 | 5 | - `run.sh` is the runner script. 6 | 7 | ## Hardware Requirements 8 | 9 | - Paper's configurations: **8x16GB** NVIDIA V100 GPUs, **2x24** cores Intel 8163 CPU 10 | - For other hardware configurations, you may need to modify the ①Number of vertex(in percentage, 0<=pct. <=1) to be cached. ②Number of GPU. 
11 | - **Original GNNLab with async training**: Modify the arguments `cache-percentage`, `num-sample-worker` and `num-train-worker` of L11-L17 in run.sh. 12 | - **GNNLab with dynamic switching**: Modify the arguments `cache-percentage`(cache percentage for trainer) and `switch-cache-percentage`(cache percentage for switcher), `num-sample-worker` and `num-train-worker` of L20-L26 in run.sh. 13 | 14 | ## Run Command 15 | 16 | 17 | ```sh 18 | > bash run.sh 19 | ``` 20 | 21 | ## Output Example 22 | 23 | `bash run.sh` will create a new folder(e.g. `run-logs/switch/2022-01-30_12-51-05/`) to store log files. 24 | 25 | ```sh 26 | > tree -L 3 . 27 | . 28 | ├── fig17a.dat # the results of this test 29 | ├── fig17a.eps # fig17a 30 | ├── fig17a.plt # drawing script of fig17a 31 | ├── run-logs 32 | │   └── switch 33 | │   └── 2022-01-30_12-51-05 # running log files 34 | └── run.sh # the main running script 35 | 36 | ``` 37 | 38 | 39 | 40 | ```sh 41 | > cat fig17a.dat 42 | Config "w/o DS" "w/ DS" 43 | "1S 1T" 6.48 3.8678 44 | "1S 2T" 3.29 2.5552 45 | # ... 
46 | 47 | ``` 48 | 49 | ## FAQ 50 | -------------------------------------------------------------------------------- /exp/fig17a/fig17a.plt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gnuplot 2 | 3 | reset 4 | set output "fig17a.eps" 5 | set terminal postscript "Helvetica,16" eps enhance color dl 2 6 | 7 | set pointsize 1 8 | set size 0.4,0.4 9 | set nozeroaxis 10 | 11 | set tics font ",14" scale 0.5 12 | 13 | set rmargin 1 #2 14 | set lmargin 5 #5.5 15 | set tmargin 0.5 #1.5 16 | set bmargin 1 #2.5 17 | 18 | 19 | set style data histogram 20 | set style histogram clustered gap 2 21 | set style fill solid border -2 22 | set boxwidth 0.6 relative 23 | 24 | 25 | ### Key 26 | set key inside right Right top enhanced nobox 27 | set key samplen 1.5 spacing 1.5 height 0.2 width 0 autotitles columnhead font ',13' #maxrows 1 at graph 0.02, graph 0.975 noopaque 28 | 29 | 30 | ## Y-axis 31 | set ylabel "Epoch Time (sec)" offset 1.,0 32 | set yrange [0:8] 33 | set ytics 0,2,8 34 | set ytics offset 0.5,0 #format "%.1f" #nomirror 35 | 36 | 37 | ### X-axis 38 | #set xlabel "Number of GPUs" offset 0,0.7 39 | set xrange [-0.5:6.5] 40 | #set xtics 1,1,8 41 | set xtics nomirror offset -0.2,0.3 rotate by -90 42 | 43 | # set arrow from 0, graph -0.3 to 0, graph 0.0 nohead lt 1 lw 2 lc "#000000" front 44 | # set arrow from 8, graph -0.3 to 8, graph 1.0 nohead lt 1 lw 2 lc "#000000" front 45 | # set arrow from 15, graph -0.3 to 15, graph 1.0 nohead lt 1 lw 2 lc "#000000" front 46 | # set arrow from 21, graph -0.3 to 21, graph 0.0 nohead lt 1 lw 2 lc "#000000" front 47 | 48 | set datafile missing "-" 49 | 50 | 51 | plot "fig17a.dat" using ($2):xticlabels(1) t "w/o DS" w histogram lc rgb "#c00000",\ 52 | "fig17a.dat" using ($3):xticlabels(1) t "w/ DS" w histogram lc rgb "#008800", \ 53 | 54 | ##008800 55 | -------------------------------------------------------------------------------- /exp/fig17b/README.md: 
-------------------------------------------------------------------------------- 1 | # Figure 17b: Performance Test on a Single GPU 2 | 3 | The goal of this experiment is to get the end-to-end performance between DGL, TSOTA and GNNLab over a single GPU. 4 | 5 | - `run.sh` is the runner script. 6 | 7 | ## Hardware Requirements 8 | 9 | - Paper's configurations: **8x16GB** NVIDIA V100 GPUs, **2x24** cores Intel 8163 CPU 10 | - For other hardware configurations, you may need to modify the ①Number of vertex(in percentage, 0<=pct. <=1) to be cached. 11 | - **GNNLab**: Modify the arguments `cache-percentage` of L27-L37 in run.sh. 12 | - **TSOTA**: Modify the arguments `cache-percentage` of L40-L50 in run.sh. 13 | 14 | ## Run Command 15 | 16 | 17 | ```sh 18 | > bash run.sh 19 | ``` 20 | 21 | ## Output Example 22 | 23 | `bash run.sh` will create a new folder(e.g. `run-logs/single/2022-01-30_02-23-22`) to store log files. 24 | 25 | ```sh 26 | > tree -L 3 . 27 | . 28 | ├── fig17b.dat # the results of this test 29 | ├── fig17b.eps # fig17b 30 | ├── fig17b.plt # drawing script of fig17b 31 | ├── run-logs 32 | │   └── single 33 | │   └── 2022-01-30_02-23-22 # running log files 34 | └── run.sh # the main running script 35 | 36 | ``` 37 | 38 | 39 | 40 | ```sh 41 | > cat fig17b.dat 42 | dataset dgl fgnn sgnn app 43 | PR 4.47 1.62 1.52 GCN 44 | TW 11.74 2.72 4.14 GCN 45 | PA 16.17 4.97 9.42 GCN 46 | PR 2.29 .47 0.43 GraphSAGE 47 | # ... 
48 | 49 | ``` 50 | 51 | ## FAQ 52 | -------------------------------------------------------------------------------- /exp/fig17b/fig17b.plt: -------------------------------------------------------------------------------- 1 | outputfname = "fig17b.eps" 2 | dat_file='fig17b.dat' 3 | 4 | set datafile sep '\t' 5 | 6 | set terminal postscript "Helvetica,16" eps enhance color dl 2 7 | set style data histogram 8 | 9 | set style histogram clustered gap 2 10 | set style fill solid border -2 11 | set pointsize 1 12 | set size 0.5,0.6 13 | set boxwidth 0.6 relative 14 | # set no zeroaxis 15 | 16 | set tics font ",14" scale 0.5 17 | 18 | set rmargin 1 19 | set lmargin 6 20 | set tmargin 0.5 21 | set bmargin 3.5 22 | 23 | set output outputfname 24 | 25 | ### Key 26 | set key inside right Right top enhanced nobox autotitles columnhead 27 | set key samplen 1.5 spacing 1.5 height 0.2 width 0.5 font ',13' #at graph 1, graph 0.975 noopaque 28 | 29 | set xrange [-.5:8.5] 30 | set xtics nomirror offset -0.2,0.3 31 | 32 | set arrow from 0-0.4,graph -0.11 to 2.3,graph -0.11 nohead lt 2 lw 1 lc "#000000" front 33 | set arrow from 3-0.4,graph -0.11 to 5.3,graph -0.11 nohead lt 2 lw 1 lc "#000000" front 34 | set arrow from 6-0.4,graph -0.11 to 8.3,graph -0.11 nohead lt 2 lw 1 lc "#000000" front 35 | set label "GCN" center at 1, graph -0.18 font ",14" tc rgb "#000000" front 36 | set label "GraphSAGE" center at 4, graph -0.18 font ",14" tc rgb "#000000" front 37 | set label "PinSAGE" center at 7, graph -0.18 font ",14" tc rgb "#000000" front 38 | 39 | set arrow from graph 0, first 100 to graph 1, first 100 nohead lt 1 lw 1 lc "#000000" front 40 | 41 | ## Y-axis 42 | set ylabel "Epoch Time (sec)" offset 1.2,0 43 | set yrange [0:20] 44 | set ytics 5 45 | set ytics offset 0.5,0 #format "%.1f" #nomirror 46 | 47 | # ^((?!PR).)*$ 48 | plot dat_file using 2:xticlabels(1) lc "#c00000" title "DGL" \ 49 | ,dat_file using 4:xticlabels(1) lc "#ff9900" title "T_{SOTA}" \ 50 | ,dat_file using 
3:xticlabels(1) lc "#008800" title "GNNLab" \ 51 | -------------------------------------------------------------------------------- /exp/fig4a/parser.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.append(os.getcwd()+'/../common') 3 | from common_parser import * 4 | from runner import cfg_list_collector 5 | 6 | selected_col = ['cache_policy', 'cache_percentage'] 7 | selected_col += ['dataset_short', 'sample_type', 'app'] 8 | selected_col += ['hit_percent', 'batch_copy_time', 'batch_train_time'] 9 | 10 | if __name__ == '__main__': 11 | with open(f'data.dat', 'w') as f: 12 | BenchInstance.print_dat([BenchInstance().init_from_cfg(cfg) for cfg in cfg_list_collector.conf_list], f,selected_col) -------------------------------------------------------------------------------- /exp/fig4a/readme.md: -------------------------------------------------------------------------------- 1 | # Figure 4a: Impact of cache ratio 2 | 3 | The goal of this experiment is to show how cache ratio affects feature extraction(i.e. cache hit rate, extraction time). 4 | This proves that by enabling GPU-based sampling, the reduced cache ratio leads to significant slowdown in feature extraction. 5 | 6 | `runner.py` runs all necessary tests and redirect logs to directory `run-logs`. 7 | `parser.py` parses results from log files and generate `data.dat`. 8 | `plot.plt` plots corresponding figure to `fig4a.eps`. 9 | 10 | ## Hardware Requirements 11 | 12 | - Paper's configurations: Two 16GB NVIDIA V100 GPUs 13 | - For other hardware configurations, you may need to modify the cache percentage 14 | - Modify `L29` in `runner.py`. `percent_gen(0, 30, 1)` means run test from cache ratio 0% to 30% with step=1%. 
15 | 16 | ## Run Command 17 | 18 | ```sh 19 | > python runner.py 20 | > python parser.py 21 | > gnuplot plot.plt 22 | ``` 23 | 24 | There are serveral command line arguments for `runner.py`: 25 | 26 | - `-m`, `--mock`: Show the run command for each test case but not actually run it 27 | - `-i`, `--interactive`: run these tests with output printed to terminal, rather than redirec to log directory. 28 | 29 | The number of epochs to run is set to 3 for fast reproduce. You may change line containing `.override('epoch', [3])` to change the numer of epochs. 30 | 31 | 32 | ## Output Example 33 | 34 | `python runner.py` will redirect all logs to `run-logs` directory. 35 | ```sh 36 | > cat data.dat 37 | cache_policy cache_percentage dataset_short sample_type app hit_percent batch_copy_time batch_train_time 38 | degree 0.0 PA kKHop2 gcn 0.000 0.0411 0.0279 39 | degree 5.0 PA kKHop2 gcn 28.920 0.0309 0.0297 40 | degree 10.0 PA kKHop2 gcn 49.570 0.0229 0.0295 41 | degree 15.0 PA kKHop2 gcn 63.960 0.0171 0.0293 42 | degree 20.0 PA kKHop2 gcn 75.120 0.0126 0.0293 43 | degree 25.0 PA kKHop2 gcn 82.890 0.0102 0.0299 44 | degree 30.0 PA kKHop2 gcn 88.410 0.0075 0.0301 45 | ``` 46 | 47 | ## FAQ -------------------------------------------------------------------------------- /exp/fig4a/runner.py: -------------------------------------------------------------------------------- 1 | import os, sys, copy 2 | sys.path.append(os.getcwd()+'/../common') 3 | from runner_helper import Arch, RunConfig, ConfigList, App, Dataset, CachePolicy, run_in_list, SampleType, percent_gen 4 | 5 | do_mock = False 6 | durable_log = True 7 | 8 | def copy_optimal(cfg: RunConfig): 9 | os.system(f"rm -f \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"") 10 | os.system(f"mv node_access_optimal_cache_hit* \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"") 11 | os.system(f"rm -f node_access_optimal_cache_*") 12 | 13 | cur_common_base = (ConfigList() 14 | .override('app', [App.gcn]) 15 | .override('sample_type', 
import os, sys
sys.path.append(os.getcwd()+'/../common')
from common_parser import *
from runner import cfg_list_collector
import pandas

# Columns pulled from each run's log, in output order.
selected_col = ['cache_policy', 'cache_percentage']
selected_col += ['dataset_short', 'sample_type', 'app']
selected_col += ['hit_percent', 'batch_feat_nbytes', 'batch_miss_nbytes']

if __name__ == '__main__':
    # Step 1: dump the measured columns for every configuration.
    with open(f'data.dat', 'w') as f:
        BenchInstance.print_dat([BenchInstance().init_from_cfg(cfg) for cfg in cfg_list_collector.conf_list], f,selected_col)

    with open(f'data.dat', 'r') as f:
        table = pandas.read_csv(f, sep='\t')
    # Step 2 (simulation): with the cache size fixed at 5GB (5120MB), a run
    # that cached X% of papers100M's 128-dim, 54228MB feature corresponds to a
    # hypothetical feature dimension `dim` satisfying:
    #     54228 / 128 * dim * X / 100 = 5120
    # i.e.  dim = 5120 * 128 / X / 54228 * 100
    # NOTE: the X == 0 row deliberately yields dim == inf and is kept as-is
    # (see the example output in the readme).
    table['dim'] = 5120 * 128 / table['cache_percentage'] / 54228 * 100
    # Per-batch transfer volume (GB) after rescaling the missed bytes from the
    # real 128-dim feature to the hypothetical `dim`-dimensional one.
    table['new_copy_GB'] = (100 - table['hit_percent'])/100 * table['batch_feat_nbytes'] / 128 * table['dim'] / 1024/1024/1024

    # Step 3: overwrite data.dat with the augmented table for plot.plt.
    with open(f'data.dat', 'w') as f:
        table.to_csv(f, sep='\t', index=None)
label 22 "Reddit[22]=602" left at 500, 108 font ",14" tc rgb "#000000" front 44 | 45 | NonZero(t)=(t == 0 ? NaN : t) 46 | 47 | plot src_file using 9:6 w l lw 3 lc "#c00000" title "Hit Rate" \ 48 | ,src_file using 9:10 w l lw 3 lc "#0000ee" title "Data Size" axis x1y2\ 49 | # ,src_file using 1:2 w l lw 5 title "Cache Rate" smooth bezier \ 50 | # ,src_file using 1:2 w l lc 0 title "Train Time" axis x1y2 \ 51 | 52 | -------------------------------------------------------------------------------- /exp/fig4b/readme.md: -------------------------------------------------------------------------------- 1 | # Figure 4b: Impact of feature dimension 2 | 3 | The goal of this experiment is to show that, under fixed cache space, how the dimension of feature affects feature extraction(i.e. cache hit rate, extraction time). 4 | 5 | `runner.py` runs all necessary tests and redirect logs to directory `run-logs`. 6 | `parser.py` parses results from log files and generate `data.dat`. 7 | `plot.plt` plots corresponding figure to `fig4b.eps`. 8 | 9 | This test is an simulation. 10 | By rerunning tests in figure 4a, we have the relationship between cache ratio and hit rate. 11 | Now we can calculate that, given a 5GB(5120MB) cache, if we could only cache X% features of papers100M dataset, what is the dimension of it. 12 | Since the original dimension is 128 and the original feature is 54228MB, we have this equation: 13 | 14 | $$ 15 | \frac{54228}{128} * new_dimension * \frac{X}{100} = 5120 16 | 17 | new_dimension = 5120 * \frac{128}{54228} * \frac{100}{X} 18 | $$ 19 | 20 | And the transfer size in a batch is 21 | $$ 22 | new_miss_size = \frac{100 - hit_percent}{100} * original_miss_size * \frac{new_dimension}{128} 23 | $$ 24 | 25 | ## Hardware Requirements 26 | 27 | - Paper's configurations: Two 16GB NVIDIA V100 GPUs 28 | - For other hardware configurations, you may need to modify the cache percentage 29 | - Modify `L29` in `runner.py`. 
import os, sys, copy
import glob
sys.path.append(os.getcwd()+'/../common')
from runner_helper import Arch, RunConfig, ConfigList, App, Dataset, CachePolicy, run_in_list, SampleType, percent_gen

do_mock = False
durable_log = True

def copy_optimal(cfg: RunConfig):
    """Archive the optimal-cache-hit profile produced by a profiling run.

    The profiler drops `node_access_optimal_cache_hit*` files into the CWD;
    keep one next to the run's log as `<log>_optimal_cache_hit.txt` and delete
    every remaining `node_access_optimal_cache_*` leftover.

    Previously this shelled out via os.system ("rm -f" / "mv"), which ignored
    errors and failed outright when the mv glob matched more than one file.
    """
    dst = f"{cfg.get_log_fname()}_optimal_cache_hit.txt"
    if os.path.exists(dst):
        os.remove(dst)
    hit_files = sorted(glob.glob('node_access_optimal_cache_hit*'))
    if hit_files:
        os.replace(hit_files[0], dst)
    for leftover in glob.glob('node_access_optimal_cache_*'):
        os.remove(leftover)

# Common configuration: GCN with 3-hop neighbour sampling on papers100M,
# degree-based caching, 3 epochs, one sampler GPU + one trainer GPU.
cur_common_base = (ConfigList()
                   .override('app', [App.gcn])
                   .override('sample_type', [SampleType.kKHop2])
                   .override('dataset', [Dataset.papers100M])
                   .override('cache_policy', [CachePolicy.cache_by_degree])
                   .override('copy_job', [1])
                   .override('sample_job', [1])
                   .override('pipeline', [False])
                   .override('epoch', [3])
                   .override('logdir', ['run-logs',])
                   .override('profile_level', [3])
                   .override('log_level', ['error'])
                   .override('multi_gpu', [True]))

# Sweep the cache ratio from 0% to 30% in 1% steps.
cfg_list_collector = ConfigList.Empty()
cfg_list_collector.concat(cur_common_base.copy()
                          .override('cache_percent', percent_gen(0, 30, 1))
                          .override('cache_policy', [CachePolicy.cache_by_degree,]))

if __name__ == '__main__':
    from sys import argv
    for arg in argv[1:]:
        if arg == '-m' or arg == '--mock':
            do_mock = True        # print run commands without executing them
        elif arg == '-i' or arg == '--interactive':
            durable_log = False   # log to terminal instead of run-logs/

    run_in_list(cfg_list_collector.conf_list, do_mock, durable_log)
    # One extra single-GPU, 0%-cache run that reports the optimal hit rate;
    # its profile is archived by copy_optimal after the run finishes.
    cur_common_base.override('arch', [Arch.arch3]).override('multi_gpu', [False]).override('report_optimal', [1]).override('cache_percent', [0])
    run_in_list(cur_common_base.conf_list, do_mock, durable_log, copy_optimal)
'cache_percentage'] 7 | selected_col += ['dataset_short', 'sample_type', 'app'] 8 | selected_col += ['hit_percent', 'optimal_hit_percent', 'batch_miss_nbytes', 'batch_feat_nbytes'] 9 | 10 | if __name__ == '__main__': 11 | with open(f'data.dat', 'w') as f: 12 | BenchInstance.print_dat([BenchInstance().init_from_cfg(cfg) for cfg in cfg_list_collector.conf_list], f,selected_col) 13 | 14 | -------------------------------------------------------------------------------- /exp/fig5a/readme.md: -------------------------------------------------------------------------------- 1 | # Figure 5: Gap between degree-based policy and optimal. 2 | 3 | The goal of this experiment is to show the gap between degree-based policy and optimal. 4 | 5 | `runner.py` runs all necessary tests and redirect logs to directory `run-logs`. 6 | `parser.py` parses results from log files and generate `data.dat`. 7 | `plot.plt` plots corresponding figure to `fig5a.eps`. 8 | 9 | degree-based results is done by running same tests like figure 4a, while the results of optimal requires an extra test and is calculated by profiling each batch's access(corresponding log file looks like `run-logs/report_optimal_..._optimal_cache_hit.txt`) 10 | 11 | Fig 5a & 5b is similar, while 5a uses 3hop neighbour sampling on papers100M dataset, and 5b uses 3hop weighted sampling on twitter dataset. 12 | 13 | ## Hardware Requirements 14 | 15 | - Paper's configurations: Two 16GB NVIDIA V100 GPUs 16 | - For other hardware configurations, you may need to modify the cache percentage 17 | - Modify `L29` in `runner.py`. `percent_gen(0, 30, 1)` means run test from cache ratio 0% to 30% with step=1%. 
import os, sys, copy
sys.path.append(os.getcwd() + '/../common')
from runner_helper import Arch, RunConfig, ConfigList, App, Dataset, CachePolicy, run_in_list, SampleType, percent_gen

do_mock = False
durable_log = True


def copy_optimal(cfg: RunConfig):
    # Stash the freshly generated optimal-cache-hit profile next to this run's
    # log file, then sweep the remaining profiler output out of the CWD.
    os.system(f"rm -f \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"")
    os.system(f"mv node_access_optimal_cache_hit* \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"")
    os.system(f"rm -f node_access_optimal_cache_*")


# Baseline: GCN, 3-hop neighbour sampling, papers100M, degree-based cache,
# 3 epochs, one sampler GPU + one trainer GPU.
cur_common_base = (
    ConfigList()
    .override('app', [App.gcn])
    .override('sample_type', [SampleType.kKHop2])
    .override('dataset', [Dataset.papers100M])
    .override('cache_policy', [CachePolicy.cache_by_degree])
    .override('copy_job', [1])
    .override('sample_job', [1])
    .override('pipeline', [False])
    .override('epoch', [3])
    .override('logdir', ['run-logs'])
    .override('profile_level', [3])
    .override('log_level', ['error'])
    .override('multi_gpu', [True])
)

# Cache-ratio sweep: 0% .. 30% in 1% steps.
cfg_list_collector = ConfigList.Empty()
cfg_list_collector.concat(
    cur_common_base.copy()
    .override('cache_percent', percent_gen(0, 30, 1))
    .override('cache_policy', [CachePolicy.cache_by_degree])
)

if __name__ == '__main__':
    for flag in sys.argv[1:]:
        if flag in ('-m', '--mock'):
            do_mock = True
        elif flag in ('-i', '--interactive'):
            durable_log = False

    run_in_list(cfg_list_collector.conf_list, do_mock, durable_log)
    # One extra single-GPU, 0%-cache run that reports the optimal hit rate.
    cur_common_base \
        .override('arch', [Arch.arch3]) \
        .override('multi_gpu', [False]) \
        .override('report_optimal', [1]) \
        .override('cache_percent', [0])
    run_in_list(cur_common_base.conf_list, do_mock, durable_log, copy_optimal)
| with open(f'data.dat', 'w') as f: 12 | BenchInstance.print_dat([BenchInstance().init_from_cfg(cfg) for cfg in cfg_list_collector.conf_list], f,selected_col) 13 | -------------------------------------------------------------------------------- /exp/fig5b/readme.md: -------------------------------------------------------------------------------- 1 | # Figure 5: Gap between degree-based policy and optimal. 2 | 3 | The goal of this experiment is to show the gap between degree-based policy and optimal. 4 | 5 | `runner.py` runs all necessary tests and redirect logs to directory `run-logs`. 6 | `parser.py` parses results from log files and generate `data.dat`. 7 | `plot.plt` plots corresponding figure to `fig5b.eps`. 8 | 9 | degree-based results is done by running same tests like figure 4a, while the results of optimal requires an extra test and is calculated by profiling each batch's access(corresponding log file looks like `run-logs/report_optimal_..._optimal_cache_hit.txt`) 10 | 11 | Fig 5a & 5b is similar, while 5a uses 3hop neighbour sampling on papers100M dataset, and 5b uses 3hop weighted sampling on twitter dataset. 12 | 13 | ## Hardware Requirements 14 | 15 | - Paper's configurations: Two 16GB NVIDIA V100 GPUs 16 | - For other hardware configurations, you may need to modify the cache percentage 17 | - Modify `L29` in `runner.py`. `percent_gen(0, 35, 1)` means run test from cache ratio 0% to 35% with step=1%. 18 | 19 | ## Run Command 20 | 21 | ```sh 22 | > python runner.py 23 | > python parser.py 24 | > gnuplot plot.plt 25 | ``` 26 | 27 | There are serveral command line arguments for `runner.py`: 28 | 29 | - `-m`, `--mock`: Show the run command for each test case but not actually run it 30 | - `-i`, `--interactive`: run these tests with output printed to terminal, rather than redirec to log directory. 31 | 32 | The number of epochs to run is set to 3 for fast reproduce. You may change line containing `.override('epoch', [3])` to change the numer of epochs. 
33 | 34 | 35 | ## Output Example 36 | 37 | `python runner.py` will redirect all logs to `run-logs` directory. An short example of the `data.dat` looks like this: 38 | ```sh 39 | > cat data.dat 40 | cache_policy cache_percentage dataset_short sample_type app hit_percent optimal_hit_percent batch_miss_nbytes batch_feat_nbytes 41 | degree 0.0 TW kWeightedKHopPrefix gcn 0.000 0.000 344889073.5094 344889073.5094 42 | degree 5.0 TW kWeightedKHopPrefix gcn 42.920 59.066 196767260.9811 344712153.3585 43 | degree 10.0 TW kWeightedKHopPrefix gcn 48.380 74.328 177905789.5849 344619288.1509 44 | degree 15.0 TW kWeightedKHopPrefix gcn 50.960 84.666 169084155.1698 344796208.3019 45 | degree 20.0 TW kWeightedKHopPrefix gcn 52.450 91.498 163933106.717 344790730.8679 46 | degree 25.0 TW kWeightedKHopPrefix gcn 53.580 95.388 160116504.1509 344915755.4717 47 | degree 30.0 TW kWeightedKHopPrefix gcn 54.520 99.279 156894604.0755 344983271.8491 48 | degree 35.0 TW kWeightedKHopPrefix gcn 55.300 100.000 154118105.3585 344808979.3208 49 | 50 | 51 | ``` 52 | 53 | ## FAQ -------------------------------------------------------------------------------- /exp/fig5b/runner.py: -------------------------------------------------------------------------------- 1 | import os, sys, copy 2 | sys.path.append(os.getcwd()+'/../common') 3 | from runner_helper import Arch, RunConfig, ConfigList, App, Dataset, CachePolicy, run_in_list, SampleType, percent_gen 4 | 5 | do_mock = False 6 | durable_log = True 7 | 8 | def copy_optimal(cfg: RunConfig): 9 | os.system(f"rm -f \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"") 10 | os.system(f"mv node_access_optimal_cache_hit* \"{cfg.get_log_fname()}_optimal_cache_hit.txt\"") 11 | os.system(f"rm -f node_access_optimal_cache_*") 12 | 13 | cur_common_base = (ConfigList() 14 | .override('app', [App.gcn]) 15 | .override('sample_type', [SampleType.kWeightedKHopPrefix]) 16 | .override('dataset', [Dataset.twitter]) 17 | .override('cache_policy', 
"""
Log table definition for Table 1: stage-time breakdown of DGL and SGNN runs.
"""
import os
import sys

sys.path.append(os.path.join(os.getcwd(), '../common'))
from runner_helper2 import *

def get_dgl_logtable():
    # 2 rows x 4 cols. Rows select the sampler placement (CPU vs GPU
    # sampling); columns are the per-stage times plus the total epoch time.
    return LogTable(
        num_row=2,
        num_col=4
    ).update_col_definition(
        col_id=0,
        definition='sample_time'
    ).update_col_definition(
        col_id=1,
        definition='copy_time'
    ).update_col_definition(
        col_id=2,
        definition='train_time'
    ).update_col_definition(
        col_id=3,
        definition='epoch_time'
    ).update_row_definition(
        row_id=0,
        col_range=[0, 3],
        BOOL_use_gpu_sampling='no_use_gpu_sampling'
    ).update_row_definition(
        row_id=1,
        col_range=[0, 3],
        BOOL_use_gpu_sampling='use_gpu_sampling'
    ).create()


def get_sgnn_logtable():
    # 4 rows x 6 cols. Rows select (arch, cache_percentage) combinations;
    # columns are the epoch-time breakdown plus the cache configuration and
    # the measured hit rate.
    return LogTable(
        num_row=4,
        num_col=6
    ).update_col_definition(
        col_id=0,
        definition='epoch_time:sample_time'
    ).update_col_definition(
        col_id=1,
        definition='epoch_time:copy_time'
    ).update_col_definition(
        col_id=2,
        definition='epoch_time:train_total'
    ).update_col_definition(
        col_id=3,
        definition='epoch_time:total'
    ).update_col_definition(
        col_id=4,
        definition='cache_percentage'
    ).update_col_definition(
        col_id=5,
        definition='cache_hit_rate'
    ).update_row_definition(
        row_id=0,
        col_range=[0, 5],
        arch='arch0',
        cache_percentage=0
    ).update_row_definition(
        row_id=1,
        col_range=[0, 5],
        arch='arch0',
        cache_percentage=0.20
    ).update_row_definition(
        row_id=2,
        col_range=[0, 5],
        arch='arch2',
        cache_percentage=0
    ).update_row_definition(
        row_id=3,
        col_range=[0, 5],
        arch='arch2',
        cache_percentage=0.07
    ).create()
columns=['dataset_short'], index=['sample_type', 'app']) 18 | a = a[[('node_access:epoch_similarity', 'PR'), ('node_access:epoch_similarity', 'TW'), ('node_access:epoch_similarity', 'PA'), ('node_access:epoch_similarity', 'UK')]] 19 | print(a, file=table2) -------------------------------------------------------------------------------- /exp/table2/readme.md: -------------------------------------------------------------------------------- 1 | # Table 2: Similarity of hot nodes across epochs 2 | 3 | The goal of this experiment is to show that, the hottest nodes in different epoch is similar. The definition of similarity is in paper. 4 | 5 | `runner.py` runs all necessary tests and redirect logs to directory `run-logs`. 6 | `parser.py` parses results from log files and generate `data.dat`. 7 | 8 | ## Hardware Requirements 9 | 10 | - Paper's configurations: Two 16GB NVIDIA V100 GPUs 11 | 12 | ## Run Command 13 | 14 | ```sh 15 | > python runner.py 16 | > python parser.py 17 | ``` 18 | 19 | There are serveral command line arguments for `runner.py`: 20 | 21 | - `-m`, `--mock`: Show the run command for each test case but not actually run it 22 | - `-i`, `--interactive`: run these tests with output printed to terminal, rather than redirec to log directory. 23 | 24 | The number of epochs to run is set to 3 for fast reproduce. You may change line containing `.override('epoch', [3])` to change the numer of epochs. 
import os, sys, copy
import glob
sys.path.append(os.getcwd()+'/../common')
from runner_helper import Arch, RunConfig, ConfigList, App, Dataset, CachePolicy, run_in_list, SampleType, percent_gen

do_mock = False
durable_log = True

def rm_optimal(_: RunConfig):
    """Post-run cleanup: delete the `node_access_optimal_cache_*` files the
    profiler leaves in the CWD.

    Previously an unchecked `os.system("rm -f ...")`; the stdlib glob/remove
    form does the same work without spawning a shell or ignoring errors.
    """
    for leftover in glob.glob('node_access_optimal_cache_*'):
        os.remove(leftover)

# Common configuration: single-GPU arch3, 0% cache, 3 epochs, with
# report_optimal enabled so per-epoch node-access similarity is logged.
cur_common_base = (ConfigList()
                   .override('copy_job', [1])
                   .override('sample_job', [1])
                   .override('pipeline', [False])
                   .override('epoch', [3])
                   .override('logdir', ['run-logs',])
                   .override('cache_policy', [CachePolicy.cache_by_degree])
                   .override('profile_level', [3])
                   .override('log_level', ['error'])
                   .override('arch', [Arch.arch3])
                   .override('multi_gpu', [False])
                   .override('cache_percent', percent_gen(0, 0, 1))
                   .override('report_optimal', [1]))

# Three (app, sampling algorithm) combinations from the paper's Table 2.
cfg_list_collector = ConfigList.Empty()
cfg_list_collector.concat(cur_common_base.copy().override('app', [App.gcn    ]).override('sample_type', [SampleType.kKHop2]))
cfg_list_collector.concat(cur_common_base.copy().override('app', [App.pinsage]).override('sample_type', [SampleType.kRandomWalk]))
cfg_list_collector.concat(cur_common_base.copy().override('app', [App.gcn    ]).override('sample_type', [SampleType.kWeightedKHopPrefix]))

# Each combination runs on all four datasets.
cfg_list_collector.override('dataset', [
    Dataset.products,
    Dataset.twitter,
    Dataset.papers100M,
    Dataset.uk_2006_05,
])


if __name__ == '__main__':
    from sys import argv
    for arg in argv[1:]:
        if arg == '-m' or arg == '--mock':
            do_mock = True        # print run commands without executing them
        elif arg == '-i' or arg == '--interactive':
            durable_log = False   # log to terminal instead of run-logs/

    run_in_list(cfg_list_collector.conf_list, do_mock, durable_log, rm_optimal)
/exp/table6/readme.md: --------------------------------------------------------------------------------

# Table 6: Init cost

The goal of this experiment is to show that the init cost of presample is small.

Before running this test, please generate features for the twitter and uk-2006-05 datasets:
```bash
dd if=/dev/zero of=/graph-learning/samgraph/twitter/feat.bin count=41652230 bs=1024
dd if=/dev/zero of=/graph-learning/samgraph/uk-2006-05/feat.bin count=77741046 bs=1024
```
This is necessary since these datasets do not provide features, while this test requires measuring the time for loading feature data from disk. Please make sure that you have enough disk space (120GB) to hold them.

This test also requires the page cache to be cleaned before each run. Normally, this can be achieved by:
```bash
sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'
```

On our provided machine, this can be achieved by `sudo /opt/clean_page_cache/run.sh` without a password.

`runner.sh` handles the page cache properly, then calls `runner.py`. It may ask you for a sudo password.
`runner.py` runs all necessary tests and redirects logs to directory `run-logs`.
`parser.py` parses results from log files and generates `data.dat`.

## Hardware Requirements

- Paper's configurations: Two 16GB NVIDIA V100 GPUs

## Run Command

```sh
> bash ./runner.sh
> python parser.py
```

There are several command line arguments for `runner.py`:

- `-m`, `--mock`: Show the run command for each test case but do not actually run it
- `-i`, `--interactive`: run these tests with output printed to the terminal, rather than redirected to the log directory.

The number of epochs to run is set to 3 for fast reproduction. You may change the line containing `.override('epoch', [3])` to change the number of epochs.
import os, sys, copy
sys.path.append(os.getcwd()+'/../common')
from runner_helper import Arch, RunConfig, ConfigList, App, Dataset, CachePolicy, run_in_list, SampleType, percent_gen

do_mock = False
durable_log = True

# Settings shared by every Table 6 run: a 3-epoch multi-GPU GCN with the
# PreSC#1 cache policy and full profiling enabled.
cur_common_base = (ConfigList()
  .override('app', [App.gcn,])
  .override('copy_job', [1])
  .override('sample_job', [1])
  .override('cache_percent', [0])
  .override('epoch', [3])
  .override('pipeline', [False,])
  .override('logdir', ['run-logs',])
  .override('cache_policy', [CachePolicy.cache_by_presample_1,])
  .override('profile_level', [3])
  .override('log_level', ['error'])
  .override('multi_gpu', [True]))

# One configuration per dataset, each with its own cache percentage.
cfg_list_collector = ConfigList.Empty()
for _dataset, _cache_pct in ((Dataset.products,   100),
                             (Dataset.twitter,     24),
                             (Dataset.papers100M,  20),
                             (Dataset.uk_2006_05,  13)):
    cfg_list_collector.concat(cur_common_base.copy()
        .override('dataset', [_dataset,])
        .override('cache_percent', percent_gen(_cache_pct, _cache_pct, 1)))

if __name__ == '__main__':
    # Minimal flag handling: -m/--mock prints commands without running them,
    # -i/--interactive streams output to the terminal instead of log files.
    for flag in sys.argv[1:]:
        if flag in ('-m', '--mock'):
            do_mock = True
        elif flag in ('-i', '--interactive'):
            durable_log = False

    run_in_list(cfg_list_collector.conf_list, do_mock, durable_log)
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
#ifndef SAMGRAPH_CPU_COMMON_H
#define SAMGRAPH_CPU_COMMON_H

namespace samgraph {
namespace common {
namespace cpu {

// Selects which CPU hashtable implementation the engine uses for node-id
// remapping; the values correspond to CPUHashTable0/1/2 (single-thread,
// simple parallel, and optimized parallel, per their header comments).
enum CPUHashType { kCPUHash0 = 0, kCPUHash1, kCPUHash2 };

}  // namespace cpu
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_CPU_COMMON_H
15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CPU_DEVICE_H 19 | #define SAMGRAPH_CPU_DEVICE_H 20 | 21 | #include "../device.h" 22 | 23 | namespace samgraph { 24 | namespace common { 25 | namespace cpu { 26 | 27 | class CPUDevice final : public Device { 28 | public: 29 | void SetDevice(Context ctx) override; 30 | void *AllocDataSpace(Context ctx, size_t nbytes, 31 | size_t alignment = kAllocAlignment) override; 32 | void FreeDataSpace(Context ctx, void *ptr) override; 33 | void *AllocWorkspace(Context ctx, size_t nbytes, 34 | double scale = Constant::kAllocScale) override; 35 | void FreeWorkspace(Context ctx, void *ptr, size_t nbytes = 0) override; 36 | void CopyDataFromTo(const void *from, size_t from_offset, void *to, 37 | size_t to_offset, size_t nbytes, Context ctx_from, 38 | Context ctx_to, StreamHandle stream) override; 39 | 40 | void StreamSync(Context ctx, StreamHandle stream) override; 41 | 42 | static const std::shared_ptr &Global(); 43 | }; 44 | 45 | } // namespace cpu 46 | } // namespace common 47 | } // namespace samgraph 48 | 49 | #endif // SAMGRAPH_CPU_DEVICE_H 50 | -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_engine.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CPU_ENGINE_H 19 | #define SAMGRAPH_CPU_ENGINE_H 20 | 21 | #include 22 | 23 | #include 24 | 25 | #include "../cuda/cuda_cache_manager.h" 26 | #include "../engine.h" 27 | #include "../logging.h" 28 | #include "cpu_hashtable.h" 29 | #include "cpu_shuffler.h" 30 | 31 | namespace samgraph { 32 | namespace common { 33 | namespace cpu { 34 | 35 | class CPUEngine : public Engine { 36 | public: 37 | CPUEngine(); 38 | 39 | void Init() override; 40 | void Start() override; 41 | void Shutdown() override; 42 | void RunSampleOnce() override; 43 | void ExamineDataset() override; 44 | 45 | CPUShuffler* GetShuffler() { return _shuffler; } 46 | cudaStream_t GetWorkStream() { return _work_stream; } 47 | CPUHashTable* GetHashTable() { return _hash_table; } 48 | cuda::GPUCacheManager* GetCacheManager() { return _cache_manager; } 49 | 50 | static CPUEngine* Get() { return dynamic_cast(Engine::_engine); } 51 | 52 | private: 53 | // Task queue 54 | std::vector _threads; 55 | 56 | cudaStream_t _work_stream; 57 | // Random node batch generator 58 | CPUShuffler* _shuffler; 59 | // Hash table 60 | CPUHashTable* _hash_table; 61 | // GPU cache manager 62 | cuda::GPUCacheManager* _cache_manager; 63 | 64 | void ArchCheck() override; 65 | std::unordered_map GetGraphFileCtx() override; 66 | }; 67 | 68 | } // namespace cpu 69 | } // namespace common 70 | } // namespace samgraph 71 | 72 | #endif // SAMGRAPH_CPU_ENGINE_H -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_hashtable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CPU_HASHTABLE_H 19 | #define SAMGRAPH_CPU_HASHTABLE_H 20 | 21 | #include "../common.h" 22 | 23 | namespace samgraph { 24 | namespace common { 25 | namespace cpu { 26 | 27 | class CPUHashTable { 28 | public: 29 | virtual ~CPUHashTable() {} 30 | virtual void Populate(const IdType *input, const size_t num_input) = 0; 31 | virtual void MapNodes(IdType *ouput, size_t num_output) = 0; 32 | virtual void MapEdges(const IdType *src, const IdType *dst, const size_t len, 33 | IdType *new_src, IdType *new_dst) = 0; 34 | virtual void Reset() = 0; 35 | virtual size_t NumItems() const = 0; 36 | }; 37 | 38 | } // namespace cpu 39 | } // namespace common 40 | } // namespace samgraph 41 | 42 | #endif // SAMGRAPH_CPU_HASHTABLE_H -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_hashtable0.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #include "cpu_hashtable0.h" 19 | 20 | #include "../device.h" 21 | #include "../logging.h" 22 | 23 | namespace samgraph { 24 | namespace common { 25 | namespace cpu { 26 | 27 | CPUHashTable0::CPUHashTable0(size_t max_items) { 28 | _n2o_table = static_cast( 29 | Device::Get(CPU())->AllocDataSpace(CPU(), max_items * sizeof(BucketN2O))); 30 | _num_items = 0; 31 | } 32 | 33 | CPUHashTable0::~CPUHashTable0() { 34 | Device::Get(CPU())->FreeDataSpace(CPU(), _n2o_table); 35 | } 36 | 37 | void CPUHashTable0::Populate(const IdType *input, const size_t num_input) { 38 | for (size_t i = 0; i < num_input; i++) { 39 | IdType oid = input[i]; 40 | IdType nid = _num_items; 41 | auto res = _o2n_table.insert({oid, nid}); 42 | if (res.second) { 43 | _n2o_table[nid].global = oid; 44 | _num_items++; 45 | } 46 | } 47 | } 48 | 49 | void CPUHashTable0::MapNodes(IdType *output, size_t num_output) { 50 | memcpy(output, _n2o_table, sizeof(IdType) * num_output); 51 | } 52 | 53 | void CPUHashTable0::MapEdges(const IdType *src, const IdType *dst, 54 | const size_t len, IdType *new_src, 55 | IdType *new_dst) { 56 | for (size_t i = 0; i < len; i++) { 57 | auto it0 = _o2n_table.find(src[i]); 58 | auto it1 = _o2n_table.find(dst[i]); 59 | 60 | CHECK(it0 != _o2n_table.end()); 61 | CHECK(it1 != _o2n_table.end()); 62 | 63 | new_src[i] = it0->second; 64 | new_dst[i] = it1->second; 65 | } 66 | } 67 | 68 | void CPUHashTable0::Reset() { 69 | _o2n_table = phmap::flat_hash_map(); 70 | _num_items = 0; 71 | } 72 | 73 | } // namespace cpu 74 | } // namespace common 75 | } // namespace samgraph 76 | -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_hashtable0.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai 
Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CPU_HASHTABLE0_H 19 | #define SAMGRAPH_CPU_HASHTABLE0_H 20 | 21 | #include 22 | 23 | #include "cpu_hashtable.h" 24 | 25 | namespace samgraph { 26 | namespace common { 27 | namespace cpu { 28 | 29 | // A DGL-like single thread hashtable 30 | class CPUHashTable0 : public CPUHashTable { 31 | public: 32 | CPUHashTable0(size_t max_items); 33 | ~CPUHashTable0(); 34 | 35 | void Populate(const IdType *input, const size_t num_input) override; 36 | void MapNodes(IdType *output, size_t num_output) override; 37 | void MapEdges(const IdType *src, const IdType *dst, const size_t len, 38 | IdType *new_src, IdType *new_dst) override; 39 | void Reset() override; 40 | size_t NumItems() const override { return _num_items; } 41 | 42 | private: 43 | struct BucketN2O { 44 | IdType global; 45 | }; 46 | 47 | phmap::flat_hash_map _o2n_table; 48 | BucketN2O *_n2o_table; 49 | size_t _num_items; 50 | }; 51 | 52 | } // namespace cpu 53 | } // namespace common 54 | } // namespace samgraph 55 | 56 | #endif // SAMGRAPH_CPU_HASHTABLE0_H 57 | -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_hashtable1.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | 
* Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CPU_HASHTABLE1_H 19 | #define SAMGRAPH_CPU_HASHTABLE1_H 20 | 21 | #include "../common.h" 22 | #include "cpu_hashtable.h" 23 | 24 | namespace samgraph { 25 | namespace common { 26 | namespace cpu { 27 | 28 | // A simple multi-thread parallel hashtable 29 | class CPUHashTable1 : public CPUHashTable { 30 | public: 31 | CPUHashTable1(size_t max_items); 32 | ~CPUHashTable1(); 33 | 34 | void Populate(const IdType *input, const size_t num_input); 35 | void MapNodes(IdType *output, size_t num_output) override; 36 | void MapEdges(const IdType *src, const IdType *dst, const size_t len, 37 | IdType *new_src, IdType *new_dst) override; 38 | void Reset() override; 39 | size_t NumItems() const override { return _num_items; } 40 | 41 | private: 42 | struct BucketO2N { 43 | IdType id; 44 | IdType local; 45 | }; 46 | 47 | struct BucketN2O { 48 | IdType global; 49 | }; 50 | 51 | BucketO2N *_o2n_table; 52 | BucketN2O *_n2o_table; 53 | 54 | IdType _num_items; 55 | size_t _capacity; 56 | 57 | void InitTable(); 58 | }; 59 | 60 | } // namespace cpu 61 | } // namespace common 62 | } // namespace samgraph 63 | 64 | #endif // SAMGRAPH_CPU_HASHTABLE1_H -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_hashtable2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 
Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CPU_HASHTABLE2_H 19 | #define SAMGRAPH_CPU_HASHTABLE2_H 20 | 21 | #include "../common.h" 22 | #include "cpu_hashtable.h" 23 | 24 | namespace samgraph { 25 | namespace common { 26 | namespace cpu { 27 | /** SXN: FIXME: this hash table is buggy when fanout is set to large number */ 28 | // An optimized parallel hashtable 29 | class CPUHashTable2 : public CPUHashTable { 30 | public: 31 | CPUHashTable2(size_t max_items); 32 | ~CPUHashTable2(); 33 | 34 | void Populate(const IdType *input, const size_t num_input) override; 35 | void MapNodes(IdType *ouput, size_t num_output) override; 36 | void MapEdges(const IdType *src, const IdType *dst, const size_t len, 37 | IdType *new_src, IdType *new_dst) override; 38 | void Reset() override; 39 | size_t NumItems() const override { return _num_items; } 40 | 41 | private: 42 | struct BucketO2N { 43 | IdType key; 44 | IdType index; 45 | IdType local; 46 | IdType version; 47 | }; 48 | 49 | struct BucketN2O { 50 | IdType global; 51 | }; 52 | 53 | struct PrefixItem { 54 | size_t val; 55 | size_t _padding[7]; 56 | 57 | PrefixItem() : val(0) {} 58 | }; 59 | 60 | BucketO2N *_o2n_table; 61 | BucketN2O *_n2o_table; 62 | 63 | IdType _num_items; 64 | size_t _capacity; 65 | IdType _version; 66 | 67 | void InitTable(); 68 | }; 69 | 70 | } // 
namespace cpu 71 | } // namespace common 72 | } // namespace samgraph 73 | 74 | #endif // SAMGRAPH_CPU_HASHTABLE2_H -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_loops.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CPU_LOOPS_H 19 | #define SAMGRAPH_CPU_LOOPS_H 20 | 21 | #include "../common.h" 22 | 23 | namespace samgraph { 24 | namespace common { 25 | namespace cpu { 26 | 27 | void RunArch0LoopsOnce(); 28 | 29 | std::vector GetArch0Loops(); 30 | 31 | // common steps 32 | TaskPtr DoShuffle(); 33 | void DoCPUSample(TaskPtr task); 34 | void DoGraphCopy(TaskPtr task); 35 | void DoFeatureExtract(TaskPtr task); 36 | void DoFeatureCopy(TaskPtr task); 37 | 38 | void DoCacheIdCopy(TaskPtr task); 39 | void DoGPULabelExtract(TaskPtr task); 40 | void DoCPULabelExtractAndCopy(TaskPtr task); 41 | void DoCacheFeatureExtractCopy(TaskPtr task); 42 | 43 | } // namespace cpu 44 | } // namespace common 45 | } // namespace samgraph 46 | 47 | #endif // SAMGRAPH_CPU_LOOPS_H -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_random.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | */ 17 | 18 | #include 19 | 20 | #include "cpu_function.h" 21 | 22 | namespace samgraph { 23 | namespace common { 24 | namespace cpu { 25 | 26 | IdType RandomID(const IdType &min, const IdType &max) { 27 | static thread_local std::mt19937 generator; 28 | std::uniform_int_distribution distribution(min, max); 29 | return distribution(generator); 30 | } 31 | 32 | } // namespace cpu 33 | } // namespace common 34 | } // namespace samgraph -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_sampling_khop1.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
// CPU entry point for the khop1 neighbor-sampling variant.
// NOTE(review): the body is empty -- output_src/output_dst/num_ouput are
// never written.  This looks like an intentional stub (sampling appears to
// be implemented by the GPU backends); confirm no caller dispatches khop1
// to the CPU path before relying on it.
void CPUSampleKHop1(const IdType *const indptr, const IdType *const indices,
                    const IdType *const input, const size_t num_input,
                    IdType *output_src, IdType *output_dst, size_t *num_ouput,
                    const size_t fanout) {}
// CPU entry point for random-walk sampling.
// NOTE(review): the body is empty -- none of the output parameters are
// written.  Presumably an intentional stub; confirm the CPU backend is
// never asked to run random-walk sampling.
void CPUSampleRandomWalk(const IdType *const indptr,
                         const IdType *const indices, const IdType *const input,
                         const size_t num_input, IdType *output_src,
                         IdType *output_dst, size_t *num_ouput,
                         const size_t fanout) {}
// CPU entry point for weighted khop sampling.
// NOTE(review): the body is empty -- none of the output parameters are
// written.  Presumably an intentional stub; confirm the CPU backend is
// never asked to run weighted khop sampling.
void CPUSampleWeightedKHop(const IdType *const indptr,
                           const IdType *const indices,
                           const IdType *const input, const size_t num_input,
                           IdType *output_src, IdType *output_dst,
                           size_t *num_ouput, const size_t fanout) {}
15 | * 16 | */ 17 | 18 | #include 19 | 20 | #include "../common.h" 21 | #include "../constant.h" 22 | #include "../logging.h" 23 | #include "cpu_function.h" 24 | 25 | namespace samgraph { 26 | namespace common { 27 | namespace cpu { 28 | 29 | void CPUSanityCheckList(const IdType *input, size_t num_input, 30 | IdType invalid_val) { 31 | for (size_t i = 0; i < num_input; i++) { 32 | CHECK_NE(input[i], invalid_val); 33 | } 34 | } 35 | 36 | void CPUSanityCheckNoDuplicate(const IdType *input, size_t num_input) { 37 | std::unordered_set visited_elem; 38 | for (size_t i = 0; i < num_input; i++) { 39 | if (visited_elem.count(input[i]) > 0) { 40 | LOG(DEBUG) << "duplicate" << input[i]; 41 | CHECK(false); 42 | } 43 | visited_elem.insert(input[i]); 44 | } 45 | } 46 | 47 | } // namespace cpu 48 | } // namespace common 49 | } // namespace samgraph -------------------------------------------------------------------------------- /samgraph/common/cpu/cpu_shuffler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
// Epoch/step iterator over a fixed set of node ids, yielding batches on the
// CPU.
class CPUShuffler : public Shuffler {
 public:
  // input: tensor of ids to iterate over; drop_last: whether a trailing
  // partial batch is discarded.  NOTE(review): parameter semantics inferred
  // from the field names below -- confirm against cpu_shuffler.cc.
  CPUShuffler(TensorPtr input, int num_epoch, size_t batch_size,
              bool drop_last);
  // Return the next batch of ids; stream defaults to nullptr.
  TensorPtr GetBatch(StreamHandle stream = nullptr);

  // Current position in the iteration.
  uint64_t Epoch() { return _cur_epoch; }
  uint64_t Step() { return _cur_step; }

  // Totals configured at construction.
  size_t NumEpoch() { return _num_epoch; }
  size_t NumStep() { return _num_step; }

 private:
  bool _drop_last;
  bool _initialized;

  uint64_t _cur_epoch;
  uint64_t _cur_step;

  size_t _num_epoch;
  size_t _num_step;

  TensorPtr _data;   // the id tensor being iterated
  size_t _num_data;  // element count of _data

  size_t _batch_size;
  size_t _last_batch_size;  // size of the final (possibly partial) batch

  // Re-permute the ids, presumably at epoch boundaries -- confirm in
  // cpu_shuffler.cc.
  void ReShuffle();
};
15 | * 16 | */ 17 | 18 | #include "mmap_cpu_device.h" 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "../logging.h" 32 | 33 | namespace samgraph { 34 | namespace common { 35 | namespace cpu { 36 | 37 | void MmapCPUDevice::SetDevice(Context ctx) {} 38 | 39 | void *MmapCPUDevice::AllocDataSpace(Context ctx, size_t nbytes, 40 | size_t alignment) { 41 | LOG(FATAL) << "Device does not support AllocDataSpace api"; 42 | return nullptr; 43 | } 44 | 45 | void MmapCPUDevice::FreeDataSpace(Context ctx, void *ptr) {} 46 | 47 | void MmapCPUDevice::CopyDataFromTo(const void *from, size_t from_offset, 48 | void *to, size_t to_offset, size_t nbytes, 49 | Context ctx_from, Context ctx_to, 50 | StreamHandle stream) { 51 | LOG(FATAL) << "Device does not support CopyDataFromTo api"; 52 | } 53 | 54 | void MmapCPUDevice::StreamSync(Context ctx, StreamHandle stream) { 55 | LOG(FATAL) << "Device does not support StreamSync api"; 56 | } 57 | 58 | void *MmapCPUDevice::AllocWorkspace(Context ctx, size_t nbytes, double scale) { 59 | LOG(FATAL) << "Device does not support AllocWorkspace api"; 60 | return nullptr; 61 | } 62 | 63 | void MmapCPUDevice::FreeWorkspace(Context ctx, void *data, size_t nbytes) { 64 | int ret = munmap(data, nbytes); 65 | CHECK_EQ(ret, 0); 66 | } 67 | 68 | const std::shared_ptr &MmapCPUDevice::Global() { 69 | static std::shared_ptr inst = 70 | std::make_shared(); 71 | return inst; 72 | } 73 | 74 | } // namespace cpu 75 | } // namespace common 76 | } // namespace samgraph 77 | -------------------------------------------------------------------------------- /samgraph/common/cpu/mmap_cpu_device.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you 
// (Apache-2.0 license header, continued from previous chunk)
// may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SAMGRAPH_MMAP_DEVICE_H
#define SAMGRAPH_MMAP_DEVICE_H

#include "../device.h"

namespace samgraph {
namespace common {
namespace cpu {

// Device implementation for mmap-backed CPU memory. Per the .cc file, only
// FreeWorkspace (munmap) is functional; the other APIs are unsupported and
// abort at runtime.
class MmapCPUDevice final : public Device {
 public:
  void SetDevice(Context ctx) override;
  void *AllocDataSpace(Context ctx, size_t nbytes, size_t alignment) override;
  void FreeDataSpace(Context ctx, void *ptr) override;
  void *AllocWorkspace(Context ctx, size_t nbytes,
                       double scale = Constant::kAllocScale) override;
  // Unmaps `nbytes` bytes at `ptr`; `nbytes` must match the original mapping.
  void FreeWorkspace(Context ctx, void *ptr, size_t nbytes = 0) override;
  void CopyDataFromTo(const void *from, size_t from_offset, void *to,
                      size_t to_offset, size_t nbytes, Context ctx_from,
                      Context ctx_to, StreamHandle stream) override;

  void StreamSync(Context ctx, StreamHandle stream) override;

  // Process-wide singleton accessor.
  static const std::shared_ptr &Global();
};

}  // namespace cpu
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_MMAP_DEVICE_H

// ===========================================================================
// File: samgraph/common/cuda/cuda_common.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file
// (license header continues in next chunk)
// (Apache-2.0 license header, continued from previous chunk)
// except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SAMGRAPH_CUDA_COMMON_H
#define SAMGRAPH_CUDA_COMMON_H

namespace samgraph {
namespace common {
namespace cuda {

// Work-queue identifiers for the CUDA pipeline. kNumQueues is a sentinel
// counting the real queue kinds that precede it.
enum QueueType { kGPUSample = 0, kDataCopy, kNumQueues };

// Number of real queues. Replaced the C-style cast with constexpr +
// static_cast; the value (2) and linkage are unchanged.
constexpr int QueueNum = static_cast<int>(kNumQueues);

}  // namespace cuda
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_CUDA_COMMON_H

// ===========================================================================
// File: samgraph/common/cuda/cuda_device.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// (license header continues in next chunk)
// (end of Apache-2.0 license header, continued from previous chunk)

#ifndef SAMGRAPH_GPU_DEVICE_H
#define SAMGRAPH_GPU_DEVICE_H

// NOTE(review): the two include targets below were lost during text
// extraction (cudaMemcpyKind/cudaStream_t require a CUDA runtime header);
// restore from upstream.
#include
#include

#include "../device.h"

namespace samgraph {
namespace common {
namespace cuda {

// Device implementation backed by a CUDA GPU: allocation, workspace
// management, host/device/peer copies, and stream control.
class GPUDevice final : public Device {
 public:
  GPUDevice();
  void SetDevice(Context ctx) override;
  void *AllocDataSpace(Context ctx, size_t nbytes,
                       size_t alignment = kAllocAlignment) override;
  void FreeDataSpace(Context ctx, void *ptr) override;
  void *AllocWorkspace(Context ctx, size_t nbytes,
                       double scale = Constant::kAllocScale) override;
  void FreeWorkspace(Context ctx, void *ptr, size_t nbytes = 0) override;
  void CopyDataFromTo(const void *from, size_t from_offset, void *to,
                      size_t to_offset, size_t nbytes, Context ctx_from,
                      Context ctx_to, StreamHandle stream) override;

  StreamHandle CreateStream(Context ctx) override;
  void FreeStream(Context ctx, StreamHandle stream) override;
  void StreamSync(Context ctx, StreamHandle stream) override;
  void SyncStreamFromTo(Context ctx, StreamHandle event_src,
                        StreamHandle event_dst) override;
  // Memory accounting queries (per-device, see .cc for semantics).
  size_t TotalSize(Context ctx) override;
  size_t DataSize(Context ctx) override;
  size_t WorkspaceSize(Context ctx) override;
  size_t FreeWorkspaceSize(Context ctx) override;

  // Process-wide singleton accessor.
  static const std::shared_ptr &Global();

 private:
  // Raw memcpy helper for host<->device transfers on a stream.
  static void GPUCopy(const void *from, void *to, size_t nbytes,
                      cudaMemcpyKind kind, cudaStream_t stream);
  // Device-to-device copy across two different GPUs.
  static void GPUCopyPeer(const void *from, int from_device, void *to,
                          int to_device, size_t nbytes, cudaStream_t stream);
  // std::array _allocated_size_list;
  // Per-device allocated-byte counters; sized/indexed in the .cc file.
  size_t* _allocated_size_list;
};

}  // namespace cuda
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_GPU_DEVICE_H

// ===========================================================================
// File: samgraph/common/cuda/cuda_loops.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SAMGRAPH_CUDA_LOOPS_H
#define SAMGRAPH_CUDA_LOOPS_H

// NOTE(review): the include target below was lost during text extraction;
// restore from upstream.
#include

#include "../common.h"

namespace samgraph {
namespace common {
namespace cuda {

// One full iteration of the sampling/copy pipeline for each execution
// architecture (arch 1-4 and 7; numbering defined by the engine).
void RunArch1LoopsOnce();
void RunArch2LoopsOnce();
void RunArch3LoopsOnce();
void RunArch4LoopsOnce();
void RunArch7LoopsOnce();

// Loop-function lists per architecture.
// NOTE(review): the vector element type was stripped by text extraction
// (`std::vector<...>`); restore from upstream.
std::vector GetArch1Loops();
std::vector GetArch2Loops();
std::vector GetArch3Loops();
std::vector GetArch4Loops();

// common steps
// Each Do* function performs one pipeline stage on the given task; see the
// .cc file for the actual behavior of each stage.
TaskPtr DoShuffle();
void DoGPUSample(TaskPtr task);
// NOTE(review): the std::function signature was stripped by text extraction.
void DoGPUSampleDyCache(TaskPtr task, std::function &neighbour_cb);
void DoGPUSampleAllNeighbour(TaskPtr task);
void DoGraphCopy(TaskPtr task);
void DoIdCopy(TaskPtr task);
void DoCacheIdCopy(TaskPtr task);
void DoCacheIdCopyToCPU(TaskPtr task);
void DoCPUFeatureExtract(TaskPtr task);
void DoGPUFeatureExtract(TaskPtr task);
void DoGPULabelExtract(TaskPtr task);
void DoCPULabelExtractAndCopy(TaskPtr task);
void DoFeatureCopy(TaskPtr task);
void DoCacheFeatureCopy(TaskPtr task);
void DoDynamicCacheFeatureCopy(TaskPtr task);

}  // namespace cuda
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_CUDA_LOOPS_H

// ===========================================================================
// File: samgraph/common/cuda/cuda_random_states.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// (license header continues in next chunk)
15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_RANDOM_STATES_H 19 | #define SAMGRAPH_RANDOM_STATES_H 20 | 21 | #include 22 | 23 | #include 24 | 25 | #include "../common.h" 26 | #include "../constant.h" 27 | 28 | namespace samgraph { 29 | namespace common { 30 | namespace cuda { 31 | 32 | class GPURandomStates { 33 | public: 34 | GPURandomStates(SampleType sample_type, const std::vector& fanout, 35 | const size_t batch_size, Context ctx); 36 | ~GPURandomStates(); 37 | 38 | curandState* GetStates() { return _states; }; 39 | size_t NumStates() { return _num_states; }; 40 | 41 | private: 42 | curandState* _states; 43 | size_t _num_states; 44 | Context _ctx; 45 | }; 46 | 47 | } // namespace cuda 48 | } // namespace common 49 | } // namespace samgraph 50 | 51 | #endif // SAMGRAPH_RANDOM_STATES_H 52 | -------------------------------------------------------------------------------- /samgraph/common/cuda/cuda_shuffler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
// (end of Apache-2.0 license header, continued from previous chunk)

#ifndef SAMGRAPH_CUDA_SHUFFLER_H
#define SAMGRAPH_CUDA_SHUFFLER_H

// NOTE(review): the two include targets below were lost during text
// extraction; restore from upstream.
#include
#include

#include "../common.h"

namespace samgraph {
namespace common {
namespace cuda {

// GPU-side mini-batch shuffler: keeps the id list both on host (`_data`)
// and on device (`_gpu_data`), handing out one batch per GetBatch() call.
class GPUShuffler : public Shuffler {
 public:
  GPUShuffler(TensorPtr input, size_t num_epoch, size_t batch_size,
              bool drop_last);
  TensorPtr GetBatch(StreamHandle stream = nullptr);

  // Current position of the shuffler.
  uint64_t Epoch() { return _cur_epoch; }
  uint64_t Step() { return _cur_step; }

  // Totals fixed at construction time.
  size_t NumEpoch() { return _num_epoch; }
  size_t NumStep() { return _num_step; }

  // Rewind to the pristine state. Setting _cur_step to _num_step marks the
  // current epoch as exhausted -- presumably so the next GetBatch() starts
  // a fresh epoch 0; confirm against the .cc implementation.
  void Reset() { _cur_step = _num_step; _cur_epoch = 0; _initialized = false; }

 private:
  bool _drop_last;    // drop the final, smaller batch of an epoch
  bool _initialized;  // whether the first shuffle has happened yet

  uint64_t _cur_epoch;
  uint64_t _cur_step;

  size_t _num_epoch;
  size_t _num_step;  // batches per epoch

  TensorPtr _data;      // host copy of the id list
  TensorPtr _gpu_data;  // device copy of the id list
  size_t _num_data;

  size_t _batch_size;
  size_t _last_batch_size;  // size of the final (possibly partial) batch

  // Scratch map used by sanity checking; allocation/use is in the .cc file.
  IdType *_sanity_check_map;

  // Re-permutes the id list for a new epoch, optionally on `stream`.
  void ReShuffle(StreamHandle stream = nullptr);
};

}  // namespace cuda
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_CUDA_SHUFFLER_H

// ===========================================================================
// File: samgraph/common/cuda/cuda_utils.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// (license header continues in next chunk)
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_CUDA_UTILS_H 19 | #define SAMGRAPH_CUDA_UTILS_H 20 | 21 | #include 22 | 23 | namespace samgraph { 24 | namespace common { 25 | namespace cuda { 26 | 27 | /** 28 | * This structure is used with cub's block-level prefixscan in order to 29 | * keep a running sum as items are iteratively processed. 30 | */ 31 | template 32 | struct BlockPrefixCallbackOp { 33 | T _running_total; 34 | 35 | __device__ BlockPrefixCallbackOp(const T running_total) 36 | : _running_total(running_total) {} 37 | 38 | __device__ T operator()(const T block_aggregate) { 39 | const T old_prefix = _running_total; 40 | _running_total += block_aggregate; 41 | return old_prefix; 42 | } 43 | }; 44 | 45 | } // namespace cuda 46 | } // namespace common 47 | } // namespace samgraph 48 | 49 | #endif // SAMGRAPH_CUDA_UTILS_H -------------------------------------------------------------------------------- /samgraph/common/cuda/pre_sampler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
// (Apache-2.0 license header, continued from previous chunk)
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "../common.h"
#include "../constant.h"
namespace samgraph {
namespace common {
namespace cuda {

// Runs a pre-sampling pass to estimate per-node access frequency, used to
// decide which nodes to cache. Accessed as a process-wide singleton.
class PreSampler {
 public:
  PreSampler(size_t num_nodes, size_t num_step);
  ~PreSampler();
  // Executes the pre-sampling steps and fills `freq_table`.
  void DoPreSample();
  // Per-node frequency result as a tensor.
  TensorPtr GetFreq();
  // Node ids ranked by frequency; the second overload writes into the
  // caller-provided tensor.
  TensorPtr GetRankNode();
  void GetRankNode(TensorPtr &);
  // Singleton management: the engine installs the instance via SetSingleton.
  static inline void SetSingleton(PreSampler* p) { singleton = p; }
  static inline PreSampler* Get() { return singleton; }
 private:
  // Frequency accumulator, one 64-bit entry per node (see .cc for layout).
  Id64Type * freq_table;
  // TensorPtr freq_table;
  size_t _num_nodes, _num_step;
  static PreSampler* singleton;

};

}
}
}

// ===========================================================================
// File: samgraph/common/device.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// (license header continues in next chunk)
// (Apache-2.0 license header, continued from previous chunk)
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SAMGRAPH_DEVICE_H
#define SAMGRAPH_DEVICE_H

// NOTE(review): the two include targets below were lost during text
// extraction; restore from upstream.
#include
#include

#include "common.h"
#include "constant.h"

namespace samgraph {
namespace common {

// Number of bytes each allocation must align to
constexpr int kAllocAlignment = 64;

// Number of bytes each allocation must align to in temporary allocation
constexpr int kTempAllocaAlignment = 64;

// Abstract device interface (CPU, mmap-CPU, GPU): allocation, copies, and
// stream control. Concrete devices are obtained via the static Get(ctx).
class Device {
 public:
  virtual ~Device() {}
  virtual void SetDevice(Context ctx) = 0;
  // Long-lived data allocation.
  virtual void *AllocDataSpace(Context ctx, size_t nbytes,
                               size_t alignment = kAllocAlignment) = 0;
  virtual void FreeDataSpace(Context ctx, void *ptr) = 0;
  // Temporary workspace allocation; `scale` over-allocates by a factor
  // (default Constant::kAllocScale). Non-pure: a base implementation exists.
  virtual void *AllocWorkspace(Context ctx, size_t nbytes,
                               double scale = Constant::kAllocScale);
  virtual void FreeWorkspace(Context ctx, void *ptr, size_t nbytes = 0);
  virtual void CopyDataFromTo(const void *from, size_t from_offset, void *to,
                              size_t to_offset, size_t nbytes, Context ctx_from,
                              Context ctx_to, StreamHandle stream = 0) = 0;

  // Stream management; base implementations exist for devices without
  // stream support (see .cc).
  virtual StreamHandle CreateStream(Context ctx);
  virtual void FreeStream(Context ctx, StreamHandle stream);
  virtual void StreamSync(Context ctx, StreamHandle stream) = 0;
  virtual void SyncStreamFromTo(Context ctx, StreamHandle event_src,
                                StreamHandle event_dst);
  // Memory accounting; defaults return 0 for devices that do not track it.
  virtual size_t TotalSize(Context ctx) {return 0;};
  virtual size_t DataSize(Context ctx) {return 0;};
  virtual size_t WorkspaceSize(Context ctx) {return 0;};
  virtual size_t FreeWorkspaceSize(Context ctx) {return 0;};
  // Returns the device instance matching `ctx`.
  static Device *Get(Context ctx);
};
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_DEVICE_H

// ===========================================================================
// File: samgraph/common/dist/dist_loops.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#ifndef SAMGRAPH_DIST_LOOPS_H
#define SAMGRAPH_DIST_LOOPS_H

#include "../common.h"
#include "dist_engine.h"

namespace samgraph {
namespace common {
namespace dist {

// One full iteration of the distributed pipeline for arch 5 / arch 6.
void RunArch5LoopsOnce(DistType dist_type);
void RunArch6LoopsOnce();

// common steps
// Each Do* function performs one pipeline stage on the given task; the
// actual behavior lives in the .cc file.
TaskPtr DoShuffle();
void DoGPUSample(TaskPtr task);
void DoGetCacheMissIndex(TaskPtr task);
void DoGraphCopy(TaskPtr task);
void DoIdCopy(TaskPtr task);
void DoCPUFeatureExtract(TaskPtr task);
void DoFeatureCopy(TaskPtr task);

void DoCacheIdCopy(TaskPtr task);
void DoCacheIdCopyToCPU(TaskPtr task);
void DoSwitchCacheFeatureCopy(TaskPtr task);
void DoCacheFeatureCopy(TaskPtr task);
void DoGPULabelExtract(TaskPtr task);
void DoCPULabelExtractAndCopy(TaskPtr task);

void DoArch6GetCacheMissIndex(TaskPtr task);
void DoArch6CacheFeatureCopy(TaskPtr task);

// Extraction entry point for arch 5; takes a worker id.
typedef void (*ExtractFunction)(int);
ExtractFunction GetArch5Loops();
// NOTE(review): the vector element type was stripped by text extraction;
// restore from upstream.
std::vector GetArch6Loops();

}  // namespace dist
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_DIST_LOOPS_H

// ===========================================================================
// File: samgraph/common/dist/dist_shuffler.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef SAMGRAPH_DIST_SHUFFLER_H
#define SAMGRAPH_DIST_SHUFFLER_H

// NOTE(review): the two include targets below were lost during text
// extraction; restore from upstream.
#include
#include

#include "../common.h"

namespace samgraph {
namespace common {
namespace dist {

// Shuffler for the distributed setting: each of `num_sampler` samplers owns
// a disjoint slice of the train set (starting at `_dataset_offset`) and
// iterates its own `_num_step` batches out of the global `_epoch_step`.
class DistShuffler : public Shuffler {
 public:
  DistShuffler(TensorPtr input, size_t num_epoch, size_t batch_size,
               int sampler_id, int num_sampler, int num_trainer,
               bool drop_last);
  TensorPtr GetBatch(StreamHandle stream = nullptr) override;

  uint64_t Epoch() override { return _cur_epoch; }
  // Global step index: this sampler's slice offset (in batches) plus its
  // local step.
  uint64_t Step() override {
    return (_dataset_offset / _batch_size) + _cur_step;
  }

  size_t NumEpoch() override { return _num_epoch; }
  // return the total steps for each epoch
  // reasons: profiler needs this to create total space
  size_t NumStep() override { return _epoch_step; }
  bool IsLastBatch() { return _cur_step == (_num_step - 1); }

  // Rewind to the pristine state; _cur_step = _num_step marks the current
  // epoch exhausted so the next GetBatch() starts over.
  void Reset() {
    _cur_step = _num_step;
    _cur_epoch = 0;
    _initialized = false;
  }

 private:
  bool _drop_last;    // drop the final, smaller batch of an epoch
  bool _initialized;  // whether the first shuffle has happened yet

  uint64_t _cur_epoch;
  uint64_t _cur_step;

  size_t _num_epoch;
  // number of steps for this sampler
  size_t _num_step;
  // total steps each epoch
  size_t _epoch_step;
  // the offset of train set for this sampler
  size_t _dataset_offset;

  TensorPtr _data;      // host copy of this sampler's id slice
  TensorPtr _gpu_data;  // device copy of this sampler's id slice
  size_t _num_data;

  size_t _batch_size;
  size_t _last_batch_size;  // size of the final (possibly partial) batch

  // Scratch map used by sanity checking; see the .cc file.
  IdType *_sanity_check_map;

  void ReShuffle(StreamHandle stream = nullptr);
};

}  // namespace dist
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_DIST_SHUFFLER_H

// ===========================================================================
// File: samgraph/common/dist/dist_shuffler_aligned.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// (license header continues in next chunk)
// (end of Apache-2.0 license header, continued from previous chunk)

#ifndef SAMGRAPH_DIST_SHUFFLER_ALIGNED_H
#define SAMGRAPH_DIST_SHUFFLER_ALIGNED_H

// NOTE(review): the two include targets below were lost during text
// extraction; restore from upstream.
#include
#include

#include "../common.h"

namespace samgraph {
namespace common {
namespace dist {

// Shuffler variant where every worker runs the same number of local steps
// per epoch (aligned), so global step = _global_step_offset + local step.
class DistAlignedShuffler : public Shuffler {
 public:
  // drop last is disable
  DistAlignedShuffler(TensorPtr input, size_t num_epoch, size_t batch_size,
                      size_t worker_id, size_t num_worker);

  TensorPtr GetBatch(StreamHandle stream = nullptr) override;

  uint64_t Epoch() override { return _cur_epoch; }
  // Global step
  uint64_t Step() override { return _global_step_offset + _cur_local_step; }
  size_t NumEpoch() override { return _num_epoch; }
  size_t NumStep() override { return _num_global_step; }
  size_t NumLocalStep() override { return _num_local_step; }

 private:
  bool _initialized;  // whether the first shuffle has happened yet

  TensorPtr _data;      // host copy of this worker's id slice
  TensorPtr _gpu_data;  // device copy of this worker's id slice
  size_t _num_data;

  size_t _batch_size;
  size_t _last_batch_size;  // size of the final (possibly partial) batch

  size_t _num_epoch;
  size_t _num_global_step;
  size_t _num_local_step; // should be same among all the trainer

  uint64_t _cur_epoch;
  uint64_t _cur_local_step;

  // the offset of train set for this sampler
  uint64_t _global_step_offset;
  size_t _global_data_offset;

  void ReShuffle(StreamHandle stream = nullptr);
};

}  // namespace dist
}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_DIST_SHUFFLER_ALIGNED_H

// ===========================================================================
// File: samgraph/common/dist/pre_sampler.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in
// (license header continues in next chunk)
// (Apache-2.0 license header, continued from previous chunk)
// compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "../common.h"
#include "../constant.h"
#include "../cuda/cuda_shuffler.h"
namespace samgraph {
namespace common {
namespace dist {

// Distributed-mode pre-sampler: estimates per-node access frequency before
// training (using its own GPUShuffler) to decide cache placement.
// Accessed as a process-wide singleton.
class PreSampler {
 public:
  PreSampler(TensorPtr input, size_t batch_size, size_t num_nodes);
  ~PreSampler();
  // Executes the pre-sampling steps and fills `freq_table`.
  void DoPreSample();
  // Per-node frequency result as a tensor.
  TensorPtr GetFreq();
  // Node ids ranked by frequency; the second overload writes into the
  // caller-provided tensor.
  TensorPtr GetRankNode();
  void GetRankNode(TensorPtr &);
  // Singleton management: the engine installs the instance via SetSingleton.
  static inline void SetSingleton(PreSampler* p) { singleton = p; }
  static inline PreSampler* Get() { return singleton; }
  // One shuffle step driven by the internal `_shuffler`.
  TaskPtr DoPreSampleShuffle();
 private:
  // Frequency accumulator, one 64-bit entry per node (see .cc for layout).
  Id64Type * freq_table;
  // TensorPtr freq_table;
  size_t _num_nodes, _num_step;
  static PreSampler* singleton;
  // Owned shuffler used to replay the sampling schedule.
  cuda::GPUShuffler* _shuffler;

};

}
}
}

// ===========================================================================
// File: samgraph/common/function.h
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// (license header continues in next chunk)
// (Apache-2.0 license header, continued from previous chunk)
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Convenience aggregator header: pulls in both the CPU and CUDA function
// declarations so callers need a single include.
#ifndef SAMGRAPH_FUNCTION_H
#define SAMGRAPH_FUNCTION_H

#include "cpu/cpu_function.h"
#include "cuda/cuda_function.h"

#endif  // SAMGRAPH_FUNCTION_H

// ===========================================================================
// File: samgraph/common/graph_pool.cc
// ===========================================================================
// Copyright 2022 Institute of Parallel and Distributed Systems,
// Shanghai Jiao Tong University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// (license header continues in next chunk)
15 | * 16 | */ 17 | 18 | #include "graph_pool.h" 19 | 20 | #include 21 | 22 | #include "common.h" 23 | #include "constant.h" 24 | #include "logging.h" 25 | 26 | namespace samgraph { 27 | namespace common { 28 | 29 | GraphPool::~GraphPool() { _stop = true; } 30 | 31 | std::shared_ptr GraphPool::GetGraphBatch() { 32 | while (true) { 33 | { 34 | std::lock_guard lock(_mutex); 35 | if (!_pool.empty()) { 36 | auto batch = _pool.front(); 37 | _pool.pop(); 38 | auto key = batch->key; 39 | LOG(DEBUG) << "GraphPool: Get batch with key " << key; 40 | return batch; 41 | } else if (_stop) { 42 | return nullptr; 43 | } 44 | } 45 | std::this_thread::sleep_for(std::chrono::nanoseconds(1000)); 46 | } 47 | 48 | return nullptr; 49 | } 50 | 51 | void GraphPool::Submit(uint64_t key, std::shared_ptr batch) { 52 | std::lock_guard lock(_mutex); 53 | CHECK(!_stop); 54 | _pool.push(batch); 55 | 56 | LOG(DEBUG) << "GraphPool: Add batch with key " << key; 57 | } 58 | 59 | bool GraphPool::Full() { 60 | std::lock_guard lock(_mutex); 61 | return _pool.size() >= _max_size; 62 | } 63 | 64 | } // namespace common 65 | } // namespace samgraph 66 | -------------------------------------------------------------------------------- /samgraph/common/graph_pool.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_GRAPH_POOL_H 19 | #define SAMGRAPH_GRAPH_POOL_H 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "common.h" 27 | 28 | namespace samgraph { 29 | namespace common { 30 | 31 | class GraphPool { 32 | public: 33 | GraphPool(size_t max_size) : _stop(false), _max_size(max_size) {} 34 | ~GraphPool(); 35 | 36 | std::shared_ptr GetGraphBatch(); 37 | void Submit(uint64_t key, std::shared_ptr batch); 38 | bool Full(); 39 | 40 | private: 41 | bool _stop; 42 | std::mutex _mutex; 43 | const size_t _max_size; 44 | // std::unordered_map> _pool; 45 | std::queue> _pool; 46 | }; 47 | 48 | } // namespace common 49 | } // namespace samgraph 50 | 51 | #endif // SAMGRAPH_GRAPH_POLL_H 52 | -------------------------------------------------------------------------------- /samgraph/common/task_queue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_TASK_QUEUE_H 19 | #define SAMGRAPH_TASK_QUEUE_H 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include "common.h" 26 | #include "memory_queue.h" 27 | 28 | namespace samgraph { 29 | namespace common { 30 | 31 | class TaskQueue { 32 | public: 33 | TaskQueue(size_t max_len); 34 | virtual ~TaskQueue() {}; 35 | 36 | void AddTask(std::shared_ptr); 37 | std::shared_ptr GetTask(); 38 | bool Full(); 39 | size_t PendingLength(); 40 | 41 | private: 42 | std::vector> _q; 43 | std::mutex _mutex; 44 | size_t _max_len; 45 | }; 46 | 47 | class MessageTaskQueue : public TaskQueue { 48 | public: 49 | MessageTaskQueue(size_t max_len); 50 | void PinMemory() { _mq->PinMemory(); } 51 | void Send(std::shared_ptr); 52 | std::shared_ptr Recv(); 53 | 54 | private: 55 | std::shared_ptr _mq; 56 | }; 57 | 58 | } // namespace common 59 | } // namespace samgraph 60 | 61 | #endif // SAMGRAPH_TASK_QUEUE_H 62 | -------------------------------------------------------------------------------- /samgraph/common/timer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
#ifndef SAMGRAPH_COMMON_TIMER_H
#define SAMGRAPH_COMMON_TIMER_H

#include <chrono>
#include <cstdint>

namespace samgraph {
namespace common {

// Monotonic stopwatch built on std::chrono::steady_clock.
// Passed() returns fractional seconds; the unit-suffixed variants return
// elapsed time cast to that unit. Template arguments below were stripped
// in this dump and are reconstructed from the call pattern.
class Timer {
 public:
  Timer(std::chrono::time_point<std::chrono::steady_clock> tp =
            std::chrono::steady_clock::now())
      : _start_time(tp) {}

  // True once at least `count` units of duration type T have elapsed.
  template <typename T>
  bool Timeout(double count) const {
    return Passed<T>() >= count;
  }

  // Elapsed time in fractional seconds.
  double Passed() const { return Passed<std::chrono::duration<double>>(); }

  double PassedSec() const { return Passed<std::chrono::seconds>(); }

  double PassedMicro() const { return Passed<std::chrono::microseconds>(); }

  double PassedNano() const { return Passed<std::chrono::nanoseconds>(); }

  // Elapsed time from _start_time to now, cast to duration type T.
  // Note: for integral duration types the cast truncates before the
  // conversion to double (preserved original behavior).
  template <typename T>
  double Passed() const {
    return Passed<T>(std::chrono::steady_clock::now());
  }

  template <typename T>
  double Passed(std::chrono::time_point<std::chrono::steady_clock> tp) const {
    const auto elapsed = std::chrono::duration_cast<T>(tp - _start_time);
    return elapsed.count();
  }

  // Start time as microseconds since the steady clock's epoch
  // (an arbitrary origin -- useful only for ordering, not wall time).
  uint64_t TimePointMicro() const {
    return std::chrono::duration_cast<std::chrono::microseconds>(
               _start_time.time_since_epoch())
        .count();
  }

  void Reset() { _start_time = std::chrono::steady_clock::now(); }

 private:
  std::chrono::time_point<std::chrono::steady_clock> _start_time;
};

}  // namespace common
}  // namespace samgraph

#endif  // SAMGRAPH_COMMON_TIMER_H
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_WORKSPACE_POOL_H 19 | #define SAMGRAPH_WORKSPACE_POOL_H 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include "common.h" 26 | #include "device.h" 27 | 28 | namespace samgraph { 29 | namespace common { 30 | 31 | class WorkspacePool { 32 | public: 33 | WorkspacePool(DeviceType device_type, std::shared_ptr device); 34 | ~WorkspacePool(); 35 | 36 | void* AllocWorkspace(Context ctx, size_t size, double scale); 37 | void FreeWorkspace(Context ctx, void* ptr); 38 | size_t TotalSize(Context ctx); 39 | size_t FreeSize(Context ctx); 40 | 41 | private: 42 | static constexpr int kMaxDevice = 32; 43 | 44 | class Pool; 45 | std::array _array; 46 | DeviceType _device_type; 47 | std::shared_ptr _device; 48 | std::mutex _mutex; 49 | }; 50 | 51 | } // namespace common 52 | } // namespace samgraph 53 | 54 | #endif // SAMGRAPH_WORKSPACE_POOL_H 55 | -------------------------------------------------------------------------------- /samgraph/torch/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from samgraph.torch.adapter import * 18 | -------------------------------------------------------------------------------- /samgraph/torch/adapter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | */ 17 | 18 | #ifndef SAMGRAPH_TORCH_OPS_H 19 | #define SAMGRAPH_TORCH_OPS_H 20 | 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | namespace samgraph { 27 | namespace torch { 28 | 29 | extern "C" { 30 | 31 | THCudaTensor samgraph_torch_get_graph_feat(uint64_t key); 32 | THCudaTensor samgraph_torch_get_graph_label(uint64_t key); 33 | THCudaTensor samgraph_torch_get_graph_row(uint64_t key, int layer_idx); 34 | THCudaTensor samgraph_torch_get_graph_col(uint64_t key, int layer_idx); 35 | THCudaTensor samgraph_torch_get_graph_data(uint64_t key, int layer_idx); 36 | 37 | THTensor samgraph_torch_get_dataset_feat(); 38 | THTensor samgraph_torch_get_dataset_label(); 39 | THCudaTensor samgraph_torch_get_graph_input_nodes(uint64_t key); 40 | THCudaTensor samgraph_torch_get_graph_output_nodes(uint64_t key); 41 | 42 | } 43 | 44 | } // namespace torch 45 | } // namespace samgraph 46 | 47 | #endif // SAMGRAPH_TORCH_OPS_H -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(samgraph_test CXX CUDA) 3 | 4 | # GoogleTest requires at least C++11 5 | set(CMAKE_CXX_STANDARD 11) 6 | 7 | include(FetchContent) 8 | FetchContent_Declare( 9 | googletest 10 | URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip 11 | ) 12 | 13 | # For Windows: Prevent overriding the parent project's compiler/linker settings 14 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 15 | FetchContent_MakeAvailable(googletest) 16 | 17 | include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) 18 | 19 | enable_testing() 20 | 21 | add_executable( 22 | samgraph_test 23 | cuda_class_test.cu 24 | device_query_test.cc 25 | memory_race_test.cu 26 | memcpy_test.cc 27 | ) 28 | 29 | target_link_libraries( 30 | samgraph_test 31 | gtest_main 32 | ) 33 | 34 | include(GoogleTest) 35 
| gtest_discover_tests(samgraph_test) 36 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | BUILD_DIR := ./build 2 | EXECUTABLE := samgraph_test 3 | TARGET = $(BUILD_DIR)/$(EXECUTABLE) 4 | 5 | all: test 6 | 7 | test: $(TARGET) 8 | $(TARGET) 9 | 10 | build: FORCE 11 | ./build.sh 12 | 13 | .PHONY: clean 14 | clean: 15 | @rm -rf build 16 | @rm -rf chcore.out 17 | 18 | .PHONY: FORCE 19 | FORCE: 20 | -------------------------------------------------------------------------------- /tests/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -d "build" ]; then 4 | mkdir build 5 | fi 6 | 7 | cmake -S . -B build -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc 8 | cmake --build build 9 | -------------------------------------------------------------------------------- /tests/cuda_class_test.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | class A { 5 | public: 6 | A() : a(10) {} 7 | int a; 8 | 9 | __device__ void Print() { printf("%d\n", a); } 10 | }; 11 | 12 | __global__ void accessA(A a) { a.Print(); } 13 | 14 | TEST(CudaClassTest, ClassTest) { 15 | dim3 grid(1); 16 | dim3 block(1); 17 | 18 | A a; 19 | accessA<<>>(a); 20 | } 21 | -------------------------------------------------------------------------------- /tests/device_query_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "test_common/common.h" 7 | 8 | TEST(DeviceTestQuery, Peer2PeerTest) { 9 | int access1from0, access0from1; 10 | CUDA_CALL(cudaDeviceCanAccessPeer(&access1from0, 0, 1)); 11 | CUDA_CALL(cudaDeviceCanAccessPeer(&access0from1, 1, 0)); 12 | 13 | LOG << "access1from0: " << access1from0 << " | access0from1: " << access0from1 14 | << "\n"; 15 | 
} 16 | 17 | TEST(DeviceTestQuery, AsyncTest) { 18 | int deviceCount; 19 | CUDA_CALL(cudaGetDeviceCount(&deviceCount)); 20 | int device; 21 | for (device = 0; device < deviceCount; ++device) { 22 | cudaDeviceProp deviceProp; 23 | 24 | CUDA_CALL(cudaGetDeviceProperties(&deviceProp, device)); 25 | 26 | LOG << "Device " << device << " has " << deviceProp.asyncEngineCount 27 | << " async engines\n"; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /tests/memcpy_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include "test_common/common.h" 8 | #include "test_common/timer.h" 9 | 10 | int n_iters = 10; 11 | size_t copy_nbytes = 500ull * 1024 * 1024; 12 | 13 | TEST(MemcpyTest, HostMalloc) { 14 | double pinned_time = 0, unpinned_time = 0, mlock_time; 15 | 16 | void *host_pinned_data, *host_unppined_data, *host_mlock_data, *dev_data; 17 | host_unppined_data = malloc(copy_nbytes); 18 | host_mlock_data = malloc(copy_nbytes); 19 | mlock(host_mlock_data, copy_nbytes); 20 | CUDA_CALL( 21 | cudaHostAlloc(&host_pinned_data, copy_nbytes, cudaHostAllocDefault)); 22 | CUDA_CALL(cudaMalloc(&dev_data, copy_nbytes)); 23 | 24 | for (int i = 0; i < n_iters; i++) { 25 | Timer t0; 26 | CUDA_CALL(cudaMemcpy(dev_data, host_pinned_data, copy_nbytes, 27 | cudaMemcpyHostToDevice)); 28 | pinned_time += t0.Passed(); 29 | 30 | Timer t1; 31 | CUDA_CALL(cudaMemcpy(dev_data, host_unppined_data, copy_nbytes, 32 | cudaMemcpyHostToDevice)); 33 | unpinned_time += t1.Passed(); 34 | 35 | Timer t2; 36 | CUDA_CALL(cudaMemcpy(dev_data, host_mlock_data, copy_nbytes, 37 | cudaMemcpyHostToDevice)); 38 | mlock_time += t2.Passed(); 39 | } 40 | 41 | CUDA_CALL(cudaFreeHost(host_pinned_data)); 42 | CUDA_CALL(cudaFree(dev_data)); 43 | munlock(host_mlock_data, copy_nbytes); 44 | free(host_mlock_data); 45 | free(host_unppined_data); 46 | 47 | LOG << "pinned: " << 
pinned_time / n_iters 48 | << " | unpinned: " << unpinned_time / n_iters << " | mlock" 49 | << mlock_time / n_iters << "\n"; 50 | } -------------------------------------------------------------------------------- /tests/test_common/common.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTS_COMMON_H 2 | #define TESTS_COMMON_H 3 | 4 | #include 5 | 6 | #define CUDA_CALL(func) \ 7 | { \ 8 | cudaError_t e = (func); \ 9 | ASSERT_TRUE(e == cudaSuccess || e == cudaErrorCudartUnloading) \ 10 | << "CUDA: " << cudaGetErrorString(e); \ 11 | } 12 | 13 | #define GPU_TILE_SIZE 1024 14 | #define GPU_BLOCK_SIZE 256 15 | 16 | #define ANSI_TXT_GRN "\033[0;32m" 17 | #define ANSI_TXT_MGT "\033[0;35m" // Magenta 18 | #define ANSI_TXT_DFT "\033[0;0m" // Console default 19 | #define GTEST_BOX "[ LOG ] " 20 | #define COUT_GTEST ANSI_TXT_GRN << GTEST_BOX // You could add the Default 21 | #define COUT_GTEST_MGT COUT_GTEST << ANSI_TXT_MGT 22 | 23 | #define LOG std::cout << COUT_GTEST_MGT 24 | 25 | #endif // TESTS_COMMON_H -------------------------------------------------------------------------------- /tests/test_common/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_COMMON_TIMER_H 2 | #define TEST_COMMON_TIMER_H 3 | 4 | #include 5 | 6 | class Timer { 7 | public: 8 | Timer(std::chrono::time_point tp = 9 | std::chrono::steady_clock::now()) 10 | : _start_time(tp) {} 11 | 12 | double Passed() { 13 | auto tp = std::chrono::steady_clock::now(); 14 | const auto elapsed = 15 | std::chrono::duration_cast>(tp - 16 | _start_time); 17 | return elapsed.count(); 18 | } 19 | 20 | void Reset() { _start_time = std::chrono::steady_clock::now(); } 21 | 22 | private: 23 | std::chrono::time_point _start_time; 24 | }; 25 | 26 | #endif // TEST_COMMON_TIMER_H 27 | -------------------------------------------------------------------------------- /utility/data-process/common/options.cc: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #include "options.h" 19 | 20 | #include 21 | 22 | #include 23 | 24 | namespace utility { 25 | 26 | std::string Options::root = "/graph-learning/samgraph/"; 27 | std::string Options::graph = "papers100M"; 28 | size_t Options::num_threads = 48; 29 | bool Options::is64type = false; 30 | CLI::App Options::_app; 31 | 32 | void Options::InitOptions(std::string app_name) { 33 | _app.add_option("-p,--root", root); 34 | _app.add_option("-g,--graph", graph) 35 | ->check(CLI::IsMember({ 36 | "reddit", 37 | "products", 38 | "papers100M", 39 | "com-friendster", 40 | "uk-2006-05", 41 | "twitter", 42 | "sk-2005", 43 | })); 44 | _app.add_option("-t,--threads", num_threads); 45 | _app.add_flag("--64", is64type); 46 | } 47 | 48 | void Options::Parse(int argc, char* argv[]) { 49 | _app.parse(argc, argv); 50 | 51 | std::cout << "Root: " << root << std::endl; 52 | std::cout << "Graph: " << graph << std::endl; 53 | std::cout << "Threads: " << num_threads << std::endl; 54 | std::cout << "64 bit: " << is64type << std::endl; 55 | } 56 | 57 | int Options::Exit(const CLI::ParseError& e) { return _app.exit(e); } 58 | 59 | void Options::EnableOptions() { omp_set_num_threads(num_threads); } 60 | 61 | } 
// namespace utility -------------------------------------------------------------------------------- /utility/data-process/common/options.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #ifndef UTILITY_COMMON_OPTIONS_H 19 | #define UTILITY_COMMON_OPTIONS_H 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "graph_loader.h" 28 | 29 | namespace utility { 30 | 31 | class Options { 32 | public: 33 | static void InitOptions(std::string app_name); 34 | template 35 | static void CustomOption(std::string key, T& val) { 36 | _app.add_option(key, val); 37 | } 38 | static void Parse(int argc, char *argv[]); 39 | static int Exit(const CLI::ParseError &e); 40 | static void EnableOptions(); 41 | 42 | static std::string root; 43 | static std::string graph; 44 | static bool is64type; 45 | static size_t num_threads; 46 | 47 | private: 48 | static CLI::App _app; 49 | }; 50 | 51 | #ifndef OPTIONS_PARSE 52 | #define OPTIONS_PARSE(argc, argv) \ 53 | try { \ 54 | utility::Options::Parse((argc), (argv)); \ 55 | utility::Options::EnableOptions(); \ 56 | } catch (const CLI::ParseError &e) { \ 57 | return utility::Options::Exit(e); \ 58 | } 59 | #endif 60 | 61 | } // namespace utility 62 | 63 | #endif 
#ifndef UTILITY_COMMON_UTILS_H
#define UTILITY_COMMON_UTILS_H

#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>

namespace utility {

// True when `filepath` exists and can be opened for reading.
inline bool FileExist(const std::string &filepath) {
  std::ifstream f(filepath);
  return f.good();
}

// Tooling-grade assertion: print `error_msg` and terminate the process
// with status 1 when `cond` is false.
inline void Check(bool cond, std::string error_msg = "") {
  if (!cond) {
    std::cout << error_msg << std::endl;
    exit(1);
  }
}

// Monotonic stopwatch; Passed() returns elapsed fractional seconds.
// (Template arguments below were stripped in this dump; reconstructed from
// the steady_clock usage.)
class Timer {
 public:
  Timer(std::chrono::time_point<std::chrono::steady_clock> tp =
            std::chrono::steady_clock::now())
      : _start_time(tp) {}

  double Passed() const {
    const auto elapsed =
        std::chrono::duration_cast<std::chrono::duration<double>>(
            std::chrono::steady_clock::now() - _start_time);
    return elapsed.count();
  }

 private:
  std::chrono::time_point<std::chrono::steady_clock> _start_time;
};

}  // namespace utility

#endif  // UTILITY_COMMON_UTILS_H
-------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "language_info": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 3 7 | }, 8 | "file_extension": ".py", 9 | "mimetype": "text/x-python", 10 | "name": "python", 11 | "nbconvert_exporter": "python", 12 | "pygments_lexer": "ipython3", 13 | "version": 3 14 | }, 15 | "orig_nbformat": 4 16 | }, 17 | "nbformat": 4, 18 | "nbformat_minor": 2, 19 | "cells": [ 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import os\n", 27 | "import numpy as np\n", 28 | "import pandas as pd\n", 29 | "from scipy.sparse import coo_matrix, csr_matrix" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "src_dir = '/graph-learning/data-raw/com-friendster-bin/'\n", 39 | "output_dir = '/graph-learning/samgraph/com-friendster/'" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "num_nodes = 65608366\n", 49 | "num_edges = 1806067135\n", 50 | "row = np.memmap(os.path.join(src_dir, 'coo_row.bin'), dtype='uint32', mode='r', shape=(num_edges,))\n", 51 | "col = np.memmap(os.path.join(src_dir, 'coo_col.bin'), dtype='uint32', mode='r', shape=(num_edges,))" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "src = np.concatenate((row, col))\n", 61 | "dst = np.concatenate((col, row))\n", 62 | "data = np.zeros(src.shape)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "coo = coo_matrix((data, (src, dst)), shape=(num_nodes, num_nodes),dtype=np.uint32)\n", 72 | "csr = coo.tocsr()\n", 73 | "\n", 74 | "indptr = csr.indptr\n", 75 | "indices = csr.indices" 
"""Convert the raw Reddit dataset dumps into samgraph's binary layout.

Reads reddit_graph.npz / reddit_data.npz, builds a CSC adjacency (by
transposing the COO before CSR conversion), and writes indptr/indices,
train/valid/test splits, features, labels, and a meta.txt summary.
"""
import numpy as np
from scipy.sparse import coo_matrix, csr_matrix

if __name__ == '__main__':
    src_dir = '/graph-learning/data-raw/reddit/'
    output_dir = '/graph-learning/samgraph/reddit/'

    file0 = np.load(src_dir + 'reddit_graph.npz')
    file1 = np.load(src_dir + 'reddit_data.npz')

    feature = file1['feature']
    label = file1['label']
    # node_types encodes the split: 1 = train, 2 = valid, 3 = test.
    node_ids = file1['node_types']

    row = file0['row']
    col = file0['col']
    data = file0['data']

    train_idx = np.where(node_ids == 1)[0]
    valid_idx = np.where(node_ids == 2)[0]
    test_idx = np.where(node_ids == 3)[0]

    num_nodes = feature.shape[0]

    # Swap col and row so the CSR of the transposed matrix is a CSC graph.
    coo = coo_matrix((data, (col, row)), shape=(num_nodes, num_nodes),
                     dtype=np.uint32)
    csr = coo.tocsr()

    indptr = csr.indptr
    indices = csr.indices

    indptr.astype('uint32').tofile(output_dir + 'indptr.bin')
    indices.astype('uint32').tofile(output_dir + 'indices.bin')

    train_idx.astype('uint32').tofile(output_dir + 'train_set.bin')
    valid_idx.astype('uint32').tofile(output_dir + 'valid_set.bin')
    test_idx.astype('uint32').tofile(output_dir + 'test_set.bin')

    feature.astype('float32').tofile(output_dir + 'feat.bin')
    label.astype('uint64').tofile(output_dir + 'label.bin')

    # FIX: derive metadata from the arrays written above instead of
    # hard-coding counts that can silently drift out of sync with the dumps.
    # (NUM_CLASS assumes labels are dense in [0, max]; for Reddit this
    # matches the previously hard-coded 41 -- confirm for other dumps.)
    with open(f'{output_dir}meta.txt', 'w') as f:
        f.write('{}\t{}\n'.format('NUM_NODE', num_nodes))
        f.write('{}\t{}\n'.format('NUM_EDGE', indices.shape[0]))
        f.write('{}\t{}\n'.format('FEAT_DIM', feature.shape[1]))
        f.write('{}\t{}\n'.format('NUM_CLASS', int(label.max()) + 1))
        f.write('{}\t{}\n'.format('NUM_TRAIN_SET', train_idx.shape[0]))
        f.write('{}\t{}\n'.format('NUM_VALID_SET', valid_idx.shape[0]))
        f.write('{}\t{}\n'.format('NUM_TEST_SET', test_idx.shape[0]))
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "common/utils.h" 30 | 31 | using namespace utility; 32 | 33 | int main() { 34 | size_t n_iters = 10; 35 | size_t mem_size = 1 * 1024 * 1024 * 1024; 36 | char *origin_data, *shm_data, *norm_mem_data; 37 | 38 | shm_data = 39 | (char *)mmap(NULL, mem_size, PROT_READ | PROT_WRITE, 40 | MAP_ANONYMOUS | MAP_SHARED | 0x40000 /*MAP_HUGETLB*/, -1, 0); 41 | mlock(shm_data, mem_size); 42 | 43 | origin_data = (char *)malloc(mem_size); 44 | norm_mem_data = (char *)malloc(mem_size); 45 | 46 | double norm_time = 0; 47 | double shm_time = 0; 48 | 49 | size_t copy_size = 30 * 1024 * 1024; 50 | size_t current_offset_0 = 0; 51 | size_t current_offset_1 = mem_size - copy_size; 52 | for (int i = 0; i < n_iters; i++) { 53 | Timer t0; 54 | memcpy(norm_mem_data + current_offset_0, origin_data + current_offset_0, 55 | copy_size); 56 | norm_time += t0.Passed(); 57 | 58 | Timer t1; 59 | memcpy(shm_data + current_offset_1, origin_data + current_offset_1, 60 | copy_size); 61 | shm_time += t1.Passed(); 62 | } 63 | 64 | std::cout << "normal: " << norm_time / n_iters 65 | << " | shm: " << shm_time / n_iters << std::endl; 66 | } -------------------------------------------------------------------------------- /utility/data-process/toolkit/bandwidth/memcpy_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel 
and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include "common/graph_loader.h" 24 | #include "common/options.h" 25 | #include "common/utils.h" 26 | 27 | int main(int argc, char *argv[]) { 28 | utility::Options::InitOptions("Degree generator"); 29 | OPTIONS_PARSE(argc, argv); 30 | 31 | utility::GraphLoader graph_loader(utility::Options::root); 32 | auto graph = graph_loader.GetGraphDataset(utility::Options::graph); 33 | 34 | uint32_t *indptr = graph->indptr; 35 | uint32_t *indices = graph->indices; 36 | size_t num_nodes = graph->num_nodes; 37 | size_t num_edges = graph->num_edges; 38 | float *feat = graph->feature; 39 | size_t dim = graph->feat_dim; 40 | 41 | size_t num_runs = 10; 42 | size_t num_inputs = 8000; 43 | 44 | std::vector> inputs(num_runs, std::vector(num_inputs)); 45 | std::vector> outputs(num_runs, 46 | std::vector(num_inputs * dim)); 47 | 48 | std::random_device dev; 49 | std::mt19937 rng(dev()); 50 | std::uniform_int_distribution dist( 51 | 0, num_nodes - 1); // distribution in range [1, 6] 52 | 53 | for (size_t i = 0; i < num_runs; i++) { 54 | for (size_t j = 0; j < num_inputs; j++) { 55 | inputs[i][j] = dist(rng); 56 | } 57 | } 58 | 59 | for (size_t i = 0; i < num_runs; i++) { 60 | utility::Timer t; 61 | // #pragma omp parallel for num_threads(1) 62 | for (size_t j = 0; j < 
num_inputs; j++) { 63 | // #pragma omp simd 64 | for (size_t k = 0; k < dim; k++) { 65 | outputs[i][j * dim + k] = feat[inputs[i][j] * dim + k]; 66 | } 67 | 68 | // std::cout << "OMP thread id %d" << omp_get_thread_num() << std::endl; 69 | } 70 | 71 | double d = t.Passed(); 72 | std::cout << "[Run " << i << "] " << d << " secs" << std::endl; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /utility/data-process/toolkit/cache/cache_by_degree.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | */ 17 | 18 | #include 19 | #ifdef __linux__ 20 | #include 21 | #else 22 | #include 23 | #endif 24 | 25 | #include "common/graph_loader.h" 26 | #include "common/options.h" 27 | #include "common/utils.h" 28 | 29 | void randkingNodesToFile(utility::GraphPtr graph, 30 | std::shared_ptr info) { 31 | size_t num_nodes = graph->num_nodes; 32 | const std::vector& out_degrees = info->out_degrees; 33 | std::vector> outdegree_id_list(num_nodes); 34 | 35 | #pragma omp parallel for 36 | for (uint32_t i = 0; i < num_nodes; i++) { 37 | outdegree_id_list[i] = {out_degrees[i], i}; 38 | } 39 | 40 | #ifdef __linux__ 41 | __gnu_parallel::sort(outdegree_id_list.begin(), outdegree_id_list.end(), 42 | std::greater>()); 43 | #else 44 | std::sort(outdegree_id_list.begin(), outdegree_id_list.end(), 45 | std::greater>()); 46 | #endif 47 | 48 | std::vector ranking_nodes(num_nodes); 49 | 50 | #pragma omp parallel for 51 | for (size_t i = 0; i < num_nodes; i++) { 52 | ranking_nodes[i] = outdegree_id_list[i].second; 53 | } 54 | 55 | std::ofstream ofs( 56 | graph->folder + "cache_by_degree.bin", 57 | std::ofstream::out | std::ofstream::binary | std::ofstream::trunc); 58 | 59 | ofs.write((const char*)ranking_nodes.data(), 60 | ranking_nodes.size() * sizeof(uint32_t)); 61 | 62 | ofs.close(); 63 | } 64 | 65 | int main(int argc, char* argv[]) { 66 | utility::Options::InitOptions("Graph property"); 67 | OPTIONS_PARSE(argc, argv); 68 | 69 | utility::GraphLoader graph_loader(utility::Options::root); 70 | auto graph = graph_loader.GetGraphDataset(utility::Options::graph); 71 | auto degree_info = utility::DegreeInfo::GetDegrees(graph); 72 | 73 | randkingNodesToFile(graph, degree_info); 74 | } -------------------------------------------------------------------------------- /utility/data-process/toolkit/cache/cache_by_random.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao 
Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #include 19 | #ifdef __linux__ 20 | #include 21 | #else 22 | #include 23 | #endif 24 | #include 25 | 26 | #include "common/graph_loader.h" 27 | #include "common/options.h" 28 | #include "common/utils.h" 29 | 30 | void randkingNodesToFile(utility::GraphPtr graph) { 31 | size_t num_nodes = graph->num_nodes; 32 | std::vector ranking_nodes(num_nodes); 33 | 34 | #pragma omp parallel for 35 | for (uint32_t i = 0; i < num_nodes; i++) { 36 | ranking_nodes[i] = {i}; 37 | } 38 | 39 | std::mt19937 generator; 40 | for (uint32_t i = 0; i < num_nodes; i++) { 41 | std::uniform_int_distribution distribution(0, num_nodes - i - 1); 42 | std::swap(ranking_nodes[num_nodes - i - 1], ranking_nodes[distribution(generator)]); 43 | } 44 | 45 | std::ofstream ofs( 46 | graph->folder + "cache_by_random.bin", 47 | std::ofstream::out | std::ofstream::binary | std::ofstream::trunc); 48 | 49 | ofs.write((const char*)ranking_nodes.data(), 50 | ranking_nodes.size() * sizeof(uint32_t)); 51 | 52 | ofs.close(); 53 | } 54 | 55 | int main(int argc, char* argv[]) { 56 | utility::Options::InitOptions("Graph property"); 57 | OPTIONS_PARSE(argc, argv); 58 | 59 | utility::GraphLoader graph_loader(utility::Options::root); 60 | auto graph = graph_loader.GetGraphDataset(utility::Options::graph); 61 | 62 | randkingNodesToFile(graph); 63 | } 
-------------------------------------------------------------------------------- /utility/data-process/toolkit/load/load_mem.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | #include "common/graph_loader.h" 19 | #include "common/options.h" 20 | #include "common/utils.h" 21 | 22 | int main(int argc, char *argv[]) { 23 | utility::Options::InitOptions("Graph property"); 24 | OPTIONS_PARSE(argc, argv); 25 | 26 | utility::GraphLoader graph_loader(utility::Options::root); 27 | auto graph = graph_loader.GetGraphDataset(utility::Options::graph, 28 | utility::Options::is64type); 29 | } -------------------------------------------------------------------------------- /utility/data-process/toolkit/memory/memory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | """ 16 | 17 | from typing import BinaryIO 18 | 19 | 20 | TB = 1 * 1024 * 1024 * 1024 * 1024 21 | GB = 1 * 1024 * 1024 * 1024 22 | MB = 1 * 1024 * 1024 23 | KB = 1 * 1024 24 | 25 | BILLION = 1 * 10**9 26 | MILLION = 1 * 10**6 27 | THOUSAND = 1 * 10**3 28 | INT_BYTES = 4 29 | FLOAT_BYTES = 4 30 | LONG_BYTES = 8 31 | 32 | 33 | def sz_format(sz): 34 | if sz >= TB: 35 | return "{:<6.2f} TB".format(sz / TB) 36 | elif sz >= GB: 37 | return "{:<6.2f} GB".format(sz / GB) 38 | elif sz >= MB: 39 | return "{:<6.2f} MB".format(sz / MB) 40 | elif sz >= KB: 41 | return "{:<6.2f} KB".format(sz / KB) 42 | else: 43 | return "{:<6.2f} B".format(sz) 44 | 45 | 46 | class Graph: 47 | def __init__(self, name, num_nodes, num_edges, feat_dim): 48 | self.name = name 49 | self.num_nodes = num_nodes * 1.0 50 | self.num_edges = num_edges * 1.0 51 | self.feat_dim = feat_dim * 1.0 52 | 53 | def __str__(self): 54 | graph_sz = sz_format( 55 | ((self.num_nodes + 1) + self.num_edges) * INT_BYTES) 56 | feat_sz = sz_format((self.num_nodes * self.feat_dim) * FLOAT_BYTES) 57 | 58 | return "{:15s} | topology: {:s} | feat: {:s}".format(self.name, graph_sz, feat_sz) 59 | 60 | 61 | Reddit = Graph("Reddit", 232965, 114615892, 602) 62 | Products = Graph("Products", 2449029, 123718152, 100) 63 | Papers100M = Graph('Papers100M', 111059956, 1615685872, 128) 64 | ComFriendster = Graph('com-friendster', 65608366, 3612134270, 140) 65 | AlipayGraph = Graph('AlipayGraph', 4 * BILLION, 26 * BILLION, 128 * 4) 66 | Amazon = Graph('Amazon', 65 * MILLION, 3.6 * MILLION, 300) 67 | Mag240M_lsc = 
Graph('Mag240M_lsc', 121 * MILLION, 1.2 * BILLION, 768) 68 | Twitter = Graph('Twitter', 41652230, 1468365182, 256) 69 | Uk = Graph('UK-2006-06', 77741046, 2965197340, 256) 70 | 71 | graph_list = [Reddit, Products, Papers100M, ComFriendster, 72 | AlipayGraph, Amazon, Mag240M_lsc, Twitter, Uk] 73 | 74 | 75 | def run(): 76 | for graph in graph_list: 77 | print(graph) 78 | 79 | 80 | if __name__ == '__main__': 81 | run() 82 | -------------------------------------------------------------------------------- /utility/fastgraph/fastgraph/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | import os 18 | 19 | from .dataset_loader import DatasetLoader 20 | 21 | 22 | def dataset(name, root_path, force_load64=False): 23 | assert(name in ['papers100M', 'com-friendster', 24 | 'reddit', 'products', 'twitter', 'uk-2006-05']) 25 | dataset_path = os.path.join(root_path, name) 26 | dataset_loader = DatasetLoader(dataset_path, force_load64) 27 | return dataset_loader 28 | 29 | 30 | __all__ = ['dataset'] 31 | -------------------------------------------------------------------------------- /utility/fastgraph/fastgraph/meta_reader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | import os 18 | 19 | 20 | class MetaReader(object): 21 | def __init__(self): 22 | pass 23 | 24 | def read(self, folder): 25 | meta = {} 26 | with open(os.path.join(folder, 'meta.txt'), 'r') as f: 27 | lines = f.readlines() 28 | for line in lines: 29 | line = line.split() 30 | assert len(line) == 2 31 | meta[line[0]] = int(line[1]) 32 | 33 | meta_keys = meta.keys() 34 | 35 | assert('NUM_NODE' in meta_keys) 36 | assert('NUM_EDGE' in meta_keys) 37 | assert('FEAT_DIM' in meta_keys) 38 | assert('NUM_CLASS' in meta_keys) 39 | assert('NUM_TRAIN_SET' in meta_keys) 40 | assert('NUM_VALID_SET' in meta_keys) 41 | assert('NUM_TEST_SET' in meta_keys) 42 | 43 | return meta 44 | -------------------------------------------------------------------------------- /utility/fastgraph/setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2022 Institute of Parallel and Distributed Systems, Shanghai Jiao Tong University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | from setuptools import setup 18 | 19 | setup( 20 | name='fastgraph', 21 | packages=['fastgraph'], 22 | version='0.2.0', 23 | description='Fast graph dataset loading tools', 24 | ) 25 | -------------------------------------------------------------------------------- /utility/fg_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HERE="$(dirname $(readlink -f $0))" 4 | 5 | pushd "$HERE/fastgraph" 6 | python setup.py install 7 | popd -------------------------------------------------------------------------------- /utility/webgraph/Makefile: -------------------------------------------------------------------------------- 1 | 2 | ifeq ($(mvn), ) 3 | mvn=mvn 4 | endif 5 | build: 6 | $(mvn) clean package 7 | convert: 8 | ifeq ($(graph),) 9 | @echo "please set graph name!" 10 | else 11 | java -cp target/webgraph-0.1-SNAPSHOT.jar it.unimi.dsi.webgraph.BVGraph -o -O -L $(graph) 12 | java -cp target/webgraph-0.1-SNAPSHOT.jar ipads.samgraph.webgraph.WebgraphDecoder $(graph) 13 | endif 14 | help: 15 | @echo "Usage: make build; make convert graph=/graph-learning/data-raw/example" 16 | @echo " there must exist file 'example.graph' and 'example.properties'" -------------------------------------------------------------------------------- /utility/webgraph/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | 4.0.0 7 | 8 | ipads.samgraph.webgraph 9 | webgraph 10 | 0.1-SNAPSHOT 11 | jar 12 | 13 | Webgraph Converter 14 | 15 | 2021 16 | 17 | 18 | 19 | it.unimi.dsi 20 | fastutil 21 | 8.5.4 22 | 23 | 24 | it.unimi.dsi 25 | webgraph 26 | 3.6.10 27 | 28 | 29 | 30 | 31 | 32 | 33 | org.apache.maven.plugins 34 | maven-compiler-plugin 35 | 3.0 36 | 37 | 1.8 38 | 1.8 39 | true 40 | true 41 | 42 | 43 | 44 | org.apache.maven.plugins 45 | maven-assembly-plugin 46 | 2.4 47 | 48 | 49 | jar-with-dependencies 50 | 51 | false 52 | 53 | 54 | 55 | package 56 | 57 | 
single 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | --------------------------------------------------------------------------------