├── test ├── local_test_data │ ├── small.PNG │ ├── small_T.bin │ ├── small.mtx │ └── small_T.mtx ├── run_all_tests.sh ├── ref │ ├── nerstrand │ │ ├── Makefile │ │ ├── nestrand.sh │ │ ├── README.txt │ │ └── nerstrand_driver.cpp │ ├── cpu_ref_SSSP.py │ ├── cpu_ref_widest.py │ ├── ref_sssp_BGL.cpp │ └── cpu_ref_pagerank.py ├── generators │ ├── convertors │ │ ├── Makefile │ │ ├── pprocess.sh │ │ ├── README.txt │ │ ├── sort_eges.cpp │ │ ├── edges_to_H.cpp │ │ └── H_to_HtSorted_and_a.cpp │ ├── Makefile │ ├── rmat.cpp │ └── plod.cpp ├── Makefile ├── data_gen.sh └── log_converter.py ├── .gitmodules ├── cpp ├── cmake │ ├── Templates │ │ └── GoogleTest.CMakeLists.txt.cmake │ └── Modules │ │ └── ConfigureGoogleTest.cmake ├── include │ ├── pagerank_kernels.hxx │ ├── jaccard_gpu.cuh │ ├── debug_help.h │ ├── lobpcg.hxx │ ├── async_event.hxx │ ├── graph_visitors.hxx │ ├── async_event.cuh │ ├── thrust_traits.hxx │ ├── nvgraphP.h │ ├── triangles_counting_kernels.hxx │ ├── nvgraph_vector_kernels.hxx │ ├── high_res_clock.h │ ├── triangles_counting.hxx │ ├── debug_macros.h │ ├── csrmv_cub.h │ ├── nvgraph_lapack.hxx │ ├── size2_selector.hxx │ ├── cnmem_shared_ptr.hxx │ ├── triangles_counting_defines.hxx │ ├── sssp.hxx │ ├── widest_path.hxx │ ├── test │ │ ├── delta_modularity_test.cuh │ │ ├── mem_test.cuh │ │ ├── k_in_test.cuh │ │ └── thrust_test.cuh │ ├── modularity_maximization.hxx │ ├── bfs2d.hxx │ ├── valued_csr_graph.hxx │ ├── nvgraph_csrmv.hxx │ ├── graph.hxx │ ├── pagerank.hxx │ ├── bfs.hxx │ ├── app │ │ ├── nvlouvain_app.cu │ │ └── nvlouvain_app_hierarchy.cu │ ├── nvgraph_cublas.hxx │ ├── nvgraph_vector.hxx │ ├── nvgraph_convert.hxx │ ├── kmeans.hxx │ ├── partition.hxx │ └── stacktrace.h ├── src │ ├── csr_graph.cpp │ ├── valued_csr_graph.cpp │ ├── nvgraph_error.cu │ ├── graph_contraction │ │ ├── contraction_csr_max.cu │ │ ├── contraction_csr_min.cu │ │ ├── contraction_csr_sum.cu │ │ ├── contraction_csr_mul.cu │ │ ├── contraction_mv_float_max.cu │ │ ├── contraction_mv_float_min.cu │ │ ├── contraction_mv_float_sum.cu │ │ ├── contraction_mv_double_max.cu │ │ ├── contraction_mv_double_min.cu │ │ ├── contraction_mv_double_sum.cu │ │ ├── contraction_mv_float_mul.cu │ │ └── contraction_mv_double_mul.cu │ ├── pagerank_kernels.cu │ └── graph_extractor.cu └── tests │ ├── benchmarkScripts │ ├── run_graphMat.sh │ ├── run_galois.sh │ └── run_nvgraph.sh │ ├── 2d_partitioning_test.cpp │ └── nvgraph_test_common.h ├── conda-recipes └── nvgraph │ ├── build.sh │ └── meta.yaml ├── Acknowledgements.md └── external └── cub_semiring ├── util_namespace.cuh ├── block └── specializations │ └── block_histogram_atomic.cuh ├── cub.cuh └── util_macro.cuh /test/local_test_data/small.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rapidsai/nvgraph/HEAD/test/local_test_data/small.PNG -------------------------------------------------------------------------------- /test/local_test_data/small_T.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rapidsai/nvgraph/HEAD/test/local_test_data/small_T.bin -------------------------------------------------------------------------------- /test/local_test_data/small.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real general 2 | 6 6 10 3 | 1 2 0.500000 4 | 1 3 0.500000 5 | 3 1 0.333333 6 | 3 2 0.333333 7 | 3 5 0.333333 8 | 4 5 0.500000 9 | 4 6 0.500000 10 | 5 4 0.500000 11 | 5 6 
0.500000 12 | 6 4 1.000000 13 | -------------------------------------------------------------------------------- /test/local_test_data/small_T.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real general 2 | %%AMGX rhs 3 | 6 6 10 4 | 1 3 0.333333000 5 | 2 1 0.500000000 6 | 2 3 0.333333000 7 | 3 1 0.500000000 8 | 4 5 0.500000000 9 | 4 6 1.000000000 10 | 5 3 0.333333000 11 | 5 4 0.500000000 12 | 6 4 0.500000000 13 | 6 5 0.500000000 14 | 0 15 | 1 16 | 0 17 | 0 18 | 0 19 | 0 20 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "cpp/thirdparty/cnmem"] 2 | path = cpp/thirdparty/cnmem 3 | url = https://github.com/NVIDIA/cnmem.git 4 | [submodule "cpp/nvgraph/cpp/thirdparty/cub"] 5 | path = cpp/thirdparty/cub 6 | url = https://github.com/NVlabs/cub.git 7 | [submodule "cpp/nvgraph/external/cusp"] 8 | path = external/cusp 9 | url = https://github.com/cusplibrary/cusplibrary.git 10 | -------------------------------------------------------------------------------- /test/run_all_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #Usage sh run_all_tests.sh 3 | #Run all the tests in the current directory (i.e. you should copy it into your build/test/ directory). 4 | test="nvgraph_test 5 | csrmv_test 6 | semiring_maxmin_test 7 | semiring_minplus_test 8 | semiring_orand_test 9 | pagerank_test 10 | sssp_test 11 | max_flow_test" 12 | 13 | for i in $test 14 | do 15 | ./$i 16 | done 17 | -------------------------------------------------------------------------------- /test/ref/nerstrand/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-O3 -fopenmp 3 | LDFLAGS=-I. -L. libnerstrand.a 4 | EXEC=nerstrand_bench 5 | SOURCES=nerstrand_driver.cpp mmio.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | 8 | $(EXEC): $(OBJECTS) 9 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 10 | 11 | mmio.o: mmio.cpp mmio.h 12 | $(CC) $(CFLAGS) -c $< 13 | 14 | nerstrand_driver.o: nerstrand_driver.cpp mmio.h 15 | $(CC) $(CFLAGS) -c $< 16 | clean: 17 | rm *.o -------------------------------------------------------------------------------- /test/generators/convertors/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-O3 -march=native -pipe -w 3 | LDFLAGS=-lm 4 | 5 | all: sort HTA H mtob 6 | 7 | sort: sort_eges.cpp 8 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 9 | 10 | HTA: H_to_HtSorted_and_a.cpp 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 12 | 13 | H: edges_to_H.cpp 14 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 15 | 16 | mtob: binary_converter.cpp 17 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 18 | 19 | clean: 20 | rm sort HTA H mtob 21 | 22 | -------------------------------------------------------------------------------- /test/generators/convertors/pprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | edges="$1" 4 | echo "Starting Sort on $edges..." 5 | ./sort $edges 6 | echo "Done" 7 | 8 | tmp="_s" 9 | sedges=$edges$tmp 10 | echo "Starting H on $sedges ..." 11 | ./H $sedges 12 | echo "Done" 13 | 14 | tmp="_mtx" 15 | matrix=$sedges$tmp 16 | #delete sorted edges 17 | rm $sedges 18 | 19 | echo "Starting HTa on $matrix ..."
20 | ./HTA $matrix 21 | 22 | tmp="_T" 23 | outp=$edges$tmp 24 | outpp=$matrix$tmp 25 | mv $outpp $outp 26 | #delete H 27 | rm $matrix 28 | 29 | echo "Starting binary conversion ..." 30 | ./mtob $outp 31 | echo "Done" 32 | 33 | -------------------------------------------------------------------------------- /cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.12) 3 | 4 | include(ExternalProject) 5 | 6 | ExternalProject_Add(GoogleTest 7 | GIT_REPOSITORY https://github.com/google/googletest.git 8 | GIT_TAG release-1.8.0 9 | SOURCE_DIR "${GTEST_ROOT}/googletest" 10 | BINARY_DIR "${GTEST_ROOT}/build" 11 | INSTALL_DIR "${GTEST_ROOT}/install" 12 | CMAKE_ARGS ${GTEST_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${GTEST_ROOT}/install) 13 | 14 | -------------------------------------------------------------------------------- /conda-recipes/nvgraph/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CMAKE_COMMON_VARIABLES=" -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX11_ABI=$CMAKE_CXX11_ABI" 4 | 5 | 6 | if [ -n "$MACOSX_DEPLOYMENT_TARGET" ]; then 7 | # C++11 requires 10.9 8 | # but cudatoolkit 8 is built for 10.11 9 | export MACOSX_DEPLOYMENT_TARGET=10.11 10 | fi 11 | 12 | # show environment 13 | printenv 14 | # Cleanup local git 15 | git clean -xdf 16 | # Change directory for build process 17 | cd cpp 18 | # Use CMake-based build procedure 19 | mkdir build 20 | cd build 21 | # configure 22 | cmake $CMAKE_COMMON_VARIABLES .. 23 | # build 24 | make -j VERBOSE=1 install -------------------------------------------------------------------------------- /test/generators/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | CXX=g++ 10 | CXXFLAGS=-Wall -Ofast -march=native -pipe 11 | 12 | all: print_info plodg rmatg 13 | 14 | plodg: plod.cpp 15 | $(CXX) $(CXXFLAGS) $< -o $@ 16 | 17 | rmatg: rmat.cpp 18 | $(CXX) $(CXXFLAGS) $< -o $@ 19 | 20 | clean: 21 | rm -f rmatg plodg 22 | 23 | print_info: 24 | $(info The Boost Graph Library is required) 25 | -------------------------------------------------------------------------------- /Acknowledgements.md: -------------------------------------------------------------------------------- 1 | # Acknowledgements 2 | 3 | NVGRAPH is the product of a large community of developers and researchers since 2014, and we’re deeply appreciative of their work.
Here is a list of people from NVIDIA who contributed up to the point of open sourcing it: 5 | 6 | Managers 7 | - Harun Bayraktar 8 | - Joe Eaton 9 | - Alex Fit-Florea 10 | 11 | Nvgraph dev team 12 | - Marat Arsaev 13 | - Alex Fender 14 | - Andrei Schaffer 15 | 16 | Contributors from other teams 17 | - Hugo Braun 18 | - Slawomir Kierat 19 | - Ahmad Kiswani 20 | - Szymon Migacz 21 | - Maxim Naumov 22 | - Nikolay Sakharnykh 23 | - James Wyles 24 | 25 | Interns 26 | - Danielle Maddix 27 | - Tim Moon 28 | 29 | And last but not least, thank you also to the contributors from the CUDA PM and QA teams who have helped build nvgraph since its early days. 30 | -------------------------------------------------------------------------------- /conda-recipes/nvgraph/meta.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, NVIDIA CORPORATION. 2 | 3 | # Usage: 4 | # conda build -c defaults -c conda-forge . 5 | {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} 6 | {% set git_revision_count=environ.get('GIT_DESCRIBE_NUMBER', 0) %} 7 | {% set cuda_version='.'.join(environ.get('CUDA_VERSION', 'unknown').split('.')[:2]) %} 8 | package: 9 | name: nvgraph 10 | version: {{ version }} 11 | 12 | source: 13 | path: ../.. 14 | 15 | build: 16 | number: {{ git_revision_count }} 17 | string: cuda{{ cuda_version }}_{{ git_revision_count }} 18 | 19 | requirements: 20 | build: 21 | - cmake 3.12.4 22 | 23 | about: 24 | home: http://nvidia.com/ 25 | license: LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS 26 | license_file: LICENSE 27 | summary: nvgraph Library 28 | -------------------------------------------------------------------------------- /cpp/include/pagerank_kernels.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | namespace nvgraph 18 | { 19 | template 20 | void update_dangling_nodes(int n, ValueType_* dangling_nodes, ValueType_ damping_factor, cudaStream_t stream = 0); 21 | 22 | } // end namespace nvgraph 23 | 24 | -------------------------------------------------------------------------------- /cpp/src/csr_graph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "csr_graph.hxx" 18 | 19 | namespace nvgraph 20 | { 21 | 22 | template 23 | CsrGraph& CsrGraph::operator=(const CsrGraph& graph) 24 | { 25 | 26 | } 27 | 28 | } // end namespace nvgraph 29 | 30 | -------------------------------------------------------------------------------- /cpp/include/jaccard_gpu.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // Jaccard symilarity edge weights 17 | // Author: Alexandre Fender afender@nvidia.com and Maxim Naumov. 18 | 19 | #pragma once 20 | 21 | namespace nvlouvain 22 | { 23 | template 24 | int jaccard(int n, int e, int *csrPtr, int *csrInd, T * csrVal, T *v, T *work, T gamma, T *weight_i, T *weight_s, T *weight_j); 25 | } 26 | -------------------------------------------------------------------------------- /cpp/src/valued_csr_graph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "valued_csr_graph.hxx" 18 | #include "cnmem_shared_ptr.hxx" // interface with CuMem (memory pool lib) for shared ptr 19 | 20 | namespace nvgraph 21 | { 22 | template 23 | ValuedCsrGraph& ValuedCsrGraph::operator=(const ValuedCsrGraph& graph) 24 | { 25 | 26 | } 27 | 28 | } 29 | 30 | -------------------------------------------------------------------------------- /test/generators/convertors/README.txt: -------------------------------------------------------------------------------- 1 | ----------------------- 2 | Compile 3 | ----------------------- 4 | > make 5 | 6 | ----------------------- 7 | Run 8 | ----------------------- 9 | 10 | 11 | To preprocess a set of edges in matrix market pattern format 12 | > ./pprocess.sh edges.dat 13 | 14 | 15 | 16 | You can run each step separately 17 | Sort : 18 | > ./sort edges.dat 19 | 20 | Compute H : 21 | > ./H edges.dat 22 | 23 | Compute H transposed and dangling node vector 24 | > ./HTA H.mtx 25 | 26 | Convert to AmgX binary format 27 | > ./mtob HTA.mtx 28 | 29 | ----------------------- 30 | Input 31 | ----------------------- 32 | The format for sort and H is matrix market pattern format 33 | example : 34 | 35 | %%comment 36 | % as many comments as you want 37 | %... 38 | size size nonzero 39 | a b 40 | c d 41 | a e 42 | e a 43 | . 44 | . 45 | . 46 | [a-e] are in N* 47 | 48 | 49 | The format for HTA and mtob is matrix market coordinate format 50 | %%comment 51 | % as many comments as you want 52 | %... 53 | size size nonzero 54 | a b f 55 | c d g 56 | a e h 57 | e a i 58 | . 59 | . 60 | . 61 | [a-e] are in N* 62 | [f-i] are in R -------------------------------------------------------------------------------- /test/ref/nerstrand/nestrand.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nvg_data_prefix="/home/mnaumov/cuda_matrices/p4matrices/dimacs10" 4 | 5 | declare -a dataset=( 6 | "$nvg_data_prefix/preferentialAttachment.mtx" 7 | "$nvg_data_prefix/caidaRouterLevel.mtx" 8 | "$nvg_data_prefix/coAuthorsDBLP.mtx" 9 | "$nvg_data_prefix/citationCiteseer.mtx" 10 | "$nvg_data_prefix/coPapersDBLP.mtx" 11 | "$nvg_data_prefix/coPapersCiteseer.mtx" 12 | "/home/afender/modularity/as-Skitter.mtx" 13 | "/home/afender/modularity/hollywood-2009.mtx" 14 | ) 15 | 16 | for i in "${dataset[@]}" 17 | do 18 | ./nerstrand_bench "$i" 7 19 | done 20 | echo 21 | 22 | #run only best case according to Spreadsheet 1 23 | ./nerstrand_bench "$nvg_data_prefix/preferentialAttachment.mtx" 7 24 | ./nerstrand_bench "$nvg_data_prefix/caidaRouterLevel.mtx" 11 25 | ./nerstrand_bench "$nvg_data_prefix/coAuthorsDBLP.mtx" 7 26 | ./nerstrand_bench "$nvg_data_prefix/citationCiteseer.mtx" 17 27 | ./nerstrand_bench "$nvg_data_prefix/coPapersDBLP.mtx" 73 28 | ./nerstrand_bench "$nvg_data_prefix/coPapersCiteseer.mtx" 53 29 | ./nerstrand_bench "/home/afender/modularity/as-Skitter.mtx" 7 30 | ./nerstrand_bench "/home/afender/modularity/hollywood-2009.mtx" 11 31 | -------------------------------------------------------------------------------- /test/ref/nerstrand/README.txt: -------------------------------------------------------------------------------- 1 | This is a stand-alone host app that reads an undirected graph in matrix market format, converts it into CSR, calls Nerstrand with default parameters, and returns the modularity score of the clustering.
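For reference, that modularity score can be computed directly from a clustering of a weighted undirected graph stored in CSR. The following is a minimal, self-contained C++ sketch, illustrative only: it is not taken from nerstrand_driver.cpp or the Nerstrand API, it assumes a symmetric CSR (each undirected edge stored in both directions) with cluster ids in [0, n), and every name in it is a placeholder.

#include <vector>

// Q = sum over clusters c of [ W_in(c)/2m - (deg(c)/2m)^2 ], where W_in(c) is the
// intra-cluster edge weight (counted in both directions), deg(c) is the total
// weighted degree of the cluster, and 2m is the total weighted degree of the graph.
double modularity(int n,
                  const std::vector<int>& row_ptr,    // CSR offsets, size n+1
                  const std::vector<int>& col_ind,    // CSR column indices
                  const std::vector<double>& val,     // edge weights
                  const std::vector<int>& cluster)    // cluster id per vertex, in [0, n)
{
    std::vector<double> deg(n, 0.0);
    double two_m = 0.0;                               // total weighted degree = 2m
    for (int u = 0; u < n; ++u)
        for (int e = row_ptr[u]; e < row_ptr[u + 1]; ++e) {
            deg[u] += val[e];
            two_m  += val[e];
        }
    if (two_m == 0.0) return 0.0;                     // graph without edges

    std::vector<double> w_in(n, 0.0), deg_c(n, 0.0);  // per-cluster accumulators
    for (int u = 0; u < n; ++u) {
        deg_c[cluster[u]] += deg[u];
        for (int e = row_ptr[u]; e < row_ptr[u + 1]; ++e)
            if (cluster[col_ind[e]] == cluster[u])
                w_in[cluster[u]] += val[e];           // both directions of each intra-cluster edge
    }
    double q = 0.0;
    for (int c = 0; c < n; ++c)
        q += w_in[c] / two_m - (deg_c[c] / two_m) * (deg_c[c] / two_m);
    return q;
}

A higher Q means more edge weight falls inside clusters than a random placement with the same degrees would produce, which is the quantity the benchmark reports for each run.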
2 | 3 | Make sure you have downloaded and installed nerstrand : http://www-users.cs.umn.edu/~lasalle/nerstrand/ 4 | You should have libnerstrand.a in /build/Linux-x86_64/lib, move it to the directory containing this README or adjust the Makefile. 5 | 6 | Type "make" to compile the small benchmarking app and "./nerstrand_bench " to execute. 7 | For convenience there is also a benchmarking script that calls the benchmarking app (please adjust paths to binary and data sets). 8 | 9 | Use the following reference: 10 | @article{lasalle2014nerstrand, 11 | title={Multi-threaded Modularity Based Graph Clustering using the Multilevel Paradigm}, 12 | journal = "Journal of Parallel and Distributed Computing ", 13 | year = "2014", 14 | issn = "0743-7315", 15 | doi = "http://dx.doi.org/10.1016/j.jpdc.2014.09.012", 16 | url = "http://www.sciencedirect.com/science/article/pii/S0743731514001750", 17 | author = "Dominique LaSalle and George Karypis" 18 | }​ 19 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for building NVCompute/CUDA BLAS library 2 | SOLNDIR := ../. 3 | 4 | # Get the profile settings 5 | ifdef VULCAN 6 | include $(VULCAN_TOOLKIT_BASE)/build/getprofile.mk 7 | include $(VULCAN_TOOLKIT_BASE)/build/config/$(PROFILE).mk 8 | include $(VULCAN_TOOLKIT_BASE)/build/config/DetectOS.mk 9 | else 10 | include ../../build/getprofile.mk 11 | include ../../build/config/$(PROFILE).mk 12 | include ../../build/config/DetectOS.mk 13 | endif 14 | 15 | export I_AM_SLOPPY = 1 16 | AGNOSTIC_PROJECTS += nvgraph_test 17 | AGNOSTIC_PROJECTS += nvgraph_capi_tests 18 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_subgraph 19 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_conversion 20 | AGNOSTIC_PROJECTS += nvgraph_benchmark 21 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_clustering 22 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_contraction 23 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_traversal 24 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_triangles 25 | AGNOSTIC_PROJECTS += nvgraph_2d_partitioning_test 26 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_2d_bfs 27 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_2d_bfs_net 28 | 29 | ifdef VULCAN 30 | include $(VULCAN_TOOLKIT_BASE)/build/common.mk 31 | else 32 | include ../../build/common.mk 33 | endif 34 | -------------------------------------------------------------------------------- /cpp/tests/benchmarkScripts/run_graphMat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ****************** Edit this ************************* 4 | #******************************************************* 5 | #Path to graphMat binary data 6 | gm_data_prefix="/home-2/afender/GraphMat-master/data" 7 | #Path to graphMat binary 8 | gm_bin_prefix="/home-2/afender/GraphMat-master/bin" 9 | #Number of core to use in graphMat 10 | export OMP_NUM_THREADS=24 11 | # ****************************************************** 12 | #******************************************************* 13 | # NOTE 14 | #twitter_graphMat.bin and live_journal_graphMat.bin are assumed to be in "gm_data_prefix" directory 15 | #******************************************************* 16 | 17 | # Requiered export according to the doc 18 | export KMP_AFFINITY=scatter 19 | 20 | #Pagerank runs 21 | numactl -i all $gm_bin_prefix/PageRank $gm_data_prefix/twitter.graphmat.bin 22 | numactl -i all $gm_bin_prefix/PageRank 
$gm_data_prefix/soc-LiveJournal1.graphmat.bin 23 | 24 | # SSSP runs 25 | # Warning: vertices seems to have 1-based indices (nvGraph use 0-base) 26 | numactl -i all $gm_bin_prefix/SSSP $gm_data_prefix/twitter.graphmat.bin 1 27 | numactl -i all $gm_bin_prefix/SSSP $gm_data_prefix/soc-LiveJournal1.graphmat.bin 1 -------------------------------------------------------------------------------- /cpp/include/debug_help.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /* 17 | * debug_help.h 18 | * 19 | * Created on: Jul 19, 2018 20 | * Author: jwyles 21 | */ 22 | 23 | #include 24 | #include 25 | 26 | #pragma once 27 | 28 | namespace debug { 29 | template 30 | void printDeviceVector(T* dev_ptr, int items, std::string title) { 31 | T* host_ptr = (T*)malloc(sizeof(T) * items); 32 | cudaMemcpy(host_ptr, dev_ptr, sizeof(T) * items, cudaMemcpyDefault); 33 | std::cout << title << ": { "; 34 | for (int i = 0; i < items; i++) { 35 | std::cout << host_ptr[i] << ((i < items - 1) ? ", " : " "); 36 | } 37 | std::cout << "}\n"; 38 | free(host_ptr); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /cpp/tests/benchmarkScripts/run_galois.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ****************** Edit this ************************* 4 | # Path to local workspace containing p4matrices:2024 sync //matrices/p4matrices/graphs/... 5 | nvg_data_prefix="/home/afender/src/matrices/p4matrices/graphs" 6 | 7 | #Path to galois 8 | galois_root="/home/afender/soft/galois-2.3.0/build/default" 9 | # ***************************************************** 10 | export OMP_NUM_THREADS=24 11 | 12 | declare -a arr=( 13 | #Small mtx just for debug 14 | #"$nvg_data_prefix/small/small.mtx" 15 | "$nvg_data_prefix/soc-liveJournal/soc-LiveJournal1.mtx" 16 | "$nvg_data_prefix/Twitter/twitter.mtx" 17 | ) 18 | 19 | ## now loop through the above array 20 | for i in "${arr[@]}" 21 | do 22 | echo "Pagerank" 23 | echo "$i" 24 | time $galois_root/tools/graph-convert/graph-convert -mtx2gr -edgeType=float32 -print-all-options $i $i.galois 25 | time $galois_root/tools/graph-convert/graph-convert -gr2tgr -edgeType=float32 -print-all-options $i.galois $i_T.galois 26 | time $galois_root/apps/pagerank/app-pagerank $i.galois -graphTranspose="$i_T.galois" -t=$OMP_NUM_THREADS 27 | echo 28 | done 29 | echo 30 | for i in "${arr[@]}" 31 | do 32 | echo "SSSP" 33 | echo "$i" 34 | time $galois_root/apps/sssp/app-sssp $i.galois -startNode=0 -t=$OMP_NUM_THREADS 35 | echo 36 | done 37 | echo 38 | -------------------------------------------------------------------------------- /cpp/include/lobpcg.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include "matrix.hxx" 19 | #include "partition.hxx" 20 | 21 | namespace nvgraph { 22 | 23 | template 24 | int lobpcg_simplified(cublasHandle_t cublasHandle, cusolverDnHandle_t cusolverHandle, 25 | IndexType_ n, IndexType_ k, 26 | /*const*/ Matrix * A, 27 | ValueType_ * __restrict__ eigVecs_dev, 28 | ValueType_ * __restrict__ eigVals_dev, 29 | IndexType_ maxIter,ValueType_ tol, 30 | ValueType_ * __restrict__ work_dev, 31 | IndexType_ & iter); 32 | 33 | } 34 | -------------------------------------------------------------------------------- /cpp/include/async_event.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace nvgraph { 20 | 21 | class AsyncEvent { 22 | public: 23 | AsyncEvent() : async_event(NULL) { } 24 | AsyncEvent(int size) : async_event(NULL) { cudaEventCreate(&async_event); } 25 | ~AsyncEvent() { if (async_event != NULL) cudaEventDestroy(async_event); } 26 | 27 | void create() { cudaEventCreate(&async_event); } 28 | void record(cudaStream_t s=0) { 29 | if (async_event == NULL) 30 | cudaEventCreate(&async_event); // check if we haven't created the event yet 31 | cudaEventRecord(async_event,s); 32 | } 33 | void sync() { 34 | cudaEventSynchronize(async_event); 35 | } 36 | private: 37 | cudaEvent_t async_event; 38 | }; 39 | 40 | } 41 | 42 | -------------------------------------------------------------------------------- /cpp/include/graph_visitors.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef GRAPH_VISITORS_HXX 18 | #define GRAPH_VISITORS_HXX 19 | 20 | namespace nvgraph 21 | { 22 | //PROBLEM: using Visitor Design Pattern over a 23 | // hierarchy of visitees that depend on 24 | // different number of template arguments 25 | // 26 | //SOLUTION:use Acyclic Visitor 27 | // (A. Alexandrescu, "Modern C++ Design", Section 10.4), 28 | // where *concrete* Visitors must be parameterized by all 29 | // the possibile template args of the Visited classes (visitees); 30 | // 31 | struct VisitorBase 32 | { 33 | virtual ~VisitorBase(void) 34 | { 35 | } 36 | }; 37 | 38 | template 39 | struct Visitor 40 | { 41 | virtual void Visit(T& ) = 0; 42 | virtual ~Visitor() { } 43 | }; 44 | }//end namespace 45 | #endif 46 | 47 | -------------------------------------------------------------------------------- /cpp/include/async_event.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | 20 | class AsyncEvent 21 | { 22 | public: 23 | AsyncEvent() : async_event(NULL) { } 24 | AsyncEvent(int size) : async_event(NULL) { cudaEventCreate(&async_event); } 25 | ~AsyncEvent() { if (async_event != NULL) cudaEventDestroy(async_event); } 26 | 27 | void create() { cudaEventCreate(&async_event); } 28 | void record(cudaStream_t s = 0) 29 | { 30 | if (async_event == NULL) 31 | { 32 | cudaEventCreate(&async_event); // check if we haven't created the event yet 33 | } 34 | 35 | cudaEventRecord(async_event, s); 36 | } 37 | void sync() 38 | { 39 | cudaEventSynchronize(async_event); 40 | } 41 | private: 42 | cudaEvent_t async_event; 43 | }; 44 | 45 | -------------------------------------------------------------------------------- /test/data_gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #Usage sh data_gen size1 size2 ... 3 | #Generate power law in-degree plus rmat graphs of size size1 ... sizeN 4 | #Corresponding transposed and binary csr are generated as well 5 | 6 | convert (){ 7 | edges=$1 8 | #echo "Starting Sort on $edges..." 9 | ./generators/convertors/sort $edges 10 | #echo "Done" 11 | 12 | tmp="_s" 13 | sedges=$edges$tmp 14 | echo "Starting H on $sedges ..." 15 | ./generators/convertors/H $sedges 16 | #echo "Done" 17 | 18 | tmp="_mtx" 19 | matrix=$sedges$tmp 20 | #delete soted edges 21 | rm $sedges 22 | 23 | echo "Starting HTa on $matrix ..." 24 | ./generators/convertors/HTA $matrix 25 | 26 | tmp="_T" 27 | outp=$edges$tmp 28 | outpp=$matrix$tmp 29 | mv $outpp $outp 30 | #delete H 31 | rm $matrix 32 | 33 | #echo "Starting binary conversion ..." 34 | ./generators/convertors/mtob $outp 35 | #echo "Generated transposed coo and transposed csr bin" 36 | } 37 | 38 | echo "Building the tools ..." 
39 | make -C generators 40 | make -C generators/convertors 41 | #generate the graphs we need here 42 | #loop over script arguments which represent graph sizes. 43 | for var in "$@" 44 | do 45 | echo "Generate graphs of size $var" 46 | vertices=$var 47 | option="i" 48 | ./generators/plodg $vertices $option 49 | ./generators/rmatg $vertices $option 50 | graph="plod_graph_" 51 | format=".mtx" 52 | path_to_data="local_test_data/" 53 | name="$path_to_data$graph$vertices$format" 54 | convert $name 55 | graph="rmat_graph_" 56 | name="$path_to_data$graph$vertices$format" 57 | convert $name 58 | done 59 | -------------------------------------------------------------------------------- /test/log_converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from sys import argv 3 | from subprocess import Popen, PIPE, STDOUT 4 | from os import path, environ 5 | 6 | 7 | def main(): 8 | args = argv[1:] 9 | args[0] = path.join('./', args[0]) 10 | print args 11 | environ["GTEST_PRINT_TIME"] = "0" 12 | popen = Popen(args, stdout=PIPE, stderr=STDOUT) 13 | stillParsing = True 14 | skip = [] 15 | while not popen.poll(): 16 | data = popen.stdout.readline().splitlines() 17 | if len(data) == 0: 18 | break 19 | data = data[0] 20 | try: 21 | STATUS = data[0:12] 22 | NAME = data[12:] 23 | if data.find('Global test environment tear-down') != -1: 24 | stillParsing = False 25 | if stillParsing: 26 | if STATUS == "[ RUN ]": 27 | print('&&&& RUNNING' + NAME) 28 | elif STATUS == "[ OK ]" and NAME.strip() not in skip: 29 | print('&&&& PASSED ' + NAME) 30 | elif STATUS == "[ WAIVED ]": 31 | print('&&&& WAIVED ' + NAME) 32 | skip.append(NAME.strip()) 33 | elif STATUS == "[ FAILED ]": 34 | NAME = NAME.replace(', where', '\n where') 35 | print('&&&& FAILED ' + NAME) 36 | else: 37 | print(data) 38 | else: 39 | print(data) 40 | except IndexError: 41 | print(data) 42 | 43 | return popen.returncode 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /cpp/include/thrust_traits.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | 18 | 19 | #ifndef THRUST_TRAITS_HXX 20 | 21 | #define THRUST_TRAITS_HXX 22 | 23 | 24 | 25 | #include 26 | 27 | #include 28 | 29 | 30 | 31 | namespace nvgraph 32 | 33 | { 34 | 35 | //generic Vector Ptr Type facade: 36 | 37 | // 38 | 39 | template 40 | 41 | struct VectorPtrT; 42 | 43 | 44 | 45 | //partial specialization for device_vector: 46 | 47 | // 48 | 49 | template 50 | 51 | struct VectorPtrT > 52 | 53 | { 54 | 55 | typedef thrust::device_ptr PtrT; 56 | 57 | }; 58 | 59 | 60 | 61 | //partial specialization for host_vector: 62 | 63 | // 64 | 65 | template 66 | 67 | struct VectorPtrT > 68 | 69 | { 70 | 71 | typedef typename thrust::host_vector::value_type* PtrT; 72 | 73 | }; 74 | 75 | } 76 | 77 | #endif 78 | 79 | -------------------------------------------------------------------------------- /cpp/include/nvgraphP.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /* 18 | * 19 | * 20 | * WARNING: this is a private header file, it should not be publically exposed. 21 | * 22 | * 23 | */ 24 | 25 | #pragma once 26 | #include "nvgraph.h" 27 | #include "cnmem.h" 28 | 29 | #if defined(__cplusplus) 30 | extern "C" { 31 | #endif 32 | 33 | /* Graph descriptor types */ 34 | typedef enum 35 | { 36 | IS_EMPTY = 0, //nothing 37 | HAS_TOPOLOGY = 1, //connectivity info 38 | HAS_VALUES = 2, //MultiValuedCSRGraph 39 | IS_2D = 3 40 | } nvgraphGraphStatus_t; 41 | 42 | struct nvgraphContext { 43 | cudaStream_t stream; 44 | cnmemDevice_t cnmem_device; 45 | int nvgraphIsInitialized; 46 | }; 47 | 48 | struct nvgraphGraphDescr { 49 | nvgraphGraphStatus_t graphStatus; 50 | cudaDataType T; // This is the type of values for the graph 51 | nvgraphTopologyType_t TT; // The topology type (class to cast graph_handle pointer to) 52 | void* graph_handle; // Opaque pointer to the graph class object 53 | }; 54 | 55 | #if defined(__cplusplus) 56 | }//extern "C" 57 | #endif 58 | 59 | -------------------------------------------------------------------------------- /cpp/include/triangles_counting_kernels.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #pragma once 17 | 18 | #include 19 | 20 | namespace nvgraph 21 | { 22 | 23 | namespace triangles_counting 24 | { 25 | 26 | template 27 | void tricnt_bsh(T nblock, spmat_t *m, uint64_t *ocnt_d, size_t bmld, cudaStream_t stream); 28 | template 29 | void tricnt_wrp(T nblock, spmat_t *m, uint64_t *ocnt_d, unsigned int *bmap_d, size_t bmld, cudaStream_t stream); 30 | template 31 | void tricnt_thr(T nblock, spmat_t *m, uint64_t *ocnt_d, cudaStream_t stream); 32 | template 33 | void tricnt_b2b(T nblock, spmat_t *m, uint64_t *ocnt_d, unsigned int *bmapL0_d, size_t bmldL0, unsigned int *bmapL1_d, size_t bmldL1, cudaStream_t stream); 34 | 35 | template 36 | uint64_t reduce(uint64_t *v_d, T n, cudaStream_t stream); 37 | template 38 | void create_nondangling_vector(const T *roff, T *p_nonempty, T *n_nonempty, size_t n, cudaStream_t stream); 39 | 40 | void myCudaMemset(unsigned long long *p, unsigned long long v, long long n, cudaStream_t stream); 41 | 42 | } // namespace triangles_counting 43 | 44 | } // namespace nvgraph 45 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_vector_kernels.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | namespace nvgraph 19 | { 20 | template 21 | void nrm1_raw_vec (ValueType_* vec, size_t n, ValueType_* res, cudaStream_t stream = 0); 22 | 23 | template 24 | void fill_raw_vec (ValueType_* vec, size_t n, ValueType_ value, cudaStream_t stream = 0); 25 | 26 | template 27 | void dump_raw_vec (ValueType_* vec, size_t n, int offset, cudaStream_t stream = 0); 28 | 29 | template 30 | void dmv (size_t num_vertices, ValueType_ alpha, ValueType_* D, ValueType_* x, ValueType_ beta, ValueType_* y, cudaStream_t stream = 0); 31 | 32 | template 33 | void copy_vec(ValueType_ *vec1, size_t n, ValueType_ *res, cudaStream_t stream = 0); 34 | 35 | template 36 | void flag_zeros_raw_vec(size_t num_vertices, ValueType_* vec, int* flag, cudaStream_t stream = 0 ); 37 | 38 | template 39 | void set_connectivity( size_t n, IndexType_ root, ValueType_ self_loop_val, ValueType_ unreachable_val, ValueType_* res, cudaStream_t stream = 0); 40 | 41 | } // end namespace nvgraph 42 | 43 | -------------------------------------------------------------------------------- /cpp/src/nvgraph_error.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "nvgraph_error.hxx" 18 | 19 | namespace nvgraph 20 | { 21 | 22 | 23 | void nvgraph_default_output(const char *msg, int length) { 24 | #if defined(DEBUG) || defined(VERBOSE_DIAG) 25 | printf("%s", msg); 26 | #endif 27 | } 28 | 29 | NVGRAPH_output_callback nvgraph_output = nvgraph_default_output; 30 | NVGRAPH_output_callback error_output = nvgraph_default_output; 31 | //NVGRAPH_output_callback nvgraph_distributed_output = nvgraph_default_output;*/ 32 | 33 | // Timer 34 | struct cuda_timer::event_pair 35 | { 36 | cudaEvent_t start; 37 | cudaEvent_t end; 38 | }; 39 | cuda_timer::cuda_timer(): p(new event_pair()) { } 40 | 41 | void cuda_timer::start() 42 | { 43 | cudaEventCreate(&p->start); 44 | cudaEventCreate(&p->end); 45 | cudaEventRecord(p->start, 0); 46 | cudaCheckError(); 47 | } 48 | float cuda_timer::stop() 49 | { 50 | cudaEventRecord(p->end, 0); 51 | cudaEventSynchronize(p->end); 52 | float elapsed_time; 53 | cudaEventElapsedTime(&elapsed_time, p->start, p->end); 54 | cudaEventDestroy(p->start); 55 | cudaEventDestroy(p->end); 56 | cudaCheckError(); 57 | return elapsed_time; 58 | } 59 | 60 | } // end namespace nvgraph 61 | 62 | -------------------------------------------------------------------------------- /test/ref/cpu_ref_SSSP.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Usage : python3 nvgraph_cpu_ref.py graph.mtx source_vertex 4 | # This works with networkx 1.8.1 (default ubuntu package version in 14.04) 5 | # http://networkx.github.io/documentation/networkx-1.8/ 6 | 7 | # Latest version is currenlty 1.11 in feb 2016 8 | # https://networkx.github.io/documentation/latest/tutorial/index.html 9 | 10 | #import numpy as np 11 | import sys 12 | import time 13 | from scipy.io import mmread 14 | import numpy as np 15 | import networkx as nx 16 | import os 17 | 18 | print ('Networkx version : {} '.format(nx.__version__)) 19 | 20 | # Command line arguments 21 | argc = len(sys.argv) 22 | if argc<=2: 23 | print("Error: usage is : python3 nvgraph_cpu_ref.py graph.mtx source_vertex") 24 | sys.exit() 25 | mmFile = sys.argv[1] 26 | src = int(sys.argv[2]) 27 | print('Reading '+ str(mmFile) + '...') 28 | #Read 29 | M = mmread(mmFile).asfptype().tolil() 30 | 31 | if M is None : 32 | raise TypeError('Could not read the input graph') 33 | 34 | # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this explicitly 35 | M = M.transpose().tocsr() 36 | if not M.has_sorted_indices: 37 | M.sort_indices() 38 | 39 | # Directed NetworkX graph 40 | Gnx = nx.DiGraph(M) 41 | 42 | #SSSP 43 | print('Solving... ') 44 | t1 = time.time() 45 | sssp = nx.single_source_dijkstra_path_length(Gnx,source=src) 46 | t2 = time.time() - t1 47 | 48 | print('Time : '+str(t2)) 49 | print('Writing result ... 
') 50 | 51 | # fill missing with DBL_MAX 52 | bsssp = np.full(M.shape[0], sys.float_info.max, dtype=np.float64) 53 | for r in sssp.keys(): 54 | bsssp[r] = sssp[r] 55 | # write binary 56 | out_fname = os.path.splitext(os.path.basename(mmFile))[0] + '_T.sssp_' + str(src) + '.bin' 57 | bsssp.tofile(out_fname, "") 58 | print ('Result is in the file: ' + out_fname) 59 | 60 | # write text 61 | #f = open('/tmp/ref_' + os.path.basename(mmFile) + '_sssp.txt', 'w') 62 | #f.write(str(sssp.values())) 63 | 64 | print('Done') 65 | -------------------------------------------------------------------------------- /cpp/tests/2d_partitioning_test.cpp: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "nvgraph.h" 3 | #include 4 | 5 | TEST(SimpleBFS2D, DummyTest) { 6 | nvgraphHandle_t handle; 7 | int* devices = (int*) malloc(sizeof(int) * 2); 8 | devices[0] = 0; 9 | devices[1] = 1; 10 | nvgraphCreateMulti(&handle, 2, devices); 11 | nvgraphGraphDescr_t graph; 12 | nvgraphCreateGraphDescr(handle, &graph); 13 | int rowIds[38] = { 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 14 | 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8 }; 15 | int colIds[38] = { 1, 2, 7, 8, 0, 2, 4, 7, 8, 0, 1, 3, 6, 8, 2, 4, 5, 6, 8, 1, 3, 5, 8, 3, 4, 6, 16 | 7, 2, 3, 5, 0, 1, 5, 0, 1, 2, 3, 4 }; 17 | nvgraph2dCOOTopology32I_st topo; 18 | topo.nvertices = 9; 19 | topo.nedges = 38; 20 | topo.source_indices = rowIds; 21 | topo.destination_indices = colIds; 22 | topo.valueType = CUDA_R_32I; 23 | topo.values = NULL; 24 | topo.numDevices = 2; 25 | topo.devices = devices; 26 | topo.blockN = 2; 27 | topo.tag = NVGRAPH_DEFAULT; 28 | nvgraphSetGraphStructure(handle, graph, &topo, NVGRAPH_2D_32I_32I); 29 | int* distances = (int*) malloc(sizeof(int) * 9); 30 | int* predecessors = (int*) malloc(sizeof(int) * 9); 31 | int sourceId = 0; 32 | std::cout << "Source ID: " << sourceId << "\n"; 33 | nvgraph2dBfs(handle, graph, sourceId, distances, predecessors); 34 | std::cout << "Distances:\n"; 35 | for (int i = 0; i < 9; i++) 36 | std::cout << i << ":" << distances[i] << " "; 37 | std::cout << "\nPredecessors:\n"; 38 | for (int i = 0; i < 9; i++) 39 | std::cout << i << ":" << predecessors[i] << " "; 40 | std::cout << "\n"; 41 | int exp_pred[9] = {-1,0,0,2,1,7,2,0,0}; 42 | int exp_dist[9] = {0,1,1,2,2,2,2,1,1}; 43 | for (int i = 0; i < 9; i++){ 44 | ASSERT_EQ(exp_pred[i], predecessors[i]); 45 | ASSERT_EQ(exp_dist[i], distances[i]); 46 | } 47 | std::cout << "Test run!\n"; 48 | } 49 | 50 | int main(int argc, char **argv) { 51 | ::testing::InitGoogleTest(&argc, argv); 52 | return RUN_ALL_TESTS(); 53 | } 54 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_max.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_max(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_min.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_min(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_sum.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_sum(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_mul.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_mul(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/include/high_res_clock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // A wrapper of clock_gettime. 17 | // Michael A. 
Frumkin (mfrumkin@nvidia.com) 18 | #pragma once 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | class HighResClock { 25 | public: 26 | HighResClock() { 27 | clock_gettime(CLOCK_REALTIME, &_start_time); 28 | clock_gettime(CLOCK_REALTIME, &_stop_time); 29 | } 30 | ~HighResClock() { } 31 | 32 | void start() { clock_gettime(CLOCK_REALTIME, &_start_time); } 33 | 34 | std::string stop() { 35 | clock_gettime(CLOCK_REALTIME, &_stop_time); 36 | char buffer[64]; 37 | long long int start_time = 38 | _start_time.tv_sec * 1e9 + _start_time.tv_nsec; 39 | long long int stop_time = 40 | _stop_time.tv_sec * 1e9 + _stop_time.tv_nsec; 41 | 42 | sprintf(buffer, "%lld us", 43 | (stop_time - start_time) / 1000); 44 | std::string str(buffer); 45 | return str; 46 | } 47 | 48 | void stop(double* elapsed_time) { // returns time in us 49 | clock_gettime(CLOCK_REALTIME, &_stop_time); 50 | long long int start_time = 51 | _start_time.tv_sec * 1e9 + _start_time.tv_nsec; 52 | long long int stop_time = 53 | _stop_time.tv_sec * 1e9 + _stop_time.tv_nsec; 54 | *elapsed_time = (stop_time - start_time) / 1000; 55 | } 56 | 57 | private: 58 | timespec _start_time; 59 | timespec _stop_time; 60 | }; 61 | -------------------------------------------------------------------------------- /cpp/include/triangles_counting.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | #include 27 | 28 | namespace nvgraph 29 | { 30 | 31 | namespace triangles_counting 32 | { 33 | 34 | 35 | typedef enum { TCOUNT_DEFAULT, TCOUNT_BSH, TCOUNT_B2B, TCOUNT_WRP, TCOUNT_THR } TrianglesCountAlgo; 36 | 37 | 38 | template 39 | class TrianglesCount 40 | { 41 | private: 42 | //CsrGraph & m_last_graph ; 43 | AsyncEvent m_event; 44 | uint64_t m_triangles_number; 45 | spmat_t m_mat; 46 | int m_dev_id; 47 | cudaDeviceProp m_dev_props; 48 | 49 | Vector m_seq; 50 | 51 | cudaStream_t m_stream; 52 | 53 | bool m_done; 54 | 55 | void tcount_bsh(); 56 | void tcount_b2b(); 57 | void tcount_wrp(); 58 | void tcount_thr(); 59 | 60 | public: 61 | // Simple constructor 62 | TrianglesCount(const CsrGraph & graph, cudaStream_t stream = NULL, int device_id = -1); 63 | // Simple destructor 64 | ~TrianglesCount(); 65 | 66 | NVGRAPH_ERROR count(TrianglesCountAlgo algo = TCOUNT_DEFAULT ); 67 | inline uint64_t get_triangles_count() const {return m_triangles_number;} 68 | }; 69 | 70 | } // end namespace triangles_counting 71 | 72 | } // end namespace nvgraph 73 | 74 | -------------------------------------------------------------------------------- /cpp/src/pagerank_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | #include 18 | 19 | #include "nvgraph_error.hxx" 20 | #include "nvgraph_vector_kernels.hxx" 21 | #include "pagerank_kernels.hxx" 22 | 23 | namespace nvgraph 24 | { 25 | 26 | template 27 | __global__ void update_dn_kernel(int num_vertices, ValueType_* aa, ValueType_ beta) 28 | { 29 | int tidx = blockDim.x * blockIdx.x + threadIdx.x; 30 | for (int r = tidx; r < num_vertices; r += blockDim.x * gridDim.x) 31 | { 32 | // NOTE 1 : a = alpha*a + (1-alpha)e 33 | if (aa[r] == 0.0) 34 | aa[r] = beta; // NOTE 2 : alpha*0 + (1-alpha)*1 = (1-alpha) 35 | } 36 | } 37 | 38 | template 39 | void update_dangling_nodes(int num_vertices, ValueType_* dangling_nodes, ValueType_ damping_factor, cudaStream_t stream) 40 | { 41 | 42 | int num_threads = 256; 43 | int max_grid_size = 4096; 44 | int num_blocks = std::min(max_grid_size, (num_vertices/num_threads)+1); 45 | ValueType_ beta = 1.0-damping_factor; 46 | update_dn_kernel<<>>(num_vertices, dangling_nodes,beta); 47 | cudaCheckError(); 48 | } 49 | 50 | //Explicit 51 | 52 | template void update_dangling_nodes (int num_vertices, double* dangling_nodes, double damping_factor, cudaStream_t stream); 53 | template void update_dangling_nodes (int num_vertices, float* dangling_nodes, float damping_factor, cudaStream_t stream); 54 | } // end namespace nvgraph 55 | 56 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_max.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
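A minimal host-side sketch of how the update_dangling_nodes wrapper above is meant to be called (the buffer management is illustrative and assumes pagerank_kernels.hxx and the CUDA runtime are included): every entry that is still 0 after the out-degree scaling is overwritten with 1 - damping_factor, matching the a = alpha*a + (1-alpha)*e note in the kernel.

#include <cuda_runtime.h>
#include <vector>

void example_update_dangling(int num_vertices, float damping_factor, cudaStream_t stream)
{
    std::vector<float> h_a(num_vertices, 0.0f);   // pretend every vertex is dangling
    float* d_a = 0;
    cudaMalloc((void**)&d_a, num_vertices * sizeof(float));
    cudaMemcpy(d_a, h_a.data(), num_vertices * sizeof(float), cudaMemcpyHostToDevice);

    nvgraph::update_dangling_nodes(num_vertices, d_a, damping_factor, stream);

    cudaMemcpy(h_a.data(), d_a, num_vertices * sizeof(float), cudaMemcpyDeviceToHost);
    // every entry that was 0.0f now holds 1.0f - damping_factor (0.15f for the usual 0.85f)
    cudaFree(d_a);
}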
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_max(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_min.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_min(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_sum.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
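The contract_graph_* dispatchers above (one translation unit per value type and reduction operator) all forward to contract_from_aggregates_t with functors chosen from the VCombine/VReduce/ECombine/EReduce arguments. As a rough illustration of the operation itself, independent of the CUDA implementation: given an aggregate label per vertex, every fine edge (u, v, w) becomes a coarse edge (agg[u], agg[v]), and parallel coarse edges are combined with the chosen reduction. A small CPU sketch of that semantics with a sum reduction (the function name and the COO-map output are assumptions made for the example):

#include <map>
#include <utility>
#include <vector>

void contract_by_aggregates_sum(const std::vector<int>& row_offsets,
                                const std::vector<int>& col_indices,
                                const std::vector<float>& values,
                                const std::vector<int>& aggregates,   // aggregates[v] = coarse id of v
                                std::map<std::pair<int, int>, float>& coarse_edges)
{
    int n = (int)row_offsets.size() - 1;
    for (int u = 0; u < n; ++u)
        for (int e = row_offsets[u]; e < row_offsets[u + 1]; ++e)
        {
            std::pair<int, int> key(aggregates[u], aggregates[col_indices[e]]);
            coarse_edges[key] += values[e];   // sum reduction; min/max/mul are the other variants above
        }
}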
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_sum(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_max.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_max(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_min.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_min(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_sum.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_sum(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_mul.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_mul(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_mul.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_mul(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /external/cub_semiring/util_namespace.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of the NVIDIA CORPORATION nor the 13 | * names of its contributors may be used to endorse or promote products 14 | * derived from this software without specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | ******************************************************************************/ 28 | 29 | /** 30 | * \file 31 | * Place-holder for prefixing the cub namespace 32 | */ 33 | 34 | #pragma once 35 | 36 | // For example: 37 | #define CUB_NS_PREFIX namespace cub_semiring { 38 | #define CUB_NS_POSTFIX } 39 | 40 | #ifndef CUB_NS_PREFIX 41 | #define CUB_NS_PREFIX 42 | #endif 43 | 44 | #ifndef CUB_NS_POSTFIX 45 | #define CUB_NS_POSTFIX 46 | #endif 47 | -------------------------------------------------------------------------------- /cpp/include/debug_macros.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include "nvgraph_error.hxx" 19 | 20 | #define CHECK_STATUS(...) \ 21 | do { \ 22 | if (__VA_ARGS__) { \ 23 | FatalError(#__VA_ARGS__, NVGRAPH_ERR_UNKNOWN); \ 24 | } \ 25 | } while (0) 26 | 27 | #define CHECK_NVGRAPH(...) \ 28 | do { \ 29 | NVGRAPH_ERROR e = __VA_ARGS__; \ 30 | if (e != NVGRAPH_OK) { \ 31 | FatalError(#__VA_ARGS__, e) \ 32 | } \ 33 | } while (0) 34 | 35 | #ifdef DEBUG 36 | #define COUT() (std::cout) 37 | #define CERR() (std::cerr) 38 | #define WARNING(message) \ 39 | do { \ 40 | std::stringstream ss; \ 41 | ss << "Warning (" << __FILE__ << ":" << __LINE__ << "): " << message; \ 42 | CERR() << ss.str() << std::endl; \ 43 | } while (0) 44 | #else // DEBUG 45 | #define WARNING(message) 46 | #endif 47 | -------------------------------------------------------------------------------- /cpp/include/csrmv_cub.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
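Typical use of the CHECK_STATUS / CHECK_NVGRAPH / WARNING helpers defined in debug_macros.h above; d_buf, bytes and run_solver() are placeholders, not symbols from this repository. Any non-zero status, or any NVGRAPH_ERROR other than NVGRAPH_OK, is converted into a FatalError carrying the stringified expression.

CHECK_STATUS(cudaMalloc(&d_buf, bytes));        // cudaSuccess is 0, so any failure throws
CHECK_NVGRAPH(run_solver());                    // run_solver() is assumed to return an NVGRAPH_ERROR
WARNING("falling back to the reference path");  // printed to stderr only when DEBUG is defined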
15 | */ 16 | #pragma once 17 | 18 | #include "nvgraph.h" 19 | #include "nvgraph_error.hxx" 20 | #include "multi_valued_csr_graph.hxx" 21 | 22 | namespace nvgraph 23 | { 24 | 25 | template 26 | class SemiringDispatch 27 | { 28 | public: 29 | template 30 | static NVGRAPH_ERROR Dispatch( 31 | const V* d_values, 32 | const I* d_row_offsets, 33 | const I* d_column_indices, 34 | const V* d_vector_x, 35 | V* d_vector_y, 36 | V alpha, 37 | V beta, 38 | I num_rows, 39 | I num_cols, 40 | I num_nonzeros, 41 | cudaStream_t stream); 42 | 43 | static NVGRAPH_ERROR InitAndLaunch( 44 | const nvgraph::MultiValuedCsrGraph &graph, 45 | const size_t weight_index, 46 | const void *p_alpha, 47 | const size_t x_index, 48 | const void *p_beta, 49 | const size_t y_index, 50 | const nvgraphSemiring_t SR, 51 | cudaStream_t stream 52 | ); 53 | }; 54 | 55 | 56 | // API wrapper to avoid bloating main API object nvgraph.cpp 57 | NVGRAPH_ERROR SemiringAPILauncher(nvgraphHandle_t handle, 58 | const nvgraphGraphDescr_t descrG, 59 | const size_t weight_index, 60 | const void *alpha, 61 | const size_t x, 62 | const void *beta, 63 | const size_t y, 64 | const nvgraphSemiring_t sr); 65 | } //namespace nvgraph 66 | -------------------------------------------------------------------------------- /test/generators/convertors/sort_eges.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // std::sort 4 | #include // std::vector 5 | 6 | struct edge { 7 | unsigned long int r; 8 | unsigned long int c; 9 | }; 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Fatal Error\n"); 14 | printf("%s", "Usage: ./sort edges.dat\n"); 15 | printf("%s", "Input : Graph in matrix market parttern format"); 16 | printf("%s", "Output : Graph with sorted edges in matrix market parttern format\n"); 17 | exit(0); 18 | } 19 | 20 | inline bool operator< (const edge& a, const edge& b){ if(a.r edges; 35 | 36 | // Get I/O names 37 | // The output is filename.mtx 38 | while (argv[1][i] != '\0') 39 | {outp[i] = argv[1][i];i++;} 40 | outp[i] = '_'; i++; 41 | outp[i] = 's';i++; 42 | outp[i]='\0'; 43 | 44 | // Open files 45 | fpin = fopen(argv[1],"r"); 46 | fpout = fopen(outp,"w"); 47 | if (!fpin || !fpout) 48 | { 49 | printf("%s", "Fatal Error : I/O fail\n"); 50 | exit(0); 51 | } 52 | 53 | // Skip lines starting with "%"" 54 | do 55 | { 56 | cc = fgetc(fpin); 57 | if (cc == '%') fgets(outp,128,fpin); 58 | } 59 | while (cc == '%'); 60 | fseek( fpin, -1, SEEK_CUR ); 61 | 62 | // Get n and nz 63 | fscanf(fpin,"%lu",&n); 64 | //fscanf(fpin,"%lu",&n); 65 | fscanf(fpin,"%lu",&nz); 66 | fprintf(fpout,"%lu %lu %lu\n",n, n, nz); 67 | // Read the first edge 68 | ok = fscanf(fpin,"%lu",&e.r); 69 | if (ok) 70 | { 71 | fscanf(fpin,"%lu",&e.c); 72 | edges.push_back(e); 73 | } 74 | else 75 | { 76 | printf("%s", "Fatal Error : Wrong data format\n"); 77 | exit(0); 78 | } 79 | 80 | //Loop 81 | for (i=0; i::iterator it = edges.begin() ; it != edges.end(); ++it) 89 | fprintf(fpout,"%lu %lu\n",it->r, it->c); 90 | return 0; 91 | } 92 | 93 | -------------------------------------------------------------------------------- /test/ref/nerstrand/nerstrand_driver.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "mmio.h" 11 | 12 | #include "mm_host.hxx" 13 | #include "nerstrand.h" 14 | 15 | 16 | static double second (void) 17 | { 18 | struct timeval tv; 19 | 
gettimeofday(&tv, NULL); 20 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 21 | } 22 | 23 | 24 | int main(int argc, const char **argv) 25 | { 26 | 27 | int m, n, nnz; 28 | double start, stop,r_mod; 29 | cid_t n_clusters; 30 | MM_typecode mc; 31 | if (argc != 3) 32 | { 33 | std::cout<<"Usage : ./nerstrand_bench "<(fpin, 1, &mc, &m, &n, &nnz) ; 40 | 41 | // Allocate memory on host 42 | std::vector cooRowIndA(nnz); 43 | std::vector cooColIndA(nnz); 44 | std::vector cooValA(nnz); 45 | std::vector csrRowPtrA(n+1); 46 | std::vector csrColIndA(nnz); 47 | std::vector csrValA(nnz); 48 | 49 | 50 | mm_to_coo(fpin, 1, nnz, &cooRowIndA[0], &cooColIndA[0], &cooValA[0],NULL) ; 51 | coo2csr (n, nnz, &cooValA[0], &cooRowIndA[0], &cooColIndA[0], &csrValA[0], &csrColIndA[0],&csrRowPtrA[0]); 52 | fclose(fpin); 53 | 54 | vtx_t nerstrand_n = static_cast(n); 55 | std::vector nerstrand_csrRowPtrA(csrRowPtrA.begin(), csrRowPtrA.end()); 56 | std::vector nerstrand_csrColIndA(csrColIndA.begin(), csrColIndA.end()); 57 | std::vector nerstrand_csrValA(csrValA.begin(), csrValA.end()); 58 | std::vector clustering(n); 59 | 60 | start = second(); 61 | start = second(); 62 | #pragma omp_parallel 63 | { 64 | int nerstrand_status = nerstrand_cluster_kway(&nerstrand_n, &nerstrand_csrRowPtrA[0],&nerstrand_csrColIndA[0], &nerstrand_csrValA[0], &n_clusters, &clustering[0], &r_mod); 65 | if (nerstrand_status != NERSTRAND_SUCCESS) 66 | std::cout<<"nerstrand execution failed"< 19 | namespace nvgraph 20 | { 21 | template class Lapack; 22 | 23 | template 24 | class Lapack 25 | { 26 | private: 27 | Lapack(); 28 | ~Lapack(); 29 | public: 30 | static void check_lapack_enabled(); 31 | 32 | static void gemm(bool transa, bool transb, int m, int n, int k, T alpha, const T * A, int lda, const T * B, int ldb, T beta, T * C, int ldc); 33 | 34 | // special QR for lanczos 35 | static void sterf(int n, T * d, T * e); 36 | static void steqr(char compz, int n, T * d, T * e, T * z, int ldz, T * work); 37 | 38 | // QR 39 | // computes the QR factorization of a general matrix 40 | static void geqrf (int m, int n, T *a, int lda, T *tau, T *work, int *lwork); 41 | // Generates the real orthogonal matrix Q of the QR factorization formed by geqrf. 
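The SemiringDispatch / SemiringAPILauncher declarations in csrmv_cub.h above generalize y = alpha*A*x + beta*y so that the add and multiply come from a semiring: (+, *) for plain SpMV, (min, +) for SSSP-style relaxation, (max, min) for widest path, (or, and) for reachability. A CPU sketch of that semantics, purely for intuition (exactly how alpha and beta are folded in by the CUB-based GPU kernels is an assumption here):

#include <functional>
#include <vector>

template <typename V, typename I>
void csrmv_semiring(const std::vector<I>& row_offsets, const std::vector<I>& cols,
                    const std::vector<V>& vals, const std::vector<V>& x, std::vector<V>& y,
                    V alpha, V beta, V plus_identity,
                    std::function<V(V, V)> plus, std::function<V(V, V)> times)
{
    for (size_t r = 0; r + 1 < row_offsets.size(); ++r)
    {
        V acc = plus_identity;
        for (I e = row_offsets[r]; e < row_offsets[r + 1]; ++e)
            acc = plus(acc, times(vals[e], x[cols[e]]));
        y[r] = plus(times(alpha, acc), times(beta, y[r]));
    }
}
// e.g. min-plus: plus = minimum, times = addition, plus_identity = a large sentinel such as FLT_MAX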
42 | //static void orgqr( int m, int n, int k, T* a, int lda, const T* tau, T* work, int* lwork ); 43 | // multiply C by implicit Q 44 | static void ormqr (bool right_side, bool transq, int m, int n, int k, T *a, int lda, T *tau, T *c, int ldc, T *work, int *lwork); 45 | //static void unmqr (bool right_side, bool transq, int m, int n, int k, T *a, int lda, T *tau, T *c, int ldc, T *work, int *lwork); 46 | //static void qrf (int n, T *H, T *Q, T *R); 47 | 48 | //static void hseqr (T* Q, T* R, T* eigenvalues,T* eigenvectors, int dim, int ldh, int ldq); 49 | static void geev(T* A, T* eigenvalues, int dim, int lda); 50 | static void geev(T* A, T* eigenvalues, T* eigenvectors, int dim, int lda, int ldvr); 51 | static void geev(T* A, T* eigenvalues_r, T* eigenvalues_i, T* eigenvectors_r, T* eigenvectors_i, int dim, int lda, int ldvr); 52 | 53 | }; 54 | } // end namespace nvgraph 55 | 56 | -------------------------------------------------------------------------------- /cpp/cmake/Modules/ConfigureGoogleTest.cmake: -------------------------------------------------------------------------------- 1 | set(GTEST_ROOT "${CMAKE_BINARY_DIR}/googletest") 2 | 3 | set(GTEST_CMAKE_ARGS "") 4 | #" -Dgtest_build_samples=ON" 5 | #" -DCMAKE_VERBOSE_MAKEFILE=ON") 6 | 7 | if(NOT CMAKE_CXX11_ABI) 8 | message(STATUS "GTEST: Disabling the GLIBCXX11 ABI") 9 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_C_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0") 10 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0") 11 | elseif(CMAKE_CXX11_ABI) 12 | message(STATUS "GTEST: Enabling the GLIBCXX11 ABI") 13 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_C_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=1") 14 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=1") 15 | endif(NOT CMAKE_CXX11_ABI) 16 | 17 | configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake" 18 | "${GTEST_ROOT}/CMakeLists.txt") 19 | 20 | file(MAKE_DIRECTORY "${GTEST_ROOT}/build") 21 | file(MAKE_DIRECTORY "${GTEST_ROOT}/install") 22 | 23 | execute_process(COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} . 24 | RESULT_VARIABLE GTEST_CONFIG 25 | WORKING_DIRECTORY ${GTEST_ROOT}) 26 | 27 | if(GTEST_CONFIG) 28 | message(FATAL_ERROR "Configuring GoogleTest failed: " ${GTEST_CONFIG}) 29 | endif(GTEST_CONFIG) 30 | 31 | set(PARALLEL_BUILD -j) 32 | if($ENV{PARALLEL_LEVEL}) 33 | set(NUM_JOBS $ENV{PARALLEL_LEVEL}) 34 | set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") 35 | endif($ENV{PARALLEL_LEVEL}) 36 | 37 | if(${NUM_JOBS}) 38 | if(${NUM_JOBS} EQUAL 1) 39 | message(STATUS "GTEST BUILD: Enabling Sequential CMake build") 40 | elseif(${NUM_JOBS} GREATER 1) 41 | message(STATUS "GTEST BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") 42 | endif(${NUM_JOBS} EQUAL 1) 43 | else() 44 | message(STATUS "GTEST BUILD: Enabling Parallel CMake build with all threads") 45 | endif(${NUM_JOBS}) 46 | 47 | execute_process(COMMAND ${CMAKE_COMMAND} --build .. 
-- ${PARALLEL_BUILD} 48 | RESULT_VARIABLE GTEST_BUILD 49 | WORKING_DIRECTORY ${GTEST_ROOT}/build) 50 | 51 | if(GTEST_BUILD) 52 | message(FATAL_ERROR "Building GoogleTest failed: " ${GTEST_BUILD}) 53 | endif(GTEST_BUILD) 54 | 55 | message(STATUS "GoogleTest installed here: " ${GTEST_ROOT}/install) 56 | set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/install/include") 57 | set(GTEST_LIBRARY_DIR "${GTEST_ROOT}/install/lib") 58 | set(GTEST_FOUND TRUE) 59 | -------------------------------------------------------------------------------- /cpp/include/size2_selector.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | //#include 20 | #include 21 | #include 22 | 23 | namespace nvgraph { 24 | 25 | typedef enum 26 | { 27 | USER_PROVIDED = 0, // using edge values as is 28 | SCALED_BY_ROW_SUM = 1, // 0.5*(A_ij+A_ji)/max(d(i),d (j)), where d(i) is the sum of the row i 29 | SCALED_BY_DIAGONAL = 2, // 0.5*(A_ij+A_ji)/max(diag(i),diag(j)) 30 | }Matching_t; 31 | 32 | template 33 | class Size2Selector 34 | { 35 | 36 | public: 37 | typedef IndexType_ IndexType; 38 | typedef ValueType_ ValueType; 39 | 40 | Size2Selector(); 41 | 42 | Size2Selector(Matching_t similarity_metric, int deterministic = 1, int max_iterations = 15 , ValueType numUnassigned_tol = 0.05 ,bool two_phase = false, bool merge_singletons = true, cudaStream_t stream = 0) 43 | :m_similarity_metric(similarity_metric), m_deterministic(deterministic), m_max_iterations(max_iterations), m_numUnassigned_tol(numUnassigned_tol), m_two_phase(two_phase), m_merge_singletons(merge_singletons), m_stream(stream) 44 | { 45 | m_aggregation_edge_weight_component = 0; 46 | m_weight_formula = 0; 47 | } 48 | 49 | NVGRAPH_ERROR setAggregates(const ValuedCsrGraph &A, Vector &aggregates, int &num_aggregates); 50 | 51 | protected: 52 | NVGRAPH_ERROR setAggregates_common_sqblocks(const ValuedCsrGraph &A, Vector &aggregates, int &num_aggregates); 53 | Matching_t m_similarity_metric; 54 | int m_deterministic; 55 | int m_max_iterations; 56 | ValueType m_numUnassigned_tol; 57 | bool m_two_phase; 58 | bool m_merge_singletons; 59 | cudaStream_t m_stream; 60 | int m_aggregation_edge_weight_component; 61 | int m_weight_formula; 62 | }; 63 | 64 | }//nvgraph 65 | -------------------------------------------------------------------------------- /cpp/tests/benchmarkScripts/run_nvgraph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ****************** Edit this ************************* 4 | #Path to nvgraph bin graphs 5 | # From p4matrices:2024 sync //matrices/p4matrices/graphs/... 6 | nvg_data_prefix="/home/afender/src/matrices/p4matrices/graphs" 7 | 8 | #Path to nvgraph 9 | # nvg_bin_prefix should contain a release build of nvgraph's ToT (from p4sw //sw/gpgpu/nvgraph/...) 
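ConfigureGoogleTest.cmake above reads the PARALLEL_LEVEL environment variable to pick the -j level for the GoogleTest sub-build; presumably the intended invocation from the build directory is along these lines (the job count is arbitrary):

# illustrative invocation, not a script from this repository
PARALLEL_LEVEL=8 cmake ..
make -j8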
10 | # and nvgraph_benchmark executable which is build along with nvgraph's tests 11 | nvg_bin_prefix="/home/afender/src/sw/sw/gpgpu/bin/x86_64_Linux_release" 12 | # ***************************************************** 13 | 14 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$nvg_bin_prefix 15 | export PATH=$PATH:$nvg_bin_prefix 16 | 17 | declare -a arr=( 18 | "$nvg_data_prefix/webbase1M/webbase-1M_T.mtx.bin" 19 | "$nvg_data_prefix/liveJournal/ljournal-2008_T.mtx.bin" 20 | "$nvg_data_prefix/webGoogle/web-Google_T.mtx.bin" 21 | "$nvg_data_prefix/citPatents/cit-Patents_T.mtx.bin" 22 | "$nvg_data_prefix/webBerkStan/web-BerkStan_T.mtx.bin" 23 | "$nvg_data_prefix/WikiTalk/wiki-Talk_T.mtx.bin" 24 | "$nvg_data_prefix/soc-liveJournal/soc-LiveJournal1_T.mtx.bin" 25 | # Warning : Twitter case works only on GPU with more than 12 GB of memory 26 | "$nvg_data_prefix/Twitter/twitter.bin" 27 | #Just for debug 28 | #"$nvg_data_prefix/small/small.bin" 29 | ) 30 | 31 | 32 | ## now loop through the above array 33 | for i in "${arr[@]}" 34 | do 35 | echo "Pagerank" 36 | echo "$i" 37 | echo "single precision" 38 | $nvg_bin_prefix/nvgraph_benchmark --pagerank "$i" 0.85 500 1E-6 --float --repeats 10 39 | echo 40 | #echo "Pagerank" 41 | #echo "$i" 42 | #echo "double precision" 43 | #$nvg_bin_prefix/nvgraph_benchmark --pagerank "$i" 0.85 500 1E-6 --double --repeats 10 44 | #echo 45 | done 46 | echo 47 | for i in "${arr[@]}" 48 | do 49 | echo "SSSP" 50 | echo "$i" 51 | echo "single precision" 52 | $nvg_bin_prefix/nvgraph_benchmark --sssp "$i" 0 --float --repeats 10 53 | echo 54 | #echo "SSSP" 55 | #echo "$i" 56 | #echo "double precision" 57 | #$nvg_bin_prefix/nvgraph_benchmark --sssp "$i" 0 --double --repeats 10 58 | #echo 59 | done 60 | echo 61 | for i in "${arr[@]}" 62 | do 63 | echo "Widest Path" 64 | echo "$i" 65 | echo "single precision" 66 | $nvg_bin_prefix/nvgraph_benchmark --widest "$i" 0 --float --repeats 10 67 | echo 68 | #echo "Widest Path" 69 | #echo "$i" 70 | #echo "double precision" 71 | #$nvg_bin_prefix/nvgraph_benchmark --widest "$i" 0 --double --repeats 10 72 | #echo 73 | done 74 | echo 75 | -------------------------------------------------------------------------------- /cpp/src/graph_extractor.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | 20 | 21 | namespace nvgraph 22 | { 23 | //------------------------- SubGraph Extraction: ---------------------- 24 | // 25 | CsrGraph* extract_subgraph_by_vertices(CsrGraph& graph, 26 | int* pV, size_t n, cudaStream_t stream) 27 | { 28 | return extract_from_vertex_subset(graph, pV, n, stream); 29 | } 30 | 31 | MultiValuedCsrGraph* extract_subgraph_by_vertices(MultiValuedCsrGraph& graph, 32 | int* pV, size_t n, cudaStream_t stream) 33 | { 34 | return static_cast*>(extract_from_vertex_subset(graph, pV, n, stream)); 35 | } 36 | 37 | MultiValuedCsrGraph* extract_subgraph_by_vertices(MultiValuedCsrGraph& graph, 38 | int* pV, size_t n, cudaStream_t stream) 39 | { 40 | return static_cast*>(extract_from_vertex_subset(graph, pV, n, stream)); 41 | } 42 | 43 | CsrGraph* extract_subgraph_by_edges(CsrGraph& graph, 44 | int* pV, size_t n, cudaStream_t stream) 45 | { 46 | return extract_from_edge_subset(graph, pV, n, stream); 47 | } 48 | 49 | MultiValuedCsrGraph* extract_subgraph_by_edges(MultiValuedCsrGraph& graph, 50 | int* pV, size_t n, cudaStream_t stream) 51 | { 52 | return static_cast*>(extract_from_edge_subset(graph, pV, n, stream)); 53 | } 54 | 55 | MultiValuedCsrGraph* extract_subgraph_by_edges(MultiValuedCsrGraph& graph, 56 | int* pV, size_t n, cudaStream_t stream) 57 | { 58 | return static_cast*>(extract_from_edge_subset(graph, pV, n, stream)); 59 | } 60 | 61 | 62 | 63 | 64 | 65 | 66 | }// end namespace nvgraph 67 | 68 | -------------------------------------------------------------------------------- /test/generators/convertors/edges_to_H.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | struct edge { 7 | unsigned long int r; 8 | unsigned long int c; 9 | }; 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Fatal Error\n"); 14 | printf("%s", "Usage: ./H edges.dat\n"); 15 | printf("%s", "Input : Graph given as a sorted set of edges\n"); 16 | printf("%s", "Output : Row sub-stochastic matrix in MatrixMarket format\n"); 17 | exit(0); 18 | } 19 | 20 | int main (int argc, char *argv[]) 21 | { 22 | // Check args 23 | if (argc != 2) printUsageAndExit(); 24 | 25 | // Vars 26 | unsigned long int n, nz, i = 0, current_r, nbr = 1; 27 | int ok; 28 | double scal; 29 | char outp[128], cc; 30 | FILE *fpin = NULL, *fpout = NULL; 31 | edge e; 32 | std::vector row; 33 | // Get I/O names 34 | // The output is filename.mtx 35 | while (argv[1][i] != '\0') 36 | {outp[i] = argv[1][i];i++;} 37 | outp[i] = '_'; i++; 38 | outp[i] = 'm';i++;outp[i] = 't';i++;outp[i] = 'x';i++; 39 | outp[i]='\0'; 40 | 41 | // Open files 42 | fpin = fopen(argv[1],"r"); 43 | fpout = fopen(outp,"w"); 44 | if (!fpin || !fpout) 45 | { 46 | printf("%s", "Fatal Error : I/O fail\n"); 47 | exit(0); 48 | } 49 | 50 | // Get n and nz 51 | fscanf(fpin,"%lu",&n); 52 | fscanf(fpin,"%lu",&n); 53 | fscanf(fpin,"%lu",&nz); 54 | 55 | fprintf(fpout, "%s", "%%" ); 56 | fprintf(fpout,"MatrixMarket matrix coordinate real general\n"); 57 | fprintf(fpout,"%lu %lu %lu\n",n, n, nz); 58 | 59 | // Read the first edge 60 | ok = fscanf(fpin,"%lu",&e.r); 61 | if (ok) 62 | { 63 | fscanf(fpin,"%lu",&e.c); 64 | current_r = e.r; 65 | row.push_back(e); 66 | } 67 | else 68 | { 69 | printf("%s", "Fatal Error : Wrong data format\n"); 70 | exit(0); 71 | } 72 | 73 | //Loop 74 | for (i=0; i::iterator it = row.begin() ; it != row.end(); ++it) 87 | fprintf(fpout,"%lu %lu %.9lf\n",it->r, it->c, scal); 88 | row.clear(); 89 | nbr = 1; 90 | } 91 
| row.push_back(e); 92 | } 93 | // Last print 94 | scal = 1.0/nbr; 95 | for (std::vector::iterator it = row.begin() ; it != row.end(); ++it) 96 | fprintf(fpout,"%lu %lu %.9f\n",it->r, it->c, scal); 97 | 98 | return 0; 99 | } 100 | 101 | -------------------------------------------------------------------------------- /cpp/include/cnmem_shared_ptr.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | 23 | // 24 | 25 | #if __cplusplus > 199711L 26 | #include 27 | #define SHARED_PREFIX std 28 | 29 | #else 30 | #include 31 | #define SHARED_PREFIX boost 32 | 33 | #endif 34 | 35 | #include 36 | #include "nvgraph_error.hxx" 37 | 38 | namespace nvgraph 39 | { 40 | 41 | template< typename T > 42 | class DeviceDeleter 43 | { 44 | cudaStream_t mStream; 45 | public: 46 | DeviceDeleter(cudaStream_t stream) : mStream(stream) {} 47 | void operator()(T *ptr) 48 | { 49 | cnmemStatus_t status = cnmemFree(ptr, mStream); 50 | if( status != CNMEM_STATUS_SUCCESS ) 51 | { 52 | FatalError("Memory manager internal error (free)", NVGRAPH_ERR_UNKNOWN); 53 | } 54 | } 55 | }; 56 | 57 | 58 | template< typename T > 59 | inline SHARED_PREFIX::shared_ptr allocateDevice(size_t n, cudaStream_t stream) 60 | { 61 | T *ptr = NULL; 62 | cnmemStatus_t status = cnmemMalloc((void**) &ptr, n*sizeof(T), stream); 63 | if( status == CNMEM_STATUS_OUT_OF_MEMORY) 64 | { 65 | FatalError("Not enough memory", NVGRAPH_ERR_NO_MEMORY); 66 | } 67 | else if (status != CNMEM_STATUS_SUCCESS) 68 | { 69 | FatalError("Memory manager internal error (alloc)", NVGRAPH_ERR_UNKNOWN); 70 | } 71 | return SHARED_PREFIX::shared_ptr(ptr, DeviceDeleter(stream)); 72 | } 73 | 74 | template< typename T > 75 | class DeviceReleaser 76 | { 77 | cudaStream_t mStream; 78 | public: 79 | DeviceReleaser(cudaStream_t stream) : mStream(stream) {} 80 | void operator()(T *ptr) 81 | { 82 | 83 | } 84 | }; 85 | 86 | template< typename T > 87 | inline SHARED_PREFIX::shared_ptr attachDevicePtr(T * ptr_in, cudaStream_t stream) 88 | { 89 | T *ptr = ptr_in; 90 | return SHARED_PREFIX::shared_ptr(ptr, DeviceReleaser(stream)); 91 | } 92 | 93 | 94 | } // end namespace nvgraph 95 | 96 | -------------------------------------------------------------------------------- /cpp/include/triangles_counting_defines.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
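A short usage sketch for the cnmem helpers above; it assumes a cnmem pool has already been created for the current device (done elsewhere in the library) and that the CUDA runtime header is included:

void example_device_buffer(size_t n, cudaStream_t stream)
{
    // the allocation comes from the cnmem pool and is returned to it automatically
    // when the last copy of the shared_ptr goes out of scope
    SHARED_PREFIX::shared_ptr<float> d_tmp = nvgraph::allocateDevice<float>(n, stream);
    cudaMemsetAsync(d_tmp.get(), 0, n * sizeof(float), stream);

    // attachDevicePtr, by contrast, wraps an externally owned pointer with a no-op
    // deleter, so ownership stays with the caller.
}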
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #ifdef _MSC_VER 23 | #include 24 | #else 25 | #include 26 | #endif 27 | 28 | 29 | /* 30 | #ifdef MSVC_VER 31 | #include 32 | #pragma intrinsic(_BitScanForward) 33 | #pragma intrinsic(_BitScanForward64) 34 | #pragma intrinsic(_BitScanReverse) 35 | #pragma intrinsic(_BitScanReverse64) 36 | #endif 37 | */ 38 | 39 | #define MIN(x,y) (((x)<(y))?(x):(y)) 40 | #define MAX(x,y) (((x)>(y))?(x):(y)) 41 | 42 | #define THREADS (128) 43 | #define DIV_UP(a,b) (((a)+((b)-1))/(b)) 44 | #define BITSOF(x) (sizeof(*x)*8) 45 | 46 | #define BLK_BWL0 (128) 47 | #define WRP_BWL0 (128) 48 | 49 | #define HUGE_GRAPH 50 | 51 | #define DEG_THR1 (3.5) 52 | #define DEG_THR2 (38.0) 53 | 54 | namespace nvgraph 55 | { 56 | 57 | namespace triangles_counting 58 | { 59 | 60 | template struct type_utils; 61 | 62 | template <> 63 | struct type_utils 64 | { 65 | typedef int LOCINT; 66 | static const LOCINT LOCINT_MAX = INT_MAX; 67 | #ifdef MPI_VERSION 68 | static const MPI_Datatype LOCINT_MPI = MPI_INT; 69 | #endif 70 | static __inline__ LOCINT abs(const LOCINT& x) 71 | { 72 | return abs(x); 73 | } 74 | }; 75 | 76 | template <> 77 | struct type_utils 78 | { 79 | typedef uint64_t LOCINT; 80 | static const LOCINT LOCINT_MAX = LLONG_MAX; 81 | #ifdef MPI_VERSION 82 | static const MPI_Datatype LOCINT_MPI = MPI_LONG_LONG; 83 | #endif 84 | 85 | static __inline__ LOCINT abs(const LOCINT& x) 86 | { 87 | return llabs(x); 88 | } 89 | }; 90 | 91 | 92 | template 93 | struct spmat_t { 94 | T N; 95 | T nnz; 96 | T nrows; 97 | const T *roff_d; 98 | const T *rows_d; 99 | const T *cols_d; 100 | bool is_lower_triangular; 101 | }; 102 | 103 | } // namespace triangles_counting 104 | 105 | } // namespace nvgraph 106 | -------------------------------------------------------------------------------- /cpp/include/sssp.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
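The THREADS and DIV_UP helpers above are the usual ceiling-division grid-sizing idiom. A one-line illustration (my_kernel, d_rows and stream are placeholders, not symbols from this file):

int nblocks = DIV_UP(nrows, THREADS);                        // ceil(nrows / 128)
my_kernel<<<nblocks, THREADS, 0, stream>>>(d_rows, nrows);   // one thread per row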
15 | */ 16 | 17 | #pragma once 18 | #include 19 | namespace nvgraph 20 | { 21 | template 22 | class Sssp 23 | { 24 | public: 25 | typedef IndexType_ IndexType; 26 | typedef ValueType_ ValueType; 27 | 28 | private: 29 | ValuedCsrGraph m_network ; 30 | Vector m_sssp; 31 | Vector m_tmp; 32 | Vector m_mask; // mask[i] = 0 if we can ignore the i th column in the csrmv 33 | 34 | IndexType m_source; 35 | ValueType m_residual; 36 | int m_iterations; 37 | bool m_is_setup; 38 | 39 | cudaStream_t m_stream; 40 | 41 | bool solve_it(); 42 | void setup(IndexType source_index, Vector& source_connection, Vector& sssp_result); 43 | 44 | public: 45 | // Simple constructor 46 | Sssp(void) {}; 47 | // Simple destructor 48 | ~Sssp(void) {}; 49 | 50 | // Create a Sssp solver attached to a the transposed of a weighted network 51 | // *** network is the transposed/CSC*** 52 | Sssp(const ValuedCsrGraph & network, cudaStream_t stream = 0):m_network(network),m_is_setup(false), m_stream(stream) {}; 53 | 54 | /*! Find the sortest path from the vertex source_index to every other vertices. 55 | * 56 | * \param source_index The source. 57 | * \param source_connection The connectivity of the source 58 | * if there is a link from source_index to i, source_connection[i] = E(source_index, i) 59 | * otherwise source_connection[i] = inifinity 60 | * source_connection[source_index] = 0 61 | The source_connection is computed somewhere else. 62 | * \param (output) m_sssp m_sssp[i] contains the sortest path from the source to the vertex i. 63 | */ 64 | 65 | NVGRAPH_ERROR solve(IndexType source_index, Vector& source_connection, Vector& sssp_result); 66 | inline int get_iterations() const {return m_iterations;} 67 | }; 68 | 69 | } // end namespace nvgraph 70 | 71 | -------------------------------------------------------------------------------- /cpp/include/widest_path.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
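A hypothetical driver for the Sssp solver declared above; the <int, float> instantiation is an assumption, and building the transposed (CSC) ValuedCsrGraph and the two nvgraph::Vector arguments as documented in the comment is left out:

void example_sssp(const nvgraph::ValuedCsrGraph<int, float>& network_csc,
                  nvgraph::Vector<float>& source_connection,   // prepared as described above
                  nvgraph::Vector<float>& sssp_result,         // receives the distances
                  int source_index)
{
    nvgraph::Sssp<int, float> solver(network_csc);
    if (solver.solve(source_index, source_connection, sssp_result) == NVGRAPH_OK)
    {
        int iters = solver.get_iterations();   // number of relaxation sweeps performed
        (void)iters;
    }
}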
15 | */ 16 | 17 | #pragma once 18 | namespace nvgraph 19 | { 20 | template 21 | class WidestPath 22 | { 23 | public: 24 | typedef IndexType_ IndexType; 25 | typedef ValueType_ ValueType; 26 | private: 27 | ValuedCsrGraph m_network ; 28 | Vector m_widest_path; 29 | Vector m_tmp; 30 | Vector m_mask; // mask[i] = 0 if we can ignore the i th column in the csrmv 31 | IndexType m_source; 32 | ValueType m_residual; 33 | int m_iterations; 34 | bool m_is_setup; 35 | cudaStream_t m_stream; 36 | bool solve_it(); 37 | void setup(IndexType source_index, Vector& source_connection, Vector& WidestPath_result); 38 | public: 39 | // Simple constructor 40 | WidestPath(void) {}; 41 | // Simple destructor 42 | ~WidestPath(void) {}; 43 | // Create a WidestPath solver attached to a the transposed of a weighted network 44 | // *** network is the transposed/CSC*** 45 | WidestPath(const ValuedCsrGraph & network, cudaStream_t stream = 0):m_network(network),m_is_setup(false), m_stream(stream) {}; 46 | 47 | /*! Find the Widest Path from the vertex source_index to every other vertices. 48 | * 49 | * \param source_index The source. 50 | * \param source_connection The connectivity of the source 51 | * - if there is a link from source_index to i, source_connection[i] = E(source_index, i) ) 52 | * - otherwise source_connection[i] = op.plus->id 53 | * - source_connection[source_index] = op.time->id 54 | The source_connection is provided as input 55 | * \param (output) m_widest_path m_widest_path[i] contains the Widest Path from the source to the vertex i. 56 | */ 57 | 58 | NVGRAPH_ERROR solve(IndexType source_index, Vector& source_connection, Vector& WidestPath_result); 59 | inline int get_iterations() const {return m_iterations;} 60 | }; 61 | } // end namespace nvgraph 62 | 63 | -------------------------------------------------------------------------------- /cpp/include/test/delta_modularity_test.cuh: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2019, NVIDIA CORPORATION. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
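WidestPath above reuses the same solver skeleton as Sssp but over the (max, min) semiring: a path's width is the minimum edge weight along it, and the best width into a vertex is the maximum over candidate paths. Written out for a single edge (u, v) with weight w_uv, the implied relaxation is (illustrative only, not the library's kernel):

width_v = std::max(width_v, std::min(width_u, w_uv));   // widest path: max over paths of min edge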
16 | */ 17 | #pragma once 18 | 19 | #include 20 | #include "test_opt_utils.h" 21 | #include "graph_utils.cuh" 22 | #include "louvain.cuh" 23 | #include "gtest/gtest.h" 24 | #include "high_res_clock.h" 25 | #include "util.cuh" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | 34 | template 35 | __global__ void 36 | kernel_delta_modularity(const int n_vertex, IdxIter csr_ptr_iter, IdxIter csr_ind_iter, ValIter csr_val_iter, IdxIter cluster, ValType* score){ 37 | 38 | int c = blockIdx.x * blockDim.x + threadIdx.x; 39 | int i = blockIdx.y * blockDim.y + threadIdx.y; 40 | if( i &csr_ptr_d, 49 | thrust::device_vector &csr_ind_d, 50 | thrust::device_vector &csr_val_d, 51 | const int size){ 52 | 53 | HighResClock hr_clock; 54 | double timed; 55 | 56 | dim3 block_size((size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, (size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, 1); 57 | dim3 grid_size(BLOCK_SIZE_2D, BLOCK_SIZE_2D, 1); 58 | 59 | 60 | thrust::device_vector cluster_d(size); 61 | thrust::sequence(cluster_d.begin(), cluster_d.end()); 62 | std::cout<<"cluster: "; 63 | nvlouvain::display_vec(cluster_d); 64 | 65 | thrust::device_vector score_d(size*size); 66 | T* score_d_raw_ptr = thrust::raw_pointer_cast(score_d.data()); 67 | 68 | 69 | hr_clock.start(); 70 | 71 | kernel_delta_modularity<<>>(size, csr_ptr_d.begin(), csr_ind_d.begin(), csr_val_d.begin(), cluster_d.begin(), score_d_raw_ptr); 72 | 73 | 74 | CUDA_CALL(cudaDeviceSynchronize()); 75 | 76 | hr_clock.stop(&timed); 77 | double mod_time(timed); 78 | std::cout<<"delta modularity: "< 20 | #include "test_opt_utils.h" 21 | #include "graph_utils.cuh" 22 | #include "louvain.cuh" 23 | #include "gtest/gtest.h" 24 | #include "high_res_clock.h" 25 | #include "util.cuh" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | 36 | template 37 | __global__ void 38 | kernel_local_mem(const int n_vertex ){ 39 | 40 | thrust::device_system_tag device_sys; 41 | thrust::pointer temp_i = thrust::malloc(device_sys, n_vertex); // for weight on i and for sum_k 42 | thrust::pointer temp_idx = thrust::malloc(device_sys, n_vertex); // for weight on i and for sum_k 43 | 44 | 45 | 46 | *temp_i = 10.0; 47 | *(temp_i + n_vertex-1) = 100.5; 48 | 49 | thrust::return_temporary_buffer(device_sys, temp_idx); 50 | thrust::return_temporary_buffer(device_sys, temp_i); 51 | } 52 | 53 | template 54 | __global__ void 55 | kernel_local_mem_new(const int n_vertex ){ 56 | 57 | ValType * temp_i = new ValType[n_vertex]; 58 | IdxType * temp_idx = new IdxType[n_vertex]; 59 | 60 | 61 | *temp_i = 10.0; 62 | *(temp_i + n_vertex-1) = 100.5; 63 | thrust::sequence(thrust::cuda::par, temp_idx, temp_idx + n_vertex); 64 | printf("%d %d %d ... 
%d\n",*temp_idx, *(temp_idx+1), *(temp_idx+2), *(temp_idx + n_vertex - 1) ); 65 | 66 | delete [] temp_i; 67 | delete [] temp_idx; 68 | } 69 | 70 | 71 | 72 | 73 | void mem_allocate_test(const int size){ 74 | 75 | 76 | HighResClock hr_clock; 77 | double timed; 78 | 79 | 80 | dim3 block_size((size + BLOCK_SIZE_1D -1)/ BLOCK_SIZE_1D, 1, 1); 81 | dim3 grid_size(BLOCK_SIZE_1D, 1, 1); 82 | hr_clock.start(); 83 | 84 | kernel_local_mem<<>>(30000); 85 | 86 | kernel_local_mem_new<<>>(30000); 87 | 88 | 89 | CUDA_CALL(cudaDeviceSynchronize()); 90 | hr_clock.stop(&timed); 91 | double raw_ptr_time(timed); 92 | 93 | std::cout<<"allocate_mem_runtime: "< 46 | NVGRAPH_ERROR modularity_maximization( ValuedCsrGraph& G, 47 | IndexType_ nClusters, 48 | IndexType_ nEigVecs, 49 | IndexType_ maxIter_lanczos, 50 | IndexType_ restartIter_lanczos, 51 | ValueType_ tol_lanczos, 52 | IndexType_ maxIter_kmeans, 53 | ValueType_ tol_kmeans, 54 | IndexType_ * __restrict__ clusters, 55 | Vector &eigVals, 56 | Vector &eigVecs, 57 | IndexType_ & iters_lanczos, 58 | IndexType_ & iters_kmeans) ; 59 | 60 | 61 | /// Compute modularity 62 | /** This function determines the modularity based on a graph and cluster assignments 63 | * @param G Weighted graph in CSR format 64 | * @param nClusters Number of clusters. 65 | * @param parts (Input, device memory, n entries) Cluster assignments. 66 | * @param modularity On exit, modularity 67 | */ 68 | template 69 | NVGRAPH_ERROR analyzeModularity(ValuedCsrGraph & G, 70 | IndexType_ nClusters, 71 | const IndexType_ * __restrict__ parts, 72 | ValueType_ & modularity) ; 73 | 74 | } 75 | 76 | -------------------------------------------------------------------------------- /cpp/include/bfs2d.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | //Used in nvgraph.h 22 | #define TRAVERSAL_DEFAULT_ALPHA 15 23 | #define TRAVERSAL_DEFAULT_BETA 18 24 | 25 | #include "nvgraph_error.hxx" 26 | #include "2d_partitioning.h" 27 | 28 | namespace nvgraph { 29 | template 30 | class Bfs2d { 31 | private: 32 | Matrix2d* M; 33 | 34 | bool directed; 35 | bool deterministic; 36 | GlobalType alpha; 37 | GlobalType beta; 38 | 39 | // edgemask, distances, predecessors are set/read by users - using Vectors 40 | bool useEdgeMask; 41 | bool computeDistances; 42 | bool computePredecessors; 43 | int32_t vertices_bmap_size; 44 | VertexData2D* distances; 45 | VertexData2D* predecessors; 46 | 47 | //Working data 48 | VertexData2D* frontier_bmap; 49 | VertexData2D* visited_bmap; 50 | VertexData2D_Unbuffered* frontier; 51 | VertexData2D_Unbuffered* trim_frontier; 52 | VertexData2D_Unbuffered* frontierSize; 53 | VertexData2D_Unbuffered* degreeFlags; 54 | std::vector frontierSize_h; 55 | VertexData2D_Unbuffered* exSumDegree; 56 | VertexData2D_Unbuffered* exSumStorage; 57 | VertexData2D_Unbuffered* bucketOffsets; 58 | std::vector frontierDegree_h; 59 | 60 | // Output locations 61 | GlobalType* distances_out; 62 | GlobalType* predecessors_out; 63 | 64 | NVGRAPH_ERROR setup(); 65 | 66 | void clean(); 67 | 68 | public: 69 | virtual ~Bfs2d(void) { 70 | clean(); 71 | }; 72 | 73 | Bfs2d(Matrix2d* _M, 74 | bool _directed, 75 | GlobalType _alpha, 76 | GlobalType _beta) : 77 | M(_M), 78 | directed(_directed), 79 | alpha(_alpha), 80 | beta(_beta){ 81 | distances = NULL; 82 | predecessors = NULL; 83 | frontier_bmap = NULL; 84 | visited_bmap = NULL; 85 | setup(); 86 | } 87 | 88 | NVGRAPH_ERROR configure(GlobalType *distances, GlobalType *predecessors); 89 | 90 | NVGRAPH_ERROR traverse(GlobalType source_vertex); 91 | 92 | //Used only for benchmarks 93 | NVGRAPH_ERROR traverse(GlobalType *source_vertices, int32_t nsources); 94 | }; 95 | } // end namespace nvgraph 96 | 97 | -------------------------------------------------------------------------------- /cpp/include/valued_csr_graph.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "csr_graph.hxx" 20 | #include "nvgraph_vector.hxx" 21 | 22 | namespace nvgraph 23 | { 24 | 25 | /*! A ValuedCsrGraph is a graph strored in a CSR data structure. 26 | It represents an weighted graph and has storage for row_offsets and column_indices and values 27 | */ 28 | template 29 | class ValuedCsrGraph : public nvgraph::CsrGraph 30 | { 31 | public: 32 | typedef IndexType_ IndexType; 33 | typedef ValueType_ ValueType; 34 | 35 | private: 36 | typedef nvgraph::CsrGraph Parent; 37 | 38 | protected: 39 | /*! Storage for the nonzero entries of the CSR data structure. 40 | */ 41 | SHARED_PREFIX::shared_ptr values; 42 | 43 | public: 44 | 45 | /*! 
Construct an empty \p ValuedCsrGraph. 46 | */ 47 | ValuedCsrGraph(void) {} 48 | /*! Destruct a \p ValuedCsrGraph. 49 | */ 50 | ~ValuedCsrGraph(void) {} 51 | 52 | /*! Construct a \p ValuedCsrGraph with a specific shape and number of nonzero entries. 53 | * 54 | * \param num_rows Number of rows. 55 | * \param num_entries Number of nonzero graph entries. 56 | */ 57 | ValuedCsrGraph(size_t num_rows, size_t num_entries, cudaStream_t stream) 58 | : Parent(num_rows, num_entries, stream), 59 | values(allocateDevice(num_entries, NULL)) {} 60 | 61 | /*! Construct a \p ValuedCsrGraph from another graph. 62 | * 63 | * \param ValuedCsrGraph Another graph in csr 64 | */ 65 | ValuedCsrGraph(const ValuedCsrGraph& gr): 66 | Parent(gr), 67 | values(gr.values) 68 | {} 69 | 70 | /*! Construct a \p ValuedCsrGraph from another graph. 71 | * 72 | * \param ValuedCsrGraph Another graph in csr 73 | */ 74 | ValuedCsrGraph(const Parent& gr, Vector& vals): 75 | Parent(gr), 76 | values(vals.raw()) 77 | { 78 | 79 | } 80 | 81 | inline ValueType* get_raw_values() const { return values.get(); } 82 | 83 | 84 | /*! Swap the contents of two \p ValuedCsrGraph objects. 85 | * 86 | * \param graph Another graph in csr 87 | */ 88 | void swap(ValuedCsrGraph& graph); 89 | 90 | /*! Assignment from another graph. 91 | * 92 | * \param graph Another graph in csr 93 | */ 94 | ValuedCsrGraph& operator=(const ValuedCsrGraph& graph); 95 | 96 | //Accept method injection 97 | DEFINE_VISITABLE(IndexType_) 98 | 99 | }; // class ValuedCsrGraph 100 | } 101 | 102 | -------------------------------------------------------------------------------- /cpp/tests/nvgraph_test_common.h: -------------------------------------------------------------------------------- 1 | #include /* import labs() */ 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #if defined(_WIN32) 8 | #if !defined(WIN32_LEAN_AND_MEAN) 9 | #define WIN32_LEAN_AND_MEAN 10 | #endif 11 | #define NOMINMAX 12 | #include 13 | static double second (void) 14 | { 15 | LARGE_INTEGER t; 16 | static double oofreq; 17 | static int checkedForHighResTimer; 18 | static BOOL hasHighResTimer; 19 | 20 | if (!checkedForHighResTimer) { 21 | hasHighResTimer = QueryPerformanceFrequency (&t); 22 | oofreq = 1.0 / (double)t.QuadPart; 23 | checkedForHighResTimer = 1; 24 | } 25 | if (hasHighResTimer) { 26 | QueryPerformanceCounter (&t); 27 | return (double)t.QuadPart * oofreq; 28 | } else { 29 | return (double)GetTickCount() / 1000.0; 30 | } 31 | } 32 | 33 | static long long getSystemMemory() 34 | { 35 | MEMORYSTATUSEX state; // Requires >= win2k 36 | memset (&state, 0, sizeof(state)); 37 | state.dwLength = sizeof(state); 38 | if (0 == GlobalMemoryStatusEx(&state)) { 39 | return 0; 40 | } else { 41 | return (long long)state.ullTotalPhys; 42 | } 43 | } 44 | #elif defined(__linux) || defined(__powerpc64__) 45 | #include 46 | #include 47 | #include 48 | #include 49 | static double second (void) 50 | { 51 | struct timeval tv; 52 | gettimeofday(&tv, NULL); 53 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 54 | } 55 | 56 | static long long getSystemMemory(void) 57 | { 58 | struct sysinfo s_info; 59 | sysinfo (&s_info); 60 | return (long long)s_info.totalram * (long long)s_info.mem_unit; 61 | } 62 | #elif defined(__APPLE__) 63 | #include 64 | #include 65 | #include 66 | #include 67 | #include 68 | static double second (void) 69 | { 70 | struct timeval tv; 71 | gettimeofday(&tv, NULL); 72 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 73 | } 74 | 75 | static long long getSystemMemory(void) 76 | 
{ 77 | int memmib[2] = { CTL_HW, HW_MEMSIZE }; 78 | long long mem = (size_t)0; 79 | size_t memsz = sizeof(mem); 80 | 81 | /* NOTE: This may cap memory reported at 2GB */ 82 | if (sysctl(memmib, 2, &mem, &memsz, NULL, 0) == -1) { 83 | return 0; 84 | } else { 85 | return mem; 86 | } 87 | } 88 | #elif defined(__QNX__) 89 | #include 90 | #include 91 | #include 92 | static double second (void) 93 | { 94 | struct timeval tv; 95 | gettimeofday(&tv, NULL); 96 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 97 | } 98 | 99 | static long long getSystemMemory(void) 100 | { 101 | return 0; 102 | } 103 | #else 104 | #error unsupported platform 105 | #endif 106 | 107 | std::string getFileName(const std::string& s) { 108 | 109 | char sep = '/'; 110 | 111 | #ifdef _WIN32 112 | sep = '\\'; 113 | #endif 114 | 115 | size_t i = s.rfind(sep, s.length()); 116 | if (i != std::string::npos) { 117 | return(s.substr(i+1, s.length() - i)); 118 | } 119 | 120 | return(""); 121 | } 122 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_csrmv.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
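A minimal sketch of how the portable helpers in nvgraph_test_common.h above are typically used: second() for wall-clock timing, getSystemMemory() for a capacity check, and getFileName() to strip a directory prefix. The timed workload here is a placeholder.

    #include <cstdio>
    #include <string>
    #include "nvgraph_test_common.h"

    int main()
    {
        long long totalBytes = getSystemMemory();   // 0 if the query is unsupported
        std::printf("system memory: %lld bytes\n", totalBytes);

        double t0 = second();
        // ... workload under test would run here ...
        double elapsed = second() - t0;
        std::printf("elapsed: %f s\n", elapsed);

        std::printf("basename: %s\n", getFileName(std::string(__FILE__)).c_str());
        return 0;
    }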
15 | */ 16 | #pragma once 17 | #include 18 | #include 19 | #include "valued_csr_graph.hxx" 20 | #include "nvgraph_vector.hxx" 21 | 22 | namespace nvgraph{ 23 | 24 | //this header file defines the various semirings using enum 25 | enum Semiring 26 | {//the datatype is assumed to be real unless otherwise specified in the name 27 | PlusTimes, //standard matrix vector multiplication 28 | MinPlus, //breadth first search-also called tropical 29 | MaxMin, //mas flow problems 30 | OrAndBool, 31 | LogPlus 32 | }; 33 | 34 | //Merge Path Coord array depends on the integere type 35 | template 36 | struct Coord 37 | { 38 | IndexType_ x; 39 | IndexType_ y; 40 | }; 41 | 42 | //struct which stores the csr matrix format, templated on the index and value 43 | template 44 | struct CsrMvParams { 45 | ValueType_ alpha; 46 | ValueType_ beta; 47 | ValueType_ *csrVal; //nonzero values from matrix A 48 | //row pointer must look at next address to avoid the 0 in merge path 49 | IndexType_ *csrRowPtr; //row offsets last entry is number of nonzeros size is m +1 50 | IndexType_ *csrColInd; //column indices of nonzeros 51 | ValueType_ *x; //vector x in alpha*A*x 52 | ValueType_ *y; //output y will be modified and store the output 53 | IndexType_ m; //number of rows 54 | IndexType_ n; //number of columns 55 | IndexType_ nnz; 56 | }; 57 | 58 | //create a device function interface to call the above dispatch function 59 | template 60 | cudaError_t csrmv_mp( 61 | IndexType_ n, 62 | IndexType_ m, 63 | IndexType_ nnz, 64 | ValueType_ alpha, 65 | ValueType_ * dValues, //all must be preallocated on the device 66 | IndexType_ * dRowOffsets, 67 | IndexType_ * dColIndices, 68 | ValueType_ *dVectorX, 69 | ValueType_ beta, 70 | ValueType_ *dVectorY, 71 | Semiring SR, //this parameter is of type enum and gives the semiring name 72 | cudaStream_t stream = 0 ); 73 | //overloaded function that has valued_csr_graph parameter to store the matrix 74 | template 75 | cudaError_t csrmv_mp( 76 | IndexType_ n, 77 | IndexType_ m, 78 | IndexType_ nnz, 79 | ValueType_ alpha, 80 | ValuedCsrGraph network, 81 | ValueType_ *dVectorX, 82 | ValueType_ beta, 83 | ValueType_ *dVectorY, 84 | Semiring SR, //this parameter is of type enum and gives the semiring name 85 | cudaStream_t stream = 0); 86 | } //end nvgraph namespace 87 | 88 | template 89 | void callTestCsrmv(IndexType_ num_rows, IndexType_ *dRowOffsets, IndexType_ *dColIndices, ValueType_ *dValues, 90 | ValueType_ *dVectorX, ValueType_ *dVectorY, nvgraph::Semiring SR, ValueType_ alpha, ValueType_ beta); 91 | 92 | -------------------------------------------------------------------------------- /cpp/include/graph.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
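A hedged sketch of one csrmv_mp call under the MinPlus (tropical) semiring declared above, which is the relaxation step behind SSSP: "multiply" becomes + and "add" becomes min. Treating alpha/beta as the semiring identities is an assumption about the convention, not something this header states, and all device buffers are presumed preallocated and filled by the caller.

    #include <cfloat>
    #include <cuda_runtime.h>
    #include "nvgraph_csrmv.hxx"

    // One semiring SpMV: under (min,+), y[i] becomes the min over row i of
    // (A[i][j] + x[j]), i.e. one SSSP relaxation sweep.
    cudaError_t relax_once(int n, int m, int nnz,
                           float* d_vals, int* d_rowPtr, int* d_colInd,
                           float* d_x, float* d_y, cudaStream_t stream = 0)
    {
        float alpha = 0.0f;    // multiplicative identity of (min,+)
        float beta  = FLT_MAX; // additive identity of (min,+); assumed convention
        return nvgraph::csrmv_mp(n, m, nnz, alpha,
                                 d_vals, d_rowPtr, d_colInd,
                                 d_x, beta, d_y,
                                 nvgraph::MinPlus, stream);
    }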
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include // size_t 21 | #include 22 | 23 | #include // 24 | // 25 | namespace nvgraph 26 | { 27 | 28 | #define DEFINE_VISITABLE(T) \ 29 | virtual void Accept(VisitorBase& guest) \ 30 | { BaseVisitableGraph::AcceptImpl(*this, guest); } 31 | 32 | template 33 | struct BaseVisitableGraph 34 | { 35 | virtual void Accept(VisitorBase& v) = 0; 36 | 37 | virtual ~BaseVisitableGraph(void) 38 | { 39 | } 40 | protected: 41 | template 42 | static void AcceptImpl(Host& visited, VisitorBase& guest) 43 | { 44 | if( Visitor* p = dynamic_cast*>(&guest)) 45 | { 46 | p->Visit(visited); 47 | } 48 | } 49 | }; 50 | 51 | template 52 | class Graph: public BaseVisitableGraph 53 | { 54 | public: 55 | typedef IndexType_ IndexType; 56 | 57 | protected: 58 | size_t num_vertices; 59 | size_t num_edges; 60 | Graph *parent; 61 | Graph *child; 62 | 63 | public: 64 | /*! Construct an empty \p Graph. 65 | */ 66 | Graph() 67 | : num_vertices(0),num_edges(0) {} 68 | 69 | /*! Construct a \p Graph with a specific number of vertices. 70 | * 71 | * \param vertices Number of vertices. 72 | */ 73 | Graph(size_t vertices) 74 | : num_vertices(vertices), num_edges(0) {} 75 | 76 | /*! Construct a \p Graph with a specific number of vertices and edges. 77 | * 78 | * \param vertices Number of vertices. 79 | * \param edges Number of edges. 80 | */ 81 | Graph(size_t vertices, size_t edges) 82 | : num_vertices(vertices), num_edges(edges) {} 83 | 84 | /*! Construct a \p CsrGraph from another graph. 85 | * 86 | * \param CsrGraph Another graph in csr 87 | */ 88 | Graph(const Graph& gr) 89 | { 90 | num_vertices = gr.get_num_vertices(); 91 | num_edges = gr.get_num_edges(); 92 | } 93 | 94 | inline void set_num_vertices(IndexType_ p_num_vertices) { num_vertices = p_num_vertices; } 95 | inline void set_num_edges(IndexType_ p_num_edges) { num_edges = p_num_edges; } 96 | inline size_t get_num_vertices() const { return num_vertices; } 97 | inline size_t get_num_edges() const { return num_edges; } 98 | /*! Resize graph dimensions 99 | * 100 | * \param num_rows Number of vertices. 101 | * \param num_cols Number of edges. 102 | */ 103 | //inline void resize(size_t vertices, size_t edges) 104 | //{ 105 | // num_vertices = vertices; 106 | // num_edges = edges; 107 | //} 108 | 109 | //Accept method injection 110 | DEFINE_VISITABLE(IndexType_) 111 | }; 112 | 113 | } // end namespace nvgraph 114 | 115 | -------------------------------------------------------------------------------- /cpp/include/pagerank.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
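The Accept/AcceptImpl machinery in graph.hxx above implements the acyclic visitor pattern: a visitor passed through the type-erased base is dispatched only if it also derives from the visit interface for the concrete host type. Below is a self-contained, stand-alone sketch of the same idiom; VisitorBase/Visitor here are stand-ins, not the actual definitions from graph_visitors.hxx (which is not shown in this dump).

    #include <iostream>

    struct VisitorBase { virtual ~VisitorBase() {} };       // common, type-erased base

    template <typename Host>
    struct Visitor { virtual void Visit(Host&) = 0; virtual ~Visitor() {} };

    struct CsrHost;                                          // a "visitable" host type

    struct DegreePrinter : VisitorBase, Visitor<CsrHost> {
        void Visit(CsrHost&) override { std::cout << "visited a CsrHost\n"; }
    };

    struct CsrHost {
        // Mirrors AcceptImpl: only guests that also derive from Visitor<CsrHost>
        // pass the dynamic_cast and get their Visit() called; others are ignored.
        void Accept(VisitorBase& guest) {
            if (Visitor<CsrHost>* p = dynamic_cast<Visitor<CsrHost>*>(&guest))
                p->Visit(*this);
        }
    };

    int main() {
        CsrHost g;
        DegreePrinter v;
        g.Accept(v);   // prints "visited a CsrHost"
    }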
15 | */ 16 | 17 | #pragma once 18 | 19 | namespace nvgraph 20 | { 21 | template 22 | class Pagerank 23 | { 24 | public: 25 | typedef IndexType_ IndexType; 26 | typedef ValueType_ ValueType; 27 | 28 | private: 29 | ValuedCsrGraph m_network ; 30 | Vector m_a; 31 | Vector m_b; 32 | Vector m_pagerank; 33 | Vector m_tmp; 34 | ValueType m_damping_factor; 35 | ValueType m_residual; 36 | ValueType m_tolerance; 37 | cudaStream_t m_stream; 38 | int m_iterations; 39 | int m_max_it; 40 | bool m_is_setup; 41 | bool m_has_guess; 42 | 43 | bool solve_it(); 44 | //void update_dangling_nodes(Vector& dangling_nodes); 45 | void setup(ValueType damping_factor, Vector& initial_guess, Vector& pagerank_vector); 46 | 47 | public: 48 | // Simple constructor 49 | Pagerank(void) {}; 50 | // Simple destructor 51 | ~Pagerank(void) {}; 52 | 53 | // Create a Pagerank Solver attached to a the transposed of a transition matrix 54 | // *** network is the transposed of a transition matrix*** 55 | Pagerank(const ValuedCsrGraph & network, Vector& dangling_nodes, cudaStream_t stream = 0); 56 | 57 | // dangling_nodes is a vector of size n where dangling_nodes[i] = 1.0 if vertex i is a dangling node and 0.0 otherwise 58 | // pagerank_vector is the output 59 | //void solve(ValueType damping_factor, Vector& dangling_nodes, Vector& pagerank_vector); 60 | // setup with an initial guess of the pagerank 61 | NVGRAPH_ERROR solve(ValueType damping_factor, Vector& initial_guess, Vector& pagerank_vector, float tolerance =1.0E-6, int max_it = 500); 62 | inline ValueType get_residual() const {return m_residual;} 63 | inline int get_iterations() const {return m_iterations;} 64 | 65 | 66 | // init : 67 | // We need the transpose (=converse =reverse) in input (this can be seen as a CSC matrix that we see as CSR) 68 | // b is a constant and uniform vector, b = 1.0/num_vertices 69 | // a is a constant vector that initialy store the dangling nodes then we set : a = alpha*a + (1-alpha)e 70 | // pagerank is 0 71 | // tmp is random ( 1/n is fine) 72 | // alpha is a constant scalar (0.85 usually) 73 | 74 | //loop : 75 | // pagerank = csrmv (network, tmp) 76 | // scal(pagerank, alpha); //pagerank = alpha*pagerank 77 | // gamma = dot(a, tmp); //gamma = a*tmp 78 | // pagerank = axpy(b, pagerank, gamma); // pagerank = pagerank+gamma*b 79 | 80 | // convergence check 81 | // tmp = axpby(pagerank, tmp, -1, 1); // tmp = pagerank - tmp 82 | // residual_norm = norm(tmp); 83 | // if converged (residual_norm) 84 | // l1 = l1_norm(pagerank); 85 | // pagerank = scal(pagerank, 1/l1); 86 | // return pagerank 87 | // swap(tmp, pagerank) 88 | //end loop 89 | }; 90 | 91 | } // end namespace nvgraph 92 | 93 | -------------------------------------------------------------------------------- /test/generators/rmat.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Usage:./rmatg x\n"); 14 | printf("%s", "x is the size of the graph, x>32 (Boost generator hang if x<32)\n"); 15 | exit(0); 16 | } 17 | 18 | int main(int argc, char *argv[]) 19 | { 20 | 21 | // RMAT paper http://snap.stanford.edu/class/cs224w-readings/chakrabarti04rmat.pdf 22 | // Boost doc on RMAT http://www.boost.org/doc/libs/1_49_0/libs/graph_parallel/doc/html/rmat_generator.html 23 | 24 | typedef boost::adjacency_list Graph; 25 | typedef boost::unique_rmat_iterator RMATGen; 26 | 27 | if (argc < 2) printUsageAndExit(); 
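The comment block in pagerank.hxx above spells out the power iteration (csrmv, scal, dot, axpy, convergence check, final L1 rescaling). Below is a CPU-only reference sketch of that loop on plain CSR arrays, written to make those comments concrete; it is not the library's GPU implementation. The matrix is the transposed transition matrix, and dangling[i] is 1.0 for dangling vertices, 0.0 otherwise, as described above.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    std::vector<double> pagerank_ref(const std::vector<int>& rowPtr,
                                     const std::vector<int>& colInd,
                                     const std::vector<double>& val,
                                     const std::vector<double>& dangling,
                                     double alpha = 0.85, double tol = 1e-6, int max_it = 500)
    {
        const std::size_t n = rowPtr.size() - 1;
        const double b = 1.0 / static_cast<double>(n);        // b is the constant 1/n vector
        std::vector<double> a(n), tmp(n, b), pr(n, 0.0);
        for (std::size_t i = 0; i < n; ++i)                    // a = alpha*dangling + (1-alpha)*e
            a[i] = alpha * dangling[i] + (1.0 - alpha);

        for (int it = 0; it < max_it; ++it) {
            double gamma = 0.0;                                // gamma = dot(a, tmp)
            for (std::size_t i = 0; i < n; ++i) gamma += a[i] * tmp[i];

            for (std::size_t i = 0; i < n; ++i) {              // pr = alpha*(A*tmp) + gamma*b
                double s = 0.0;
                for (int k = rowPtr[i]; k < rowPtr[i + 1]; ++k)
                    s += val[k] * tmp[colInd[k]];
                pr[i] = alpha * s + gamma * b;
            }

            double resid = 0.0;                                // ||pr - tmp||_2
            for (std::size_t i = 0; i < n; ++i) resid += (pr[i] - tmp[i]) * (pr[i] - tmp[i]);
            tmp.swap(pr);                                      // tmp now holds the newest iterate
            if (std::sqrt(resid) < tol) break;
        }

        double l1 = 0.0;                                       // rescale to unit L1 norm
        for (std::size_t i = 0; i < n; ++i) l1 += std::fabs(tmp[i]);
        if (l1 > 0.0) for (std::size_t i = 0; i < n; ++i) tmp[i] /= l1;
        return tmp;
    }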
28 | int size = atoi (argv[1]); 29 | if (size<32) printUsageAndExit(); 30 | assert (size > 31 && size < INT_MAX); 31 | const unsigned num_edges = 16 * size; 32 | /************************ 33 | * RMAT Gen 34 | ************************/ 35 | std::cout << "generating ... "<<'\n'; 36 | // values of a,b,c,d are from the graph500. 37 | boost::minstd_rand gen; 38 | Graph g(RMATGen(gen, size, num_edges, 0.57, 0.19, 0.19, 0.05, true), RMATGen(), size); 39 | assert (num_edges == boost::num_edges(g)); 40 | 41 | /************************ 42 | * Print 43 | ************************/ 44 | boost::graph_traits<Graph>::edge_iterator edge, edge_end; 45 | std::cout << "vertices : " << boost::num_vertices(g) <<'\n'; 46 | std::cout << "edges : " << boost::num_edges(g) <<'\n'; 47 | std::cout << "average degree : "<< static_cast<double>(boost::num_edges(g))/boost::num_vertices(g)<< '\n'; 48 | 49 | // Print in matrix coordinate real general format 50 | std::cout << "writing ... "<<'\n'; 51 | std::stringstream tmp; 52 | tmp <<"local_test_data/rmat_graph_" << size << ".mtx"; 53 | const std::string filename = tmp.str(); 54 | std::ofstream fout(tmp.str().c_str()) ; 55 | if (argv[2]==NULL) 56 | { 57 | // Power law out degree with random weights 58 | fout << "%%MatrixMarket matrix coordinate real general\n"; 59 | fout << boost::num_vertices(g) <<' '<< boost::num_vertices(g) <<' '<< boost::num_edges(g) << '\n'; 60 | float val; 61 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 62 | { 63 | val = (rand()%10)+(rand()%100)*(1e-2f); 64 | fout << boost::source(*edge, g) << ' ' << boost::target(*edge, g)<< ' ' << val << '\n'; 65 | } 66 | } 67 | else if (argv[2][0]=='i') 68 | { 69 | // Power law in degree (ie the transpose will have a power law) 70 | // -- Edges only -- 71 | // * Warning * edges will be unsorted, use sort_edges.cpp to sort the dataset.
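For completeness, a small hypothetical reader for the weighted edge list written by the first branch above (MatrixMarket banner, a "rows cols nnz" size line, then "source target weight" triplets). It is not part of the generators and ignores the unweighted 'i' variant; vertex ids are kept exactly as the generator wrote them.

    #include <fstream>
    #include <sstream>
    #include <string>
    #include <vector>

    struct Edge { long src, dst; double w; };

    std::vector<Edge> read_edges(const std::string& path)
    {
        std::ifstream in(path.c_str());
        std::vector<Edge> edges;
        std::string line;
        bool size_line_skipped = false;
        while (std::getline(in, line)) {
            if (line.empty() || line[0] == '%') continue;          // banner / comment lines
            if (!size_line_skipped) { size_line_skipped = true; continue; } // "rows cols nnz"
            std::istringstream ls(line);
            Edge e;
            if (ls >> e.src >> e.dst >> e.w) edges.push_back(e);   // one weighted edge per line
        }
        return edges;
    }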
72 | fout << boost::num_vertices(g) <<' '<< boost::num_edges(g) << '\n'; 73 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 74 | fout < 49 | struct BlockHistogramAtomic 50 | { 51 | /// Shared memory storage layout type 52 | struct TempStorage {}; 53 | 54 | 55 | /// Constructor 56 | __device__ __forceinline__ BlockHistogramAtomic( 57 | TempStorage &temp_storage) 58 | {} 59 | 60 | 61 | /// Composite data onto an existing histogram 62 | template < 63 | typename T, 64 | typename CounterT, 65 | int ITEMS_PER_THREAD> 66 | __device__ __forceinline__ void Composite( 67 | T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram 68 | CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram 69 | { 70 | // Update histogram 71 | #pragma unroll 72 | for (int i = 0; i < ITEMS_PER_THREAD; ++i) 73 | { 74 | atomicAdd(histogram + items[i], 1); 75 | } 76 | } 77 | 78 | }; 79 | 80 | } // CUB namespace 81 | CUB_NS_POSTFIX // Optional outer namespace(s) 82 | 83 | -------------------------------------------------------------------------------- /test/ref/cpu_ref_widest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Generates widest path vector for the single source vertex to all other vertices using dijkstra-like algorithm 4 | 5 | # Usage : python3 nvgraph_cpu_ref.py graph.mtx source_vertex 6 | # This works with networkx 1.8.1 (default ubuntu package version in 14.04) 7 | # http://networkx.github.io/documentation/networkx-1.8/ 8 | 9 | # Latest version is currenlty 1.11 in feb 2016 10 | # https://networkx.github.io/documentation/latest/tutorial/index.html 11 | 12 | #import numpy as np 13 | import sys 14 | import time 15 | from scipy.io import mmread 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | import networkx as nx 19 | import os 20 | import sys 21 | 22 | #modified widest 23 | def _dijkstra_custom(G, source, get_weight, cutoff=None): 24 | G_succ = G.succ if G.is_directed() else G.adj 25 | width = {node: -sys.float_info.max for node in range(G.number_of_nodes())} # dictionary of final distances 26 | width[source] = sys.float_info.max 27 | #seen = set() 28 | Qset = set([(source, 0)]) 29 | while len(Qset) > 0: 30 | u, depth = Qset.pop() 31 | if cutoff: 32 | if cutoff < depth: 33 | continue 34 | #print "Looking at vertex ", u, ", depth = ", depth 35 | for v, e in G_succ[u].items(): 36 | cost = get_weight(u, v, e) 37 | #print "Looking at vertex ", u, ", edge to ", v 38 | if cost is None: 39 | continue 40 | alt = max(width[v], min(width[u], cost)) 41 | if alt > width[v]: 42 | width[v] = alt 43 | Qset.add((v, depth+1)) 44 | #print "Updated QSET: ", Qset 45 | return width 46 | 47 | def single_source_dijkstra_widest(G, source, cutoff=None, 48 | weight='weight'): 49 | if G.is_multigraph(): 50 | get_weight = lambda u, v, data: min( 51 | eattr.get(weight, 1) for eattr in data.values()) 52 | else: 53 | get_weight = lambda u, v, data: data.get(weight, 1) 54 | 55 | return _dijkstra_custom(G, source, get_weight, cutoff=cutoff) 56 | 57 | print ('Networkx version : {} '.format(nx.__version__)) 58 | 59 | # Command line arguments 60 | argc = len(sys.argv) 61 | if argc<=2: 62 | print("Error: usage is : python3 nvgraph_cpu_ref.py graph.mtx source_vertex") 63 | sys.exit() 64 | mmFile = sys.argv[1] 65 | src = int(sys.argv[2]) 66 | print('Reading '+ str(mmFile) + '...') 67 | #Read 68 | M = mmread(mmFile).transpose() 69 | 70 | if M is None : 71 | raise 
TypeError('Could not read the input graph') 72 | 73 | # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this explicitly 74 | M = M.asfptype().tolil().tocsr() 75 | if not M.has_sorted_indices: 76 | M.sort_indices() 77 | 78 | # Directed NetworkX graph 79 | Gnx = nx.DiGraph(M) 80 | 81 | #widest 82 | print('Solving... ') 83 | t1 = time.time() 84 | widest = single_source_dijkstra_widest(Gnx,source=src) 85 | t2 = time.time() - t1 86 | 87 | print('Time : '+str(t2)) 88 | print('Writing result ... ') 89 | 90 | # fill missing with DBL_MAX 91 | bwidest = np.full(M.shape[0], -sys.float_info.max, dtype=np.float64) 92 | for r in widest.keys(): 93 | bwidest[r] = widest[r] 94 | #print bwidest 95 | # write binary 96 | out_fname = os.path.splitext(os.path.basename(mmFile))[0] + '_T.widest_' + str(src) + '.bin' 97 | bwidest.tofile(out_fname, "") 98 | print ('Result is in the file: ' + out_fname) 99 | 100 | # write text 101 | #f = open('/tmp/ref_' + os.path.basename(mmFile) + '_widest.txt', 'w') 102 | #f.write(str(widest.values())) 103 | 104 | print('Done') 105 | -------------------------------------------------------------------------------- /test/generators/plod.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Usage:./plodg x\n"); 14 | printf("%s", "x is the size of the graph\n"); 15 | exit(0); 16 | } 17 | 18 | int main(int argc, char *argv[]) 19 | { 20 | 21 | /* " The Power Law Out Degree (PLOD) algorithm generates a scale-free graph from three parameters, n, alpha, and beta. 22 | [...] The value of beta controls the y-intercept of the curve, so that increasing beta increases the average degree of vertices (credit = beta*x^-alpha). 23 | [...] The value of alpha controls how steeply the curve drops off, with larger values indicating a steeper curve. */ 24 | // From Boost documentation http://www.boost.org/doc/libs/1_47_0/libs/graph/doc/plod_generator.html 25 | 26 | // we use setS aka std::set for edges storage 27 | // so we have at most one edges between 2 vertices 28 | // the extra cost is O(log(E/V)). 29 | typedef boost::adjacency_list Graph; 30 | typedef boost::plod_iterator SFGen; 31 | 32 | if (argc < 2) printUsageAndExit(); 33 | int size = atoi (argv[1]); 34 | assert (size > 1 && size < INT_MAX); 35 | double alpha = 2.57; // It is known that web graphs have alpha ~ 2.72. 36 | double beta = size*512+1024; // This will give an average degree ~ 15 37 | 38 | // generation 39 | std::cout << "generating ... "<<'\n'; 40 | boost::minstd_rand gen; 41 | Graph g(SFGen(gen, size, alpha, beta, false), SFGen(), size); 42 | boost::graph_traits::edge_iterator edge, edge_end; 43 | 44 | std::cout << "vertices : " << num_vertices(g) <<'\n'; 45 | std::cout << "edges : " << num_edges(g) <<'\n'; 46 | std::cout << "average degree : "<< static_cast(num_edges(g))/num_vertices(g)<< '\n'; 47 | // Print in matrix coordinate real general format 48 | std::cout << "writing ... 
"<<'\n'; 49 | std::stringstream tmp; 50 | tmp <<"local_test_data/plod_graph_" << size << ".mtx"; 51 | const std::string filename = tmp.str(); 52 | std::ofstream fout(tmp.str().c_str()) ; 53 | 54 | if (argv[2]==NULL) 55 | { 56 | // Power law out degree with random weights 57 | fout << "%%MatrixMarket matrix coordinate real general\n"; 58 | fout << num_vertices(g) <<' '<< num_vertices(g) <<' '<< num_edges(g) << '\n'; 59 | float val; 60 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 61 | { 62 | val = (rand()%10)+(rand()%100)*(1e-2f); 63 | fout << boost::source(*edge, g) << ' ' << boost::target(*edge, g)<< ' ' << val << '\n'; 64 | } 65 | } 66 | else if (argv[2][0]=='i') 67 | { 68 | // Power law in degree (ie the transpose will have a power law) 69 | // -- Edges only -- 70 | // * Wraning * edges will be unsorted, use sort_edges.cpp to sort the dataset. 71 | fout << num_vertices(g) <<' '<< num_edges(g) << '\n'; 72 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 73 | fout < 2 | #include 3 | #include //file output 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | void printUsageAndExit() 16 | { 17 | printf("%s", "Usage:./rmatg x y\n"); 18 | printf("%s", "x is the size of the graph, x>32 (Boost generator hang if x<32)\n"); 19 | printf("%s", "y is the source of sssp\n"); 20 | exit(0); 21 | } 22 | 23 | int main(int argc, char *argv[]) 24 | { 25 | // read size 26 | if (argc < 3) printUsageAndExit(); 27 | int size = atoi (argv[1]); 28 | if (size<32) printUsageAndExit(); 29 | int source_sssp =atoi (argv[2]); 30 | assert (size > 1 && size < INT_MAX); 31 | assert (source_sssp >= 0 && source_sssp < size); 32 | const unsigned num_edges = 15 * size; 33 | 34 | // Some boost types 35 | typedef boost::no_property VertexProperty; 36 | typedef boost::property EdgeProperty; 37 | typedef boost::adjacency_list Graph; 38 | typedef boost::unique_rmat_iterator RMATGen; 39 | typedef boost::graph_traits::vertex_descriptor vertex_descriptor; 40 | boost::minstd_rand gen; 41 | boost::graph_traits::edge_iterator edge, edge_end; 42 | 43 | /************************ 44 | * Random weights 45 | ************************/ 46 | // !!! WARNING !!! 47 | // watch the stack 48 | float* weight = new float[num_edges]; 49 | int count = 0; 50 | for( int i = 0; i < num_edges; ++i) 51 | weight[i] = (rand()%10)+(rand()%100)*(1.2e-2f); 52 | 53 | /************************ 54 | * RMAT Gen 55 | ************************/ 56 | Graph g(RMATGen(gen, size, num_edges, 0.57, 0.19, 0.19, 0.05,true),RMATGen(),weight, size); 57 | std::cout << "Generator : done. Edges = "< p(num_vertices(g)); 67 | std::vector d(num_vertices(g)); 68 | vertex_descriptor s = vertex(source_sssp, g); //define soruce node 69 | 70 | double start = omp_get_wtime(); 71 | dijkstra_shortest_paths(g, s, 72 | predecessor_map(boost::make_iterator_property_map(p.begin(), get(boost::vertex_index, g))). 
73 | distance_map(boost::make_iterator_property_map(d.begin(), get(boost::vertex_index, g)))); 74 | 75 | double stop = omp_get_wtime(); 76 | std::cout << "Time = " << stop-start << "s"<< std::endl; 77 | 78 | /************************ 79 | * Print 80 | ************************/ 81 | /* 82 | boost::graph_traits::vertex_iterator vi, vend; 83 | std::cout << "SOURCE = "<< source_sssp << std::endl; 84 | for (boost::tie(vi, vend) = vertices(g); vi != vend; ++vi) 85 | { 86 | if (d[*vi] != FLT_MAX) 87 | { 88 | std::cout << "d(" << *vi << ") = " << d[*vi] << ", "; 89 | std::cout << "parent = " << p[*vi] << std::endl; 90 | } 91 | else 92 | std::cout << "d(" << *vi << ") = INF"<< std::endl; 93 | } 94 | */ 95 | return 0; 96 | 97 | } 98 | 99 | -------------------------------------------------------------------------------- /cpp/include/bfs.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | 19 | #pragma once 20 | 21 | 22 | 23 | #include 24 | 25 | 26 | 27 | //Used in nvgraph.h 28 | 29 | #define TRAVERSAL_DEFAULT_ALPHA 15 30 | 31 | #define TRAVERSAL_DEFAULT_BETA 18 32 | 33 | 34 | 35 | #include "nvgraph_error.hxx" 36 | 37 | 38 | 39 | namespace nvgraph 40 | 41 | { 42 | 43 | template 44 | 45 | class Bfs 46 | 47 | { 48 | 49 | private: 50 | 51 | IndexType n, nnz; 52 | 53 | IndexType* row_offsets; 54 | 55 | IndexType* col_indices; 56 | 57 | 58 | 59 | bool directed; 60 | bool deterministic; 61 | 62 | 63 | // edgemask, distances, predecessors are set/read by users - using Vectors 64 | 65 | bool useEdgeMask; 66 | 67 | bool computeDistances; 68 | 69 | bool computePredecessors; 70 | 71 | 72 | 73 | IndexType *distances; 74 | 75 | IndexType *predecessors; 76 | 77 | int *edge_mask; 78 | 79 | 80 | 81 | //Working data 82 | 83 | //For complete description of each, go to bfs.cu 84 | 85 | 86 | 87 | IndexType nisolated; 88 | 89 | IndexType *frontier, *new_frontier; 90 | 91 | IndexType * original_frontier; 92 | 93 | IndexType vertices_bmap_size; 94 | 95 | int *visited_bmap, *isolated_bmap; 96 | 97 | IndexType *vertex_degree; 98 | 99 | IndexType *buffer_np1_1, *buffer_np1_2; 100 | 101 | IndexType *frontier_vertex_degree; 102 | 103 | IndexType *exclusive_sum_frontier_vertex_degree; 104 | 105 | IndexType *unvisited_queue; 106 | 107 | IndexType *left_unvisited_queue; 108 | 109 | IndexType *exclusive_sum_frontier_vertex_buckets_offsets; 110 | 111 | 112 | 113 | IndexType *d_counters_pad; 114 | 115 | IndexType *d_new_frontier_cnt; 116 | 117 | IndexType *d_mu; 118 | 119 | IndexType *d_unvisited_cnt; 120 | 121 | IndexType *d_left_unvisited_cnt; 122 | 123 | 124 | 125 | void *d_cub_exclusive_sum_storage; 126 | 127 | size_t cub_exclusive_sum_storage_bytes; 128 | 129 | 130 | 131 | //Parameters for direction optimizing 132 | 133 | IndexType alpha, beta; 134 | 135 | 136 | 137 | cudaStream_t stream; 138 | 139 | //resets pointers defined by 
d_counters_pad (see implem) 140 | 141 | void resetDevicePointers(); 142 | 143 | NVGRAPH_ERROR setup(); 144 | 145 | void clean(); 146 | 147 | public: 148 | 149 | virtual ~Bfs(void) { 150 | 151 | clean(); 152 | 153 | }; 154 | 155 | 156 | 157 | Bfs(IndexType _n, IndexType _nnz, IndexType *_row_offsets, IndexType *_col_indices, bool _directed, IndexType _alpha, IndexType _beta, cudaStream_t _stream = 0) : n(_n), nnz(_nnz), row_offsets(_row_offsets), col_indices(_col_indices), directed(_directed), alpha(_alpha), beta(_beta), stream(_stream) { 158 | 159 | setup(); 160 | 161 | } 162 | 163 | 164 | 165 | NVGRAPH_ERROR configure(IndexType *distances, IndexType *predecessors, int *edge_mask); 166 | 167 | NVGRAPH_ERROR traverse(IndexType source_vertex); 168 | 169 | //Used only for benchmarks 170 | 171 | NVGRAPH_ERROR traverse(IndexType *source_vertices, IndexType nsources); 172 | 173 | }; 174 | 175 | 176 | 177 | } // end namespace nvgraph 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /test/generators/convertors/H_to_HtSorted_and_a.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include // std::sort 5 | #include // std::vector 6 | // This code transpose a matrix H and compute the flag vector of empty rows a. 7 | // We assume that H is row-substochastic, in MatrixMarket format and data are sorted by row id 8 | // The output is filename_T.filetype, H is printed first then a is printed. 9 | 10 | struct elt { 11 | long int r; 12 | long int c; 13 | double v; 14 | }; 15 | 16 | void printUsageAndExit() 17 | { 18 | printf("%s", "Fatal Error\n"); 19 | printf("%s", "Usage: ./HTA H.mtx\n"); 20 | printf("%s", "NOTE1: H is the row-substochastic matrix of a graph\n"); 21 | printf("%s", "NOTE2: H is in MatrixMarket coordinate real general format\n"); 22 | printf("%s", "NOTE3: Data are sorted by row id\n"); 23 | printf("%s", "Output : H^t and the bookmark vector of empty rows\n"); 24 | printf("%s", "***This output fits the input of AMGX PageRank***\n"); 25 | exit(0); 26 | } 27 | 28 | inline bool operator< (const elt& a, const elt& b) 29 | { // ordered by row and then by colum inside a row 30 | return a.r A; 45 | std::vector a; 46 | // Get I/O names 47 | // The output is filename_T 48 | while (argv[1][i] != '\0') 49 | {outp[i] = argv[1][i];i++;} 50 | outp[i] = '_'; i++; 51 | outp[i] = 'T';i++; 52 | outp[i]='\0'; 53 | // Open files 54 | fpin = fopen(argv[1],"r"); 55 | fpout = fopen(outp,"w"); 56 | if (!fpin || !fpout) 57 | { 58 | printf("%s", "Fatal Error : I/O fail\n"); 59 | exit(0); 60 | } 61 | 62 | // Skip lines starting with "%%"" 63 | do 64 | { 65 | cc = fgetc(fpin); 66 | if (cc == '%') fgets(outp,128,fpin); 67 | } 68 | while (cc == '%'); 69 | fseek( fpin, -1, SEEK_CUR ); 70 | 71 | // Get n and nz 72 | fscanf(fpin,"%ld",&n); 73 | fscanf(fpin,"%ld",&n); 74 | fscanf(fpin,"%ld",&nz); 75 | 76 | // Print format and size 77 | fprintf(fpout, "%s", "%%"); 78 | fprintf(fpout,"MatrixMarket matrix coordinate real general\n"); 79 | fprintf(fpout, "%s", "%%"); 80 | fprintf(fpout,"AMGX rhs\n"); 81 | fprintf(fpout,"%ld %ld %ld\n",n, n, nz); 82 | 83 | // Empty rows at the begining 84 | fscanf(fpin,"%ld",&e.c); 85 | fscanf(fpin,"%ld",&e.r); 86 | fscanf(fpin,"%lf",&e.v); 87 | A.push_back(e); 88 | 89 | for (j=0; j(e.c)-1; j++) 90 | { 91 | std::cout< lastr) 105 | { 106 | if (e.c > lastr+1) 107 | { 108 | a.push_back(0); 109 | //Successive empty rows 110 | for (k=0; k(e.c)-lastr-1; k++) 111 | 
a.push_back(1); 112 | } 113 | else 114 | a.push_back(0); 115 | } 116 | } 117 | a.push_back(0); 118 | 119 | // Empty rows at the end 120 | for (k=a.size(); k::iterator it = A.begin() ; it != A.end(); ++it) 127 | fprintf(fpout,"%ld %ld %.9f\n",it->r, it->c, it->v); 128 | 129 | for (std::vector::iterator it = a.begin() ; it != a.end(); ++it) 130 | fprintf(fpout,"%u\n",*it); 131 | 132 | return 0; 133 | 134 | } 135 | 136 | -------------------------------------------------------------------------------- /external/cub_semiring/cub.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of the NVIDIA CORPORATION nor the 13 | * names of its contributors may be used to endorse or promote products 14 | * derived from this software without specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
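A tiny worked example of what the H_to_HtSorted_and_a.cpp converter above produces, using a hypothetical 3x3 row-substochastic H whose row 2 is empty (this is not one of the shipped datasets). Input H, sorted by row id:

    %%MatrixMarket matrix coordinate real general
    3 3 3
    1 2 0.500000
    1 3 0.500000
    3 1 1.000000

Output (written to the input name with _T appended): H^t sorted by row, followed by the bookmark vector a, which flags row 2 of H as empty, i.e. vertex 2 as a dangling node:

    %%MatrixMarket matrix coordinate real general
    %%AMGX rhs
    3 3 3
    1 3 1.000000000
    2 1 0.500000000
    3 1 0.500000000
    0
    1
    0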
26 | * 27 | ******************************************************************************/ 28 | 29 | /** 30 | * \file 31 | * CUB umbrella include file 32 | */ 33 | 34 | #pragma once 35 | 36 | 37 | // Block 38 | #include "block/block_histogram.cuh" 39 | #include "block/block_discontinuity.cuh" 40 | #include "block/block_exchange.cuh" 41 | #include "block/block_load.cuh" 42 | #include "block/block_radix_rank.cuh" 43 | #include "block/block_radix_sort.cuh" 44 | #include "block/block_reduce.cuh" 45 | #include "block/block_scan.cuh" 46 | #include "block/block_store.cuh" 47 | //#include "block/block_shift.cuh" 48 | 49 | // Device 50 | #include "device/device_histogram.cuh" 51 | #include "device/device_partition.cuh" 52 | #include "device/device_radix_sort.cuh" 53 | #include "device/device_reduce.cuh" 54 | #include "device/device_run_length_encode.cuh" 55 | #include "device/device_scan.cuh" 56 | #include "device/device_segmented_radix_sort.cuh" 57 | #include "device/device_segmented_reduce.cuh" 58 | #include "device/device_select.cuh" 59 | #include "device/device_spmv.cuh" 60 | 61 | // Grid 62 | //#include "grid/grid_barrier.cuh" 63 | #include "grid/grid_even_share.cuh" 64 | #include "grid/grid_mapping.cuh" 65 | #include "grid/grid_queue.cuh" 66 | 67 | // Thread 68 | #include "thread/thread_load.cuh" 69 | #include "thread/thread_operators.cuh" 70 | #include "thread/thread_reduce.cuh" 71 | #include "thread/thread_scan.cuh" 72 | #include "thread/thread_store.cuh" 73 | 74 | // Warp 75 | #include "warp/warp_reduce.cuh" 76 | #include "warp/warp_scan.cuh" 77 | 78 | // Iterator 79 | #include "iterator/arg_index_input_iterator.cuh" 80 | #include "iterator/cache_modified_input_iterator.cuh" 81 | #include "iterator/cache_modified_output_iterator.cuh" 82 | #include "iterator/constant_input_iterator.cuh" 83 | #include "iterator/counting_input_iterator.cuh" 84 | #include "iterator/tex_obj_input_iterator.cuh" 85 | #include "iterator/tex_ref_input_iterator.cuh" 86 | #include "iterator/transform_input_iterator.cuh" 87 | 88 | // Util 89 | #include "util_arch.cuh" 90 | #include "util_debug.cuh" 91 | #include "util_device.cuh" 92 | #include "util_macro.cuh" 93 | #include "util_ptx.cuh" 94 | #include "util_type.cuh" 95 | 96 | -------------------------------------------------------------------------------- /cpp/include/app/nvlouvain_app.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "test_opt_utils.cuh" 21 | #include "graph_utils.cuh" 22 | 23 | //#define ENABLE_LOG TRUE 24 | #define ENALBE_LOUVAIN true 25 | 26 | #include "nvlouvain.cuh" 27 | #include "gtest/gtest.h" 28 | #include "high_res_clock.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | using T = float; 38 | 39 | int main(int argc, char* argv[]){ 40 | 41 | if(argc < 2) 42 | { 43 | std::cout<< "Help : ./louvain_test matrix_market_file.mtx"<(fin, 1, &mc, &m, &k, &nnz)) ,0); 53 | EXPECT_EQ(m,k); 54 | 55 | thrust::host_vector coo_ind_h(nnz); 56 | thrust::host_vector csr_ptr_h(m+1); 57 | thrust::host_vector csr_ind_h(nnz); 58 | thrust::host_vector csr_val_h(nnz); 59 | 60 | EXPECT_EQ( (mm_to_coo(fin, 1, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL)), 0); 61 | EXPECT_EQ( (coo_to_csr (m, k, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL, &csr_ptr_h[0], NULL, NULL, NULL)), 0); 62 | 63 | EXPECT_EQ(fclose(fin),0); 64 | 65 | thrust::device_vector csr_ptr_d(csr_ptr_h); 66 | thrust::device_vector csr_ind_d(csr_ind_h); 67 | thrust::device_vector csr_val_d(csr_val_h); 68 | 69 | thrust::device_vector tmp_1(nnz); 70 | thrust::fill(thrust::cuda::par, tmp_1.begin(), tmp_1.end(), 1.0); 71 | thrust::device_vector::iterator max_ele = thrust::max_element(thrust::cuda::par, csr_val_d.begin(), csr_val_d.end()); 72 | 73 | bool weighted = (*max_ele!=1.0); 74 | 75 | //std::cout<<(weighted?"Weighted ":"Not Weigthed ")<<" n_vertex: "< cluster_d(m, 0); 85 | int* csr_ptr_ptr = thrust::raw_pointer_cast(csr_ptr_d.data()); 86 | int* csr_ind_ptr = thrust::raw_pointer_cast(csr_ind_d.data()); 87 | T* csr_val_ptr = thrust::raw_pointer_cast(csr_val_d.data()); 88 | int* init_cluster_ptr = thrust::raw_pointer_cast(cluster_d.data()); 89 | int num_level; 90 | 91 | cudaProfilerStart(); 92 | hr_clock.start(); 93 | nvlouvain::louvain(csr_ptr_ptr, csr_ind_ptr, csr_val_ptr, 94 | m, nnz, 95 | weighted, has_init_cluster, 96 | init_cluster_ptr, final_modulartiy, clustering_h, num_level); 97 | 98 | hr_clock.stop(&louvain_time); 99 | cudaProfilerStop(); 100 | 101 | std::cout<<"Final modularity: "< 20 | #include "test_opt_utils.h" 21 | #include "graph_utils.cuh" 22 | #include "louvain.cuh" 23 | #include "gtest/gtest.h" 24 | #include "high_res_clock.h" 25 | #include "util.cuh" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | template< typename IdxIter, typename ValIter, typename ValType > 34 | __global__ void kernal_k_in_test(const int size, IdxIter csr_ptr_iter, IdxIter csr_ind_iter, ValIter csr_val_iter, IdxIter cluster_iter, int i, ValType* result){ 35 | /* 36 | 37 | //printf("successfully launch kernal\n"); 38 | 39 | int idx_x = blockDim.x*blockIdx.x + threadIdx.x; 40 | int idx_y = blockDim.y*blockIdx.y + threadIdx.y; 41 | 42 | if(idx_x < size && idx_y < size ){ 43 | 44 | int c = *( cluster_iter + idx_y); 45 | //printf(" ** %d %d\n", idx_x, idx_y); 46 | //printf("start compute k with iter passing. 
(%d, %d, %d) idx = %d %f\n", blockDim.x, blockIdx.x, threadIdx.x, idx, result[idx]); 47 | nvlouvain::compute_k_i_in(size, csr_ptr_iter, csr_ind_iter, csr_val_iter, cluster_iter, c, idx_x, &result[idx_x *size + idx_y ]); 48 | // n_vertex, csr_ptr_iter, csr_idx_iter, csr_val_iter, cluster_iter, c, i, result 49 | printf("k_%d_in_c%d = %f\n", idx_x, idx_y ,result[idx_x *size + idx_y]); 50 | 51 | } 52 | */ 53 | /* 54 | if(idx == 0){ 55 | nvlouvain::display_vec(csr_ptr_iter, size); 56 | nvlouvain::display_vec(csr_ind_iter, csr_ptr_iter[size]); 57 | nvlouvain::display_vec(csr_val_iter, csr_ptr_iter[size]); 58 | 59 | } 60 | */ 61 | return; 62 | 63 | } 64 | 65 | 66 | void k_i_in_compute_test( thrust::device_vector &csr_ptr_d, 67 | thrust::device_vector &csr_ind_d, 68 | thrust::device_vector &csr_val_d, 69 | int size){ 70 | 71 | HighResClock hr_clock; 72 | double timed; 73 | 74 | 75 | dim3 block_size((size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, (size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, 1); 76 | dim3 grid_size(BLOCK_SIZE_2D, BLOCK_SIZE_2D, 1); 77 | 78 | std::cout<< csr_ptr_d.size()<<" "< result_d(size * size); 80 | thrust::device_vector cluster_d(size); 81 | 82 | T* result_ptr = thrust::raw_pointer_cast(result_d.data()); 83 | 84 | 85 | hr_clock.start(); 86 | int i = 0; 87 | std::cout<<"successfully declair device vector.\n"; 88 | kernal_k_in_test<<>>(size, csr_ptr_d.begin(), csr_ind_d.begin(), csr_val_d.begin(), cluster_d.begin(), i, result_ptr); 89 | CUDA_CALL(cudaDeviceSynchronize()); 90 | 91 | hr_clock.stop(&timed); 92 | double iter_time(timed); 93 | nvlouvain::display_vec(result_d); 94 | 95 | std::cout<<"k_i_in runtime: "< &csr_ptr_d, 102 | thrust::device_vector &csr_ind_d, 103 | thrust::device_vector &csr_val_d, 104 | int size){ 105 | for_each_n() 106 | }*/ 107 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_cublas.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include "debug_macros.h" 22 | 23 | namespace nvgraph 24 | { 25 | class Cublas; 26 | 27 | class Cublas 28 | { 29 | private: 30 | static cublasHandle_t m_handle; 31 | // Private ctor to prevent instantiation. 32 | Cublas(); 33 | ~Cublas(); 34 | public: 35 | 36 | // Get the handle. 
37 | static cublasHandle_t get_handle() 38 | { 39 | if (m_handle == 0) 40 | CHECK_CUBLAS(cublasCreate(&m_handle)); 41 | return m_handle; 42 | } 43 | 44 | static void destroy_handle() 45 | { 46 | if (m_handle != 0) 47 | CHECK_CUBLAS(cublasDestroy(m_handle)); 48 | m_handle = 0; 49 | } 50 | 51 | static void set_pointer_mode_device(); 52 | static void set_pointer_mode_host(); 53 | static void setStream(cudaStream_t stream) 54 | { 55 | cublasHandle_t handle = Cublas::get_handle(); 56 | CHECK_CUBLAS(cublasSetStream(handle, stream)); 57 | } 58 | 59 | template 60 | static void axpy(int n, T alpha, 61 | const T* x, int incx, 62 | T* y, int incy); 63 | 64 | template 65 | static void copy(int n, const T* x, int incx, 66 | T* y, int incy); 67 | 68 | template 69 | static void dot(int n, const T* x, int incx, 70 | const T* y, int incy, 71 | T* result); 72 | 73 | template 74 | static void gemv(bool transposed, int m, int n, 75 | const T* alpha, const T* A, int lda, 76 | const T* x, int incx, 77 | const T* beta, T* y, int incy); 78 | 79 | template 80 | static void gemv_ext(bool transposed, const int m, const int n, 81 | const T* alpha, const T* A, const int lda, 82 | const T* x, const int incx, 83 | const T* beta, T* y, const int incy, const int offsetx, const int offsety, const int offseta); 84 | 85 | template 86 | static void trsv_v2( cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, 87 | const T *A, int lda, T *x, int incx, int offseta); 88 | 89 | template 90 | static void ger(int m, int n, const T* alpha, 91 | const T* x, int incx, 92 | const T* y, int incy, 93 | T* A, int lda); 94 | 95 | template 96 | static T nrm2(int n, const T* x, int incx); 97 | template 98 | static void nrm2(int n, const T* x, int incx, T* result); 99 | 100 | template 101 | static void scal(int n, T alpha, T* x, int incx); 102 | template 103 | static void scal(int n, T* alpha, T* x, int incx); 104 | 105 | template 106 | static void gemm(bool transa, bool transb, int m, int n, int k, 107 | const T * alpha, const T * A, int lda, 108 | const T * B, int ldb, 109 | const T * beta, T * C, int ldc); 110 | 111 | template 112 | static void geam(bool transa, bool transb, int m, int n, 113 | const T * alpha, const T * A, int lda, 114 | const T * beta, const T * B, int ldb, 115 | T * C, int ldc); 116 | 117 | }; 118 | 119 | } // end namespace nvgraph 120 | 121 | -------------------------------------------------------------------------------- /external/cub_semiring/util_macro.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of the NVIDIA CORPORATION nor the 13 | * names of its contributors may be used to endorse or promote products 14 | * derived from this software without specific prior written permission. 
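A hedged usage sketch for the static Cublas wrapper above. The device pointers are assumed to be allocated and populated elsewhere, and the float instantiations of the templated members are assumed to be provided by the corresponding .cu translation unit; only the declarations are visible in this header.

    #include <cuda_runtime.h>
    #include "nvgraph_cublas.hxx"

    void saxpy_and_norm(int n, const float* d_x, float* d_y, cudaStream_t stream)
    {
        nvgraph::Cublas::setStream(stream);           // lazily creates the shared handle
        nvgraph::Cublas::set_pointer_mode_host();     // scalars passed from host memory
        nvgraph::Cublas::axpy(n, 2.0f, d_x, 1, d_y, 1);   // y = 2*x + y
        float nrm = nvgraph::Cublas::nrm2(n, d_y, 1);     // ||y||_2 returned to the host
        (void)nrm;
    }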
15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | ******************************************************************************/ 28 | 29 | /****************************************************************************** 30 | * Common C/C++ macro utilities 31 | ******************************************************************************/ 32 | 33 | #pragma once 34 | 35 | #include "util_namespace.cuh" 36 | 37 | /// Optional outer namespace(s) 38 | CUB_NS_PREFIX 39 | 40 | /// CUB namespace 41 | namespace cub { 42 | 43 | 44 | /** 45 | * \addtogroup UtilModule 46 | * @{ 47 | */ 48 | 49 | #ifndef CUB_ALIGN 50 | #if defined(_WIN32) || defined(_WIN64) 51 | /// Align struct 52 | #define CUB_ALIGN(bytes) __declspec(align(32)) 53 | #else 54 | /// Align struct 55 | #define CUB_ALIGN(bytes) __attribute__((aligned(bytes))) 56 | #endif 57 | #endif 58 | 59 | #ifndef CUB_MAX 60 | /// Select maximum(a, b) 61 | #define CUB_MAX(a, b) (((b) > (a)) ? (b) : (a)) 62 | #endif 63 | 64 | #ifndef CUB_MIN 65 | /// Select minimum(a, b) 66 | #define CUB_MIN(a, b) (((b) < (a)) ? (b) : (a)) 67 | #endif 68 | 69 | #ifndef CUB_QUOTIENT_FLOOR 70 | /// Quotient of x/y rounded down to nearest integer 71 | #define CUB_QUOTIENT_FLOOR(x, y) ((x) / (y)) 72 | #endif 73 | 74 | #ifndef CUB_QUOTIENT_CEILING 75 | /// Quotient of x/y rounded up to nearest integer 76 | #define CUB_QUOTIENT_CEILING(x, y) (((x) + (y) - 1) / (y)) 77 | #endif 78 | 79 | #ifndef CUB_ROUND_UP_NEAREST 80 | /// x rounded up to the nearest multiple of y 81 | #define CUB_ROUND_UP_NEAREST(x, y) ((((x) + (y) - 1) / (y)) * y) 82 | #endif 83 | 84 | #ifndef CUB_ROUND_DOWN_NEAREST 85 | /// x rounded down to the nearest multiple of y 86 | #define CUB_ROUND_DOWN_NEAREST(x, y) (((x) / (y)) * y) 87 | #endif 88 | 89 | 90 | #ifndef CUB_STATIC_ASSERT 91 | #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 92 | #define CUB_CAT_(a, b) a ## b 93 | #define CUB_CAT(a, b) CUB_CAT_(a, b) 94 | #endif // DOXYGEN_SHOULD_SKIP_THIS 95 | 96 | /// Static assert 97 | #define CUB_STATIC_ASSERT(cond, msg) typedef int CUB_CAT(cub_static_assert, __LINE__)[(cond) ? 1 : -1] 98 | #endif 99 | 100 | /** @} */ // end group UtilModule 101 | 102 | } // CUB namespace 103 | CUB_NS_POSTFIX // Optional outer namespace(s) 104 | -------------------------------------------------------------------------------- /cpp/include/test/thrust_test.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include 19 | #include "test_opt_utils.h" 20 | #include "graph_utils.cuh" 21 | #include "louvain.cuh" 22 | #include "gtest/gtest.h" 23 | #include "high_res_clock.h" 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | 32 | 33 | 34 | template 35 | __global__ void test_sum(iter begin, iter end, ptr sum){ 36 | 37 | thrust::plus op; 38 | *sum = thrust::reduce(thrust::cuda::par, begin, end, 0.0, op); 39 | 40 | } 41 | 42 | __global__ void test_sum_cast(T* vec, size_t size, T* sum){ 43 | 44 | thrust::plus op; 45 | *sum = thrust::reduce(thrust::cuda::par, vec, vec+size, 0.0, op); 46 | 47 | } 48 | 49 | 50 | void thrust_passing_arg_test( thrust::host_vector &csr_ptr_h, 51 | thrust::host_vector &csr_ind_h, 52 | thrust::host_vector &csr_val_h, 53 | thrust::device_vector &csr_ptr_d, 54 | thrust::device_vector &csr_ind_d, 55 | thrust::device_vector &csr_val_d){ 56 | 57 | HighResClock hr_clock; 58 | double timed; 59 | 60 | thrust::plus binary_op; 61 | hr_clock.start(); 62 | T sum_h = thrust::reduce(csr_val_h.begin(), csr_val_h.end(), 0.0, binary_op); 63 | hr_clock.stop(&timed); 64 | double cpu_time(timed); 65 | 66 | 67 | 68 | thrust::copy(csr_val_d.begin(), csr_val_d.end(), std::ostream_iterator(std::cout, " ")); 69 | std::cout< sum_d(1, 0.0); 86 | test_sum<<>>( csr_val_d.begin(),csr_val_d.end(), sum_d.data()); 87 | CUDA_CALL(cudaDeviceSynchronize()); 88 | hr_clock.stop(&timed); 89 | double cuda_time(timed); 90 | 91 | 92 | hr_clock.start(); 93 | cudaStream_t s; 94 | thrust::device_vector sum_a(1, 0.0); 95 | cudaStreamCreate(&s); 96 | test_sum<<<1,1,0,s>>>(csr_val_d.begin(),csr_val_d.end(), sum_a.data()); 97 | cudaStreamSynchronize(s); 98 | hr_clock.stop(&timed); 99 | double asyn_time(timed); 100 | 101 | 102 | 103 | hr_clock.start(); 104 | T* csr_val_ptr = thrust::raw_pointer_cast(csr_val_d.data()); 105 | double* raw_sum; 106 | double sum_cast; 107 | cudaMalloc((void **) &raw_sum, sizeof(double)); 108 | test_sum_cast<<>>( csr_val_ptr, csr_val_d.size(), raw_sum); 109 | cudaMemcpy(&sum_cast, raw_sum, sizeof(double),cudaMemcpyDeviceToHost); 110 | CUDA_CALL(cudaDeviceSynchronize()); 111 | hr_clock.stop(&timed); 112 | double cast_time(timed); 113 | cudaFree(raw_sum); 114 | 115 | 116 | 117 | 118 | std::cout<<"cpu sum of val: "<< sum_h <<" runtime: "< 17 | #include 18 | #include 19 | #include 20 | #include "test_opt_utils.cuh" 21 | #include "graph_utils.cuh" 22 | 23 | //#define ENABLE_LOG true 24 | #define ENALBE_LOUVAIN true 25 | 26 | #include "nvlouvain.cuh" 27 | #include "gtest/gtest.h" 28 | #include "high_res_clock.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | using T = double; 38 | 39 | int main(int argc, char* argv[]){ 40 | 41 | if(argc < 2) 42 | { 43 | std::cout<< "Help : ./louvain_test matrix_market_file.mtx"<(fin, 1, &mc, &m, &k, &nnz)) ,0); 53 | EXPECT_EQ(m,k); 54 | 55 | thrust::host_vector coo_ind_h(nnz); 56 | thrust::host_vector csr_ptr_h(m+1); 57 | thrust::host_vector csr_ind_h(nnz); 58 | thrust::host_vector csr_val_h(nnz); 59 | 
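For comparison with the in-kernel thrust::reduce calls in thrust_test.cuh above, the sketch below shows the more common host-side form of the same sum; thrust::cuda::par.on(stream) keeps the reduction on a chosen stream. This is an illustrative alternative, not code taken from the test.

    #include <thrust/device_vector.h>
    #include <thrust/functional.h>
    #include <thrust/reduce.h>
    #include <thrust/system/cuda/execution_policy.h>

    // Host-side reduction over a device_vector; Thrust launches its own kernels.
    double device_sum(const thrust::device_vector<double>& v, cudaStream_t s)
    {
        return thrust::reduce(thrust::cuda::par.on(s),
                              v.begin(), v.end(),
                              0.0, thrust::plus<double>());
    }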
60 | EXPECT_EQ( (mm_to_coo(fin, 1, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL)), 0); 61 | EXPECT_EQ( (coo_to_csr (m, k, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL, &csr_ptr_h[0], NULL, NULL, NULL)), 0); 62 | 63 | EXPECT_EQ(fclose(fin),0); 64 | 65 | thrust::device_vector csr_ptr_d(csr_ptr_h); 66 | thrust::device_vector csr_ind_d(csr_ind_h); 67 | thrust::device_vector csr_val_d(csr_val_h); 68 | 69 | thrust::device_vector tmp_1(nnz); 70 | thrust::fill(thrust::cuda::par, tmp_1.begin(), tmp_1.end(), 1.0); 71 | thrust::device_vector::iterator max_ele = thrust::max_element(thrust::cuda::par, csr_val_d.begin(), csr_val_d.end()); 72 | 73 | bool weighted = (*max_ele!=1.0); 74 | 75 | //std::cout<<(weighted?"Weighted ":"Not Weigthed ")<<" n_vertex: "< cluster_d(m, 0); 84 | std::vector< std::vector > best_cluster_vec; 85 | int* csr_ptr_ptr = thrust::raw_pointer_cast(csr_ptr_d.data()); 86 | int* csr_ind_ptr = thrust::raw_pointer_cast(csr_ind_d.data()); 87 | T* csr_val_ptr = thrust::raw_pointer_cast(csr_val_d.data()); 88 | int* init_cluster_ptr = thrust::raw_pointer_cast(cluster_d.data()); 89 | int num_level; 90 | 91 | cudaProfilerStart(); 92 | hr_clock.start(); 93 | 94 | nvlouvain::louvain(csr_ptr_ptr, csr_ind_ptr, csr_val_ptr, 95 | m, nnz, 96 | weighted, has_init_cluster, 97 | init_cluster_ptr, final_modulartiy, best_cluster_vec, num_level); 98 | 99 | hr_clock.stop(&louvain_time); 100 | cudaProfilerStop(); 101 | 102 | std::cout<<"Final modularity: "<::iterator it = best_cluster_vec[i].begin(); it != best_cluster_vec[i].end(); ++it) 108 | // std::cout << *it <<' '; 109 | // std::cout << std::endl; 110 | //} 111 | } 112 | return 0; 113 | } 114 | 115 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_vector.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | #include "nvgraph_error.hxx" 20 | #include "nvgraph_vector_kernels.hxx" 21 | 22 | #include "debug_macros.h" 23 | 24 | namespace nvgraph 25 | { 26 | 27 | /*! A Vector contains a device vector of size |E| and type T 28 | */ 29 | template 30 | class Vector 31 | { 32 | public: 33 | //typedef IndexType_ IndexType; 34 | typedef ValueType_ ValueType; 35 | 36 | protected: 37 | /*! Storage for the values. 38 | */ 39 | SHARED_PREFIX::shared_ptr values; 40 | 41 | /*! Size of the array 42 | */ 43 | size_t size; 44 | 45 | /*! Storage for a cuda stream 46 | */ 47 | //, cudaStream_t stream = 0 48 | 49 | public: 50 | 51 | /*! Construct an empty \p Vector. 52 | */ 53 | Vector(void) {} 54 | ~Vector(void) {} 55 | /*! Construct a \p Vector of size vertices. 
56 | * 57 | * \param vertices The size of the Vector 58 | */ 59 | Vector(size_t vertices, cudaStream_t stream = 0) 60 | : values(allocateDevice(vertices, stream)), 61 | size(vertices) {} 62 | 63 | 64 | size_t get_size() const { return size; } 65 | size_t bytes() const { return size*sizeof(ValueType);} 66 | ValueType* raw() const { return values.get(); } 67 | //cudaStream_t get_stream() const { return stream_; } 68 | void allocate(size_t n, cudaStream_t stream = 0) 69 | { 70 | size = n; 71 | values = allocateDevice(n, stream); 72 | } 73 | 74 | void attach(size_t n, ValueType* vals, cudaStream_t stream = 0) 75 | { 76 | size = n; 77 | values = attachDevicePtr(vals, stream); 78 | } 79 | 80 | Vector(size_t vertices, ValueType * vals, cudaStream_t stream = 0) 81 | : values(attachDevicePtr(vals, stream)), 82 | size(vertices) {} 83 | 84 | void fill(ValueType val, cudaStream_t stream = 0) 85 | { 86 | fill_raw_vec(this->raw(), this->get_size(), val, stream); 87 | } 88 | void copy(Vector &vec1, cudaStream_t stream = 0) 89 | { 90 | if (this->get_size() == 0 && vec1.get_size()>0) 91 | { 92 | allocate(vec1.get_size(), stream); 93 | copy_vec(vec1.raw(), this->get_size(), this->raw(), stream); 94 | } 95 | else if (this->get_size() == vec1.get_size()) 96 | copy_vec(vec1.raw(), this->get_size(), this->raw(), stream); 97 | else if (this->get_size() > vec1.get_size()) 98 | { 99 | //COUT() << "Warning Copy : sizes mismatch "<< this->get_size() <<':'<< vec1.get_size() <raw(), stream); 101 | //dump_raw_vec (this->raw(), vec1.get_size(), 0); 102 | } 103 | else 104 | { 105 | FatalError("Cannot copy a vector into a smaller one", NVGRAPH_ERR_BAD_PARAMETERS); 106 | } 107 | } 108 | void dump(size_t off, size_t sz, cudaStream_t stream = 0) 109 | { 110 | if ((off+sz)<= this->size) 111 | dump_raw_vec(this->raw(), sz, off, stream); 112 | else 113 | FatalError("Offset and Size values doesn't make sense", NVGRAPH_ERR_BAD_PARAMETERS); 114 | } 115 | void flag_zeros(Vector & flags, cudaStream_t stream = 0) 116 | { 117 | flag_zeros_raw_vec(this->get_size(), this->raw(), flags.raw(), stream); 118 | } 119 | 120 | ValueType nrm1(cudaStream_t stream = 0) 121 | { 122 | ValueType res = 0; 123 | nrm1_raw_vec(this->raw(), this->get_size(), &res, stream); 124 | return res; 125 | } 126 | }; // class Vector 127 | } // end namespace nvgraph 128 | 129 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_convert.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #pragma once 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | namespace nvgraph{ 23 | void csr2coo( const int *csrSortedRowPtr, 24 | int nnz, int m, 25 | int *cooRowInd, 26 | cusparseIndexBase_t idxBase); 27 | void coo2csr( const int *cooRowInd, 28 | int nnz, int m, 29 | int *csrSortedRowPtr, 30 | cusparseIndexBase_t idxBase ); 31 | 32 | void csr2csc( int m, int n, int nnz, 33 | const void *csrVal, const int *csrRowPtr, const int *csrColInd, 34 | void *cscVal, int *cscRowInd, int *cscColPtr, 35 | cusparseAction_t copyValues, cusparseIndexBase_t idxBase, 36 | cudaDataType_t *dataType); 37 | void csc2csr( int m, int n, int nnz, 38 | const void *cscVal, const int *cscRowInd, const int *cscColPtr, 39 | void *csrVal, int *csrRowPtr, int *csrColInd, 40 | cusparseAction_t copyValues, cusparseIndexBase_t idxBase, 41 | cudaDataType_t *dataType); 42 | 43 | void csr2cscP( int m, int n, int nnz, 44 | const int *csrRowPtr, const int *csrColInd, 45 | int *cscRowInd, int *cscColPtr, int *p, cusparseIndexBase_t idxBase); 46 | 47 | 48 | void cooSortBySource(int m, int n, int nnz, 49 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 50 | void *dstVal, int *dstRowInd, int *dstColInd, 51 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 52 | void cooSortByDestination(int m, int n, int nnz, 53 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 54 | void *dstVal, int *dstRowInd, int *dstColInd, 55 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 56 | 57 | void coos2csc(int m, int n, int nnz, 58 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 59 | void *dstVal, int *dstRowInd, int *dstColInd, 60 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 61 | void cood2csr(int m, int n, int nnz, 62 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 63 | void *dstVal, int *dstRowInd, int *dstColInd, 64 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 65 | void coou2csr(int m, int n, int nnz, 66 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 67 | void *dstVal, int *dstRowInd, int *dstColInd, 68 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 69 | void coou2csc(int m, int n, int nnz, 70 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 71 | void *dstVal, int *dstRowInd, int *dstColInd, 72 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 73 | 74 | ////////////////////////// Utility functions ////////////////////////// 75 | void createIdentityPermutation(int n, int *p); 76 | void gthrX(int nnz, const void *y, void *xVal, const int *xInd, 77 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 78 | 79 | void cooSortBufferSize(int m, int n, int nnz, const int *cooRows, const int *cooCols, size_t *pBufferSizeInBytes); 80 | void cooGetSourcePermutation(int m, int n, int nnz, int *cooRows, int *cooCols, int *p, void *pBuffer); 81 | void cooGetDestinationPermutation(int m, int n, int nnz, int *cooRows, int *cooCols, int *p, void *pBuffer); 82 | 83 | void csr2csc2BufferSize(int m, int n, int nnz, const int *csrRowPtr, const int *csrColInd, size_t *pBufferSize); 84 | void csr2csc2(int m, int n, int nnz, 85 | const int *csrRowPtr, const int *csrColInd, 86 | int *cscRowInd, int *cscColPtr, int *p, void *pBuffer, 87 | cusparseIndexBase_t idxBase); 88 | 89 | } //end nvgraph namespace 90 | -------------------------------------------------------------------------------- /cpp/include/kmeans.hxx: 
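A usage sketch for the conversion interface declared in nvgraph_convert.hxx above. It assumes these functions are thin wrappers over cuSPARSE conversion routines that operate on device arrays; the helper name and the zero index base are illustrative only, not taken from the library.

#include <cusparse.h>
#include "nvgraph_convert.hxx"

// Expand a CSR row-pointer array into COO row indices and rebuild it again.
// csr_ptr_d, coo_rows_d and csr_ptr_out_d are assumed to be device allocations
// of size m+1, nnz and m+1 respectively (allocation and error checks omitted).
void hypothetical_csr_coo_roundtrip(const int* csr_ptr_d, int m, int nnz,
                                    int* coo_rows_d, int* csr_ptr_out_d)
{
    nvgraph::csr2coo(csr_ptr_d, nnz, m, coo_rows_d, CUSPARSE_INDEX_BASE_ZERO);
    nvgraph::coo2csr(coo_rows_d, nnz, m, csr_ptr_out_d, CUSPARSE_INDEX_BASE_ZERO);
}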
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include "nvgraph_error.hxx" 19 | 20 | namespace nvgraph { 21 | 22 | /// Find clusters with k-means algorithm 23 | /** Initial centroids are chosen with k-means++ algorithm. Empty 24 | * clusters are reinitialized by choosing new centroids with 25 | * k-means++ algorithm. 26 | * 27 | * CNMEM must be initialized before calling this function. 28 | * 29 | * @param cublasHandle_t cuBLAS handle. 30 | * @param n Number of observation vectors. 31 | * @param d Dimension of observation vectors. 32 | * @param k Number of clusters. 33 | * @param tol Tolerance for convergence. k-means stops when the 34 | * change in residual divided by n is less than tol. 35 | * @param maxiter Maximum number of k-means iterations. 36 | * @param obs (Input, device memory, d*n entries) Observation 37 | * matrix. Matrix is stored column-major and each column is an 38 | * observation vector. Matrix dimensions are d x n. 39 | * @param codes (Output, device memory, n entries) Cluster 40 | * assignments. 41 | * @param residual On exit, residual sum of squares (sum of squares 42 | * of distances between observation vectors and centroids). 43 | * @param On exit, number of k-means iterations. 44 | * @return NVGRAPH error flag. 45 | */ 46 | template 47 | NVGRAPH_ERROR kmeans(IndexType_ n, IndexType_ d, IndexType_ k, 48 | ValueType_ tol, IndexType_ maxiter, 49 | const ValueType_ * __restrict__ obs, 50 | IndexType_ * __restrict__ codes, 51 | ValueType_ & residual, 52 | IndexType_ & iters); 53 | 54 | /// Find clusters with k-means algorithm 55 | /** Initial centroids are chosen with k-means++ algorithm. Empty 56 | * clusters are reinitialized by choosing new centroids with 57 | * k-means++ algorithm. 58 | * 59 | * @param n Number of observation vectors. 60 | * @param d Dimension of observation vectors. 61 | * @param k Number of clusters. 62 | * @param tol Tolerance for convergence. k-means stops when the 63 | * change in residual divided by n is less than tol. 64 | * @param maxiter Maximum number of k-means iterations. 65 | * @param obs (Input, device memory, d*n entries) Observation 66 | * matrix. Matrix is stored column-major and each column is an 67 | * observation vector. Matrix dimensions are d x n. 68 | * @param codes (Output, device memory, n entries) Cluster 69 | * assignments. 70 | * @param clusterSizes (Output, device memory, k entries) Number of 71 | * points in each cluster. 72 | * @param centroids (Output, device memory, d*k entries) Centroid 73 | * matrix. Matrix is stored column-major and each column is a 74 | * centroid. Matrix dimensions are d x k. 75 | * @param work (Output, device memory, n*max(k,d) entries) 76 | * Workspace. 77 | * @param work_int (Output, device memory, 2*d*n entries) 78 | * Workspace. 
79 | * @param residual_host (Output, host memory, 1 entry) Residual sum 80 | * of squares (sum of squares of distances between observation 81 | * vectors and centroids). 82 | * @param iters_host (Output, host memory, 1 entry) Number of 83 | * k-means iterations. 84 | * @return NVGRAPH error flag. 85 | */ 86 | template 87 | NVGRAPH_ERROR kmeans(IndexType_ n, IndexType_ d, IndexType_ k, 88 | ValueType_ tol, IndexType_ maxiter, 89 | const ValueType_ * __restrict__ obs, 90 | IndexType_ * __restrict__ codes, 91 | IndexType_ * __restrict__ clusterSizes, 92 | ValueType_ * __restrict__ centroids, 93 | ValueType_ * __restrict__ work, 94 | IndexType_ * __restrict__ work_int, 95 | ValueType_ * residual_host, 96 | IndexType_ * iters_host); 97 | 98 | } 99 | 100 | -------------------------------------------------------------------------------- /cpp/include/partition.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "nvgraph_error.hxx" 20 | #include "valued_csr_graph.hxx" 21 | #include "matrix.hxx" 22 | 23 | 24 | namespace nvgraph { 25 | #define SPECTRAL_USE_COLORING true 26 | 27 | #define SPECTRAL_USE_LOBPCG true 28 | #define SPECTRAL_USE_PRECONDITIONING true 29 | #define SPECTRAL_USE_SCALING_OF_EIGVECS false 30 | 31 | #define SPECTRAL_USE_MAGMA false 32 | #define SPECTRAL_USE_THROTTLE true 33 | #define SPECTRAL_USE_NORMALIZED_LAPLACIAN true 34 | #define SPECTRAL_USE_R_ORTHOGONALIZATION false 35 | 36 | /// Spectral graph partition 37 | /** Compute partition for a weighted undirected graph. This 38 | * partition attempts to minimize the cost function: 39 | * Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition) 40 | * 41 | * @param G Weighted graph in CSR format 42 | * @param nParts Number of partitions. 43 | * @param nEigVecs Number of eigenvectors to compute. 44 | * @param maxIter_lanczos Maximum number of Lanczos iterations. 45 | * @param restartIter_lanczos Maximum size of Lanczos system before 46 | * implicit restart. 47 | * @param tol_lanczos Convergence tolerance for Lanczos method. 48 | * @param maxIter_kmeans Maximum number of k-means iterations. 49 | * @param tol_kmeans Convergence tolerance for k-means algorithm. 50 | * @param parts (Output, device memory, n entries) Partition 51 | * assignments. 52 | * @param iters_lanczos On exit, number of Lanczos iterations 53 | * performed. 54 | * @param iters_kmeans On exit, number of k-means iterations 55 | * performed. 56 | * @return NVGRAPH error flag. 
57 |  */
58 | template <typename IndexType_, typename ValueType_>
59 | NVGRAPH_ERROR partition( ValuedCsrGraph<IndexType_, ValueType_>& G,
60 |                          IndexType_ nParts,
61 |                          IndexType_ nEigVecs,
62 |                          IndexType_ maxIter_lanczos,
63 |                          IndexType_ restartIter_lanczos,
64 |                          ValueType_ tol_lanczos,
65 |                          IndexType_ maxIter_kmeans,
66 |                          ValueType_ tol_kmeans,
67 |                          IndexType_ * __restrict__ parts,
68 |                          Vector<ValueType_> &eigVals,
69 |                          Vector<ValueType_> &eigVecs,
70 |                          IndexType_ & iters_lanczos,
71 |                          IndexType_ & iters_kmeans);
72 | 
73 | template <typename IndexType_, typename ValueType_>
74 | NVGRAPH_ERROR partition_lobpcg( ValuedCsrGraph<IndexType_, ValueType_>& G, Matrix<IndexType_, ValueType_> * M, cusolverDnHandle_t cusolverHandle,
75 |                                 IndexType_ nParts,
76 |                                 IndexType_ nEigVecs,
77 |                                 IndexType_ maxIter_lanczos,
78 |                                 ValueType_ tol_lanczos,
79 |                                 IndexType_ maxIter_kmeans,
80 |                                 ValueType_ tol_kmeans,
81 |                                 IndexType_ * __restrict__ parts,
82 |                                 Vector<ValueType_> &eigVals,
83 |                                 Vector<ValueType_> &eigVecs,
84 |                                 IndexType_ & iters_lanczos,
85 |                                 IndexType_ & iters_kmeans);
86 | 
87 | 
88 | /// Compute cost function for partition
89 | /** This function determines the edges cut by a partition and a cost
90 |  * function:
91 |  * Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition)
92 |  * Graph is assumed to be weighted and undirected.
93 |  *
94 |  * @param G Weighted graph in CSR format
95 |  * @param nParts Number of partitions.
96 |  * @param parts (Input, device memory, n entries) Partition
97 |  *   assignments.
98 |  * @param edgeCut On exit, weight of edges cut by partition.
99 |  * @param cost On exit, partition cost function.
100 |  * @return NVGRAPH error flag.
101 |  */
102 | template <typename IndexType_, typename ValueType_>
103 | NVGRAPH_ERROR analyzePartition(ValuedCsrGraph<IndexType_, ValueType_> & G,
104 |                                IndexType_ nParts,
105 |                                const IndexType_ * __restrict__ parts,
106 |                                ValueType_ & edgeCut, ValueType_ & cost);
107 | 
108 | }
109 | 
110 | 
--------------------------------------------------------------------------------
/cpp/include/stacktrace.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2019, NVIDIA CORPORATION.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | //adapted from https://idlebox.net/2008/0901-stacktrace-demangled/ and licensed under WTFPL v2.0
18 | #pragma once
19 | 
20 | #if defined(_WIN32) || defined (__ANDROID__) || defined(ANDROID) || defined (__QNX__) || defined (__QNXNTO__)
21 | #else
22 | #include <execinfo.h>
23 | #include <cxxabi.h>
24 | #include <signal.h>
25 | #include <unistd.h>
26 | #include <stdio.h>
27 | #endif
28 | 
29 | #include <cstdlib>
30 | #include <string>
31 | #include <iostream>
32 | #include <sstream>
33 | namespace nvgraph {
34 | 
35 | /** Print a demangled stack backtrace of the caller function to the std::ostream \p eout.
*/ 36 | static inline void printStackTrace(std::ostream &eout = std::cerr, unsigned int max_frames = 63) 37 | { 38 | #if defined(_WIN32) || defined (__ANDROID__) || defined(ANDROID) || defined (__QNX__) || defined (__QNXNTO__) 39 | //TODO add code for windows stack trace and android stack trace 40 | #else 41 | std::stringstream out; 42 | 43 | // storage array for stack trace address data 44 | void* addrlist[max_frames+1]; 45 | 46 | // retrieve current stack addresses 47 | int addrlen = backtrace(addrlist, sizeof(addrlist) / sizeof(void*)); 48 | 49 | if (addrlen == 0) { 50 | out << " \n"; 51 | return; 52 | } 53 | 54 | // resolve addresses into strings containing "filename(function+address)", 55 | // this array must be free()-ed 56 | char** symbollist = backtrace_symbols(addrlist, addrlen); 57 | 58 | // allocate string which will be filled with the demangled function name 59 | size_t funcnamesize = 256; 60 | char* funcname = (char*)malloc(funcnamesize); 61 | 62 | // iterate over the returned symbol lines. skip the first, it is the 63 | // address of this function. 64 | for (int i = 1; i < addrlen; i++) 65 | { 66 | char *begin_name = 0, *begin_offset = 0, *end_offset = 0; 67 | 68 | // find parentheses and +address offset surrounding the mangled name: 69 | // ./module(function+0x15c) [0x8048a6d] 70 | for (char *p = symbollist[i]; *p; ++p) 71 | { 72 | if (*p == '(') 73 | begin_name = p; 74 | else if (*p == '+') 75 | begin_offset = p; 76 | else if (*p == ')' && begin_offset) { 77 | end_offset = p; 78 | break; 79 | } 80 | } 81 | 82 | if (begin_name && begin_offset && end_offset 83 | && begin_name < begin_offset) 84 | { 85 | *begin_name++ = '\0'; 86 | *begin_offset++ = '\0'; 87 | *end_offset = '\0'; 88 | 89 | // mangled name is now in [begin_name, begin_offset) and caller 90 | // offset in [begin_offset, end_offset). now apply 91 | // __cxa_demangle(): 92 | 93 | int status; 94 | char* ret = abi::__cxa_demangle(begin_name, 95 | funcname, &funcnamesize, &status); 96 | if (status == 0) { 97 | funcname = ret; // use possibly realloc()-ed string 98 | out << " " << symbollist[i] << " : " << funcname << "+" << begin_offset << "\n"; 99 | } 100 | else { 101 | // demangling failed. Output function name as a C function with 102 | // no arguments. 103 | out << " " << symbollist[i] << " : " << begin_name << "()+" << begin_offset << "\n"; 104 | } 105 | } 106 | else 107 | { 108 | // couldn't parse the line? print the whole line. 109 | out << " " << symbollist[i] << "\n"; 110 | } 111 | } 112 | eout << out.str(); 113 | //error_output(out.str().c_str(),out.str().size()); 114 | free(funcname); 115 | free(symbollist); 116 | //printf("PID of failing process: %d\n",getpid()); 117 | //while(1); 118 | #endif 119 | } 120 | 121 | } //end namespace nvgraph 122 | 123 | --------------------------------------------------------------------------------
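A minimal usage sketch for the stack-trace helper above; the wrapper function and the exception it throws are illustrative, not part of nvgraph.

#include <sstream>
#include <stdexcept>
#include <string>
#include "stacktrace.h"

// Capture the backtrace into a string and attach it to an exception message.
// printStackTrace defaults to std::cerr and 63 frames; any std::ostream works,
// and on non-POSIX builds the function above simply emits nothing.
void hypothetical_fatal(const char* what)
{
    std::ostringstream trace;
    nvgraph::printStackTrace(trace);
    throw std::runtime_error(std::string(what) + "\n" + trace.str());
}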