├── test ├── local_test_data │ ├── small.PNG │ ├── small_T.bin │ ├── small.mtx │ └── small_T.mtx ├── run_all_tests.sh ├── ref │ ├── nerstrand │ │ ├── Makefile │ │ ├── nestrand.sh │ │ ├── README.txt │ │ └── nerstrand_driver.cpp │ ├── cpu_ref_SSSP.py │ ├── cpu_ref_widest.py │ ├── ref_sssp_BGL.cpp │ └── cpu_ref_pagerank.py ├── generators │ ├── convertors │ │ ├── Makefile │ │ ├── pprocess.sh │ │ ├── README.txt │ │ ├── sort_eges.cpp │ │ ├── edges_to_H.cpp │ │ └── H_to_HtSorted_and_a.cpp │ ├── Makefile │ ├── rmat.cpp │ └── plod.cpp ├── Makefile ├── data_gen.sh └── log_converter.py ├── .gitmodules ├── cpp ├── cmake │ ├── Templates │ │ └── GoogleTest.CMakeLists.txt.cmake │ └── Modules │ │ └── ConfigureGoogleTest.cmake ├── include │ ├── pagerank_kernels.hxx │ ├── jaccard_gpu.cuh │ ├── debug_help.h │ ├── lobpcg.hxx │ ├── async_event.hxx │ ├── graph_visitors.hxx │ ├── async_event.cuh │ ├── thrust_traits.hxx │ ├── nvgraphP.h │ ├── triangles_counting_kernels.hxx │ ├── nvgraph_vector_kernels.hxx │ ├── high_res_clock.h │ ├── triangles_counting.hxx │ ├── debug_macros.h │ ├── csrmv_cub.h │ ├── nvgraph_lapack.hxx │ ├── size2_selector.hxx │ ├── cnmem_shared_ptr.hxx │ ├── triangles_counting_defines.hxx │ ├── sssp.hxx │ ├── widest_path.hxx │ ├── test │ │ ├── delta_modularity_test.cuh │ │ ├── mem_test.cuh │ │ ├── k_in_test.cuh │ │ └── thrust_test.cuh │ ├── modularity_maximization.hxx │ ├── bfs2d.hxx │ ├── valued_csr_graph.hxx │ ├── nvgraph_csrmv.hxx │ ├── graph.hxx │ ├── pagerank.hxx │ ├── bfs.hxx │ ├── app │ │ ├── nvlouvain_app.cu │ │ └── nvlouvain_app_hierarchy.cu │ ├── nvgraph_cublas.hxx │ ├── nvgraph_vector.hxx │ ├── nvgraph_convert.hxx │ ├── kmeans.hxx │ ├── partition.hxx │ └── stacktrace.h ├── src │ ├── csr_graph.cpp │ ├── valued_csr_graph.cpp │ ├── nvgraph_error.cu │ ├── graph_contraction │ │ ├── contraction_csr_max.cu │ │ ├── contraction_csr_min.cu │ │ ├── contraction_csr_sum.cu │ │ ├── contraction_csr_mul.cu │ │ ├── contraction_mv_float_max.cu │ │ ├── contraction_mv_float_min.cu │ │ ├── contraction_mv_float_sum.cu │ │ ├── contraction_mv_double_max.cu │ │ ├── contraction_mv_double_min.cu │ │ ├── contraction_mv_double_sum.cu │ │ ├── contraction_mv_float_mul.cu │ │ └── contraction_mv_double_mul.cu │ ├── pagerank_kernels.cu │ └── graph_extractor.cu └── tests │ ├── benchmarkScripts │ ├── run_graphMat.sh │ ├── run_galois.sh │ └── run_nvgraph.sh │ ├── 2d_partitioning_test.cpp │ └── nvgraph_test_common.h ├── conda-recipes └── nvgraph │ ├── build.sh │ └── meta.yaml ├── Acknowledgements.md └── external └── cub_semiring ├── util_namespace.cuh ├── block └── specializations │ └── block_histogram_atomic.cuh ├── cub.cuh └── util_macro.cuh /test/local_test_data/small.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rapidsai/nvgraph/HEAD/test/local_test_data/small.PNG -------------------------------------------------------------------------------- /test/local_test_data/small_T.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rapidsai/nvgraph/HEAD/test/local_test_data/small_T.bin -------------------------------------------------------------------------------- /test/local_test_data/small.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real general 2 | 6 6 10 3 | 1 2 0.500000 4 | 1 3 0.500000 5 | 3 1 0.333333 6 | 3 2 0.333333 7 | 3 5 0.333333 8 | 4 5 0.500000 9 | 4 6 0.500000 10 | 5 4 0.500000 11 | 5 6 
0.500000 12 | 6 4 1.000000 13 | -------------------------------------------------------------------------------- /test/local_test_data/small_T.mtx: -------------------------------------------------------------------------------- 1 | %%MatrixMarket matrix coordinate real general 2 | %%AMGX rhs 3 | 6 6 10 4 | 1 3 0.333333000 5 | 2 1 0.500000000 6 | 2 3 0.333333000 7 | 3 1 0.500000000 8 | 4 5 0.500000000 9 | 4 6 1.000000000 10 | 5 3 0.333333000 11 | 5 4 0.500000000 12 | 6 4 0.500000000 13 | 6 5 0.500000000 14 | 0 15 | 1 16 | 0 17 | 0 18 | 0 19 | 0 20 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "cpp/thirdparty/cnmem"] 2 | path = cpp/thirdparty/cnmem 3 | url = https://github.com/NVIDIA/cnmem.git 4 | [submodule "cpp/nvgraph/cpp/thirdparty/cub"] 5 | path = cpp/thirdparty/cub 6 | url = https://github.com/NVlabs/cub.git 7 | [submodule "cpp/nvgraph/external/cusp"] 8 | path = external/cusp 9 | url = https://github.com/cusplibrary/cusplibrary.git 10 | -------------------------------------------------------------------------------- /test/run_all_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #Usage sh run_all_tests.sh 3 | #Run all the tests in the current directory (i.e. you should copy it into your build/test/ directory). 4 | test="nvgraph_test 5 | csrmv_test 6 | semiring_maxmin_test 7 | semiring_minplus_test 8 | semiring_orand_test 9 | pagerank_test 10 | sssp_test 11 | max_flow_test" 12 | 13 | for i in $test 14 | do 15 | ./$i 16 | done 17 | -------------------------------------------------------------------------------- /test/ref/nerstrand/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-O3 -fopenmp 3 | LDFLAGS=-I. -L. libnerstrand.a 4 | EXEC=nerstrand_bench 5 | SOURCES=nerstrand_driver.cpp mmio.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | 8 | $(EXEC): $(OBJECTS) 9 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 10 | 11 | mmio.o: mmio.cpp mmio.h 12 | $(CC) $(CFLAGS) -c $< 13 | 14 | nerstrand_driver.o: nerstrand_driver.cpp mmio.h 15 | $(CC) $(CFLAGS) -c $< 16 | clean: 17 | rm *.o -------------------------------------------------------------------------------- /test/generators/convertors/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-O3 -march=native -pipe -w 3 | LDFLAGS=-lm 4 | 5 | all: sort HTA H mtob 6 | 7 | sort: sort_eges.cpp 8 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 9 | 10 | HTA: H_to_HtSorted_and_a.cpp 11 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 12 | 13 | H: edges_to_H.cpp 14 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 15 | 16 | mtob: binary_converter.cpp 17 | $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ 18 | 19 | clean: 20 | rm sort HTA H mtob 21 | 22 | -------------------------------------------------------------------------------- /test/generators/convertors/pprocess.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | edges="$1" 4 | echo "Starting Sort on $edges..." 5 | ./sort $edges 6 | echo "Done" 7 | 8 | tmp="_s" 9 | sedges=$edges$tmp 10 | echo "Starting H on $sedges ..." 11 | ./H $sedges 12 | echo "Done" 13 | 14 | tmp="_mtx" 15 | matrix=$sedges$tmp 16 | #delete sorted edges 17 | rm $sedges 18 | 19 | echo "Starting HTa on $matrix ..."
20 | ./HTA $matrix 21 | 22 | tmp="_T" 23 | outp=$edges$tmp 24 | outpp=$matrix$tmp 25 | mv $outpp $outp 26 | #delete H 27 | rm $matrix 28 | 29 | echo "Starting binary conversion ..." 30 | ./mtob $outp 31 | echo "Done" 32 | 33 | -------------------------------------------------------------------------------- /cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.12) 3 | 4 | include(ExternalProject) 5 | 6 | ExternalProject_Add(GoogleTest 7 | GIT_REPOSITORY https://github.com/google/googletest.git 8 | GIT_TAG release-1.8.0 9 | SOURCE_DIR "${GTEST_ROOT}/googletest" 10 | BINARY_DIR "${GTEST_ROOT}/build" 11 | INSTALL_DIR "${GTEST_ROOT}/install" 12 | CMAKE_ARGS ${GTEST_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${GTEST_ROOT}/install) 13 | 14 | -------------------------------------------------------------------------------- /conda-recipes/nvgraph/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CMAKE_COMMON_VARIABLES=" -DCMAKE_INSTALL_PREFIX=$PREFIX -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX11_ABI=$CMAKE_CXX11_ABI" 4 | 5 | 6 | if [ -n "$MACOSX_DEPLOYMENT_TARGET" ]; then 7 | # C++11 requires 10.9 8 | # but cudatoolkit 8 is built for 10.11 9 | export MACOSX_DEPLOYMENT_TARGET=10.11 10 | fi 11 | 12 | # show environment 13 | printenv 14 | # Cleanup local git 15 | git clean -xdf 16 | # Change directory for build process 17 | cd cpp 18 | # Use CMake-based build procedure 19 | mkdir build 20 | cd build 21 | # configure 22 | cmake $CMAKE_COMMON_VARIABLES .. 23 | # build 24 | make -j VERBOSE=1 install -------------------------------------------------------------------------------- /test/generators/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # NVIDIA CORPORATION and its licensors retain all intellectual property 4 | # and proprietary rights in and to this software, related documentation 5 | # and any modifications thereto. Any use, reproduction, disclosure or 6 | # distribution of this software and related documentation without an express 7 | # license agreement from NVIDIA CORPORATION is strictly prohibited. 8 | 9 | CXX=g++ 10 | CXXFLAGS=-Wall -Ofast -march=native -pipe 11 | 12 | all: print_info plodg rmatg 13 | 14 | plodg: plod.cpp 15 | $(CXX) $(CXXFLAGS) $< -o $@ 16 | 17 | rmatg: rmat.cpp 18 | $(CXX) $(CXXFLAGS) $< -o $@ 19 | 20 | clean: 21 | rm -f rmatg plodg 22 | 23 | print_info: 24 | $(info The Boost Graph Library is required) 25 | -------------------------------------------------------------------------------- /Acknowledgements.md: -------------------------------------------------------------------------------- 1 | # Acknowledgements 2 | 3 | NVGRAPH is the product of a large community of developers and researchers since 2014, and we’re deeply appreciative of their work.
Here is a list of people from NVIDIA who contributed up to the point of open sourcing it: 5 | 6 | Managers 7 | - Harun Bayraktar 8 | - Joe Eaton 9 | - Alex Fit-Florea 10 | 11 | Nvgraph dev team 12 | - Marat Arsaev 13 | - Alex Fender 14 | - Andrei Schaffer 15 | 16 | Contributors from other teams 17 | - Hugo Braun 18 | - Slawomir Kierat 19 | - Ahmad Kiswani 20 | - Szymon Migacz 21 | - Maxim Naumov 22 | - Nikolay Sakharnykh 23 | - James Wyles 24 | 25 | Interns 26 | - Danielle Maddix 27 | - Tim Moon 28 | 29 | And last but not least, thank you also to the contributors from the CUDA PM and QA teams who have helped build nvgraph since its early days. 30 | -------------------------------------------------------------------------------- /conda-recipes/nvgraph/meta.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, NVIDIA CORPORATION. 2 | 3 | # Usage: 4 | # conda build -c defaults -c conda-forge . 5 | {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} 6 | {% set git_revision_count=environ.get('GIT_DESCRIBE_NUMBER', 0) %} 7 | {% set cuda_version='.'.join(environ.get('CUDA_VERSION', 'unknown').split('.')[:2]) %} 8 | package: 9 | name: nvgraph 10 | version: {{ version }} 11 | 12 | source: 13 | path: ../.. 14 | 15 | build: 16 | number: {{ git_revision_count }} 17 | string: cuda{{ cuda_version }}_{{ git_revision_count }} 18 | 19 | requirements: 20 | build: 21 | - cmake 3.12.4 22 | 23 | about: 24 | home: http://nvidia.com/ 25 | license: LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS 26 | license_file: LICENSE 27 | summary: nvgraph Library 28 | -------------------------------------------------------------------------------- /cpp/include/pagerank_kernels.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | namespace nvgraph 18 | { 19 | template 20 | void update_dangling_nodes(int n, ValueType_* dangling_nodes, ValueType_ damping_factor, cudaStream_t stream = 0); 21 | 22 | } // end namespace nvgraph 23 | 24 | -------------------------------------------------------------------------------- /cpp/src/csr_graph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "csr_graph.hxx" 18 | 19 | namespace nvgraph 20 | { 21 | 22 | template 23 | CsrGraph& CsrGraph::operator=(const CsrGraph& graph) 24 | { 25 | 26 | } 27 | 28 | } // end namespace nvgraph 29 | 30 | -------------------------------------------------------------------------------- /cpp/include/jaccard_gpu.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // Jaccard symilarity edge weights 17 | // Author: Alexandre Fender afender@nvidia.com and Maxim Naumov. 18 | 19 | #pragma once 20 | 21 | namespace nvlouvain 22 | { 23 | template 24 | int jaccard(int n, int e, int *csrPtr, int *csrInd, T * csrVal, T *v, T *work, T gamma, T *weight_i, T *weight_s, T *weight_j); 25 | } 26 | -------------------------------------------------------------------------------- /cpp/src/valued_csr_graph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include "valued_csr_graph.hxx" 18 | #include "cnmem_shared_ptr.hxx" // interface with CuMem (memory pool lib) for shared ptr 19 | 20 | namespace nvgraph 21 | { 22 | template 23 | ValuedCsrGraph& ValuedCsrGraph::operator=(const ValuedCsrGraph& graph) 24 | { 25 | 26 | } 27 | 28 | } 29 | 30 | -------------------------------------------------------------------------------- /test/generators/convertors/README.txt: -------------------------------------------------------------------------------- 1 | ----------------------- 2 | Compile 3 | ----------------------- 4 | > make 5 | 6 | ----------------------- 7 | Run 8 | ----------------------- 9 | 10 | 11 | To preprocess a set of edges in matrix market pattern format 12 | > ./pprocess.sh edges.dat 13 | 14 | 15 | 16 | You can run each step separately 17 | Sort : 18 | > ./sort edges.dat 19 | 20 | Compute H : 21 | > ./H edges.dat 22 | 23 | Compute H transposed and dangling node vector 24 | > ./HTA H.mtx 25 | 26 | Convert to AmgX binary format 27 | > ./mtob HTA.mtx 28 | 29 | ----------------------- 30 | Input 31 | ----------------------- 32 | The format for sort and H is matrix market pattern format 33 | example : 34 | 35 | %%comment 36 | % as many comments as you want 37 | %... 38 | size size nonzero 39 | a b 40 | c d 41 | a e 42 | e a 43 | . 44 | . 45 | . 46 | [a-e] are in N* 47 | 48 | 49 | The format for HTA and mtob is matrix market coordinate format 50 | %%comment 51 | % as many comments as you want 52 | %... 53 | size size nonzero 54 | a b f 55 | c d g 56 | a e h 57 | e a i 58 | . 59 | . 60 | . 61 | [a-e] are in N* 62 | [f-i] are in R -------------------------------------------------------------------------------- /test/ref/nerstrand/nestrand.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nvg_data_prefix="/home/mnaumov/cuda_matrices/p4matrices/dimacs10" 4 | 5 | declare -a dataset=( 6 | "$nvg_data_prefix/preferentialAttachment.mtx" 7 | "$nvg_data_prefix/caidaRouterLevel.mtx" 8 | "$nvg_data_prefix/coAuthorsDBLP.mtx" 9 | "$nvg_data_prefix/citationCiteseer.mtx" 10 | "$nvg_data_prefix/coPapersDBLP.mtx" 11 | "$nvg_data_prefix/coPapersCiteseer.mtx" 12 | "/home/afender/modularity/as-Skitter.mtx" 13 | "/home/afender/modularity/hollywood-2009.mtx" 14 | ) 15 | 16 | for i in "${dataset[@]}" 17 | do 18 | ./nerstrand_bench "$i" 7 19 | done 20 | echo 21 | 22 | #run only best case according to Spreadsheet 1 23 | ./nerstrand_bench "$nvg_data_prefix/preferentialAttachment.mtx" 7 24 | ./nerstrand_bench "$nvg_data_prefix/caidaRouterLevel.mtx" 11 25 | ./nerstrand_bench "$nvg_data_prefix/coAuthorsDBLP.mtx" 7 26 | ./nerstrand_bench "$nvg_data_prefix/citationCiteseer.mtx" 17 27 | ./nerstrand_bench "$nvg_data_prefix/coPapersDBLP.mtx" 73 28 | ./nerstrand_bench "$nvg_data_prefix/coPapersCiteseer.mtx" 53 29 | ./nerstrand_bench "/home/afender/modularity/as-Skitter.mtx" 7 30 | ./nerstrand_bench "/home/afender/modularity/hollywood-2009.mtx" 11 31 | -------------------------------------------------------------------------------- /test/ref/nerstrand/README.txt: -------------------------------------------------------------------------------- 1 | This is a stand-alone host app that reads an undirected graph in matrix market format, converts it into CSR, calls Nerstrand with default parameters, and returns the modularity score of the clustering.
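For reference, that modularity score can be computed directly from a clustering of a weighted undirected graph stored in CSR. The following is a minimal, self-contained C++ sketch, illustrative only: it is not taken from nerstrand_driver.cpp or the Nerstrand API, it assumes a symmetric CSR (each undirected edge stored in both directions) with cluster ids in [0, n), and every name in it is a placeholder.

#include <vector>

// Q = sum over clusters c of [ W_in(c)/2m - (deg(c)/2m)^2 ], where W_in(c) is the
// intra-cluster edge weight (counted in both directions), deg(c) is the total
// weighted degree of the cluster, and 2m is the total weighted degree of the graph.
double modularity(int n,
                  const std::vector<int>& row_ptr,    // CSR offsets, size n+1
                  const std::vector<int>& col_ind,    // CSR column indices
                  const std::vector<double>& val,     // edge weights
                  const std::vector<int>& cluster)    // cluster id per vertex, in [0, n)
{
    std::vector<double> deg(n, 0.0);
    double two_m = 0.0;                               // total weighted degree = 2m
    for (int u = 0; u < n; ++u)
        for (int e = row_ptr[u]; e < row_ptr[u + 1]; ++e) {
            deg[u] += val[e];
            two_m  += val[e];
        }
    if (two_m == 0.0) return 0.0;                     // graph without edges

    std::vector<double> w_in(n, 0.0), deg_c(n, 0.0);  // per-cluster accumulators
    for (int u = 0; u < n; ++u) {
        deg_c[cluster[u]] += deg[u];
        for (int e = row_ptr[u]; e < row_ptr[u + 1]; ++e)
            if (cluster[col_ind[e]] == cluster[u])
                w_in[cluster[u]] += val[e];           // both directions of each intra-cluster edge
    }
    double q = 0.0;
    for (int c = 0; c < n; ++c)
        q += w_in[c] / two_m - (deg_c[c] / two_m) * (deg_c[c] / two_m);
    return q;
}

A higher Q means more edge weight falls inside clusters than a random placement with the same degrees would produce, which is the quantity the benchmark reports for each run.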
2 | 3 | Make sure you have downloaded and installed nerstrand : http://www-users.cs.umn.edu/~lasalle/nerstrand/ 4 | You should have libnerstrand.a in /build/Linux-x86_64/lib, move it to the directory containing this README or adjust the Makefile. 5 | 6 | Type "make" to compile the small benchmarking app and "./nerstrand_bench " to execute. 7 | For convenience there is also a benchmarking script that calls the benchmarking app (please adjust paths to binary and data sets). 8 | 9 | Use the following reference: 10 | @article{lasalle2014nerstrand, 11 | title={Multi-threaded Modularity Based Graph Clustering using the Multilevel Paradigm}, 12 | journal = "Journal of Parallel and Distributed Computing ", 13 | year = "2014", 14 | issn = "0743-7315", 15 | doi = "http://dx.doi.org/10.1016/j.jpdc.2014.09.012", 16 | url = "http://www.sciencedirect.com/science/article/pii/S0743731514001750", 17 | author = "Dominique LaSalle and George Karypis" 18 | }​ 19 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for building NVCompute/CUDA BLAS library 2 | SOLNDIR := ../. 3 | 4 | # Get the profile settings 5 | ifdef VULCAN 6 | include $(VULCAN_TOOLKIT_BASE)/build/getprofile.mk 7 | include $(VULCAN_TOOLKIT_BASE)/build/config/$(PROFILE).mk 8 | include $(VULCAN_TOOLKIT_BASE)/build/config/DetectOS.mk 9 | else 10 | include ../../build/getprofile.mk 11 | include ../../build/config/$(PROFILE).mk 12 | include ../../build/config/DetectOS.mk 13 | endif 14 | 15 | export I_AM_SLOPPY = 1 16 | AGNOSTIC_PROJECTS += nvgraph_test 17 | AGNOSTIC_PROJECTS += nvgraph_capi_tests 18 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_subgraph 19 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_conversion 20 | AGNOSTIC_PROJECTS += nvgraph_benchmark 21 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_clustering 22 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_contraction 23 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_traversal 24 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_triangles 25 | AGNOSTIC_PROJECTS += nvgraph_2d_partitioning_test 26 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_2d_bfs 27 | AGNOSTIC_PROJECTS += nvgraph_capi_tests_2d_bfs_net 28 | 29 | ifdef VULCAN 30 | include $(VULCAN_TOOLKIT_BASE)/build/common.mk 31 | else 32 | include ../../build/common.mk 33 | endif 34 | -------------------------------------------------------------------------------- /cpp/tests/benchmarkScripts/run_graphMat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ****************** Edit this ************************* 4 | #******************************************************* 5 | #Path to graphMat binary data 6 | gm_data_prefix="/home-2/afender/GraphMat-master/data" 7 | #Path to graphMat binary 8 | gm_bin_prefix="/home-2/afender/GraphMat-master/bin" 9 | #Number of core to use in graphMat 10 | export OMP_NUM_THREADS=24 11 | # ****************************************************** 12 | #******************************************************* 13 | # NOTE 14 | #twitter_graphMat.bin and live_journal_graphMat.bin are assumed to be in "gm_data_prefix" directory 15 | #******************************************************* 16 | 17 | # Requiered export according to the doc 18 | export KMP_AFFINITY=scatter 19 | 20 | #Pagerank runs 21 | numactl -i all $gm_bin_prefix/PageRank $gm_data_prefix/twitter.graphmat.bin 22 | numactl -i all $gm_bin_prefix/PageRank 
$gm_data_prefix/soc-LiveJournal1.graphmat.bin 23 | 24 | # SSSP runs 25 | # Warning: vertices seems to have 1-based indices (nvGraph use 0-base) 26 | numactl -i all $gm_bin_prefix/SSSP $gm_data_prefix/twitter.graphmat.bin 1 27 | numactl -i all $gm_bin_prefix/SSSP $gm_data_prefix/soc-LiveJournal1.graphmat.bin 1 -------------------------------------------------------------------------------- /cpp/include/debug_help.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | /* 17 | * debug_help.h 18 | * 19 | * Created on: Jul 19, 2018 20 | * Author: jwyles 21 | */ 22 | 23 | #include 24 | #include 25 | 26 | #pragma once 27 | 28 | namespace debug { 29 | template 30 | void printDeviceVector(T* dev_ptr, int items, std::string title) { 31 | T* host_ptr = (T*)malloc(sizeof(T) * items); 32 | cudaMemcpy(host_ptr, dev_ptr, sizeof(T) * items, cudaMemcpyDefault); 33 | std::cout << title << ": { "; 34 | for (int i = 0; i < items; i++) { 35 | std::cout << host_ptr[i] << ((i < items - 1) ? ", " : " "); 36 | } 37 | std::cout << "}\n"; 38 | free(host_ptr); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /cpp/tests/benchmarkScripts/run_galois.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ****************** Edit this ************************* 4 | # Path to local workspace containing p4matrices:2024 sync //matrices/p4matrices/graphs/... 5 | nvg_data_prefix="/home/afender/src/matrices/p4matrices/graphs" 6 | 7 | #Path to galois 8 | galois_root="/home/afender/soft/galois-2.3.0/build/default" 9 | # ***************************************************** 10 | export OMP_NUM_THREADS=24 11 | 12 | declare -a arr=( 13 | #Small mtx just for debug 14 | #"$nvg_data_prefix/small/small.mtx" 15 | "$nvg_data_prefix/soc-liveJournal/soc-LiveJournal1.mtx" 16 | "$nvg_data_prefix/Twitter/twitter.mtx" 17 | ) 18 | 19 | ## now loop through the above array 20 | for i in "${arr[@]}" 21 | do 22 | echo "Pagerank" 23 | echo "$i" 24 | time $galois_root/tools/graph-convert/graph-convert -mtx2gr -edgeType=float32 -print-all-options $i $i.galois 25 | time $galois_root/tools/graph-convert/graph-convert -gr2tgr -edgeType=float32 -print-all-options $i.galois $i_T.galois 26 | time $galois_root/apps/pagerank/app-pagerank $i.galois -graphTranspose="$i_T.galois" -t=$OMP_NUM_THREADS 27 | echo 28 | done 29 | echo 30 | for i in "${arr[@]}" 31 | do 32 | echo "SSSP" 33 | echo "$i" 34 | time $galois_root/apps/sssp/app-sssp $i.galois -startNode=0 -t=$OMP_NUM_THREADS 35 | echo 36 | done 37 | echo 38 | -------------------------------------------------------------------------------- /cpp/include/lobpcg.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include "matrix.hxx" 19 | #include "partition.hxx" 20 | 21 | namespace nvgraph { 22 | 23 | template 24 | int lobpcg_simplified(cublasHandle_t cublasHandle, cusolverDnHandle_t cusolverHandle, 25 | IndexType_ n, IndexType_ k, 26 | /*const*/ Matrix * A, 27 | ValueType_ * __restrict__ eigVecs_dev, 28 | ValueType_ * __restrict__ eigVals_dev, 29 | IndexType_ maxIter,ValueType_ tol, 30 | ValueType_ * __restrict__ work_dev, 31 | IndexType_ & iter); 32 | 33 | } 34 | -------------------------------------------------------------------------------- /cpp/include/async_event.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | namespace nvgraph { 20 | 21 | class AsyncEvent { 22 | public: 23 | AsyncEvent() : async_event(NULL) { } 24 | AsyncEvent(int size) : async_event(NULL) { cudaEventCreate(&async_event); } 25 | ~AsyncEvent() { if (async_event != NULL) cudaEventDestroy(async_event); } 26 | 27 | void create() { cudaEventCreate(&async_event); } 28 | void record(cudaStream_t s=0) { 29 | if (async_event == NULL) 30 | cudaEventCreate(&async_event); // check if we haven't created the event yet 31 | cudaEventRecord(async_event,s); 32 | } 33 | void sync() { 34 | cudaEventSynchronize(async_event); 35 | } 36 | private: 37 | cudaEvent_t async_event; 38 | }; 39 | 40 | } 41 | 42 | -------------------------------------------------------------------------------- /cpp/include/graph_visitors.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef GRAPH_VISITORS_HXX 18 | #define GRAPH_VISITORS_HXX 19 | 20 | namespace nvgraph 21 | { 22 | //PROBLEM: using Visitor Design Pattern over a 23 | // hierarchy of visitees that depend on 24 | // different number of template arguments 25 | // 26 | //SOLUTION:use Acyclic Visitor 27 | // (A. Alexandrescu, "Modern C++ Design", Section 10.4), 28 | // where *concrete* Visitors must be parameterized by all 29 | // the possibile template args of the Visited classes (visitees); 30 | // 31 | struct VisitorBase 32 | { 33 | virtual ~VisitorBase(void) 34 | { 35 | } 36 | }; 37 | 38 | template 39 | struct Visitor 40 | { 41 | virtual void Visit(T& ) = 0; 42 | virtual ~Visitor() { } 43 | }; 44 | }//end namespace 45 | #endif 46 | 47 | -------------------------------------------------------------------------------- /cpp/include/async_event.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | 20 | class AsyncEvent 21 | { 22 | public: 23 | AsyncEvent() : async_event(NULL) { } 24 | AsyncEvent(int size) : async_event(NULL) { cudaEventCreate(&async_event); } 25 | ~AsyncEvent() { if (async_event != NULL) cudaEventDestroy(async_event); } 26 | 27 | void create() { cudaEventCreate(&async_event); } 28 | void record(cudaStream_t s = 0) 29 | { 30 | if (async_event == NULL) 31 | { 32 | cudaEventCreate(&async_event); // check if we haven't created the event yet 33 | } 34 | 35 | cudaEventRecord(async_event, s); 36 | } 37 | void sync() 38 | { 39 | cudaEventSynchronize(async_event); 40 | } 41 | private: 42 | cudaEvent_t async_event; 43 | }; 44 | 45 | -------------------------------------------------------------------------------- /test/data_gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #Usage sh data_gen size1 size2 ... 3 | #Generate power law in-degree plus rmat graphs of size size1 ... sizeN 4 | #Corresponding transposed and binary csr are generated as well 5 | 6 | convert (){ 7 | edges=$1 8 | #echo "Starting Sort on $edges..." 9 | ./generators/convertors/sort $edges 10 | #echo "Done" 11 | 12 | tmp="_s" 13 | sedges=$edges$tmp 14 | echo "Starting H on $sedges ..." 15 | ./generators/convertors/H $sedges 16 | #echo "Done" 17 | 18 | tmp="_mtx" 19 | matrix=$sedges$tmp 20 | #delete soted edges 21 | rm $sedges 22 | 23 | echo "Starting HTa on $matrix ..." 24 | ./generators/convertors/HTA $matrix 25 | 26 | tmp="_T" 27 | outp=$edges$tmp 28 | outpp=$matrix$tmp 29 | mv $outpp $outp 30 | #delete H 31 | rm $matrix 32 | 33 | #echo "Starting binary conversion ..." 34 | ./generators/convertors/mtob $outp 35 | #echo "Generated transposed coo and transposed csr bin" 36 | } 37 | 38 | echo "Building the tools ..." 
39 | make -C generators 40 | make -C generators/convertors 41 | #generate the graphs we need here 42 | #loop over script arguments which represent graph sizes. 43 | for var in "$@" 44 | do 45 | echo "Generate graphs of size $var" 46 | vertices=$var 47 | option="i" 48 | ./generators/plodg $vertices $option 49 | ./generators/rmatg $vertices $option 50 | graph="plod_graph_" 51 | format=".mtx" 52 | path_to_data="local_test_data/" 53 | name="$path_to_data$graph$vertices$format" 54 | convert $name 55 | graph="rmat_graph_" 56 | name="$path_to_data$graph$vertices$format" 57 | convert $name 58 | done 59 | -------------------------------------------------------------------------------- /test/log_converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from sys import argv 3 | from subprocess import Popen, PIPE, STDOUT 4 | from os import path, environ 5 | 6 | 7 | def main(): 8 | args = argv[1:] 9 | args[0] = path.join('./', args[0]) 10 | print args 11 | environ["GTEST_PRINT_TIME"] = "0" 12 | popen = Popen(args, stdout=PIPE, stderr=STDOUT) 13 | stillParsing = True 14 | skip = [] 15 | while not popen.poll(): 16 | data = popen.stdout.readline().splitlines() 17 | if len(data) == 0: 18 | break 19 | data = data[0] 20 | try: 21 | STATUS = data[0:12] 22 | NAME = data[12:] 23 | if data.find('Global test environment tear-down') != -1: 24 | stillParsing = False 25 | if stillParsing: 26 | if STATUS == "[ RUN ]": 27 | print('&&&& RUNNING' + NAME) 28 | elif STATUS == "[ OK ]" and NAME.strip() not in skip: 29 | print('&&&& PASSED ' + NAME) 30 | elif STATUS == "[ WAIVED ]": 31 | print('&&&& WAIVED ' + NAME) 32 | skip.append(NAME.strip()) 33 | elif STATUS == "[ FAILED ]": 34 | NAME = NAME.replace(', where', '\n where') 35 | print('&&&& FAILED ' + NAME) 36 | else: 37 | print(data) 38 | else: 39 | print(data) 40 | except IndexError: 41 | print(data) 42 | 43 | return popen.returncode 44 | 45 | if __name__ == '__main__': 46 | main() 47 | -------------------------------------------------------------------------------- /cpp/include/thrust_traits.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | 18 | 19 | #ifndef THRUST_TRAITS_HXX 20 | 21 | #define THRUST_TRAITS_HXX 22 | 23 | 24 | 25 | #include 26 | 27 | #include 28 | 29 | 30 | 31 | namespace nvgraph 32 | 33 | { 34 | 35 | //generic Vector Ptr Type facade: 36 | 37 | // 38 | 39 | template 40 | 41 | struct VectorPtrT; 42 | 43 | 44 | 45 | //partial specialization for device_vector: 46 | 47 | // 48 | 49 | template 50 | 51 | struct VectorPtrT > 52 | 53 | { 54 | 55 | typedef thrust::device_ptr PtrT; 56 | 57 | }; 58 | 59 | 60 | 61 | //partial specialization for host_vector: 62 | 63 | // 64 | 65 | template 66 | 67 | struct VectorPtrT > 68 | 69 | { 70 | 71 | typedef typename thrust::host_vector::value_type* PtrT; 72 | 73 | }; 74 | 75 | } 76 | 77 | #endif 78 | 79 | -------------------------------------------------------------------------------- /cpp/include/nvgraphP.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /* 18 | * 19 | * 20 | * WARNING: this is a private header file, it should not be publically exposed. 21 | * 22 | * 23 | */ 24 | 25 | #pragma once 26 | #include "nvgraph.h" 27 | #include "cnmem.h" 28 | 29 | #if defined(__cplusplus) 30 | extern "C" { 31 | #endif 32 | 33 | /* Graph descriptor types */ 34 | typedef enum 35 | { 36 | IS_EMPTY = 0, //nothing 37 | HAS_TOPOLOGY = 1, //connectivity info 38 | HAS_VALUES = 2, //MultiValuedCSRGraph 39 | IS_2D = 3 40 | } nvgraphGraphStatus_t; 41 | 42 | struct nvgraphContext { 43 | cudaStream_t stream; 44 | cnmemDevice_t cnmem_device; 45 | int nvgraphIsInitialized; 46 | }; 47 | 48 | struct nvgraphGraphDescr { 49 | nvgraphGraphStatus_t graphStatus; 50 | cudaDataType T; // This is the type of values for the graph 51 | nvgraphTopologyType_t TT; // The topology type (class to cast graph_handle pointer to) 52 | void* graph_handle; // Opaque pointer to the graph class object 53 | }; 54 | 55 | #if defined(__cplusplus) 56 | }//extern "C" 57 | #endif 58 | 59 | -------------------------------------------------------------------------------- /cpp/include/triangles_counting_kernels.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #pragma once 17 | 18 | #include 19 | 20 | namespace nvgraph 21 | { 22 | 23 | namespace triangles_counting 24 | { 25 | 26 | template 27 | void tricnt_bsh(T nblock, spmat_t *m, uint64_t *ocnt_d, size_t bmld, cudaStream_t stream); 28 | template 29 | void tricnt_wrp(T nblock, spmat_t *m, uint64_t *ocnt_d, unsigned int *bmap_d, size_t bmld, cudaStream_t stream); 30 | template 31 | void tricnt_thr(T nblock, spmat_t *m, uint64_t *ocnt_d, cudaStream_t stream); 32 | template 33 | void tricnt_b2b(T nblock, spmat_t *m, uint64_t *ocnt_d, unsigned int *bmapL0_d, size_t bmldL0, unsigned int *bmapL1_d, size_t bmldL1, cudaStream_t stream); 34 | 35 | template 36 | uint64_t reduce(uint64_t *v_d, T n, cudaStream_t stream); 37 | template 38 | void create_nondangling_vector(const T *roff, T *p_nonempty, T *n_nonempty, size_t n, cudaStream_t stream); 39 | 40 | void myCudaMemset(unsigned long long *p, unsigned long long v, long long n, cudaStream_t stream); 41 | 42 | } // namespace triangles_counting 43 | 44 | } // namespace nvgraph 45 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_vector_kernels.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | namespace nvgraph 19 | { 20 | template 21 | void nrm1_raw_vec (ValueType_* vec, size_t n, ValueType_* res, cudaStream_t stream = 0); 22 | 23 | template 24 | void fill_raw_vec (ValueType_* vec, size_t n, ValueType_ value, cudaStream_t stream = 0); 25 | 26 | template 27 | void dump_raw_vec (ValueType_* vec, size_t n, int offset, cudaStream_t stream = 0); 28 | 29 | template 30 | void dmv (size_t num_vertices, ValueType_ alpha, ValueType_* D, ValueType_* x, ValueType_ beta, ValueType_* y, cudaStream_t stream = 0); 31 | 32 | template 33 | void copy_vec(ValueType_ *vec1, size_t n, ValueType_ *res, cudaStream_t stream = 0); 34 | 35 | template 36 | void flag_zeros_raw_vec(size_t num_vertices, ValueType_* vec, int* flag, cudaStream_t stream = 0 ); 37 | 38 | template 39 | void set_connectivity( size_t n, IndexType_ root, ValueType_ self_loop_val, ValueType_ unreachable_val, ValueType_* res, cudaStream_t stream = 0); 40 | 41 | } // end namespace nvgraph 42 | 43 | -------------------------------------------------------------------------------- /cpp/src/nvgraph_error.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include "nvgraph_error.hxx" 18 | 19 | namespace nvgraph 20 | { 21 | 22 | 23 | void nvgraph_default_output(const char *msg, int length) { 24 | #if defined(DEBUG) || defined(VERBOSE_DIAG) 25 | printf("%s", msg); 26 | #endif 27 | } 28 | 29 | NVGRAPH_output_callback nvgraph_output = nvgraph_default_output; 30 | NVGRAPH_output_callback error_output = nvgraph_default_output; 31 | //NVGRAPH_output_callback nvgraph_distributed_output = nvgraph_default_output;*/ 32 | 33 | // Timer 34 | struct cuda_timer::event_pair 35 | { 36 | cudaEvent_t start; 37 | cudaEvent_t end; 38 | }; 39 | cuda_timer::cuda_timer(): p(new event_pair()) { } 40 | 41 | void cuda_timer::start() 42 | { 43 | cudaEventCreate(&p->start); 44 | cudaEventCreate(&p->end); 45 | cudaEventRecord(p->start, 0); 46 | cudaCheckError(); 47 | } 48 | float cuda_timer::stop() 49 | { 50 | cudaEventRecord(p->end, 0); 51 | cudaEventSynchronize(p->end); 52 | float elapsed_time; 53 | cudaEventElapsedTime(&elapsed_time, p->start, p->end); 54 | cudaEventDestroy(p->start); 55 | cudaEventDestroy(p->end); 56 | cudaCheckError(); 57 | return elapsed_time; 58 | } 59 | 60 | } // end namespace nvgraph 61 | 62 | -------------------------------------------------------------------------------- /test/ref/cpu_ref_SSSP.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Usage : python3 nvgraph_cpu_ref.py graph.mtx source_vertex 4 | # This works with networkx 1.8.1 (default ubuntu package version in 14.04) 5 | # http://networkx.github.io/documentation/networkx-1.8/ 6 | 7 | # Latest version is currenlty 1.11 in feb 2016 8 | # https://networkx.github.io/documentation/latest/tutorial/index.html 9 | 10 | #import numpy as np 11 | import sys 12 | import time 13 | from scipy.io import mmread 14 | import numpy as np 15 | import networkx as nx 16 | import os 17 | 18 | print ('Networkx version : {} '.format(nx.__version__)) 19 | 20 | # Command line arguments 21 | argc = len(sys.argv) 22 | if argc<=2: 23 | print("Error: usage is : python3 nvgraph_cpu_ref.py graph.mtx source_vertex") 24 | sys.exit() 25 | mmFile = sys.argv[1] 26 | src = int(sys.argv[2]) 27 | print('Reading '+ str(mmFile) + '...') 28 | #Read 29 | M = mmread(mmFile).asfptype().tolil() 30 | 31 | if M is None : 32 | raise TypeError('Could not read the input graph') 33 | 34 | # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this explicitly 35 | M = M.transpose().tocsr() 36 | if not M.has_sorted_indices: 37 | M.sort_indices() 38 | 39 | # Directed NetworkX graph 40 | Gnx = nx.DiGraph(M) 41 | 42 | #SSSP 43 | print('Solving... ') 44 | t1 = time.time() 45 | sssp = nx.single_source_dijkstra_path_length(Gnx,source=src) 46 | t2 = time.time() - t1 47 | 48 | print('Time : '+str(t2)) 49 | print('Writing result ... 
') 50 | 51 | # fill missing with DBL_MAX 52 | bsssp = np.full(M.shape[0], sys.float_info.max, dtype=np.float64) 53 | for r in sssp.keys(): 54 | bsssp[r] = sssp[r] 55 | # write binary 56 | out_fname = os.path.splitext(os.path.basename(mmFile))[0] + '_T.sssp_' + str(src) + '.bin' 57 | bsssp.tofile(out_fname, "") 58 | print ('Result is in the file: ' + out_fname) 59 | 60 | # write text 61 | #f = open('/tmp/ref_' + os.path.basename(mmFile) + '_sssp.txt', 'w') 62 | #f.write(str(sssp.values())) 63 | 64 | print('Done') 65 | -------------------------------------------------------------------------------- /cpp/tests/2d_partitioning_test.cpp: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "nvgraph.h" 3 | #include 4 | 5 | TEST(SimpleBFS2D, DummyTest) { 6 | nvgraphHandle_t handle; 7 | int* devices = (int*) malloc(sizeof(int) * 2); 8 | devices[0] = 0; 9 | devices[1] = 1; 10 | nvgraphCreateMulti(&handle, 2, devices); 11 | nvgraphGraphDescr_t graph; 12 | nvgraphCreateGraphDescr(handle, &graph); 13 | int rowIds[38] = { 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 14 | 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8 }; 15 | int colIds[38] = { 1, 2, 7, 8, 0, 2, 4, 7, 8, 0, 1, 3, 6, 8, 2, 4, 5, 6, 8, 1, 3, 5, 8, 3, 4, 6, 16 | 7, 2, 3, 5, 0, 1, 5, 0, 1, 2, 3, 4 }; 17 | nvgraph2dCOOTopology32I_st topo; 18 | topo.nvertices = 9; 19 | topo.nedges = 38; 20 | topo.source_indices = rowIds; 21 | topo.destination_indices = colIds; 22 | topo.valueType = CUDA_R_32I; 23 | topo.values = NULL; 24 | topo.numDevices = 2; 25 | topo.devices = devices; 26 | topo.blockN = 2; 27 | topo.tag = NVGRAPH_DEFAULT; 28 | nvgraphSetGraphStructure(handle, graph, &topo, NVGRAPH_2D_32I_32I); 29 | int* distances = (int*) malloc(sizeof(int) * 9); 30 | int* predecessors = (int*) malloc(sizeof(int) * 9); 31 | int sourceId = 0; 32 | std::cout << "Source ID: " << sourceId << "\n"; 33 | nvgraph2dBfs(handle, graph, sourceId, distances, predecessors); 34 | std::cout << "Distances:\n"; 35 | for (int i = 0; i < 9; i++) 36 | std::cout << i << ":" << distances[i] << " "; 37 | std::cout << "\nPredecessors:\n"; 38 | for (int i = 0; i < 9; i++) 39 | std::cout << i << ":" << predecessors[i] << " "; 40 | std::cout << "\n"; 41 | int exp_pred[9] = {-1,0,0,2,1,7,2,0,0}; 42 | int exp_dist[9] = {0,1,1,2,2,2,2,1,1}; 43 | for (int i = 0; i < 9; i++){ 44 | ASSERT_EQ(exp_pred[i], predecessors[i]); 45 | ASSERT_EQ(exp_dist[i], distances[i]); 46 | } 47 | std::cout << "Test run!\n"; 48 | } 49 | 50 | int main(int argc, char **argv) { 51 | ::testing::InitGoogleTest(&argc, argv); 52 | return RUN_ALL_TESTS(); 53 | } 54 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_max.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_max(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_min.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_min(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_sum.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_sum(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_csr_mul.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | CsrGraph* contract_graph_csr_mul(CsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce)); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/include/high_res_clock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | // A wrapper of clock_gettime. 17 | // Michael A. 
Frumkin (mfrumkin@nvidia.com) 18 | #pragma once 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | class HighResClock { 25 | public: 26 | HighResClock() { 27 | clock_gettime(CLOCK_REALTIME, &_start_time); 28 | clock_gettime(CLOCK_REALTIME, &_stop_time); 29 | } 30 | ~HighResClock() { } 31 | 32 | void start() { clock_gettime(CLOCK_REALTIME, &_start_time); } 33 | 34 | std::string stop() { 35 | clock_gettime(CLOCK_REALTIME, &_stop_time); 36 | char buffer[64]; 37 | long long int start_time = 38 | _start_time.tv_sec * 1e9 + _start_time.tv_nsec; 39 | long long int stop_time = 40 | _stop_time.tv_sec * 1e9 + _stop_time.tv_nsec; 41 | 42 | sprintf(buffer, "%lld us", 43 | (stop_time - start_time) / 1000); 44 | std::string str(buffer); 45 | return str; 46 | } 47 | 48 | void stop(double* elapsed_time) { // returns time in us 49 | clock_gettime(CLOCK_REALTIME, &_stop_time); 50 | long long int start_time = 51 | _start_time.tv_sec * 1e9 + _start_time.tv_nsec; 52 | long long int stop_time = 53 | _stop_time.tv_sec * 1e9 + _stop_time.tv_nsec; 54 | *elapsed_time = (stop_time - start_time) / 1000; 55 | } 56 | 57 | private: 58 | timespec _start_time; 59 | timespec _stop_time; 60 | }; 61 | -------------------------------------------------------------------------------- /cpp/include/triangles_counting.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | 26 | #include 27 | 28 | namespace nvgraph 29 | { 30 | 31 | namespace triangles_counting 32 | { 33 | 34 | 35 | typedef enum { TCOUNT_DEFAULT, TCOUNT_BSH, TCOUNT_B2B, TCOUNT_WRP, TCOUNT_THR } TrianglesCountAlgo; 36 | 37 | 38 | template 39 | class TrianglesCount 40 | { 41 | private: 42 | //CsrGraph & m_last_graph ; 43 | AsyncEvent m_event; 44 | uint64_t m_triangles_number; 45 | spmat_t m_mat; 46 | int m_dev_id; 47 | cudaDeviceProp m_dev_props; 48 | 49 | Vector m_seq; 50 | 51 | cudaStream_t m_stream; 52 | 53 | bool m_done; 54 | 55 | void tcount_bsh(); 56 | void tcount_b2b(); 57 | void tcount_wrp(); 58 | void tcount_thr(); 59 | 60 | public: 61 | // Simple constructor 62 | TrianglesCount(const CsrGraph & graph, cudaStream_t stream = NULL, int device_id = -1); 63 | // Simple destructor 64 | ~TrianglesCount(); 65 | 66 | NVGRAPH_ERROR count(TrianglesCountAlgo algo = TCOUNT_DEFAULT ); 67 | inline uint64_t get_triangles_count() const {return m_triangles_number;} 68 | }; 69 | 70 | } // end namespace triangles_counting 71 | 72 | } // end namespace nvgraph 73 | 74 | -------------------------------------------------------------------------------- /cpp/src/pagerank_kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #include 17 | #include 18 | 19 | #include "nvgraph_error.hxx" 20 | #include "nvgraph_vector_kernels.hxx" 21 | #include "pagerank_kernels.hxx" 22 | 23 | namespace nvgraph 24 | { 25 | 26 | template 27 | __global__ void update_dn_kernel(int num_vertices, ValueType_* aa, ValueType_ beta) 28 | { 29 | int tidx = blockDim.x * blockIdx.x + threadIdx.x; 30 | for (int r = tidx; r < num_vertices; r += blockDim.x * gridDim.x) 31 | { 32 | // NOTE 1 : a = alpha*a + (1-alpha)e 33 | if (aa[r] == 0.0) 34 | aa[r] = beta; // NOTE 2 : alpha*0 + (1-alpha)*1 = (1-alpha) 35 | } 36 | } 37 | 38 | template 39 | void update_dangling_nodes(int num_vertices, ValueType_* dangling_nodes, ValueType_ damping_factor, cudaStream_t stream) 40 | { 41 | 42 | int num_threads = 256; 43 | int max_grid_size = 4096; 44 | int num_blocks = std::min(max_grid_size, (num_vertices/num_threads)+1); 45 | ValueType_ beta = 1.0-damping_factor; 46 | update_dn_kernel<<>>(num_vertices, dangling_nodes,beta); 47 | cudaCheckError(); 48 | } 49 | 50 | //Explicit 51 | 52 | template void update_dangling_nodes (int num_vertices, double* dangling_nodes, double damping_factor, cudaStream_t stream); 53 | template void update_dangling_nodes (int num_vertices, float* dangling_nodes, float damping_factor, cudaStream_t stream); 54 | } // end namespace nvgraph 55 | 56 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_max.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
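A minimal host-side sketch of how the update_dangling_nodes wrapper above is meant to be called (the buffer management is illustrative and assumes pagerank_kernels.hxx and the CUDA runtime are included): every entry that is still 0 after the out-degree scaling is overwritten with 1 - damping_factor, matching the a = alpha*a + (1-alpha)*e note in the kernel.

#include <cuda_runtime.h>
#include <vector>

void example_update_dangling(int num_vertices, float damping_factor, cudaStream_t stream)
{
    std::vector<float> h_a(num_vertices, 0.0f);   // pretend every vertex is dangling
    float* d_a = 0;
    cudaMalloc((void**)&d_a, num_vertices * sizeof(float));
    cudaMemcpy(d_a, h_a.data(), num_vertices * sizeof(float), cudaMemcpyHostToDevice);

    nvgraph::update_dangling_nodes(num_vertices, d_a, damping_factor, stream);

    cudaMemcpy(h_a.data(), d_a, num_vertices * sizeof(float), cudaMemcpyDeviceToHost);
    // every entry that was 0.0f now holds 1.0f - damping_factor (0.15f for the usual 0.85f)
    cudaFree(d_a);
}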
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_max(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_min.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_min(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_sum.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
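The contract_graph_* dispatchers above (one translation unit per value type and reduction operator) all forward to contract_from_aggregates_t with functors chosen from the VCombine/VReduce/ECombine/EReduce arguments. As a rough illustration of the operation itself, independent of the CUDA implementation: given an aggregate label per vertex, every fine edge (u, v, w) becomes a coarse edge (agg[u], agg[v]), and parallel coarse edges are combined with the chosen reduction. A small CPU sketch of that semantics with a sum reduction (the function name and the COO-map output are assumptions made for the example):

#include <map>
#include <utility>
#include <vector>

void contract_by_aggregates_sum(const std::vector<int>& row_offsets,
                                const std::vector<int>& col_indices,
                                const std::vector<float>& values,
                                const std::vector<int>& aggregates,   // aggregates[v] = coarse id of v
                                std::map<std::pair<int, int>, float>& coarse_edges)
{
    int n = (int)row_offsets.size() - 1;
    for (int u = 0; u < n; ++u)
        for (int e = row_offsets[u]; e < row_offsets[u + 1]; ++e)
        {
            std::pair<int, int> key(aggregates[u], aggregates[col_indices[e]]);
            coarse_edges[key] += values[e];   // sum reduction; min/max/mul are the other variants above
        }
}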
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_sum(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_max.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_max(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_min.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_min(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_sum.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_sum(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_float_mul.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_float_mul(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /cpp/src/graph_contraction/contraction_mv_double_mul.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | namespace nvgraph 20 | { 21 | //------------------------- Graph Contraction: ---------------------- 22 | // 23 | MultiValuedCsrGraph* contract_graph_mv_double_mul(MultiValuedCsrGraph& graph, 24 | int* pV, size_t n, 25 | cudaStream_t stream, 26 | const int& VCombine, 27 | const int& VReduce, 28 | const int& ECombine, 29 | const int& EReduce) 30 | { 31 | return static_cast*>(contract_from_aggregates_t::FctrType >(graph, pV, n, stream, 32 | static_cast(VCombine), 33 | static_cast(VReduce), 34 | static_cast(ECombine), 35 | static_cast(EReduce))); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /external/cub_semiring/util_namespace.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of the NVIDIA CORPORATION nor the 13 | * names of its contributors may be used to endorse or promote products 14 | * derived from this software without specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | ******************************************************************************/ 28 | 29 | /** 30 | * \file 31 | * Place-holder for prefixing the cub namespace 32 | */ 33 | 34 | #pragma once 35 | 36 | // For example: 37 | #define CUB_NS_PREFIX namespace cub_semiring { 38 | #define CUB_NS_POSTFIX } 39 | 40 | #ifndef CUB_NS_PREFIX 41 | #define CUB_NS_PREFIX 42 | #endif 43 | 44 | #ifndef CUB_NS_POSTFIX 45 | #define CUB_NS_POSTFIX 46 | #endif 47 | -------------------------------------------------------------------------------- /cpp/include/debug_macros.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include "nvgraph_error.hxx" 19 | 20 | #define CHECK_STATUS(...) \ 21 | do { \ 22 | if (__VA_ARGS__) { \ 23 | FatalError(#__VA_ARGS__, NVGRAPH_ERR_UNKNOWN); \ 24 | } \ 25 | } while (0) 26 | 27 | #define CHECK_NVGRAPH(...) \ 28 | do { \ 29 | NVGRAPH_ERROR e = __VA_ARGS__; \ 30 | if (e != NVGRAPH_OK) { \ 31 | FatalError(#__VA_ARGS__, e) \ 32 | } \ 33 | } while (0) 34 | 35 | #ifdef DEBUG 36 | #define COUT() (std::cout) 37 | #define CERR() (std::cerr) 38 | #define WARNING(message) \ 39 | do { \ 40 | std::stringstream ss; \ 41 | ss << "Warning (" << __FILE__ << ":" << __LINE__ << "): " << message; \ 42 | CERR() << ss.str() << std::endl; \ 43 | } while (0) 44 | #else // DEBUG 45 | #define WARNING(message) 46 | #endif 47 | -------------------------------------------------------------------------------- /cpp/include/csrmv_cub.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
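Typical use of the CHECK_STATUS / CHECK_NVGRAPH / WARNING helpers defined in debug_macros.h above; d_buf, bytes and run_solver() are placeholders, not symbols from this repository. Any non-zero status, or any NVGRAPH_ERROR other than NVGRAPH_OK, is converted into a FatalError carrying the stringified expression.

CHECK_STATUS(cudaMalloc(&d_buf, bytes));        // cudaSuccess is 0, so any failure throws
CHECK_NVGRAPH(run_solver());                    // run_solver() is assumed to return an NVGRAPH_ERROR
WARNING("falling back to the reference path");  // printed to stderr only when DEBUG is defined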
15 | */ 16 | #pragma once 17 | 18 | #include "nvgraph.h" 19 | #include "nvgraph_error.hxx" 20 | #include "multi_valued_csr_graph.hxx" 21 | 22 | namespace nvgraph 23 | { 24 | 25 | template 26 | class SemiringDispatch 27 | { 28 | public: 29 | template 30 | static NVGRAPH_ERROR Dispatch( 31 | const V* d_values, 32 | const I* d_row_offsets, 33 | const I* d_column_indices, 34 | const V* d_vector_x, 35 | V* d_vector_y, 36 | V alpha, 37 | V beta, 38 | I num_rows, 39 | I num_cols, 40 | I num_nonzeros, 41 | cudaStream_t stream); 42 | 43 | static NVGRAPH_ERROR InitAndLaunch( 44 | const nvgraph::MultiValuedCsrGraph &graph, 45 | const size_t weight_index, 46 | const void *p_alpha, 47 | const size_t x_index, 48 | const void *p_beta, 49 | const size_t y_index, 50 | const nvgraphSemiring_t SR, 51 | cudaStream_t stream 52 | ); 53 | }; 54 | 55 | 56 | // API wrapper to avoid bloating main API object nvgraph.cpp 57 | NVGRAPH_ERROR SemiringAPILauncher(nvgraphHandle_t handle, 58 | const nvgraphGraphDescr_t descrG, 59 | const size_t weight_index, 60 | const void *alpha, 61 | const size_t x, 62 | const void *beta, 63 | const size_t y, 64 | const nvgraphSemiring_t sr); 65 | } //namespace nvgraph 66 | -------------------------------------------------------------------------------- /test/generators/convertors/sort_eges.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // std::sort 4 | #include // std::vector 5 | 6 | struct edge { 7 | unsigned long int r; 8 | unsigned long int c; 9 | }; 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Fatal Error\n"); 14 | printf("%s", "Usage: ./sort edges.dat\n"); 15 | printf("%s", "Input : Graph in matrix market parttern format"); 16 | printf("%s", "Output : Graph with sorted edges in matrix market parttern format\n"); 17 | exit(0); 18 | } 19 | 20 | inline bool operator< (const edge& a, const edge& b){ if(a.r edges; 35 | 36 | // Get I/O names 37 | // The output is filename.mtx 38 | while (argv[1][i] != '\0') 39 | {outp[i] = argv[1][i];i++;} 40 | outp[i] = '_'; i++; 41 | outp[i] = 's';i++; 42 | outp[i]='\0'; 43 | 44 | // Open files 45 | fpin = fopen(argv[1],"r"); 46 | fpout = fopen(outp,"w"); 47 | if (!fpin || !fpout) 48 | { 49 | printf("%s", "Fatal Error : I/O fail\n"); 50 | exit(0); 51 | } 52 | 53 | // Skip lines starting with "%"" 54 | do 55 | { 56 | cc = fgetc(fpin); 57 | if (cc == '%') fgets(outp,128,fpin); 58 | } 59 | while (cc == '%'); 60 | fseek( fpin, -1, SEEK_CUR ); 61 | 62 | // Get n and nz 63 | fscanf(fpin,"%lu",&n); 64 | //fscanf(fpin,"%lu",&n); 65 | fscanf(fpin,"%lu",&nz); 66 | fprintf(fpout,"%lu %lu %lu\n",n, n, nz); 67 | // Read the first edge 68 | ok = fscanf(fpin,"%lu",&e.r); 69 | if (ok) 70 | { 71 | fscanf(fpin,"%lu",&e.c); 72 | edges.push_back(e); 73 | } 74 | else 75 | { 76 | printf("%s", "Fatal Error : Wrong data format\n"); 77 | exit(0); 78 | } 79 | 80 | //Loop 81 | for (i=0; i::iterator it = edges.begin() ; it != edges.end(); ++it) 89 | fprintf(fpout,"%lu %lu\n",it->r, it->c); 90 | return 0; 91 | } 92 | 93 | -------------------------------------------------------------------------------- /test/ref/nerstrand/nerstrand_driver.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "mmio.h" 11 | 12 | #include "mm_host.hxx" 13 | #include "nerstrand.h" 14 | 15 | 16 | static double second (void) 17 | { 18 | struct timeval tv; 19 | 
gettimeofday(&tv, NULL); 20 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 21 | } 22 | 23 | 24 | int main(int argc, const char **argv) 25 | { 26 | 27 | int m, n, nnz; 28 | double start, stop,r_mod; 29 | cid_t n_clusters; 30 | MM_typecode mc; 31 | if (argc != 3) 32 | { 33 | std::cout<<"Usage : ./nerstrand_bench "<(fpin, 1, &mc, &m, &n, &nnz) ; 40 | 41 | // Allocate memory on host 42 | std::vector cooRowIndA(nnz); 43 | std::vector cooColIndA(nnz); 44 | std::vector cooValA(nnz); 45 | std::vector csrRowPtrA(n+1); 46 | std::vector csrColIndA(nnz); 47 | std::vector csrValA(nnz); 48 | 49 | 50 | mm_to_coo(fpin, 1, nnz, &cooRowIndA[0], &cooColIndA[0], &cooValA[0],NULL) ; 51 | coo2csr (n, nnz, &cooValA[0], &cooRowIndA[0], &cooColIndA[0], &csrValA[0], &csrColIndA[0],&csrRowPtrA[0]); 52 | fclose(fpin); 53 | 54 | vtx_t nerstrand_n = static_cast(n); 55 | std::vector nerstrand_csrRowPtrA(csrRowPtrA.begin(), csrRowPtrA.end()); 56 | std::vector nerstrand_csrColIndA(csrColIndA.begin(), csrColIndA.end()); 57 | std::vector nerstrand_csrValA(csrValA.begin(), csrValA.end()); 58 | std::vector clustering(n); 59 | 60 | start = second(); 61 | start = second(); 62 | #pragma omp_parallel 63 | { 64 | int nerstrand_status = nerstrand_cluster_kway(&nerstrand_n, &nerstrand_csrRowPtrA[0],&nerstrand_csrColIndA[0], &nerstrand_csrValA[0], &n_clusters, &clustering[0], &r_mod); 65 | if (nerstrand_status != NERSTRAND_SUCCESS) 66 | std::cout<<"nerstrand execution failed"< 19 | namespace nvgraph 20 | { 21 | template class Lapack; 22 | 23 | template 24 | class Lapack 25 | { 26 | private: 27 | Lapack(); 28 | ~Lapack(); 29 | public: 30 | static void check_lapack_enabled(); 31 | 32 | static void gemm(bool transa, bool transb, int m, int n, int k, T alpha, const T * A, int lda, const T * B, int ldb, T beta, T * C, int ldc); 33 | 34 | // special QR for lanczos 35 | static void sterf(int n, T * d, T * e); 36 | static void steqr(char compz, int n, T * d, T * e, T * z, int ldz, T * work); 37 | 38 | // QR 39 | // computes the QR factorization of a general matrix 40 | static void geqrf (int m, int n, T *a, int lda, T *tau, T *work, int *lwork); 41 | // Generates the real orthogonal matrix Q of the QR factorization formed by geqrf. 
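The SemiringDispatch / SemiringAPILauncher declarations in csrmv_cub.h above generalize y = alpha*A*x + beta*y so that the add and multiply come from a semiring: (+, *) for plain SpMV, (min, +) for SSSP-style relaxation, (max, min) for widest path, (or, and) for reachability. A CPU sketch of that semantics, purely for intuition (exactly how alpha and beta are folded in by the CUB-based GPU kernels is an assumption here):

#include <functional>
#include <vector>

template <typename V, typename I>
void csrmv_semiring(const std::vector<I>& row_offsets, const std::vector<I>& cols,
                    const std::vector<V>& vals, const std::vector<V>& x, std::vector<V>& y,
                    V alpha, V beta, V plus_identity,
                    std::function<V(V, V)> plus, std::function<V(V, V)> times)
{
    for (size_t r = 0; r + 1 < row_offsets.size(); ++r)
    {
        V acc = plus_identity;
        for (I e = row_offsets[r]; e < row_offsets[r + 1]; ++e)
            acc = plus(acc, times(vals[e], x[cols[e]]));
        y[r] = plus(times(alpha, acc), times(beta, y[r]));
    }
}
// e.g. min-plus: plus = minimum, times = addition, plus_identity = a large sentinel such as FLT_MAX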
42 | //static void orgqr( int m, int n, int k, T* a, int lda, const T* tau, T* work, int* lwork ); 43 | // multiply C by implicit Q 44 | static void ormqr (bool right_side, bool transq, int m, int n, int k, T *a, int lda, T *tau, T *c, int ldc, T *work, int *lwork); 45 | //static void unmqr (bool right_side, bool transq, int m, int n, int k, T *a, int lda, T *tau, T *c, int ldc, T *work, int *lwork); 46 | //static void qrf (int n, T *H, T *Q, T *R); 47 | 48 | //static void hseqr (T* Q, T* R, T* eigenvalues,T* eigenvectors, int dim, int ldh, int ldq); 49 | static void geev(T* A, T* eigenvalues, int dim, int lda); 50 | static void geev(T* A, T* eigenvalues, T* eigenvectors, int dim, int lda, int ldvr); 51 | static void geev(T* A, T* eigenvalues_r, T* eigenvalues_i, T* eigenvectors_r, T* eigenvectors_i, int dim, int lda, int ldvr); 52 | 53 | }; 54 | } // end namespace nvgraph 55 | 56 | -------------------------------------------------------------------------------- /cpp/cmake/Modules/ConfigureGoogleTest.cmake: -------------------------------------------------------------------------------- 1 | set(GTEST_ROOT "${CMAKE_BINARY_DIR}/googletest") 2 | 3 | set(GTEST_CMAKE_ARGS "") 4 | #" -Dgtest_build_samples=ON" 5 | #" -DCMAKE_VERBOSE_MAKEFILE=ON") 6 | 7 | if(NOT CMAKE_CXX11_ABI) 8 | message(STATUS "GTEST: Disabling the GLIBCXX11 ABI") 9 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_C_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0") 10 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0") 11 | elseif(CMAKE_CXX11_ABI) 12 | message(STATUS "GTEST: Enabling the GLIBCXX11 ABI") 13 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_C_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=1") 14 | list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=1") 15 | endif(NOT CMAKE_CXX11_ABI) 16 | 17 | configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake" 18 | "${GTEST_ROOT}/CMakeLists.txt") 19 | 20 | file(MAKE_DIRECTORY "${GTEST_ROOT}/build") 21 | file(MAKE_DIRECTORY "${GTEST_ROOT}/install") 22 | 23 | execute_process(COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} . 24 | RESULT_VARIABLE GTEST_CONFIG 25 | WORKING_DIRECTORY ${GTEST_ROOT}) 26 | 27 | if(GTEST_CONFIG) 28 | message(FATAL_ERROR "Configuring GoogleTest failed: " ${GTEST_CONFIG}) 29 | endif(GTEST_CONFIG) 30 | 31 | set(PARALLEL_BUILD -j) 32 | if($ENV{PARALLEL_LEVEL}) 33 | set(NUM_JOBS $ENV{PARALLEL_LEVEL}) 34 | set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") 35 | endif($ENV{PARALLEL_LEVEL}) 36 | 37 | if(${NUM_JOBS}) 38 | if(${NUM_JOBS} EQUAL 1) 39 | message(STATUS "GTEST BUILD: Enabling Sequential CMake build") 40 | elseif(${NUM_JOBS} GREATER 1) 41 | message(STATUS "GTEST BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") 42 | endif(${NUM_JOBS} EQUAL 1) 43 | else() 44 | message(STATUS "GTEST BUILD: Enabling Parallel CMake build with all threads") 45 | endif(${NUM_JOBS}) 46 | 47 | execute_process(COMMAND ${CMAKE_COMMAND} --build .. 
-- ${PARALLEL_BUILD} 48 | RESULT_VARIABLE GTEST_BUILD 49 | WORKING_DIRECTORY ${GTEST_ROOT}/build) 50 | 51 | if(GTEST_BUILD) 52 | message(FATAL_ERROR "Building GoogleTest failed: " ${GTEST_BUILD}) 53 | endif(GTEST_BUILD) 54 | 55 | message(STATUS "GoogleTest installed here: " ${GTEST_ROOT}/install) 56 | set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/install/include") 57 | set(GTEST_LIBRARY_DIR "${GTEST_ROOT}/install/lib") 58 | set(GTEST_FOUND TRUE) 59 | -------------------------------------------------------------------------------- /cpp/include/size2_selector.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | //#include 20 | #include 21 | #include 22 | 23 | namespace nvgraph { 24 | 25 | typedef enum 26 | { 27 | USER_PROVIDED = 0, // using edge values as is 28 | SCALED_BY_ROW_SUM = 1, // 0.5*(A_ij+A_ji)/max(d(i),d (j)), where d(i) is the sum of the row i 29 | SCALED_BY_DIAGONAL = 2, // 0.5*(A_ij+A_ji)/max(diag(i),diag(j)) 30 | }Matching_t; 31 | 32 | template 33 | class Size2Selector 34 | { 35 | 36 | public: 37 | typedef IndexType_ IndexType; 38 | typedef ValueType_ ValueType; 39 | 40 | Size2Selector(); 41 | 42 | Size2Selector(Matching_t similarity_metric, int deterministic = 1, int max_iterations = 15 , ValueType numUnassigned_tol = 0.05 ,bool two_phase = false, bool merge_singletons = true, cudaStream_t stream = 0) 43 | :m_similarity_metric(similarity_metric), m_deterministic(deterministic), m_max_iterations(max_iterations), m_numUnassigned_tol(numUnassigned_tol), m_two_phase(two_phase), m_merge_singletons(merge_singletons), m_stream(stream) 44 | { 45 | m_aggregation_edge_weight_component = 0; 46 | m_weight_formula = 0; 47 | } 48 | 49 | NVGRAPH_ERROR setAggregates(const ValuedCsrGraph &A, Vector &aggregates, int &num_aggregates); 50 | 51 | protected: 52 | NVGRAPH_ERROR setAggregates_common_sqblocks(const ValuedCsrGraph &A, Vector &aggregates, int &num_aggregates); 53 | Matching_t m_similarity_metric; 54 | int m_deterministic; 55 | int m_max_iterations; 56 | ValueType m_numUnassigned_tol; 57 | bool m_two_phase; 58 | bool m_merge_singletons; 59 | cudaStream_t m_stream; 60 | int m_aggregation_edge_weight_component; 61 | int m_weight_formula; 62 | }; 63 | 64 | }//nvgraph 65 | -------------------------------------------------------------------------------- /cpp/tests/benchmarkScripts/run_nvgraph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ****************** Edit this ************************* 4 | #Path to nvgraph bin graphs 5 | # From p4matrices:2024 sync //matrices/p4matrices/graphs/... 6 | nvg_data_prefix="/home/afender/src/matrices/p4matrices/graphs" 7 | 8 | #Path to nvgraph 9 | # nvg_bin_prefix should contain a release build of nvgraph's ToT (from p4sw //sw/gpgpu/nvgraph/...) 
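ConfigureGoogleTest.cmake above reads the PARALLEL_LEVEL environment variable to pick the -j level for the GoogleTest sub-build; presumably the intended invocation from the build directory is along these lines (the job count is arbitrary):

# illustrative invocation, not a script from this repository
PARALLEL_LEVEL=8 cmake ..
make -j8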
10 | # and nvgraph_benchmark executable which is build along with nvgraph's tests 11 | nvg_bin_prefix="/home/afender/src/sw/sw/gpgpu/bin/x86_64_Linux_release" 12 | # ***************************************************** 13 | 14 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$nvg_bin_prefix 15 | export PATH=$PATH:$nvg_bin_prefix 16 | 17 | declare -a arr=( 18 | "$nvg_data_prefix/webbase1M/webbase-1M_T.mtx.bin" 19 | "$nvg_data_prefix/liveJournal/ljournal-2008_T.mtx.bin" 20 | "$nvg_data_prefix/webGoogle/web-Google_T.mtx.bin" 21 | "$nvg_data_prefix/citPatents/cit-Patents_T.mtx.bin" 22 | "$nvg_data_prefix/webBerkStan/web-BerkStan_T.mtx.bin" 23 | "$nvg_data_prefix/WikiTalk/wiki-Talk_T.mtx.bin" 24 | "$nvg_data_prefix/soc-liveJournal/soc-LiveJournal1_T.mtx.bin" 25 | # Warning : Twitter case works only on GPU with more than 12 GB of memory 26 | "$nvg_data_prefix/Twitter/twitter.bin" 27 | #Just for debug 28 | #"$nvg_data_prefix/small/small.bin" 29 | ) 30 | 31 | 32 | ## now loop through the above array 33 | for i in "${arr[@]}" 34 | do 35 | echo "Pagerank" 36 | echo "$i" 37 | echo "single precision" 38 | $nvg_bin_prefix/nvgraph_benchmark --pagerank "$i" 0.85 500 1E-6 --float --repeats 10 39 | echo 40 | #echo "Pagerank" 41 | #echo "$i" 42 | #echo "double precision" 43 | #$nvg_bin_prefix/nvgraph_benchmark --pagerank "$i" 0.85 500 1E-6 --double --repeats 10 44 | #echo 45 | done 46 | echo 47 | for i in "${arr[@]}" 48 | do 49 | echo "SSSP" 50 | echo "$i" 51 | echo "single precision" 52 | $nvg_bin_prefix/nvgraph_benchmark --sssp "$i" 0 --float --repeats 10 53 | echo 54 | #echo "SSSP" 55 | #echo "$i" 56 | #echo "double precision" 57 | #$nvg_bin_prefix/nvgraph_benchmark --sssp "$i" 0 --double --repeats 10 58 | #echo 59 | done 60 | echo 61 | for i in "${arr[@]}" 62 | do 63 | echo "Widest Path" 64 | echo "$i" 65 | echo "single precision" 66 | $nvg_bin_prefix/nvgraph_benchmark --widest "$i" 0 --float --repeats 10 67 | echo 68 | #echo "Widest Path" 69 | #echo "$i" 70 | #echo "double precision" 71 | #$nvg_bin_prefix/nvgraph_benchmark --widest "$i" 0 --double --repeats 10 72 | #echo 73 | done 74 | echo 75 | -------------------------------------------------------------------------------- /cpp/src/graph_extractor.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #include 18 | 19 | 20 | 21 | namespace nvgraph 22 | { 23 | //------------------------- SubGraph Extraction: ---------------------- 24 | // 25 | CsrGraph* extract_subgraph_by_vertices(CsrGraph& graph, 26 | int* pV, size_t n, cudaStream_t stream) 27 | { 28 | return extract_from_vertex_subset(graph, pV, n, stream); 29 | } 30 | 31 | MultiValuedCsrGraph* extract_subgraph_by_vertices(MultiValuedCsrGraph& graph, 32 | int* pV, size_t n, cudaStream_t stream) 33 | { 34 | return static_cast*>(extract_from_vertex_subset(graph, pV, n, stream)); 35 | } 36 | 37 | MultiValuedCsrGraph* extract_subgraph_by_vertices(MultiValuedCsrGraph& graph, 38 | int* pV, size_t n, cudaStream_t stream) 39 | { 40 | return static_cast*>(extract_from_vertex_subset(graph, pV, n, stream)); 41 | } 42 | 43 | CsrGraph* extract_subgraph_by_edges(CsrGraph& graph, 44 | int* pV, size_t n, cudaStream_t stream) 45 | { 46 | return extract_from_edge_subset(graph, pV, n, stream); 47 | } 48 | 49 | MultiValuedCsrGraph* extract_subgraph_by_edges(MultiValuedCsrGraph& graph, 50 | int* pV, size_t n, cudaStream_t stream) 51 | { 52 | return static_cast*>(extract_from_edge_subset(graph, pV, n, stream)); 53 | } 54 | 55 | MultiValuedCsrGraph* extract_subgraph_by_edges(MultiValuedCsrGraph& graph, 56 | int* pV, size_t n, cudaStream_t stream) 57 | { 58 | return static_cast*>(extract_from_edge_subset(graph, pV, n, stream)); 59 | } 60 | 61 | 62 | 63 | 64 | 65 | 66 | }// end namespace nvgraph 67 | 68 | -------------------------------------------------------------------------------- /test/generators/convertors/edges_to_H.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | struct edge { 7 | unsigned long int r; 8 | unsigned long int c; 9 | }; 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Fatal Error\n"); 14 | printf("%s", "Usage: ./H edges.dat\n"); 15 | printf("%s", "Input : Graph given as a sorted set of edges\n"); 16 | printf("%s", "Output : Row sub-stochastic matrix in MatrixMarket format\n"); 17 | exit(0); 18 | } 19 | 20 | int main (int argc, char *argv[]) 21 | { 22 | // Check args 23 | if (argc != 2) printUsageAndExit(); 24 | 25 | // Vars 26 | unsigned long int n, nz, i = 0, current_r, nbr = 1; 27 | int ok; 28 | double scal; 29 | char outp[128], cc; 30 | FILE *fpin = NULL, *fpout = NULL; 31 | edge e; 32 | std::vector row; 33 | // Get I/O names 34 | // The output is filename.mtx 35 | while (argv[1][i] != '\0') 36 | {outp[i] = argv[1][i];i++;} 37 | outp[i] = '_'; i++; 38 | outp[i] = 'm';i++;outp[i] = 't';i++;outp[i] = 'x';i++; 39 | outp[i]='\0'; 40 | 41 | // Open files 42 | fpin = fopen(argv[1],"r"); 43 | fpout = fopen(outp,"w"); 44 | if (!fpin || !fpout) 45 | { 46 | printf("%s", "Fatal Error : I/O fail\n"); 47 | exit(0); 48 | } 49 | 50 | // Get n and nz 51 | fscanf(fpin,"%lu",&n); 52 | fscanf(fpin,"%lu",&n); 53 | fscanf(fpin,"%lu",&nz); 54 | 55 | fprintf(fpout, "%s", "%%" ); 56 | fprintf(fpout,"MatrixMarket matrix coordinate real general\n"); 57 | fprintf(fpout,"%lu %lu %lu\n",n, n, nz); 58 | 59 | // Read the first edge 60 | ok = fscanf(fpin,"%lu",&e.r); 61 | if (ok) 62 | { 63 | fscanf(fpin,"%lu",&e.c); 64 | current_r = e.r; 65 | row.push_back(e); 66 | } 67 | else 68 | { 69 | printf("%s", "Fatal Error : Wrong data format\n"); 70 | exit(0); 71 | } 72 | 73 | //Loop 74 | for (i=0; i::iterator it = row.begin() ; it != row.end(); ++it) 87 | fprintf(fpout,"%lu %lu %.9lf\n",it->r, it->c, scal); 88 | row.clear(); 89 | nbr = 1; 90 | } 91 
| row.push_back(e); 92 | } 93 | // Last print 94 | scal = 1.0/nbr; 95 | for (std::vector::iterator it = row.begin() ; it != row.end(); ++it) 96 | fprintf(fpout,"%lu %lu %.9f\n",it->r, it->c, scal); 97 | 98 | return 0; 99 | } 100 | 101 | -------------------------------------------------------------------------------- /cpp/include/cnmem_shared_ptr.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | 23 | // 24 | 25 | #if __cplusplus > 199711L 26 | #include 27 | #define SHARED_PREFIX std 28 | 29 | #else 30 | #include 31 | #define SHARED_PREFIX boost 32 | 33 | #endif 34 | 35 | #include 36 | #include "nvgraph_error.hxx" 37 | 38 | namespace nvgraph 39 | { 40 | 41 | template< typename T > 42 | class DeviceDeleter 43 | { 44 | cudaStream_t mStream; 45 | public: 46 | DeviceDeleter(cudaStream_t stream) : mStream(stream) {} 47 | void operator()(T *ptr) 48 | { 49 | cnmemStatus_t status = cnmemFree(ptr, mStream); 50 | if( status != CNMEM_STATUS_SUCCESS ) 51 | { 52 | FatalError("Memory manager internal error (free)", NVGRAPH_ERR_UNKNOWN); 53 | } 54 | } 55 | }; 56 | 57 | 58 | template< typename T > 59 | inline SHARED_PREFIX::shared_ptr allocateDevice(size_t n, cudaStream_t stream) 60 | { 61 | T *ptr = NULL; 62 | cnmemStatus_t status = cnmemMalloc((void**) &ptr, n*sizeof(T), stream); 63 | if( status == CNMEM_STATUS_OUT_OF_MEMORY) 64 | { 65 | FatalError("Not enough memory", NVGRAPH_ERR_NO_MEMORY); 66 | } 67 | else if (status != CNMEM_STATUS_SUCCESS) 68 | { 69 | FatalError("Memory manager internal error (alloc)", NVGRAPH_ERR_UNKNOWN); 70 | } 71 | return SHARED_PREFIX::shared_ptr(ptr, DeviceDeleter(stream)); 72 | } 73 | 74 | template< typename T > 75 | class DeviceReleaser 76 | { 77 | cudaStream_t mStream; 78 | public: 79 | DeviceReleaser(cudaStream_t stream) : mStream(stream) {} 80 | void operator()(T *ptr) 81 | { 82 | 83 | } 84 | }; 85 | 86 | template< typename T > 87 | inline SHARED_PREFIX::shared_ptr attachDevicePtr(T * ptr_in, cudaStream_t stream) 88 | { 89 | T *ptr = ptr_in; 90 | return SHARED_PREFIX::shared_ptr(ptr, DeviceReleaser(stream)); 91 | } 92 | 93 | 94 | } // end namespace nvgraph 95 | 96 | -------------------------------------------------------------------------------- /cpp/include/triangles_counting_defines.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
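A short usage sketch for the cnmem helpers above; it assumes a cnmem pool has already been created for the current device (done elsewhere in the library) and that the CUDA runtime header is included:

void example_device_buffer(size_t n, cudaStream_t stream)
{
    // the allocation comes from the cnmem pool and is returned to it automatically
    // when the last copy of the shared_ptr goes out of scope
    SHARED_PREFIX::shared_ptr<float> d_tmp = nvgraph::allocateDevice<float>(n, stream);
    cudaMemsetAsync(d_tmp.get(), 0, n * sizeof(float), stream);

    // attachDevicePtr, by contrast, wraps an externally owned pointer with a no-op
    // deleter, so ownership stays with the caller.
}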
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | 22 | #ifdef _MSC_VER 23 | #include 24 | #else 25 | #include 26 | #endif 27 | 28 | 29 | /* 30 | #ifdef MSVC_VER 31 | #include 32 | #pragma intrinsic(_BitScanForward) 33 | #pragma intrinsic(_BitScanForward64) 34 | #pragma intrinsic(_BitScanReverse) 35 | #pragma intrinsic(_BitScanReverse64) 36 | #endif 37 | */ 38 | 39 | #define MIN(x,y) (((x)<(y))?(x):(y)) 40 | #define MAX(x,y) (((x)>(y))?(x):(y)) 41 | 42 | #define THREADS (128) 43 | #define DIV_UP(a,b) (((a)+((b)-1))/(b)) 44 | #define BITSOF(x) (sizeof(*x)*8) 45 | 46 | #define BLK_BWL0 (128) 47 | #define WRP_BWL0 (128) 48 | 49 | #define HUGE_GRAPH 50 | 51 | #define DEG_THR1 (3.5) 52 | #define DEG_THR2 (38.0) 53 | 54 | namespace nvgraph 55 | { 56 | 57 | namespace triangles_counting 58 | { 59 | 60 | template struct type_utils; 61 | 62 | template <> 63 | struct type_utils 64 | { 65 | typedef int LOCINT; 66 | static const LOCINT LOCINT_MAX = INT_MAX; 67 | #ifdef MPI_VERSION 68 | static const MPI_Datatype LOCINT_MPI = MPI_INT; 69 | #endif 70 | static __inline__ LOCINT abs(const LOCINT& x) 71 | { 72 | return abs(x); 73 | } 74 | }; 75 | 76 | template <> 77 | struct type_utils 78 | { 79 | typedef uint64_t LOCINT; 80 | static const LOCINT LOCINT_MAX = LLONG_MAX; 81 | #ifdef MPI_VERSION 82 | static const MPI_Datatype LOCINT_MPI = MPI_LONG_LONG; 83 | #endif 84 | 85 | static __inline__ LOCINT abs(const LOCINT& x) 86 | { 87 | return llabs(x); 88 | } 89 | }; 90 | 91 | 92 | template 93 | struct spmat_t { 94 | T N; 95 | T nnz; 96 | T nrows; 97 | const T *roff_d; 98 | const T *rows_d; 99 | const T *cols_d; 100 | bool is_lower_triangular; 101 | }; 102 | 103 | } // namespace triangles_counting 104 | 105 | } // namespace nvgraph 106 | -------------------------------------------------------------------------------- /cpp/include/sssp.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
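The THREADS and DIV_UP helpers above are the usual ceiling-division grid-sizing idiom. A one-line illustration (my_kernel, d_rows and stream are placeholders, not symbols from this file):

int nblocks = DIV_UP(nrows, THREADS);                        // ceil(nrows / 128)
my_kernel<<<nblocks, THREADS, 0, stream>>>(d_rows, nrows);   // one thread per row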
15 | */ 16 | 17 | #pragma once 18 | #include 19 | namespace nvgraph 20 | { 21 | template 22 | class Sssp 23 | { 24 | public: 25 | typedef IndexType_ IndexType; 26 | typedef ValueType_ ValueType; 27 | 28 | private: 29 | ValuedCsrGraph m_network ; 30 | Vector m_sssp; 31 | Vector m_tmp; 32 | Vector m_mask; // mask[i] = 0 if we can ignore the i th column in the csrmv 33 | 34 | IndexType m_source; 35 | ValueType m_residual; 36 | int m_iterations; 37 | bool m_is_setup; 38 | 39 | cudaStream_t m_stream; 40 | 41 | bool solve_it(); 42 | void setup(IndexType source_index, Vector& source_connection, Vector& sssp_result); 43 | 44 | public: 45 | // Simple constructor 46 | Sssp(void) {}; 47 | // Simple destructor 48 | ~Sssp(void) {}; 49 | 50 | // Create a Sssp solver attached to a the transposed of a weighted network 51 | // *** network is the transposed/CSC*** 52 | Sssp(const ValuedCsrGraph & network, cudaStream_t stream = 0):m_network(network),m_is_setup(false), m_stream(stream) {}; 53 | 54 | /*! Find the sortest path from the vertex source_index to every other vertices. 55 | * 56 | * \param source_index The source. 57 | * \param source_connection The connectivity of the source 58 | * if there is a link from source_index to i, source_connection[i] = E(source_index, i) 59 | * otherwise source_connection[i] = inifinity 60 | * source_connection[source_index] = 0 61 | The source_connection is computed somewhere else. 62 | * \param (output) m_sssp m_sssp[i] contains the sortest path from the source to the vertex i. 63 | */ 64 | 65 | NVGRAPH_ERROR solve(IndexType source_index, Vector& source_connection, Vector& sssp_result); 66 | inline int get_iterations() const {return m_iterations;} 67 | }; 68 | 69 | } // end namespace nvgraph 70 | 71 | -------------------------------------------------------------------------------- /cpp/include/widest_path.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
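A hypothetical driver for the Sssp solver declared above; the <int, float> instantiation is an assumption, and building the transposed (CSC) ValuedCsrGraph and the two nvgraph::Vector arguments as documented in the comment is left out:

void example_sssp(const nvgraph::ValuedCsrGraph<int, float>& network_csc,
                  nvgraph::Vector<float>& source_connection,   // prepared as described above
                  nvgraph::Vector<float>& sssp_result,         // receives the distances
                  int source_index)
{
    nvgraph::Sssp<int, float> solver(network_csc);
    if (solver.solve(source_index, source_connection, sssp_result) == NVGRAPH_OK)
    {
        int iters = solver.get_iterations();   // number of relaxation sweeps performed
        (void)iters;
    }
}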
15 | */ 16 | 17 | #pragma once 18 | namespace nvgraph 19 | { 20 | template 21 | class WidestPath 22 | { 23 | public: 24 | typedef IndexType_ IndexType; 25 | typedef ValueType_ ValueType; 26 | private: 27 | ValuedCsrGraph m_network ; 28 | Vector m_widest_path; 29 | Vector m_tmp; 30 | Vector m_mask; // mask[i] = 0 if we can ignore the i th column in the csrmv 31 | IndexType m_source; 32 | ValueType m_residual; 33 | int m_iterations; 34 | bool m_is_setup; 35 | cudaStream_t m_stream; 36 | bool solve_it(); 37 | void setup(IndexType source_index, Vector& source_connection, Vector& WidestPath_result); 38 | public: 39 | // Simple constructor 40 | WidestPath(void) {}; 41 | // Simple destructor 42 | ~WidestPath(void) {}; 43 | // Create a WidestPath solver attached to a the transposed of a weighted network 44 | // *** network is the transposed/CSC*** 45 | WidestPath(const ValuedCsrGraph & network, cudaStream_t stream = 0):m_network(network),m_is_setup(false), m_stream(stream) {}; 46 | 47 | /*! Find the Widest Path from the vertex source_index to every other vertices. 48 | * 49 | * \param source_index The source. 50 | * \param source_connection The connectivity of the source 51 | * - if there is a link from source_index to i, source_connection[i] = E(source_index, i) ) 52 | * - otherwise source_connection[i] = op.plus->id 53 | * - source_connection[source_index] = op.time->id 54 | The source_connection is provided as input 55 | * \param (output) m_widest_path m_widest_path[i] contains the Widest Path from the source to the vertex i. 56 | */ 57 | 58 | NVGRAPH_ERROR solve(IndexType source_index, Vector& source_connection, Vector& WidestPath_result); 59 | inline int get_iterations() const {return m_iterations;} 60 | }; 61 | } // end namespace nvgraph 62 | 63 | -------------------------------------------------------------------------------- /cpp/include/test/delta_modularity_test.cuh: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2019, NVIDIA CORPORATION. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
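WidestPath above reuses the same solver skeleton as Sssp but over the (max, min) semiring: a path's width is the minimum edge weight along it, and the best width into a vertex is the maximum over candidate paths. Written out for a single edge (u, v) with weight w_uv, the implied relaxation is (illustrative only, not the library's kernel):

width_v = std::max(width_v, std::min(width_u, w_uv));   // widest path: max over paths of min edge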
16 | */ 17 | #pragma once 18 | 19 | #include 20 | #include "test_opt_utils.h" 21 | #include "graph_utils.cuh" 22 | #include "louvain.cuh" 23 | #include "gtest/gtest.h" 24 | #include "high_res_clock.h" 25 | #include "util.cuh" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | 34 | template 35 | __global__ void 36 | kernel_delta_modularity(const int n_vertex, IdxIter csr_ptr_iter, IdxIter csr_ind_iter, ValIter csr_val_iter, IdxIter cluster, ValType* score){ 37 | 38 | int c = blockIdx.x * blockDim.x + threadIdx.x; 39 | int i = blockIdx.y * blockDim.y + threadIdx.y; 40 | if( i &csr_ptr_d, 49 | thrust::device_vector &csr_ind_d, 50 | thrust::device_vector &csr_val_d, 51 | const int size){ 52 | 53 | HighResClock hr_clock; 54 | double timed; 55 | 56 | dim3 block_size((size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, (size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, 1); 57 | dim3 grid_size(BLOCK_SIZE_2D, BLOCK_SIZE_2D, 1); 58 | 59 | 60 | thrust::device_vector cluster_d(size); 61 | thrust::sequence(cluster_d.begin(), cluster_d.end()); 62 | std::cout<<"cluster: "; 63 | nvlouvain::display_vec(cluster_d); 64 | 65 | thrust::device_vector score_d(size*size); 66 | T* score_d_raw_ptr = thrust::raw_pointer_cast(score_d.data()); 67 | 68 | 69 | hr_clock.start(); 70 | 71 | kernel_delta_modularity<<>>(size, csr_ptr_d.begin(), csr_ind_d.begin(), csr_val_d.begin(), cluster_d.begin(), score_d_raw_ptr); 72 | 73 | 74 | CUDA_CALL(cudaDeviceSynchronize()); 75 | 76 | hr_clock.stop(&timed); 77 | double mod_time(timed); 78 | std::cout<<"delta modularity: "< 20 | #include "test_opt_utils.h" 21 | #include "graph_utils.cuh" 22 | #include "louvain.cuh" 23 | #include "gtest/gtest.h" 24 | #include "high_res_clock.h" 25 | #include "util.cuh" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | 36 | template 37 | __global__ void 38 | kernel_local_mem(const int n_vertex ){ 39 | 40 | thrust::device_system_tag device_sys; 41 | thrust::pointer temp_i = thrust::malloc(device_sys, n_vertex); // for weight on i and for sum_k 42 | thrust::pointer temp_idx = thrust::malloc(device_sys, n_vertex); // for weight on i and for sum_k 43 | 44 | 45 | 46 | *temp_i = 10.0; 47 | *(temp_i + n_vertex-1) = 100.5; 48 | 49 | thrust::return_temporary_buffer(device_sys, temp_idx); 50 | thrust::return_temporary_buffer(device_sys, temp_i); 51 | } 52 | 53 | template 54 | __global__ void 55 | kernel_local_mem_new(const int n_vertex ){ 56 | 57 | ValType * temp_i = new ValType[n_vertex]; 58 | IdxType * temp_idx = new IdxType[n_vertex]; 59 | 60 | 61 | *temp_i = 10.0; 62 | *(temp_i + n_vertex-1) = 100.5; 63 | thrust::sequence(thrust::cuda::par, temp_idx, temp_idx + n_vertex); 64 | printf("%d %d %d ... 
%d\n",*temp_idx, *(temp_idx+1), *(temp_idx+2), *(temp_idx + n_vertex - 1) ); 65 | 66 | delete [] temp_i; 67 | delete [] temp_idx; 68 | } 69 | 70 | 71 | 72 | 73 | void mem_allocate_test(const int size){ 74 | 75 | 76 | HighResClock hr_clock; 77 | double timed; 78 | 79 | 80 | dim3 block_size((size + BLOCK_SIZE_1D -1)/ BLOCK_SIZE_1D, 1, 1); 81 | dim3 grid_size(BLOCK_SIZE_1D, 1, 1); 82 | hr_clock.start(); 83 | 84 | kernel_local_mem<<>>(30000); 85 | 86 | kernel_local_mem_new<<>>(30000); 87 | 88 | 89 | CUDA_CALL(cudaDeviceSynchronize()); 90 | hr_clock.stop(&timed); 91 | double raw_ptr_time(timed); 92 | 93 | std::cout<<"allocate_mem_runtime: "< 46 | NVGRAPH_ERROR modularity_maximization( ValuedCsrGraph& G, 47 | IndexType_ nClusters, 48 | IndexType_ nEigVecs, 49 | IndexType_ maxIter_lanczos, 50 | IndexType_ restartIter_lanczos, 51 | ValueType_ tol_lanczos, 52 | IndexType_ maxIter_kmeans, 53 | ValueType_ tol_kmeans, 54 | IndexType_ * __restrict__ clusters, 55 | Vector &eigVals, 56 | Vector &eigVecs, 57 | IndexType_ & iters_lanczos, 58 | IndexType_ & iters_kmeans) ; 59 | 60 | 61 | /// Compute modularity 62 | /** This function determines the modularity based on a graph and cluster assignments 63 | * @param G Weighted graph in CSR format 64 | * @param nClusters Number of clusters. 65 | * @param parts (Input, device memory, n entries) Cluster assignments. 66 | * @param modularity On exit, modularity 67 | */ 68 | template 69 | NVGRAPH_ERROR analyzeModularity(ValuedCsrGraph & G, 70 | IndexType_ nClusters, 71 | const IndexType_ * __restrict__ parts, 72 | ValueType_ & modularity) ; 73 | 74 | } 75 | 76 | -------------------------------------------------------------------------------- /cpp/include/bfs2d.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | 21 | //Used in nvgraph.h 22 | #define TRAVERSAL_DEFAULT_ALPHA 15 23 | #define TRAVERSAL_DEFAULT_BETA 18 24 | 25 | #include "nvgraph_error.hxx" 26 | #include "2d_partitioning.h" 27 | 28 | namespace nvgraph { 29 | template 30 | class Bfs2d { 31 | private: 32 | Matrix2d* M; 33 | 34 | bool directed; 35 | bool deterministic; 36 | GlobalType alpha; 37 | GlobalType beta; 38 | 39 | // edgemask, distances, predecessors are set/read by users - using Vectors 40 | bool useEdgeMask; 41 | bool computeDistances; 42 | bool computePredecessors; 43 | int32_t vertices_bmap_size; 44 | VertexData2D* distances; 45 | VertexData2D* predecessors; 46 | 47 | //Working data 48 | VertexData2D* frontier_bmap; 49 | VertexData2D* visited_bmap; 50 | VertexData2D_Unbuffered* frontier; 51 | VertexData2D_Unbuffered* trim_frontier; 52 | VertexData2D_Unbuffered* frontierSize; 53 | VertexData2D_Unbuffered* degreeFlags; 54 | std::vector frontierSize_h; 55 | VertexData2D_Unbuffered* exSumDegree; 56 | VertexData2D_Unbuffered* exSumStorage; 57 | VertexData2D_Unbuffered* bucketOffsets; 58 | std::vector frontierDegree_h; 59 | 60 | // Output locations 61 | GlobalType* distances_out; 62 | GlobalType* predecessors_out; 63 | 64 | NVGRAPH_ERROR setup(); 65 | 66 | void clean(); 67 | 68 | public: 69 | virtual ~Bfs2d(void) { 70 | clean(); 71 | }; 72 | 73 | Bfs2d(Matrix2d* _M, 74 | bool _directed, 75 | GlobalType _alpha, 76 | GlobalType _beta) : 77 | M(_M), 78 | directed(_directed), 79 | alpha(_alpha), 80 | beta(_beta){ 81 | distances = NULL; 82 | predecessors = NULL; 83 | frontier_bmap = NULL; 84 | visited_bmap = NULL; 85 | setup(); 86 | } 87 | 88 | NVGRAPH_ERROR configure(GlobalType *distances, GlobalType *predecessors); 89 | 90 | NVGRAPH_ERROR traverse(GlobalType source_vertex); 91 | 92 | //Used only for benchmarks 93 | NVGRAPH_ERROR traverse(GlobalType *source_vertices, int32_t nsources); 94 | }; 95 | } // end namespace nvgraph 96 | 97 | -------------------------------------------------------------------------------- /cpp/include/valued_csr_graph.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "csr_graph.hxx" 20 | #include "nvgraph_vector.hxx" 21 | 22 | namespace nvgraph 23 | { 24 | 25 | /*! A ValuedCsrGraph is a graph strored in a CSR data structure. 26 | It represents an weighted graph and has storage for row_offsets and column_indices and values 27 | */ 28 | template 29 | class ValuedCsrGraph : public nvgraph::CsrGraph 30 | { 31 | public: 32 | typedef IndexType_ IndexType; 33 | typedef ValueType_ ValueType; 34 | 35 | private: 36 | typedef nvgraph::CsrGraph Parent; 37 | 38 | protected: 39 | /*! Storage for the nonzero entries of the CSR data structure. 40 | */ 41 | SHARED_PREFIX::shared_ptr values; 42 | 43 | public: 44 | 45 | /*! 
Construct an empty \p ValuedCsrGraph. 46 | */ 47 | ValuedCsrGraph(void) {} 48 | /*! Destruct a \p ValuedCsrGraph. 49 | */ 50 | ~ValuedCsrGraph(void) {} 51 | 52 | /*! Construct a \p ValuedCsrGraph with a specific shape and number of nonzero entries. 53 | * 54 | * \param num_rows Number of rows. 55 | * \param num_entries Number of nonzero graph entries. 56 | */ 57 | ValuedCsrGraph(size_t num_rows, size_t num_entries, cudaStream_t stream) 58 | : Parent(num_rows, num_entries, stream), 59 | values(allocateDevice(num_entries, NULL)) {} 60 | 61 | /*! Construct a \p ValuedCsrGraph from another graph. 62 | * 63 | * \param ValuedCsrGraph Another graph in csr 64 | */ 65 | ValuedCsrGraph(const ValuedCsrGraph& gr): 66 | Parent(gr), 67 | values(gr.values) 68 | {} 69 | 70 | /*! Construct a \p ValuedCsrGraph from another graph. 71 | * 72 | * \param ValuedCsrGraph Another graph in csr 73 | */ 74 | ValuedCsrGraph(const Parent& gr, Vector& vals): 75 | Parent(gr), 76 | values(vals.raw()) 77 | { 78 | 79 | } 80 | 81 | inline ValueType* get_raw_values() const { return values.get(); } 82 | 83 | 84 | /*! Swap the contents of two \p ValuedCsrGraph objects. 85 | * 86 | * \param graph Another graph in csr 87 | */ 88 | void swap(ValuedCsrGraph& graph); 89 | 90 | /*! Assignment from another graph. 91 | * 92 | * \param graph Another graph in csr 93 | */ 94 | ValuedCsrGraph& operator=(const ValuedCsrGraph& graph); 95 | 96 | //Accept method injection 97 | DEFINE_VISITABLE(IndexType_) 98 | 99 | }; // class ValuedCsrGraph 100 | } 101 | 102 | -------------------------------------------------------------------------------- /cpp/tests/nvgraph_test_common.h: -------------------------------------------------------------------------------- 1 | #include /* import labs() */ 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #if defined(_WIN32) 8 | #if !defined(WIN32_LEAN_AND_MEAN) 9 | #define WIN32_LEAN_AND_MEAN 10 | #endif 11 | #define NOMINMAX 12 | #include 13 | static double second (void) 14 | { 15 | LARGE_INTEGER t; 16 | static double oofreq; 17 | static int checkedForHighResTimer; 18 | static BOOL hasHighResTimer; 19 | 20 | if (!checkedForHighResTimer) { 21 | hasHighResTimer = QueryPerformanceFrequency (&t); 22 | oofreq = 1.0 / (double)t.QuadPart; 23 | checkedForHighResTimer = 1; 24 | } 25 | if (hasHighResTimer) { 26 | QueryPerformanceCounter (&t); 27 | return (double)t.QuadPart * oofreq; 28 | } else { 29 | return (double)GetTickCount() / 1000.0; 30 | } 31 | } 32 | 33 | static long long getSystemMemory() 34 | { 35 | MEMORYSTATUSEX state; // Requires >= win2k 36 | memset (&state, 0, sizeof(state)); 37 | state.dwLength = sizeof(state); 38 | if (0 == GlobalMemoryStatusEx(&state)) { 39 | return 0; 40 | } else { 41 | return (long long)state.ullTotalPhys; 42 | } 43 | } 44 | #elif defined(__linux) || defined(__powerpc64__) 45 | #include 46 | #include 47 | #include 48 | #include 49 | static double second (void) 50 | { 51 | struct timeval tv; 52 | gettimeofday(&tv, NULL); 53 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 54 | } 55 | 56 | static long long getSystemMemory(void) 57 | { 58 | struct sysinfo s_info; 59 | sysinfo (&s_info); 60 | return (long long)s_info.totalram * (long long)s_info.mem_unit; 61 | } 62 | #elif defined(__APPLE__) 63 | #include 64 | #include 65 | #include 66 | #include 67 | #include 68 | static double second (void) 69 | { 70 | struct timeval tv; 71 | gettimeofday(&tv, NULL); 72 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 73 | } 74 | 75 | static long long getSystemMemory(void) 76 | 
{ 77 | int memmib[2] = { CTL_HW, HW_MEMSIZE }; 78 | long long mem = (size_t)0; 79 | size_t memsz = sizeof(mem); 80 | 81 | /* NOTE: This may cap memory reported at 2GB */ 82 | if (sysctl(memmib, 2, &mem, &memsz, NULL, 0) == -1) { 83 | return 0; 84 | } else { 85 | return mem; 86 | } 87 | } 88 | #elif defined(__QNX__) 89 | #include 90 | #include 91 | #include 92 | static double second (void) 93 | { 94 | struct timeval tv; 95 | gettimeofday(&tv, NULL); 96 | return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; 97 | } 98 | 99 | static long long getSystemMemory(void) 100 | { 101 | return 0; 102 | } 103 | #else 104 | #error unsupported platform 105 | #endif 106 | 107 | std::string getFileName(const std::string& s) { 108 | 109 | char sep = '/'; 110 | 111 | #ifdef _WIN32 112 | sep = '\\'; 113 | #endif 114 | 115 | size_t i = s.rfind(sep, s.length()); 116 | if (i != std::string::npos) { 117 | return(s.substr(i+1, s.length() - i)); 118 | } 119 | 120 | return(""); 121 | } 122 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_csrmv.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
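A minimal sketch of how the portable helpers in nvgraph_test_common.h above are typically used: second() for wall-clock timing, getSystemMemory() for a capacity check, and getFileName() to strip a directory prefix. The timed workload here is a placeholder.

    #include <cstdio>
    #include <string>
    #include "nvgraph_test_common.h"

    int main()
    {
        long long totalBytes = getSystemMemory();   // 0 if the query is unsupported
        std::printf("system memory: %lld bytes\n", totalBytes);

        double t0 = second();
        // ... workload under test would run here ...
        double elapsed = second() - t0;
        std::printf("elapsed: %f s\n", elapsed);

        std::printf("basename: %s\n", getFileName(std::string(__FILE__)).c_str());
        return 0;
    }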
15 | */ 16 | #pragma once 17 | #include 18 | #include 19 | #include "valued_csr_graph.hxx" 20 | #include "nvgraph_vector.hxx" 21 | 22 | namespace nvgraph{ 23 | 24 | //this header file defines the various semirings using enum 25 | enum Semiring 26 | {//the datatype is assumed to be real unless otherwise specified in the name 27 | PlusTimes, //standard matrix vector multiplication 28 | MinPlus, //breadth first search-also called tropical 29 | MaxMin, //mas flow problems 30 | OrAndBool, 31 | LogPlus 32 | }; 33 | 34 | //Merge Path Coord array depends on the integere type 35 | template 36 | struct Coord 37 | { 38 | IndexType_ x; 39 | IndexType_ y; 40 | }; 41 | 42 | //struct which stores the csr matrix format, templated on the index and value 43 | template 44 | struct CsrMvParams { 45 | ValueType_ alpha; 46 | ValueType_ beta; 47 | ValueType_ *csrVal; //nonzero values from matrix A 48 | //row pointer must look at next address to avoid the 0 in merge path 49 | IndexType_ *csrRowPtr; //row offsets last entry is number of nonzeros size is m +1 50 | IndexType_ *csrColInd; //column indices of nonzeros 51 | ValueType_ *x; //vector x in alpha*A*x 52 | ValueType_ *y; //output y will be modified and store the output 53 | IndexType_ m; //number of rows 54 | IndexType_ n; //number of columns 55 | IndexType_ nnz; 56 | }; 57 | 58 | //create a device function interface to call the above dispatch function 59 | template 60 | cudaError_t csrmv_mp( 61 | IndexType_ n, 62 | IndexType_ m, 63 | IndexType_ nnz, 64 | ValueType_ alpha, 65 | ValueType_ * dValues, //all must be preallocated on the device 66 | IndexType_ * dRowOffsets, 67 | IndexType_ * dColIndices, 68 | ValueType_ *dVectorX, 69 | ValueType_ beta, 70 | ValueType_ *dVectorY, 71 | Semiring SR, //this parameter is of type enum and gives the semiring name 72 | cudaStream_t stream = 0 ); 73 | //overloaded function that has valued_csr_graph parameter to store the matrix 74 | template 75 | cudaError_t csrmv_mp( 76 | IndexType_ n, 77 | IndexType_ m, 78 | IndexType_ nnz, 79 | ValueType_ alpha, 80 | ValuedCsrGraph network, 81 | ValueType_ *dVectorX, 82 | ValueType_ beta, 83 | ValueType_ *dVectorY, 84 | Semiring SR, //this parameter is of type enum and gives the semiring name 85 | cudaStream_t stream = 0); 86 | } //end nvgraph namespace 87 | 88 | template 89 | void callTestCsrmv(IndexType_ num_rows, IndexType_ *dRowOffsets, IndexType_ *dColIndices, ValueType_ *dValues, 90 | ValueType_ *dVectorX, ValueType_ *dVectorY, nvgraph::Semiring SR, ValueType_ alpha, ValueType_ beta); 91 | 92 | -------------------------------------------------------------------------------- /cpp/include/graph.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
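A hedged sketch of one csrmv_mp call under the MinPlus (tropical) semiring declared above, which is the relaxation step behind SSSP: "multiply" becomes + and "add" becomes min. Treating alpha/beta as the semiring identities is an assumption about the convention, not something this header states, and all device buffers are presumed preallocated and filled by the caller.

    #include <cfloat>
    #include <cuda_runtime.h>
    #include "nvgraph_csrmv.hxx"

    // One semiring SpMV: under (min,+), y[i] becomes the min over row i of
    // (A[i][j] + x[j]), i.e. one SSSP relaxation sweep.
    cudaError_t relax_once(int n, int m, int nnz,
                           float* d_vals, int* d_rowPtr, int* d_colInd,
                           float* d_x, float* d_y, cudaStream_t stream = 0)
    {
        float alpha = 0.0f;    // multiplicative identity of (min,+)
        float beta  = FLT_MAX; // additive identity of (min,+); assumed convention
        return nvgraph::csrmv_mp(n, m, nnz, alpha,
                                 d_vals, d_rowPtr, d_colInd,
                                 d_x, beta, d_y,
                                 nvgraph::MinPlus, stream);
    }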
15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include // size_t 21 | #include 22 | 23 | #include // 24 | // 25 | namespace nvgraph 26 | { 27 | 28 | #define DEFINE_VISITABLE(T) \ 29 | virtual void Accept(VisitorBase& guest) \ 30 | { BaseVisitableGraph::AcceptImpl(*this, guest); } 31 | 32 | template 33 | struct BaseVisitableGraph 34 | { 35 | virtual void Accept(VisitorBase& v) = 0; 36 | 37 | virtual ~BaseVisitableGraph(void) 38 | { 39 | } 40 | protected: 41 | template 42 | static void AcceptImpl(Host& visited, VisitorBase& guest) 43 | { 44 | if( Visitor* p = dynamic_cast*>(&guest)) 45 | { 46 | p->Visit(visited); 47 | } 48 | } 49 | }; 50 | 51 | template 52 | class Graph: public BaseVisitableGraph 53 | { 54 | public: 55 | typedef IndexType_ IndexType; 56 | 57 | protected: 58 | size_t num_vertices; 59 | size_t num_edges; 60 | Graph *parent; 61 | Graph *child; 62 | 63 | public: 64 | /*! Construct an empty \p Graph. 65 | */ 66 | Graph() 67 | : num_vertices(0),num_edges(0) {} 68 | 69 | /*! Construct a \p Graph with a specific number of vertices. 70 | * 71 | * \param vertices Number of vertices. 72 | */ 73 | Graph(size_t vertices) 74 | : num_vertices(vertices), num_edges(0) {} 75 | 76 | /*! Construct a \p Graph with a specific number of vertices and edges. 77 | * 78 | * \param vertices Number of vertices. 79 | * \param edges Number of edges. 80 | */ 81 | Graph(size_t vertices, size_t edges) 82 | : num_vertices(vertices), num_edges(edges) {} 83 | 84 | /*! Construct a \p CsrGraph from another graph. 85 | * 86 | * \param CsrGraph Another graph in csr 87 | */ 88 | Graph(const Graph& gr) 89 | { 90 | num_vertices = gr.get_num_vertices(); 91 | num_edges = gr.get_num_edges(); 92 | } 93 | 94 | inline void set_num_vertices(IndexType_ p_num_vertices) { num_vertices = p_num_vertices; } 95 | inline void set_num_edges(IndexType_ p_num_edges) { num_edges = p_num_edges; } 96 | inline size_t get_num_vertices() const { return num_vertices; } 97 | inline size_t get_num_edges() const { return num_edges; } 98 | /*! Resize graph dimensions 99 | * 100 | * \param num_rows Number of vertices. 101 | * \param num_cols Number of edges. 102 | */ 103 | //inline void resize(size_t vertices, size_t edges) 104 | //{ 105 | // num_vertices = vertices; 106 | // num_edges = edges; 107 | //} 108 | 109 | //Accept method injection 110 | DEFINE_VISITABLE(IndexType_) 111 | }; 112 | 113 | } // end namespace nvgraph 114 | 115 | -------------------------------------------------------------------------------- /cpp/include/pagerank.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
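The Accept/AcceptImpl machinery in graph.hxx above implements the acyclic visitor pattern: a visitor passed through the type-erased base is dispatched only if it also derives from the visit interface for the concrete host type. Below is a self-contained, stand-alone sketch of the same idiom; VisitorBase/Visitor here are stand-ins, not the actual definitions from graph_visitors.hxx (which is not shown in this dump).

    #include <iostream>

    struct VisitorBase { virtual ~VisitorBase() {} };       // common, type-erased base

    template <typename Host>
    struct Visitor { virtual void Visit(Host&) = 0; virtual ~Visitor() {} };

    struct CsrHost;                                          // a "visitable" host type

    struct DegreePrinter : VisitorBase, Visitor<CsrHost> {
        void Visit(CsrHost&) override { std::cout << "visited a CsrHost\n"; }
    };

    struct CsrHost {
        // Mirrors AcceptImpl: only guests that also derive from Visitor<CsrHost>
        // pass the dynamic_cast and get their Visit() called; others are ignored.
        void Accept(VisitorBase& guest) {
            if (Visitor<CsrHost>* p = dynamic_cast<Visitor<CsrHost>*>(&guest))
                p->Visit(*this);
        }
    };

    int main() {
        CsrHost g;
        DegreePrinter v;
        g.Accept(v);   // prints "visited a CsrHost"
    }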
15 | */ 16 | 17 | #pragma once 18 | 19 | namespace nvgraph 20 | { 21 | template 22 | class Pagerank 23 | { 24 | public: 25 | typedef IndexType_ IndexType; 26 | typedef ValueType_ ValueType; 27 | 28 | private: 29 | ValuedCsrGraph m_network ; 30 | Vector m_a; 31 | Vector m_b; 32 | Vector m_pagerank; 33 | Vector m_tmp; 34 | ValueType m_damping_factor; 35 | ValueType m_residual; 36 | ValueType m_tolerance; 37 | cudaStream_t m_stream; 38 | int m_iterations; 39 | int m_max_it; 40 | bool m_is_setup; 41 | bool m_has_guess; 42 | 43 | bool solve_it(); 44 | //void update_dangling_nodes(Vector& dangling_nodes); 45 | void setup(ValueType damping_factor, Vector& initial_guess, Vector& pagerank_vector); 46 | 47 | public: 48 | // Simple constructor 49 | Pagerank(void) {}; 50 | // Simple destructor 51 | ~Pagerank(void) {}; 52 | 53 | // Create a Pagerank Solver attached to a the transposed of a transition matrix 54 | // *** network is the transposed of a transition matrix*** 55 | Pagerank(const ValuedCsrGraph & network, Vector& dangling_nodes, cudaStream_t stream = 0); 56 | 57 | // dangling_nodes is a vector of size n where dangling_nodes[i] = 1.0 if vertex i is a dangling node and 0.0 otherwise 58 | // pagerank_vector is the output 59 | //void solve(ValueType damping_factor, Vector& dangling_nodes, Vector& pagerank_vector); 60 | // setup with an initial guess of the pagerank 61 | NVGRAPH_ERROR solve(ValueType damping_factor, Vector& initial_guess, Vector& pagerank_vector, float tolerance =1.0E-6, int max_it = 500); 62 | inline ValueType get_residual() const {return m_residual;} 63 | inline int get_iterations() const {return m_iterations;} 64 | 65 | 66 | // init : 67 | // We need the transpose (=converse =reverse) in input (this can be seen as a CSC matrix that we see as CSR) 68 | // b is a constant and uniform vector, b = 1.0/num_vertices 69 | // a is a constant vector that initialy store the dangling nodes then we set : a = alpha*a + (1-alpha)e 70 | // pagerank is 0 71 | // tmp is random ( 1/n is fine) 72 | // alpha is a constant scalar (0.85 usually) 73 | 74 | //loop : 75 | // pagerank = csrmv (network, tmp) 76 | // scal(pagerank, alpha); //pagerank = alpha*pagerank 77 | // gamma = dot(a, tmp); //gamma = a*tmp 78 | // pagerank = axpy(b, pagerank, gamma); // pagerank = pagerank+gamma*b 79 | 80 | // convergence check 81 | // tmp = axpby(pagerank, tmp, -1, 1); // tmp = pagerank - tmp 82 | // residual_norm = norm(tmp); 83 | // if converged (residual_norm) 84 | // l1 = l1_norm(pagerank); 85 | // pagerank = scal(pagerank, 1/l1); 86 | // return pagerank 87 | // swap(tmp, pagerank) 88 | //end loop 89 | }; 90 | 91 | } // end namespace nvgraph 92 | 93 | -------------------------------------------------------------------------------- /test/generators/rmat.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Usage:./rmatg x\n"); 14 | printf("%s", "x is the size of the graph, x>32 (Boost generator hang if x<32)\n"); 15 | exit(0); 16 | } 17 | 18 | int main(int argc, char *argv[]) 19 | { 20 | 21 | // RMAT paper http://snap.stanford.edu/class/cs224w-readings/chakrabarti04rmat.pdf 22 | // Boost doc on RMAT http://www.boost.org/doc/libs/1_49_0/libs/graph_parallel/doc/html/rmat_generator.html 23 | 24 | typedef boost::adjacency_list Graph; 25 | typedef boost::unique_rmat_iterator RMATGen; 26 | 27 | if (argc < 2) printUsageAndExit(); 
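The comment block in pagerank.hxx above spells out the power iteration (csrmv, scal, dot, axpy, convergence check, final L1 rescaling). Below is a CPU-only reference sketch of that loop on plain CSR arrays, written to make those comments concrete; it is not the library's GPU implementation. The matrix is the transposed transition matrix, and dangling[i] is 1.0 for dangling vertices, 0.0 otherwise, as described above.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    std::vector<double> pagerank_ref(const std::vector<int>& rowPtr,
                                     const std::vector<int>& colInd,
                                     const std::vector<double>& val,
                                     const std::vector<double>& dangling,
                                     double alpha = 0.85, double tol = 1e-6, int max_it = 500)
    {
        const std::size_t n = rowPtr.size() - 1;
        const double b = 1.0 / static_cast<double>(n);        // b is the constant 1/n vector
        std::vector<double> a(n), tmp(n, b), pr(n, 0.0);
        for (std::size_t i = 0; i < n; ++i)                    // a = alpha*dangling + (1-alpha)*e
            a[i] = alpha * dangling[i] + (1.0 - alpha);

        for (int it = 0; it < max_it; ++it) {
            double gamma = 0.0;                                // gamma = dot(a, tmp)
            for (std::size_t i = 0; i < n; ++i) gamma += a[i] * tmp[i];

            for (std::size_t i = 0; i < n; ++i) {              // pr = alpha*(A*tmp) + gamma*b
                double s = 0.0;
                for (int k = rowPtr[i]; k < rowPtr[i + 1]; ++k)
                    s += val[k] * tmp[colInd[k]];
                pr[i] = alpha * s + gamma * b;
            }

            double resid = 0.0;                                // ||pr - tmp||_2
            for (std::size_t i = 0; i < n; ++i) resid += (pr[i] - tmp[i]) * (pr[i] - tmp[i]);
            tmp.swap(pr);                                      // tmp now holds the newest iterate
            if (std::sqrt(resid) < tol) break;
        }

        double l1 = 0.0;                                       // rescale to unit L1 norm
        for (std::size_t i = 0; i < n; ++i) l1 += std::fabs(tmp[i]);
        if (l1 > 0.0) for (std::size_t i = 0; i < n; ++i) tmp[i] /= l1;
        return tmp;
    }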
28 | int size = atoi (argv[1]); 29 | if (size<32) printUsageAndExit(); 30 | assert (size > 31 && size < INT_MAX); 31 | const unsigned num_edges = 16 * size; 32 | /************************ 33 | * RMAT Gen 34 | ************************/ 35 | std::cout << "generating ... "<<'\n'; 36 | // values of a,b,c,d are from the graph500. 37 | boost::minstd_rand gen; 38 | Graph g(RMATGen(gen, size, num_edges, 0.57, 0.19, 0.19, 0.05, true), RMATGen(), size); 39 | assert (num_edges == boost::num_edges(g)); 40 | 41 | /************************ 42 | * Print 43 | ************************/ 44 | boost::graph_traits<Graph>::edge_iterator edge, edge_end; 45 | std::cout << "vertices : " << boost::num_vertices(g) <<'\n'; 46 | std::cout << "edges : " << boost::num_edges(g) <<'\n'; 47 | std::cout << "average degree : "<< static_cast<double>(boost::num_edges(g))/boost::num_vertices(g)<< '\n'; 48 | 49 | // Print in matrix coordinate real general format 50 | std::cout << "writing ... "<<'\n'; 51 | std::stringstream tmp; 52 | tmp <<"local_test_data/rmat_graph_" << size << ".mtx"; 53 | const std::string filename = tmp.str(); 54 | std::ofstream fout(tmp.str().c_str()) ; 55 | if (argv[2]==NULL) 56 | { 57 | // Power law out degree with random weights 58 | fout << "%%MatrixMarket matrix coordinate real general\n"; 59 | fout << boost::num_vertices(g) <<' '<< boost::num_vertices(g) <<' '<< boost::num_edges(g) << '\n'; 60 | float val; 61 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 62 | { 63 | val = (rand()%10)+(rand()%100)*(1e-2f); 64 | fout << boost::source(*edge, g) << ' ' << boost::target(*edge, g)<< ' ' << val << '\n'; 65 | } 66 | } 67 | else if (argv[2][0]=='i') 68 | { 69 | // Power law in degree (ie the transpose will have a power law) 70 | // -- Edges only -- 71 | // * Warning * edges will be unsorted, use sort_edges.cpp to sort the dataset.
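For completeness, a small hypothetical reader for the weighted edge list written by the first branch above (MatrixMarket banner, a "rows cols nnz" size line, then "source target weight" triplets). It is not part of the generators and ignores the unweighted 'i' variant; vertex ids are kept exactly as the generator wrote them.

    #include <fstream>
    #include <sstream>
    #include <string>
    #include <vector>

    struct Edge { long src, dst; double w; };

    std::vector<Edge> read_edges(const std::string& path)
    {
        std::ifstream in(path.c_str());
        std::vector<Edge> edges;
        std::string line;
        bool size_line_skipped = false;
        while (std::getline(in, line)) {
            if (line.empty() || line[0] == '%') continue;          // banner / comment lines
            if (!size_line_skipped) { size_line_skipped = true; continue; } // "rows cols nnz"
            std::istringstream ls(line);
            Edge e;
            if (ls >> e.src >> e.dst >> e.w) edges.push_back(e);   // one weighted edge per line
        }
        return edges;
    }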
72 | fout << boost::num_vertices(g) <<' '<< boost::num_edges(g) << '\n'; 73 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 74 | fout < 49 | struct BlockHistogramAtomic 50 | { 51 | /// Shared memory storage layout type 52 | struct TempStorage {}; 53 | 54 | 55 | /// Constructor 56 | __device__ __forceinline__ BlockHistogramAtomic( 57 | TempStorage &temp_storage) 58 | {} 59 | 60 | 61 | /// Composite data onto an existing histogram 62 | template < 63 | typename T, 64 | typename CounterT, 65 | int ITEMS_PER_THREAD> 66 | __device__ __forceinline__ void Composite( 67 | T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram 68 | CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram 69 | { 70 | // Update histogram 71 | #pragma unroll 72 | for (int i = 0; i < ITEMS_PER_THREAD; ++i) 73 | { 74 | atomicAdd(histogram + items[i], 1); 75 | } 76 | } 77 | 78 | }; 79 | 80 | } // CUB namespace 81 | CUB_NS_POSTFIX // Optional outer namespace(s) 82 | 83 | -------------------------------------------------------------------------------- /test/ref/cpu_ref_widest.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Generates widest path vector for the single source vertex to all other vertices using dijkstra-like algorithm 4 | 5 | # Usage : python3 nvgraph_cpu_ref.py graph.mtx source_vertex 6 | # This works with networkx 1.8.1 (default ubuntu package version in 14.04) 7 | # http://networkx.github.io/documentation/networkx-1.8/ 8 | 9 | # Latest version is currenlty 1.11 in feb 2016 10 | # https://networkx.github.io/documentation/latest/tutorial/index.html 11 | 12 | #import numpy as np 13 | import sys 14 | import time 15 | from scipy.io import mmread 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | import networkx as nx 19 | import os 20 | import sys 21 | 22 | #modified widest 23 | def _dijkstra_custom(G, source, get_weight, cutoff=None): 24 | G_succ = G.succ if G.is_directed() else G.adj 25 | width = {node: -sys.float_info.max for node in range(G.number_of_nodes())} # dictionary of final distances 26 | width[source] = sys.float_info.max 27 | #seen = set() 28 | Qset = set([(source, 0)]) 29 | while len(Qset) > 0: 30 | u, depth = Qset.pop() 31 | if cutoff: 32 | if cutoff < depth: 33 | continue 34 | #print "Looking at vertex ", u, ", depth = ", depth 35 | for v, e in G_succ[u].items(): 36 | cost = get_weight(u, v, e) 37 | #print "Looking at vertex ", u, ", edge to ", v 38 | if cost is None: 39 | continue 40 | alt = max(width[v], min(width[u], cost)) 41 | if alt > width[v]: 42 | width[v] = alt 43 | Qset.add((v, depth+1)) 44 | #print "Updated QSET: ", Qset 45 | return width 46 | 47 | def single_source_dijkstra_widest(G, source, cutoff=None, 48 | weight='weight'): 49 | if G.is_multigraph(): 50 | get_weight = lambda u, v, data: min( 51 | eattr.get(weight, 1) for eattr in data.values()) 52 | else: 53 | get_weight = lambda u, v, data: data.get(weight, 1) 54 | 55 | return _dijkstra_custom(G, source, get_weight, cutoff=cutoff) 56 | 57 | print ('Networkx version : {} '.format(nx.__version__)) 58 | 59 | # Command line arguments 60 | argc = len(sys.argv) 61 | if argc<=2: 62 | print("Error: usage is : python3 nvgraph_cpu_ref.py graph.mtx source_vertex") 63 | sys.exit() 64 | mmFile = sys.argv[1] 65 | src = int(sys.argv[2]) 66 | print('Reading '+ str(mmFile) + '...') 67 | #Read 68 | M = mmread(mmFile).transpose() 69 | 70 | if M is None : 71 | raise 
TypeError('Could not read the input graph') 72 | 73 | # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this explicitly 74 | M = M.asfptype().tolil().tocsr() 75 | if not M.has_sorted_indices: 76 | M.sort_indices() 77 | 78 | # Directed NetworkX graph 79 | Gnx = nx.DiGraph(M) 80 | 81 | #widest 82 | print('Solving... ') 83 | t1 = time.time() 84 | widest = single_source_dijkstra_widest(Gnx,source=src) 85 | t2 = time.time() - t1 86 | 87 | print('Time : '+str(t2)) 88 | print('Writing result ... ') 89 | 90 | # fill missing with DBL_MAX 91 | bwidest = np.full(M.shape[0], -sys.float_info.max, dtype=np.float64) 92 | for r in widest.keys(): 93 | bwidest[r] = widest[r] 94 | #print bwidest 95 | # write binary 96 | out_fname = os.path.splitext(os.path.basename(mmFile))[0] + '_T.widest_' + str(src) + '.bin' 97 | bwidest.tofile(out_fname, "") 98 | print ('Result is in the file: ' + out_fname) 99 | 100 | # write text 101 | #f = open('/tmp/ref_' + os.path.basename(mmFile) + '_widest.txt', 'w') 102 | #f.write(str(widest.values())) 103 | 104 | print('Done') 105 | -------------------------------------------------------------------------------- /test/generators/plod.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | void printUsageAndExit() 12 | { 13 | printf("%s", "Usage:./plodg x\n"); 14 | printf("%s", "x is the size of the graph\n"); 15 | exit(0); 16 | } 17 | 18 | int main(int argc, char *argv[]) 19 | { 20 | 21 | /* " The Power Law Out Degree (PLOD) algorithm generates a scale-free graph from three parameters, n, alpha, and beta. 22 | [...] The value of beta controls the y-intercept of the curve, so that increasing beta increases the average degree of vertices (credit = beta*x^-alpha). 23 | [...] The value of alpha controls how steeply the curve drops off, with larger values indicating a steeper curve. */ 24 | // From Boost documentation http://www.boost.org/doc/libs/1_47_0/libs/graph/doc/plod_generator.html 25 | 26 | // we use setS aka std::set for edges storage 27 | // so we have at most one edges between 2 vertices 28 | // the extra cost is O(log(E/V)). 29 | typedef boost::adjacency_list Graph; 30 | typedef boost::plod_iterator SFGen; 31 | 32 | if (argc < 2) printUsageAndExit(); 33 | int size = atoi (argv[1]); 34 | assert (size > 1 && size < INT_MAX); 35 | double alpha = 2.57; // It is known that web graphs have alpha ~ 2.72. 36 | double beta = size*512+1024; // This will give an average degree ~ 15 37 | 38 | // generation 39 | std::cout << "generating ... "<<'\n'; 40 | boost::minstd_rand gen; 41 | Graph g(SFGen(gen, size, alpha, beta, false), SFGen(), size); 42 | boost::graph_traits::edge_iterator edge, edge_end; 43 | 44 | std::cout << "vertices : " << num_vertices(g) <<'\n'; 45 | std::cout << "edges : " << num_edges(g) <<'\n'; 46 | std::cout << "average degree : "<< static_cast(num_edges(g))/num_vertices(g)<< '\n'; 47 | // Print in matrix coordinate real general format 48 | std::cout << "writing ... 
"<<'\n'; 49 | std::stringstream tmp; 50 | tmp <<"local_test_data/plod_graph_" << size << ".mtx"; 51 | const std::string filename = tmp.str(); 52 | std::ofstream fout(tmp.str().c_str()) ; 53 | 54 | if (argv[2]==NULL) 55 | { 56 | // Power law out degree with random weights 57 | fout << "%%MatrixMarket matrix coordinate real general\n"; 58 | fout << num_vertices(g) <<' '<< num_vertices(g) <<' '<< num_edges(g) << '\n'; 59 | float val; 60 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 61 | { 62 | val = (rand()%10)+(rand()%100)*(1e-2f); 63 | fout << boost::source(*edge, g) << ' ' << boost::target(*edge, g)<< ' ' << val << '\n'; 64 | } 65 | } 66 | else if (argv[2][0]=='i') 67 | { 68 | // Power law in degree (ie the transpose will have a power law) 69 | // -- Edges only -- 70 | // * Wraning * edges will be unsorted, use sort_edges.cpp to sort the dataset. 71 | fout << num_vertices(g) <<' '<< num_edges(g) << '\n'; 72 | for( boost::tie(edge, edge_end) = boost::edges(g); edge != edge_end; ++edge) 73 | fout < 2 | #include 3 | #include //file output 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | void printUsageAndExit() 16 | { 17 | printf("%s", "Usage:./rmatg x y\n"); 18 | printf("%s", "x is the size of the graph, x>32 (Boost generator hang if x<32)\n"); 19 | printf("%s", "y is the source of sssp\n"); 20 | exit(0); 21 | } 22 | 23 | int main(int argc, char *argv[]) 24 | { 25 | // read size 26 | if (argc < 3) printUsageAndExit(); 27 | int size = atoi (argv[1]); 28 | if (size<32) printUsageAndExit(); 29 | int source_sssp =atoi (argv[2]); 30 | assert (size > 1 && size < INT_MAX); 31 | assert (source_sssp >= 0 && source_sssp < size); 32 | const unsigned num_edges = 15 * size; 33 | 34 | // Some boost types 35 | typedef boost::no_property VertexProperty; 36 | typedef boost::property EdgeProperty; 37 | typedef boost::adjacency_list Graph; 38 | typedef boost::unique_rmat_iterator RMATGen; 39 | typedef boost::graph_traits::vertex_descriptor vertex_descriptor; 40 | boost::minstd_rand gen; 41 | boost::graph_traits::edge_iterator edge, edge_end; 42 | 43 | /************************ 44 | * Random weights 45 | ************************/ 46 | // !!! WARNING !!! 47 | // watch the stack 48 | float* weight = new float[num_edges]; 49 | int count = 0; 50 | for( int i = 0; i < num_edges; ++i) 51 | weight[i] = (rand()%10)+(rand()%100)*(1.2e-2f); 52 | 53 | /************************ 54 | * RMAT Gen 55 | ************************/ 56 | Graph g(RMATGen(gen, size, num_edges, 0.57, 0.19, 0.19, 0.05,true),RMATGen(),weight, size); 57 | std::cout << "Generator : done. Edges = "< p(num_vertices(g)); 67 | std::vector d(num_vertices(g)); 68 | vertex_descriptor s = vertex(source_sssp, g); //define soruce node 69 | 70 | double start = omp_get_wtime(); 71 | dijkstra_shortest_paths(g, s, 72 | predecessor_map(boost::make_iterator_property_map(p.begin(), get(boost::vertex_index, g))). 
73 | distance_map(boost::make_iterator_property_map(d.begin(), get(boost::vertex_index, g)))); 74 | 75 | double stop = omp_get_wtime(); 76 | std::cout << "Time = " << stop-start << "s"<< std::endl; 77 | 78 | /************************ 79 | * Print 80 | ************************/ 81 | /* 82 | boost::graph_traits::vertex_iterator vi, vend; 83 | std::cout << "SOURCE = "<< source_sssp << std::endl; 84 | for (boost::tie(vi, vend) = vertices(g); vi != vend; ++vi) 85 | { 86 | if (d[*vi] != FLT_MAX) 87 | { 88 | std::cout << "d(" << *vi << ") = " << d[*vi] << ", "; 89 | std::cout << "parent = " << p[*vi] << std::endl; 90 | } 91 | else 92 | std::cout << "d(" << *vi << ") = INF"<< std::endl; 93 | } 94 | */ 95 | return 0; 96 | 97 | } 98 | 99 | -------------------------------------------------------------------------------- /cpp/include/bfs.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | 18 | 19 | #pragma once 20 | 21 | 22 | 23 | #include 24 | 25 | 26 | 27 | //Used in nvgraph.h 28 | 29 | #define TRAVERSAL_DEFAULT_ALPHA 15 30 | 31 | #define TRAVERSAL_DEFAULT_BETA 18 32 | 33 | 34 | 35 | #include "nvgraph_error.hxx" 36 | 37 | 38 | 39 | namespace nvgraph 40 | 41 | { 42 | 43 | template 44 | 45 | class Bfs 46 | 47 | { 48 | 49 | private: 50 | 51 | IndexType n, nnz; 52 | 53 | IndexType* row_offsets; 54 | 55 | IndexType* col_indices; 56 | 57 | 58 | 59 | bool directed; 60 | bool deterministic; 61 | 62 | 63 | // edgemask, distances, predecessors are set/read by users - using Vectors 64 | 65 | bool useEdgeMask; 66 | 67 | bool computeDistances; 68 | 69 | bool computePredecessors; 70 | 71 | 72 | 73 | IndexType *distances; 74 | 75 | IndexType *predecessors; 76 | 77 | int *edge_mask; 78 | 79 | 80 | 81 | //Working data 82 | 83 | //For complete description of each, go to bfs.cu 84 | 85 | 86 | 87 | IndexType nisolated; 88 | 89 | IndexType *frontier, *new_frontier; 90 | 91 | IndexType * original_frontier; 92 | 93 | IndexType vertices_bmap_size; 94 | 95 | int *visited_bmap, *isolated_bmap; 96 | 97 | IndexType *vertex_degree; 98 | 99 | IndexType *buffer_np1_1, *buffer_np1_2; 100 | 101 | IndexType *frontier_vertex_degree; 102 | 103 | IndexType *exclusive_sum_frontier_vertex_degree; 104 | 105 | IndexType *unvisited_queue; 106 | 107 | IndexType *left_unvisited_queue; 108 | 109 | IndexType *exclusive_sum_frontier_vertex_buckets_offsets; 110 | 111 | 112 | 113 | IndexType *d_counters_pad; 114 | 115 | IndexType *d_new_frontier_cnt; 116 | 117 | IndexType *d_mu; 118 | 119 | IndexType *d_unvisited_cnt; 120 | 121 | IndexType *d_left_unvisited_cnt; 122 | 123 | 124 | 125 | void *d_cub_exclusive_sum_storage; 126 | 127 | size_t cub_exclusive_sum_storage_bytes; 128 | 129 | 130 | 131 | //Parameters for direction optimizing 132 | 133 | IndexType alpha, beta; 134 | 135 | 136 | 137 | cudaStream_t stream; 138 | 139 | //resets pointers defined by 
d_counters_pad (see implem) 140 | 141 | void resetDevicePointers(); 142 | 143 | NVGRAPH_ERROR setup(); 144 | 145 | void clean(); 146 | 147 | public: 148 | 149 | virtual ~Bfs(void) { 150 | 151 | clean(); 152 | 153 | }; 154 | 155 | 156 | 157 | Bfs(IndexType _n, IndexType _nnz, IndexType *_row_offsets, IndexType *_col_indices, bool _directed, IndexType _alpha, IndexType _beta, cudaStream_t _stream = 0) : n(_n), nnz(_nnz), row_offsets(_row_offsets), col_indices(_col_indices), directed(_directed), alpha(_alpha), beta(_beta), stream(_stream) { 158 | 159 | setup(); 160 | 161 | } 162 | 163 | 164 | 165 | NVGRAPH_ERROR configure(IndexType *distances, IndexType *predecessors, int *edge_mask); 166 | 167 | NVGRAPH_ERROR traverse(IndexType source_vertex); 168 | 169 | //Used only for benchmarks 170 | 171 | NVGRAPH_ERROR traverse(IndexType *source_vertices, IndexType nsources); 172 | 173 | }; 174 | 175 | 176 | 177 | } // end namespace nvgraph 178 | 179 | 180 | 181 | -------------------------------------------------------------------------------- /test/generators/convertors/H_to_HtSorted_and_a.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include // std::sort 5 | #include // std::vector 6 | // This code transpose a matrix H and compute the flag vector of empty rows a. 7 | // We assume that H is row-substochastic, in MatrixMarket format and data are sorted by row id 8 | // The output is filename_T.filetype, H is printed first then a is printed. 9 | 10 | struct elt { 11 | long int r; 12 | long int c; 13 | double v; 14 | }; 15 | 16 | void printUsageAndExit() 17 | { 18 | printf("%s", "Fatal Error\n"); 19 | printf("%s", "Usage: ./HTA H.mtx\n"); 20 | printf("%s", "NOTE1: H is the row-substochastic matrix of a graph\n"); 21 | printf("%s", "NOTE2: H is in MatrixMarket coordinate real general format\n"); 22 | printf("%s", "NOTE3: Data are sorted by row id\n"); 23 | printf("%s", "Output : H^t and the bookmark vector of empty rows\n"); 24 | printf("%s", "***This output fits the input of AMGX PageRank***\n"); 25 | exit(0); 26 | } 27 | 28 | inline bool operator< (const elt& a, const elt& b) 29 | { // ordered by row and then by colum inside a row 30 | return a.r A; 45 | std::vector a; 46 | // Get I/O names 47 | // The output is filename_T 48 | while (argv[1][i] != '\0') 49 | {outp[i] = argv[1][i];i++;} 50 | outp[i] = '_'; i++; 51 | outp[i] = 'T';i++; 52 | outp[i]='\0'; 53 | // Open files 54 | fpin = fopen(argv[1],"r"); 55 | fpout = fopen(outp,"w"); 56 | if (!fpin || !fpout) 57 | { 58 | printf("%s", "Fatal Error : I/O fail\n"); 59 | exit(0); 60 | } 61 | 62 | // Skip lines starting with "%%"" 63 | do 64 | { 65 | cc = fgetc(fpin); 66 | if (cc == '%') fgets(outp,128,fpin); 67 | } 68 | while (cc == '%'); 69 | fseek( fpin, -1, SEEK_CUR ); 70 | 71 | // Get n and nz 72 | fscanf(fpin,"%ld",&n); 73 | fscanf(fpin,"%ld",&n); 74 | fscanf(fpin,"%ld",&nz); 75 | 76 | // Print format and size 77 | fprintf(fpout, "%s", "%%"); 78 | fprintf(fpout,"MatrixMarket matrix coordinate real general\n"); 79 | fprintf(fpout, "%s", "%%"); 80 | fprintf(fpout,"AMGX rhs\n"); 81 | fprintf(fpout,"%ld %ld %ld\n",n, n, nz); 82 | 83 | // Empty rows at the begining 84 | fscanf(fpin,"%ld",&e.c); 85 | fscanf(fpin,"%ld",&e.r); 86 | fscanf(fpin,"%lf",&e.v); 87 | A.push_back(e); 88 | 89 | for (j=0; j(e.c)-1; j++) 90 | { 91 | std::cout< lastr) 105 | { 106 | if (e.c > lastr+1) 107 | { 108 | a.push_back(0); 109 | //Successive empty rows 110 | for (k=0; k(e.c)-lastr-1; k++) 111 | 
a.push_back(1); 112 | } 113 | else 114 | a.push_back(0); 115 | } 116 | } 117 | a.push_back(0); 118 | 119 | // Empty rows at the end 120 | for (k=a.size(); k::iterator it = A.begin() ; it != A.end(); ++it) 127 | fprintf(fpout,"%ld %ld %.9f\n",it->r, it->c, it->v); 128 | 129 | for (std::vector::iterator it = a.begin() ; it != a.end(); ++it) 130 | fprintf(fpout,"%u\n",*it); 131 | 132 | return 0; 133 | 134 | } 135 | 136 | -------------------------------------------------------------------------------- /external/cub_semiring/cub.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of the NVIDIA CORPORATION nor the 13 | * names of its contributors may be used to endorse or promote products 14 | * derived from this software without specific prior written permission. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
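A tiny worked example of what the H_to_HtSorted_and_a.cpp converter above produces, using a hypothetical 3x3 row-substochastic H whose row 2 is empty (this is not one of the shipped datasets). Input H, sorted by row id:

    %%MatrixMarket matrix coordinate real general
    3 3 3
    1 2 0.500000
    1 3 0.500000
    3 1 1.000000

Output (written to the input name with _T appended): H^t sorted by row, followed by the bookmark vector a, which flags row 2 of H as empty, i.e. vertex 2 as a dangling node:

    %%MatrixMarket matrix coordinate real general
    %%AMGX rhs
    3 3 3
    1 3 1.000000000
    2 1 0.500000000
    3 1 0.500000000
    0
    1
    0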
26 | * 27 | ******************************************************************************/ 28 | 29 | /** 30 | * \file 31 | * CUB umbrella include file 32 | */ 33 | 34 | #pragma once 35 | 36 | 37 | // Block 38 | #include "block/block_histogram.cuh" 39 | #include "block/block_discontinuity.cuh" 40 | #include "block/block_exchange.cuh" 41 | #include "block/block_load.cuh" 42 | #include "block/block_radix_rank.cuh" 43 | #include "block/block_radix_sort.cuh" 44 | #include "block/block_reduce.cuh" 45 | #include "block/block_scan.cuh" 46 | #include "block/block_store.cuh" 47 | //#include "block/block_shift.cuh" 48 | 49 | // Device 50 | #include "device/device_histogram.cuh" 51 | #include "device/device_partition.cuh" 52 | #include "device/device_radix_sort.cuh" 53 | #include "device/device_reduce.cuh" 54 | #include "device/device_run_length_encode.cuh" 55 | #include "device/device_scan.cuh" 56 | #include "device/device_segmented_radix_sort.cuh" 57 | #include "device/device_segmented_reduce.cuh" 58 | #include "device/device_select.cuh" 59 | #include "device/device_spmv.cuh" 60 | 61 | // Grid 62 | //#include "grid/grid_barrier.cuh" 63 | #include "grid/grid_even_share.cuh" 64 | #include "grid/grid_mapping.cuh" 65 | #include "grid/grid_queue.cuh" 66 | 67 | // Thread 68 | #include "thread/thread_load.cuh" 69 | #include "thread/thread_operators.cuh" 70 | #include "thread/thread_reduce.cuh" 71 | #include "thread/thread_scan.cuh" 72 | #include "thread/thread_store.cuh" 73 | 74 | // Warp 75 | #include "warp/warp_reduce.cuh" 76 | #include "warp/warp_scan.cuh" 77 | 78 | // Iterator 79 | #include "iterator/arg_index_input_iterator.cuh" 80 | #include "iterator/cache_modified_input_iterator.cuh" 81 | #include "iterator/cache_modified_output_iterator.cuh" 82 | #include "iterator/constant_input_iterator.cuh" 83 | #include "iterator/counting_input_iterator.cuh" 84 | #include "iterator/tex_obj_input_iterator.cuh" 85 | #include "iterator/tex_ref_input_iterator.cuh" 86 | #include "iterator/transform_input_iterator.cuh" 87 | 88 | // Util 89 | #include "util_arch.cuh" 90 | #include "util_debug.cuh" 91 | #include "util_device.cuh" 92 | #include "util_macro.cuh" 93 | #include "util_ptx.cuh" 94 | #include "util_type.cuh" 95 | 96 | -------------------------------------------------------------------------------- /cpp/include/app/nvlouvain_app.cu: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "test_opt_utils.cuh" 21 | #include "graph_utils.cuh" 22 | 23 | //#define ENABLE_LOG TRUE 24 | #define ENALBE_LOUVAIN true 25 | 26 | #include "nvlouvain.cuh" 27 | #include "gtest/gtest.h" 28 | #include "high_res_clock.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | using T = float; 38 | 39 | int main(int argc, char* argv[]){ 40 | 41 | if(argc < 2) 42 | { 43 | std::cout<< "Help : ./louvain_test matrix_market_file.mtx"<(fin, 1, &mc, &m, &k, &nnz)) ,0); 53 | EXPECT_EQ(m,k); 54 | 55 | thrust::host_vector coo_ind_h(nnz); 56 | thrust::host_vector csr_ptr_h(m+1); 57 | thrust::host_vector csr_ind_h(nnz); 58 | thrust::host_vector csr_val_h(nnz); 59 | 60 | EXPECT_EQ( (mm_to_coo(fin, 1, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL)), 0); 61 | EXPECT_EQ( (coo_to_csr (m, k, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL, &csr_ptr_h[0], NULL, NULL, NULL)), 0); 62 | 63 | EXPECT_EQ(fclose(fin),0); 64 | 65 | thrust::device_vector csr_ptr_d(csr_ptr_h); 66 | thrust::device_vector csr_ind_d(csr_ind_h); 67 | thrust::device_vector csr_val_d(csr_val_h); 68 | 69 | thrust::device_vector tmp_1(nnz); 70 | thrust::fill(thrust::cuda::par, tmp_1.begin(), tmp_1.end(), 1.0); 71 | thrust::device_vector::iterator max_ele = thrust::max_element(thrust::cuda::par, csr_val_d.begin(), csr_val_d.end()); 72 | 73 | bool weighted = (*max_ele!=1.0); 74 | 75 | //std::cout<<(weighted?"Weighted ":"Not Weigthed ")<<" n_vertex: "< cluster_d(m, 0); 85 | int* csr_ptr_ptr = thrust::raw_pointer_cast(csr_ptr_d.data()); 86 | int* csr_ind_ptr = thrust::raw_pointer_cast(csr_ind_d.data()); 87 | T* csr_val_ptr = thrust::raw_pointer_cast(csr_val_d.data()); 88 | int* init_cluster_ptr = thrust::raw_pointer_cast(cluster_d.data()); 89 | int num_level; 90 | 91 | cudaProfilerStart(); 92 | hr_clock.start(); 93 | nvlouvain::louvain(csr_ptr_ptr, csr_ind_ptr, csr_val_ptr, 94 | m, nnz, 95 | weighted, has_init_cluster, 96 | init_cluster_ptr, final_modulartiy, clustering_h, num_level); 97 | 98 | hr_clock.stop(&louvain_time); 99 | cudaProfilerStop(); 100 | 101 | std::cout<<"Final modularity: "< 20 | #include "test_opt_utils.h" 21 | #include "graph_utils.cuh" 22 | #include "louvain.cuh" 23 | #include "gtest/gtest.h" 24 | #include "high_res_clock.h" 25 | #include "util.cuh" 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | template< typename IdxIter, typename ValIter, typename ValType > 34 | __global__ void kernal_k_in_test(const int size, IdxIter csr_ptr_iter, IdxIter csr_ind_iter, ValIter csr_val_iter, IdxIter cluster_iter, int i, ValType* result){ 35 | /* 36 | 37 | //printf("successfully launch kernal\n"); 38 | 39 | int idx_x = blockDim.x*blockIdx.x + threadIdx.x; 40 | int idx_y = blockDim.y*blockIdx.y + threadIdx.y; 41 | 42 | if(idx_x < size && idx_y < size ){ 43 | 44 | int c = *( cluster_iter + idx_y); 45 | //printf(" ** %d %d\n", idx_x, idx_y); 46 | //printf("start compute k with iter passing. 
(%d, %d, %d) idx = %d %f\n", blockDim.x, blockIdx.x, threadIdx.x, idx, result[idx]); 47 | nvlouvain::compute_k_i_in(size, csr_ptr_iter, csr_ind_iter, csr_val_iter, cluster_iter, c, idx_x, &result[idx_x *size + idx_y ]); 48 | // n_vertex, csr_ptr_iter, csr_idx_iter, csr_val_iter, cluster_iter, c, i, result 49 | printf("k_%d_in_c%d = %f\n", idx_x, idx_y ,result[idx_x *size + idx_y]); 50 | 51 | } 52 | */ 53 | /* 54 | if(idx == 0){ 55 | nvlouvain::display_vec(csr_ptr_iter, size); 56 | nvlouvain::display_vec(csr_ind_iter, csr_ptr_iter[size]); 57 | nvlouvain::display_vec(csr_val_iter, csr_ptr_iter[size]); 58 | 59 | } 60 | */ 61 | return; 62 | 63 | } 64 | 65 | 66 | void k_i_in_compute_test( thrust::device_vector &csr_ptr_d, 67 | thrust::device_vector &csr_ind_d, 68 | thrust::device_vector &csr_val_d, 69 | int size){ 70 | 71 | HighResClock hr_clock; 72 | double timed; 73 | 74 | 75 | dim3 block_size((size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, (size + BLOCK_SIZE_2D -1)/ BLOCK_SIZE_2D, 1); 76 | dim3 grid_size(BLOCK_SIZE_2D, BLOCK_SIZE_2D, 1); 77 | 78 | std::cout<< csr_ptr_d.size()<<" "< result_d(size * size); 80 | thrust::device_vector cluster_d(size); 81 | 82 | T* result_ptr = thrust::raw_pointer_cast(result_d.data()); 83 | 84 | 85 | hr_clock.start(); 86 | int i = 0; 87 | std::cout<<"successfully declair device vector.\n"; 88 | kernal_k_in_test<<>>(size, csr_ptr_d.begin(), csr_ind_d.begin(), csr_val_d.begin(), cluster_d.begin(), i, result_ptr); 89 | CUDA_CALL(cudaDeviceSynchronize()); 90 | 91 | hr_clock.stop(&timed); 92 | double iter_time(timed); 93 | nvlouvain::display_vec(result_d); 94 | 95 | std::cout<<"k_i_in runtime: "< &csr_ptr_d, 102 | thrust::device_vector &csr_ind_d, 103 | thrust::device_vector &csr_val_d, 104 | int size){ 105 | for_each_n() 106 | }*/ 107 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_cublas.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include 20 | #include 21 | #include "debug_macros.h" 22 | 23 | namespace nvgraph 24 | { 25 | class Cublas; 26 | 27 | class Cublas 28 | { 29 | private: 30 | static cublasHandle_t m_handle; 31 | // Private ctor to prevent instantiation. 32 | Cublas(); 33 | ~Cublas(); 34 | public: 35 | 36 | // Get the handle. 
37 | static cublasHandle_t get_handle() 38 | { 39 | if (m_handle == 0) 40 | CHECK_CUBLAS(cublasCreate(&m_handle)); 41 | return m_handle; 42 | } 43 | 44 | static void destroy_handle() 45 | { 46 | if (m_handle != 0) 47 | CHECK_CUBLAS(cublasDestroy(m_handle)); 48 | m_handle = 0; 49 | } 50 | 51 | static void set_pointer_mode_device(); 52 | static void set_pointer_mode_host(); 53 | static void setStream(cudaStream_t stream) 54 | { 55 | cublasHandle_t handle = Cublas::get_handle(); 56 | CHECK_CUBLAS(cublasSetStream(handle, stream)); 57 | } 58 | 59 | template 60 | static void axpy(int n, T alpha, 61 | const T* x, int incx, 62 | T* y, int incy); 63 | 64 | template 65 | static void copy(int n, const T* x, int incx, 66 | T* y, int incy); 67 | 68 | template 69 | static void dot(int n, const T* x, int incx, 70 | const T* y, int incy, 71 | T* result); 72 | 73 | template 74 | static void gemv(bool transposed, int m, int n, 75 | const T* alpha, const T* A, int lda, 76 | const T* x, int incx, 77 | const T* beta, T* y, int incy); 78 | 79 | template 80 | static void gemv_ext(bool transposed, const int m, const int n, 81 | const T* alpha, const T* A, const int lda, 82 | const T* x, const int incx, 83 | const T* beta, T* y, const int incy, const int offsetx, const int offsety, const int offseta); 84 | 85 | template 86 | static void trsv_v2( cublasFillMode_t uplo, cublasOperation_t trans, cublasDiagType_t diag, int n, 87 | const T *A, int lda, T *x, int incx, int offseta); 88 | 89 | template 90 | static void ger(int m, int n, const T* alpha, 91 | const T* x, int incx, 92 | const T* y, int incy, 93 | T* A, int lda); 94 | 95 | template 96 | static T nrm2(int n, const T* x, int incx); 97 | template 98 | static void nrm2(int n, const T* x, int incx, T* result); 99 | 100 | template 101 | static void scal(int n, T alpha, T* x, int incx); 102 | template 103 | static void scal(int n, T* alpha, T* x, int incx); 104 | 105 | template 106 | static void gemm(bool transa, bool transb, int m, int n, int k, 107 | const T * alpha, const T * A, int lda, 108 | const T * B, int ldb, 109 | const T * beta, T * C, int ldc); 110 | 111 | template 112 | static void geam(bool transa, bool transb, int m, int n, 113 | const T * alpha, const T * A, int lda, 114 | const T * beta, const T * B, int ldb, 115 | T * C, int ldc); 116 | 117 | }; 118 | 119 | } // end namespace nvgraph 120 | 121 | -------------------------------------------------------------------------------- /external/cub_semiring/util_macro.cuh: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2011, Duane Merrill. All rights reserved. 3 | * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * * Neither the name of the NVIDIA CORPORATION nor the 13 | * names of its contributors may be used to endorse or promote products 14 | * derived from this software without specific prior written permission. 
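A hedged usage sketch for the static Cublas wrapper above. The device pointers are assumed to be allocated and populated elsewhere, and the float instantiations of the templated members are assumed to be provided by the corresponding .cu translation unit; only the declarations are visible in this header.

    #include <cuda_runtime.h>
    #include "nvgraph_cublas.hxx"

    void saxpy_and_norm(int n, const float* d_x, float* d_y, cudaStream_t stream)
    {
        nvgraph::Cublas::setStream(stream);           // lazily creates the shared handle
        nvgraph::Cublas::set_pointer_mode_host();     // scalars passed from host memory
        nvgraph::Cublas::axpy(n, 2.0f, d_x, 1, d_y, 1);   // y = 2*x + y
        float nrm = nvgraph::Cublas::nrm2(n, d_y, 1);     // ||y||_2 returned to the host
        (void)nrm;
    }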
15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY 20 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | * 27 | ******************************************************************************/ 28 | 29 | /****************************************************************************** 30 | * Common C/C++ macro utilities 31 | ******************************************************************************/ 32 | 33 | #pragma once 34 | 35 | #include "util_namespace.cuh" 36 | 37 | /// Optional outer namespace(s) 38 | CUB_NS_PREFIX 39 | 40 | /// CUB namespace 41 | namespace cub { 42 | 43 | 44 | /** 45 | * \addtogroup UtilModule 46 | * @{ 47 | */ 48 | 49 | #ifndef CUB_ALIGN 50 | #if defined(_WIN32) || defined(_WIN64) 51 | /// Align struct 52 | #define CUB_ALIGN(bytes) __declspec(align(32)) 53 | #else 54 | /// Align struct 55 | #define CUB_ALIGN(bytes) __attribute__((aligned(bytes))) 56 | #endif 57 | #endif 58 | 59 | #ifndef CUB_MAX 60 | /// Select maximum(a, b) 61 | #define CUB_MAX(a, b) (((b) > (a)) ? (b) : (a)) 62 | #endif 63 | 64 | #ifndef CUB_MIN 65 | /// Select minimum(a, b) 66 | #define CUB_MIN(a, b) (((b) < (a)) ? (b) : (a)) 67 | #endif 68 | 69 | #ifndef CUB_QUOTIENT_FLOOR 70 | /// Quotient of x/y rounded down to nearest integer 71 | #define CUB_QUOTIENT_FLOOR(x, y) ((x) / (y)) 72 | #endif 73 | 74 | #ifndef CUB_QUOTIENT_CEILING 75 | /// Quotient of x/y rounded up to nearest integer 76 | #define CUB_QUOTIENT_CEILING(x, y) (((x) + (y) - 1) / (y)) 77 | #endif 78 | 79 | #ifndef CUB_ROUND_UP_NEAREST 80 | /// x rounded up to the nearest multiple of y 81 | #define CUB_ROUND_UP_NEAREST(x, y) ((((x) + (y) - 1) / (y)) * y) 82 | #endif 83 | 84 | #ifndef CUB_ROUND_DOWN_NEAREST 85 | /// x rounded down to the nearest multiple of y 86 | #define CUB_ROUND_DOWN_NEAREST(x, y) (((x) / (y)) * y) 87 | #endif 88 | 89 | 90 | #ifndef CUB_STATIC_ASSERT 91 | #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document 92 | #define CUB_CAT_(a, b) a ## b 93 | #define CUB_CAT(a, b) CUB_CAT_(a, b) 94 | #endif // DOXYGEN_SHOULD_SKIP_THIS 95 | 96 | /// Static assert 97 | #define CUB_STATIC_ASSERT(cond, msg) typedef int CUB_CAT(cub_static_assert, __LINE__)[(cond) ? 1 : -1] 98 | #endif 99 | 100 | /** @} */ // end group UtilModule 101 | 102 | } // CUB namespace 103 | CUB_NS_POSTFIX // Optional outer namespace(s) 104 | -------------------------------------------------------------------------------- /cpp/include/test/thrust_test.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include 19 | #include "test_opt_utils.h" 20 | #include "graph_utils.cuh" 21 | #include "louvain.cuh" 22 | #include "gtest/gtest.h" 23 | #include "high_res_clock.h" 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | 32 | 33 | 34 | template 35 | __global__ void test_sum(iter begin, iter end, ptr sum){ 36 | 37 | thrust::plus op; 38 | *sum = thrust::reduce(thrust::cuda::par, begin, end, 0.0, op); 39 | 40 | } 41 | 42 | __global__ void test_sum_cast(T* vec, size_t size, T* sum){ 43 | 44 | thrust::plus op; 45 | *sum = thrust::reduce(thrust::cuda::par, vec, vec+size, 0.0, op); 46 | 47 | } 48 | 49 | 50 | void thrust_passing_arg_test( thrust::host_vector &csr_ptr_h, 51 | thrust::host_vector &csr_ind_h, 52 | thrust::host_vector &csr_val_h, 53 | thrust::device_vector &csr_ptr_d, 54 | thrust::device_vector &csr_ind_d, 55 | thrust::device_vector &csr_val_d){ 56 | 57 | HighResClock hr_clock; 58 | double timed; 59 | 60 | thrust::plus binary_op; 61 | hr_clock.start(); 62 | T sum_h = thrust::reduce(csr_val_h.begin(), csr_val_h.end(), 0.0, binary_op); 63 | hr_clock.stop(&timed); 64 | double cpu_time(timed); 65 | 66 | 67 | 68 | thrust::copy(csr_val_d.begin(), csr_val_d.end(), std::ostream_iterator(std::cout, " ")); 69 | std::cout< sum_d(1, 0.0); 86 | test_sum<<>>( csr_val_d.begin(),csr_val_d.end(), sum_d.data()); 87 | CUDA_CALL(cudaDeviceSynchronize()); 88 | hr_clock.stop(&timed); 89 | double cuda_time(timed); 90 | 91 | 92 | hr_clock.start(); 93 | cudaStream_t s; 94 | thrust::device_vector sum_a(1, 0.0); 95 | cudaStreamCreate(&s); 96 | test_sum<<<1,1,0,s>>>(csr_val_d.begin(),csr_val_d.end(), sum_a.data()); 97 | cudaStreamSynchronize(s); 98 | hr_clock.stop(&timed); 99 | double asyn_time(timed); 100 | 101 | 102 | 103 | hr_clock.start(); 104 | T* csr_val_ptr = thrust::raw_pointer_cast(csr_val_d.data()); 105 | double* raw_sum; 106 | double sum_cast; 107 | cudaMalloc((void **) &raw_sum, sizeof(double)); 108 | test_sum_cast<<>>( csr_val_ptr, csr_val_d.size(), raw_sum); 109 | cudaMemcpy(&sum_cast, raw_sum, sizeof(double),cudaMemcpyDeviceToHost); 110 | CUDA_CALL(cudaDeviceSynchronize()); 111 | hr_clock.stop(&timed); 112 | double cast_time(timed); 113 | cudaFree(raw_sum); 114 | 115 | 116 | 117 | 118 | std::cout<<"cpu sum of val: "<< sum_h <<" runtime: "< 17 | #include 18 | #include 19 | #include 20 | #include "test_opt_utils.cuh" 21 | #include "graph_utils.cuh" 22 | 23 | //#define ENABLE_LOG true 24 | #define ENALBE_LOUVAIN true 25 | 26 | #include "nvlouvain.cuh" 27 | #include "gtest/gtest.h" 28 | #include "high_res_clock.h" 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | using T = double; 38 | 39 | int main(int argc, char* argv[]){ 40 | 41 | if(argc < 2) 42 | { 43 | std::cout<< "Help : ./louvain_test matrix_market_file.mtx"<(fin, 1, &mc, &m, &k, &nnz)) ,0); 53 | EXPECT_EQ(m,k); 54 | 55 | thrust::host_vector coo_ind_h(nnz); 56 | thrust::host_vector csr_ptr_h(m+1); 57 | thrust::host_vector csr_ind_h(nnz); 58 | thrust::host_vector csr_val_h(nnz); 59 | 
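For comparison with the in-kernel thrust::reduce calls in thrust_test.cuh above, the sketch below shows the more common host-side form of the same sum; thrust::cuda::par.on(stream) keeps the reduction on a chosen stream. This is an illustrative alternative, not code taken from the test.

    #include <thrust/device_vector.h>
    #include <thrust/functional.h>
    #include <thrust/reduce.h>
    #include <thrust/system/cuda/execution_policy.h>

    // Host-side reduction over a device_vector; Thrust launches its own kernels.
    double device_sum(const thrust::device_vector<double>& v, cudaStream_t s)
    {
        return thrust::reduce(thrust::cuda::par.on(s),
                              v.begin(), v.end(),
                              0.0, thrust::plus<double>());
    }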
60 | EXPECT_EQ( (mm_to_coo(fin, 1, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL)), 0); 61 | EXPECT_EQ( (coo_to_csr (m, k, nnz, &coo_ind_h[0], &csr_ind_h[0], &csr_val_h[0], NULL, &csr_ptr_h[0], NULL, NULL, NULL)), 0); 62 | 63 | EXPECT_EQ(fclose(fin),0); 64 | 65 | thrust::device_vector csr_ptr_d(csr_ptr_h); 66 | thrust::device_vector csr_ind_d(csr_ind_h); 67 | thrust::device_vector csr_val_d(csr_val_h); 68 | 69 | thrust::device_vector tmp_1(nnz); 70 | thrust::fill(thrust::cuda::par, tmp_1.begin(), tmp_1.end(), 1.0); 71 | thrust::device_vector::iterator max_ele = thrust::max_element(thrust::cuda::par, csr_val_d.begin(), csr_val_d.end()); 72 | 73 | bool weighted = (*max_ele!=1.0); 74 | 75 | //std::cout<<(weighted?"Weighted ":"Not Weigthed ")<<" n_vertex: "< cluster_d(m, 0); 84 | std::vector< std::vector > best_cluster_vec; 85 | int* csr_ptr_ptr = thrust::raw_pointer_cast(csr_ptr_d.data()); 86 | int* csr_ind_ptr = thrust::raw_pointer_cast(csr_ind_d.data()); 87 | T* csr_val_ptr = thrust::raw_pointer_cast(csr_val_d.data()); 88 | int* init_cluster_ptr = thrust::raw_pointer_cast(cluster_d.data()); 89 | int num_level; 90 | 91 | cudaProfilerStart(); 92 | hr_clock.start(); 93 | 94 | nvlouvain::louvain(csr_ptr_ptr, csr_ind_ptr, csr_val_ptr, 95 | m, nnz, 96 | weighted, has_init_cluster, 97 | init_cluster_ptr, final_modulartiy, best_cluster_vec, num_level); 98 | 99 | hr_clock.stop(&louvain_time); 100 | cudaProfilerStop(); 101 | 102 | std::cout<<"Final modularity: "<::iterator it = best_cluster_vec[i].begin(); it != best_cluster_vec[i].end(); ++it) 108 | // std::cout << *it <<' '; 109 | // std::cout << std::endl; 110 | //} 111 | } 112 | return 0; 113 | } 114 | 115 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_vector.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | #include 19 | #include "nvgraph_error.hxx" 20 | #include "nvgraph_vector_kernels.hxx" 21 | 22 | #include "debug_macros.h" 23 | 24 | namespace nvgraph 25 | { 26 | 27 | /*! A Vector contains a device vector of size |E| and type T 28 | */ 29 | template 30 | class Vector 31 | { 32 | public: 33 | //typedef IndexType_ IndexType; 34 | typedef ValueType_ ValueType; 35 | 36 | protected: 37 | /*! Storage for the values. 38 | */ 39 | SHARED_PREFIX::shared_ptr values; 40 | 41 | /*! Size of the array 42 | */ 43 | size_t size; 44 | 45 | /*! Storage for a cuda stream 46 | */ 47 | //, cudaStream_t stream = 0 48 | 49 | public: 50 | 51 | /*! Construct an empty \p Vector. 52 | */ 53 | Vector(void) {} 54 | ~Vector(void) {} 55 | /*! Construct a \p Vector of size vertices. 
56 | * 57 | * \param vertices The size of the Vector 58 | */ 59 | Vector(size_t vertices, cudaStream_t stream = 0) 60 | : values(allocateDevice(vertices, stream)), 61 | size(vertices) {} 62 | 63 | 64 | size_t get_size() const { return size; } 65 | size_t bytes() const { return size*sizeof(ValueType);} 66 | ValueType* raw() const { return values.get(); } 67 | //cudaStream_t get_stream() const { return stream_; } 68 | void allocate(size_t n, cudaStream_t stream = 0) 69 | { 70 | size = n; 71 | values = allocateDevice(n, stream); 72 | } 73 | 74 | void attach(size_t n, ValueType* vals, cudaStream_t stream = 0) 75 | { 76 | size = n; 77 | values = attachDevicePtr(vals, stream); 78 | } 79 | 80 | Vector(size_t vertices, ValueType * vals, cudaStream_t stream = 0) 81 | : values(attachDevicePtr(vals, stream)), 82 | size(vertices) {} 83 | 84 | void fill(ValueType val, cudaStream_t stream = 0) 85 | { 86 | fill_raw_vec(this->raw(), this->get_size(), val, stream); 87 | } 88 | void copy(Vector &vec1, cudaStream_t stream = 0) 89 | { 90 | if (this->get_size() == 0 && vec1.get_size()>0) 91 | { 92 | allocate(vec1.get_size(), stream); 93 | copy_vec(vec1.raw(), this->get_size(), this->raw(), stream); 94 | } 95 | else if (this->get_size() == vec1.get_size()) 96 | copy_vec(vec1.raw(), this->get_size(), this->raw(), stream); 97 | else if (this->get_size() > vec1.get_size()) 98 | { 99 | //COUT() << "Warning Copy : sizes mismatch "<< this->get_size() <<':'<< vec1.get_size() <raw(), stream); 101 | //dump_raw_vec (this->raw(), vec1.get_size(), 0); 102 | } 103 | else 104 | { 105 | FatalError("Cannot copy a vector into a smaller one", NVGRAPH_ERR_BAD_PARAMETERS); 106 | } 107 | } 108 | void dump(size_t off, size_t sz, cudaStream_t stream = 0) 109 | { 110 | if ((off+sz)<= this->size) 111 | dump_raw_vec(this->raw(), sz, off, stream); 112 | else 113 | FatalError("Offset and Size values doesn't make sense", NVGRAPH_ERR_BAD_PARAMETERS); 114 | } 115 | void flag_zeros(Vector & flags, cudaStream_t stream = 0) 116 | { 117 | flag_zeros_raw_vec(this->get_size(), this->raw(), flags.raw(), stream); 118 | } 119 | 120 | ValueType nrm1(cudaStream_t stream = 0) 121 | { 122 | ValueType res = 0; 123 | nrm1_raw_vec(this->raw(), this->get_size(), &res, stream); 124 | return res; 125 | } 126 | }; // class Vector 127 | } // end namespace nvgraph 128 | 129 | -------------------------------------------------------------------------------- /cpp/include/nvgraph_convert.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | #pragma once 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | namespace nvgraph{ 23 | void csr2coo( const int *csrSortedRowPtr, 24 | int nnz, int m, 25 | int *cooRowInd, 26 | cusparseIndexBase_t idxBase); 27 | void coo2csr( const int *cooRowInd, 28 | int nnz, int m, 29 | int *csrSortedRowPtr, 30 | cusparseIndexBase_t idxBase ); 31 | 32 | void csr2csc( int m, int n, int nnz, 33 | const void *csrVal, const int *csrRowPtr, const int *csrColInd, 34 | void *cscVal, int *cscRowInd, int *cscColPtr, 35 | cusparseAction_t copyValues, cusparseIndexBase_t idxBase, 36 | cudaDataType_t *dataType); 37 | void csc2csr( int m, int n, int nnz, 38 | const void *cscVal, const int *cscRowInd, const int *cscColPtr, 39 | void *csrVal, int *csrRowPtr, int *csrColInd, 40 | cusparseAction_t copyValues, cusparseIndexBase_t idxBase, 41 | cudaDataType_t *dataType); 42 | 43 | void csr2cscP( int m, int n, int nnz, 44 | const int *csrRowPtr, const int *csrColInd, 45 | int *cscRowInd, int *cscColPtr, int *p, cusparseIndexBase_t idxBase); 46 | 47 | 48 | void cooSortBySource(int m, int n, int nnz, 49 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 50 | void *dstVal, int *dstRowInd, int *dstColInd, 51 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 52 | void cooSortByDestination(int m, int n, int nnz, 53 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 54 | void *dstVal, int *dstRowInd, int *dstColInd, 55 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 56 | 57 | void coos2csc(int m, int n, int nnz, 58 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 59 | void *dstVal, int *dstRowInd, int *dstColInd, 60 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 61 | void cood2csr(int m, int n, int nnz, 62 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 63 | void *dstVal, int *dstRowInd, int *dstColInd, 64 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 65 | void coou2csr(int m, int n, int nnz, 66 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 67 | void *dstVal, int *dstRowInd, int *dstColInd, 68 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 69 | void coou2csc(int m, int n, int nnz, 70 | const void *srcVal, const int *srcRowInd, const int *srcColInd, 71 | void *dstVal, int *dstRowInd, int *dstColInd, 72 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 73 | 74 | ////////////////////////// Utility functions ////////////////////////// 75 | void createIdentityPermutation(int n, int *p); 76 | void gthrX(int nnz, const void *y, void *xVal, const int *xInd, 77 | cusparseIndexBase_t idxBase, cudaDataType_t *dataType); 78 | 79 | void cooSortBufferSize(int m, int n, int nnz, const int *cooRows, const int *cooCols, size_t *pBufferSizeInBytes); 80 | void cooGetSourcePermutation(int m, int n, int nnz, int *cooRows, int *cooCols, int *p, void *pBuffer); 81 | void cooGetDestinationPermutation(int m, int n, int nnz, int *cooRows, int *cooCols, int *p, void *pBuffer); 82 | 83 | void csr2csc2BufferSize(int m, int n, int nnz, const int *csrRowPtr, const int *csrColInd, size_t *pBufferSize); 84 | void csr2csc2(int m, int n, int nnz, 85 | const int *csrRowPtr, const int *csrColInd, 86 | int *cscRowInd, int *cscColPtr, int *p, void *pBuffer, 87 | cusparseIndexBase_t idxBase); 88 | 89 | } //end nvgraph namespace 90 | -------------------------------------------------------------------------------- /cpp/include/kmeans.hxx: 
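A usage sketch for the conversion interface declared in nvgraph_convert.hxx above. It assumes these functions are thin wrappers over cuSPARSE conversion routines that operate on device arrays; the helper name and the zero index base are illustrative only, not taken from the library.

#include <cusparse.h>
#include "nvgraph_convert.hxx"

// Expand a CSR row-pointer array into COO row indices and rebuild it again.
// csr_ptr_d, coo_rows_d and csr_ptr_out_d are assumed to be device allocations
// of size m+1, nnz and m+1 respectively (allocation and error checks omitted).
void hypothetical_csr_coo_roundtrip(const int* csr_ptr_d, int m, int nnz,
                                    int* coo_rows_d, int* csr_ptr_out_d)
{
    nvgraph::csr2coo(csr_ptr_d, nnz, m, coo_rows_d, CUSPARSE_INDEX_BASE_ZERO);
    nvgraph::coo2csr(coo_rows_d, nnz, m, csr_ptr_out_d, CUSPARSE_INDEX_BASE_ZERO);
}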
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #pragma once 17 | 18 | #include "nvgraph_error.hxx" 19 | 20 | namespace nvgraph { 21 | 22 | /// Find clusters with k-means algorithm 23 | /** Initial centroids are chosen with k-means++ algorithm. Empty 24 | * clusters are reinitialized by choosing new centroids with 25 | * k-means++ algorithm. 26 | * 27 | * CNMEM must be initialized before calling this function. 28 | * 29 | * @param cublasHandle_t cuBLAS handle. 30 | * @param n Number of observation vectors. 31 | * @param d Dimension of observation vectors. 32 | * @param k Number of clusters. 33 | * @param tol Tolerance for convergence. k-means stops when the 34 | * change in residual divided by n is less than tol. 35 | * @param maxiter Maximum number of k-means iterations. 36 | * @param obs (Input, device memory, d*n entries) Observation 37 | * matrix. Matrix is stored column-major and each column is an 38 | * observation vector. Matrix dimensions are d x n. 39 | * @param codes (Output, device memory, n entries) Cluster 40 | * assignments. 41 | * @param residual On exit, residual sum of squares (sum of squares 42 | * of distances between observation vectors and centroids). 43 | * @param On exit, number of k-means iterations. 44 | * @return NVGRAPH error flag. 45 | */ 46 | template 47 | NVGRAPH_ERROR kmeans(IndexType_ n, IndexType_ d, IndexType_ k, 48 | ValueType_ tol, IndexType_ maxiter, 49 | const ValueType_ * __restrict__ obs, 50 | IndexType_ * __restrict__ codes, 51 | ValueType_ & residual, 52 | IndexType_ & iters); 53 | 54 | /// Find clusters with k-means algorithm 55 | /** Initial centroids are chosen with k-means++ algorithm. Empty 56 | * clusters are reinitialized by choosing new centroids with 57 | * k-means++ algorithm. 58 | * 59 | * @param n Number of observation vectors. 60 | * @param d Dimension of observation vectors. 61 | * @param k Number of clusters. 62 | * @param tol Tolerance for convergence. k-means stops when the 63 | * change in residual divided by n is less than tol. 64 | * @param maxiter Maximum number of k-means iterations. 65 | * @param obs (Input, device memory, d*n entries) Observation 66 | * matrix. Matrix is stored column-major and each column is an 67 | * observation vector. Matrix dimensions are d x n. 68 | * @param codes (Output, device memory, n entries) Cluster 69 | * assignments. 70 | * @param clusterSizes (Output, device memory, k entries) Number of 71 | * points in each cluster. 72 | * @param centroids (Output, device memory, d*k entries) Centroid 73 | * matrix. Matrix is stored column-major and each column is a 74 | * centroid. Matrix dimensions are d x k. 75 | * @param work (Output, device memory, n*max(k,d) entries) 76 | * Workspace. 77 | * @param work_int (Output, device memory, 2*d*n entries) 78 | * Workspace. 
79 | * @param residual_host (Output, host memory, 1 entry) Residual sum 80 | * of squares (sum of squares of distances between observation 81 | * vectors and centroids). 82 | * @param iters_host (Output, host memory, 1 entry) Number of 83 | * k-means iterations. 84 | * @return NVGRAPH error flag. 85 | */ 86 | template 87 | NVGRAPH_ERROR kmeans(IndexType_ n, IndexType_ d, IndexType_ k, 88 | ValueType_ tol, IndexType_ maxiter, 89 | const ValueType_ * __restrict__ obs, 90 | IndexType_ * __restrict__ codes, 91 | IndexType_ * __restrict__ clusterSizes, 92 | ValueType_ * __restrict__ centroids, 93 | ValueType_ * __restrict__ work, 94 | IndexType_ * __restrict__ work_int, 95 | ValueType_ * residual_host, 96 | IndexType_ * iters_host); 97 | 98 | } 99 | 100 | -------------------------------------------------------------------------------- /cpp/include/partition.hxx: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019, NVIDIA CORPORATION. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #pragma once 18 | 19 | #include "nvgraph_error.hxx" 20 | #include "valued_csr_graph.hxx" 21 | #include "matrix.hxx" 22 | 23 | 24 | namespace nvgraph { 25 | #define SPECTRAL_USE_COLORING true 26 | 27 | #define SPECTRAL_USE_LOBPCG true 28 | #define SPECTRAL_USE_PRECONDITIONING true 29 | #define SPECTRAL_USE_SCALING_OF_EIGVECS false 30 | 31 | #define SPECTRAL_USE_MAGMA false 32 | #define SPECTRAL_USE_THROTTLE true 33 | #define SPECTRAL_USE_NORMALIZED_LAPLACIAN true 34 | #define SPECTRAL_USE_R_ORTHOGONALIZATION false 35 | 36 | /// Spectral graph partition 37 | /** Compute partition for a weighted undirected graph. This 38 | * partition attempts to minimize the cost function: 39 | * Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition) 40 | * 41 | * @param G Weighted graph in CSR format 42 | * @param nParts Number of partitions. 43 | * @param nEigVecs Number of eigenvectors to compute. 44 | * @param maxIter_lanczos Maximum number of Lanczos iterations. 45 | * @param restartIter_lanczos Maximum size of Lanczos system before 46 | * implicit restart. 47 | * @param tol_lanczos Convergence tolerance for Lanczos method. 48 | * @param maxIter_kmeans Maximum number of k-means iterations. 49 | * @param tol_kmeans Convergence tolerance for k-means algorithm. 50 | * @param parts (Output, device memory, n entries) Partition 51 | * assignments. 52 | * @param iters_lanczos On exit, number of Lanczos iterations 53 | * performed. 54 | * @param iters_kmeans On exit, number of k-means iterations 55 | * performed. 56 | * @return NVGRAPH error flag. 
57 |  */
58 | template <typename IndexType_, typename ValueType_>
59 | NVGRAPH_ERROR partition( ValuedCsrGraph<IndexType_, ValueType_>& G,
60 |                          IndexType_ nParts,
61 |                          IndexType_ nEigVecs,
62 |                          IndexType_ maxIter_lanczos,
63 |                          IndexType_ restartIter_lanczos,
64 |                          ValueType_ tol_lanczos,
65 |                          IndexType_ maxIter_kmeans,
66 |                          ValueType_ tol_kmeans,
67 |                          IndexType_ * __restrict__ parts,
68 |                          Vector<ValueType_> &eigVals,
69 |                          Vector<ValueType_> &eigVecs,
70 |                          IndexType_ & iters_lanczos,
71 |                          IndexType_ & iters_kmeans);
72 | 
73 | template <typename IndexType_, typename ValueType_>
74 | NVGRAPH_ERROR partition_lobpcg( ValuedCsrGraph<IndexType_, ValueType_>& G, Matrix<IndexType_, ValueType_> * M, cusolverDnHandle_t cusolverHandle,
75 |                                 IndexType_ nParts,
76 |                                 IndexType_ nEigVecs,
77 |                                 IndexType_ maxIter_lanczos,
78 |                                 ValueType_ tol_lanczos,
79 |                                 IndexType_ maxIter_kmeans,
80 |                                 ValueType_ tol_kmeans,
81 |                                 IndexType_ * __restrict__ parts,
82 |                                 Vector<ValueType_> &eigVals,
83 |                                 Vector<ValueType_> &eigVecs,
84 |                                 IndexType_ & iters_lanczos,
85 |                                 IndexType_ & iters_kmeans);
86 | 
87 | 
88 | /// Compute cost function for partition
89 | /** This function determines the edges cut by a partition and a cost
90 |  * function:
91 |  * Cost = \sum_i (Edges cut by ith partition)/(Vertices in ith partition)
92 |  * Graph is assumed to be weighted and undirected.
93 |  *
94 |  * @param G Weighted graph in CSR format
95 |  * @param nParts Number of partitions.
96 |  * @param parts (Input, device memory, n entries) Partition
97 |  *   assignments.
98 |  * @param edgeCut On exit, weight of edges cut by partition.
99 |  * @param cost On exit, partition cost function.
100 |  * @return NVGRAPH error flag.
101 |  */
102 | template <typename IndexType_, typename ValueType_>
103 | NVGRAPH_ERROR analyzePartition(ValuedCsrGraph<IndexType_, ValueType_> & G,
104 |                                IndexType_ nParts,
105 |                                const IndexType_ * __restrict__ parts,
106 |                                ValueType_ & edgeCut, ValueType_ & cost);
107 | 
108 | }
109 | 
110 | 
--------------------------------------------------------------------------------
/cpp/include/stacktrace.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2019, NVIDIA CORPORATION.
3 |  *
4 |  * Licensed under the Apache License, Version 2.0 (the "License");
5 |  * you may not use this file except in compliance with the License.
6 |  * You may obtain a copy of the License at
7 |  *
8 |  *     http://www.apache.org/licenses/LICENSE-2.0
9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | //adapted from https://idlebox.net/2008/0901-stacktrace-demangled/ and licensed under WTFPL v2.0
18 | #pragma once
19 | 
20 | #if defined(_WIN32) || defined (__ANDROID__) || defined(ANDROID) || defined (__QNX__) || defined (__QNXNTO__)
21 | #else
22 | #include <execinfo.h>
23 | #include <cxxabi.h>
24 | #include <signal.h>
25 | #include <unistd.h>
26 | #include <stdio.h>
27 | #endif
28 | 
29 | #include <cstdlib>
30 | #include <string>
31 | #include <iostream>
32 | #include <sstream>
33 | namespace nvgraph {
34 | 
35 | /** Print a demangled stack backtrace of the caller function to the std::ostream \p eout.
*/ 36 | static inline void printStackTrace(std::ostream &eout = std::cerr, unsigned int max_frames = 63) 37 | { 38 | #if defined(_WIN32) || defined (__ANDROID__) || defined(ANDROID) || defined (__QNX__) || defined (__QNXNTO__) 39 | //TODO add code for windows stack trace and android stack trace 40 | #else 41 | std::stringstream out; 42 | 43 | // storage array for stack trace address data 44 | void* addrlist[max_frames+1]; 45 | 46 | // retrieve current stack addresses 47 | int addrlen = backtrace(addrlist, sizeof(addrlist) / sizeof(void*)); 48 | 49 | if (addrlen == 0) { 50 | out << " \n"; 51 | return; 52 | } 53 | 54 | // resolve addresses into strings containing "filename(function+address)", 55 | // this array must be free()-ed 56 | char** symbollist = backtrace_symbols(addrlist, addrlen); 57 | 58 | // allocate string which will be filled with the demangled function name 59 | size_t funcnamesize = 256; 60 | char* funcname = (char*)malloc(funcnamesize); 61 | 62 | // iterate over the returned symbol lines. skip the first, it is the 63 | // address of this function. 64 | for (int i = 1; i < addrlen; i++) 65 | { 66 | char *begin_name = 0, *begin_offset = 0, *end_offset = 0; 67 | 68 | // find parentheses and +address offset surrounding the mangled name: 69 | // ./module(function+0x15c) [0x8048a6d] 70 | for (char *p = symbollist[i]; *p; ++p) 71 | { 72 | if (*p == '(') 73 | begin_name = p; 74 | else if (*p == '+') 75 | begin_offset = p; 76 | else if (*p == ')' && begin_offset) { 77 | end_offset = p; 78 | break; 79 | } 80 | } 81 | 82 | if (begin_name && begin_offset && end_offset 83 | && begin_name < begin_offset) 84 | { 85 | *begin_name++ = '\0'; 86 | *begin_offset++ = '\0'; 87 | *end_offset = '\0'; 88 | 89 | // mangled name is now in [begin_name, begin_offset) and caller 90 | // offset in [begin_offset, end_offset). now apply 91 | // __cxa_demangle(): 92 | 93 | int status; 94 | char* ret = abi::__cxa_demangle(begin_name, 95 | funcname, &funcnamesize, &status); 96 | if (status == 0) { 97 | funcname = ret; // use possibly realloc()-ed string 98 | out << " " << symbollist[i] << " : " << funcname << "+" << begin_offset << "\n"; 99 | } 100 | else { 101 | // demangling failed. Output function name as a C function with 102 | // no arguments. 103 | out << " " << symbollist[i] << " : " << begin_name << "()+" << begin_offset << "\n"; 104 | } 105 | } 106 | else 107 | { 108 | // couldn't parse the line? print the whole line. 109 | out << " " << symbollist[i] << "\n"; 110 | } 111 | } 112 | eout << out.str(); 113 | //error_output(out.str().c_str(),out.str().size()); 114 | free(funcname); 115 | free(symbollist); 116 | //printf("PID of failing process: %d\n",getpid()); 117 | //while(1); 118 | #endif 119 | } 120 | 121 | } //end namespace nvgraph 122 | 123 | --------------------------------------------------------------------------------
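A minimal usage sketch for the stack-trace helper above; the wrapper function and the exception it throws are illustrative, not part of nvgraph.

#include <sstream>
#include <stdexcept>
#include <string>
#include "stacktrace.h"

// Capture the backtrace into a string and attach it to an exception message.
// printStackTrace defaults to std::cerr and 63 frames; any std::ostream works,
// and on non-POSIX builds the function above simply emits nothing.
void hypothetical_fatal(const char* what)
{
    std::ostringstream trace;
    nvgraph::printStackTrace(trace);
    throw std::runtime_error(std::string(what) + "\n" + trace.str());
}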