├── .gitignore ├── CHANGELOG.md ├── CMakeLists.txt ├── LICENSE ├── README.md ├── asset ├── graph.png ├── knowledge_graph.png ├── logo │ ├── favicon.ico │ └── logo.png ├── visualization.png └── visualization │ ├── imagenet_hierarchy.gif │ └── mnist_3d.gif ├── cmake ├── FindGFlags.cmake ├── FindGlog.cmake └── FindPythonLibsNew.cmake ├── conda ├── conda_build_config.yaml ├── graphvite-mini │ ├── build.sh │ └── meta.yaml ├── graphvite │ ├── build.sh │ └── meta.yaml └── requirements.txt ├── config ├── demo │ ├── math.yaml │ └── quick_start.yaml ├── graph │ ├── deepwalk_flickr.yaml │ ├── deepwalk_friendster-small.yaml │ ├── deepwalk_friendster.yaml │ ├── deepwalk_hyperlink-pld.yaml │ ├── deepwalk_youtube.yaml │ ├── line_flickr.yaml │ ├── line_friendster-small.yaml │ ├── line_friendster.yaml │ ├── line_hyperlink-pld.yaml │ ├── line_youtube.yaml │ └── node2vec_youtube.yaml ├── knowledge_graph │ ├── complex_fb15k-237.yaml │ ├── complex_fb15k.yaml │ ├── complex_wikidata5m.yaml │ ├── complex_wn18.yaml │ ├── complex_wn18rr.yaml │ ├── distmult_fb15k-237.yaml │ ├── distmult_fb15k.yaml │ ├── distmult_wikidata5m.yaml │ ├── distmult_wn18.yaml │ ├── distmult_wn18rr.yaml │ ├── quate_fb15k-237.yaml │ ├── quate_fb15k.yaml │ ├── quate_wikidata5m.yaml │ ├── quate_wn18.yaml │ ├── quate_wn18rr.yaml │ ├── rotate_fb15k-237.yaml │ ├── rotate_fb15k.yaml │ ├── rotate_wikidata5m.yaml │ ├── rotate_wn18.yaml │ ├── rotate_wn18rr.yaml │ ├── simple_fb15k-237.yaml │ ├── simple_fb15k.yaml │ ├── simple_wikidata5m.yaml │ ├── simple_wn18.yaml │ ├── simple_wn18rr.yaml │ ├── transe_fb15k-237.yaml │ ├── transe_fb15k.yaml │ ├── transe_wikidata5m.yaml │ ├── transe_wn18.yaml │ └── transe_wn18rr.yaml ├── template │ ├── graph.yaml │ ├── knowledge_graph.yaml │ ├── visualization.yaml │ └── word_graph.yaml ├── visualization │ ├── largevis_imagenet.yaml │ ├── largevis_mnist_2d.yaml │ └── largevis_mnist_3d.yaml └── word_graph │ └── line_wikipedia.yaml ├── doc ├── Makefile └── source │ ├── api │ ├── application.rst │ 
├── dataset.rst │ ├── graph.rst │ ├── optimizer.rst │ └── solver.rst │ ├── benchmark.rst │ ├── conf.py │ ├── developer │ ├── framework.rst │ ├── model.rst │ ├── routine.rst │ └── solver.rst │ ├── faq.rst │ ├── index.rst │ ├── install.rst │ ├── introduction.rst │ ├── link.rst │ ├── overview.rst │ ├── pretrained_model.rst │ ├── quick_start.rst │ └── user │ ├── auto.rst │ ├── command_line.rst │ ├── configuration.rst │ ├── format.rst │ └── python.rst ├── external └── .gitignore ├── include ├── base │ ├── alias_table.cuh │ ├── memory.h │ └── vector.h ├── bind.h ├── core │ ├── graph.h │ ├── optimizer.h │ └── solver.h ├── instance │ ├── gpu │ │ ├── graph.cuh │ │ ├── knowledge_graph.cuh │ │ └── visualization.cuh │ ├── graph.cuh │ ├── knowledge_graph.cuh │ ├── model │ │ ├── graph.h │ │ ├── knowledge_graph.h │ │ └── visualization.h │ ├── visualization.cuh │ └── word_graph.cuh └── util │ ├── common.h │ ├── debug.h │ ├── gpu.cuh │ ├── io.h │ ├── math.h │ └── time.h ├── python ├── graphvite │ ├── __init__.py │ ├── application │ │ ├── __init__.py │ │ ├── application.py │ │ └── network.py │ ├── base.py │ ├── cmd.py │ ├── dataset.py │ ├── graph.py │ ├── helper.py │ ├── optimizer.py │ ├── solver.py │ └── util.py └── setup.py └── src ├── CMakeLists.txt └── graphvite.cu /.gitignore: -------------------------------------------------------------------------------- 1 | # build 2 | /build/ 3 | /doc/build 4 | 5 | # cmake 6 | /Makefile 7 | /src/Makefile 8 | /cmake-build-*/ 9 | 10 | # JetBrains 11 | /.idea -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Change log 2 | ========== 3 | 4 | Here list all notable changes in GraphVite library. 5 | 6 | v0.2.2 - 2020-03-11 7 | ------------------- 8 | - New model QuatE and its benchmarks on 5 knowledge graph datasets. 9 | - Add an option to skip `faiss` in compilation. 10 | - Fix instructions for conda installation. 
11 | 12 | v0.2.1 - 2019-11-12 13 | ------------------- 14 | - New dataset `Wikidata5m` and its benchmarks, 15 | including TransE, DistMult, ComplEx, SimplE and RotatE. 16 | - Add interface for loading pretrained models and saving hyperparameters. 17 | - Add weight clip in asynchronous self-adversarial negative sampling. 18 | 19 | v0.2.0 - 2019-10-11 20 | ------------------- 21 | - Add scalable multi-GPU prediction for node embedding and knowledge graph embedding. 22 | Evaluation on link prediction is 4.6x faster than v0.1.0. 23 | - New demo dataset `math` and entity prediction evaluation for knowledge graph. 24 | - Support Kepler and Turing GPU architectures. 25 | - Automatically choose the best episode size with regard to RAM limit. 26 | - Add template config files for applications. 27 | - Change the update of global embeddings from average to accumulation. Fix a serious 28 | numeric problem in the update. 29 | - Move file format settings from graph to application. Now one can customize formats 30 | and use comments in evaluation files. Add document for data format. 31 | - Separate GPU implementation into training routines and models. Routines are in 32 | `include/instance/gpu/*` and models are in `include/instance/model/*`. 
33 | 34 | v0.1.0 - 2019-08-05 35 | ------------------- 36 | - Multi-GPU training of large-scale graph embedding 37 | - 3 applications: node embedding, knowledge graph embedding and graph & 38 | high-dimensional data visualization 39 | - Node embedding 40 | - Model: DeepWalk, LINE, node2vec 41 | - Evaluation: node classification, link prediction 42 | - Knowledge graph embedding 43 | - Model: TransE, DistMult, ComplEx, SimplE, RotatE 44 | - Evaluation: link prediction 45 | - Graph & High-dimensional data visualization 46 | - Model: LargeVis 47 | - Evaluation: visualization(2D / 3D), animation(3D), hierarchy(2D) -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if (WIN32 OR APPLE) 2 | set(SKIP_TOOLCHAIN TRUE) 3 | endif() 4 | 5 | if (SKIP_TOOLCHAIN) 6 | # skip toolchain so that code insight can work properly 7 | cmake_minimum_required(VERSION 3.0) 8 | project(graphvite LANGUAGES CXX) 9 | include_directories(include) 10 | add_subdirectory(src) 11 | return() 12 | endif() 13 | 14 | cmake_minimum_required(VERSION 3.12) 15 | project(graphvite LANGUAGES CXX CUDA) 16 | 17 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) 18 | include(FindCUDA) 19 | 20 | find_package(Glog) 21 | find_package(GFlags) 22 | find_package(PythonLibsNew REQUIRED) 23 | find_package(pybind11 REQUIRED) 24 | 25 | # CUDA_ROOT & CUDA_ARCH 26 | get_filename_component(CUDA_ROOT ${CMAKE_CUDA_COMPILER} DIRECTORY) 27 | get_filename_component(CUDA_ROOT ${CUDA_ROOT} DIRECTORY) 28 | if (NOT ALL_ARCH) 29 | cuda_select_nvcc_arch_flags(CUDA_ARCH Auto) 30 | else() 31 | cuda_select_nvcc_arch_flags(CUDA_ARCH 3.5 5.0 6.0 7.0) 32 | endif() 33 | string(REPLACE ";" " " CUDA_ARCH "${CUDA_ARCH}") 34 | 35 | # add externals 36 | include(ExternalProject) 37 | set(EXTERNAL_DIR ${PROJECT_SOURCE_DIR}/external) 38 | 39 | if (NOT UPDATE) 40 | set(SKIP_UPDATE TRUE) 41 | else() 42 | 
set(SKIP_UPDATE FALSE) 43 | endif() 44 | 45 | # glog 46 | if (NOT GLOG_FOUND) 47 | message(WARNING "Can't find glog library. It will be installed from git repository.") 48 | ExternalProject_Add(glog 49 | GIT_REPOSITORY https://github.com/google/glog.git 50 | UPDATE_DISCONNECTED ${SKIP_UPDATE} 51 | PREFIX glog 52 | SOURCE_DIR ${EXTERNAL_DIR}/glog 53 | BINARY_DIR ${PROJECT_BINARY_DIR}/glog 54 | INSTALL_DIR ${PROJECT_BINARY_DIR}/glog 55 | CONFIGURE_COMMAND test -e Makefile && exit || 56 | cd ${EXTERNAL_DIR}/glog && ./autogen.sh && cd - && 57 | ${EXTERNAL_DIR}/glog/configure 58 | BUILD_COMMAND test -e .libs/libglog.a || make -j 59 | INSTALL_COMMAND test -e include/glog/logging.h || make install prefix=${PROJECT_BINARY_DIR}/glog) 60 | include_directories(${PROJECT_BINARY_DIR}/glog/include) 61 | link_directories(${PROJECT_BINARY_DIR}/glog/.libs) 62 | else() 63 | get_filename_component(GLOG_LIBRARY_DIR ${GLOG_LIBRARY} DIRECTORY) 64 | include_directories(${GLOG_INCLUDE_DIR}) 65 | link_directories(${GLOG_LIBRARY_DIR}) 66 | endif () 67 | 68 | # gflags 69 | if (NOT GFLAGS_FOUND) 70 | message(WARNING "Can't find gflags library. 
It will be installed from git repository.") 71 | ExternalProject_Add(gflags 72 | GIT_REPOSITORY https://github.com/gflags/gflags 73 | UPDATE_DISCONNECTED ${SKIP_UPDATE} 74 | PREFIX gflags 75 | SOURCE_DIR ${EXTERNAL_DIR}/gflags 76 | BINARY_DIR ${PROJECT_BINARY_DIR}/gflags 77 | INSTALL_DIR ${PROJECT_BINARY_DIR}/gflags 78 | CONFIGURE_COMMAND test -e Makefile || 79 | cmake ${EXTERNAL_DIR}/gflags -DCMAKE_INSTALL_PREFIX=${PROJECT_BINARY_DIR}/gflags 80 | BUILD_COMMAND test -e lib/libgflags.a || make -j 81 | INSTALL_COMMAND test -e include/gflags/gflags.h || make install) 82 | include_directories(${PROJECT_BINARY_DIR}/gflags/include) 83 | link_directories(${PROJECT_BINARY_DIR}/gflags/lib) 84 | else() 85 | get_filename_component(GFLAGS_LIBRARY_DIR ${GFLAGS_LIBRARY} DIRECTORY) 86 | include_directories(${GFLAGS_INCLUDE_DIR}) 87 | link_directories(${GFLAGS_LIBRARY_DIR}) 88 | endif() 89 | 90 | # faiss 91 | if (NOT FAISS_PATH) 92 | if (NOT NO_FAISS) 93 | ExternalProject_Add(faiss 94 | GIT_REPOSITORY https://github.com/facebookresearch/faiss.git 95 | UPDATE_DISCONNECTED ${SKIP_UPDATE} 96 | PREFIX faiss 97 | SOURCE_DIR ${EXTERNAL_DIR}/faiss 98 | BINARY_DIR ${EXTERNAL_DIR}/faiss 99 | INSTALL_DIR ${PROJECT_BINARY_DIR}/faiss 100 | CONFIGURE_COMMAND test -e makefile.inc || 101 | ${EXTERNAL_DIR}/faiss/configure --with-cuda=${CUDA_ROOT} --with-cuda-arch=${CUDA_ARCH} 102 | BUILD_COMMAND test -e libfaiss.so || make -j 103 | INSTALL_COMMAND test -e ${PROJECT_BINARY_DIR}/faiss/include/faiss/gpu/GpuIndexFlat.h || 104 | make install prefix=${PROJECT_BINARY_DIR}/faiss) 105 | include_directories(${PROJECT_BINARY_DIR}/faiss/include) 106 | link_directories(${PROJECT_BINARY_DIR}/faiss/lib) 107 | set(FAISS_LIBRARY ${PROJECT_BINARY_DIR}/faiss/lib/libfaiss.so) 108 | endif() 109 | else() 110 | get_filename_component(FAISS_PARENT ${FAISS_PATH} DIRECTORY) 111 | include_directories(${FAISS_PARENT}) 112 | link_directories(${FAISS_PATH}) 113 | set(FAISS_LIBRARY ${FAISS_PATH}/libfaiss.so) 114 | endif() 115 
| 116 | if (NOT CMAKE_BUILD_TYPE) 117 | set(CMAKE_BUILD_TYPE Release) 118 | endif() 119 | 120 | # compilation flags 121 | set(CMAKE_CXX_FLAGS "-std=c++11 -Wall") 122 | set(CMAKE_CXX_FLAGS_DEBUG "-g -Og") 123 | set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -D NDEBUG") 124 | 125 | set(CMAKE_CUDA_FLAGS "-std=c++11") 126 | set(CMAKE_CUDA_FLAGS_DEBUG "-g -G") 127 | set(CMAKE_CUDA_FLAGS_RELEASE "-O3") 128 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_ARCH}") 129 | 130 | foreach (MACRO FAST_COMPILE NO_FAISS) 131 | if (${${MACRO}}) 132 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D${MACRO}") 133 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D${MACRO}") 134 | endif() 135 | endforeach() 136 | 137 | # output directory 138 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) 139 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) 140 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) 141 | # symbolic link to faiss 142 | file(MAKE_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) 143 | execute_process(COMMAND ln -sf ${FAISS_LIBRARY} ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libfaiss.so) 144 | 145 | # project directories 146 | include_directories(${PYTHON_INCLUDE_DIRS}) 147 | include_directories(include) 148 | add_subdirectory(src) -------------------------------------------------------------------------------- /asset/graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/graph.png -------------------------------------------------------------------------------- /asset/knowledge_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/knowledge_graph.png -------------------------------------------------------------------------------- /asset/logo/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/logo/favicon.ico -------------------------------------------------------------------------------- /asset/logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/logo/logo.png -------------------------------------------------------------------------------- /asset/visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/visualization.png -------------------------------------------------------------------------------- /asset/visualization/imagenet_hierarchy.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/visualization/imagenet_hierarchy.gif -------------------------------------------------------------------------------- /asset/visualization/mnist_3d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/visualization/mnist_3d.gif -------------------------------------------------------------------------------- /cmake/FindGFlags.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find GFLAGS 2 | # 3 | # The following variables are optionally searched for defaults 4 | # GFLAGS_ROOT_DIR: Base directory where all GFLAGS components are found 5 | # 6 | # The following are set after configuration is done: 7 | # GFLAGS_FOUND 8 | # GFLAGS_INCLUDE_DIRS 9 | # 
GFLAGS_LIBRARIES 10 | # GFLAGS_LIBRARY_DIRS 11 | 12 | include(FindPackageHandleStandardArgs) 13 | 14 | set(GFLAGS_ROOT_DIR "" CACHE PATH "Folder contains Gflags") 15 | 16 | # We are testing only a couple of files in the include directories 17 | if(WIN32) 18 | find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h 19 | PATHS ${GFLAGS_ROOT_DIR}/src/windows) 20 | else() 21 | find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h 22 | PATHS ${GFLAGS_ROOT_DIR}) 23 | endif() 24 | 25 | if(MSVC) 26 | find_library(GFLAGS_LIBRARY_RELEASE 27 | NAMES libgflags 28 | PATHS ${GFLAGS_ROOT_DIR} 29 | PATH_SUFFIXES Release) 30 | 31 | find_library(GFLAGS_LIBRARY_DEBUG 32 | NAMES libgflags-debug 33 | PATHS ${GFLAGS_ROOT_DIR} 34 | PATH_SUFFIXES Debug) 35 | 36 | set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug ${GFLAGS_LIBRARY_DEBUG}) 37 | else() 38 | find_library(GFLAGS_LIBRARY gflags) 39 | endif() 40 | 41 | find_package_handle_standard_args(GFlags DEFAULT_MSG GFLAGS_INCLUDE_DIR GFLAGS_LIBRARY) 42 | 43 | 44 | if(GFLAGS_FOUND) 45 | set(GFLAGS_INCLUDE_DIRS ${GFLAGS_INCLUDE_DIR}) 46 | set(GFLAGS_LIBRARIES ${GFLAGS_LIBRARY}) 47 | message(STATUS "Found gflags (include: ${GFLAGS_INCLUDE_DIR}, library: ${GFLAGS_LIBRARY})") 48 | mark_as_advanced(GFLAGS_LIBRARY_DEBUG GFLAGS_LIBRARY_RELEASE 49 | GFLAGS_LIBRARY GFLAGS_INCLUDE_DIR GFLAGS_ROOT_DIR) 50 | endif() -------------------------------------------------------------------------------- /cmake/FindGlog.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Glog 2 | # 3 | # The following variables are optionally searched for defaults 4 | # GLOG_ROOT_DIR: Base directory where all GLOG components are found 5 | # 6 | # The following are set after configuration is done: 7 | # GLOG_FOUND 8 | # GLOG_INCLUDE_DIRS 9 | # GLOG_LIBRARIES 10 | 11 | include(FindPackageHandleStandardArgs) 12 | 13 | set(GLOG_ROOT_DIR "" CACHE PATH "Folder contains Google glog") 14 | 15 | if(WIN32) 16 | find_path(GLOG_INCLUDE_DIR 
glog/logging.h 17 | PATHS ${GLOG_ROOT_DIR}/src/windows) 18 | else() 19 | find_path(GLOG_INCLUDE_DIR glog/logging.h 20 | PATHS ${GLOG_ROOT_DIR}) 21 | endif() 22 | 23 | if(MSVC) 24 | find_library(GLOG_LIBRARY_RELEASE libglog_static 25 | PATHS ${GLOG_ROOT_DIR} 26 | PATH_SUFFIXES Release) 27 | 28 | find_library(GLOG_LIBRARY_DEBUG libglog_static 29 | PATHS ${GLOG_ROOT_DIR} 30 | PATH_SUFFIXES Debug) 31 | 32 | set(GLOG_LIBRARY optimized ${GLOG_LIBRARY_RELEASE} debug ${GLOG_LIBRARY_DEBUG}) 33 | else() 34 | find_library(GLOG_LIBRARY glog 35 | PATHS ${GLOG_ROOT_DIR} 36 | PATH_SUFFIXES lib lib64) 37 | endif() 38 | 39 | find_package_handle_standard_args(Glog DEFAULT_MSG GLOG_INCLUDE_DIR GLOG_LIBRARY) 40 | 41 | if(GLOG_FOUND) 42 | set(GLOG_INCLUDE_DIRS ${GLOG_INCLUDE_DIR}) 43 | set(GLOG_LIBRARIES ${GLOG_LIBRARY}) 44 | message(STATUS "Found glog (include: ${GLOG_INCLUDE_DIR}, library: ${GLOG_LIBRARY})") 45 | mark_as_advanced(GLOG_ROOT_DIR GLOG_LIBRARY_RELEASE GLOG_LIBRARY_DEBUG 46 | GLOG_LIBRARY GLOG_INCLUDE_DIR) 47 | endif() -------------------------------------------------------------------------------- /conda/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | cxx_compiler_version: 2 | - 5.4 3 | 4 | python: 5 | - 2.7 6 | - 3.6 7 | - 3.7 8 | 9 | numpy: 10 | - 1.11 11 | 12 | cudatoolkit: 13 | - 9.2 14 | - 10.0 15 | - 10.1 16 | 17 | pin_run_as_build: 18 | cudatoolkit: 19 | max_pin: x.x -------------------------------------------------------------------------------- /conda/graphvite-mini/build.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | mkdir -p build 4 | 5 | cd build 6 | cmake .. 
-DALL_ARCH=True 7 | make 8 | cd - 9 | 10 | cd python 11 | $PYTHON setup.py install 12 | cd - -------------------------------------------------------------------------------- /conda/graphvite-mini/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: graphvite-mini 3 | version: 0.2.2 4 | 5 | source: 6 | path: ../.. 7 | 8 | requirements: 9 | build: 10 | # cmake 11 | - cmake >=3.12 12 | - {{ compiler("cxx") }} 13 | - glog 14 | - gflags 15 | - cudatoolkit {{ cudatoolkit }} 16 | - python {{ python }} 17 | - pybind11 18 | host: 19 | # make 20 | - glog 21 | - gflags 22 | - cudatoolkit {{ cudatoolkit }} 23 | - python {{ python }} 24 | - pybind11 25 | - numpy {{ numpy }} 26 | - mkl >=2018 27 | # setup 28 | - pyyaml 29 | - easydict 30 | - six 31 | run: 32 | - glog 33 | - gflags 34 | - cudatoolkit 35 | - python {{ python }} 36 | - mkl >=2018 37 | - numpy >=1.11 38 | - pyyaml 39 | - easydict 40 | - six 41 | - future 42 | - psutil 43 | 44 | build: 45 | string: 46 | "py{{ python|replace('.', '') }}\ 47 | cuda{{ cudatoolkit|replace('.', '') }}\ 48 | h{{ environ.get('GIT_FULL_HASH')|string|truncate(7, True, '', 0) }}" 49 | 50 | test: 51 | imports: 52 | - graphvite 53 | 54 | about: 55 | home: https://graphvite.io 56 | license: Apache-2.0 57 | summary: "A general and high-performance graph embedding system for various applications" -------------------------------------------------------------------------------- /conda/graphvite/build.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | mkdir -p build 4 | 5 | cd build 6 | cmake .. 
-DALL_ARCH=True 7 | make 8 | cd - 9 | 10 | cd python 11 | $PYTHON setup.py install 12 | cd - -------------------------------------------------------------------------------- /conda/graphvite/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: graphvite 3 | version: 0.2.2 4 | 5 | source: 6 | path: ../.. 7 | 8 | requirements: 9 | build: 10 | # cmake 11 | - cmake >=3.12 12 | - {{ compiler("cxx") }} 13 | - glog 14 | - gflags 15 | - cudatoolkit {{ cudatoolkit }} 16 | - python {{ python }} 17 | - pybind11 18 | host: 19 | # make 20 | - glog 21 | - gflags 22 | - cudatoolkit {{ cudatoolkit }} 23 | - python {{ python }} 24 | - pybind11 25 | - numpy {{ numpy }} 26 | - mkl >=2018 27 | # setup 28 | - pyyaml 29 | - easydict 30 | - six 31 | run: 32 | - glog 33 | - gflags 34 | - cudatoolkit 35 | - python {{ python }} 36 | - mkl >=2018 37 | - numpy >=1.11 38 | - pyyaml 39 | - easydict 40 | - six 41 | - future 42 | - imageio 43 | - psutil 44 | - scipy 45 | - matplotlib 46 | - pytorch 47 | - torchvision 48 | - nltk 49 | 50 | build: 51 | string: 52 | "py{{ python|replace('.', '') }}\ 53 | cuda{{ cudatoolkit|replace('.', '') }}\ 54 | h{{ environ.get('GIT_FULL_HASH')|string|truncate(7, True, '', 0) }}" 55 | 56 | test: 57 | imports: 58 | - graphvite 59 | 60 | about: 61 | home: https://graphvite.io 62 | license: Apache-2.0 63 | summary: "A general and high-performance graph embedding system for various applications" -------------------------------------------------------------------------------- /conda/requirements.txt: -------------------------------------------------------------------------------- 1 | # cmake 2 | cmake >=3.12 3 | gxx_linux-64 >=5.4 4 | glog 5 | gflags 6 | cudatoolkit >=9.2 7 | python 8 | pybind11 9 | 10 | # make 11 | mkl >=2018 12 | 13 | # run 14 | numpy >=1.11 15 | pyyaml 16 | conda-forge::easydict 17 | six 18 | future 19 | imageio 20 | psutil 21 | scipy 22 | matplotlib 23 | pytorch 24 | torchvision 25 | 
nltk -------------------------------------------------------------------------------- /config/demo/math.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [0] 6 | cpu_per_gpu: 8 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-3 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 8 19 | batch_size: 100000 20 | episode_size: 100 21 | 22 | train: 23 | model: RotatE 24 | num_epoch: 2000 25 | margin: 9 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | target: tail 38 | 39 | save: 40 | file_name: rotate_math.pkl -------------------------------------------------------------------------------- /config/demo/quick_start.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [0] 6 | cpu_per_gpu: 8 7 | dim: 128 8 | 9 | format: 10 | delimiters: " \t\r\n" 11 | comment: "#" 12 | 13 | graph: 14 | file_name: 15 | as_undirected: true 16 | 17 | build: 18 | optimizer: 19 | type: SGD 20 | lr: 0.025 21 | weight_decay: 0.005 22 | num_partition: auto 23 | num_negative: 1 24 | batch_size: 100000 25 | episode_size: 500 26 | 27 | train: 28 | model: LINE 29 | num_epoch: 2000 30 | negative_weight: 5 31 | augmentation_step: 2 32 | random_walk_length: 40 33 | random_walk_batch_size: 100 34 | log_frequency: 1000 35 | 36 | evaluate: 37 | - task: link prediction 38 | file_name: 39 | filter_file: 40 | - task: node classification 41 | file_name: 42 | portions: [0.2] 43 | times: 1 44 | 45 | save: 46 | file_name: line_blogcatalog.pkl -------------------------------------------------------------------------------- /config/graph/deepwalk_flickr.yaml: 
-------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 1000 22 | 23 | train: 24 | # here the best setting uses no augmentation 25 | # in this case, DeepWalk is equal to LINE 26 | model: DeepWalk 27 | num_epoch: 2000 28 | negative_weight: 5 29 | augmentation_step: 1 30 | random_walk_length: 40 31 | random_walk_batch_size: 100 32 | log_frequency: 1000 33 | 34 | evaluate: 35 | task: node classification 36 | file_name: 37 | portions: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 38 | times: 5 39 | 40 | save: 41 | file_name: deepwalk_flickr.pkl -------------------------------------------------------------------------------- /config/graph/deepwalk_friendster-small.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 3500 22 | 23 | train: 24 | # here the best setting uses no augmentation 25 | # in this case, DeepWalk is equal to LINE 26 | model: DeepWalk 27 | num_epoch: 2000 28 | negative_weight: 5 29 | augmentation_step: 1 30 | random_walk_length: 40 31 | random_walk_batch_size: 100 32 | log_frequency: 1000 33 | 34 | evaluate: 35 | task: node classification 36 | file_name: 37 | portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10] 38 | times: 5 39 | 40 | save: 41 | file_name: deepwalk_friendster-small.pkl 
-------------------------------------------------------------------------------- /config/graph/deepwalk_friendster.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 96 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 2500 22 | 23 | train: 24 | model: DeepWalk 25 | num_epoch: 2000 26 | negative_weight: 5 27 | augmentation_step: 2 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: node classification 34 | file_name: 35 | portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10] 36 | times: 5 37 | 38 | save: 39 | file_name: deepwalk_friendster.pkl -------------------------------------------------------------------------------- /config/graph/deepwalk_hyperlink-pld.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 5000 22 | 23 | train: 24 | model: DeepWalk 25 | num_epoch: 2000 26 | negative_weight: 5 27 | augmentation_step: 2 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_file: 36 | 37 | save: 38 | file_name: deepwalk_hyperlink-pld.pkl -------------------------------------------------------------------------------- /config/graph/deepwalk_youtube.yaml: 
-------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 500 22 | 23 | train: 24 | model: DeepWalk 25 | num_epoch: 4000 26 | negative_weight: 5 27 | augmentation_step: 5 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: node classification 34 | file_name: 35 | portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10] 36 | times: 5 37 | 38 | save: 39 | file_name: deepwalk_youtube.pkl -------------------------------------------------------------------------------- /config/graph/line_flickr.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 1000 22 | 23 | train: 24 | model: LINE 25 | num_epoch: 2000 26 | negative_weight: 5 27 | augmentation_step: 1 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: node classification 34 | file_name: 35 | portions: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 36 | times: 5 37 | 38 | save: 39 | file_name: line_flickr.pkl -------------------------------------------------------------------------------- /config/graph/line_friendster-small.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | 
cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 3500 22 | 23 | train: 24 | model: LINE 25 | num_epoch: 2000 26 | negative_weight: 5 27 | augmentation_step: 1 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: node classification 34 | file_name: 35 | portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10] 36 | times: 5 37 | 38 | save: 39 | file_name: line_friendster-small.pkl -------------------------------------------------------------------------------- /config/graph/line_friendster.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 96 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 2500 22 | 23 | train: 24 | model: LINE 25 | num_epoch: 2000 26 | negative_weight: 5 27 | augmentation_step: 2 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: node classification 34 | file_name: 35 | portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10] 36 | times: 5 37 | 38 | save: 39 | file_name: line_friendster.pkl -------------------------------------------------------------------------------- /config/graph/line_hyperlink-pld.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: 
SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 5000 22 | 23 | train: 24 | model: LINE 25 | num_epoch: 2000 26 | negative_weight: 5 27 | augmentation_step: 2 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_file: 36 | 37 | save: 38 | file_name: line_hyperlink-pld.pkl -------------------------------------------------------------------------------- /config/graph/line_youtube.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 500 22 | 23 | train: 24 | model: LINE 25 | num_epoch: 4000 26 | negative_weight: 5 27 | augmentation_step: 5 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | log_frequency: 1000 31 | 32 | evaluate: 33 | task: node classification 34 | file_name: 35 | portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10] 36 | times: 5 37 | 38 | save: 39 | file_name: line_youtube.pkl -------------------------------------------------------------------------------- /config/graph/node2vec_youtube.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | as_undirected: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 0.025 17 | weight_decay: 0.005 18 | num_partition: auto 19 | num_negative: 1 20 | batch_size: 100000 21 | episode_size: 500 22 | 23 | train: 24 | model: node2vec 25 | num_epoch: 4000 26 | negative_weight: 5 
27 | augmentation_step: 5 28 | random_walk_length: 40 29 | random_walk_batch_size: 100 30 | p: 4 31 | q: 2 32 | log_frequency: 1000 33 | 34 | evaluate: 35 | task: node classification 36 | file_name: 37 | portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10] 38 | times: 5 39 | 40 | save: 41 | file_name: node2vec_youtube.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/complex_fb15k-237.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: ComplEx 24 | num_epoch: 1000 25 | l3_regularization: 5.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: complex_fb15k-237.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/complex_fb15k.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-4 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: ComplEx 24 | num_epoch: 1000 25 | l3_regularization: 1.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | 
- 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: complex_fb15k.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/complex_wikidata5m.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | normalization: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 1.0e-1 17 | weight_decay: 0 18 | num_partition: auto 19 | num_negative: 64 20 | batch_size: 100000 21 | episode_size: 200 22 | 23 | train: 24 | model: ComplEx 25 | num_epoch: 1000 26 | l3_regularization: 2.0e-3 27 | sample_batch_size: 2000 28 | adversarial_temperature: 0.2 29 | relation_lr_multiplier: 1.0e-3 30 | log_frequency: 500 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_files: 36 | - 37 | - 38 | - 39 | # fast_mode: 1000 40 | 41 | save: 42 | file_name: complex_wikidata5m.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/complex_wn18.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: ComplEx 24 | num_epoch: 4000 25 | l3_regularization: 5.0e-5 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: complex_wn18.pkl -------------------------------------------------------------------------------- 
/config/knowledge_graph/complex_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: ComplEx 24 | num_epoch: 6000 25 | l3_regularization: 5.0e-6 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: complex_wn18rr.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/distmult_fb15k-237.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: DistMult 24 | num_epoch: 1000 25 | l3_regularization: 5.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: distmult_fb15k-237.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/distmult_fb15k.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 
| file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: DistMult 24 | num_epoch: 1000 25 | l3_regularization: 1.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: distmult_fb15k.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/distmult_wikidata5m.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | normalization: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 1.0e-1 17 | weight_decay: 0 18 | num_partition: auto 19 | num_negative: 64 20 | batch_size: 100000 21 | episode_size: 200 22 | 23 | train: 24 | model: DistMult 25 | num_epoch: 2000 26 | l3_regularization: 2.0e-3 27 | sample_batch_size: 2000 28 | adversarial_temperature: 2 29 | relation_lr_multiplier: 1.0e-4 30 | log_frequency: 500 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_files: 36 | - 37 | - 38 | - 39 | # fast_mode: 1000 40 | 41 | save: 42 | file_name: distmult_wikidata5m.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/distmult_wn18.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-4 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | 
episode_size: 1 21 | 22 | train: 23 | model: DistMult 24 | num_epoch: 4000 25 | l3_regularization: 1.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: distmult_wn18.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/distmult_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: DistMult 24 | num_epoch: 6000 25 | l3_regularization: 1.0e-2 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: distmult_wn18rr.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/quate_fb15k-237.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: QuatE 24 | num_epoch: 1000 25 | l3_regularization: 5.0e-2 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 
| filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: quate_fb15k-237.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/quate_fb15k.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: QuatE 24 | num_epoch: 1000 25 | l3_regularization: 1.0e-2 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: quate_fb15k.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/quate_wikidata5m.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | normalization: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 1.0 17 | weight_decay: 0 18 | num_partition: auto 19 | num_negative: 64 20 | batch_size: 100000 21 | episode_size: 200 22 | 23 | train: 24 | model: QuatE 25 | num_epoch: 2000 26 | l3_regularization: 5.0e-3 27 | sample_batch_size: 2000 28 | adversarial_temperature: 10 29 | relation_lr_multiplier: 1.0e-4 30 | log_frequency: 500 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_files: 36 | - 37 | - 38 | - 39 | # fast_mode: 1000 40 | 41 | save: 42 | file_name: quate_wikidata5m.pkl -------------------------------------------------------------------------------- 
/config/knowledge_graph/quate_wn18.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: QuatE 24 | num_epoch: 4000 25 | l3_regularization: 1.0e-2 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: quate_wn18.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/quate_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-6 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: QuatE 24 | num_epoch: 6000 25 | l3_regularization: 5.0e-2 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: quate_wn18rr.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/rotate_fb15k-237.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | 
build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-6 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: RotatE 24 | num_epoch: 1000 25 | margin: 9 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: rotate_fb15k-237.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/rotate_fb15k.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-4 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: RotatE 24 | num_epoch: 1000 25 | margin: 24 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: rotate_fb15k.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/rotate_wikidata5m.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | normalization: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 1.0e-2 17 | weight_decay: 0 18 | num_partition: auto 19 | num_negative: 64 20 | batch_size: 100000 21 | episode_size: 200 22 | 23 | train: 24 | model: RotatE 25 | num_epoch: 1000 26 | margin: 6 27 | sample_batch_size: 
2000 28 | adversarial_temperature: 0.2 29 | relation_lr_multiplier: 1 30 | log_frequency: 500 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_files: 36 | - 37 | - 38 | - 39 | # fast_mode: 1000 40 | 41 | save: 42 | file_name: rotate_wikidata5m.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/rotate_wn18.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-6 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: RotatE 24 | num_epoch: 4000 25 | margin: 9 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: rotate_wn18.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/rotate_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-6 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: RotatE 24 | num_epoch: 6000 25 | margin: 6 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: rotate_wn18rr.pkl 
-------------------------------------------------------------------------------- /config/knowledge_graph/simple_fb15k-237.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: SimplE 24 | num_epoch: 1000 25 | l3_regularization: 5.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: simple_fb15k-237.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/simple_fb15k.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 2048 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: SimplE 24 | num_epoch: 1000 25 | l3_regularization: 1.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: simple_fb15k.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/simple_wikidata5m.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | 
resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | normalization: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 1.0 17 | weight_decay: 0 18 | num_partition: auto 19 | num_negative: 64 20 | batch_size: 100000 21 | episode_size: 200 22 | 23 | train: 24 | model: SimplE 25 | num_epoch: 2000 26 | l3_regularization: 2.0e-3 27 | sample_batch_size: 2000 28 | adversarial_temperature: 2 29 | relation_lr_multiplier: 1.0e-4 30 | log_frequency: 500 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_files: 36 | - 37 | - 38 | - 39 | # fast_mode: 1000 40 | 41 | save: 42 | file_name: simple_wikidata5m.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/simple_wn18.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: SimplE 24 | num_epoch: 4000 25 | l3_regularization: 2.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: simple_wn18.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/simple_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-4 16 | weight_decay: 0 17 | num_partition: auto 18 | 
num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: SimplE 24 | num_epoch: 6000 25 | l3_regularization: 2.0e-3 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: simple_wn18rr.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/transe_fb15k-237.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 2.0e-6 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: TransE 24 | num_epoch: 1000 25 | margin: 9 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: transe_fb15k-237.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/transe_fb15k.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 1024 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-5 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: TransE 24 | num_epoch: 1000 25 | margin: 24 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | 
file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: transe_fb15k.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/transe_wikidata5m.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | normalization: true 12 | 13 | build: 14 | optimizer: 15 | type: SGD 16 | lr: 1.0e-3 17 | weight_decay: 0 18 | num_partition: auto 19 | num_negative: 64 20 | batch_size: 100000 21 | episode_size: 200 22 | 23 | train: 24 | model: TransE 25 | num_epoch: 1000 26 | margin: 12 27 | sample_batch_size: 2000 28 | adversarial_temperature: 0.5 29 | relation_lr_multiplier: 1.0e-2 30 | log_frequency: 500 31 | 32 | evaluate: 33 | task: link prediction 34 | file_name: 35 | filter_files: 36 | - 37 | - 38 | - 39 | # fast_mode: 1000 40 | 41 | save: 42 | file_name: transe_wikidata5m.pkl -------------------------------------------------------------------------------- /config/knowledge_graph/transe_wn18.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 5.0e-6 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: TransE 24 | num_epoch: 4000 25 | margin: 12 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: transe_wn18.pkl -------------------------------------------------------------------------------- 
/config/knowledge_graph/transe_wn18rr.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | knowledge graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 512 8 | 9 | graph: 10 | file_name: 11 | 12 | build: 13 | optimizer: 14 | type: Adam 15 | lr: 1.0e-6 16 | weight_decay: 0 17 | num_partition: auto 18 | num_negative: 64 19 | batch_size: 100000 20 | episode_size: 1 21 | 22 | train: 23 | model: TransE 24 | num_epoch: 6000 25 | margin: 6 26 | sample_batch_size: 2000 27 | adversarial_temperature: 2 28 | log_frequency: 100 29 | 30 | evaluate: 31 | task: link prediction 32 | file_name: 33 | filter_files: 34 | - 35 | - 36 | - 37 | # fast_mode: 3000 38 | 39 | save: 40 | file_name: transe_wn18rr.pkl -------------------------------------------------------------------------------- /config/template/graph.yaml: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | # Node embedding configuration file 3 | ########################################################### 4 | 5 | application: 6 | graph 7 | 8 | resource: 9 | # List of GPU ids. Default is all GPUs 10 | gpus: [] 11 | # Memory limit for each GPU in bytes. Default is all available memory. 12 | gpu_memory_limit: auto 13 | # Number of CPU thread per GPU. Default is all CPUs. 14 | cpu_per_gpu: auto 15 | # Dimension of the embeddings. 16 | dim: 128 17 | 18 | format: 19 | # String of delimiter characters. Change it if your node name contains blank character. 20 | delimiters: " \t\r\n" 21 | # Prefix of comment strings. Change it if you use comment style other than Python. 22 | comment: "#" 23 | 24 | graph: 25 | # Path to edge list file. Each line should be one of the following 26 | # [node 1] [delimiter] [node 2] [comment]... 27 | # [node 1] [delimiter] [node 2] [delimiter] [weight] [comment]... 28 | # [comment]... 
29 | # For standard datasets, you can specify them by <[dataset].[split]>. 30 | file_name: 31 | # Symmetrize the graph or not. True is recommended. 32 | as_undirected: true 33 | # Normalize the adjacency matrix or not. This may influence the performance a little. 34 | normalization: false 35 | 36 | build: 37 | optimizer: 38 | # Optimizer. 39 | type: SGD 40 | # Learning rate. Default is usually reasonable. 41 | lr: 0.025 42 | # Weight decay. 43 | weight_decay: 0.005 44 | # Learning rate schedule, can be "linear" or "constant". Linear is recommended. 45 | schedule: linear 46 | # Number of partitions. Auto is recommended. 47 | num_partition: auto 48 | # Number of negative samples per positive sample. 49 | # Larger value results in slower training. 50 | # The performance may be influenced by num_negative * negative_weight. 51 | num_negative: 1 52 | # Batch size of samples in CPU-GPU transfer. Default is recommended. 53 | batch_size: 100000 54 | # Number of batches in a partition block. 55 | # Default is recommended. 56 | episode_size: auto 57 | 58 | # Comment out this section if not needed. 59 | load: 60 | # Path to model file, can be "*.pkl". 61 | file_name: graph.pkl 62 | 63 | train: 64 | # Model, can be DeepWalk, LINE or node2vec. 65 | model: DeepWalk 66 | # Number of epochs. Default is usually reasonable for sparse graphs. 67 | # For dense graphs (|E| / |V| > 100), you may use smaller values. 68 | num_epoch: 2000 69 | # Resume training from a loaded model. 70 | resume: false 71 | # Weight of negative samples. Values larger than 10 may cause unstable training. 72 | negative_weight: 5 73 | # Exponent of degrees in negative sampling. Default is recommended. 74 | negative_sample_exponent: 0.75 75 | # Augmentation step. Default is usually reasonable. 76 | # Larger value is needed for sparser graphs. 77 | augmentation_step: auto 78 | # Return parameter and in-out parameters (node2vec). Need to be tuned on the validation set. 
79 | p: 1 80 | q: 1 81 | # Length of each random walk. Default is recommended. 82 | random_walk_length: 40 83 | # Batch size of random walks in samplers. Default is recommended. 84 | random_walk_batch_size: 100 85 | # Log every n batches. 86 | log_frequency: 1000 87 | 88 | # Comment out this section if not needed. 89 | evaluate: 90 | # Comment out any task if not needed. 91 | - task: node classification 92 | # Path to node label file. Each line should be one of the following 93 | # [node] [delimiter] [label] [comment]... 94 | # [comment]... 95 | file_name: 96 | # Portions of data used for training. Each of them corresponds to one evaluation. 97 | portions: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 98 | # Number of trials repeated. Change it to 1 if your evaluation set is large enough. 99 | times: 5 100 | 101 | - task: link prediction 102 | # Path to link prediction file. Each line should be 103 | # [node 1] [delimiter] [node 2] [delimiter] [label] 104 | # where label is 1 for positive and 0 for negative. 105 | file_name: 106 | # Path to filter file. If you aren't sure that training data is excluded in evaluation, 107 | # you can specify the training edge list here. 108 | filter_file: 109 | 110 | # Comment out this section if not needed. 111 | save: 112 | # Path to save file, can be "*.pkl". 113 | file_name: graph.pkl 114 | # Save hyperparameters or not. 115 | save_hyperparameter: false -------------------------------------------------------------------------------- /config/template/knowledge_graph.yaml: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | # Knowledge graph embedding configuration file 3 | ########################################################### 4 | 5 | application: 6 | knowledge graph 7 | 8 | resource: 9 | # List of GPU ids. Default is all GPUs 10 | gpus: [] 11 | # Memory limit for each GPU in bytes. Default is all available memory. 
12 | gpu_memory_limit: auto 13 | # Number of CPU thread per GPU. Default is all CPUs. 14 | cpu_per_gpu: auto 15 | # Dimension of the embeddings. 16 | dim: 1024 17 | 18 | format: 19 | # String of delimiter characters. Change it if your node name contains blank character. 20 | delimiters: " \t\r\n" 21 | # Prefix of comment strings. Change it if you use comment style other than Python. 22 | comment: "#" 23 | 24 | graph: 25 | # Path to triplet file. Each line should be one of the following 26 | # [head] [delimiter] [relation] [tail] [comment]... 27 | # [head] [delimiter] [relation] [tail] [delimiter] [weight] [comment]... 28 | # [comment]... 29 | # For standard datasets, you can specify them by <[dataset].[split]>. 30 | file_name: 31 | # Normalize the adjacency matrix or not. This may influence the performance a little. 32 | normalization: false 33 | 34 | build: 35 | optimizer: 36 | # Optimizer. 37 | type: Adam 38 | # Learning rate. Default is usually reasonable. 39 | lr: 5.0e-5 40 | # Weight decay. 41 | weight_decay: 0 42 | # Learning rate schedule, can be "linear" or "constant". Linear is recommended. 43 | schedule: linear 44 | # Number of partitions. Auto is recommended. 45 | num_partition: auto 46 | # Number of negative samples per positive sample. 47 | # Larger value results in slower training. 48 | num_negative: 64 49 | # Batch size of samples in CPU-GPU transfer. Default is recommended. 50 | batch_size: 100000 51 | # Number of batches in a partition block. 52 | # Default is recommended. 53 | episode_size: auto 54 | 55 | # Comment out this section if not needed. 56 | load: 57 | # Path to model file, can be "*.pkl". 58 | file_name: knowledge_graph.pkl 59 | 60 | train: 61 | # Model, can be TransE, DistMult, ComplEx, SimplE, RotatE or QuatE 62 | model: TransE 63 | # Number of epochs. Default is usually reasonable. 64 | num_epoch: 2000 65 | # Resume training from a loaded model. 66 | resume: false 67 | # Learning rate multiplier for relation embeddings.
68 | # Need to be tuned on the validation set if using multiple GPUs. 69 | relation_lr_multiplier: 1 70 | # L3 regularization (DistMult, ComplEx and SimplE). Need to be tuned on the validation set. 71 | l3_regularization: 2.0e-3 72 | # Margin (TransE, RotatE). Need to be tuned on the validation set. 73 | margin: 12 74 | # Batch size of samples in samplers. Default is recommended. 75 | sample_batch_size: 2000 76 | # Temperature for self-adversarial negative sampling. Default is usually reasonable. 77 | adversarial_temperature: 2 78 | # Log every n batches. 79 | log_frequency: 100 80 | 81 | # Comment out this section if not needed. 82 | evaluate: 83 | # Comment out any task if not needed. 84 | - task: link prediction 85 | # Path to triplet file. Each line should be one of the following 86 | # [head] [delimiter] [relation] [tail] [comment]... 87 | # [head] [delimiter] [relation] [tail] [delimiter] [weight] [comment]... 88 | # [comment]... 89 | file_name: 90 | # List of paths to filter files. 91 | # Specify all dataset splits for filtered ranking. Comment out for unfiltered ranking. 92 | filter_files: 93 | # Target entity to rank, can be head, tail or both. 94 | target: both 95 | # Number of samples to be evaluated. Comment out for precise evaluation. 96 | # fast_mode: 3000 97 | # Backend, can be graphvite or torch 98 | backend: graphvite 99 | 100 | - task: entity prediction 101 | # Path to triplet file. Each line should be one of the following 102 | # [head] [delimiter] [relation] [tail] [comment]... 103 | # [head] [delimiter] [relation] [tail] [delimiter] [weight] [comment]... 104 | # [comment]... 105 | file_name: 106 | # Path to save file, can be "*.txt" or "*.pkl". 107 | save_file: 108 | # Target entity to predict, can be head or tail. 109 | target: tail 110 | # Top-k recalls will be returned. 111 | k: 10 112 | # Backend, can be graphvite or torch. 113 | backend: graphvite 114 | 115 | # Comment out this section if not needed. 
116 | save: 117 | # Path to save file, can be "*.pkl". 118 | file_name: knowledge_graph.pkl 119 | # Save hyperparameters or not. 120 | save_hyperparameter: false -------------------------------------------------------------------------------- /config/template/visualization.yaml: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | # High-dimensional data visualization configuration file 3 | ########################################################### 4 | 5 | application: 6 | visualization 7 | 8 | resource: 9 | # List of GPU ids. Multiple GPUs will cause unstable results. 10 | gpus: [0] 11 | # Memory limit for each GPU in bytes. Default is all available memory. 12 | gpu_memory_limit: auto 13 | # Number of CPU thread per GPU. Default is all CPUs. 14 | cpu_per_gpu: auto 15 | # Dimension of the embeddings. 16 | dim: 2 17 | 18 | format: 19 | # String of delimiter characters. Change it if your node name contains blank character. 20 | delimiters: " \t\r\n" 21 | # Prefix of comment strings. Change it if you use comment style other than Python. 22 | comment: "#" 23 | 24 | graph: 25 | # Path to vector file. Each line should be one of the following 26 | # [value] [delimiter] [value] [delimiter]... [comment]... 27 | # [comment]... 28 | # For standard datasets, you can specify them by <[dataset].[split]>. 29 | vector_file: 30 | # Number of neighbors for each node. Default is usually reasonable. 31 | num_neighbor: 200 32 | # Perplexity for the neighborhood of each node. 33 | # Typical values are between 5 and 50. Need to be tuned for best results. 34 | # Larger value focuses on global difference and results in larger clusters. 35 | perplexity: 30 36 | # Normalize the input vectors or not. True is recommended. 37 | vector_normalization: true 38 | 39 | build: 40 | optimizer: 41 | # Optimizer. 42 | type: Adam 43 | # Learning rate. Default is usually reasonable. 
44 | lr: 0.5 45 | # Weight decay. Default is usually reasonable. 46 | weight_decay: 1.0e-5 47 | # Learning rate schedule, can be "linear" or "constant". Linear is recommended. 48 | schedule: linear 49 | # Number of partitions. Auto is recommended. 50 | num_partition: auto 51 | # Number of negative samples per positive sample. 52 | # Larger value results in slower training. 53 | # The performance may be influenced by num_negative * negative_weight. 54 | num_negative: 5 55 | # Batch size of samples in CPU-GPU transfer. Default is recommended. 56 | batch_size: 100000 57 | # Number of batches in a partition block. 58 | # Default is recommended. 59 | episode_size: auto 60 | 61 | # Comment out this section if not needed. 62 | load: 63 | # Path to model file, can be "*.pkl". 64 | file_name: visualization.pkl 65 | 66 | train: 67 | # Model, can be LargeVis. 68 | model: LargeVis 69 | # Number of epochs. Default is recommended. 70 | num_epoch: 50 71 | # Resume training from a loaded model. 72 | resume: false 73 | # Weight of negative samples. Values larger than 10 may cause unstable training. 74 | negative_weight: 3 75 | # Exponent of degrees in negative sampling. Default is recommended. 76 | negative_sample_exponent: 0.75 77 | # Batch size of samples in samplers. Default is recommended. 78 | sample_batch_size: 2000 79 | # Log every n batches. 80 | log_frequency: 1000 81 | 82 | # Comment out this section if not needed. 83 | evaluate: 84 | # Comment out any task if not needed. 85 | - task: visualization 86 | # Path to label file. Each line should be one of the following 87 | # [label] [comment]... 88 | # [comment]... 89 | # The file is assumed to have the same order as input vectors. 90 | file_name: 91 | # Path to save file, can be either "*.png" or "*.pdf". 92 | # If not provided, show the figure in window. 93 | save_file: 94 | # Size of the figure. 95 | figure_size: 10 96 | # Size of points. Recommend to use figure_size / 5. 
97 | scale: 2 98 | 99 | # This task only works for dim = 3. 100 | - task: animation 101 | # Path to label file. Each line should be one of the following 102 | # [label] [comment]... 103 | # [comment]... 104 | file_name: 105 | # Path to save file, can be "*.gif". 106 | save_file: 107 | # Size of the figure. 108 | figure_size: 5 109 | # Size of points. Recommend to use figure_size / 5. 110 | scale: 1 111 | # Elevation angle. Default is recommended. 112 | elevation: 30 113 | # Number of frames. Default is recommended. 114 | num_frame: 700 115 | 116 | - task: hierarchy 117 | # Path to hierarchical label file. Each line should be one of the following 118 | # [label] [delimiter] [label] [delimiter]... [comment]... 119 | # [comment]... 120 | # Labels should be ordered in ascending depth, i.e. the first label corresponds to the root in the hierarchy. 121 | # The file is assumed to have the same order as input vectors. 122 | file_name: 123 | # Target class to be visualized. 124 | target: 125 | # Path to save file, can be "*.gif". 126 | save_file: 127 | # Size of the figure. 128 | figure_size: 10 129 | # Size of points. Recommend to use figure_size / 5. 130 | scale: 2 131 | # Duration of each frame in seconds. Default is recommended. 132 | duration: 3 133 | 134 | # Comment out this section if not needed. 135 | save: 136 | # Path to save file, can be "*.pkl". 137 | file_name: visualization.pkl 138 | # Save hyperparameters or not. 139 | save_hyperparameter: false -------------------------------------------------------------------------------- /config/template/word_graph.yaml: -------------------------------------------------------------------------------- 1 | ########################################################### 2 | # Word embedding configuration file 3 | ########################################################### 4 | 5 | application: 6 | word graph 7 | 8 | resource: 9 | # List of GPU ids. Default is all GPUs 10 | gpus: [] 11 | # Memory limit for each GPU in bytes. 
Default is all available memory. 12 | gpu_memory_limit: auto 13 | # Number of CPU thread per GPU. Default is all CPUs. 14 | cpu_per_gpu: auto 15 | # Dimension of the embeddings. 16 | dim: 128 17 | 18 | graph: 19 | # Path to corpus file. Each line should be one of the following 20 | # [word] [delimiter] [word] [delimiter]... [comment]... 21 | # [comment]... 22 | # For standard datasets, you can specify them by <[dataset].[split]>. 23 | file_name: 24 | # Word pairs with distance <= window as counted as edges. Default is recommended. 25 | window: 5 26 | # Words with occurrence <= min_count are discarded. 27 | min_count: 5 28 | # Normalize the adjacency matrix or not. This may influence the performance a little. 29 | normalization: false 30 | # String of delimiter characters. Change it if your node name contains blank character. 31 | delimiters: " \t\r\n" 32 | # Prefix of comment strings. Change it if you use comment style other than Python. 33 | comment: "#" 34 | 35 | build: 36 | optimizer: 37 | # Optimizer. 38 | type: SGD 39 | # Learning rate. Default is usually reasonable. 40 | lr: 0.025 41 | # Weight decay. 42 | weight_decay: 0.005 43 | # Learning rate schedule, can be "linear" or "constant". Linear is recommended. 44 | schedule: linear 45 | # Number of partitions. Auto is recommended. 46 | num_partition: auto 47 | # Number of negative samples per positive sample. 48 | # Larger value results in slower training. 49 | # The performance may be influenced by num_negative * negative_weight. 50 | num_negative: 1 51 | # Batch size of samples in CPU-GPU transfer. Default is recommended. 52 | batch_size: 100000 53 | # Number of batches in a partition block. 54 | # Default is recommended, unless it overflows the memory (std::bad_alloc). 55 | episode_size: auto 56 | 57 | # Comment out this section if not needed. 58 | load: 59 | # Path to model file, can be "*.pkl". 60 | file_name: word_graph.pkl 61 | 62 | train: 63 | # Model, can be LINE. 
64 | model: LINE 65 | # Number of epochs. Default is usually reasonable. 66 | num_epoch: 80 67 | # Resume training from a loaded model. 68 | resume: false 69 | # Weight of negative samples. Values larger than 10 may cause unstable training. 70 | negative_weight: 5 71 | # Exponent of degrees in negative sampling. Default is recommended. 72 | negative_sample_exponent: 0.75 73 | # Augmentation step. Default is recommended. 74 | augmentation_step: 1 75 | # Length of each random walk. Default is recommended. 76 | random_walk_length: 40 77 | # Batch size of random walks in samplers. Default is recommended. 78 | random_walk_batch_size: 100 79 | # Log every n batches. 80 | log_frequency: 1000 81 | 82 | # Comment out this section if not needed. 83 | save: 84 | # Path to save file, can be "*.pkl". 85 | file_name: word_graph.pkl 86 | # Save hyperparameters or not. 87 | save_hyperparameter: false -------------------------------------------------------------------------------- /config/visualization/largevis_imagenet.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | visualization 3 | 4 | resource: 5 | gpus: [0] 6 | cpu_per_gpu: auto 7 | dim: 2 8 | 9 | graph: 10 | vectors: 11 | num_neighbor: 200 12 | perplexity: 50 13 | 14 | build: 15 | optimizer: 16 | type: Adam 17 | lr: 0.5 18 | weight_decay: 1.0e-5 19 | num_partition: auto 20 | num_negative: 5 21 | batch_size: 100000 22 | episode_size: 200 23 | 24 | train: 25 | model: LargeVis 26 | num_epoch: 50 27 | negative_weight: 3 28 | log_frequency: 1000 29 | 30 | evaluate: 31 | task: hierarchy 32 | file_name: 33 | target: english_setter 34 | save_file: imagenet_hierarchy.gif 35 | 36 | save: 37 | file_name: largevis_imagenet_2d.pkl -------------------------------------------------------------------------------- /config/visualization/largevis_mnist_2d.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | visualization 3 | 4 | 
resource: 5 | gpus: [0] 6 | cpu_per_gpu: auto 7 | dim: 2 8 | 9 | graph: 10 | vectors: 11 | num_neighbor: 200 12 | perplexity: 20 13 | 14 | build: 15 | optimizer: 16 | type: Adam 17 | lr: 0.5 18 | weight_decay: 1.0e-5 19 | num_partition: auto 20 | num_negative: 5 21 | batch_size: 100000 22 | episode_size: 200 23 | 24 | train: 25 | model: LargeVis 26 | num_epoch: 50 27 | negative_weight: 3 28 | log_frequency: 1000 29 | 30 | evaluate: 31 | task: visualization 32 | Y: 33 | save_file: mnist_2d.png 34 | 35 | save: 36 | file_name: largevis_mnist_2d.pkl -------------------------------------------------------------------------------- /config/visualization/largevis_mnist_3d.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | visualization 3 | 4 | resource: 5 | gpus: [0] 6 | cpu_per_gpu: auto 7 | dim: 3 8 | 9 | graph: 10 | vectors: 11 | num_neighbor: 200 12 | perplexity: 20 13 | 14 | build: 15 | optimizer: 16 | type: Adam 17 | lr: 0.5 18 | weight_decay: 1.0e-5 19 | num_partition: auto 20 | num_negative: 5 21 | batch_size: 100000 22 | episode_size: 200 23 | 24 | train: 25 | model: LargeVis 26 | num_epoch: 50 27 | negative_weight: 3 28 | log_frequency: 1000 29 | 30 | evaluate: 31 | task: animation 32 | Y: 33 | save_file: mnist_3d.gif 34 | 35 | save: 36 | file_name: largevis_mnist_3d.pkl -------------------------------------------------------------------------------- /config/word_graph/line_wikipedia.yaml: -------------------------------------------------------------------------------- 1 | application: 2 | word graph 3 | 4 | resource: 5 | gpus: [] 6 | cpu_per_gpu: auto 7 | dim: 128 8 | 9 | graph: 10 | file_name: 11 | window: 5 12 | min_count: 5 13 | 14 | build: 15 | optimizer: 16 | type: SGD 17 | lr: 0.025 18 | weight_decay: 0.005 19 | num_partition: auto 20 | num_negative: 1 21 | batch_size: 100000 22 | episode_size: 1000 23 | 24 | train: 25 | model: LINE 26 | num_epoch: 80 27 | negative_weight: 5 28 | augmentation_step: 1 
29 | random_walk_length: 40 30 | random_walk_batch_size: 100 31 | log_frequency: 1000 32 | 33 | save: 34 | file_name: line_wikipedia.pkl -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/source/api/application.rst: -------------------------------------------------------------------------------- 1 | graphvite.application 2 | ===================== 3 | 4 | .. automodule:: graphvite.application 5 | :members: 6 | :inherited-members: 7 | -------------------------------------------------------------------------------- /doc/source/api/dataset.rst: -------------------------------------------------------------------------------- 1 | graphvite.dataset 2 | ================= 3 | 4 | .. automodule:: graphvite.dataset 5 | :members: -------------------------------------------------------------------------------- /doc/source/api/graph.rst: -------------------------------------------------------------------------------- 1 | graphvite.graph 2 | =============== 3 | 4 | .. 
automodule:: graphvite.graph 5 | :members: -------------------------------------------------------------------------------- /doc/source/api/optimizer.rst: -------------------------------------------------------------------------------- 1 | graphvite.optimizer 2 | =================== 3 | 4 | .. automodule:: graphvite.optimizer 5 | :members: 6 | -------------------------------------------------------------------------------- /doc/source/api/solver.rst: -------------------------------------------------------------------------------- 1 | graphvite.solver 2 | ================ 3 | 4 | .. automodule:: graphvite.solver 5 | :members: -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = u'GraphVite' 23 | copyright = u'2019, MilaGraph' 24 | author = u'Zhaocheng Zhu, Shizhen Xu, Meng Qu, Jian Tang' 25 | 26 | import re 27 | from graphvite import __version__ 28 | # The short X.Y version 29 | version = re.match("\d+\.\d+", __version__).group() 30 | # The full version, including alpha/beta/rc tags 31 | release = __version__ 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 37 | # 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones. 43 | extensions = [ 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.autosummary', 46 | 'sphinx.ext.coverage', 47 | 'sphinx.ext.viewcode', 48 | 'sphinx.ext.napoleon' 49 | ] 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ['_templates'] 53 | 54 | # The suffix(es) of source filenames. 55 | # You can specify multiple suffix as a list of string: 56 | # 57 | # source_suffix = ['.rst', '.md'] 58 | source_suffix = '.rst' 59 | 60 | # The master toctree document. 61 | master_doc = 'index' 62 | 63 | # The language for content autogenerated by Sphinx. Refer to documentation 64 | # for a list of supported languages. 65 | # 66 | # This is also used if you do content translation via gettext catalogs. 67 | # Usually you set "language" from the command line for these cases. 68 | language = None 69 | 70 | # List of patterns, relative to source directory, that match files and 71 | # directories to ignore when looking for source files. 72 | # This pattern also affects html_static_path and html_extra_path. 
73 | exclude_patterns = [] 74 | 75 | # The name of the Pygments (syntax highlighting) style to use. 76 | pygments_style = None 77 | 78 | 79 | # -- Options for HTML output ------------------------------------------------- 80 | 81 | # The theme to use for HTML and HTML Help pages. See the documentation for 82 | # a list of builtin themes. 83 | # 84 | html_theme = 'sphinx_rtd_theme' 85 | 86 | # The name of an image file (relative to this directory) to place at the top 87 | # of the sidebar. 88 | # 89 | # html_logo = None 90 | 91 | # The name of an image file (relative to this directory) to use as a favicon of 92 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 93 | # pixels large. 94 | # 95 | html_favicon = '../../asset/logo/favicon.ico' 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 100 | # 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom static files (such as style sheets) here, 104 | # relative to this directory. They are copied after the builtin static files, 105 | # so a file named "default.css" will overwrite the builtin "default.css". 106 | html_static_path = [] 107 | 108 | # Custom sidebar templates, must be a dictionary that maps document names 109 | # to template names. 110 | # 111 | # The default sidebars (for documents that don't match any pattern) are 112 | # defined by theme itself. Builtin themes are using these templates by 113 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 114 | # 'searchbox.html']``. 115 | # 116 | # html_sidebars = {} 117 | 118 | # Generate module index or not 119 | html_domain_indices = False 120 | 121 | # -- Options for HTMLHelp output --------------------------------------------- 122 | 123 | # Output file base name for HTML help builder. 
124 | htmlhelp_basename = 'GraphVite doc' 125 | 126 | 127 | # -- Options for LaTeX output ------------------------------------------------ 128 | 129 | latex_elements = { 130 | # The paper size ('letterpaper' or 'a4paper'). 131 | # 132 | # 'papersize': 'letterpaper', 133 | 134 | # The font size ('10pt', '11pt' or '12pt'). 135 | # 136 | # 'pointsize': '10pt', 137 | 138 | # Additional stuff for the LaTeX preamble. 139 | # 140 | # 'preamble': '', 141 | 142 | # Latex figure (float) alignment 143 | # 144 | # 'figure_align': 'htbp', 145 | } 146 | 147 | # Grouping the document tree into LaTeX files. List of tuples 148 | # (source start file, target name, title, 149 | # author, documentclass [howto, manual, or own class]). 150 | latex_documents = [ 151 | (master_doc, 'GraphVite.tex', u'GraphVite Documentation', 152 | u'Zhaocheng Zhu, Shizhen Xu, Meng Qu, Jian Tang', 'manual'), 153 | ] 154 | 155 | 156 | # -- Options for manual page output ------------------------------------------ 157 | 158 | # One entry per manual page. List of tuples 159 | # (source start file, name, description, authors, manual section). 160 | man_pages = [ 161 | (master_doc, 'graphvite', u'GraphVite Documentation', 162 | [author], 1) 163 | ] 164 | 165 | 166 | # -- Options for Texinfo output ---------------------------------------------- 167 | 168 | # Grouping the document tree into Texinfo files. List of tuples 169 | # (source start file, target name, title, author, 170 | # dir menu entry, description, category) 171 | texinfo_documents = [ 172 | (master_doc, 'GraphVite', u'GraphVite Documentation', 173 | author, 'GraphVite', 'One line description of project.', 174 | 'Miscellaneous'), 175 | ] 176 | 177 | 178 | # -- Options for Epub output ------------------------------------------------- 179 | 180 | # Bibliographic Dublin Core info. 181 | epub_title = project 182 | 183 | # The unique identifier of the text. This can be a ISBN number 184 | # or the project homepage. 
185 | # 186 | # epub_identifier = '' 187 | 188 | # A unique identification for the text. 189 | # 190 | # epub_uid = '' 191 | 192 | # A list of files that should not be packed into the epub file. 193 | epub_exclude_files = ['search.html'] 194 | 195 | 196 | # -- Extension configuration ------------------------------------------------- 197 | 198 | # sort members by member type 199 | autodoc_member_order = 'groupwise' -------------------------------------------------------------------------------- /doc/source/developer/framework.rst: -------------------------------------------------------------------------------- 1 | Understand the Framework 2 | ======================== 3 | 4 | The framework of GraphVite is composed of two parts, a core library and a Python wrapper. 5 | The Python wrapper can be found in ``python/graphvite/``. It provides an auto wrapper for 6 | classes in the core library, as well as implementation for applications and datasets. 7 | 8 | The core library is implemented with C++11 and CUDA, and bound to Python using 9 | `pybind11`_. It covers implementation of all computation-related classes in GraphVite, 10 | such as graphs, solvers and optimizers. All these ingredients are packaged as classes, 11 | similar to the Python interface. The source code can be found in ``include/`` and 12 | ``src/``. 13 | 14 | .. _pybind11: https://pybind11.readthedocs.io 15 | 16 | In the C++ implementation, there is something different from Python. The graphs and 17 | solvers are templatized by the underlying data types, and the length of embedding 18 | vectors. This design enables dynamic data type in Python interface, as well as maximal 19 | compile-time optimization. 20 | 21 | The C++ interface is highly abstracted to facilitate further development on GraphVite. 22 | Generally, by inheriting from the core interface, we can implement new graph embedding 23 | instances without caring about any scheduling detail. 24 | 25 | The source code is organized as follows.
26 | 27 | - ``include/base/*`` implements basic data structures 28 | - ``include/core/*`` implements optimizers, and core interface of graphs and solvers 29 | - ``include/instance/*`` implements instances of graphs and solvers 30 | - ``include/instance/gpu/*`` implements GPU training & evaluation routines 31 | - ``include/instance/model/*`` implements forward & backward propagation of models 32 | - ``include/util/*`` implements basic utils 33 | - ``include/bind.h`` implements Python bindings 34 | - ``src/graphvite.cu`` instantiates all Python classes 35 | -------------------------------------------------------------------------------- /doc/source/developer/model.rst: -------------------------------------------------------------------------------- 1 | Customize Models 2 | ================ 3 | 4 | One common demand for graph embedding is to customize the model (i.e. score function). 5 | Here we will demonstrate an example of adding a new model to the knowledge graph 6 | solver. 7 | 8 | First, get into ``include/instance/model/knowledge_graph.h``. Fork an existing model class 9 | (e.g. TransE) and change it to a new name. 10 | 11 | .. code-block:: c++ 12 | 13 | template 14 | class TransE { 15 | __host__ __device__ static void forward(...); 16 | 17 | template 18 | __host__ __device__ static void backward(...); 19 | 20 | template 21 | __host__ __device__ static void backward(...); 22 | 23 | template 24 | __host__ __device__ static void backward(...); 25 | } 26 | 27 | Here a model class contains a forward function and several overloads of the backward 28 | function, which correspond to different categories of optimizers. We are going to 29 | modify a forward and a backward function, and then do some copy-and-paste work to the 30 | others. 31 | 32 | Let's start from the forward function. This function takes a triplet of embedding 33 | vectors, and outputs a score. 34 | 35 | ..
code-block:: c++ 36 | 37 | void forward(const Vector &head, const Vector &tail, const Vector &relation, 38 | Float &output, float margin) 39 | 40 | The last argument is either margin for latent distance model or l3 regularization 41 | for tensor decomposition models. For TransE, the function is implemented as 42 | 43 | .. code-block:: c++ 44 | 45 | output = 0; 46 | FOR(i, dim) 47 | output += abs(head[i] + relation[i] - tail[i]); 48 | output = margin - SUM(output); 49 | 50 | Here we need to replace this piece of code with our own formulas. Note that this 51 | function should be compatible with both CPU and GPU. This can be easily achieved by 52 | helper macros defined in GraphVite. 53 | 54 | We just need to use the macro ``FOR(i, stop)`` instead of the conventional 55 | ``for (int i = 0; i < stop; i++)``. For any accumulator ``x`` inside the loop (e.g. 56 | ``output`` in this case), update it with ``x = SUM(x)`` after the loop to get the 57 | correct value. 58 | 59 | For the backward function. It takes additional arguments of moment statistics, head 60 | gradient, optimizer and sample weight. For example, here is an overload with 1 moment 61 | per embedding. 62 | 63 | .. code-block:: c++ 64 | 65 | template 66 | void backward(Vector &head, Vector &tail, Vector &relation, 67 | Vector &head_moment1, Vector &tail_moment1, Vector &relation_moment1, 68 | float margin, Float gradient, const Optimizer &optimizer, Float weight) 69 | 70 | The backward function should compute the gradient for each embedding, and update them 71 | with the optimizer. Typically, this is implemented as 72 | 73 | .. code-block:: c++ 74 | 75 | auto update = get_update_function_1_moment(); 76 | FOR(i, dim) { 77 | Float h = head[i]; 78 | Float t = tail[i]; 79 | Float r = relation[i]; 80 | Float s = h + r - t > 0 ? 
1 : -1; 81 | head[i] -= (optimizer.*update)(h, -gradient * s, head_moment1[i], weight); 82 | tail[i] -= (optimizer.*update)(t, gradient * s, tail_moment1[i], weight); 83 | relation[i] -= (optimizer.*update)(r, -gradient * s, relation_moment1[i], weight); 84 | } 85 | 86 | Here we modify this function according to the partial derivatives of our forward 87 | function. Once we complete a backward function, we can copy them to the other 88 | overloads. The only difference among overloads is that they use different update 89 | function and numbers of moment statistics. 90 | 91 | Finally, we have to let the solver know there is a new model. In 92 | ``instance/knowledge_graph.cuh``, add the name of your model in 93 | ``get_available_models()``. Also add run-time dispatch of the new model in 94 | ``train_dispatch()`` and ``predict_dispatch()``. 95 | 96 | .. code-block:: c++ 97 | 98 | switch (num_moment) { 99 | case 0: 100 | if (solver->model == ...) 101 | ... 102 | case 1: 103 | if (solver->model == ...) 104 | ... 105 | case 2: 106 | if (solver->model == ...) 107 | ... 108 | 109 | Compile the source and it should be ready. -------------------------------------------------------------------------------- /doc/source/developer/routine.rst: -------------------------------------------------------------------------------- 1 | Customize Routine 2 | ================= 3 | 4 | For advanced developers, GraphVite also supports customizing routines, such as 5 | training and prediction. Here we will illustrate how to add a new routine to the 6 | knowledge graph solver. 7 | 8 | Before we start, it would be better if you know some basics about 9 | `the index and threads`_ in CUDA. In GraphVite, the threads are arranged in a group 10 | of 32 (`warp`_). Threads in a group works simultaneously on an edge sample, where 11 | each thread is responsible for computation in some dimensions, according to the 12 | modulus of the dimension. 13 | 14 | .. 
_the index and threads: https://en.wikipedia.org/wiki/Thread_block_(CUDA_programming)#Indexing 15 | .. _warp: https://en.wikipedia.org/wiki/Thread_block_(CUDA_programming)#Warps 16 | 17 | First, get into ``include/instance/gpu/knowledge_graph.cuh``. This file includes several 18 | training functions and a prediction function. 19 | 20 | .. code-block:: c++ 21 | 22 | template class Model, OptimizerType optimizer_type> 23 | __global__ void train(...) 24 | 25 | template class Model, OptimizerType optimizer_type> 26 | __global__ void train_1_moment(...) 27 | 28 | template class Model, OptimizerType optimizer_type> 29 | __global__ void train_2_moment(...) 30 | 31 | template class Model> 32 | __global__ void predict(...) 33 | 34 | The 3 implementations correspond to 3 categories of optimizers, as we have seen in 35 | :doc:`model`. Routines with different numbers of moment statistics are separated 36 | to achieve maximal compile-time optimization. 37 | 38 | Let's take a look at a training function. Generally, the function body looks like 39 | 40 | .. code-block:: c++ 41 | 42 | for (int sample_id = thread_id / kWarpSize; sample_id < batch_size; sample_id += num_thread / kWarpSize) { 43 | if (adversarial_temperature > kEpsilon) 44 | for (int s = 0; s < num_negative; s++) 45 | normalizer += ...; 46 | 47 | for (int s = 0; s <= num_negative; s++) { 48 | model.forward(sample[s], logit); 49 | prob = sigmoid(logit); 50 | 51 | gradient = ...; 52 | weight = ...; 53 | sample_loss += ...; 54 | model.backward(sample[s], gradient); 55 | } 56 | } 57 | 58 | The outer loop iterates over all positive samples. For each positive sample and its 59 | negative samples, we first compute the normalizer of self-adversarial negative 60 | sampling, and then perform forward and backward propagation for each sample. 61 | 62 | For example, if we want to change the negative log likelihood to a mean square error, 63 | we can change the following lines. 64 | 65 | ..
code-block:: c++ 66 | 67 | gradient = 2 * (logit - label); 68 | sample_loss += weight * (logit - label) * (logit - label); 69 | 70 | Or we can use a margin-based ranking loss like 71 | 72 | .. code-block:: c++ 73 | 74 | model.forward(samples[num_negative], positive_score); // the positive sample 75 | 76 | for (int s = 0; s < num_negative; s++) { 77 | model.forward(samples[s], negative_score); 78 | if (positive_score - negative_score < margin) { 79 | sample_loss += negative_score - positive_score + margin; 80 | gradient = 1; 81 | model.backward(samples[s], gradient); 82 | model.backward(samples[num_negative], -gradient); 83 | } 84 | } 85 | 86 | We may also add new hyperparameters or training routines. Note if we change 87 | the signature of the function, we should also update its calls accordingly. For 88 | knowledge graph, they are in ``train_dispatch()`` and ``predict_dispatch()`` of file 89 | ``include/instance/knowledge_graph.cuh``. -------------------------------------------------------------------------------- /doc/source/developer/solver.rst: -------------------------------------------------------------------------------- 1 | Customize Solvers 2 | ================= 3 | 4 | A more interesting thing to explore is extending GraphVite with new solvers. 5 | Generally, the core library is capable of performing any graph embedding variant that 6 | fits into the following paradigm. 7 | 8 | - The training samples are edges. 9 | There may be additional attributes (e.g. labels) to edge samples. 10 | 11 | To support that, GraphVite provides a protocol interface and a series of abstract 12 | classes. We only need to declare the protocols for our parameters, and fill in the 13 | virtual member functions for the classes. 14 | 15 | Let's begin with the protocol interface. There are 3 main protocols for parameters. 16 | 17 | - ``head`` 18 | - ``tail`` 19 | - ``global`` 20 | 21 | For each parameter matrix, it should be assigned one of these protocols. 
22 | ``head`` means that the parameter matrix is indexed by head nodes in directed edges, 23 | while ``tail`` corresponds to tail nodes. Any other parameter matrix should be assigned 24 | with ``global``. 25 | 26 | There are also 2 optional protocols. One is ``in place``, which implies that the 27 | parameter matrix takes in-place update and doesn't need storage for gradients. 28 | The other is ``shared``, which implies the matrix is shared with the previous one. 29 | This may be used for the tied weight case. 30 | 31 | Each parameter matrix should also be specified with a shape. We can use ``auto`` 32 | if the shape can be inferred from the protocol and the graph structure. 33 | 34 | For example, knowledge graph embeddings take the following settings. 35 | 36 | .. code-block:: c++ 37 | 38 | // head embeddings, tail embeddings, relation embeddings 39 | protocols = {head | in place, tail | in place | shared, global}; 40 | shapes = {auto, auto, graph->num_relation}; 41 | 42 | If the learning routine also needs negative sampling, we should additionally 43 | specify a negative sampler protocol. For knowledge graph embedding, this is 44 | 45 | .. code-block:: c++ 46 | 47 | negative_sampler_protocol = head | tail; 48 | 49 | Given the protocols, GraphVite will automatically schedule the parameters and samples 50 | over multiple GPUs, using an algorithm called parallel negative sampling. For a more 51 | detailed explanation of the algorithm, see section 3.2 in `GraphVite paper`_. 52 | 53 | .. _GraphVite paper: https://arxiv.org/pdf/1903.00757.pdf 54 | 55 | .. note:: 56 | Parallel negative sampling only takes place when at least one parameter matrix 57 | is ``head`` or ``tail``. If all parameters are ``global``, GraphVite will schedule 58 | them by standard data parallel. 59 | 60 | To implement a new solver, we need to implement ``get_protocols()``, 61 | ``get_sampler_protocol()`` and ``get_shapes()`` as above. 
Some additional helper 62 | functions may be required to complete the solver. 63 | 64 | A solver also contains a sampler and a worker class. By default, the sampler samples 65 | positive edges from the graph, with probability proportional to the weight of each 66 | edge. We only need to specify the additional edge attributes in ``get_attributes()``. 67 | 68 | For the worker, it will build the negative sampler according to its protocol. 69 | We need to specify the GPU implementation of models in ``train_dispatch()``. See 70 | :doc:`model` for how to do that. 71 | 72 | Finally, to make our new solver appear in Python, add a Python declaration for it in 73 | ``include/bind.h``, and instantiate it in ``src/graphvite.cu``. 74 | 75 | See ``include/instance/*`` for all solver instances. 76 | 77 | .. note:: 78 | Functions in solver, sampler and worker can be overridden. For example, 79 | :class:`GraphSolver <graphvite.solver.GraphSolver>` overrides edge sampling with 80 | online augmentation. -------------------------------------------------------------------------------- /doc/source/faq.rst: -------------------------------------------------------------------------------- 1 | Frequently Asked Questions 2 | ========================== 3 | 4 | .. contents:: 5 | :local: 6 | 7 | How should I cite GraphVite? 8 | ---------------------------- 9 | 10 | If you find GraphVite helps your research, please cite it in your publications. 11 | 12 | .. code-block:: none 13 | 14 | @inproceedings{zhu2019graphvite, 15 | title={GraphVite: A High-Performance CPU-GPU Hybrid System for Node Embedding}, 16 | author={Zhu, Zhaocheng and Xu, Shizhen and Qu, Meng and Tang, Jian}, 17 | booktitle={The World Wide Web Conference}, 18 | pages={2494--2504}, 19 | year={2019}, 20 | organization={ACM} 21 | } 22 | 23 | Why is my CUDA driver version insufficient for CUDA runtime version? 
24 | -------------------------------------------------------------------- 25 | 26 | This is because you have installed a GraphVite compiled for some later CUDA version. 27 | You can check your CUDA version with ``nvcc -V``, and then install the corresponding 28 | package by 29 | 30 | .. code-block:: bash 31 | 32 | conda install -c milagraph -c conda-forge graphvite cudatoolkit=x.x 33 | 34 | where ``x.x`` is your CUDA version, e.g. 9.2 or 10.0. 35 | 36 | Note graphvite does not support CUDA version earlier than 9.2, due to a failure of 37 | old version ``nvcc``. 38 | 39 | Why is there a compilation error for template deduction? 40 | -------------------------------------------------------- 41 | 42 | This is due to a failure of old version ``nvcc`` in compiling the templates in 43 | ``pybind11``. Generally, ``nvcc 9.2`` or later will work. 44 | 45 | Why is the access to embeddings so slow? 46 | ---------------------------------------- 47 | 48 | Due to the binding mechanism, the numpy view of embeddings is generated each time 49 | when you access the embeddings in Python. Such generation may take a non-trivial 50 | overhead. To avoid that cost, we recommend you to copy the reference of the 51 | embeddings. 52 | 53 | .. code-block:: python 54 | 55 | embeddings = solver.vertex_embeddings 56 | 57 | Now the access to ``embeddings`` should be good. 58 | 59 | How can I speed up compilation? 60 | ------------------------------- 61 | 62 | The compilation can be accelerated by reducing the number of template instantiations. 63 | You can pass ``-DFAST_COMPILE=True`` to cmake, which will only compile commonly used 64 | embedding dimensions. You may also comment out unnecessary instantiations in 65 | ``src/graphvite.cu`` for further speed-up. 66 | 67 | How can I solve the BLAS issue in ``faiss``? 68 | -------------------------------------------- 69 | 70 | ``faiss`` is only required by the visualization application in GraphVite. If you do 
If you do 71 | not need visualization, you can pass ``-DNO_FAISS=True`` to cmake to skip that. -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. GraphVite documentation master file, created by 2 | sphinx-quickstart on Wed May 29 18:13:45 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | GraphVite - graph embedding at high speed and large scale 7 | ========================================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | :caption: Get Started 12 | 13 | Introduction 14 | install 15 | quick_start 16 | overview 17 | benchmark 18 | pretrained_model 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | :caption: User Guide 23 | 24 | user/command_line 25 | user/configuration 26 | user/format 27 | user/python 28 | user/auto 29 | 30 | .. toctree:: 31 | :maxdepth: 1 32 | :caption: Developer Guide 33 | 34 | developer/framework 35 | developer/model 36 | developer/routine 37 | developer/solver 38 | 39 | .. toctree:: 40 | :maxdepth: 1 41 | :caption: Package Reference 42 | 43 | Application 44 | Graph 45 | Solver 46 | Optimizer 47 | Dataset 48 | 49 | .. toctree:: 50 | :maxdepth: 1 51 | :caption: FAQ 52 | 53 | FAQ 54 | 55 | Indices and tables 56 | ================== 57 | 58 | * :ref:`genindex` 59 | * :ref:`search` -------------------------------------------------------------------------------- /doc/source/install.rst: -------------------------------------------------------------------------------- 1 | Install 2 | ======= 3 | 4 | GraphVite can be installed from either conda or source. You can also easily install 5 | the library on `Google Colab`_ for demonstration. 6 | 7 | .. 
_Google Colab: https://colab.research.google.com/ 8 | 9 | Install from conda 10 | ------------------ 11 | 12 | To install GraphVite from ``conda``, you only need one line. 13 | 14 | .. code-block:: bash 15 | 16 | conda install -c milagraph -c conda-forge graphvite cudatoolkit=$(nvcc -V | grep -Po "(?<=V)\d+.\d+") 17 | 18 | By default, this will install all dependencies, including ``PyTorch`` and 19 | ``matplotlib``. If you only need embedding training without evaluation, there is an 20 | alternative with minimum dependencies. 21 | 22 | .. code-block:: bash 23 | 24 | conda install -c milagraph -c conda-forge graphvite-mini cudatoolkit=$(nvcc -V | grep -Po "(?<=V)\d+.\d+") 25 | 26 | Install from source 27 | ------------------- 28 | 29 | First, clone GraphVite from GitHub. 30 | 31 | .. code-block:: bash 32 | 33 | git clone https://github.com/DeepGraphLearning/graphvite 34 | cd graphvite 35 | 36 | Install compilation and runtime dependencies via ``conda``. 37 | 38 | .. code-block:: bash 39 | 40 | conda install -y --file conda/requirements.txt 41 | 42 | Compile the code using the following directives. If you have ``faiss`` installed 43 | from source, you can pass ``-DFAISS_PATH=/path/to/faiss`` to ``cmake``. 44 | 45 | .. code-block:: bash 46 | 47 | mkdir build 48 | cd build && cmake .. && make && cd - 49 | 50 | Finally, install Python bindings. 51 | 52 | .. code-block:: bash 53 | 54 | cd python && python setup.py install && cd - 55 | 56 | Install on Colab 57 | ---------------- 58 | 59 | First, install Miniconda on Colab. 60 | 61 | .. code-block:: bash 62 | 63 | !wget -c https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 64 | !chmod +x Miniconda3-latest-Linux-x86_64.sh 65 | !./Miniconda3-latest-Linux-x86_64.sh -b -p /usr/local -f 66 | 67 | Then we install GraphVite and some tools for Jupyter Notebook. 68 | 69 | .. 
code-block:: bash 70 | 71 | !conda install -y -c milagraph -c conda-forge graphvite \ 72 | python=3.6 cudatoolkit=$(nvcc -V | grep -Po "(?<=V)\d+\.\d+") 73 | !conda install -y wurlitzer ipykernel 74 | 75 | Load the installed packages. Now you are ready to go. 76 | 77 | .. code-block:: python 78 | 79 | import site 80 | site.addsitedir("/usr/local/lib/python3.6/site-packages") 81 | %reload_ext wurlitzer 82 | -------------------------------------------------------------------------------- /doc/source/introduction.rst: -------------------------------------------------------------------------------- 1 | GraphVite - graph embedding at high speed and large scale 2 | ========================================================= 3 | 4 | .. include:: link.rst 5 | 6 | GraphVite is a general graph embedding engine, dedicated to high-speed and 7 | large-scale embedding learning in various applications. By cooperating CPUs and GPUs 8 | for learning, it scales to million-scale or even billion-scale graphs. With its 9 | Python interface, you can easily practice advanced graph embedding algorithms, and 10 | get results in incredibly short time. 11 | 12 | Try GraphVite if you have any of the following demands. 13 | 14 | - You want to reproduce graph learning algorithms on a uniform platform. 15 | - You need fast visualization for graphs or high-dimensional data. 16 | - You are tired of waiting a long time for prototyping or tuning models. 17 | - You need to learn representations of large graphs or knowledge graphs. 18 | 19 | Generally, GraphVite provides complete training and evaluation pipelines for 3 20 | applications: **node embedding**, **knowledge graph embedding** and 21 | **graph & high-dimensional data visualization**. Besides, it also includes 9 popular 22 | models, along with their benchmarks on a bunch of standard datasets. 23 | 24 | .. 
figure:: ../../asset/graph.png 25 | :align: left 26 | :height: 180px 27 | :target: overview.html#node-embedding 28 | :figclass: align-center 29 | 30 | Node Embedding 31 | 32 | .. figure:: ../../asset/knowledge_graph.png 33 | :align: left 34 | :height: 180px 35 | :target: overview.html#knowledge-graph-embedding 36 | :figclass: align-center 37 | 38 | Knowledge Graph |br| Embedding 39 | 40 | .. figure:: ../../asset/visualization.png 41 | :align: left 42 | :height: 180px 43 | :target: overview.html#graph-high-dimensional-data-visualization 44 | :figclass: align-center 45 | 46 | Graph & |br| High-dimensional |br| Data Visualization 47 | 48 | .. |br| raw:: html 49 | 50 |
51 | 52 | .. raw:: html 53 | 54 |
55 | 56 | How fast is GraphVite? 57 | ---------------------- 58 | 59 | To give a brief idea of GraphVite's speed, we summarize the training time of 60 | GraphVite along with the best open-source implementations. All the time is reported 61 | based on a server with 24 CPU threads and 4 V100 GPUs. 62 | 63 | Training time of node embedding on `Youtube`_ dataset. 64 | 65 | +-------------+----------------------------+-----------+---------+ 66 | | Model | Existing Implementation | GraphVite | Speedup | 67 | +=============+============================+===========+=========+ 68 | | `DeepWalk`_ | `1.64 hrs (CPU parallel)`_ | 1.19 mins | 82.9x | 69 | +-------------+----------------------------+-----------+---------+ 70 | | `LINE`_ | `1.39 hrs (CPU parallel)`_ | 1.17 mins | 71.4x | 71 | +-------------+----------------------------+-----------+---------+ 72 | | `node2vec`_ | `24.4 hrs (CPU parallel)`_ | 4.39 mins | 334x | 73 | +-------------+----------------------------+-----------+---------+ 74 | 75 | .. _1.64 hrs (CPU parallel): https://github.com/phanein/deepwalk 76 | .. _1.39 hrs (CPU parallel): https://github.com/tangjianpku/LINE 77 | .. _24.4 hrs (CPU parallel): https://github.com/aditya-grover/node2vec 78 | 79 | Training / evaluation time of knowledge graph embedding on `FB15k`_ dataset. 80 | 81 | +-----------+---------------------------------+--------------------+---------------+ 82 | | Model | Existing Implementation | GraphVite | Speedup | 83 | +===========+=================================+====================+===============+ 84 | | `TransE`_ | `1.31 hrs / 1.75 mins (1 GPU)`_ | 13.5 mins / 54.3 s | 5.82x / 1.93x | 85 | +-----------+---------------------------------+--------------------+---------------+ 86 | | `RotatE`_ | `3.69 hrs / 4.19 mins (1 GPU)`_ | 28.1 mins / 55.8 s | 7.88x / 4.50x | 87 | +-----------+---------------------------------+--------------------+---------------+ 88 | 89 | .. 
_1.31 hrs / 1.75 mins (1 GPU): https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding 90 | .. _3.69 hrs / 4.19 mins (1 GPU): https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding 91 | 92 | Training time of high-dimensional data visualization on `MNIST`_ dataset. 93 | 94 | +-------------+-----------------------------+-----------+---------+ 95 | | Model | Existing Implementation | GraphVite | Speedup | 96 | +=============+=============================+===========+=========+ 97 | | `LargeVis`_ | `15.3 mins (CPU parallel)`_ | 13.9 s | 66.8x | 98 | +-------------+-----------------------------+-----------+---------+ 99 | 100 | .. _15.3 mins (CPU parallel): https://github.com/lferry007/LargeVis 101 | 102 | Comparison to concurrent work 103 | ----------------------------- 104 | 105 | A work concurrent to GraphVite is `PyTorch-BigGraph`_, which aims at accelerating 106 | knowledge graph embedding on large-scale data. Here is an apple-to-apple comparison 107 | of models implemented in both libraries on `FB15k`_, under the same setting of 108 | hyperparameters. 109 | 110 | .. _PyTorch-BigGraph: https://torchbiggraph.readthedocs.io 111 | 112 | +-------------+------------------+-----------+---------+ 113 | | Model | PyTorch-BigGraph | GraphVite | Speedup | 114 | +=============+==================+===========+=========+ 115 | | `TransE`_ | 1.21 hrs | 8.37 mins | 8.70x | 116 | +-------------+------------------+-----------+---------+ 117 | | `DistMult`_ | 2.48 hrs | 20.3 mins | 7.33x | 118 | +-------------+------------------+-----------+---------+ 119 | | `ComplEx`_ | 3.13 hrs | 18.5 mins | 10.1x | 120 | +-------------+------------------+-----------+---------+ 121 | 122 | GraphVite surpasses its counterpart by a significant margin. Besides, the framework of 123 | GraphVite also supports two more applications, and provides many benchmarks for easy 124 | research and development. 
125 | 126 | About the name 127 | -------------- 128 | GraphVite(/ɡɹæfvit/) is a combination of English word "graph" and French word 129 | "vite", which means "rapid". GraphVite represents the traits of this library, 130 | as well as the bilingual environment of `Mila`_ where the library was developed. 131 | 132 | .. _Mila: https://mila.quebec -------------------------------------------------------------------------------- /doc/source/link.rst: -------------------------------------------------------------------------------- 1 | .. Node embedding models 2 | .. _DeepWalk: https://arxiv.org/pdf/1403.6652.pdf 3 | .. _LINE: https://arxiv.org/pdf/1503.03578.pdf 4 | .. _node2vec: https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf 5 | 6 | .. Knowledge graph embedding models 7 | .. _TransE: http://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data.pdf 8 | .. _DistMult: https://arxiv.org/pdf/1412.6575.pdf 9 | .. _ComplEx: http://proceedings.mlr.press/v48/trouillon16.pdf 10 | .. _SimplE: https://papers.nips.cc/paper/7682-simple-embedding-for-link-prediction-in-knowledge-graphs.pdf 11 | .. _RotatE: https://arxiv.org/pdf/1902.10197.pdf 12 | .. _QuatE: https://papers.nips.cc/paper/8541-quaternion-knowledge-graph-embeddings.pdf 13 | 14 | .. Graph & high-dimensional data visualization models 15 | .. _LargeVis: https://arxiv.org/pdf/1602.00370.pdf 16 | 17 | .. GraphVite 18 | .. _GraphVite: https://arxiv.org/pdf/1903.00757.pdf 19 | .. _Repo: https://github.com/DeepGraphLearning/graphvite 20 | 21 | .. Graph datasets 22 | .. _Youtube: http://conferences.sigcomm.org/imc/2007/papers/imc170.pdf 23 | .. _Flickr: http://conferences.sigcomm.org/imc/2007/papers/imc170.pdf 24 | .. _Friendster-small: https://arxiv.org/pdf/1903.00757.pdf 25 | 26 | .. Knowledge graph datasets 27 | .. _FB15k: http://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data.pdf 28 | .. _FB15k-237: https://www.aclweb.org/anthology/W15-4007 29 | .. 
_WN18: http://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data.pdf 30 | .. _WN18RR: https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/17366/15884 31 | .. _Wikidata5m: https://arxiv.org/pdf/1911.06136.pdf 32 | 33 | .. Image datasets 34 | .. _MNIST: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf 35 | .. _ImageNet: https://arxiv.org/pdf/1409.0575.pdf 36 | 37 | .. Misc 38 | .. _WordNet: http://www.cs.columbia.edu/~vh/courses/LexicalSemantics/Ontologies/miller-wordnet95.pdf 39 | .. _Wikidata: https://www.wikidata.org 40 | .. _Wikipedia: https://www.wikipedia.org -------------------------------------------------------------------------------- /doc/source/overview.rst: -------------------------------------------------------------------------------- 1 | Application Overview 2 | ==================== 3 | 4 | In GraphVite, the pipelines are packaged into classes, which we call applications. 5 | 6 | There are 3 main applications, node embedding, knowledge graph embedding, and 7 | graph & high-dimensional data visualization. For each application, GraphVite loads 8 | an input graph, performs embedding training, and finally evaluates the embeddings on 9 | downstream tasks. 10 | 11 | .. _node embedding: 12 | 13 | Node Embedding 14 | -------------- 15 | 16 | Node embedding is a family of algorithms that learn a representation for each node 17 | in a graph. It is important for graph analysis and a variety of downstream tasks. 18 | 19 | For example, node embedding can be leveraged for analyzing social networks, citation 20 | networks, or protein-protein interaction networks. It may also be helpful to other 21 | unsupervised learning problems with graph structures. 22 | 23 | To qualify the learned embeddings, we evaluate them on the node classification and 24 | link prediction tasks. 25 | 26 | .. seealso:: 27 | Package Reference: 28 | :class:`GraphApplication <graphvite.application.GraphApplication>` 29 | 30 | .. 
_knowledge graph embedding: 31 | 32 | Knowledge Graph Embedding 33 | ------------------------- 34 | 35 | Knowledge graph (a.k.a. knowledge base) is a family of graphs where each edge has a 36 | type, indicating the relation of the connected nodes. In knowledge graphs, nodes 37 | are called entities, and edges are called relations. The knowledge graph embedding 38 | algorithm aims to learn a representation for each entity and relation. 39 | 40 | With knowledge graph embeddings, it is easy to compare entities or relations in a 41 | uniform space, and further infer unobserved links in a knowledge graph. 42 | 43 | The learned embeddings are evaluated under the link prediction task in GraphVite. 44 | 45 | .. seealso:: 46 | Package Reference: 47 | :class:`KnowledgeGraphApplication <graphvite.application.KnowledgeGraphApplication>` 48 | 49 | .. _visualization: 50 | 51 | Graph & High-dimensional Data Visualization 52 | ------------------------------------------- 53 | 54 | Visualization is a critical step in exploring and analyzing graphs and 55 | high-dimensional data. Typically, visualization methods project each data point into 56 | a low-dimensional space. 57 | 58 | As most projection methods treat the similarity between data points as a graph, 59 | GraphVite is also able to provide acceleration for this application. Taking a graph 60 | or a group of high-dimensional vectors, GraphVite can produce either 2D or 3D 61 | projections in a very short time. 62 | 63 | .. seealso:: 64 | Package Reference: 65 | :class:`VisualizationApplication <graphvite.application.VisualizationApplication>` -------------------------------------------------------------------------------- /doc/source/pretrained_model.rst: -------------------------------------------------------------------------------- 1 | Pre-trained Models 2 | ================== 3 | 4 | .. include:: link.rst 5 | 6 | To facilitate the usage of knowledge graph representations in semantic tasks, we 7 | provide a bunch of pre-trained embeddings for some common datasets. 
8 | 9 | Wikidata5m 10 | ---------- 11 | 12 | `Wikidata5m`_ is a large-scale knowledge graph dataset constructed from `Wikidata`_ 13 | and `Wikipedia`_. It contains plenty of entities in the general domain, such as 14 | celebrities, events, concepts and things. 15 | 16 | We trained 5 standard knowledge graph embedding models on `Wikidata5m`_. The 17 | performance benchmark of these models can be found :ref:`here `. 18 | 19 | +-------------+-----------+---------+----------------------------+ 20 | | Model | Dimension | Size | Download link | 21 | +=============+===========+=========+============================+ 22 | | `TransE`_ | 512 | 9.33 GB | `transe_wikidata5m.pkl`_ | 23 | +-------------+-----------+---------+----------------------------+ 24 | | `DistMult`_ | 512 | 9.33 GB | `distmult_wikidata5m.pkl`_ | 25 | +-------------+-----------+---------+----------------------------+ 26 | | `ComplEx`_ | 512 | 9.33 GB | `complex_wikidata5m.pkl`_ | 27 | +-------------+-----------+---------+----------------------------+ 28 | | `SimplE`_ | 512 | 9.33 GB | `simple_wikidata5m.pkl`_ | 29 | +-------------+-----------+---------+----------------------------+ 30 | | `RotatE`_ | 512 | 9.33 GB | `rotate_wikidata5m.pkl`_ | 31 | +-------------+-----------+---------+----------------------------+ 32 | | `QuatE`_ | 512 | 9.36 GB | `quate_wikidata5m.pkl`_ | 33 | +-------------+-----------+---------+----------------------------+ 34 | 35 | .. _transe_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EX4c1Ud8M61KlDUn2U_yz_sBP_bXNuFnudfhRnYzWUFA2A?download=1 36 | .. _distmult_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EQsXL8UmSJhHt2uBdB32muMBo4o4RUaMR6KDEQTcsz3jvg?download=1 37 | .. _complex_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/ERAwwLdsvdRIlrkVujMetmEBV9RgizsFnW91pIpjkBjbTw?download=1 38 | .. 
_simple_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EVcJpJAzkThPu1vjgJLohscBgwtPajhTZvCCd8nEg1GiwA?download=1 39 | .. _rotate_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EWvX5Z0rWZ9GvmdLaM3ONx4BtxzDFehXdc0gwE52YEiX2Q?download=1 40 | .. _quate_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EUGNHMB9tlJAokjxBouyG08ByfAb3-IYHCszTMmJnQSegg?download=1 41 | 42 | Load pre-trained models 43 | ----------------------- 44 | 45 | The pre-trained models can be loaded through ``pickle``. 46 | 47 | .. code-block:: python 48 | 49 | import pickle 50 | with open("transe_wikidata5m.pkl", "rb") as fin: 51 | model = pickle.load(fin) 52 | entity2id = model.graph.entity2id 53 | relation2id = model.graph.relation2id 54 | entity_embeddings = model.solver.entity_embeddings 55 | relation_embeddings = model.solver.relation_embeddings 56 | 57 | Load the alias mapping from the dataset. Now we can access the embeddings by natural language index. 58 | 59 | .. code-block:: python 60 | 61 | import graphvite as gv 62 | alias2entity = gv.dataset.wikidata5m.alias2entity 63 | alias2relation = gv.dataset.wikidata5m.alias2relation 64 | print(entity_embeddings[entity2id[alias2entity["machine learning"]]]) 65 | print(relation_embeddings[relation2id[alias2relation["field of work"]]]) -------------------------------------------------------------------------------- /doc/source/quick_start.rst: -------------------------------------------------------------------------------- 1 | Quick Start 2 | =========== 3 | 4 | Here is a quick-start example that illustrates the pipeline in GraphVite. If ``pytorch`` 5 | is not installed, we can simply add ``--no-eval`` to skip the evaluation stage. 6 | 7 | .. 
code-block:: bash 8 | 9 | graphvite baseline quick start 10 | 11 | The example will automatically download a social network dataset called BlogCatalog, 12 | where nodes correspond to blog users. For each node, we learn an embedding vector that 13 | preserves its neighborhood structure, which is done by minimizing a reconstruction 14 | loss. GraphVite will display the progress and the loss during training. 15 | 16 | Once the training is done, the learned embeddings are evaluated on link prediction and 17 | node classification tasks. For link prediction, we try to predict unseen edges with 18 | the embeddings. For node classification, we use the embeddings as inputs for 19 | multi-label classification of nodes. 20 | 21 | Typically, this example takes no more than 1 minute. We will obtain some output like 22 | 23 | .. code-block:: none 24 | 25 | Batch id: 6000 26 | loss = 0.371041 27 | 28 | ------------- link prediction -------------- 29 | AUC: 0.899933 30 | 31 | ----------- node classification ------------ 32 | macro-F1@20%: 0.242114 33 | micro-F1@20%: 0.391342 34 | 35 | Note that the F1 scores may vary across different trials, as only one random split is 36 | evaluated for quick demonstration here. 37 | 38 | The learned embeddings are saved into a pickle dump. We can load them for further 39 | use. 40 | 41 | >>> import pickle 42 | >>> with open("line_blogcatalog.pkl", "rb") as fin: 43 | >>> model = pickle.load(fin) 44 | >>> names = model.graph.id2name 45 | >>> embeddings = model.solver.vertex_embeddings 46 | >>> print(names[1024], embeddings[1024]) 47 | 48 | Another interesting example is a synthetic math dataset of arithmetic operations. By 49 | treating the operations as relations of a knowledge graph, we can learn embeddings 50 | that generalize to unseen triplets (i.e. computation formulas). Check out this example 51 | with 52 | 53 | .. 
code-block:: bash 54 | 55 | graphvite baseline math 56 | 57 | For a more in-depth tutorial about GraphVite, take a look at 58 | 59 | - :doc:`user/command_line` 60 | - :doc:`user/configuration` 61 | - :doc:`user/python` 62 | - :doc:`user/auto` -------------------------------------------------------------------------------- /doc/source/user/auto.rst: -------------------------------------------------------------------------------- 1 | Magic of Auto 2 | ============= 3 | 4 | Hyperparameter tuning is usually painful for machine learning practitioners. In order 5 | to help users focus on the most important part, GraphVite provides an auto deduction 6 | for many hyperparameters. Generally, auto deduction will maximize the speed of the 7 | system, while keeping the performance loss as small as possible. 8 | 9 | To invoke auto deduction, we can simply leave hyperparameters to their default 10 | values. An explicit way is to use ``auto`` in configuration files, or value 11 | ``gv.auto`` in Python. 12 | 13 | Here is a list of hyperparameters that support auto deduction. 14 | 15 | .. code-block:: yaml 16 | 17 | resource: 18 | gpus: [] 19 | gpu_memory_limit: auto 20 | cpu_per_gpu: auto 21 | 22 | build: 23 | optimizer: auto 24 | num_partition: auto 25 | episode_size: auto 26 | 27 | train: 28 | # for node embedding 29 | augmentation_step: auto 30 | 31 | .. note:: 32 | The auto value for ``gpus`` is an empty list. -------------------------------------------------------------------------------- /doc/source/user/command_line.rst: -------------------------------------------------------------------------------- 1 | Command Line 2 | ============ 3 | 4 | As we have seen in :doc:`../quick_start`, GraphVite can be simply invoked from a 5 | command line. Here are some other useful commands we can use. 6 | 7 | Reproduce baseline benchmarks 8 | ----------------------------- 9 | 10 | .. code-block:: bash 11 | 12 | graphvite baseline [keyword ...] 
[--no-eval] [--gpu n] [--cpu m] [--epoch e] 13 | 14 | GraphVite provides a large number of baselines on standard datasets. To reproduce 15 | a baseline benchmark, we only need to specify the keywords of the experiment, and 16 | the library will do the rest for us. 17 | 18 | By default, baselines are configured to use all CPUs and GPUs. We may override this 19 | behavior by specifying the number of GPUs and the number of CPUs per GPU. We may also 20 | override the number of training epochs for fast experiments. 21 | 22 | For example, the following command line reproduces RotatE model on FB15k dataset, 23 | using 4 GPUs and 12 CPUs. 24 | 25 | .. code-block:: bash 26 | 27 | graphvite baseline rotate fb15k --gpu 4 --cpu 3 28 | 29 | Use ``graphvite list`` to get a list of available baselines. 30 | 31 | Run configuration files 32 | ----------------------- 33 | 34 | Custom experiments can be easily carried out in GraphVite through a yaml configuration. 35 | This is especially convenient if we want to use GraphVite as an off-the-shelf tool 36 | for pretraining embeddings. 37 | 38 | .. code-block:: bash 39 | 40 | graphvite new [application ...] [--file f] 41 | 42 | The above command creates a configuration scaffold for our application, where most 43 | settings are ready. We just need to fill a minimal number of settings following the 44 | instructions. For a more detailed introduction on configuration files, see 45 | :ref:`experiment configuration`. 46 | 47 | Once we complete the configuration file, we can run it by 48 | 49 | .. code-block:: bash 50 | 51 | graphvite run [config] [--no-eval] [--gpu n] [--cpu m] [--epoch e] 52 | 53 | Visualize high-dimensional vectors 54 | ---------------------------------- 55 | 56 | .. code-block:: bash 57 | 58 | graphvite visualize [file] [--label label_file] [--save save_file] [--perplexity n] [--3d] 59 | 60 | We can visualize our high-dimensional vectors with a simple command line in 61 | GraphVite. 
62 | 63 | The file can be either a numpy dump ``*.npy`` or a text matrix ``*.txt``. We can 64 | also provide a label file indicating the category of each data point. For the save 65 | file, we recommend to use ``png`` format, while ``pdf`` is also supported. -------------------------------------------------------------------------------- /doc/source/user/configuration.rst: -------------------------------------------------------------------------------- 1 | Configuration Files 2 | =================== 3 | 4 | .. include:: ../link.rst 5 | 6 | .. _experiment configuration: 7 | 8 | Experiment configuration 9 | ------------------------ 10 | 11 | An experiment configuration starts with an ``application type``, and contains settings 12 | for ``resource``, ``format``, ``graph``, ``build``, ``load``, ``train``, ``evaluate`` 13 | and ``save`` stages. 14 | 15 | Here is the configuration used in :doc:`../quick_start`. 16 | :download:`quick_start.yaml <../../../config/demo/quick_start.yaml>` 17 | 18 | The stages are configured as follows. 19 | 20 | .. code-block:: yaml 21 | 22 | application: [type] 23 | 24 | The application type can be ``graph``, ``word graph``, ``knowledge graph`` or 25 | ``visualization``. 26 | 27 | .. code-block:: yaml 28 | 29 | resource: 30 | gpus: [list of GPU ids] 31 | gpu_memory_limit: [limit for each GPU in bytes] 32 | cpu_per_gpu: [CPU thread per GPU] 33 | dim: [dim] 34 | 35 | .. note:: 36 | For optimal performance, modules are compiled with pre-defined dimensions in C++. 37 | As a drawback, only dimensions that are powers of 2 are supported in the library. 38 | 39 | .. code-block:: yaml 40 | 41 | format: 42 | delimiters: [string of delimiter characters] 43 | comment: [prefix of comment strings] 44 | 45 | Format section is optional. By default, delimiters are any blank character and comment 46 | is "#", following the Python style. 47 | 48 | .. 
code-block:: yaml 49 | 50 | graph: 51 | file_name: [file name] 52 | as_undirected: [symmetrize the graph or not] 53 | 54 | For standard datasets, we can specify its file name by ``<[dataset].[split]>``. 55 | This would make the configuration file independent of the path. 56 | 57 | .. code-block:: yaml 58 | 59 | build: 60 | optimizer: 61 | type: [type] 62 | lr: [learning rate] 63 | weight_decay: [weight decay] 64 | schedule: [learning rate schedule] 65 | # and other optimizer-specific configuration 66 | num_partition: [number of partitions] 67 | num_negative: [number of negative samples] 68 | batch_size: [batch size] 69 | episode_size: [episode size] 70 | 71 | The number of partitions determines how to deal with multi-GPU or large graph cases. 72 | The more partitions, the less GPU memory consumption and speed. The episode size 73 | controls the synchronization frequency across partitions. 74 | 75 | See section 3.2 in `GraphVite paper `_ for a detailed illustration. 76 | 77 | .. code-block:: yaml 78 | 79 | load: 80 | file_name: [file name] 81 | 82 | Loading a model is optional. 83 | 84 | .. code-block:: yaml 85 | 86 | train: 87 | model: [model] 88 | num_epoch: [number of epochs] 89 | resume: [resume training or not] 90 | log_frequency: [log frequency in batches] 91 | # and other application-specific configuration 92 | 93 | To resume training from a loaded model, set ``resume`` to true in ``train``. 94 | 95 | .. seealso:: 96 | Training interface: 97 | :meth:`Graph `, 98 | :meth:`Knowledge graph `, 99 | :meth:`Visualization ` 100 | 101 | .. code-block:: yaml 102 | 103 | evaluate: 104 | - task: [task] 105 | # and other task-specific configuration 106 | - task: [task] 107 | ... 108 | 109 | Evaluation is optional. There may be multiple evaluation tasks. 110 | 111 | .. 
seealso:: 112 | Evaluation tasks: 113 | 114 | - Graph: \ 115 | :meth:`link prediction `, 116 | :meth:`node classification ` 117 | - Knowledge graph: 118 | :meth:`link prediction `, 119 | :meth:`entity prediction ` 120 | - Visualization: 121 | :meth:`visualization `, 122 | :meth:`animation `, 123 | :meth:`hierarchy ` 124 | 125 | .. code-block:: yaml 126 | 127 | save: 128 | file_name: [file name] 129 | save_hyperparameter: [save hyperparameters or not] 130 | 131 | Saving the model is optional. 132 | 133 | For more detailed settings, we recommend to read the baseline configurations 134 | for concrete examples. They can be found under ``config/`` in the Python package, 135 | or in the `GitHub repository `_. 136 | 137 | Global configuration 138 | -------------------- 139 | 140 | We can overwrite the global settings of GraphVite in ``~/.graphvite/config.yaml``. 141 | 142 | .. code-block:: yaml 143 | 144 | backend: [graphvite or torch] 145 | dataset_path: [path to store downloaded datasets] 146 | float_type: [default float type] 147 | index_type: [default index type] 148 | 149 | By default, the evaluation backend is ``graphvite``. The datasets are stored in 150 | ``~/.graphvite/dataset``. The data types are ``float32`` and ``uint32`` respectively. -------------------------------------------------------------------------------- /doc/source/user/format.rst: -------------------------------------------------------------------------------- 1 | Data Format 2 | =========== 3 | 4 | GraphVite is designed to support a wide range of formats for graphs. Generally, it 5 | doesn't enforce any type restriction on input elements. We can either use integers or 6 | strings as our input. Each line in the file is parsed as 7 | 8 | .. code-block:: 9 | 10 | [token] [delimiter] [token] [delimiter]... [comment]... 11 | 12 | By default, GraphVite treats any blank character as delimiter, and string after ``#`` 13 | as comment. 
You can change these settings in the 14 | :ref:`format section ` of configuration files, or using 15 | ``app.set_format(delimiters, comment)`` in Python code. 16 | 17 | GraphVite can also construct graphs from Python objects, which is helpful if graphs 18 | are dynamically generated. It takes a nested list similar to the file format. Each 19 | token should be a string or a float. 20 | 21 | .. code-block:: python 22 | 23 | graph = [[token, token], [token, token], ...] 24 | 25 | Node Embedding 26 | -------------- 27 | 28 | The input graph for node embedding follows the edge list format. Each line should be 29 | 30 | .. code-block:: 31 | 32 | [head] [tail] 33 | 34 | You may also specify a weight for each edge. 35 | 36 | .. code-block:: 37 | 38 | [head] [tail] [weight] 39 | 40 | For link prediction task, the evaluation file consists of edges and labels. 41 | 42 | .. code-block:: 43 | 44 | [head] [tail] [label] 45 | 46 | where label ``1`` is positive and ``0`` is negative. The filter file takes the same 47 | format as the input graph. 48 | 49 | For node classification task, each line is a node and a label. If a node has more 50 | than one label, it should take multiple lines. 51 | 52 | .. code-block:: 53 | 54 | [node] [label] 55 | 56 | Knowledge Graph Embedding 57 | ------------------------- 58 | 59 | Each line in a knowledge graph is a triplet. 60 | 61 | .. code-block:: 62 | 63 | [head] [relation] [tail] 64 | 65 | You may also specify a weight for each triplet. 66 | 67 | .. code-block:: 68 | 69 | [head] [relation] [tail] [weight] 70 | 71 | All the files in knowledge graph evaluation tasks take the same triplet format. 72 | 73 | Graph & High-dimensional Data Visualization 74 | ------------------------------------------- 75 | 76 | For graph visualization, the input format is same as the graph in node embedding. 77 | 78 | For high-dimensional data visualization, the input format can either be a 2D numpy 79 | array or a text matrix. 
Each row in the matrix is parsed as a point in the 80 | high-dimensional space. -------------------------------------------------------------------------------- /doc/source/user/python.rst: -------------------------------------------------------------------------------- 1 | Python Interface 2 | ================ 3 | 4 | GraphVite provides Python interface for convenient integration with other software. 5 | To use GraphVite in Python, import these two modules in our script. 6 | 7 | >>> import graphvite as gv 8 | >>> import graphvite.application as gap 9 | 10 | The ``graphvite`` module itself provides basic class interface, such as graphs, 11 | solvers, optimizers and datasets. The ``application`` module contains high-level 12 | wrappers of applications, along with their evaluation routines. 13 | 14 | Applications 15 | ------------ 16 | 17 | We can invoke a node embedding application with the following lines. 18 | 19 | >>> app = gap.GraphApplication(dim=128) 20 | >>> app.load(file_name=gv.dataset.blogcatalog.train) 21 | >>> app.build() 22 | >>> app.train() 23 | >>> app.evaluate("node classification", file_name=gv.dataset.blogcatalog.label) 24 | 25 | where the arguments of each member function are identical to those in the 26 | :doc:`configuration files `. 27 | 28 | .. seealso:: 29 | Package reference: :doc:`Application <../api/application>` 30 | 31 | Basic classes 32 | ------------- 33 | 34 | The basic classes are very helpful if we need fine-grained manipulation of the 35 | pipeline. For example, we may train an ensemble of node embedding models on the 36 | same graph. First, create a graph and two node embedding solvers. 37 | 38 | >>> graph = gv.graph.Graph() 39 | >>> graph.load(gv.dataset.blogcatalog.train) 40 | >>> solvers = [gv.solver.GraphSolver(dim=128, device_ids=[gpu], num_sampler_per_worker=4) 41 | ... for gpu in range(2)] 42 | 43 | Then, build the solvers on that graph. This step determines all memory allocation. 
>>> for solver in solvers:
...     solver.build(graph)
-------------------------------------------------------------------------------- /external/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /include/base/alias_table.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "memory.h" 27 | 28 | namespace graphvite { 29 | 30 | template 31 | class AliasTable; 32 | 33 | namespace gpu { 34 | 35 | template 36 | __global__ void Sample(AliasTable sampler, Memory rand, Memory result); 37 | 38 | } 39 | 40 | /** 41 | * @brief CPU / GPU implementation of the alias table algorithm 42 | * 43 | * Generate a sample from any discrete distribution in O(1) time. 
44 | * 45 | * @tparam _Float floating type of probability 46 | * @tparam _Index integral type of indexes 47 | */ 48 | template 49 | class AliasTable { 50 | public: 51 | typedef _Float Float; 52 | typedef _Index Index; 53 | 54 | static const int kThreadPerBlock = 512; 55 | 56 | int device_id; 57 | Index count; 58 | cudaStream_t stream; 59 | Memory prob_table; 60 | Memory alias_table; 61 | 62 | /** @brief Construct an alias table 63 | * @param _device_id GPU id, -1 for CPU 64 | * @param _stream CUDA stream 65 | */ 66 | AliasTable(int _device_id, cudaStream_t _stream = 0) : 67 | device_id(_device_id), count(0), stream(_stream), prob_table(device_id, 0, stream), 68 | alias_table(device_id, 0, stream) {} 69 | 70 | /** Shallow copy constructor */ 71 | AliasTable(const AliasTable &a) : 72 | device_id(a.device_id), count(a.count), stream(a.stream), prob_table(a.prob_table), 73 | alias_table(a.alias_table) {} 74 | 75 | AliasTable &operator=(const AliasTable &) = delete; 76 | 77 | /** Reallocate the memory space */ 78 | void reallocate(Index capacity) { 79 | prob_table.reallocate(capacity); 80 | alias_table.reallocate(capacity); 81 | } 82 | 83 | /** Initialize the table with a distribution */ 84 | void build(const std::vector &_prob_table) { 85 | count = _prob_table.size(); 86 | CHECK(count > 0) << "Invalid sampling distribution"; 87 | prob_table.resize(count); 88 | alias_table.resize(count); 89 | 90 | memcpy(prob_table.host_ptr, _prob_table.data(), count * sizeof(Float)); 91 | // single precision may cause considerable trunctation error 92 | double norm = 0; 93 | for (int i = 0; i < count; i++) 94 | norm += prob_table[i]; 95 | norm = norm / count; 96 | for (int i = 0; i < count; i++) 97 | prob_table[i] /= norm; 98 | 99 | std::queue large, little; 100 | for (int i = 0; i < count; i++) { 101 | if (prob_table[i] < 1) 102 | little.push(i); 103 | else 104 | large.push(i); 105 | } 106 | while (!little.empty() && !large.empty()) { 107 | Index i = little.front(), j = large.front(); 
108 | little.pop(); 109 | large.pop(); 110 | alias_table[i] = j; 111 | prob_table[j] = prob_table[i] + prob_table[j] - 1; 112 | if (prob_table[j] < 1) 113 | little.push(j); 114 | else 115 | large.push(j); 116 | } 117 | // suppress some trunction error 118 | while (!little.empty()) { 119 | Index i = little.front(); 120 | little.pop(); 121 | alias_table[i] = i; 122 | } 123 | while (!large.empty()) { 124 | Index i = large.front(); 125 | large.pop(); 126 | alias_table[i] = i; 127 | } 128 | } 129 | 130 | /** Copy the table to GPU */ 131 | void to_device() { 132 | prob_table.to_device(); 133 | alias_table.to_device(); 134 | } 135 | 136 | /** Copy the table to GPU (asynchronous) */ 137 | void to_device_async() { 138 | prob_table.to_device_async(); 139 | alias_table.to_device_async(); 140 | } 141 | 142 | /** Free GPU memory */ 143 | void clear() { 144 | reallocate(0); 145 | } 146 | 147 | /** Generate a sample on CPU / GPU */ 148 | __host__ __device__ inline Index sample(double rand1, double rand2) const { 149 | Index index = rand1 * count; 150 | Float prob = rand2; 151 | return prob < prob_table[index] ? 
index : alias_table[index]; 152 | } 153 | 154 | /** Generate a batch of samples on GPU */ 155 | void device_sample(const Memory &rand, Memory *result) { 156 | int block_per_grid = (result->count + kThreadPerBlock - 1) / kThreadPerBlock; 157 | gpu::Sample<<>>(*this, rand, *result); 158 | } 159 | 160 | /** 161 | * @param count size of the distribution 162 | * @return GPU memory cost 163 | */ 164 | static size_t gpu_memory_demand(int count) { 165 | size_t demand = 0; 166 | demand += decltype(prob_table)::gpu_memory_demand(count); 167 | demand += decltype(alias_table)::gpu_memory_demand(count); 168 | return demand; 169 | } 170 | }; 171 | 172 | namespace gpu { 173 | 174 | template 175 | __global__ void Sample(AliasTable sampler, Memory random, Memory result) { 176 | int thread_id = blockIdx.x * blockDim.x + threadIdx.x; 177 | if (thread_id < result.count) { 178 | Float rand1 = random[thread_id * 2]; 179 | Float rand2 = random[thread_id * 2 + 1]; 180 | result[thread_id] = sampler.sample(rand1, rand2); 181 | } 182 | } 183 | 184 | } // namespace gpu 185 | 186 | } // namespace graphvite -------------------------------------------------------------------------------- /include/base/memory.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | #include "util/debug.h" 25 | 26 | namespace graphvite { 27 | 28 | /** 29 | * @brief CPU / GPU memory space allocator 30 | * @tparam _T type of data 31 | * @tparam _Index integral type of indexes 32 | */ 33 | template 34 | class Memory { 35 | public: 36 | typedef _T Data; 37 | typedef _Index Index; 38 | 39 | int device_id; 40 | Index count = 0, capacity = 0; 41 | cudaStream_t stream; 42 | int *refer_count = nullptr; 43 | Data *host_ptr = nullptr, *device_ptr = nullptr; 44 | 45 | /** 46 | * @brief Construct a memory space 47 | * @param _device_id GPU id, -1 for CPU 48 | * @param _count number of data 49 | * @param _stream CUDA stream 50 | */ 51 | Memory(int _device_id, Index _count = 0, cudaStream_t _stream = 0) : 52 | device_id(_device_id), stream(_stream) { 53 | resize(_count); 54 | } 55 | 56 | /** Shallow copy constructor */ 57 | Memory(const Memory &m) : 58 | device_id(m.device_id), count(m.count), capacity(m.capacity), stream(m.stream), refer_count(m.refer_count), 59 | host_ptr(m.host_ptr), device_ptr(m.device_ptr) { 60 | if (capacity) 61 | (*refer_count)++; 62 | } 63 | 64 | Memory &operator=(const Memory &) = delete; 65 | 66 | ~Memory() { reallocate(0); } 67 | 68 | /** Swap two memory spaces */ 69 | void swap(Memory &m) { 70 | std::swap(device_id, m.device_id); 71 | std::swap(count, m.count); 72 | std::swap(capacity, m.capacity); 73 | std::swap(stream, m.stream); 74 | std::swap(refer_count, m.refer_count); 75 | std::swap(host_ptr, m.host_ptr); 76 | std::swap(device_ptr, m.device_ptr); 77 | } 78 | 79 | __host__ __device__ Data &operator[](Index index) { 80 | #ifdef __CUDA_ARCH__ 81 | return device_ptr[index]; 82 | #else 83 | return host_ptr[index]; 84 | #endif 85 | } 86 | 87 | __host__ __device__ Data &operator[](Index index) const { 88 | #ifdef __CUDA_ARCH__ 89 | return device_ptr[index]; 90 | #else 91 | return host_ptr[index]; 92 | #endif 93 | } 94 | 
95 | /** Copy data from another memory */ 96 | void copy(const Memory &m) { 97 | resize(m.count); 98 | memcpy(host_ptr, m.host_ptr, count * sizeof(Data)); 99 | } 100 | 101 | /** Copy data from a pointer */ 102 | void copy(void *ptr, Index _count) { 103 | resize(_count); 104 | memcpy(host_ptr, ptr, count * sizeof(Data)); 105 | } 106 | 107 | /** Reallocate the memory space */ 108 | void reallocate(Index _capacity) { 109 | if (capacity && !--(*refer_count)) { 110 | delete refer_count; 111 | #ifdef PINNED_MEMORY 112 | CUDA_CHECK(cudaFreeHost(host_ptr)); 113 | #else 114 | delete [] host_ptr; 115 | #endif 116 | if (device_id != -1) { 117 | CUDA_CHECK(cudaSetDevice(device_id)); 118 | CUDA_CHECK(cudaFree(device_ptr)); 119 | } 120 | } 121 | capacity = _capacity; 122 | if (capacity) { 123 | refer_count = new int(1); 124 | #ifdef PINNED_MEMORY 125 | CUDA_CHECK(cudaMallocHost(&host_ptr, capacity * sizeof(Data))); 126 | #else 127 | host_ptr = new Data[capacity]; 128 | #endif 129 | if (device_id != -1) { 130 | CUDA_CHECK(cudaSetDevice(device_id)); 131 | CUDA_CHECK(cudaMalloc(&device_ptr, capacity * sizeof(Data))); 132 | } 133 | } 134 | } 135 | 136 | /** Resize the memory space. Reallocate only if the capacity is not enough. 
*/ 137 | void resize(Index _count) { 138 | if (_count > capacity || (capacity && *refer_count > 1)) 139 | reallocate(_count); 140 | count = _count; 141 | } 142 | 143 | /** Copy the memory space to GPU */ 144 | void to_device(Index copy_count = 0) { 145 | if (count && device_id != -1) { 146 | if (!copy_count) 147 | copy_count = count; 148 | CUDA_CHECK(cudaSetDevice(device_id)); 149 | CUDA_CHECK(cudaMemcpyAsync(device_ptr, host_ptr, copy_count * sizeof(Data), cudaMemcpyHostToDevice, stream)); 150 | CUDA_CHECK(cudaStreamSynchronize(stream)); 151 | } 152 | } 153 | 154 | /** Copy the memory space to GPU (asynchronous) */ 155 | void to_device_async(Index copy_count = 0) { 156 | if (count && device_id != -1) { 157 | if (!copy_count) 158 | copy_count = count; 159 | CUDA_CHECK(cudaSetDevice(device_id)); 160 | CUDA_CHECK(cudaMemcpyAsync(device_ptr, host_ptr, copy_count * sizeof(Data), cudaMemcpyHostToDevice, stream)); 161 | } 162 | } 163 | 164 | /** Copy the memory space back from GPU */ 165 | void to_host(Index copy_count = 0) { 166 | if (count && device_id != -1) { 167 | if (!copy_count) 168 | copy_count = count; 169 | CUDA_CHECK(cudaSetDevice(device_id)); 170 | CUDA_CHECK(cudaMemcpyAsync(host_ptr, device_ptr, copy_count * sizeof(Data), cudaMemcpyDeviceToHost, stream)); 171 | CUDA_CHECK(cudaStreamSynchronize(stream)); 172 | } 173 | } 174 | 175 | /** Copy the memory space back from GPU (asynchronous) */ 176 | void to_host_async(Index copy_count = 0) { 177 | if (count && device_id != -1) { 178 | if (!copy_count) 179 | copy_count = count; 180 | CUDA_CHECK(cudaSetDevice(device_id)); 181 | CUDA_CHECK(cudaMemcpyAsync(host_ptr, device_ptr, copy_count * sizeof(Data), cudaMemcpyDeviceToHost, stream)); 182 | } 183 | } 184 | 185 | /** Fill the memory space with data. Automatically resize the memory when necessary. 
*/ 186 | void fill(const Data &data, Index _count = 0) { 187 | if (_count) 188 | resize(_count); 189 | for (Index i = 0; i < count; i++) 190 | host_ptr[i] = data; 191 | } 192 | 193 | /** Gather data from a pool according to an index mapping. Automatically resize the memory when necessary. */ 194 | void gather(const std::vector &memory, const std::vector &mapping) { 195 | if (!mapping.empty()) { 196 | resize(mapping.size()); 197 | for (Index i = 0; i < count; i++) 198 | host_ptr[i] = memory[mapping[i]]; 199 | } 200 | else { 201 | resize(memory.size()); 202 | for (Index i = 0; i < count; i++) 203 | host_ptr[i] = memory[i]; 204 | } 205 | } 206 | 207 | /** Scatter data to a pool according to an index mapping */ 208 | void scatter(std::vector &memory, const std::vector &mapping) { 209 | if (!mapping.empty()) { 210 | for (Index i = 0; i < count; i++) 211 | memory[mapping[i]] = host_ptr[i]; 212 | } 213 | else { 214 | for (Index i = 0; i < count; i++) 215 | memory[i] = host_ptr[i]; 216 | } 217 | } 218 | 219 | /** Scatter data to a pool by addition, according to an index mapping */ 220 | void scatter_add(std::vector &memory, const std::vector &mapping) { 221 | if (!mapping.empty()) { 222 | for (Index i = 0; i < count; i++) 223 | memory[mapping[i]] += host_ptr[i]; 224 | } 225 | else { 226 | for (Index i = 0; i < count; i++) 227 | memory[i] += host_ptr[i]; 228 | } 229 | } 230 | 231 | /** Scatter data to a pool by substraction, according to an index mapping */ 232 | void scatter_sub(std::vector &memory, const std::vector &mapping) { 233 | if (!mapping.empty()) { 234 | for (Index i = 0; i < count; i++) 235 | memory[mapping[i]] -= host_ptr[i]; 236 | } 237 | else { 238 | for (Index i = 0; i < count; i++) 239 | memory[i] -= host_ptr[i]; 240 | } 241 | } 242 | 243 | /** 244 | * @param capacity number of data 245 | * @return GPU memory cost 246 | */ 247 | static size_t gpu_memory_demand(int capacity) { 248 | return capacity * sizeof(Data); 249 | } 250 | }; 251 | 252 | } // namespace 
graphvite -------------------------------------------------------------------------------- /include/base/vector.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include "util/gpu.cuh" 23 | 24 | namespace graphvite { 25 | 26 | /** 27 | * @brief Vector computation 28 | * @tparam _dim dimension 29 | * @tparam _Float floating type of data 30 | */ 31 | template 32 | class Vector { 33 | static_assert(std::is_floating_point<_Float>::value, "Vector can only be instantiated with floating point types"); 34 | // static_assert(_dim % gpu::kWarpSize == 0, "`dim` should be divided by 32"); 35 | public: 36 | static const size_t dim = _dim; 37 | typedef size_t Index; 38 | typedef _Float Float; 39 | Float data[dim]; 40 | 41 | /** Default constructor */ 42 | Vector() = default; 43 | 44 | /** Construct a vector of repeat scalar */ 45 | Vector(Float f) { 46 | #pragma unroll 47 | for (Index i = 0; i < dim; i++) 48 | data[i] = f; 49 | } 50 | 51 | __host__ __device__ Float &operator[](Index index) { 52 | return data[index]; 53 | } 54 | 55 | __host__ __device__ Float operator[](Index index) const { 56 | return data[index]; 57 | } 58 | 59 | __host__ __device__ Vector &operator=(const Vector &v) { 60 | #if __CUDA_ARCH__ 61 
| using namespace gpu; 62 | const int lane_id = threadIdx.x % kWarpSize; 63 | for (Index i = lane_id; i < dim; i += kWarpSize) 64 | #else 65 | for (Index i = 0; i < dim; i++) 66 | #endif 67 | data[i] = v[i]; 68 | return *this; 69 | } 70 | 71 | Vector &operator =(Float f) { 72 | #pragma unroll 73 | for (Index i = 0; i < dim; i++) 74 | data[i] = f; 75 | return *this; 76 | } 77 | 78 | Vector &operator +=(const Vector &v) { 79 | #pragma unroll 80 | for (Index i = 0; i < dim; i++) 81 | data[i] += v[i]; 82 | return *this; 83 | } 84 | 85 | 86 | Vector &operator -=(const Vector &v) { 87 | #pragma unroll 88 | for (Index i = 0; i < dim; i++) 89 | data[i] -= v[i]; 90 | return *this; 91 | } 92 | 93 | Vector &operator *=(const Vector &v) { 94 | #pragma unroll 95 | for (Index i = 0; i < dim; i++) 96 | data[i] *= v[i]; 97 | return *this; 98 | } 99 | 100 | Vector &operator /=(const Vector &v) { 101 | #pragma unroll 102 | for (Index i = 0; i < dim; i++) 103 | data[i] /= v[i]; 104 | return *this; 105 | } 106 | 107 | Vector &operator +=(Float f) { 108 | #pragma unroll 109 | for (Index i = 0; i < dim; i++) 110 | data[i] += f; 111 | return *this; 112 | } 113 | 114 | Vector &operator -=(Float f) { 115 | #pragma unroll 116 | for (Index i = 0; i < dim; i++) 117 | data[i] -= f; 118 | return *this; 119 | } 120 | 121 | Vector &operator *=(Float f) { 122 | #pragma unroll 123 | for (Index i = 0; i < dim; i++) 124 | data[i] *= f; 125 | return *this; 126 | } 127 | 128 | Vector &operator /=(Float f) { 129 | #pragma unroll 130 | for (Index i = 0; i < dim; i++) 131 | data[i] /= f; 132 | return *this; 133 | } 134 | 135 | Vector operator +(const Vector &v) { 136 | Vector result; 137 | #pragma unroll 138 | for (Index i = 0; i < dim; i++) 139 | result[i] = (*this)[i] + v[i]; 140 | return result; 141 | } 142 | 143 | Vector operator -(const Vector &v) { 144 | Vector result; 145 | #pragma unroll 146 | for (Index i = 0; i < dim; i++) 147 | result[i] = (*this)[i] - v[i]; 148 | return result; 149 | } 150 | 
151 | Vector operator *(const Vector &v) { 152 | Vector result; 153 | #pragma unroll 154 | for (Index i = 0; i < dim; i++) 155 | result[i] = (*this)[i] * v[i]; 156 | return result; 157 | } 158 | 159 | Vector operator /(const Vector &v) { 160 | Vector result; 161 | #pragma unroll 162 | for (Index i = 0; i < dim; i++) 163 | result[i] = (*this)[i] / v[i]; 164 | return result; 165 | } 166 | 167 | Vector operator +(Float f) { 168 | Vector result; 169 | #pragma unroll 170 | for (Index i = 0; i < dim; i++) 171 | result[i] = (*this)[i] + f; 172 | return result; 173 | } 174 | 175 | Vector operator -(Float f) { 176 | Vector result; 177 | #pragma unroll 178 | for (Index i = 0; i < dim; i++) 179 | result[i] = (*this)[i] - f; 180 | return result; 181 | } 182 | 183 | Vector operator *(Float f) { 184 | Vector result; 185 | #pragma unroll 186 | for (Index i = 0; i < dim; i++) 187 | result[i] = (*this)[i] * f; 188 | return result; 189 | } 190 | 191 | Vector operator /(Float f) { 192 | Vector result; 193 | #pragma unroll 194 | for (Index i = 0; i < dim; i++) 195 | result[i] = (*this)[i] / f; 196 | return result; 197 | } 198 | 199 | friend Vector operator +(Float f, const Vector &v) { 200 | Vector result; 201 | #pragma unroll 202 | for (Index i = 0; i < dim; i++) 203 | result[i] = v[i] + f; 204 | return result; 205 | } 206 | 207 | friend Vector operator -(Float f, const Vector &v) { 208 | Vector result; 209 | #pragma unroll 210 | for (Index i = 0; i < dim; i++) 211 | result[i] = v[i] - f; 212 | return result; 213 | } 214 | 215 | friend Vector operator *(Float f, const Vector &v) { 216 | Vector result; 217 | #pragma unroll 218 | for (Index i = 0; i < dim; i++) 219 | result[i] = v[i] * f; 220 | return result; 221 | } 222 | }; 223 | 224 | } // namespace graphvite -------------------------------------------------------------------------------- /include/core/graph.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. 
All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #define FILE_OFFSET_BITS 64 22 | #include 23 | #undef FILE_OFFSET_BITS 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "util/common.h" 31 | #include "util/debug.h" 32 | 33 | namespace graphvite { 34 | 35 | /** 36 | * @brief General interface of graphs 37 | * @tparam _Index integral type of node indexes 38 | * @tparam _Attributes types of additional edge attributes 39 | * 40 | * @note To add a new graph, you need to 41 | * - derive a template graph class from GraphMixin 42 | * - implement all virtual functions for that class 43 | * - add python binding of instantiations of that class in extension.h & extension.cu 44 | */ 45 | template 46 | class GraphMixin { 47 | public: 48 | typedef _Index Index; 49 | typedef std::tuple VertexEdge; 50 | typedef std::tuple Edge; 51 | 52 | std::vector> vertex_edges; 53 | std::vector edges; 54 | std::vector vertex_weights, edge_weights; 55 | std::vector flat_offsets; 56 | 57 | Index num_vertex; 58 | size_t num_edge; 59 | 60 | #define USING_GRAPH_MIXIN(type) \ 61 | using typename type::VertexEdge; \ 62 | using typename type::Edge; \ 63 | using type::vertex_edges; \ 64 | using type::edges; \ 65 | using type::vertex_weights; \ 66 | using type::edge_weights; \ 67 | using type::num_vertex; \ 68 | using type::num_edge; \ 69 | 
using type::info 70 | 71 | GraphMixin() = default; 72 | GraphMixin(const GraphMixin &) = delete; 73 | GraphMixin &operator=(const GraphMixin &) = delete; 74 | 75 | /** Clear the graph and free CPU memory */ 76 | virtual void clear() { 77 | num_vertex = 0; 78 | num_edge = 0; 79 | decltype(vertex_edges)().swap(vertex_edges); 80 | decltype(edges)().swap(edges); 81 | decltype(vertex_weights)().swap(vertex_weights); 82 | decltype(edge_weights)().swap(edge_weights); 83 | decltype(flat_offsets)().swap(flat_offsets); 84 | } 85 | 86 | /** Flatten the adjacency list to an edge list */ 87 | virtual void flatten() { 88 | if (!edges.empty()) 89 | return; 90 | 91 | size_t offset = 0; 92 | flat_offsets.resize(num_vertex); 93 | for (Index u = 0; u < num_vertex; u++) { 94 | for (auto &&vertex_edge : vertex_edges[u]) { 95 | edges.push_back(std::tuple_cat(std::tie(u), vertex_edge)); 96 | edge_weights.push_back(std::get<1>(vertex_edge)); 97 | } 98 | flat_offsets[u] = offset; 99 | offset += vertex_edges[u].size(); 100 | } 101 | } 102 | 103 | virtual inline std::string name() const { 104 | std::stringstream ss; 105 | ss << "GraphMixin<" << pretty::type2name(); 106 | auto _ = {0, (ss << ", " << pretty::type2name<_Attributes>(), 0)...}; 107 | ss << ">"; 108 | return ss.str(); 109 | } 110 | 111 | virtual inline std::string graph_info() const { 112 | std::stringstream ss; 113 | ss << "#vertex: " << num_vertex << ", #edge: " << num_edge; 114 | return ss.str(); 115 | } 116 | 117 | /** Return information about the graph */ 118 | std::string info() { 119 | std::stringstream ss; 120 | ss << name() << std::endl; 121 | ss << pretty::header("Graph") << std::endl; 122 | ss << graph_info(); 123 | return ss.str(); 124 | } 125 | }; 126 | } // namespace graphvite -------------------------------------------------------------------------------- /include/instance/model/graph.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. 
All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu, Shizhen Xu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include "core/optimizer.h" 22 | #include "util/gpu.cuh" 23 | 24 | namespace graphvite { 25 | 26 | /** 27 | * @brief LINE model 28 | * @tparam _Vector vector type of embeddings 29 | * 30 | * Forward: dot(vertex, context) 31 | * Backward: gradient of forward function 32 | */ 33 | template 34 | class LINE { 35 | public: 36 | static const size_t dim = _Vector::dim; 37 | typedef _Vector Vector; 38 | typedef typename _Vector::Float Float; 39 | 40 | __host__ __device__ static void forward(const Vector &vertex, const Vector &context, Float &output) { 41 | output = 0; 42 | FOR(i, dim) 43 | output += vertex[i] * context[i]; 44 | output = SUM(output); 45 | } 46 | 47 | template 48 | __host__ __device__ 49 | static void backward(Vector &vertex, Vector &context, 50 | Float gradient, const Optimizer &optimizer, Float weight = 1) { 51 | auto update = get_update_function < Float, optimizer_type>(); 52 | FOR(i, dim) { 53 | Float v = vertex[i]; 54 | Float c = context[i]; 55 | vertex[i] -= (optimizer.*update)(v, gradient * c, weight); 56 | context[i] -= (optimizer.*update)(c, gradient * v, weight); 57 | } 58 | } 59 | 60 | template 61 | __host__ __device__ 62 | static void backward(Vector &vertex, Vector &context, Vector &vertex_moment1, Vector &context_moment1, 63 | Float gradient, const 
Optimizer &optimizer, Float weight = 1) { 64 | auto update = get_update_function_1_moment < Float, optimizer_type>(); 65 | FOR(i, dim) { 66 | Float v = vertex[i]; 67 | Float c = context[i]; 68 | vertex[i] -= (optimizer.*update)(v, gradient * c, vertex_moment1[i], weight); 69 | context[i] -= (optimizer.*update)(c, gradient * v, context_moment1[i], weight); 70 | } 71 | } 72 | 73 | template 74 | __host__ __device__ 75 | static void backward(Vector &vertex, Vector &context, Vector &vertex_moment1, Vector &context_moment1, 76 | Vector &vertex_moment2, Vector &context_moment2, 77 | Float gradient, const Optimizer &optimizer, Float weight = 1) { 78 | auto update = get_update_function_2_moment < Float, optimizer_type>(); 79 | FOR(i, dim) { 80 | Float v = vertex[i]; 81 | Float c = context[i]; 82 | vertex[i] -= (optimizer.*update)(v, gradient * c, vertex_moment1[i], vertex_moment2[i], weight); 83 | context[i] -= (optimizer.*update)(c, gradient * v, context_moment1[i], context_moment2[i], weight); 84 | } 85 | } 86 | }; 87 | 88 | /** 89 | * @brief DeepWalk model 90 | * @tparam _Vector vector type of embeddings 91 | * 92 | * Forward: dot(vertex, context) 93 | * Backward: gradient of forward function 94 | */ 95 | template 96 | class DeepWalk : public LINE<_Vector> {}; 97 | 98 | /** 99 | * @brief node2vec model 100 | * @tparam _Vector vector type of embeddings 101 | * 102 | * Forward: dot(vertex, context) 103 | * Backward: gradient of forward function 104 | */ 105 | template 106 | class Node2Vec : public LINE<_Vector> {}; 107 | 108 | } -------------------------------------------------------------------------------- /include/instance/model/visualization.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include "core/optimizer.h" 22 | #include "util/gpu.cuh" 23 | 24 | namespace graphvite { 25 | 26 | /** 27 | * @brief LargeVis model 28 | * @tparam _Vector vector type of embeddings 29 | * 30 | * Forward: L2_norm(head - tail) ^ 2 31 | * Backward: gradient of forward function 32 | */ 33 | template 34 | class LargeVis { 35 | public: 36 | static const size_t dim = _Vector::dim; 37 | typedef _Vector Vector; 38 | typedef typename _Vector::Float Float; 39 | 40 | __host__ __device__ 41 | static void forward(const Vector &head, const Vector &tail, Float &output) { 42 | output = 0; 43 | FOR(i, dim) 44 | output += (head[i] - tail[i]) * (head[i] - tail[i]); 45 | output = SUM(output); 46 | } 47 | 48 | template 49 | __host__ __device__ 50 | static void backward(Vector &head, Vector &tail, Float gradient, const Optimizer &optimizer, Float weight = 1) { 51 | auto update = get_update_function(); 52 | FOR(i, dim) { 53 | Float h = head[i]; 54 | Float t = tail[i]; 55 | head[i] -= (optimizer.*update)(h, gradient * (h - t), weight); 56 | tail[i] -= (optimizer.*update)(t, gradient * (t - h), weight); 57 | } 58 | } 59 | 60 | template 61 | __host__ __device__ 62 | static void backward(Vector &head, Vector &tail, Vector &head_moment1, Vector &tail_moment1, 63 | Float gradient, const Optimizer &optimizer, Float weight = 1) { 64 | auto update = get_update_function_1_moment(); 65 | FOR(i, dim) { 66 | Float h = head[i]; 67 | Float t = tail[i]; 68 | head[i] -= 
(optimizer.*update)(h, gradient * (h - t), head_moment1[i], weight); 69 | tail[i] -= (optimizer.*update)(t, gradient * (t - h), tail_moment1[i], weight); 70 | } 71 | } 72 | 73 | template 74 | __host__ __device__ 75 | static void backward(Vector &head, Vector &tail, Vector &head_moment1, Vector &tail_moment1, 76 | Vector &head_moment2, Vector &tail_moment2, 77 | Float gradient, const Optimizer &optimizer, Float weight = 1) { 78 | auto update = get_update_function_2_moment(); 79 | FOR(i, dim) { 80 | Float h = head[i]; 81 | Float t = tail[i]; 82 | head[i] -= (optimizer.*update)(h, gradient * (h - t), head_moment1[i], head_moment2[i], weight); 83 | tail[i] -= (optimizer.*update)(t, gradient * (t - h), tail_moment1[i], tail_moment2[i], weight); 84 | } 85 | } 86 | }; 87 | 88 | } -------------------------------------------------------------------------------- /include/util/common.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
 *
 * @author Zhaocheng Zhu
 */

#pragma once

#include "io.h"
#include "math.h"

namespace graphvite {

// Mark an API as deprecated with an explanatory message (GCC/Clang attribute).
#define DEPRECATED(reason) __attribute__ ((deprecated(reason)))

// Small constant guarding against division by zero / log(0).
const float kEpsilon = 1e-15;
// Sentinel meaning "let the library choose this value automatically".
const int kAuto = 0;
// Maximum supported length of a single input line (4 MiB).
const size_t kMaxLineLength = 1 << 22;

// Convert a count of KiB to bytes.
constexpr size_t KiB(size_t x) {
    return x << 10;
}

// Convert a count of MiB to bytes.
constexpr size_t MiB(size_t x) {
    return x << 20;
}

// Convert a count of GiB to bytes.
constexpr size_t GiB(size_t x) {
    return x << 30;
}

} // namespace graphvite
--------------------------------------------------------------------------------
/include/util/debug.h:
--------------------------------------------------------------------------------
/**
 * Copyright 2019 MilaGraph. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | namespace graphvite { 26 | 27 | #define CUDA_CHECK(error) CudaCheck((error), __FILE__, __LINE__) 28 | #define CURAND_CHECK(error) CurandCheck((error), __FILE__, __LINE__) 29 | 30 | inline void CudaCheck(cudaError_t error, const char *file_name, int line) { 31 | CHECK(error == cudaSuccess) 32 | << "CUDA error " << cudaGetErrorString(error) << " at " << file_name << ":" << line; 33 | } 34 | 35 | inline void CurandCheck(curandStatus_t error, const char *file_name, int line) { 36 | CHECK(error == CURAND_STATUS_SUCCESS) 37 | << "CURAND error " << error << " at " << file_name << ":" << line; 38 | } 39 | 40 | } // namespace graphvite -------------------------------------------------------------------------------- /include/util/gpu.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * @author Zhaocheng Zhu, Shizhen Xu 17 | */ 18 | 19 | #pragma once 20 | 21 | namespace graphvite { 22 | 23 | // helper macros for CPU-GPU agnostic code 24 | #if __CUDA_ARCH__ 25 | 26 | #define FOR(i, stop) \ 27 | const int lane_id = threadIdx.x % gpu::kWarpSize; \ 28 | for (int i = lane_id; i < (stop); i += gpu::kWarpSize) 29 | #define SUM(x) gpu::WarpBroadcast(gpu::WarpReduce(x), 0) 30 | 31 | #else 32 | 33 | #define FOR(i, stop) \ 34 | for (int i = 0; i < stop; i++) 35 | #define SUM(x) (x) 36 | 37 | #endif 38 | 39 | namespace gpu { 40 | 41 | const int kBlockPerGrid = 8192; 42 | const int kThreadPerBlock = 512; 43 | const int kWarpSize = 32; 44 | const unsigned kFullMask = 0xFFFFFFFF; 45 | 46 | template 47 | __device__ T WarpReduce(T value) { 48 | #pragma unroll 49 | for (int delta = 1; delta < kWarpSize; delta *= 2) 50 | #if __CUDACC_VER_MAJOR__ >= 9 51 | value += __shfl_down_sync(kFullMask, value, delta); 52 | #else 53 | value += __shfl_down(value, delta); 54 | #endif 55 | return value; 56 | } 57 | 58 | template 59 | __device__ T WarpBroadcast(T value, int lane_id) { 60 | #if __CUDACC_VER_MAJOR__ >= 9 61 | return __shfl_sync(kFullMask, value, lane_id); 62 | #else 63 | return __shfl(value, lane_id); 64 | #endif 65 | } 66 | 67 | } // namespace gpu 68 | } // namespace graphvite -------------------------------------------------------------------------------- /include/util/io.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | namespace graphvite { 25 | 26 | void init_logging(int threshold = google::INFO, std::string dir = "", bool verbose = false) { 27 | static bool initialized = false; 28 | 29 | FLAGS_minloglevel = threshold; 30 | if (dir == "") 31 | FLAGS_logtostderr = true; 32 | else 33 | FLAGS_log_dir = dir; 34 | FLAGS_log_prefix = verbose; 35 | if (!initialized) { 36 | google::InitGoogleLogging("graphvite"); 37 | initialized = true; 38 | } 39 | } 40 | 41 | namespace pretty { 42 | 43 | template 44 | std::string type2name(); 45 | 46 | template <> 47 | std::string type2name() { return "float32"; } 48 | 49 | template <> 50 | std::string type2name() { return "float64"; } 51 | 52 | template <> 53 | std::string type2name() { return "int32"; } 54 | 55 | template <> 56 | std::string type2name() { return "uint32"; } 57 | 58 | template <> 59 | std::string type2name() { return "int64"; } 60 | 61 | template <> 62 | std::string type2name() { return "uint64"; } 63 | 64 | std::string yes_no(bool x) { 65 | return x ? 
"yes" : "no"; 66 | } 67 | 68 | std::string size_string(size_t size) { 69 | std::stringstream ss; 70 | ss.precision(3); 71 | if (size >= 1 << 30) 72 | ss << size / float(1 << 30) << " GiB"; 73 | else if (size >= 1 << 20) 74 | ss << size / float(1 << 20) << " MiB"; 75 | else if (size >= 1 << 10) 76 | ss << size / float(1 << 10) << " KiB"; 77 | else 78 | ss << size << " B"; 79 | return ss.str(); 80 | } 81 | 82 | const size_t kLineWidth = 44; 83 | std::string begin(kLineWidth, '<'); 84 | std::string end(kLineWidth, '>'); 85 | 86 | inline std::string block(const std::string &content) { 87 | std::stringstream ss; 88 | ss << begin << std::endl; 89 | ss << content << std::endl; 90 | ss << end << std::endl; 91 | return ss.str(); 92 | } 93 | 94 | inline std::string header(const std::string &content) { 95 | std::stringstream ss; 96 | size_t padding = kLineWidth - content.length() - 2; 97 | std::string line(padding / 2, '-'); 98 | ss << line << " " << content << " " << line; 99 | if (padding % 2 == 1) 100 | ss << '-'; 101 | return ss.str(); 102 | } 103 | 104 | } // namespace pretty 105 | } // namespace graphvite -------------------------------------------------------------------------------- /include/util/math.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | 24 | namespace graphvite { 25 | 26 | #ifndef __CUDA_ARCH__ 27 | using std::abs; // the template version of abs() 28 | #endif 29 | 30 | template 31 | __host__ __device__ Float sigmoid(Float x) { 32 | return x > 0 ? 1 / (1 + exp(-x)) : exp(x) / (exp(x) + 1); 33 | } 34 | 35 | template 36 | __host__ __device__ Float safe_exp(Float x); 37 | 38 | template<> 39 | __host__ __device__ float safe_exp(float x) { 40 | static const float kLogitClip = 80; 41 | #if __CUDA_ARCH__ 42 | return exp(min(max(x, -kLogitClip), kLogitClip)); 43 | #else 44 | return std::exp(std::min(std::max(x, -kLogitClip), kLogitClip)); 45 | #endif 46 | } 47 | 48 | template<> 49 | __host__ __device__ double safe_exp(double x) { 50 | static const double kLogitClip = 700; 51 | #if __CUDA_ARCH__ 52 | return exp(min(max(x, -kLogitClip), kLogitClip)); 53 | #else 54 | return std::exp(std::min(std::max(x, -kLogitClip), kLogitClip)); 55 | #endif 56 | } 57 | 58 | template 59 | __host__ __device__ Integer bit_floor(Integer x) { 60 | static_assert(std::is_integral::value, "bit_floor() can only be invoked with integral types"); 61 | #pragma unroll 62 | for (int i = 1; i < sizeof(Integer) * 8; i *= 2) 63 | x |= x >> i; 64 | return (x + 1) >> 1; 65 | } 66 | 67 | template 68 | __host__ __device__ Integer bit_ceil(Integer x) { 69 | static_assert(std::is_integral::value, "bit_ceil() can only be invoked with integral types"); 70 | x--; 71 | #pragma unroll 72 | for (int i = 1; i < sizeof(Integer) * 8; i *= 2) 73 | x |= x >> i; 74 | return x + 1; 75 | } 76 | 77 | } // namespace graphvie -------------------------------------------------------------------------------- /include/util/time.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #pragma once 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace graphvite { 27 | 28 | #ifdef USE_TIMER 29 | class Timer { 30 | public: 31 | typedef std::chrono::system_clock::time_point time_point; 32 | typedef std::chrono::high_resolution_clock clock; 33 | 34 | static std::unordered_map occurrence; 35 | 36 | const char *prompt; 37 | int log_frequency; 38 | time_point start; 39 | 40 | Timer(const char *_prompt, int _log_frequency = 1) 41 | : prompt(_prompt), log_frequency(_log_frequency), start(clock::now()) { 42 | if (occurrence.find(prompt) == occurrence.end()) 43 | occurrence[prompt] = 0; 44 | } 45 | 46 | ~Timer() { 47 | time_point end = clock::now(); 48 | LOG_IF(INFO, ++occurrence[prompt] == 1) << prompt << ": " << (end - start).count() / 1.0e6 << " ms"; 49 | occurrence[prompt] %= log_frequency; 50 | } 51 | }; 52 | 53 | std::unordered_map Timer::occurrence; 54 | #else 55 | class Timer { 56 | public: 57 | template 58 | Timer(const Args &...args) {} 59 | }; 60 | #endif 61 | 62 | } // namespace graphvite -------------------------------------------------------------------------------- /python/graphvite/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 MilaGraph. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Zhaocheng Zhu

"""GraphVite: graph embedding at high speed and large scale"""
from __future__ import absolute_import, unicode_literals

import os
import sys
# NOTE(review): `imp` is deprecated and removed in Python 3.12; a migration to
# importlib is needed for newer interpreters
import imp
import logging

from . import util

# Locate the compiled C++ extension; candidates cover the installed layout,
# the in-source layout and the CMake build layout.
package_path = os.path.dirname(__file__)
candidate_paths = [
    os.path.realpath(os.path.join(package_path, "lib")),
    os.path.realpath(os.path.join(package_path, "../../lib")),
    os.path.realpath(os.path.join(package_path, "../../build/lib"))
]
lib_file = imp.find_module("libgraphvite", candidate_paths)[1]
lib_path = os.path.dirname(lib_file)
# load from inside the lib directory -- presumably so sibling shared objects
# resolve during dlopen; TODO(review) confirm
with util.chdir(lib_path):
    lib = imp.load_dynamic("libgraphvite", lib_file)

from libgraphvite import dtype, auto, __version__

from . import base
from .base import init_logging
cfg = base.load_global_config()
base.init_logging(logging.INFO)

from . import helper
from . import graph, solver, optimizer
from . import dataset

# re-export the dtype enum members (float32, uint32, ...) at package level
module = sys.modules[__name__]
module.__dict__.update(dtype.__members__)
--------------------------------------------------------------------------------
/python/graphvite/application/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2019 MilaGraph. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Zhaocheng Zhu

"""Application module of GraphVite"""
from __future__ import absolute_import

# re-export the application classes at subpackage level
from .application import Application, \
    GraphApplication, WordGraphApplication, KnowledgeGraphApplication, VisualizationApplication

__all__ = [
    "Application",
    "GraphApplication", "WordGraphApplication", "KnowledgeGraphApplication", "VisualizationApplication"
]
--------------------------------------------------------------------------------
/python/graphvite/application/network.py:
--------------------------------------------------------------------------------
# Copyright 2019 MilaGraph. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Zhaocheng Zhu

"""Neural network definitions for applications"""
from __future__ import absolute_import

import types
import numpy as np

import torch
from torch import nn


class NodeClassifier(nn.Module):
    """
    Node classification network for graphs

    A frozen pretrained embedding table followed by a single linear layer.

    Parameters:
        embedding: pretrained node embedding matrix (num_node x dim)
        num_class (int): number of output classes
        normalization (bool, optional): L2-normalize each embedding row first
    """
    def __init__(self, embedding, num_class, normalization=False):
        super(NodeClassifier, self).__init__()
        if normalization:
            embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
        embedding = torch.as_tensor(embedding)
        # freeze=True: only the linear layer is trained
        self.embeddings = nn.Embedding.from_pretrained(embedding, freeze=True)
        self.linear = nn.Linear(embedding.size(1), num_class, bias=True)

    def forward(self, indexes):
        # node indexes -> embeddings -> class logits
        x = self.embeddings(indexes)
        x = self.linear(x)
        return x


class LinkPredictor(nn.Module):
    """
    Link prediction network for graphs / knowledge graphs

    Parameters:
        score_function: a callable, or the name of one of the static score
            functions defined on this class (e.g. "LINE", "TransE")
        *embeddings: one pretrained embedding matrix per index argument of the
            score function (all frozen)
        **kwargs: extra keyword arguments forwarded to the score function
    """
    def __init__(self, score_function, *embeddings, **kwargs):
        super(LinkPredictor, self).__init__()
        if isinstance(score_function, types.FunctionType):
            self.score_function = score_function
        else:
            # resolve a score function by name, e.g. "RotatE"
            self.score_function = getattr(LinkPredictor, score_function)
        self.kwargs = kwargs
        self.embeddings = nn.ModuleList()
        for embedding in embeddings:
            embedding = torch.as_tensor(embedding)
            embedding = nn.Embedding.from_pretrained(embedding, freeze=True)
            self.embeddings.append(embedding)

    def forward(self, *indexes):
        # one index tensor per embedding table, in the same order
        assert len(indexes) == len(self.embeddings)
        vectors = []
        for index, embedding in zip(indexes, self.embeddings):
            vectors.append(embedding(index))
        return self.score_function(*vectors, **self.kwargs)

    @staticmethod
    def LINE(heads, tails):
        # dot product of head and tail embeddings
        x = heads * tails
        score = x.sum(dim=1)
        return score

    # DeepWalk scores links the same way as LINE
    DeepWalk = LINE

    @staticmethod
    def TransE(heads, relations, tails, margin=12):
        # margin minus L1 distance of the translated head from the tail
        x = heads + relations - tails
        score = margin - x.norm(p=1, dim=1)
        return score

    @staticmethod
    def RotatE(heads, relations, tails, margin=12):
        # entity embeddings store (re, im) pairs interleaved along the last dim;
        # relations carry rotation phases in their first `dim` entries
        dim = heads.size(1) // 2

        head_re, head_im = heads.view(-1, dim, 2).permute(2, 0, 1)
        tail_re, tail_im = tails.view(-1, dim, 2).permute(2, 0, 1)
        relations = relations[:, :dim]
        relation_re, relation_im = torch.cos(relations), torch.sin(relations)

        # complex rotation of the head minus the tail
        x_re = head_re * relation_re - head_im * relation_im - tail_re
        x_im = head_re * relation_im + head_im * relation_re - tail_im
        x = torch.stack([x_re, x_im], dim=0)
        score = margin - x.norm(p=2, dim=0).sum(dim=1)
        return score

    @staticmethod
    def DistMult(heads, relations, tails):
        # trilinear dot product
        x = heads * relations * tails
        score = x.sum(dim=1)
        return score

    @staticmethod
    def ComplEx(heads, relations, tails):
        # all embeddings store (re, im) pairs interleaved along the last dim
        dim = heads.size(1) // 2

        head_re, head_im = heads.view(-1, dim, 2).permute(2, 0, 1)
        tail_re, tail_im = tails.view(-1, dim, 2).permute(2, 0, 1)
        relation_re, relation_im = relations.view(-1, dim, 2).permute(2, 0, 1)

        # Re(<head, relation, conj(tail)>)
        x_re = head_re * relation_re - head_im * relation_im
        x_im = head_re * relation_im + head_im * relation_re
        x = x_re * tail_re + x_im * tail_im
        score = x.sum(dim=1)
        return score

    @staticmethod
    def SimplE(heads, relations, tails):
        dim = heads.size(1) // 2

        # swap the two halves of each tail pair (head/tail role inversion)
        tails = tails.view(-1, dim, 2).flip(2).view(-1, dim * 2)

        x = heads * relations * tails
        score = x.sum(dim=1)
        return score

    @staticmethod
    def QuatE(heads, relations, tails):
        # embeddings store quaternion components (r, i, j, k) interleaved
        dim = heads.size(1) // 4

        head_r, head_i, head_j, head_k = heads.view(-1, dim, 4).permute(2, 0, 1)
        tail_r, tail_i, tail_j, tail_k = tails.view(-1, dim, 4).permute(2, 0, 1)
        relation_r, relation_i, relation_j, relation_k = relations.view(-1, dim, 4).permute(2, 0, 1)

        # Hamilton product of head and relation, normalized by |relation|
        relation_norm = relations.view(-1, dim, 4).norm(p=2, dim=2)
        x_r = head_r * relation_r - head_i * relation_i - head_j * relation_j - head_k * relation_k
        x_i = head_r * relation_i + head_i * relation_r + head_j * relation_k - head_k * relation_j
        x_j = head_r * relation_j - head_i * relation_k + head_j * relation_r + head_k * relation_i
        x_k = head_r * relation_k + head_i * relation_j - head_j * relation_i + head_k * relation_r
        x = (x_r * tail_r + x_i * tail_i + x_j * tail_j + x_k * tail_k) / (relation_norm + 1e-15)
        score = x.sum(dim=1)
        return score
--------------------------------------------------------------------------------
/python/graphvite/base.py:
--------------------------------------------------------------------------------
# Copyright 2019 MilaGraph. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Zhaocheng Zhu

from __future__ import absolute_import

import os
import sys
import yaml
import logging
from easydict import EasyDict

from . 
import lib, dtype 26 | from .util import recursive_default, assert_in 27 | 28 | 29 | root = os.path.expanduser("~/.graphvite") 30 | if not os.path.exists(root): 31 | os.mkdir(root) 32 | 33 | # default config 34 | default = EasyDict() 35 | default.backend = "graphvite" 36 | default.dataset_path = os.path.join(root, "dataset") 37 | default.float_type = dtype.float32 38 | default.index_type = dtype.uint32 39 | 40 | 41 | def load_global_config(): 42 | config_file = os.path.join(root, "config.yaml") 43 | if os.path.exists(config_file): 44 | with open(config_file, "r") as fin: 45 | cfg = EasyDict(yaml.safe_load(fin)) 46 | cfg = recursive_default(cfg, default) 47 | else: 48 | cfg = default 49 | 50 | assert_in(["graphvite", "torch"], backend=cfg.backend) 51 | if not os.path.exists(cfg.dataset_path): 52 | os.mkdir(cfg.dataset_path) 53 | if isinstance(cfg.float_type, str): 54 | cfg.float_type = eval(cfg.float_type) 55 | if isinstance(cfg.index_type, str): 56 | cfg.index_type = eval(cfg.index_type) 57 | 58 | return cfg 59 | 60 | 61 | def init_logging(level=logging.INFO, dir="", verbose=False): 62 | """ 63 | Init logging. 
64 | 65 | Parameters: 66 | level (int, optional): logging level, INFO, WARNING, ERROR or FATAL 67 | dir (str, optional): log directory, leave empty for standard I/O 68 | verbose (bool, optional): verbose mode 69 | """ 70 | logger = logging.getLogger(__package__) 71 | logger.level = level 72 | if dir == "": 73 | logger.handlers = [logging.StreamHandler(sys.stdout)] 74 | else: 75 | logger.handlers = [logging.FileHandler(os.path.join(dir, "log.txt"))] 76 | 77 | if level <= logging.INFO: 78 | lib.init_logging(lib.INFO, dir, verbose) 79 | elif level <= logging.WARNING: 80 | lib.init_logging(lib.WARNING, dir, verbose) 81 | elif level <= logging.ERROR: 82 | lib.init_logging(lib.ERROR, dir, verbose) 83 | else: 84 | lib.init_logging(lib.FATAL, dir, verbose) -------------------------------------------------------------------------------- /python/graphvite/graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 MilaGraph. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # Author: Zhaocheng Zhu 16 | 17 | """Graph module of GraphVite""" 18 | from __future__ import absolute_import 19 | 20 | import sys 21 | 22 | from . 
import lib, cfg 23 | from .helper import find_all_templates, make_helper_class 24 | 25 | module = sys.modules[__name__] 26 | 27 | for name in find_all_templates(lib.graph): 28 | module.__dict__[name] = make_helper_class(lib.graph, name, module, 29 | ["index_type"], [cfg.index_type]) 30 | 31 | __all__ = [ 32 | "Graph", "WordGraph", "KnowledgeGraph", "KNNGraph" 33 | ] -------------------------------------------------------------------------------- /python/graphvite/helper.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 MilaGraph. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # Author: Zhaocheng Zhu 16 | 17 | """Helper functions for loading C++ extension""" 18 | from __future__ import absolute_import, print_function 19 | 20 | import re 21 | 22 | from . 
import lib 23 | lib.name2dtype = {n: t for t, n in lib.dtype2name.items()} 24 | 25 | 26 | def signature(name, *args): 27 | strings = [name] 28 | for arg in args: 29 | if isinstance(arg, lib.dtype): 30 | strings.append(lib.dtype2name[arg]) 31 | else: 32 | strings.append(str(arg)) 33 | return "_".join(strings) 34 | 35 | 36 | def find_all_names(module): 37 | pattern = re.compile("[^_]+") 38 | names = [] 39 | for name in module.__dict__: 40 | if pattern.match(name): 41 | names.append(name) 42 | return names 43 | 44 | 45 | def find_all_templates(module): 46 | pattern = re.compile("([^_]+)(?:_[^_]+)+") 47 | names = set() 48 | for full_name in module.__dict__: 49 | result = pattern.match(full_name) 50 | if result: 51 | names.add(result.group(1)) 52 | return list(names) 53 | 54 | 55 | def get_any_instantiation(module, name): 56 | pattern = re.compile("%s(?:_[^_]+)+" % name) 57 | for full_name in module.__dict__: 58 | if pattern.match(full_name): 59 | return getattr(module, full_name) 60 | 61 | 62 | def get_instantiation_info(module, name, template_keys): 63 | pattern = re.compile("%s((?:_[^_]+)+)" % name) 64 | possible_parameters = [] 65 | for full_name in module.__dict__: 66 | result = pattern.match(full_name) 67 | if result: 68 | possible_parameters.append(result.group(1).split("_")[1:]) 69 | template_values = zip(*possible_parameters) 70 | 71 | infos = ["Instantiations:"] 72 | for key, values in zip(template_keys, template_values): 73 | values = list(set(values)) 74 | if values[0] in lib.name2dtype: 75 | values = [lib.name2dtype[v] for v in values] 76 | else: 77 | values = sorted(eval(v) for v in values) 78 | values = [str(v) for v in values] 79 | infos.append("- **%s**: %s" % (key, ", ".join(values))) 80 | return "\n ".join(infos) 81 | 82 | 83 | class TemplateHelper(object): 84 | 85 | def __new__(cls, *args, **kwargs): 86 | args = list(args) 87 | parameters = [] 88 | for i, key in enumerate(cls.template_keys): 89 | if args: 90 | parameters.append(args.pop(0)) 91 | elif 
key in kwargs: 92 | parameters.append(kwargs.pop(key)) 93 | else: 94 | value = cls.template_values[i] 95 | if value is None: 96 | raise TypeError("Required argument `%s` (pos %d) not found" % (key, i)) 97 | else: 98 | parameters.append(value) 99 | 100 | full_name = signature(cls.name, *parameters) 101 | if hasattr(cls.module, full_name): 102 | return getattr(cls.module, full_name)(*args, **kwargs) 103 | else: 104 | strings = ["%s=%s" % (k, v) for k, v in zip(cls.template_keys, parameters)] 105 | raise AttributeError("Can't find an instantiation of %s with %s" % (cls.name, ", ".join(strings))) 106 | 107 | 108 | def make_helper_class(module, name, target_module, template_keys, template_values): 109 | InstanceClass = get_any_instantiation(module, name) 110 | # copy all members so that autodoc can work 111 | members = dict(InstanceClass.__dict__) 112 | # add instantiation info to docstring 113 | doc = InstanceClass.__doc__ 114 | indent = re.search("\n *", doc).group(0) 115 | info = "\n" + get_instantiation_info(module, name, template_keys) 116 | doc += info.replace("\n", indent) 117 | members.update({ 118 | "module": module, 119 | "name": name, 120 | "__module__": target_module.__name__, 121 | "__doc__": doc, 122 | "template_keys": template_keys, 123 | "template_values": template_values 124 | }) 125 | TemplateClass = type(name, (TemplateHelper,), members) 126 | return TemplateClass -------------------------------------------------------------------------------- /python/graphvite/optimizer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 MilaGraph. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # Author: Zhaocheng Zhu 16 | 17 | """Optimizer module of GraphVite""" 18 | from __future__ import absolute_import 19 | 20 | import sys 21 | 22 | from . import lib, auto 23 | from .helper import find_all_names 24 | 25 | module = sys.modules[__name__] 26 | 27 | 28 | class Optimizer(object): 29 | """ 30 | Optimizer(type=auto, *args, **kwargs) 31 | Create an optimizer instance of any type. 32 | 33 | Parameters: 34 | type (str or auto): optimizer type, 35 | can be 'SGD', 'Momentum', 'AdaGrad', 'RMSprop' or 'Adam' 36 | """ 37 | def __new__(cls, type=auto, *args, **kwargs): 38 | if type == auto: 39 | return lib.optimizer.Optimizer(auto) 40 | elif hasattr(lib.optimizer, type): 41 | return getattr(lib.optimizer, type)(*args, **kwargs) 42 | else: 43 | raise ValueError("Unknown optimizer `%s`" % type) 44 | 45 | 46 | for name in find_all_names(lib.optimizer): 47 | if name not in module.__dict__: 48 | Class = getattr(lib.optimizer, name) 49 | # transfer module ownership so that autodoc can work 50 | Class.__module__ = Class.__module__.replace("libgraphvite", "graphvite") 51 | module.__dict__[name] = Class 52 | 53 | __all__ = [ 54 | "Optimizer", 55 | "LRSchedule", 56 | "SGD", "Momentum", "AdaGrad", "RMSprop", "Adam" 57 | ] -------------------------------------------------------------------------------- /python/graphvite/solver.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 MilaGraph. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # Author: Zhaocheng Zhu 16 | 17 | """Solver module of GraphVite""" 18 | from __future__ import absolute_import 19 | 20 | import sys 21 | 22 | from . import lib, cfg 23 | from .helper import find_all_templates, make_helper_class 24 | 25 | module = sys.modules[__name__] 26 | 27 | for name in find_all_templates(lib.solver): 28 | module.__dict__[name] = make_helper_class(lib.solver, name, module, 29 | ["dim", "float_type", "index_type"], 30 | [None, cfg.float_type, cfg.index_type]) 31 | 32 | __all__ = [ 33 | "GraphSolver", "KnowledgeGraphSolver", "VisualizationSolver" 34 | ] -------------------------------------------------------------------------------- /python/graphvite/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 MilaGraph. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # 15 | # Author: Zhaocheng Zhu 16 | 17 | """Util module of GraphVite""" 18 | from __future__ import print_function, absolute_import 19 | 20 | import os 21 | import sys 22 | import logging 23 | import tempfile 24 | from time import time 25 | from functools import wraps 26 | 27 | import numpy as np 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | 32 | def recursive_default(obj, default): 33 | if isinstance(default, dict): 34 | new_obj = {} 35 | for key in default: 36 | if key in obj: 37 | new_obj[key] = recursive_default(obj[key], default[key]) 38 | else: 39 | new_obj[key] = default[key] 40 | return type(default)(new_obj) 41 | else: 42 | return obj 43 | 44 | 45 | def recursive_map(obj, function): 46 | if isinstance(obj, dict): 47 | return type(obj)({k: recursive_map(v, function) for k, v in obj.items()}) 48 | elif isinstance(obj, list): 49 | return type(obj)([recursive_map(x, function) for x in obj]) 50 | else: 51 | return function(obj) 52 | 53 | 54 | def assert_in(candidates, **kwargs): 55 | 56 | def readable_list(iterable): 57 | iterable = ["`%s`" % x for x in iterable] 58 | s = ", ".join(iterable[:-1]) 59 | if s: 60 | s += " and " 61 | s += iterable[-1] 62 | return s 63 | 64 | for key, value in kwargs.items(): 65 | assert value in candidates, \ 66 | "Unknown %s `%s`, candidates are %s" % (key, value, readable_list(candidates)) 67 | 68 | 69 | class chdir(object): 70 | """ 71 | Context manager for working directory. 72 | 73 | Parameters: 74 | dir (str): new working directory 75 | """ 76 | def __init__(self, dir): 77 | self.dir = dir 78 | 79 | def __enter__(self): 80 | self.old_dir = os.getcwd() 81 | os.chdir(self.dir) 82 | 83 | def __exit__(self, *args): 84 | os.chdir(self.old_dir) 85 | 86 | 87 | class SharedNDArray(np.memmap): 88 | """ 89 | Shared numpy ndarray with serialization interface. 
90 | This class can be used as a drop-in replacement for arguments in multiprocessing. 91 | 92 | Parameters: 93 | array (array-like): input data 94 | """ 95 | def __new__(cls, array): 96 | if "linux" not in sys.platform: 97 | raise EnvironmentError("SharedNDArray only works on Linux") 98 | 99 | array = np.asarray(array) 100 | file = tempfile.NamedTemporaryFile() 101 | self = super(SharedNDArray, cls).__new__(cls, file, dtype=array.dtype, shape=array.shape) 102 | # keep reference to the tmp file, otherwise it will be released 103 | self.file = file 104 | self[:] = array 105 | return self 106 | 107 | @classmethod 108 | def from_memmap(cls, *args, **kwargs): 109 | return super(SharedNDArray, cls).__new__(cls, *args, **kwargs) 110 | 111 | def __reduce__(self): 112 | order = "C" if self.flags["C_CONTIGUOUS"] else "F" 113 | return self.__class__.from_memmap, (self.filename, self.dtype, self.mode, self.offset, self.shape, order) 114 | 115 | def __array_wrap__(self, arr, context=None): 116 | arr = super(np.memmap, self).__array_wrap__(arr, context) 117 | 118 | if self is arr or type(self) is not SharedNDArray: 119 | return arr 120 | if arr.shape == (): 121 | return arr[()] 122 | 123 | return arr.view(np.ndarray) 124 | 125 | 126 | class Monitor(object): 127 | """ 128 | Function call monitor. 
129 | 130 | Parameters: 131 | name_style (str): style of displayed function name, 132 | can be `full`, `class` or `func` 133 | """ 134 | 135 | def __init__(self, name_style="class"): 136 | assert name_style in ["full", "class", "func"] 137 | self.name_style = name_style 138 | 139 | def get_name(self, function, instance): 140 | is_method = function.__code__.co_argcount > 0 and function.__code__.co_varnames[0] == "self" 141 | if self.name_style == "func" or not is_method: 142 | return "%s" % function.__name__ 143 | if self.name_style == "class": 144 | return "%s.%s" % (instance.__class__.__name__, function.__name__) 145 | if self.name_style == "full": 146 | return "%s.%s.%s" % (instance.__module__, instance.__class__.__name__, function.__name__) 147 | 148 | def time(self, function): 149 | """ 150 | Monitor the run time of a function. 151 | 152 | Parameters: 153 | function (function): function to monitor 154 | 155 | Returns: 156 | function: wrapped function 157 | """ 158 | @wraps(function) 159 | def wrapper(*args, **kwargs): 160 | name = self.get_name(function, args[0]) 161 | start = time() 162 | result = function(*args, **kwargs) 163 | end = time() 164 | logger.info("[time] %s: %g s" % (name, end - start)) 165 | return result 166 | 167 | return wrapper 168 | 169 | def call(self, function): 170 | """ 171 | Monitor the arguments of a function. 172 | 173 | Parameters: 174 | function (function): function to monitor 175 | 176 | Returns: 177 | function: wrapped function 178 | """ 179 | @wraps(function) 180 | def wrapper(*args, **kwargs): 181 | name = self.get_name(function, args[0]) 182 | strings = ["%s" % repr(arg) for arg in args] 183 | strings += ["%s=%s" % (k, repr(v)) for k, v in kwargs.items()] 184 | logger.info("[call] %s(%s)" % (name, ", ".join(strings))) 185 | return function(*args, **kwargs) 186 | 187 | return wrapper 188 | 189 | def result(self, function): 190 | """ 191 | Monitor the return value of a function. 
192 | 193 | Parameters: 194 | function (function): function to monitor 195 | 196 | Returns: 197 | function: wrapped function 198 | """ 199 | @wraps(function) 200 | def wrapper(*args, **kwargs): 201 | name = self.get_name(function, args[0]) 202 | strings = ["%s" % repr(arg) for arg in args] 203 | strings += ["%s=%s" % (k, repr(v)) for k, v in kwargs.items()] 204 | result = function(*args, **kwargs) 205 | logger.info("[result] %s(%s) = %s" % (name, ", ".join(strings), result)) 206 | return result 207 | 208 | return wrapper 209 | 210 | 211 | monitor = Monitor() -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 MilaGraph. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # 15 | # Author: Zhaocheng Zhu 16 | 17 | from __future__ import print_function, absolute_import 18 | 19 | import os 20 | from setuptools import setup, find_packages 21 | 22 | from graphvite import __version__, lib_path, lib_file 23 | 24 | name = "graphvite" 25 | faiss_file = os.path.join(lib_path, "libfaiss.so") 26 | project_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 27 | 28 | # library files 29 | install_path = os.path.join(name, "lib") 30 | data_files = [(install_path, [lib_file, faiss_file])] 31 | # configuration files 32 | for path, dirs, files in os.walk(os.path.join(project_path, "config")): 33 | install_path = os.path.join(name, os.path.relpath(path, project_path)) 34 | files = [os.path.join(path, file) for file in files] 35 | data_files.append((install_path, files)) 36 | 37 | setup( 38 | name=name, 39 | version=__version__, 40 | description="A general and high-performance graph embedding system for various applications", 41 | packages=find_packages(), 42 | data_files=data_files, 43 | entry_points={"console_scripts": ["graphvite = graphvite.cmd:main"]}, 44 | zip_safe=False, 45 | #install_requires=["numpy", "pyyaml", "easydict", "six", "future"], 46 | #extras_requires={"app": ["imageio", "psutil", "scipy", "matplotlib", "torch", "torchvision", "nltk"]} 47 | ) -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if (WIN32) 2 | add_library(graphvite graphvite.cu) 3 | else () 4 | add_library(graphvite SHARED graphvite.cu) 5 | set_target_properties(graphvite PROPERTIES 6 | CXX_VISIBILITY_PRESET "hidden" 7 | CUDA_VISIBILITY_PRESET "hidden" 8 | LINK_FLAGS "-flto -Wl,-rpath=$ORIGIN" 9 | OUTPUT_NAME graphvite) 10 | 11 | target_link_libraries(graphvite pthread curand glog.so) 12 | if (FAISS_LIBRARY) 13 | target_link_libraries(graphvite faiss.so) 14 | endif() 15 | target_compile_options(graphvite PRIVATE 
"-Xcompiler=-fno-fat-lto-objects") # -flto 16 | endif () -------------------------------------------------------------------------------- /src/graphvite.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019 MilaGraph. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * @author Zhaocheng Zhu 17 | */ 18 | 19 | #include "util/common.h" 20 | 21 | //#define USE_TIMER 22 | //#define PINNED_MEMORY 23 | 24 | #include "bind.h" 25 | 26 | const std::string version = "0.2.2"; 27 | 28 | PYBIND11_MODULE(libgraphvite, module) { 29 | py::options options; 30 | options.disable_function_signatures(); 31 | 32 | // optimizers 33 | auto optimizer = module.def_submodule("optimizer"); 34 | pyLRSchedule(optimizer, "LRSchedule"); 35 | pyOptimizer(optimizer, "Optimizer"); 36 | pySGD(optimizer, "SGD"); 37 | pyMomentum(optimizer, "Momentum"); 38 | pyAdaGrad(optimizer, "AdaGrad"); 39 | pyRMSprop(optimizer, "RMSprop"); 40 | pyAdam(optimizer, "Adam"); 41 | 42 | // graphs 43 | auto graph = module.def_submodule("graph"); 44 | pyGraph(graph, "Graph"); 45 | pyWordGraph(graph, "WordGraph"); 46 | pyKnowledgeGraph(graph, "KnowledgeGraph"); 47 | pyKNNGraph(graph, "KNNGraph"); 48 | 49 | // solvers 50 | auto solver = module.def_submodule("solver"); 51 | 52 | pyGraphSolver<128, float, unsigned int>(solver, "GraphSolver"); 53 | #ifndef FAST_COMPILE 54 | pyGraphSolver<32, float, 
unsigned int>(solver, "GraphSolver"); 55 | pyGraphSolver<64, float, unsigned int>(solver, "GraphSolver"); 56 | pyGraphSolver<96, float, unsigned int>(solver, "GraphSolver"); 57 | pyGraphSolver<256, float, unsigned int>(solver, "GraphSolver"); 58 | pyGraphSolver<512, float, unsigned int>(solver, "GraphSolver"); 59 | #endif 60 | 61 | pyKnowledgeGraphSolver<512, float, unsigned int>(solver, "KnowledgeGraphSolver"); 62 | pyKnowledgeGraphSolver<1024, float, unsigned int>(solver, "KnowledgeGraphSolver"); 63 | pyKnowledgeGraphSolver<2048, float, unsigned int>(solver, "KnowledgeGraphSolver"); 64 | #ifndef FAST_COMPILE 65 | pyKnowledgeGraphSolver<32, float, unsigned int>(solver, "KnowledgeGraphSolver"); 66 | pyKnowledgeGraphSolver<64, float, unsigned int>(solver, "KnowledgeGraphSolver"); 67 | pyKnowledgeGraphSolver<96, float, unsigned int>(solver, "KnowledgeGraphSolver"); 68 | pyKnowledgeGraphSolver<128, float, unsigned int>(solver, "KnowledgeGraphSolver"); 69 | pyKnowledgeGraphSolver<256, float, unsigned int>(solver, "KnowledgeGraphSolver"); 70 | #endif 71 | 72 | pyVisualizationSolver<2, float, unsigned int>(solver, "VisualizationSolver"); 73 | #ifndef FAST_COMPILE 74 | pyVisualizationSolver<3, float, unsigned int>(solver, "VisualizationSolver"); 75 | #endif 76 | 77 | // interface 78 | py::enum_ pyDType(module, "dtype"); 79 | pyDType.value("uint32", DType::uint32) 80 | .value("uint64", DType::uint64) 81 | .value("float32", DType::float32) 82 | .value("float64", DType::float64); 83 | module.attr("dtype2name") = dtype2name; 84 | 85 | // glog 86 | module.def("init_logging", graphvite::init_logging, py::no_gil(), 87 | py::arg("threshhold") = google::INFO, py::arg("dir") = "", py::arg("verbose") = false); 88 | module.attr("INFO") = google::INFO; 89 | module.attr("WARNING") = google::WARNING; 90 | module.attr("ERROR") = google::ERROR; 91 | module.attr("FATAL") = google::FATAL; 92 | 93 | // io 94 | auto io = module.def_submodule("io"); 95 | io.def("size_string", 
graphvite::pretty::size_string, py::no_gil(), py::arg("size")); 96 | io.def("yes_no", graphvite::pretty::yes_no, py::no_gil(), py::arg("x")); 97 | io.def("block", graphvite::pretty::block, py::no_gil(), py::arg("content")); 98 | io.def("header", graphvite::pretty::header, py::no_gil(), py::arg("content")); 99 | 100 | module.attr("auto") = graphvite::kAuto; 101 | module.def("KiB", graphvite::KiB, py::no_gil(), py::arg("size")); 102 | module.def("MiB", graphvite::MiB, py::no_gil(), py::arg("size")); 103 | module.def("GiB", graphvite::GiB, py::no_gil(), py::arg("size")); 104 | 105 | module.attr("__version__") = version; 106 | } --------------------------------------------------------------------------------