├── .gitignore
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE
├── README.md
├── asset
    ├── graph.png
    ├── knowledge_graph.png
    ├── logo
    │   ├── favicon.ico
    │   └── logo.png
    ├── visualization.png
    └── visualization
    │   ├── imagenet_hierarchy.gif
    │   └── mnist_3d.gif
├── cmake
    ├── FindGFlags.cmake
    ├── FindGlog.cmake
    └── FindPythonLibsNew.cmake
├── conda
    ├── conda_build_config.yaml
    ├── graphvite-mini
    │   ├── build.sh
    │   └── meta.yaml
    ├── graphvite
    │   ├── build.sh
    │   └── meta.yaml
    └── requirements.txt
├── config
    ├── demo
    │   ├── math.yaml
    │   └── quick_start.yaml
    ├── graph
    │   ├── deepwalk_flickr.yaml
    │   ├── deepwalk_friendster-small.yaml
    │   ├── deepwalk_friendster.yaml
    │   ├── deepwalk_hyperlink-pld.yaml
    │   ├── deepwalk_youtube.yaml
    │   ├── line_flickr.yaml
    │   ├── line_friendster-small.yaml
    │   ├── line_friendster.yaml
    │   ├── line_hyperlink-pld.yaml
    │   ├── line_youtube.yaml
    │   └── node2vec_youtube.yaml
    ├── knowledge_graph
    │   ├── complex_fb15k-237.yaml
    │   ├── complex_fb15k.yaml
    │   ├── complex_wikidata5m.yaml
    │   ├── complex_wn18.yaml
    │   ├── complex_wn18rr.yaml
    │   ├── distmult_fb15k-237.yaml
    │   ├── distmult_fb15k.yaml
    │   ├── distmult_wikidata5m.yaml
    │   ├── distmult_wn18.yaml
    │   ├── distmult_wn18rr.yaml
    │   ├── quate_fb15k-237.yaml
    │   ├── quate_fb15k.yaml
    │   ├── quate_wikidata5m.yaml
    │   ├── quate_wn18.yaml
    │   ├── quate_wn18rr.yaml
    │   ├── rotate_fb15k-237.yaml
    │   ├── rotate_fb15k.yaml
    │   ├── rotate_wikidata5m.yaml
    │   ├── rotate_wn18.yaml
    │   ├── rotate_wn18rr.yaml
    │   ├── simple_fb15k-237.yaml
    │   ├── simple_fb15k.yaml
    │   ├── simple_wikidata5m.yaml
    │   ├── simple_wn18.yaml
    │   ├── simple_wn18rr.yaml
    │   ├── transe_fb15k-237.yaml
    │   ├── transe_fb15k.yaml
    │   ├── transe_wikidata5m.yaml
    │   ├── transe_wn18.yaml
    │   └── transe_wn18rr.yaml
    ├── template
    │   ├── graph.yaml
    │   ├── knowledge_graph.yaml
    │   ├── visualization.yaml
    │   └── word_graph.yaml
    ├── visualization
    │   ├── largevis_imagenet.yaml
    │   ├── largevis_mnist_2d.yaml
    │   └── largevis_mnist_3d.yaml
    └── word_graph
    │   └── line_wikipedia.yaml
├── doc
    ├── Makefile
    └── source
    │   ├── api
    │       ├── application.rst
    │       ├── dataset.rst
    │       ├── graph.rst
    │       ├── optimizer.rst
    │       └── solver.rst
    │   ├── benchmark.rst
    │   ├── conf.py
    │   ├── developer
    │       ├── framework.rst
    │       ├── model.rst
    │       ├── routine.rst
    │       └── solver.rst
    │   ├── faq.rst
    │   ├── index.rst
    │   ├── install.rst
    │   ├── introduction.rst
    │   ├── link.rst
    │   ├── overview.rst
    │   ├── pretrained_model.rst
    │   ├── quick_start.rst
    │   └── user
    │       ├── auto.rst
    │       ├── command_line.rst
    │       ├── configuration.rst
    │       ├── format.rst
    │       └── python.rst
├── external
    └── .gitignore
├── include
    ├── base
    │   ├── alias_table.cuh
    │   ├── memory.h
    │   └── vector.h
    ├── bind.h
    ├── core
    │   ├── graph.h
    │   ├── optimizer.h
    │   └── solver.h
    ├── instance
    │   ├── gpu
    │   │   ├── graph.cuh
    │   │   ├── knowledge_graph.cuh
    │   │   └── visualization.cuh
    │   ├── graph.cuh
    │   ├── knowledge_graph.cuh
    │   ├── model
    │   │   ├── graph.h
    │   │   ├── knowledge_graph.h
    │   │   └── visualization.h
    │   ├── visualization.cuh
    │   └── word_graph.cuh
    └── util
    │   ├── common.h
    │   ├── debug.h
    │   ├── gpu.cuh
    │   ├── io.h
    │   ├── math.h
    │   └── time.h
├── python
    ├── graphvite
    │   ├── __init__.py
    │   ├── application
    │   │   ├── __init__.py
    │   │   ├── application.py
    │   │   └── network.py
    │   ├── base.py
    │   ├── cmd.py
    │   ├── dataset.py
    │   ├── graph.py
    │   ├── helper.py
    │   ├── optimizer.py
    │   ├── solver.py
    │   └── util.py
    └── setup.py
└── src
    ├── CMakeLists.txt
    └── graphvite.cu


/.gitignore:
--------------------------------------------------------------------------------
 1 | # build
 2 | /build/
 3 | /doc/build
 4 | 
 5 | # cmake
 6 | /Makefile
 7 | /src/Makefile
 8 | /cmake-build-*/
 9 | 
10 | # JetBrains
11 | /.idea


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | Change log
 2 | ==========
 3 | 
 4 | Here list all notable changes in GraphVite library.
 5 | 
 6 | v0.2.2 - 2020-03-11
 7 | -------------------
 8 | - New model QuatE and its benchmarks on 5 knowledge graph datasets.
 9 | - Add an option to skip `faiss` in compilation.
10 | - Fix instructions for conda installation.
11 | 
12 | v0.2.1 - 2019-11-12
13 | -------------------
14 | - New dataset `Wikidata5m` and its benchmarks,
15 |   including TransE, DistMult, ComplEx, SimplE and RotatE.
16 | - Add interface for loading pretrained models and save hyperparameters.
17 | - Add weight clip in asynchronous self-adversarial negative sampling.
18 | 
19 | v0.2.0 - 2019-10-11
20 | -------------------
21 | - Add scalable multi-GPU prediction for node embedding and knowledge graph embedding.
22 |   Evaluation on link prediction is 4.6x faster than v0.1.0.
23 | - New demo dataset `math` and entity prediction evaluation for knowledge graph.
24 | - Support Kepler and Turing GPU architectures.
25 | - Automatically choose the best episode size with regrad to RAM limit.
26 | - Add template config files for applications.
27 | - Change the update of global embeddings from average to accumulation. Fix a serious
28 |   numeric problem in the update.
29 | - Move file format settings from graph to application. Now one can customize formats
30 |   and use comments in evaluation files. Add document for data format.
31 | - Separate GPU implementation into training routines and models. Routines are in
32 |   `include/instance/gpu/*` and models are in `include/instance/model/*`.
33 | 
34 | v0.1.0 - 2019-08-05
35 | -------------------
36 | - Multi-GPU training of large-scale graph embedding 
37 | - 3 applications: node embedding, knowledge graph embedding and graph &
38 |   high-dimensional data visualization
39 | - Node embedding
40 |     - Model: DeepWalk, LINE, node2vec
41 |     - Evaluation: node classification, link prediction
42 | - Knowledge graph embedding
43 |     - Model: TransE, DistMult, ComplEx, SimplE, RotatE
44 |     - Evaluation: link prediction
45 | - Graph & High-dimensional data visualization
46 |     - Model: LargeVis
47 |     - Evaluation: visualization(2D / 3D), animation(3D), hierarchy(2D)


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | if (WIN32 OR APPLE)
  2 |     set(SKIP_TOOLCHAIN TRUE)
  3 | endif()
  4 | 
  5 | if (SKIP_TOOLCHAIN)
  6 |     # skip toolchain so that code insight can work properly
  7 |     cmake_minimum_required(VERSION 3.0)
  8 |     project(graphvite LANGUAGES CXX)
  9 |     include_directories(include)
 10 |     add_subdirectory(src)
 11 |     return()
 12 | endif()
 13 | 
 14 | cmake_minimum_required(VERSION 3.12)
 15 | project(graphvite LANGUAGES CXX CUDA)
 16 | 
 17 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
 18 | include(FindCUDA)
 19 | 
 20 | find_package(Glog)
 21 | find_package(GFlags)
 22 | find_package(PythonLibsNew REQUIRED)
 23 | find_package(pybind11 REQUIRED)
 24 | 
 25 | # CUDA_ROOT & CUDA_ARCH
 26 | get_filename_component(CUDA_ROOT ${CMAKE_CUDA_COMPILER} DIRECTORY)
 27 | get_filename_component(CUDA_ROOT ${CUDA_ROOT} DIRECTORY)
 28 | if (NOT ALL_ARCH)
 29 |     cuda_select_nvcc_arch_flags(CUDA_ARCH Auto)
 30 | else()
 31 |     cuda_select_nvcc_arch_flags(CUDA_ARCH 3.5 5.0 6.0 7.0)
 32 | endif()
 33 | string(REPLACE ";" " " CUDA_ARCH "${CUDA_ARCH}")
 34 | 
 35 | # add externals
 36 | include(ExternalProject)
 37 | set(EXTERNAL_DIR ${PROJECT_SOURCE_DIR}/external)
 38 | 
 39 | if (NOT UPDATE)
 40 |     set(SKIP_UPDATE TRUE)
 41 | else()
 42 |     set(SKIP_UPDATE FALSE)
 43 | endif()
 44 | 
 45 | # glog
 46 | if (NOT GLOG_FOUND)
 47 |     message(WARNING "Can't find glog library. It will be installed from git repository.")
 48 |     ExternalProject_Add(glog
 49 |             GIT_REPOSITORY      https://github.com/google/glog.git
 50 |             UPDATE_DISCONNECTED ${SKIP_UPDATE}
 51 |             PREFIX              glog
 52 |             SOURCE_DIR          ${EXTERNAL_DIR}/glog
 53 |             BINARY_DIR          ${PROJECT_BINARY_DIR}/glog
 54 |             INSTALL_DIR         ${PROJECT_BINARY_DIR}/glog
 55 |             CONFIGURE_COMMAND   test -e Makefile && exit ||
 56 |                                 cd ${EXTERNAL_DIR}/glog && ./autogen.sh && cd - &&
 57 |                                 ${EXTERNAL_DIR}/glog/configure
 58 |             BUILD_COMMAND       test -e .libs/libglog.a || make -j
 59 |             INSTALL_COMMAND     test -e include/glog/logging.h || make install prefix=${PROJECT_BINARY_DIR}/glog)
 60 |     include_directories(${PROJECT_BINARY_DIR}/glog/include)
 61 |     link_directories(${PROJECT_BINARY_DIR}/glog/.libs)
 62 | else()
 63 |     get_filename_component(GLOG_LIBRARY_DIR ${GLOG_LIBRARY} DIRECTORY)
 64 |     include_directories(${GLOG_INCLUDE_DIR})
 65 |     link_directories(${GLOG_LIBRARY_DIR})
 66 | endif ()
 67 | 
 68 | # gflags
 69 | if (NOT GFLAGS_FOUND)
 70 |     message(WARNING "Can't find gflags library. It will be installed from git repository.")
 71 |     ExternalProject_Add(gflags
 72 |             GIT_REPOSITORY      https://github.com/gflags/gflags
 73 |             UPDATE_DISCONNECTED ${SKIP_UPDATE}
 74 |             PREFIX              gflags
 75 |             SOURCE_DIR          ${EXTERNAL_DIR}/gflags
 76 |             BINARY_DIR          ${PROJECT_BINARY_DIR}/gflags
 77 |             INSTALL_DIR         ${PROJECT_BINARY_DIR}/gflags
 78 |             CONFIGURE_COMMAND   test -e Makefile ||
 79 |                                 cmake ${EXTERNAL_DIR}/gflags -DCMAKE_INSTALL_PREFIX=${PROJECT_BINARY_DIR}/gflags
 80 |             BUILD_COMMAND       test -e lib/libgflags.a || make -j
 81 |             INSTALL_COMMAND     test -e include/gflags/gflags.h || make install)
 82 |     include_directories(${PROJECT_BINARY_DIR}/gflags/include)
 83 |     link_directories(${PROJECT_BINARY_DIR}/gflags/lib)
 84 | else()
 85 |     get_filename_component(GFLAGS_LIBRARY_DIR ${GFLAGS_LIBRARY} DIRECTORY)
 86 |     include_directories(${GFLAGS_INCLUDE_DIR})
 87 |     link_directories(${GFLAGS_LIBRARY_DIR})
 88 | endif()
 89 | 
 90 | # faiss
 91 | if (NOT FAISS_PATH)
 92 |     if (NOT NO_FAISS)
 93 |         ExternalProject_Add(faiss
 94 |                 GIT_REPOSITORY      https://github.com/facebookresearch/faiss.git
 95 |                 UPDATE_DISCONNECTED ${SKIP_UPDATE}
 96 |                 PREFIX              faiss
 97 |                 SOURCE_DIR          ${EXTERNAL_DIR}/faiss
 98 |                 BINARY_DIR          ${EXTERNAL_DIR}/faiss
 99 |                 INSTALL_DIR         ${PROJECT_BINARY_DIR}/faiss
100 |                 CONFIGURE_COMMAND   test -e makefile.inc ||
101 |                                     ${EXTERNAL_DIR}/faiss/configure --with-cuda=${CUDA_ROOT} --with-cuda-arch=${CUDA_ARCH}
102 |                 BUILD_COMMAND       test -e libfaiss.so || make -j
103 |                 INSTALL_COMMAND     test -e ${PROJECT_BINARY_DIR}/faiss/include/faiss/gpu/GpuIndexFlat.h ||
104 |                                     make install prefix=${PROJECT_BINARY_DIR}/faiss)
105 |         include_directories(${PROJECT_BINARY_DIR}/faiss/include)
106 |         link_directories(${PROJECT_BINARY_DIR}/faiss/lib)
107 |         set(FAISS_LIBRARY ${PROJECT_BINARY_DIR}/faiss/lib/libfaiss.so)
108 |     endif()
109 | else()
110 |     get_filename_component(FAISS_PARENT ${FAISS_PATH} DIRECTORY)
111 |     include_directories(${FAISS_PARENT})
112 |     link_directories(${FAISS_PATH})
113 |     set(FAISS_LIBRARY ${FAISS_PATH}/libfaiss.so)
114 | endif()
115 | 
116 | if (NOT CMAKE_BUILD_TYPE)
117 |     set(CMAKE_BUILD_TYPE Release)
118 | endif()
119 | 
120 | # compilation flags
121 | set(CMAKE_CXX_FLAGS "-std=c++11 -Wall")
122 | set(CMAKE_CXX_FLAGS_DEBUG "-g -Og")
123 | set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -D NDEBUG")
124 | 
125 | set(CMAKE_CUDA_FLAGS "-std=c++11")
126 | set(CMAKE_CUDA_FLAGS_DEBUG "-g -G")
127 | set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
128 | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_ARCH}")
129 | 
130 | foreach (MACRO FAST_COMPILE NO_FAISS)
131 |     if (${${MACRO}})
132 |         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D${MACRO}")
133 |         set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D${MACRO}")
134 |     endif()
135 | endforeach()
136 | 
137 | # output directory
138 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
139 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
140 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
141 | # symbolic link to faiss
142 | file(MAKE_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
143 | execute_process(COMMAND ln -sf ${FAISS_LIBRARY} ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libfaiss.so)
144 | 
145 | # project directories
146 | include_directories(${PYTHON_INCLUDE_DIRS})
147 | include_directories(include)
148 | add_subdirectory(src)


--------------------------------------------------------------------------------
/asset/graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/graph.png


--------------------------------------------------------------------------------
/asset/knowledge_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/knowledge_graph.png


--------------------------------------------------------------------------------
/asset/logo/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/logo/favicon.ico


--------------------------------------------------------------------------------
/asset/logo/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/logo/logo.png


--------------------------------------------------------------------------------
/asset/visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/visualization.png


--------------------------------------------------------------------------------
/asset/visualization/imagenet_hierarchy.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/visualization/imagenet_hierarchy.gif


--------------------------------------------------------------------------------
/asset/visualization/mnist_3d.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DeepGraphLearning/graphvite/5aad6dcc5e6318396a13d1c6a05f44a833b9c3bc/asset/visualization/mnist_3d.gif


--------------------------------------------------------------------------------
/cmake/FindGFlags.cmake:
--------------------------------------------------------------------------------
 1 | # - Try to find GFLAGS
 2 | #
 3 | # The following variables are optionally searched for defaults
 4 | #  GFLAGS_ROOT_DIR:            Base directory where all GFLAGS components are found
 5 | #
 6 | # The following are set after configuration is done:
 7 | #  GFLAGS_FOUND
 8 | #  GFLAGS_INCLUDE_DIRS
 9 | #  GFLAGS_LIBRARIES
10 | #  GFLAGS_LIBRARYRARY_DIRS
11 | 
12 | include(FindPackageHandleStandardArgs)
13 | 
14 | set(GFLAGS_ROOT_DIR "" CACHE PATH "Folder contains Gflags")
15 | 
16 | # We are testing only a couple of files in the include directories
17 | if(WIN32)
18 |     find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
19 |         PATHS ${GFLAGS_ROOT_DIR}/src/windows)
20 | else()
21 |     find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h
22 |         PATHS ${GFLAGS_ROOT_DIR})
23 | endif()
24 | 
25 | if(MSVC)
26 |     find_library(GFLAGS_LIBRARY_RELEASE
27 |         NAMES libgflags
28 |         PATHS ${GFLAGS_ROOT_DIR}
29 |         PATH_SUFFIXES Release)
30 | 
31 |     find_library(GFLAGS_LIBRARY_DEBUG
32 |         NAMES libgflags-debug
33 |         PATHS ${GFLAGS_ROOT_DIR}
34 |         PATH_SUFFIXES Debug)
35 | 
36 |     set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug ${GFLAGS_LIBRARY_DEBUG})
37 | else()
38 |     find_library(GFLAGS_LIBRARY gflags)
39 | endif()
40 | 
41 | find_package_handle_standard_args(GFlags DEFAULT_MSG GFLAGS_INCLUDE_DIR GFLAGS_LIBRARY)
42 | 
43 | 
44 | if(GFLAGS_FOUND)
45 |     set(GFLAGS_INCLUDE_DIRS ${GFLAGS_INCLUDE_DIR})
46 |     set(GFLAGS_LIBRARIES ${GFLAGS_LIBRARY})
47 |     message(STATUS "Found gflags  (include: ${GFLAGS_INCLUDE_DIR}, library: ${GFLAGS_LIBRARY})")
48 |     mark_as_advanced(GFLAGS_LIBRARY_DEBUG GFLAGS_LIBRARY_RELEASE
49 |                      GFLAGS_LIBRARY GFLAGS_INCLUDE_DIR GFLAGS_ROOT_DIR)
50 | endif()


--------------------------------------------------------------------------------
/cmake/FindGlog.cmake:
--------------------------------------------------------------------------------
 1 | # - Try to find Glog
 2 | #
 3 | # The following variables are optionally searched for defaults
 4 | #  GLOG_ROOT_DIR:            Base directory where all GLOG components are found
 5 | #
 6 | # The following are set after configuration is done:
 7 | #  GLOG_FOUND
 8 | #  GLOG_INCLUDE_DIRS
 9 | #  GLOG_LIBRARIES
10 | 
11 | include(FindPackageHandleStandardArgs)
12 | 
13 | set(GLOG_ROOT_DIR "" CACHE PATH "Folder contains Google glog")
14 | 
15 | if(WIN32)
16 |     find_path(GLOG_INCLUDE_DIR glog/logging.h
17 |         PATHS ${GLOG_ROOT_DIR}/src/windows)
18 | else()
19 |     find_path(GLOG_INCLUDE_DIR glog/logging.h
20 |         PATHS ${GLOG_ROOT_DIR})
21 | endif()
22 | 
23 | if(MSVC)
24 |     find_library(GLOG_LIBRARY_RELEASE libglog_static
25 |         PATHS ${GLOG_ROOT_DIR}
26 |         PATH_SUFFIXES Release)
27 | 
28 |     find_library(GLOG_LIBRARY_DEBUG libglog_static
29 |         PATHS ${GLOG_ROOT_DIR}
30 |         PATH_SUFFIXES Debug)
31 | 
32 |     set(GLOG_LIBRARY optimized ${GLOG_LIBRARY_RELEASE} debug ${GLOG_LIBRARY_DEBUG})
33 | else()
34 |     find_library(GLOG_LIBRARY glog
35 |         PATHS ${GLOG_ROOT_DIR}
36 |         PATH_SUFFIXES lib lib64)
37 | endif()
38 | 
39 | find_package_handle_standard_args(Glog DEFAULT_MSG GLOG_INCLUDE_DIR GLOG_LIBRARY)
40 | 
41 | if(GLOG_FOUND)
42 |   set(GLOG_INCLUDE_DIRS ${GLOG_INCLUDE_DIR})
43 |   set(GLOG_LIBRARIES ${GLOG_LIBRARY})
44 |   message(STATUS "Found glog    (include: ${GLOG_INCLUDE_DIR}, library: ${GLOG_LIBRARY})")
45 |   mark_as_advanced(GLOG_ROOT_DIR GLOG_LIBRARY_RELEASE GLOG_LIBRARY_DEBUG
46 |                                  GLOG_LIBRARY GLOG_INCLUDE_DIR)
47 | endif()


--------------------------------------------------------------------------------
/conda/conda_build_config.yaml:
--------------------------------------------------------------------------------
 1 | cxx_compiler_version:
 2 |   - 5.4
 3 | 
 4 | python:
 5 |   - 2.7
 6 |   - 3.6
 7 |   - 3.7
 8 | 
 9 | numpy:
10 |   - 1.11
11 | 
12 | cudatoolkit:
13 |   - 9.2
14 |   - 10.0
15 |   - 10.1
16 | 
17 | pin_run_as_build:
18 |   cudatoolkit:
19 |     max_pin: x.x


--------------------------------------------------------------------------------
/conda/graphvite-mini/build.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | 
 3 | mkdir -p build
 4 | 
 5 | cd build
 6 | cmake .. -DALL_ARCH=True
 7 | make
 8 | cd -
 9 | 
10 | cd python
11 | $PYTHON setup.py install
12 | cd -


--------------------------------------------------------------------------------
/conda/graphvite-mini/meta.yaml:
--------------------------------------------------------------------------------
 1 | package:
 2 |   name: graphvite-mini
 3 |   version: 0.2.2
 4 | 
 5 | source:
 6 |   path: ../..
 7 | 
 8 | requirements:
 9 |   build:
10 |     # cmake
11 |     - cmake >=3.12
12 |     - {{ compiler("cxx") }}
13 |     - glog
14 |     - gflags
15 |     - cudatoolkit {{ cudatoolkit }}
16 |     - python {{ python }}
17 |     - pybind11
18 |   host:
19 |     # make
20 |     - glog
21 |     - gflags
22 |     - cudatoolkit {{ cudatoolkit }}
23 |     - python {{ python }}
24 |     - pybind11
25 |     - numpy {{ numpy }}
26 |     - mkl >=2018
27 |     # setup
28 |     - pyyaml
29 |     - easydict
30 |     - six
31 |   run:
32 |     - glog
33 |     - gflags
34 |     - cudatoolkit
35 |     - python {{ python }}
36 |     - mkl >=2018
37 |     - numpy >=1.11
38 |     - pyyaml
39 |     - easydict
40 |     - six
41 |     - future
42 |     - psutil
43 | 
44 | build:
45 |   string:
46 |     "py{{ python|replace('.', '') }}\
47 |     cuda{{ cudatoolkit|replace('.', '') }}\
48 |     h{{ environ.get('GIT_FULL_HASH')|string|truncate(7, True, '', 0) }}"
49 | 
50 | test:
51 |   imports:
52 |     - graphvite
53 | 
54 | about:
55 |   home: https://graphvite.io
56 |   license: Apache-2.0
57 |   summary: "A general and high-performance graph embedding system for various applications"


--------------------------------------------------------------------------------
/conda/graphvite/build.sh:
--------------------------------------------------------------------------------
 1 | set -e
 2 | 
 3 | mkdir -p build
 4 | 
 5 | cd build
 6 | cmake .. -DALL_ARCH=True
 7 | make
 8 | cd -
 9 | 
10 | cd python
11 | $PYTHON setup.py install
12 | cd -


--------------------------------------------------------------------------------
/conda/graphvite/meta.yaml:
--------------------------------------------------------------------------------
 1 | package:
 2 |   name: graphvite
 3 |   version: 0.2.2
 4 | 
 5 | source:
 6 |   path: ../..
 7 | 
 8 | requirements:
 9 |   build:
10 |     # cmake
11 |     - cmake >=3.12
12 |     - {{ compiler("cxx") }}
13 |     - glog
14 |     - gflags
15 |     - cudatoolkit {{ cudatoolkit }}
16 |     - python {{ python }}
17 |     - pybind11
18 |   host:
19 |     # make
20 |     - glog
21 |     - gflags
22 |     - cudatoolkit {{ cudatoolkit }}
23 |     - python {{ python }}
24 |     - pybind11
25 |     - numpy {{ numpy }}
26 |     - mkl >=2018
27 |     # setup
28 |     - pyyaml
29 |     - easydict
30 |     - six
31 |   run:
32 |     - glog
33 |     - gflags
34 |     - cudatoolkit
35 |     - python {{ python }}
36 |     - mkl >=2018
37 |     - numpy >=1.11
38 |     - pyyaml
39 |     - easydict
40 |     - six
41 |     - future
42 |     - imageio
43 |     - psutil
44 |     - scipy
45 |     - matplotlib
46 |     - pytorch
47 |     - torchvision
48 |     - nltk
49 | 
50 | build:
51 |   string:
52 |     "py{{ python|replace('.', '') }}\
53 |     cuda{{ cudatoolkit|replace('.', '') }}\
54 |     h{{ environ.get('GIT_FULL_HASH')|string|truncate(7, True, '', 0) }}"
55 | 
56 | test:
57 |   imports:
58 |     - graphvite
59 | 
60 | about:
61 |   home: https://graphvite.io
62 |   license: Apache-2.0
63 |   summary: "A general and high-performance graph embedding system for various applications"


--------------------------------------------------------------------------------
/conda/requirements.txt:
--------------------------------------------------------------------------------
 1 | # cmake
 2 | cmake >=3.12
 3 | gxx_linux-64 >=5.4
 4 | glog
 5 | gflags
 6 | cudatoolkit >=9.2
 7 | python
 8 | pybind11
 9 | 
10 | # make
11 | mkl >=2018
12 | 
13 | # run
14 | numpy >=1.11
15 | pyyaml
16 | conda-forge::easydict
17 | six
18 | future
19 | imageio
20 | psutil
21 | scipy
22 | matplotlib
23 | pytorch
24 | torchvision
25 | nltk


--------------------------------------------------------------------------------
/config/demo/math.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: [0]
 6 |   cpu_per_gpu: 8
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <math.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-3
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 8
19 |   batch_size: 100000
20 |   episode_size: 100
21 | 
22 | train:
23 |   model: RotatE
24 |   num_epoch: 2000
25 |   margin: 9
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <math.test>
33 |   filter_files:
34 |     - <math.train>
35 |     - <math.valid>
36 |     - <math.test>
37 |   target: tail
38 | 
39 | save:
40 |   file_name: rotate_math.pkl


--------------------------------------------------------------------------------
/config/demo/quick_start.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: [0]
 6 |   cpu_per_gpu: 8
 7 |   dim: 128
 8 | 
 9 | format:
10 |   delimiters: " \t\r\n"
11 |   comment: "#"
12 | 
13 | graph:
14 |   file_name: <blogcatalog.train>
15 |   as_undirected: true
16 | 
17 | build:
18 |   optimizer:
19 |     type: SGD
20 |     lr: 0.025
21 |     weight_decay: 0.005
22 |   num_partition: auto
23 |   num_negative: 1
24 |   batch_size: 100000
25 |   episode_size: 500
26 | 
27 | train:
28 |   model: LINE
29 |   num_epoch: 2000
30 |   negative_weight: 5
31 |   augmentation_step: 2
32 |   random_walk_length: 40
33 |   random_walk_batch_size: 100
34 |   log_frequency: 1000
35 | 
36 | evaluate:
37 |   - task: link prediction
38 |     file_name: <blogcatalog.test>
39 |     filter_file: <blogcatalog.train>
40 |   - task: node classification
41 |     file_name: <blogcatalog.label>
42 |     portions: [0.2]
43 |     times: 1
44 | 
45 | save:
46 |   file_name: line_blogcatalog.pkl


--------------------------------------------------------------------------------
/config/graph/deepwalk_flickr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <flickr.graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 1000
22 | 
23 | train:
24 |   # here the best setting uses no augmentation
25 |   # in this case, DeepWalk is equal to LINE
26 |   model: DeepWalk
27 |   num_epoch: 2000
28 |   negative_weight: 5
29 |   augmentation_step: 1
30 |   random_walk_length: 40
31 |   random_walk_batch_size: 100
32 |   log_frequency: 1000
33 | 
34 | evaluate:
35 |   task: node classification
36 |   file_name: <flickr.label>
37 |   portions: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
38 |   times: 5
39 | 
40 | save:
41 |   file_name: deepwalk_flickr.pkl


--------------------------------------------------------------------------------
/config/graph/deepwalk_friendster-small.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <friendster.small_graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 3500
22 | 
23 | train:
24 |   # here the best setting uses no augmentation
25 |   # in this case, DeepWalk is equal to LINE
26 |   model: DeepWalk
27 |   num_epoch: 2000
28 |   negative_weight: 5
29 |   augmentation_step: 1
30 |   random_walk_length: 40
31 |   random_walk_batch_size: 100
32 |   log_frequency: 1000
33 | 
34 | evaluate:
35 |   task: node classification
36 |   file_name: <friendster.label>
37 |   portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
38 |   times: 5
39 | 
40 | save:
41 |   file_name: deepwalk_friendster-small.pkl


--------------------------------------------------------------------------------
/config/graph/deepwalk_friendster.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 96
 8 | 
 9 | graph:
10 |   file_name: <friendster.graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 2500
22 | 
23 | train:
24 |   model: DeepWalk
25 |   num_epoch: 2000
26 |   negative_weight: 5
27 |   augmentation_step: 2
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: node classification
34 |   file_name: <friendster.label>
35 |   portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
36 |   times: 5
37 | 
38 | save:
39 |   file_name: deepwalk_friendster.pkl


--------------------------------------------------------------------------------
/config/graph/deepwalk_hyperlink-pld.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <hyperlink2012.pld_train>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 5000
22 | 
23 | train:
24 |   model: DeepWalk
25 |   num_epoch: 2000
26 |   negative_weight: 5
27 |   augmentation_step: 2
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <hyperlink2012.pld_test>
35 |   filter_file: <hyperlink2012.pld_train>
36 | 
37 | save:
38 |   file_name: deepwalk_hyperlink-pld.pkl


--------------------------------------------------------------------------------
/config/graph/deepwalk_youtube.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <youtube.graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 500
22 | 
23 | train:
24 |   model: DeepWalk
25 |   num_epoch: 4000
26 |   negative_weight: 5
27 |   augmentation_step: 5
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: node classification
34 |   file_name: <youtube.label>
35 |   portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
36 |   times: 5
37 | 
38 | save:
39 |   file_name: deepwalk_youtube.pkl


--------------------------------------------------------------------------------
/config/graph/line_flickr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <flickr.graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 1000
22 | 
23 | train:
24 |   model: LINE
25 |   num_epoch: 2000
26 |   negative_weight: 5
27 |   augmentation_step: 1
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: node classification
34 |   file_name: <flickr.label>
35 |   portions: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
36 |   times: 5
37 | 
38 | save:
39 |   file_name: line_flickr.pkl


--------------------------------------------------------------------------------
/config/graph/line_friendster-small.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <friendster.small_graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 3500
22 | 
23 | train:
24 |   model: LINE
25 |   num_epoch: 2000
26 |   negative_weight: 5
27 |   augmentation_step: 1
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: node classification
34 |   file_name: <friendster.label>
35 |   portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
36 |   times: 5
37 | 
38 | save:
39 |   file_name: line_friendster-small.pkl


--------------------------------------------------------------------------------
/config/graph/line_friendster.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 96
 8 | 
 9 | graph:
10 |   file_name: <friendster.graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 2500
22 | 
23 | train:
24 |   model: LINE
25 |   num_epoch: 2000
26 |   negative_weight: 5
27 |   augmentation_step: 2
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: node classification
34 |   file_name: <friendster.label>
35 |   portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
36 |   times: 5
37 | 
38 | save:
39 |   file_name: line_friendster.pkl


--------------------------------------------------------------------------------
/config/graph/line_hyperlink-pld.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <hyperlink2012.pld_train>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 5000
22 | 
23 | train:
24 |   model: LINE
25 |   num_epoch: 2000
26 |   negative_weight: 5
27 |   augmentation_step: 2
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <hyperlink2012.pld_test>
35 |   filter_file: <hyperlink2012.pld_train>
36 | 
37 | save:
38 |   file_name: line_hyperlink-pld.pkl


--------------------------------------------------------------------------------
/config/graph/line_youtube.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <youtube.graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 500
22 | 
23 | train:
24 |   model: LINE
25 |   num_epoch: 4000
26 |   negative_weight: 5
27 |   augmentation_step: 5
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   log_frequency: 1000
31 | 
32 | evaluate:
33 |   task: node classification
34 |   file_name: <youtube.label>
35 |   portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
36 |   times: 5
37 | 
38 | save:
39 |   file_name: line_youtube.pkl


--------------------------------------------------------------------------------
/config/graph/node2vec_youtube.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <youtube.graph>
11 |   as_undirected: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 0.025
17 |     weight_decay: 0.005
18 |   num_partition: auto
19 |   num_negative: 1
20 |   batch_size: 100000
21 |   episode_size: 500
22 | 
23 | train:
24 |   model: node2vec
25 |   num_epoch: 4000
26 |   negative_weight: 5
27 |   augmentation_step: 5
28 |   random_walk_length: 40
29 |   random_walk_batch_size: 100
30 |   p: 4
31 |   q: 2
32 |   log_frequency: 1000
33 | 
34 | evaluate:
35 |   task: node classification
36 |   file_name: <youtube.label>
37 |   portions: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10]
38 |   times: 5
39 | 
40 | save:
41 |   file_name: node2vec_youtube.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/complex_fb15k-237.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k237.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: ComplEx
24 |   num_epoch: 1000
25 |   l3_regularization: 5.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k237.test>
33 |   filter_files:
34 |     - <fb15k237.train>
35 |     - <fb15k237.valid>
36 |     - <fb15k237.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: complex_fb15k-237.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/complex_fb15k.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-4
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: ComplEx
24 |   num_epoch: 1000
25 |   l3_regularization: 1.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k.test>
33 |   filter_files:
34 |     - <fb15k.train>
35 |     - <fb15k.valid>
36 |     - <fb15k.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: complex_fb15k.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/complex_wikidata5m.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wikidata5m.train>
11 |   normalization: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 1.0e-1
17 |     weight_decay: 0
18 |   num_partition: auto
19 |   num_negative: 64
20 |   batch_size: 100000
21 |   episode_size: 200
22 | 
23 | train:
24 |   model: ComplEx
25 |   num_epoch: 1000
26 |   l3_regularization: 2.0e-3
27 |   sample_batch_size: 2000
28 |   adversarial_temperature: 0.2
29 |   relation_lr_multiplier: 1.0e-3
30 |   log_frequency: 500
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <wikidata5m.test>
35 |   filter_files:
36 |     - <wikidata5m.train>
37 |     - <wikidata5m.valid>
38 |     - <wikidata5m.test>
39 |   # fast_mode: 1000
40 | 
41 | save:
42 |   file_name: complex_wikidata5m.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/complex_wn18.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: ComplEx
24 |   num_epoch: 4000
25 |   l3_regularization: 5.0e-5
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18.test>
33 |   filter_files:
34 |     - <wn18.train>
35 |     - <wn18.valid>
36 |     - <wn18.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: complex_wn18.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/complex_wn18rr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18rr.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: ComplEx
24 |   num_epoch: 6000
25 |   l3_regularization: 5.0e-6
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18rr.test>
33 |   filter_files:
34 |     - <wn18rr.train>
35 |     - <wn18rr.valid>
36 |     - <wn18rr.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: complex_wn18rr.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/distmult_fb15k-237.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k237.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: DistMult
24 |   num_epoch: 1000
25 |   l3_regularization: 5.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k237.test>
33 |   filter_files:
34 |     - <fb15k237.train>
35 |     - <fb15k237.valid>
36 |     - <fb15k237.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: distmult_fb15k-237.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/distmult_fb15k.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: DistMult
24 |   num_epoch: 1000
25 |   l3_regularization: 1.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k.test>
33 |   filter_files:
34 |     - <fb15k.train>
35 |     - <fb15k.valid>
36 |     - <fb15k.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: distmult_fb15k.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/distmult_wikidata5m.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wikidata5m.train>
11 |   normalization: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 1.0e-1
17 |     weight_decay: 0
18 |   num_partition: auto
19 |   num_negative: 64
20 |   batch_size: 100000
21 |   episode_size: 200
22 | 
23 | train:
24 |   model: DistMult
25 |   num_epoch: 2000
26 |   l3_regularization: 2.0e-3
27 |   sample_batch_size: 2000
28 |   adversarial_temperature: 2
29 |   relation_lr_multiplier: 1.0e-4
30 |   log_frequency: 500
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <wikidata5m.test>
35 |   filter_files:
36 |     - <wikidata5m.train>
37 |     - <wikidata5m.valid>
38 |     - <wikidata5m.test>
39 |   # fast_mode: 1000
40 | 
41 | save:
42 |   file_name: distmult_wikidata5m.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/distmult_wn18.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-4
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: DistMult
24 |   num_epoch: 4000
25 |   l3_regularization: 1.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18.test>
33 |   filter_files:
34 |     - <wn18.train>
35 |     - <wn18.valid>
36 |     - <wn18.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: distmult_wn18.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/distmult_wn18rr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18rr.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: DistMult
24 |   num_epoch: 6000
25 |   l3_regularization: 1.0e-2
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18rr.test>
33 |   filter_files:
34 |     - <wn18rr.train>
35 |     - <wn18rr.valid>
36 |     - <wn18rr.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: distmult_wn18rr.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/quate_fb15k-237.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k237.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: QuatE
24 |   num_epoch: 1000
25 |   l3_regularization: 5.0e-2
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k237.test>
33 |   filter_files:
34 |     - <fb15k237.train>
35 |     - <fb15k237.valid>
36 |     - <fb15k237.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: quate_fb15k-237.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/quate_fb15k.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: QuatE
24 |   num_epoch: 1000
25 |   l3_regularization: 1.0e-2
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k.test>
33 |   filter_files:
34 |     - <fb15k.train>
35 |     - <fb15k.valid>
36 |     - <fb15k.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: quate_fb15k.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/quate_wikidata5m.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wikidata5m.train>
11 |   normalization: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 1.0
17 |     weight_decay: 0
18 |   num_partition: auto
19 |   num_negative: 64
20 |   batch_size: 100000
21 |   episode_size: 200
22 | 
23 | train:
24 |   model: QuatE
25 |   num_epoch: 2000
26 |   l3_regularization: 5.0e-3
27 |   sample_batch_size: 2000
28 |   adversarial_temperature: 10
29 |   relation_lr_multiplier: 1.0e-4
30 |   log_frequency: 500
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <wikidata5m.test>
35 |   filter_files:
36 |     - <wikidata5m.train>
37 |     - <wikidata5m.valid>
38 |     - <wikidata5m.test>
39 |   # fast_mode: 1000
40 | 
41 | save:
42 |   file_name: quate_wikidata5m.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/quate_wn18.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: QuatE
24 |   num_epoch: 4000
25 |   l3_regularization: 1.0e-2
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18.test>
33 |   filter_files:
34 |     - <wn18.train>
35 |     - <wn18.valid>
36 |     - <wn18.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: quate_wn18.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/quate_wn18rr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18rr.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-6
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: QuatE
24 |   num_epoch: 6000
25 |   l3_regularization: 5.0e-2
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18rr.test>
33 |   filter_files:
34 |     - <wn18rr.train>
35 |     - <wn18rr.valid>
36 |     - <wn18rr.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: quate_wn18rr.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/rotate_fb15k-237.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k237.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-6
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: RotatE
24 |   num_epoch: 1000
25 |   margin: 9
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k237.test>
33 |   filter_files:
34 |     - <fb15k237.train>
35 |     - <fb15k237.valid>
36 |     - <fb15k237.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: rotate_fb15k-237.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/rotate_fb15k.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-4
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: RotatE
24 |   num_epoch: 1000
25 |   margin: 24
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k.test>
33 |   filter_files:
34 |     - <fb15k.train>
35 |     - <fb15k.valid>
36 |     - <fb15k.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: rotate_fb15k.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/rotate_wikidata5m.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wikidata5m.train>
11 |   normalization: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 1.0e-2
17 |     weight_decay: 0
18 |   num_partition: auto
19 |   num_negative: 64
20 |   batch_size: 100000
21 |   episode_size: 200
22 | 
23 | train:
24 |   model: RotatE
25 |   num_epoch: 1000
26 |   margin: 6
27 |   sample_batch_size: 2000
28 |   adversarial_temperature: 0.2
29 |   relation_lr_multiplier: 1
30 |   log_frequency: 500
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <wikidata5m.test>
35 |   filter_files:
36 |     - <wikidata5m.train>
37 |     - <wikidata5m.valid>
38 |     - <wikidata5m.test>
39 |   # fast_mode: 1000
40 | 
41 | save:
42 |   file_name: rotate_wikidata5m.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/rotate_wn18.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-6
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: RotatE
24 |   num_epoch: 4000
25 |   margin: 9
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18.test>
33 |   filter_files:
34 |     - <wn18.train>
35 |     - <wn18.valid>
36 |     - <wn18.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: rotate_wn18.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/rotate_wn18rr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18rr.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-6
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: RotatE
24 |   num_epoch: 6000
25 |   margin: 6
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18rr.test>
33 |   filter_files:
34 |     - <wn18rr.train>
35 |     - <wn18rr.valid>
36 |     - <wn18rr.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: rotate_wn18rr.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/simple_fb15k-237.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k237.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: SimplE
24 |   num_epoch: 1000
25 |   l3_regularization: 5.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k237.test>
33 |   filter_files:
34 |     - <fb15k237.train>
35 |     - <fb15k237.valid>
36 |     - <fb15k237.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: simple_fb15k-237.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/simple_fb15k.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 2048
 8 | 
 9 | graph:
10 |   file_name: <fb15k.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: SimplE
24 |   num_epoch: 1000
25 |   l3_regularization: 1.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k.test>
33 |   filter_files:
34 |     - <fb15k.train>
35 |     - <fb15k.valid>
36 |     - <fb15k.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: simple_fb15k.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/simple_wikidata5m.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wikidata5m.train>
11 |   normalization: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 1.0
17 |     weight_decay: 0
18 |   num_partition: auto
19 |   num_negative: 64
20 |   batch_size: 100000
21 |   episode_size: 200
22 | 
23 | train:
24 |   model: SimplE
25 |   num_epoch: 2000
26 |   l3_regularization: 2.0e-3
27 |   sample_batch_size: 2000
28 |   adversarial_temperature: 2
29 |   relation_lr_multiplier: 1.0e-4
30 |   log_frequency: 500
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <wikidata5m.test>
35 |   filter_files:
36 |     - <wikidata5m.train>
37 |     - <wikidata5m.valid>
38 |     - <wikidata5m.test>
39 |   # fast_mode: 1000
40 | 
41 | save:
42 |   file_name: simple_wikidata5m.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/simple_wn18.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: SimplE
24 |   num_epoch: 4000
25 |   l3_regularization: 2.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18.test>
33 |   filter_files:
34 |     - <wn18.train>
35 |     - <wn18.valid>
36 |     - <wn18.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: simple_wn18.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/simple_wn18rr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <wn18rr.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-4
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: SimplE
24 |   num_epoch: 6000
25 |   l3_regularization: 2.0e-3
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18rr.test>
33 |   filter_files:
34 |     - <wn18rr.train>
35 |     - <wn18rr.valid>
36 |     - <wn18rr.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: simple_wn18rr.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/transe_fb15k-237.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <fb15k237.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 2.0e-6
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: TransE
24 |   num_epoch: 1000
25 |   margin: 9
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k237.test>
33 |   filter_files:
34 |     - <fb15k237.train>
35 |     - <fb15k237.valid>
36 |     - <fb15k237.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: transe_fb15k-237.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/transe_fb15k.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 1024
 8 | 
 9 | graph:
10 |   file_name: <fb15k.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-5
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: TransE
24 |   num_epoch: 1000
25 |   margin: 24
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <fb15k.test>
33 |   filter_files:
34 |     - <fb15k.train>
35 |     - <fb15k.valid>
36 |     - <fb15k.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: transe_fb15k.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/transe_wikidata5m.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wikidata5m.train>
11 |   normalization: true
12 | 
13 | build:
14 |   optimizer:
15 |     type: SGD
16 |     lr: 1.0e-3
17 |     weight_decay: 0
18 |   num_partition: auto
19 |   num_negative: 64
20 |   batch_size: 100000
21 |   episode_size: 200
22 | 
23 | train:
24 |   model: TransE
25 |   num_epoch: 1000
26 |   margin: 12
27 |   sample_batch_size: 2000
28 |   adversarial_temperature: 0.5
29 |   relation_lr_multiplier: 1.0e-2
30 |   log_frequency: 500
31 | 
32 | evaluate:
33 |   task: link prediction
34 |   file_name: <wikidata5m.test>
35 |   filter_files:
36 |     - <wikidata5m.train>
37 |     - <wikidata5m.valid>
38 |     - <wikidata5m.test>
39 |   # fast_mode: 1000
40 | 
41 | save:
42 |   file_name: transe_wikidata5m.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/transe_wn18.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wn18.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 5.0e-6
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: TransE
24 |   num_epoch: 4000
25 |   margin: 12
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18.test>
33 |   filter_files:
34 |     - <wn18.train>
35 |     - <wn18.valid>
36 |     - <wn18.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: transe_wn18.pkl


--------------------------------------------------------------------------------
/config/knowledge_graph/transe_wn18rr.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   knowledge graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 512
 8 | 
 9 | graph:
10 |   file_name: <wn18rr.train>
11 | 
12 | build:
13 |   optimizer:
14 |     type: Adam
15 |     lr: 1.0e-6
16 |     weight_decay: 0
17 |   num_partition: auto
18 |   num_negative: 64
19 |   batch_size: 100000
20 |   episode_size: 1
21 | 
22 | train:
23 |   model: TransE
24 |   num_epoch: 6000
25 |   margin: 6
26 |   sample_batch_size: 2000
27 |   adversarial_temperature: 2
28 |   log_frequency: 100
29 | 
30 | evaluate:
31 |   task: link prediction
32 |   file_name: <wn18rr.test>
33 |   filter_files:
34 |     - <wn18rr.train>
35 |     - <wn18rr.valid>
36 |     - <wn18rr.test>
37 |   # fast_mode: 3000
38 | 
39 | save:
40 |   file_name: transe_wn18rr.pkl


--------------------------------------------------------------------------------
/config/template/graph.yaml:
--------------------------------------------------------------------------------
  1 | ###########################################################
  2 | # Node embedding configuration file
  3 | ###########################################################
  4 | 
  5 | application:
  6 |   graph
  7 | 
  8 | resource:
  9 |   # List of GPU ids. Default is all GPUs
 10 |   gpus: []
 11 |   # Memory limit for each GPU in bytes. Default is all available memory.
 12 |   gpu_memory_limit: auto
 13 |   # Number of CPU thread per GPU. Default is all CPUs.
 14 |   cpu_per_gpu: auto
 15 |   # Dimension of the embeddings.
 16 |   dim: 128
 17 | 
 18 | format:
 19 |   # String of delimiter characters. Change it if your node name contains blank character.
 20 |   delimiters: " \t\r\n"
 21 |   # Prefix of comment strings. Change it if you use comment style other than Python.
 22 |   comment: "#"
 23 | 
 24 | graph:
 25 |   # Path to edge list file. Each line should be one of the following
 26 |   # [node 1] [delimiter] [node 2] [comment]...
 27 |   # [node 1] [delimiter] [node 2] [delimiter] [weight] [comment]...
 28 |   # [comment]...
 29 |   # For standard datasets, you can specify them by <[dataset].[split]>.
 30 |   file_name:
 31 |   # Symmetrize the graph or not. True is recommended.
 32 |   as_undirected: true
 33 |   # Normalize the adjacency matrix or not. This may influence the performance a little.
 34 |   normalization: false
 35 | 
 36 | build:
 37 |   optimizer:
 38 |     # Optimizer.
 39 |     type: SGD
 40 |     # Learning rate. Default is usually reasonable.
 41 |     lr: 0.025
 42 |     # Weight decay.
 43 |     weight_decay: 0.005
 44 |     # Learning rate schedule, can be "linear" or "constant". Linear is recommended.
 45 |     schedule: linear
 46 |   # Number of partitions. Auto is recommended.
 47 |   num_partition: auto
 48 |   # Number of negative samples per positive sample.
 49 |   # Larger value results in slower training.
 50 |   # The performance may be influenced by num_negative * negative_weight.
 51 |   num_negative: 1
 52 |   # Batch size of samples in CPU-GPU transfer. Default is recommended.
 53 |   batch_size: 100000
 54 |   # Number of batches in a partition block.
 55 |   # Default is recommended.
 56 |   episode_size: auto
 57 | 
 58 | # Comment out this section if not needed.
 59 | load:
 60 |   # Path to model file, can be "*.pkl".
 61 |   file_name: graph.pkl
 62 | 
 63 | train:
 64 |   # Model, can be DeepWalk, LINE or node2vec.
 65 |   model: DeepWalk
 66 |   # Number of epochs. Default is usually reasonable for sparse graphs.
 67 |   # For dense graphs (|E| / |V| > 100), you may use smaller values.
 68 |   num_epoch: 2000
 69 |   # Resume training from a loaded model.
 70 |   resume: false
 71 |   # Weight of negative samples. Values larger than 10 may cause unstable training.
 72 |   negative_weight: 5
 73 |   # Exponent of degrees in negative sampling. Default is recommended.
 74 |   negative_sample_exponent: 0.75
 75 |   # Augmentation step. Default is usually reasonable.
 76 |   # Larger value is needed for sparser graphs.
 77 |   augmentation_step: auto
 78 |   # Return parameter and in-out parameters (node2vec). Need to be tuned on the validation set.
 79 |   p: 1
 80 |   q: 1
 81 |   # Length of each random walk. Default is recommended.
 82 |   random_walk_length: 40
 83 |   # Batch size of random walks in samplers. Default is recommended.
 84 |   random_walk_batch_size: 100
 85 |   # Log every n batches.
 86 |   log_frequency: 1000
 87 | 
 88 | # Comment out this section if not needed.
 89 | evaluate:
 90 |   # Comment out any task if not needed.
 91 |   - task: node classification
 92 |     # Path to node label file. Each line should be one of the following
 93 |     # [node] [delimiter] [label] [comment]...
 94 |     # [comment]...
 95 |     file_name:
 96 |     # Portions of data used for training. Each of them corresponds to one evaluation.
 97 |     portions: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
 98 |     # Number of trials repeated. Change it to 1 if your evaluation set is large enough.
 99 |     times: 5
100 | 
101 |   - task: link prediction
102 |     # Path to link prediction file. Each line should be
103 |     # [node 1] [delimiter] [node 2] [delimiter] [label]
104 |     # where label is 1 for positive and 0 for negative.
105 |     file_name:
106 |     # Path to filter file. If you aren't sure that training data is excluded in evaluation,
107 |     # you can specify the training edge list here.
108 |     filter_file:
109 | 
110 | # Comment out this section if not needed.
111 | save:
112 |   # Path to save file, can be "*.pkl".
113 |   file_name: graph.pkl
114 |   # Save hyperparameters or not.
115 |   save_hyperparameter: false


--------------------------------------------------------------------------------
/config/template/knowledge_graph.yaml:
--------------------------------------------------------------------------------
  1 | ###########################################################
  2 | # Knowledge graph embedding configuration file
  3 | ###########################################################
  4 | 
  5 | application:
  6 |   knowledge graph
  7 | 
  8 | resource:
  9 |   # List of GPU ids. Default is all GPUs
 10 |   gpus: []
 11 |   # Memory limit for each GPU in bytes. Default is all available memory.
 12 |   gpu_memory_limit: auto
 13 |   # Number of CPU thread per GPU. Default is all CPUs.
 14 |   cpu_per_gpu: auto
 15 |   # Dimension of the embeddings.
 16 |   dim: 1024
 17 | 
 18 | format:
 19 |   # String of delimiter characters. Change it if your node name contains blank character.
 20 |   delimiters: " \t\r\n"
 21 |   # Prefix of comment strings. Change it if you use comment style other than Python.
 22 |   comment: "#"
 23 | 
 24 | graph:
 25 |   # Path to triplet file. Each line should be one of the following
 26 |   # [head] [delimiter] [relation] [tail] [comment]...
 27 |   # [head] [delimiter] [relation] [tail] [delimiter] [weight] [comment]...
 28 |   # [comment]...
 29 |   # For standard datasets, you can specify them by <[dataset].[split]>.
 30 |   file_name:
 31 |   # Normalize the adjacency matrix or not. This may influence the performance a little.
 32 |   normalization: false
 33 | 
 34 | build:
 35 |   optimizer:
 36 |     # Optimizer.
 37 |     type: Adam
 38 |     # Learning rate. Default is usually reasonable.
 39 |     lr: 5.0e-5
 40 |     # Weight decay.
 41 |     weight_decay: 0
 42 |     # Learning rate schedule, can be "linear" or "constant". Linear is recommended.
 43 |     schedule: linear
 44 |   # Number of partitions. Auto is recommended.
 45 |   num_partition: auto
 46 |   # Number of negative samples per positive sample.
 47 |   # Larger value results in slower training.
 48 |   num_negative: 64
 49 |   # Batch size of samples in CPU-GPU transfer. Default is recommended.
 50 |   batch_size: 100000
 51 |   # Number of batches in a partition block.
 52 |   # Default is recommended.
 53 |   episode_size: auto
 54 | 
 55 | # Comment out this section if not needed.
 56 | load:
 57 |   # Path to model file, can be "*.pkl".
 58 |   file_name: knowledge_graph.pkl
 59 | 
 60 | train:
 61 |   # Model, can be TransE, DistMult, ComplEx, SimplE or RotatE
 62 |   model: TransE
 63 |   # Number of epochs. Default is usually reasonable.
 64 |   num_epoch: 2000
 65 |   # Resume training from a loaded model.
 66 |   resume: false
 67 |   # Learning rate multiplier for relation embeddings.
 68 |   # Need to be tuned on the validation set if using multiple GPUs.
 69 |   relation_lr_multiplier: 1
 70 |   # L3 regularization (DistMult, ComplEx and SimplE). Need to be tuned on the validation set.
 71 |   l3_regularization: 2.0e-3
 72 |   # Margin (TransE, RotatE). Need to be tuned on the validation set.
 73 |   margin: 12
 74 |   # Batch size of samples in samplers. Default is recommended.
 75 |   sample_batch_size: 2000
 76 |   # Temperature for self-adversarial negative sampling. Default is usually reasonable.
 77 |   adversarial_temperature: 2
 78 |   # Log every n batches.
 79 |   log_frequency: 100
 80 | 
 81 | # Comment out this section if not needed.
 82 | evaluate:
 83 |   # Comment out any task if not needed.
 84 |   - task: link prediction
 85 |     # Path to triplet file. Each line should be one of the following
 86 |     # [head] [delimiter] [relation] [tail] [comment]...
 87 |     # [head] [delimiter] [relation] [tail] [delimiter] [weight] [comment]...
 88 |     # [comment]...
 89 |     file_name:
 90 |     # List of paths to filter files.
 91 |     # Specify all dataset splits for filtered ranking. Comment out for unfiltered ranking.
 92 |     filter_files:
 93 |     # Target entity to rank, can be head, tail or both.
 94 |     target: both
 95 |     # Number of samples to be evaluated. Comment out for precise evaluation.
 96 |     # fast_mode: 3000
 97 |     # Backend, can be graphvite or torch
 98 |     backend: graphvite
 99 | 
100 |   - task: entity prediction
101 |     # Path to triplet file. Each line should be one of the following
102 |     # [head] [delimiter] [relation] [tail] [comment]...
103 |     # [head] [delimiter] [relation] [tail] [delimiter] [weight] [comment]...
104 |     # [comment]...
105 |     file_name:
106 |     # Path to save file, can be "*.txt" or "*.pkl".
107 |     save_file:
108 |     # Target entity to predict, can be head or tail.
109 |     target: tail
110 |     # Top-k recalls will be returned.
111 |     k: 10
112 |     # Backend, can be graphvite or torch.
113 |     backend: graphvite
114 | 
115 | # Comment out this section if not needed.
116 | save:
117 |   # Path to save file, can be "*.pkl".
118 |   file_name: knowledge_graph.pkl
119 |   # Save hyperparameters or not.
120 |   save_hyperparameter: false


--------------------------------------------------------------------------------
/config/template/visualization.yaml:
--------------------------------------------------------------------------------
  1 | ###########################################################
  2 | # High-dimensional data visualization configuration file
  3 | ###########################################################
  4 | 
  5 | application:
  6 |   visualization
  7 | 
  8 | resource:
  9 |   # List of GPU ids. Multiple GPUs will cause unstable results.
 10 |   gpus: [0]
 11 |   # Memory limit for each GPU in bytes. Default is all available memory.
 12 |   gpu_memory_limit: auto
 13 |   # Number of CPU thread per GPU. Default is all CPUs.
 14 |   cpu_per_gpu: auto
 15 |   # Dimension of the embeddings.
 16 |   dim: 2
 17 | 
 18 | format:
 19 |   # String of delimiter characters. Change it if your node name contains blank character.
 20 |   delimiters: " \t\r\n"
 21 |   # Prefix of comment strings. Change it if you use comment style other than Python.
 22 |   comment: "#"
 23 | 
 24 | graph:
 25 |   # Path to vector file. Each line should be one of the following
 26 |   # [value] [delimiter] [value] [delimiter]... [comment]...
 27 |   # [comment]...
 28 |   # For standard datasets, you can specify them by <[dataset].[split]>.
 29 |   vector_file:
 30 |   # Number of neighbors for each node. Default is usually reasonable.
 31 |   num_neighbor: 200
 32 |   # Perplexity for the neighborhood of each node.
 33 |   # Typical values are between 5 and 50. Need to be tuned for best results.
 34 |   # Larger value focuses on global difference and results in larger clusters.
 35 |   perplexity: 30
 36 |   # Normalize the input vectors or not. True is recommended.
 37 |   vector_normalization: true
 38 | 
 39 | build:
 40 |   optimizer:
 41 |     # Optimizer.
 42 |     type: Adam
 43 |     # Learning rate. Default is usually reasonable.
 44 |     lr: 0.5
 45 |     # Weight decay. Default is usually reasonable.
 46 |     weight_decay: 1.0e-5
 47 |     # Learning rate schedule, can be "linear" or "constant". Linear is recommended.
 48 |     schedule: linear
 49 |   # Number of partitions. Auto is recommended.
 50 |   num_partition: auto
 51 |   # Number of negative samples per positive sample.
 52 |   # Larger value results in slower training.
 53 |   # The performance may be influenced by num_negative * negative_weight.
 54 |   num_negative: 5
 55 |   # Batch size of samples in CPU-GPU transfer. Default is recommended.
 56 |   batch_size: 100000
 57 |   # Number of batches in a partition block.
 58 |   # Default is recommended.
 59 |   episode_size: auto
 60 | 
 61 | # Comment out this section if not needed.
 62 | load:
 63 |   # Path to model file, can be "*.pkl".
 64 |   file_name: visualization.pkl
 65 | 
 66 | train:
 67 |   # Model, can be LargeVis.
 68 |   model: LargeVis
 69 |   # Number of epochs. Default is recommended.
 70 |   num_epoch: 50
 71 |   # Resume training from a loaded model.
 72 |   resume: false
 73 |   # Weight of negative samples. Values larger than 10 may cause unstable training.
 74 |   negative_weight: 3
 75 |   # Exponent of degrees in negative sampling. Default is recommended.
 76 |   negative_sample_exponent: 0.75
 77 |   # Batch size of samples in samplers. Default is recommended.
 78 |   sample_batch_size: 2000
 79 |   # Log every n batches.
 80 |   log_frequency: 1000
 81 | 
 82 | # Comment out this section if not needed.
 83 | evaluate:
 84 |   # Comment out any task if not needed.
 85 |   - task: visualization
 86 |     # Path to label file. Each line should be one of the following
 87 |     # [label] [comment]...
 88 |     # [comment]...
 89 |     # The file is assumed to have the same order as input vectors.
 90 |     file_name:
 91 |     # Path to save file, can be either "*.png" or "*.pdf".
 92 |     # If not provided, show the figure in window.
 93 |     save_file:
 94 |     # Size of the figure.
 95 |     figure_size: 10
 96 |     # Size of points. Recommend to use figure_size / 5.
 97 |     scale: 2
 98 | 
 99 |   # This task only works for dim = 3.
100 |   - task: animation
101 |     # Path to label file. Each line should be one of the following
102 |     # [label] [comment]...
103 |     # [comment]...
104 |     file_name:
105 |     # Path to save file, can be "*.gif".
106 |     save_file:
107 |     # Size of the figure.
108 |     figure_size: 5
109 |     # Size of points. Recommend to use figure_size / 5.
110 |     scale: 1
111 |     # Elevation angle. Default is recommended.
112 |     elevation: 30
113 |     # Number of frames. Default is recommended.
114 |     num_frame: 700
115 | 
116 |   - task: hierarchy
117 |     # Path to hierarchical label file. Each line should be one of the following
118 |     # [label] [delimiter] [label] [delimiter]... [comment]...
119 |     # [comment]...
120 |     # Labels should be ordered in ascending depth, i.e. the first label corresponds to the root in the hierarchy.
121 |     # The file is assumed to have the same order as input vectors.
122 |     file_name:
123 |     # Target class to be visualized.
124 |     target:
125 |     # Path to save file, can be "*.gif".
126 |     save_file:
127 |     # Size of the figure.
128 |     figure_size: 10
129 |     # Size of points. Recommend to use figure_size / 5.
130 |     scale: 2
131 |     # Duration of each frame in seconds. Default is recommended.
132 |     duration: 3
133 |   
134 | # Comment out this section if not needed.
135 | save:
136 |   # Path to save file, can be "*.pkl".
137 |   file_name: visualization.pkl
138 |   # Save hyperparameters or not.
139 |   save_hyperparameter: false


--------------------------------------------------------------------------------
/config/template/word_graph.yaml:
--------------------------------------------------------------------------------
 1 | ###########################################################
 2 | # Word embedding configuration file
 3 | ###########################################################
 4 | 
 5 | application:
 6 |   word graph
 7 | 
 8 | resource:
 9 |   # List of GPU ids. Default is all GPUs
10 |   gpus: []
11 |   # Memory limit for each GPU in bytes. Default is all available memory.
12 |   gpu_memory_limit: auto
13 |   # Number of CPU thread per GPU. Default is all CPUs.
14 |   cpu_per_gpu: auto
15 |   # Dimension of the embeddings.
16 |   dim: 128
17 | 
18 | graph:
19 |   # Path to corpus file. Each line should be one of the following
20 |   # [word] [delimiter] [word] [delimiter]... [comment]...
21 |   # [comment]...
22 |   # For standard datasets, you can specify them by <[dataset].[split]>.
23 |   file_name: 
24 |   # Word pairs with distance <= window as counted as edges. Default is recommended.
25 |   window: 5
26 |   # Words with occurrence <= min_count are discarded.
27 |   min_count: 5
28 |   # Normalize the adjacency matrix or not. This may influence the performance a little.
29 |   normalization: false
30 |   # String of delimiter characters. Change it if your node name contains blank character.
31 |   delimiters: " \t\r\n"
32 |   # Prefix of comment strings. Change it if you use comment style other than Python.
33 |   comment: "#"
34 | 
35 | build:
36 |   optimizer:
37 |     # Optimizer.
38 |     type: SGD
39 |     # Learning rate. Default is usually reasonable.
40 |     lr: 0.025
41 |     # Weight decay.
42 |     weight_decay: 0.005
43 |     # Learning rate schedule, can be "linear" or "constant". Linear is recommended.
44 |     schedule: linear
45 |   # Number of partitions. Auto is recommended.
46 |   num_partition: auto
47 |   # Number of negative samples per positive sample.
48 |   # Larger value results in slower training.
49 |   # The performance may be influenced by num_negative * negative_weight.
50 |   num_negative: 1
51 |   # Batch size of samples in CPU-GPU transfer. Default is recommended.
52 |   batch_size: 100000
53 |   # Number of batches in a partition block.
54 |   # Default is recommended, unless it overflows the memory (std::bad_alloc).
55 |   episode_size: auto
56 | 
57 | # Comment out this section if not needed.
58 | load:
59 |   # Path to model file, can be "*.pkl".
60 |   file_name: word_graph.pkl
61 | 
62 | train:
63 |   # Model, can be LINE.
64 |   model: LINE
65 |   # Number of epochs. Default is usually reasonable.
66 |   num_epoch: 80
67 |   # Resume training from a loaded model.
68 |   resume: false
69 |   # Weight of negative samples. Values larger than 10 may cause unstable training.
70 |   negative_weight: 5
71 |   # Exponent of degrees in negative sampling. Default is recommended.
72 |   negative_sample_exponent: 0.75
73 |   # Augmentation step. Default is recommended.
74 |   augmentation_step: 1
75 |   # Length of each random walk. Default is recommended.
76 |   random_walk_length: 40
77 |   # Batch size of random walks in samplers. Default is recommended.
78 |   random_walk_batch_size: 100
79 |   # Log every n batches.
80 |   log_frequency: 1000
81 | 
82 | # Comment out this section if not needed.
83 | save:
84 |   # Path to save file, can be "*.pkl".
85 |   file_name: word_graph.pkl
86 |   # Save hyperparameters or not.
87 |   save_hyperparameter: false


--------------------------------------------------------------------------------
/config/visualization/largevis_imagenet.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   visualization
 3 | 
 4 | resource:
 5 |   gpus: [0]
 6 |   cpu_per_gpu: auto
 7 |   dim: 2
 8 | 
 9 | graph:
10 |   vectors: <imagenet.feature_data>
11 |   num_neighbor: 200
12 |   perplexity: 50
13 | 
14 | build:
15 |   optimizer:
16 |     type: Adam
17 |     lr: 0.5
18 |     weight_decay: 1.0e-5
19 |   num_partition: auto
20 |   num_negative: 5
21 |   batch_size: 100000
22 |   episode_size: 200
23 | 
24 | train:
25 |   model: LargeVis
26 |   num_epoch: 50
27 |   negative_weight: 3
28 |   log_frequency: 1000
29 | 
30 | evaluate:
31 |   task: hierarchy
32 |   file_name: <imagenet.hierarchical_label>
33 |   target: english_setter
34 |   save_file: imagenet_hierarchy.gif
35 | 
36 | save:
37 |   file_name: largevis_imagenet_2d.pkl


--------------------------------------------------------------------------------
/config/visualization/largevis_mnist_2d.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   visualization
 3 | 
 4 | resource:
 5 |   gpus: [0]
 6 |   cpu_per_gpu: auto
 7 |   dim: 2
 8 | 
 9 | graph:
10 |   vectors: <mnist.image_data>
11 |   num_neighbor: 200
12 |   perplexity: 20
13 | 
14 | build:
15 |   optimizer:
16 |     type: Adam
17 |     lr: 0.5
18 |     weight_decay: 1.0e-5
19 |   num_partition: auto
20 |   num_negative: 5
21 |   batch_size: 100000
22 |   episode_size: 200
23 | 
24 | train:
25 |   model: LargeVis
26 |   num_epoch: 50
27 |   negative_weight: 3
28 |   log_frequency: 1000
29 | 
30 | evaluate:
31 |   task: visualization
32 |   Y: <mnist.label_data>
33 |   save_file: mnist_2d.png
34 | 
35 | save:
36 |   file_name: largevis_mnist_2d.pkl


--------------------------------------------------------------------------------
/config/visualization/largevis_mnist_3d.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   visualization
 3 | 
 4 | resource:
 5 |   gpus: [0]
 6 |   cpu_per_gpu: auto
 7 |   dim: 3
 8 | 
 9 | graph:
10 |   vectors: <mnist.image_data>
11 |   num_neighbor: 200
12 |   perplexity: 20
13 | 
14 | build:
15 |   optimizer:
16 |     type: Adam
17 |     lr: 0.5
18 |     weight_decay: 1.0e-5
19 |   num_partition: auto
20 |   num_negative: 5
21 |   batch_size: 100000
22 |   episode_size: 200
23 | 
24 | train:
25 |   model: LargeVis
26 |   num_epoch: 50
27 |   negative_weight: 3
28 |   log_frequency: 1000
29 | 
30 | evaluate:
31 |   task: animation
32 |   Y: <mnist.label_data>
33 |   save_file: mnist_3d.gif
34 | 
35 | save:
36 |   file_name: largevis_mnist_3d.pkl


--------------------------------------------------------------------------------
/config/word_graph/line_wikipedia.yaml:
--------------------------------------------------------------------------------
 1 | application:
 2 |   word graph
 3 | 
 4 | resource:
 5 |   gpus: []
 6 |   cpu_per_gpu: auto
 7 |   dim: 128
 8 | 
 9 | graph:
10 |   file_name: <wikipedia.graph>
11 |   window: 5
12 |   min_count: 5
13 | 
14 | build:
15 |   optimizer:
16 |     type: SGD
17 |     lr: 0.025
18 |     weight_decay: 0.005
19 |   num_partition: auto
20 |   num_negative: 1
21 |   batch_size: 100000
22 |   episode_size: 1000
23 | 
24 | train:
25 |   model: LINE
26 |   num_epoch: 80
27 |   negative_weight: 5
28 |   augmentation_step: 1
29 |   random_walk_length: 40
30 |   random_walk_batch_size: 100
31 |   log_frequency: 1000
32 | 
33 | save:
34 |   file_name: line_wikipedia.pkl


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = source
 8 | BUILDDIR      = build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/doc/source/api/application.rst:
--------------------------------------------------------------------------------
1 | graphvite.application
2 | =====================
3 | 
4 | .. automodule:: graphvite.application
5 |     :members:
6 |     :inherited-members:
7 | 


--------------------------------------------------------------------------------
/doc/source/api/dataset.rst:
--------------------------------------------------------------------------------
1 | graphvite.dataset
2 | =================
3 | 
4 | .. automodule:: graphvite.dataset
5 |     :members:


--------------------------------------------------------------------------------
/doc/source/api/graph.rst:
--------------------------------------------------------------------------------
1 | graphvite.graph
2 | ===============
3 | 
4 | .. automodule:: graphvite.graph
5 |     :members:


--------------------------------------------------------------------------------
/doc/source/api/optimizer.rst:
--------------------------------------------------------------------------------
1 | graphvite.optimizer
2 | ===================
3 | 
4 | .. automodule:: graphvite.optimizer
5 |     :members:
6 | 


--------------------------------------------------------------------------------
/doc/source/api/solver.rst:
--------------------------------------------------------------------------------
1 | graphvite.solver
2 | ================
3 | 
4 | .. automodule:: graphvite.solver
5 |     :members:


--------------------------------------------------------------------------------
/doc/source/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | # import os
 16 | # import sys
 17 | # sys.path.insert(0, os.path.abspath('.'))
 18 | 
 19 | 
 20 | # -- Project information -----------------------------------------------------
 21 | 
 22 | project = u'GraphVite'
 23 | copyright = u'2019, MilaGraph'
 24 | author = u'Zhaocheng Zhu, Shizhen Xu, Meng Qu, Jian Tang'
 25 | 
 26 | import re
 27 | from graphvite import __version__
 28 | # The short X.Y version
 29 | version = re.match("\d+\.\d+", __version__).group()
 30 | # The full version, including alpha/beta/rc tags
 31 | release = __version__
 32 | 
 33 | 
 34 | # -- General configuration ---------------------------------------------------
 35 | 
 36 | # If your documentation needs a minimal Sphinx version, state it here.
 37 | #
 38 | # needs_sphinx = '1.0'
 39 | 
 40 | # Add any Sphinx extension module names here, as strings. They can be
 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 42 | # ones.
 43 | extensions = [
 44 |     'sphinx.ext.autodoc',
 45 |     'sphinx.ext.autosummary',
 46 |     'sphinx.ext.coverage',
 47 |     'sphinx.ext.viewcode',
 48 |     'sphinx.ext.napoleon'
 49 | ]
 50 | 
 51 | # Add any paths that contain templates here, relative to this directory.
 52 | templates_path = ['_templates']
 53 | 
 54 | # The suffix(es) of source filenames.
 55 | # You can specify multiple suffix as a list of string:
 56 | #
 57 | # source_suffix = ['.rst', '.md']
 58 | source_suffix = '.rst'
 59 | 
 60 | # The master toctree document.
 61 | master_doc = 'index'
 62 | 
 63 | # The language for content autogenerated by Sphinx. Refer to documentation
 64 | # for a list of supported languages.
 65 | #
 66 | # This is also used if you do content translation via gettext catalogs.
 67 | # Usually you set "language" from the command line for these cases.
 68 | language = None
 69 | 
 70 | # List of patterns, relative to source directory, that match files and
 71 | # directories to ignore when looking for source files.
 72 | # This pattern also affects html_static_path and html_extra_path.
 73 | exclude_patterns = []
 74 | 
 75 | # The name of the Pygments (syntax highlighting) style to use.
 76 | pygments_style = None
 77 | 
 78 | 
 79 | # -- Options for HTML output -------------------------------------------------
 80 | 
 81 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 82 | # a list of builtin themes.
 83 | #
 84 | html_theme = 'sphinx_rtd_theme'
 85 | 
 86 | # The name of an image file (relative to this directory) to place at the top
 87 | # of the sidebar.
 88 | #
 89 | # html_logo = None
 90 | 
 91 | # The name of an image file (relative to this directory) to use as a favicon of
 92 | # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 93 | # pixels large.
 94 | #
 95 | html_favicon = '../../asset/logo/favicon.ico'
 96 | 
 97 | # Theme options are theme-specific and customize the look and feel of a theme
 98 | # further.  For a list of options available for each theme, see the
 99 | # documentation.
100 | #
101 | # html_theme_options = {}
102 | 
103 | # Add any paths that contain custom static files (such as style sheets) here,
104 | # relative to this directory. They are copied after the builtin static files,
105 | # so a file named "default.css" will overwrite the builtin "default.css".
106 | html_static_path = []
107 | 
108 | # Custom sidebar templates, must be a dictionary that maps document names
109 | # to template names.
110 | #
111 | # The default sidebars (for documents that don't match any pattern) are
112 | # defined by theme itself.  Builtin themes are using these templates by
113 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
114 | # 'searchbox.html']``.
115 | #
116 | # html_sidebars = {}
117 | 
118 | # Generate module index or not
119 | html_domain_indices = False
120 | 
121 | # -- Options for HTMLHelp output ---------------------------------------------
122 | 
123 | # Output file base name for HTML help builder.
124 | htmlhelp_basename = 'GraphVite doc'
125 | 
126 | 
127 | # -- Options for LaTeX output ------------------------------------------------
128 | 
129 | latex_elements = {
130 |     # The paper size ('letterpaper' or 'a4paper').
131 |     #
132 |     # 'papersize': 'letterpaper',
133 | 
134 |     # The font size ('10pt', '11pt' or '12pt').
135 |     #
136 |     # 'pointsize': '10pt',
137 | 
138 |     # Additional stuff for the LaTeX preamble.
139 |     #
140 |     # 'preamble': '',
141 | 
142 |     # Latex figure (float) alignment
143 |     #
144 |     # 'figure_align': 'htbp',
145 | }
146 | 
147 | # Grouping the document tree into LaTeX files. List of tuples
148 | # (source start file, target name, title,
149 | #  author, documentclass [howto, manual, or own class]).
150 | latex_documents = [
151 |     (master_doc, 'GraphVite.tex', u'GraphVite Documentation',
152 |      u'Zhaocheng Zhu, Shizhen Xu, Meng Qu, Jian Tang', 'manual'),
153 | ]
154 | 
155 | 
156 | # -- Options for manual page output ------------------------------------------
157 | 
158 | # One entry per manual page. List of tuples
159 | # (source start file, name, description, authors, manual section).
160 | man_pages = [
161 |     (master_doc, 'graphvite', u'GraphVite Documentation',
162 |      [author], 1)
163 | ]
164 | 
165 | 
166 | # -- Options for Texinfo output ----------------------------------------------
167 | 
168 | # Grouping the document tree into Texinfo files. List of tuples
169 | # (source start file, target name, title, author,
170 | #  dir menu entry, description, category)
171 | texinfo_documents = [
172 |     (master_doc, 'GraphVite', u'GraphVite Documentation',
173 |      author, 'GraphVite', 'One line description of project.',
174 |      'Miscellaneous'),
175 | ]
176 | 
177 | 
178 | # -- Options for Epub output -------------------------------------------------
179 | 
180 | # Bibliographic Dublin Core info.
181 | epub_title = project
182 | 
183 | # The unique identifier of the text. This can be a ISBN number
184 | # or the project homepage.
185 | #
186 | # epub_identifier = ''
187 | 
188 | # A unique identification for the text.
189 | #
190 | # epub_uid = ''
191 | 
192 | # A list of files that should not be packed into the epub file.
193 | epub_exclude_files = ['search.html']
194 | 
195 | 
196 | # -- Extension configuration -------------------------------------------------
197 | 
198 | # sort members by member type
199 | autodoc_member_order = 'groupwise'


--------------------------------------------------------------------------------
/doc/source/developer/framework.rst:
--------------------------------------------------------------------------------
 1 | Understand the Framework
 2 | ========================
 3 | 
 4 | The framework of GraphVite is composed of two parts, a core library and a Python wrapper.
 5 | The Python wrapper can be found in ``python/graphvite/``. It provides an auto wrapper for
 6 | classes in the core library, as well as implementation for applications and datasets.
 7 | 
 8 | The core library is implemented with C++11 and CUDA, and binded to Python using
 9 | `pybind11`_. It covers implementation of all computation-related classes in GraphVite,
10 | such as graphs, solvers and optimizers. All these ingredients are packaged as classes,
11 | similar to the Python interface. The source code can be found in ``include/`` and
12 | ``src/``.
13 | 
14 | .. _pybind11: https://pybind11.readthedocs.io
15 | 
16 | In the C++ implementation, there is something different from Python. The graphs and
17 | solvers are templaterized by the underlying data types, and the length of embedding
18 | vectors. This design enables dynamic data type in Python interface, as well as maximal
19 | compile-time optimization.
20 | 
21 | The C++ interface is highly abstracted to faciliate further development on GraphVite.
22 | Generally, by inheriting from the core interface, we can implement new graph embedding
23 | instances without caring about any scheduling detail.
24 | 
25 | The source code is organized as follows.
26 | 
27 |     - ``include/base/*`` implements basic data structures
28 |     - ``include/core/*`` implements optimizers, and core interface of graphs and solvers
29 |     - ``include/instance/*`` implements instances of graphs and solvers
30 |     - ``include/instance/gpu/*`` implements GPU training & evaluation routines
31 |     - ``include/instance/model/*`` implements forward & backward propagation of models
32 |     - ``include/util/*`` implements basic utils
33 |     - ``include/bind.h`` implements Python bindings
34 |     - ``src/graphvite.cu`` instantiates all Python classes
35 | 


--------------------------------------------------------------------------------
/doc/source/developer/model.rst:
--------------------------------------------------------------------------------
  1 | Customize Models
  2 | ================
  3 | 
  4 | One common demand for graph embedding is to customize the model (i.e. score function).
  5 | Here we will demonstrate an example of adding a new model to the knowledge graph
  6 | solver.
  7 | 
  8 | First, get into ``include/model/knowledge_graph.h``. Fork an existing model class
  9 | (e.g. TransE) and change it to a new name.
 10 | 
 11 | .. code-block:: c++
 12 | 
 13 |     template<class _Vector>
 14 |     class TransE {
 15 |         __host__ __device__ static void forward(...);
 16 | 
 17 |         template <OptimizerType optimizer_type>
 18 |         __host__ __device__ static void backward(...);
 19 | 
 20 |         template <OptimizerType optimizer_type>
 21 |         __host__ __device__ static void backward(...);
 22 | 
 23 |         template <OptimizerType optimizer_type>
 24 |         __host__ __device__ static void backward(...);
 25 |     }
 26 | 
 27 | Here a model class contains a forward function and several overloads of the backward
 28 | function, which correspond to different categories of optimizers. We are going to
 29 | modify a forward and a backward function, and then do some copy-and-paste work to the
 30 | others.
 31 | 
 32 | Let's start from the forward function. This function takes a triplet of embedding
 33 | vectors, and outputs a score.
 34 | 
 35 | .. code-block:: c++
 36 | 
 37 |     void forward(const Vector &head, const Vector &tail, const Vector &relation,
 38 |                  Float &output, float margin)
 39 | 
 40 | The last argument is either margin for latent distance model or l3 regularization
 41 | for tensor decomposition models. For TransE, the function is implemented as
 42 | 
 43 | .. code-block:: c++
 44 | 
 45 |     output = 0;
 46 |     FOR(i, dim)
 47 |         output += abs(head[i] + relation[i] - tail[i]);
 48 |     output = margin - SUM(output);
 49 | 
 50 | Here we need to replace this piece of code with our own formulas. Note that this
 51 | function should be compatible with both CPU and GPU. This can be easily achieved by
 52 | helper macros defined in GraphVite.
 53 | 
 54 | We just need to use the macro ``FOR(i, stop)`` instead of the conventional
 55 | ``for (int i = 0; i < stop; i++)``. For any accumulator ``x`` inside the loop (e.g.
 56 | ``output`` in this case), update it with ``x = SUM(x)`` after the loop to get the
 57 | correct value.
 58 | 
 59 | For the backward function. It takes additional arguments of moment statistics, head
 60 | gradient, optimizer and sample weight. For example, here is an overload with 1 moment
 61 | per embedding.
 62 | 
 63 | .. code-block:: c++
 64 | 
 65 |     template<OptimizerType optimizer_type>
 66 |     void backward(Vector &head, Vector &tail, Vector &relation,
 67 |                   Vector &head_moment1, Vector &tail_moment1, Vector &relation_moment1,
 68 |                   float margin, Float gradient, const Optimizer &optimizer, Float weight)
 69 | 
 70 | The backward function should compute the gradient for each embedding, and update them
 71 | with the optimizer. Typically, this is implemented as
 72 | 
 73 | .. code-block:: c++
 74 | 
 75 |     auto update = get_update_function_1_moment<Float, optimizer_type>();
 76 |     FOR(i, dim) {
 77 |         Float h = head[i];
 78 |         Float t = tail[i];
 79 |         Float r = relation[i];
 80 |         Float s = h + r - t > 0 ? 1 : -1;
 81 |         head[i] -= (optimizer.*update)(h, -gradient * s, head_moment1[i], weight);
 82 |         tail[i] -= (optimizer.*update)(t, gradient * s, tail_moment1[i], weight);
 83 |         relation[i] -= (optimizer.*update)(r, -gradient * s, relation_moment1[i], weight);
 84 |     }
 85 | 
 86 | Here we modify this function according to the partial derivatives of our forward
 87 | function. Once we complete a backward function, we can copy them to the other
 88 | overloads. The only difference among overloads is that they use different update
 89 | function and numbers of moment statistics.
 90 | 
 91 | Finally, we have to let the solver know there is a new model. In
 92 | ``instance/knowledge_graph.cuh``, add the name of your model in
 93 | ``get_available_models()``. Also add run-time dispatch of the new model in
 94 | ``train_dispatch()`` and ``predict_dispatch()``.
 95 | 
 96 | .. code-block:: c++
 97 | 
 98 |     switch (num_moment) {
 99 |         case 0:
100 |             if (solver->model == ...)
101 |                 ...
102 |         case 1:
103 |             if (solver->model == ...)
104 |                 ...
105 |         case 2:
106 |             if (solver->model == ...)
107 |                 ...
108 | 
109 | Compile the source and it should be ready.


--------------------------------------------------------------------------------
/doc/source/developer/routine.rst:
--------------------------------------------------------------------------------
 1 | Customize Routine
 2 | =================
 3 | 
 4 | For advanced developers, GraphVite also supports customizing routines, such as
 5 | training and prediction. Here we will illustrate how to add a new routine to the
 6 | knowledge graph solver.
 7 | 
 8 | Before we start, it would be better if you know some basics about
 9 | `the index and threads`_ in CUDA. In GraphVite, the threads are arranged in a group
10 | of 32 (`warp`_). Threads in a group works simultaneously on an edge sample, where
11 | each thread is responsible for computation in some dimensions, according to the
12 | modulus of the dimension.
13 | 
14 | .. _the index and threads: https://en.wikipedia.org/wiki/Thread_block_(CUDA_programming)#Indexing
15 | .. _warp: https://en.wikipedia.org/wiki/Thread_block_(CUDA_programming)#Warps
16 | 
17 | First, get into ``include/instance/gpu/knowledge_graph.h``. This file includes several
18 | training functions and a prediction function.
19 | 
20 | .. code-block:: c++
21 | 
22 |     template<class Vector, class Index, template<class> class Model, OptimizerType optimizer_type>
23 |     __global__ void train(...)
24 | 
25 |     template<class Vector, class Index, template<class> class Model, OptimizerType optimizer_type>
26 |     __global__ void train_1_moment(...)
27 | 
28 |     template<class Vector, class Index, template<class> class Model, OptimizerType optimizer_type>
29 |     __global__ void train_2_moment(...)
30 | 
31 |     template<class Vector, class Index, template<class> class Model>
32 |     __global__ void predict(...)
33 | 
34 | The 3 implementations correspond to 3 categories of optimizers, as we have seen in
35 | :doc:`routine`. Routines with different numbers of moment statistics are separated
36 | to achieve maximal compile-time optimization.
37 | 
38 | Let's take a look at a training function. Generally, the function body looks like
39 | 
40 | .. code-block:: c++
41 | 
42 |     for (int sample_id = thread_id / kWarpSize; sample_id < batch_size; sample_id += num_thread / kWarpSize) {
43 |         if (adversarial_temperature > kEpsilon)
44 |             for (int s = 0; s < num_negative; s++)
45 |                 normalizer += ...;
46 | 
47 |         for (int s = 0; s <= num_negative; s++) {
48 |             model.forward(sample[s], logit);
49 |             prob = sigmoid(logit);
50 | 
51 |             gradient = ...;
52 |             weight = ...;
53 |             sample_loss += ...;
54 |             model.backward<optimizer_type>(sample[s], gradient);
55 |         }
56 |     }
57 | 
58 | The outer loop iterates over all positive samples. For each positive sample and its
59 | negative samples, we first compute the normalizer of self-adversarial negative
60 | sampling, and then perform forward and backward propagation for each sample.
61 | 
62 | For example, if we want to change the negative log likelihood to a mean square error,
63 | we can change the following lines.
64 | 
65 | .. code-block:: c++
66 | 
67 |     gradient = 2 * (logit - label);
68 |     sample_loss += weight * (logit - label) * (logit - label);
69 | 
70 | Or we can use a margin-based ranking loss like
71 | 
72 | .. code-block:: c++
73 | 
74 |     model.forward(samples[num_negative], positive_score); // the positive sample
75 | 
76 |     for (int s = 0; s < num_negative; s++) {
77 |         model.forward(samples[s], negative_logit);
78 |         if (positive_score - negative_score < margin) {
79 |             sample_loss += negative_score - positive_score + margin;
80 |             gradient = 1;
81 |             model.backward<optimizer_type>(sample[s], gradient);
82 |             model.backward<optimizer_type>(sample[num_negative], -gradient);
83 |         }
84 |     }
85 | 
86 | We may also add new hyperparameters or training routines. Note if we change
87 | the signature of the function, we should also update its calls accrodingly. For
88 | knowledge graph, they are in ``train_dispatch()`` and ``predict_dispatch()`` of file
89 | ``include/instance/knowledge_graph.cuh``.


--------------------------------------------------------------------------------
/doc/source/developer/solver.rst:
--------------------------------------------------------------------------------
 1 | Customize Solvers
 2 | =================
 3 | 
 4 | A more interesting thing to explore is extending GraphVite with new solvers.
 5 | Generally, the core library is capable to perform any graph embedding variant that
 6 | fits into the following paradigm.
 7 | 
 8 | - The training samples are edges.
 9 |   There may be additional attributes (e.g. labels) to edge samples.
10 | 
11 | To support that, GraphVite provides a protocol interface and a series of abstract
12 | classes. We only need to declare the protocols for our parameters, and fill in the
13 | virtual member functions for the classes.
14 | 
15 | Let's begin with the protocol interface. There are 3 main protocols for parameters.
16 | 
17 | - ``head``
18 | - ``tail``
19 | - ``global``
20 | 
21 | For each parameter matrix, it should be assigned one of these protocols.
22 | ``head`` means that the parameter matrix is indexed by head nodes in directed edges,
23 | while ``tail`` corresponds to tail nodes. Any other parameter matrix should be assigned
24 | with ``global``.
25 | 
26 | There are also 2 optional protocols. One is ``in place``, which implies that the
27 | parameter matrix takes in-place update and doesn't need storage for gradients.
28 | The other is ``shared``, which implies the matrix is shared with the previous one.
29 | This may be used for tied weight case.
30 | 
31 | Each parameter matrix should also be specified with a shape. We can use ``auto``
32 | if the shape can be inferred from the protocol and the graph structure.
33 | 
34 | For example, knowledge graph embeddings take the following settings.
35 | 
36 | .. code-block:: c++
37 | 
38 |     // head embeddings, tail embeddings, relation embeddings
39 |     protocols = {head | in place, tail | in place | shared, global};
40 |     shapes = {auto, auto, graph->num_relation};
41 | 
42 | If the learning routine also needs negative sampling, we should additionally
43 | specify a negative sampler protocol. For knowledge graph embedding, this is
44 | 
45 | .. code-block:: c++
46 | 
47 |     negative_sampler_protocol = head | tail;
48 | 
49 | Given the protocols, GraphVite will automatically schedule the paramters and samples
50 | over multiple GPUs, using an algorithm called parallel negative sampling. For a more
51 | detailed explanation of the algorithm, see section 3.2 in `GraphVite paper`_.
52 | 
53 | .. _GraphVite paper: https://arxiv.org/pdf/1903.00757.pdf
54 | 
55 | .. note::
56 |     Parallel negative sampling only takes place when at least one parameter matrix
57 |     is ``head`` or ``tail``. If all parameters are ``global``, GraphVite will schedule
58 |     them by standard data parallel.
59 | 
60 | To implement a new solver, we need to implement ``get_protocols()``,
61 | ``get_sampler_protocol()`` and ``get_shapes()`` as above. Some additional helper
62 | functions may be required to complete the solver.
63 | 
64 | A solver also contains a sampler and a worker class. By default, the sampler samples
65 | positive edges from the graph, with probability proportional to the weight of each
66 | edge. We only need to specify the additional edge attributes in ``get_attributes()``.
67 | 
68 | For the worker, it will build the negative sampler according to the its protocol.
69 | We need to specify the GPU implementation of models in ``train_dispatch()``. See
70 | :doc:`model` for how to do that.
71 | 
72 | Finally, to get our new solver appeared in Python, add a Python declaration for it in
73 | ``include/bind.h``, and instantiate it in ``src/graphvite.cu``.
74 | 
75 | See ``include/instance/*`` for all solver instances.
76 | 
77 | .. note::
78 |     Functions in solver, sampler and worker can be overrided. For example,
79 |     :class:`GraphSolver <graphvite.solver.GraphSolver>` overrides edge sampling with
80 |     online augmentation.


--------------------------------------------------------------------------------
/doc/source/faq.rst:
--------------------------------------------------------------------------------
 1 | Frequently Asked Questions
 2 | ==========================
 3 | 
 4 | .. contents::
 5 |     :local:
 6 | 
 7 | How should I cite GraphVite?
 8 | ----------------------------
 9 | 
10 | If you find GraphVite helps your research, please cite it in your publications.
11 | 
12 | .. code-block:: none
13 | 
14 |     @inproceedings{zhu2019graphvite,
15 |         title={GraphVite: A High-Performance CPU-GPU Hybrid System for Node Embedding},
16 |         author={Zhu, Zhaocheng and Xu, Shizhen and Qu, Meng and Tang, Jian},
17 |         booktitle={The World Wide Web Conference},
18 |         pages={2494--2504},
19 |         year={2019},
20 |         organization={ACM}
21 |     }
22 | 
23 | Why is my CUDA driver version insufficient for CUDA runtime version?
24 | --------------------------------------------------------------------
25 | 
26 | This is because you have installed a GraphVite compiled for some later CUDA version.
27 | You can check your CUDA version with ``nvcc -V``, and then install the corresponding
28 | package by
29 | 
30 | .. code-block:: bash
31 | 
32 |     conda install -c milagraph -c conda-forge graphvite cudatoolkit=x.x
33 | 
34 | where ``x.x`` is your CUDA version, e.g. 9.2 or 10.0.
35 | 
36 | Note graphvite does not support CUDA version earlier than 9.2, due to a failure of
37 | old version ``nvcc``.
38 | 
39 | Why is there a compilation error for template deduction?
40 | --------------------------------------------------------
41 | 
42 | This is due to a failure of old version ``nvcc`` in compiling the templates in
43 | ``pybind11``. Generally, ``nvcc 9.2`` or later will work.
44 | 
45 | Why is the access to embeddings so slow?
46 | ----------------------------------------
47 | 
48 | Due to the binding mechanism, the numpy view of embeddings is generated each time
49 | when you access the embeddings in Python. Such generation may take a non-trivial
50 | overhead. To avoid that cost, we recommend you to copy the reference of the
51 | embeddings.
52 | 
53 | .. code-block:: python
54 | 
55 |     embeddings = solver.vertex_embeddings
56 | 
57 | Now the access to ``embeddings`` should be good.
58 | 
59 | How can I speed up compliation?
60 | -------------------------------
61 | 
62 | The compilation can be accelerated by reducing the number of template instantiations.
63 | You can pass ``-DFAST_COMPILE=True`` to cmake, which will only compile commonly used
64 | embedding dimensions. You may also comment out unnecessary instantiations in
65 | ``src/graphvite.cu`` for further speed-up.
66 | 
67 | How can I solve the BLAS issue in ``faiss``?
68 | --------------------------------------------
69 | 
70 | ``faiss`` is only required by the visualization application in GraphVite. If you do
71 | not need visualization, you can pass ``-DNO_FAISS=True`` to cmake to skip that.


--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. GraphVite documentation master file, created by
 2 |    sphinx-quickstart on Wed May 29 18:13:45 2019.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | GraphVite - graph embedding at high speed and large scale
 7 | =========================================================
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 1
11 |    :caption: Get Started
12 | 
13 |    Introduction <introduction>
14 |    install
15 |    quick_start
16 |    overview
17 |    benchmark
18 |    pretrained_model
19 | 
20 | .. toctree::
21 |    :maxdepth: 1
22 |    :caption: User Guide
23 | 
24 |    user/command_line
25 |    user/configuration
26 |    user/format
27 |    user/python
28 |    user/auto
29 | 
30 | .. toctree::
31 |    :maxdepth: 1
32 |    :caption: Developer Guide
33 | 
34 |    developer/framework
35 |    developer/model
36 |    developer/routine
37 |    developer/solver
38 | 
39 | .. toctree::
40 |    :maxdepth: 1
41 |    :caption: Package Reference
42 | 
43 |    Application <api/application>
44 |    Graph <api/graph>
45 |    Solver <api/solver>
46 |    Optimizer <api/optimizer>
47 |    Dataset <api/dataset>
48 | 
49 | .. toctree::
50 |    :maxdepth: 1
51 |    :caption: FAQ
52 | 
53 |    FAQ <faq>
54 | 
55 | Indices and tables
56 | ==================
57 | 
58 | * :ref:`genindex`
59 | * :ref:`search`


--------------------------------------------------------------------------------
/doc/source/install.rst:
--------------------------------------------------------------------------------
 1 | Install
 2 | =======
 3 | 
 4 | GraphVite can be installed from either conda or source. You can also easily install
 5 | the library on `Google Colab`_ for demonstration.
 6 | 
 7 | .. _Google Colab: https://colab.research.google.com/
 8 | 
 9 | Install from conda
10 | ------------------
11 | 
12 | To install GraphVite from ``conda``, you only need one line.
13 | 
14 | .. code-block:: bash
15 | 
16 |     conda install -c milagraph -c conda-forge graphvite cudatoolkit=$(nvcc -V | grep -Po "(?<=V)\d+.\d+")
17 | 
18 | By default, this will install all dependencies, including ``PyTorch`` and
19 | ``matplotlib``. If you only need embedding training without evaluation, there is an
20 | alternative with minimum dependencies.
21 | 
22 | .. code-block:: bash
23 | 
24 |     conda install -c milagraph -c conda-forge graphvite-mini cudatoolkit=$(nvcc -V | grep -Po "(?<=V)\d+.\d+")
25 | 
26 | Install from source
27 | -------------------
28 | 
29 | First, clone GraphVite from GitHub.
30 | 
31 | .. code-block:: bash
32 | 
33 |     git clone https://github.com/DeepGraphLearning/graphvite
34 |     cd graphvite
35 | 
36 | Install compilation and runtime dependencies via ``conda``.
37 | 
38 | .. code-block:: bash
39 | 
40 |     conda install -y --file conda/requirements.txt
41 | 
42 | Compile the code using the following directives. If you have ``faiss`` installed
43 | from source, you can pass ``-DFAISS_PATH=/path/to/faiss`` to ``cmake``.
44 | 
45 | .. code-block:: bash
46 | 
47 |     mkdir build
48 |     cd build && cmake .. && make && cd -
49 | 
50 | Finally, install Python bindings.
51 | 
52 | .. code-block:: bash
53 | 
54 |     cd python && python setup.py install && cd -
55 | 
56 | Install on Colab
57 | ----------------
58 | 
59 | First, install Miniconda on Colab.
60 | 
61 | .. code-block:: bash
62 | 
63 |     !wget -c https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
64 |     !chmod +x Miniconda3-latest-Linux-x86_64.sh
65 |     !./Miniconda3-latest-Linux-x86_64.sh -b -p /usr/local -f
66 | 
67 | Then we install GraphVite and some tools for Jupyter Notebook.
68 | 
69 | .. code-block:: bash
70 | 
71 |     !conda install -y -c milagraph -c conda-forge graphvite \
72 |         python=3.6 cudatoolkit=$(nvcc -V | grep -Po "(?<=V)\d+\.\d+")
73 |     !conda install -y wurlitzer ipykernel
74 | 
75 | Load the installed packages. Now you are ready to go.
76 | 
77 | .. code-block:: python
78 | 
79 |     import site
80 |     site.addsitedir("/usr/local/lib/python3.6/site-packages")
81 |     %reload_ext wurlitzer
82 | 


--------------------------------------------------------------------------------
/doc/source/introduction.rst:
--------------------------------------------------------------------------------
  1 | GraphVite - graph embedding at high speed and large scale
  2 | =========================================================
  3 | 
  4 | .. include:: link.rst
  5 | 
  6 | GraphVite is a general graph embedding engine, dedicated to high-speed and
  7 | large-scale embedding learning in various applications. By cooperating CPUs and GPUs
  8 | for learning, it scales to million-scale or even billion-scale graphs. With its
  9 | Python interface, you can easily practice advanced graph embedding algorithms, and
 10 | get results in incredibly short time.
 11 | 
 12 | Try GraphVite if you have any of the following demands.
 13 | 
 14 | - You want to reproduce graph learning algorithms on a uniform platform.
 15 | - You need fast visualization for graphs or high-dimensional data.
 16 | - You are tired of waiting a long time for prototyping or tuning models.
 17 | - You need to learn representations of large graphs or knowledge graphs.
 18 | 
 19 | Generally, GraphVite provides complete training and evaluation pipelines for 3
 20 | applications: **node embedding**, **knowledge graph embedding** and
 21 | **graph & high-dimensional data visualization**. Besides, it also includes 9 popular
 22 | models, along with their benchmarks on a bunch of standard datasets.
 23 | 
 24 | .. figure:: ../../asset/graph.png
 25 |     :align: left
 26 |     :height: 180px
 27 |     :target: overview.html#node-embedding
 28 |     :figclass: align-center
 29 | 
 30 |     Node Embedding
 31 | 
 32 | .. figure:: ../../asset/knowledge_graph.png
 33 |     :align: left
 34 |     :height: 180px
 35 |     :target: overview.html#knowledge-graph-embedding
 36 |     :figclass: align-center
 37 | 
 38 |     Knowledge Graph |br| Embedding
 39 | 
 40 | .. figure:: ../../asset/visualization.png
 41 |     :align: left
 42 |     :height: 180px
 43 |     :target: overview.html#graph-high-dimensional-data-visualization
 44 |     :figclass: align-center
 45 | 
 46 |     Graph & |br| High-dimensional |br| Data Visualization
 47 | 
 48 | .. |br| raw:: html
 49 | 
 50 |     <br>
 51 | 
 52 | .. raw:: html
 53 | 
 54 |     <div style="clear: both" />
 55 | 
 56 | How fast is GraphVite?
 57 | ----------------------
 58 | 
 59 | To give a brief idea of GraphVite's speed, we summarize the training time of
 60 | GraphVite along with the best open-source implementations. All the time is reported
 61 | based on a server with 24 CPU threads and 4 V100 GPUs.
 62 | 
 63 | Training time of node embedding on `Youtube`_ dataset.
 64 | 
 65 | +-------------+----------------------------+-----------+---------+
 66 | | Model       | Existing Implementation    | GraphVite | Speedup |
 67 | +=============+============================+===========+=========+
 68 | | `DeepWalk`_ | `1.64 hrs (CPU parallel)`_ | 1.19 mins | 82.9x   |
 69 | +-------------+----------------------------+-----------+---------+
 70 | | `LINE`_     | `1.39 hrs (CPU parallel)`_ | 1.17 mins | 71.4x   |
 71 | +-------------+----------------------------+-----------+---------+
 72 | | `node2vec`_ | `24.4 hrs (CPU parallel)`_ | 4.39 mins | 334x    |
 73 | +-------------+----------------------------+-----------+---------+
 74 | 
 75 | .. _1.64 hrs (CPU parallel): https://github.com/phanein/deepwalk
 76 | .. _1.39 hrs (CPU parallel): https://github.com/tangjianpku/LINE
 77 | .. _24.4 hrs (CPU parallel): https://github.com/aditya-grover/node2vec
 78 | 
 79 | Training / evaluation time of knowledge graph embedding on `FB15k`_ dataset.
 80 | 
 81 | +-----------+---------------------------------+--------------------+---------------+
 82 | | Model     | Existing Implementation         | GraphVite          | Speedup       |
 83 | +===========+=================================+====================+===============+
 84 | | `TransE`_ | `1.31 hrs / 1.75 mins (1 GPU)`_ | 13.5 mins / 54.3 s | 5.82x / 1.93x |
 85 | +-----------+---------------------------------+--------------------+---------------+
 86 | | `RotatE`_ | `3.69 hrs / 4.19 mins (1 GPU)`_ | 28.1 mins / 55.8 s | 7.88x / 4.50x |
 87 | +-----------+---------------------------------+--------------------+---------------+
 88 | 
 89 | .. _1.31 hrs / 1.75 mins (1 GPU): https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding
 90 | .. _3.69 hrs / 4.19 mins (1 GPU): https://github.com/DeepGraphLearning/KnowledgeGraphEmbedding
 91 | 
 92 | Training time of high-dimensional data visualization on `MNIST`_ dataset.
 93 | 
 94 | +-------------+-----------------------------+-----------+---------+
 95 | | Model       | Existing Implementation     | GraphVite | Speedup |
 96 | +=============+=============================+===========+=========+
 97 | | `LargeVis`_ | `15.3 mins (CPU parallel)`_ | 13.9 s    | 66.8x   |
 98 | +-------------+-----------------------------+-----------+---------+
 99 | 
100 | .. _15.3 mins (CPU parallel): https://github.com/lferry007/LargeVis
101 | 
102 | Comparison to concurrent work
103 | -----------------------------
104 | 
105 | A work concurrent to GraphVite is `PyTorch-BigGraph`_, which aims at accelerating
106 | knowledge graph embedding on large-scale data. Here is an apple-to-apple comparison
107 | of models implemented in both libraries on `FB15k`_, under the same setting of
108 | hyperparameters.
109 | 
110 | .. _PyTorch-BigGraph: https://torchbiggraph.readthedocs.io
111 | 
112 | +-------------+------------------+-----------+---------+
113 | | Model       | PyTorch-BigGraph | GraphVite | Speedup |
114 | +=============+==================+===========+=========+
115 | | `TransE`_   | 1.21 hrs         | 8.37 mins | 8.70x   |
116 | +-------------+------------------+-----------+---------+
117 | | `DistMult`_ | 2.48 hrs         | 20.3 mins | 7.33x   |
118 | +-------------+------------------+-----------+---------+
119 | | `ComplEx`_  | 3.13 hrs         | 18.5 mins | 10.1x   |
120 | +-------------+------------------+-----------+---------+
121 | 
122 | GraphVite surpasses its counterpart by a signficant margin. Besides, the framework of
123 | GraphVite also supports two more applications, and provides many benchmarks for easy
124 | research and development.
125 | 
126 | About the name
127 | --------------
128 | GraphVite(/ɡɹæfvit/) is a combination of English word "graph" and French word
129 | "vite", which means "rapid". GraphVite represents the traits of this library,
130 | as well as the bilingual environment of `Mila`_ where the library was developed.
131 | 
132 | .. _Mila: https://mila.quebec


--------------------------------------------------------------------------------
/doc/source/link.rst:
--------------------------------------------------------------------------------
 1 | .. Node embedding models
 2 | .. _DeepWalk: https://arxiv.org/pdf/1403.6652.pdf
 3 | .. _LINE: https://arxiv.org/pdf/1503.03578.pdf
 4 | .. _node2vec: https://www.kdd.org/kdd2016/papers/files/rfp0218-groverA.pdf
 5 | 
 6 | .. Knowledge graph embedding models
 7 | .. _TransE: http://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data.pdf
 8 | .. _DistMult: https://arxiv.org/pdf/1412.6575.pdf
 9 | .. _ComplEx: http://proceedings.mlr.press/v48/trouillon16.pdf
10 | .. _SimplE: https://papers.nips.cc/paper/7682-simple-embedding-for-link-prediction-in-knowledge-graphs.pdf
11 | .. _RotatE: https://arxiv.org/pdf/1902.10197.pdf
12 | .. _QuatE: https://papers.nips.cc/paper/8541-quaternion-knowledge-graph-embeddings.pdf
13 | 
14 | .. Graph & high-dimensional data visualization models
15 | .. _LargeVis: https://arxiv.org/pdf/1602.00370.pdf
16 | 
17 | .. GraphVite
18 | .. _GraphVite: https://arxiv.org/pdf/1903.00757.pdf
19 | .. _Repo: https://github.com/DeepGraphLearning/graphvite
20 | 
21 | .. Graph datasets
22 | .. _Youtube: http://conferences.sigcomm.org/imc/2007/papers/imc170.pdf
23 | .. _Flickr: http://conferences.sigcomm.org/imc/2007/papers/imc170.pdf
24 | .. _Friendster-small: https://arxiv.org/pdf/1903.00757.pdf
25 | 
26 | .. Knowledge graph datasets
27 | .. _FB15k: http://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data.pdf
28 | .. _FB15k-237: https://www.aclweb.org/anthology/W15-4007
29 | .. _WN18: http://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data.pdf
30 | .. _WN18RR: https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/17366/15884
31 | .. _Wikidata5m: https://arxiv.org/pdf/1911.06136.pdf
32 | 
33 | .. Image datasets
34 | .. _MNIST: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf
35 | .. _ImageNet: https://arxiv.org/pdf/1409.0575.pdf
36 | 
37 | .. Misc
38 | .. _WordNet: http://www.cs.columbia.edu/~vh/courses/LexicalSemantics/Ontologies/miller-wordnet95.pdf
39 | .. _Wikidata: https://www.wikidata.org
40 | .. _Wikipedia: https://www.wikipedia.org


--------------------------------------------------------------------------------
/doc/source/overview.rst:
--------------------------------------------------------------------------------
 1 | Application Overview
 2 | ====================
 3 | 
 4 | In GraphVite, the pipelines are packaged into classes, which we call applications.
 5 | 
 6 | There are 3 main applications, node embedding, knowledge graph embedding, and
 7 | graph & high-dimensional data visualization. For each application, GraphVite loads
 8 | an input graph, perfoms embedding training, and finally evaluates the embeddngs on
 9 | downstream tasks.
10 | 
11 | .. _node embedding:
12 | 
13 | Node Embedding
14 | --------------
15 | 
16 | Node embedding is a family of algorithms that learn a representation for each node
17 | in a graph. It is important for graph analysis and a variety of downstream tasks.
18 | 
19 | For example, node embedding can be leveraged for analyzing social networks, citation
20 | networks, or protein-protein interaction networks. It may be also helpful to other
21 | unsupervised learning problems with graph structures.
22 | 
23 | To qualify the learned embeddings, we evaluate them on the node classification and
24 | link prediction tasks.
25 | 
26 | .. seealso::
27 |     Package Reference:
28 |     :class:`GraphApplication <graphvite.application.GraphApplication>`
29 | 
30 | .. _knowledge graph embedding:
31 | 
32 | Knowledge Graph Embedding
33 | -------------------------
34 | 
35 | Knowledge graph (aka. knowledge base) is a family of graphs where each edge has a
36 | type, indicating the relation of the connected nodes. In knowledge graphs, nodes
37 | are called entities, and edges are called relations. The knowledge graph embedding
38 | algorithm aims to learn a representation for each entity and relation.
39 | 
40 | With knowledge graph embeddings, it is easy to compare entities or relations in a
41 | uniform space, and further infer unobserved links in a knowledge graph.
42 | 
43 | The learned embeddings are evaluated under the link prediction task in GraphVite.
44 | 
45 | .. seealso::
46 |     Package Reference:
47 |     :class:`KnowledgeGraphApplication <graphvite.application.KnowledgeGraphApplication>`
48 | 
49 | .. _visualization:
50 | 
51 | Graph & High-dimensional Data Visualization
52 | -------------------------------------------
53 | 
54 | Visualization is a critical step in exploring and analyzing graphs and
55 | high-dimensional data. Typically, visualization methods project each data points into
56 | a low-dimensional space.
57 | 
58 | As most projection methods treat the similarity between data points as a graph,
59 | GraphVite is also able to provide acceleration for this application. Taking a graph
60 | or a group of high-dimensional vectors, GraphVite can produce either 2D or 3D
61 | projections in a very short time.
62 | 
63 | .. seealso::
64 |     Package Reference:
65 |     :class:`VisualizationApplication <graphvite.application.VisualizationApplication>`


--------------------------------------------------------------------------------
/doc/source/pretrained_model.rst:
--------------------------------------------------------------------------------
 1 | Pre-trained Models
 2 | ==================
 3 | 
 4 | .. include:: link.rst
 5 | 
 6 | To facilitate the usage of knowledge graph representations in semantic tasks, we
 7 | provide a bunch of pre-trained embeddings for some common datasets.
 8 | 
 9 | Wikidata5m
10 | ----------
11 | 
12 | `Wikidata5m`_ is a large-scale knowledge graph dataset constructed from `Wikidata`_
13 | and `Wikipedia`_. It contains plenty of entities in the general domain, such as
14 | celebrities, events, concepts and things.
15 | 
16 | We trained 5 standard knowledge graph embedding models on `Wikidata5m`_. The
17 | performance benchmark of these models can be found :ref:`here <knowledge_graph_benchmark>`.
18 | 
19 | +-------------+-----------+---------+----------------------------+
20 | | Model       | Dimension | Size    | Download link              |
21 | +=============+===========+=========+============================+
22 | | `TransE`_   | 512       | 9.33 GB | `transe_wikidata5m.pkl`_   |
23 | +-------------+-----------+---------+----------------------------+
24 | | `DistMult`_ | 512       | 9.33 GB | `distmult_wikidata5m.pkl`_ |
25 | +-------------+-----------+---------+----------------------------+
26 | | `ComplEx`_  | 512       | 9.33 GB | `complex_wikidata5m.pkl`_  |
27 | +-------------+-----------+---------+----------------------------+
28 | | `SimplE`_   | 512       | 9.33 GB | `simple_wikidata5m.pkl`_   |
29 | +-------------+-----------+---------+----------------------------+
30 | | `RotatE`_   | 512       | 9.33 GB | `rotate_wikidata5m.pkl`_   |
31 | +-------------+-----------+---------+----------------------------+
32 | | `QuatE`_    | 512       | 9.36 GB | `quate_wikidata5m.pkl`_    |
33 | +-------------+-----------+---------+----------------------------+
34 | 
35 | .. _transe_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EX4c1Ud8M61KlDUn2U_yz_sBP_bXNuFnudfhRnYzWUFA2A?download=1
36 | .. _distmult_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EQsXL8UmSJhHt2uBdB32muMBo4o4RUaMR6KDEQTcsz3jvg?download=1
37 | .. _complex_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/ERAwwLdsvdRIlrkVujMetmEBV9RgizsFnW91pIpjkBjbTw?download=1
38 | .. _simple_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EVcJpJAzkThPu1vjgJLohscBgwtPajhTZvCCd8nEg1GiwA?download=1
39 | .. _rotate_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EWvX5Z0rWZ9GvmdLaM3ONx4BtxzDFehXdc0gwE52YEiX2Q?download=1
40 | .. _quate_wikidata5m.pkl: https://udemontreal-my.sharepoint.com/:u:/g/personal/zhaocheng_zhu_umontreal_ca/EUGNHMB9tlJAokjxBouyG08ByfAb3-IYHCszTMmJnQSegg?download=1
41 | 
42 | Load pre-trained models
43 | -----------------------
44 | 
45 | The pre-trained models can be loaded through ``pickle``.
46 | 
47 | .. code-block:: python
48 | 
49 |     import pickle
50 |     with open("transe_wikidata5m.pkl", "rb") as fin:
51 |         model = pickle.load(fin)
52 |     entity2id = model.graph.entity2id
53 |     relation2id = model.graph.relation2id
54 |     entity_embeddings = model.solver.entity_embeddings
55 |     relation_embeddings = model.solver.relation_embeddings
56 | 
57 | Load the alias mapping from the dataset. Now we can access the embeddings by natural language index.
58 | 
59 | .. code-block:: python
60 | 
61 |     import graphvite as gv
62 |     alias2entity = gv.dataset.wikidata5m.alias2entity
63 |     alias2relation = gv.dataset.wikidata5m.alias2relation
64 |     print(entity_embeddings[entity2id[alias2entity["machine learning"]]])
65 |     print(relation_embeddings[relation2id[alias2relation["field of work"]]])


--------------------------------------------------------------------------------
/doc/source/quick_start.rst:
--------------------------------------------------------------------------------
 1 | Quick Start
 2 | ===========
 3 | 
 4 | Here is a quick-start example that illustrate the pipeline in GraphVite. If ``pytorch``
 5 | is not installed, we can simply add ``--no-eval`` to skip the evaluation stage.
 6 | 
 7 | .. code-block:: bash
 8 | 
 9 |     graphvite baseline quick start
10 | 
11 | The example will automatically download a social network dataset called BlogCatalog,
12 | where nodes correspond to blog users. For each node, we learn an embedding vector that
13 | preserves its neighborhood structure, which is done by minimizing a reconstruction
14 | loss. GraphVite will display the progress and the loss during training.
15 | 
16 | Once the training is done, the learned embeddings are evaluated on link prediction and
17 | node classification tasks. For link prediction, we try to predict unseen edges with
18 | the embeddings. For node classification, we use the embeddings as inputs for
19 | multi-label classification of nodes.
20 | 
21 | Typically, this example takes no more than 1 minute. We will obtain some output like
22 | 
23 | .. code-block:: none
24 | 
25 |     Batch id: 6000
26 |     loss = 0.371041
27 | 
28 |     ------------- link prediction --------------
29 |     AUC: 0.899933
30 |     
31 |     ----------- node classification ------------
32 |     macro-F1@20%: 0.242114
33 |     micro-F1@20%: 0.391342
34 | 
35 | Note that the F1 scores may vary across different trials, as only one random split is
36 | evaluated for quick demonstration here.
37 | 
38 | The learned embeddings are saved into a pickle dump. We can load them for further
39 | use.
40 | 
41 |     >>> import pickle
42 |     >>> with open("line_blogcatalog.pkl", "rb") as fin:
43 |     >>>     model = pickle.load(fin)
44 |     >>> names = model.graph.id2name
45 |     >>> embeddings = model.solver.vertex_embeddings
46 |     >>> print(names[1024], embeddings[1024])
47 | 
48 | Another interesting example is a synthetic math dataset of arithmetic operations. By
49 | treating the operations as relations of a knowledge graph, we can learn embeddings
50 | that generalize to unseen triplets (i.e. computation formulas). Check out this example
51 | with
52 | 
53 | .. code-block:: bash
54 | 
55 |     graphvite baseline math
56 | 
57 | For a more in-depth tutorial about GraphVite, take a look at
58 | 
59 | - :doc:`user/command_line`
60 | - :doc:`user/configuration`
61 | - :doc:`user/python`
62 | - :doc:`user/auto`


--------------------------------------------------------------------------------
/doc/source/user/auto.rst:
--------------------------------------------------------------------------------
 1 | Magic of Auto
 2 | =============
 3 | 
 4 | Hyperparameter tuning is usually painful for machine learning practioners. In order
 5 | to help users focus on the most important part, GraphVite provides an auto deduction
 6 | for many hyperparameters. Generally, auto deduction will maximize the speed of the
 7 | system, while keep the performance loss as small as possible.
 8 | 
 9 | To invoke auto deduction, we can simply leave hyperparameters to their default
10 | values. An explicit way is to use ``auto`` in configuration files, or value
11 | ``gv.auto`` in Python.
12 | 
13 | Here lists hyperparameters that support auto deduction.
14 | 
15 | .. code-block:: yaml
16 | 
17 |     resource:
18 |         gpus: []
19 |         gpu_memory_limit: auto
20 |         cpu_per_gpu: auto
21 | 
22 |     build:
23 |         optimizer: auto
24 |         num_partition: auto
25 |         episode_size: auto
26 | 
27 |     train:
28 |         # for node embedding
29 |         augmentation_step: auto
30 | 
31 | .. note::
32 |     The auto value for ``gpus`` is an empty list.


--------------------------------------------------------------------------------
/doc/source/user/command_line.rst:
--------------------------------------------------------------------------------
 1 | Command Line
 2 | ============
 3 | 
 4 | As we have seen in :doc:`../quick_start`, GraphVite can be simply invoked from a
 5 | command line. Here are some other useful commands we can use.
 6 | 
 7 | Reproduce baseline benchmarks
 8 | -----------------------------
 9 | 
10 | .. code-block:: bash
11 | 
12 |     graphvite baseline [keyword ...] [--no-eval] [--gpu n] [--cpu m] [--epoch e]
13 | 
14 | GraphVite provides a large number of baselines on standard datasets. To reproduce
15 | a baseline benchmark, we only need to specify the keywords of the experiment, and
16 | the library will do the rest for us.
17 | 
18 | By default, baselines are configured to use all CPUs and GPUs. We may override this
19 | behavior by specifying the number of GPUs and the number of CPUs per GPU. We may also
20 | override the number of training epochs for fast experiments.
21 | 
22 | For example, the following command line reproduces RotatE model on FB15k dataset,
23 | using 4 GPUs and 12 CPUs.
24 | 
25 | .. code-block:: bash
26 | 
27 |     graphvite baseline rotate fb15k --gpu 4 --cpu 3
28 | 
29 | Use ``graphvite list`` to get a list of available baselines.
30 | 
31 | Run configuration files
32 | -----------------------
33 | 
34 | Custom experiments can be easily carried out in GraphVite through a yaml configuration.
35 | This is especially convenient if we want to use GraphVite as an off-the-shelf tool
36 | for pretraining embeddings.
37 | 
38 | .. code-block:: bash
39 | 
40 |     graphvite new [application ...] [--file f]
41 | 
42 | The above command creates a configuration scaffold for our application, where most
43 | settings are ready. We just need to fill a minimal number of settings following the
44 | instructions. For a more detailed introduction on configuration files, see
45 | :ref:`experiment configuration`.
46 | 
47 | Once we complete the configuration file, we can run it by
48 | 
49 | .. code-block:: bash
50 | 
51 |     graphvite run [config] [--no-eval] [--gpu n] [--cpu m] [--epoch e]
52 | 
53 | Visualize high-dimensional vectors
54 | ----------------------------------
55 | 
56 | .. code-block:: bash
57 | 
58 |     graphvite visualize [file] [--label label_file] [--save save_file] [--perplexity n] [--3d]
59 | 
60 | We can visualize our high-dimensional vectors with a simple command line in
61 | GraphVite.
62 | 
63 | The file can be either a numpy dump ``*.npy`` or a text matrix ``*.txt``. We can
64 | also provide a label file indicating the category of each data point. For the save
65 | file, we recommend to use ``png`` format, while ``pdf`` is also supported.


--------------------------------------------------------------------------------
/doc/source/user/configuration.rst:
--------------------------------------------------------------------------------
  1 | Configuration Files
  2 | ===================
  3 | 
  4 | .. include:: ../link.rst
  5 | 
  6 | .. _experiment configuration:
  7 | 
  8 | Experiment configuration
  9 | ------------------------
 10 | 
 11 | An experiment configuration starts with an ``application type``, and contains settings
 12 | for ``resource``, ``format``, ``graph``, ``build``, ``load``, ``train``, ``evaluate``
 13 | and ``save`` stages.
 14 | 
 15 | Here is the configuration used in :doc:`../quick_start`.
 16 | :download:`quick_start.yaml <../../../config/demo/quick_start.yaml>`
 17 | 
 18 | The stages are configured as follows.
 19 | 
 20 | .. code-block:: yaml
 21 | 
 22 |     application: [type]
 23 | 
 24 | The application type can be ``graph``, ``word graph``, ``knowledge graph`` or
 25 | ``visualization``.
 26 | 
 27 | .. code-block:: yaml
 28 | 
 29 |     resource:
 30 |       gpus: [list of GPU ids]
 31 |       gpu_memory_limit: [limit for each GPU in bytes]
 32 |       cpu_per_gpu: [CPU thread per GPU]
 33 |       dim: [dim]
 34 | 
 35 | .. note::
 36 |     For optimal performance, modules are compiled with pre-defined dimensions in C++.
 37 |     As a drawback, only dimensions that are powers of 2 are supported in the library.
 38 | 
 39 | .. code-block:: yaml
 40 | 
 41 |     format:
 42 |       delimiters: [string of delimiter characters]
 43 |       comment: [prefix of comment strings]
 44 | 
 45 | Format section is optional. By default, delimiters are any blank character and comment
 46 | is "#", following the Python style.
 47 | 
 48 | .. code-block:: yaml
 49 | 
 50 |     graph:
 51 |       file_name: [file name]
 52 |       as_undirected: [symmetrize the graph or not]
 53 | 
 54 | For standard datasets, we can specify its file name by ``<[dataset].[split]>``.
 55 | This would make the configuration file independent of the path.
 56 | 
 57 | .. code-block:: yaml
 58 | 
 59 |     build:
 60 |       optimizer:
 61 |         type: [type]
 62 |         lr: [learning rate]
 63 |         weight_decay: [weight decay]
 64 |         schedule: [learning rate schedule]
 65 |         # and other optimizer-specific configuration
 66 |       num_partition: [number of partitions]
 67 |       num_negative: [number of negative samples]
 68 |       batch_size: [batch size]
 69 |       episode_size: [episode size]
 70 | 
 71 | The number of partitions determines how to deal with multi-GPU or large graph cases.
 72 | The more partitions, the less GPU memory consumption and speed. The episode size
 73 | controls the synchronization frequency across partitions.
 74 | 
 75 | See section 3.2 in `GraphVite paper <GraphVite_>`_  for a detailed illustration.
 76 | 
 77 | .. code-block:: yaml
 78 | 
 79 |     load:
 80 |       file_name: [file name]
 81 | 
 82 | Loading a model is optional.
 83 | 
 84 | .. code-block:: yaml
 85 | 
 86 |     train:
 87 |       model: [model]
 88 |       num_epoch: [number of epochs]
 89 |       resume: [resume training or not]
 90 |       log_frequency: [log frequency in batches]
 91 |       # and other application-specific configuration
 92 | 
 93 | To resume training from a loaded model, set ``resume`` to true in ``train``.
 94 | 
 95 | .. seealso::
 96 |     Training interface:
 97 |     :meth:`Graph <graphvite.solver.GraphSolver.train>`,
 98 |     :meth:`Knowledge graph <graphvite.solver.KnowledgeGraphSolver.train>`,
 99 |     :meth:`Visualization <graphvite.solver.VisualizationSolver.train>`
100 | 
101 | .. code-block:: yaml
102 | 
103 |     evaluate:
104 |       - task: [task]
105 |         # and other task-specific configuration
106 |       - task: [task]
107 |         ...
108 | 
109 | Evaluation is optional. There may be multiple evaluation tasks.
110 | 
111 | .. seealso::
112 |     Evaluation tasks:
113 | 
114 |     - Graph: \
115 |       :meth:`link prediction <graphvite.application.GraphApplication.link_prediction>`,
116 |       :meth:`node classification <graphvite.application.GraphApplication.node_classification>`
117 |     - Knowledge graph:
118 |       :meth:`link prediction <graphvite.application.KnowledgeGraphApplication.link_prediction>`,
119 |       :meth:`entity prediction <graphvite.application.KnowledgeGraphApplication.entity_prediction>`
120 |     - Visualization:
121 |       :meth:`visualization <graphvite.application.VisualizationApplication.visualization>`,
122 |       :meth:`animation <graphvite.application.VisualizationApplication.animation>`,
123 |       :meth:`hierarchy <graphvite.application.VisualizationApplication.hierarchy>`
124 | 
125 | .. code-block:: yaml
126 | 
127 |     save:
128 |         file_name: [file name]
129 |         save_hyperparameter: [save hyperparameters or not]
130 | 
131 | Saving the model is optional.
132 | 
133 | For more detailed settings, we recommend to read the baseline configurations
134 | for concrete examples. They can be found under ``config/`` in the Python package,
135 | or in the `GitHub repository <Repo_>`_.
136 | 
137 | Global configuration
138 | --------------------
139 | 
140 | We can overwrite the global settings of GraphVite in ``~/.graphvite/config.yaml``.
141 | 
142 | .. code-block:: yaml
143 | 
144 |     backend: [graphvite or torch]
145 |     dataset_path: [path to store downloaded datasets]
146 |     float_type: [default float type]
147 |     index_type: [default index type]
148 | 
149 | By default, the evaluation backend is ``graphvite``. The datasets are stored in
150 | ``~/.graphvite/dataset``. The data types are ``float32`` and ``uint32`` respectively.


--------------------------------------------------------------------------------
/doc/source/user/format.rst:
--------------------------------------------------------------------------------
 1 | Data Format
 2 | ===========
 3 | 
 4 | GraphVite is designed to support a wide range of formats for graphs. Generally, it
 5 | doesn't enforce any type restriction on input elements. We can either use integers or
 6 | strings as our input. Each line in the file is parsed as
 7 | 
 8 | .. code-block::
 9 | 
10 |     [token] [delimiter] [token] [delimiter]... [comment]...
11 | 
12 | By default, GraphVite treats any blank character as delimiter, and string after ``#``
13 | as comment. You can change these settings in the
14 | :ref:`format section <experiment configuration>` of configuration files, or using
15 | ``app.set_format(delimiters, comment)`` in Python code.
16 | 
17 | GraphVite can also construct graphs from Python objects, which is helpful if graphs
18 | are dynamically generated. It takes a nested list similar to the file format. Each
19 | token should be a string or a float.
20 | 
21 | .. code-block:: python
22 | 
23 |     graph = [[token, token], [token, token], ...]
24 | 
25 | Node Embedding
26 | --------------
27 | 
28 | The input graph for node embedding follows the edge list format. Each line should be
29 | 
30 | .. code-block::
31 | 
32 |     [head] [tail]
33 | 
34 | You may also specify a weight for each edge.
35 | 
36 | .. code-block::
37 | 
38 |     [head] [tail] [weight]
39 | 
40 | For link prediction task, the evaluation file consists of edges and labels.
41 | 
42 | .. code-block::
43 | 
44 |     [head] [tail] [label]
45 | 
46 | where label ``1`` is positive and ``0`` is negative. The filter file takes the same
47 | format as the input graph.
48 | 
49 | For node classification task, each line is a node and a label. If a node has more
50 | than one label, it should take multiple lines.
51 | 
52 | .. code-block::
53 | 
54 |     [node] [label]
55 | 
56 | Knowledge Graph Embedding
57 | -------------------------
58 | 
59 | Each line in a knowledge graph is a triplet.
60 | 
61 | .. code-block::
62 | 
63 |     [head] [relation] [tail]
64 | 
65 | You may also specify a weight for each triplet.
66 | 
67 | .. code-block::
68 | 
69 |     [head] [relation] [tail] [weight]
70 | 
71 | All the files in knowledge graph evaluation tasks take the same triplet format.
72 | 
73 | Graph & High-dimensional Data Visualization
74 | -------------------------------------------
75 | 
76 | For graph visualization, the input format is same as the graph in node embedding.
77 | 
78 | For high-dimensional data visualization, the input format can either be a 2D numpy
79 | array or a text matrix. Each row in the matrix is parsed as a point in the
80 | high-dimensional space.


--------------------------------------------------------------------------------
/doc/source/user/python.rst:
--------------------------------------------------------------------------------
 1 | Python Interface
 2 | ================
 3 | 
 4 | GraphVite provides Python interface for convenient integration with other software.
 5 | To use GraphVite in Python, import these two modules in our script.
 6 | 
 7 |     >>> import graphvite as gv
 8 |     >>> import graphvite.application as gap
 9 | 
10 | The ``graphvite`` module itself provides basic class interface, such as graphs,
11 | solvers, optimizers and datasets. The ``application`` module contains high-level
12 | wrappers of applications, along with their evaluation routines.
13 | 
14 | Applications
15 | ------------
16 | 
17 | We can invoke a node embedding application with the following lines.
18 | 
19 |     >>> app = gap.GraphApplication(dim=128)
20 |     >>> app.load(file_name=gv.dataset.blogcatalog.train)
21 |     >>> app.build()
22 |     >>> app.train()
23 |     >>> app.evaluate("node classification", file_name=gv.dataset.blogcatalog.label)
24 | 
25 | where the arguments of each member function are identical to those in the
26 | :doc:`configuration files <configuration>`.
27 | 
28 | .. seealso::
29 |     Package reference: :doc:`Application <../api/application>`
30 | 
31 | Basic classes
32 | -------------
33 | 
34 | The basic classes are very helpful if we need fine-grained manipulation of the
35 | pipeline. For example, we may train an ensemble of node embedding models on the
36 | same graph. First, create a graph and two node embedding solvers.
37 | 
38 |     >>> graph = gv.graph.Graph()
39 |     >>> graph.load(gv.dataset.blogcatalog.train)
40 |     >>> solvers = [gv.solver.GraphSolver(dim=128, device_ids=[gpu], num_sampler_per_worker=4)
41 |     ...            for gpu in range(2)]
42 | 
43 | Then, build the solvers on that graph. This step determines all memory allocation.
44 | 
45 |     >>> for solver in solvers:
46 |     >>>     solver.build(graph)
47 | 
48 | Now we can train the solver. The training stage of solvers can be fully paralleled
49 | with multiple threads, since GraphVite never holds Python GIL inside basic classes.
50 | 
51 |     >>> from multiprocessing.pool import ThreadPool
52 |     >>> pool = ThreadPool(2)
53 |     >>> models = ["DeepWalk", "LINE"]
54 |     >>> pool.map(lambda x: x[0].train(x[1]), zip(solvers, models))
55 | 
56 | Finally, obtain the ensembled embeddings.
57 | 
58 |     >>> import numpy as np
59 |     >>> vertex_embeddings = np.hstack([s.vertex_embeddings for s in solvers])
60 |     >>> context_embeddings = np.hstack([s.context_embeddings for s in solvers])
61 | 
62 | Note the embeddings are stored in an internal order. To get an index of a specific
63 | node, use the ``name2id`` property of the graph. For example, the following line
64 | prints the vertex embedding of node "1024".
65 | 
66 |     >>> print(vertex_embeddings[graph.name2id["1024"]])
67 | 
68 | .. seealso::
69 |     Package reference: :doc:`Graph <../api/graph>`, :doc:`Solver <../api/solver>`,
70 |     :doc:`Optimizer <../api/optimizer>`, :doc:`Dataset <../api/dataset>`
71 | 
72 | Logging settings
73 | ----------------
74 | 
75 | GraphVite outputs a bunch of messages during stages like training. We can set the
76 | logging level to dismiss unimportant logs.
77 | 
78 | The following lines suppress most logs except hyperparameters and evaluation results.
79 | The verbose mode additionally prints time tags and thread IDs each log.
80 | 
81 |     >>> import logging
82 |     >>> gv.init_logging(logging.WARNING, verbose=True)
83 | 
84 | Messages can be also redirected to files by specifying a value for the ``dir``
85 | argument.


--------------------------------------------------------------------------------
/external/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore


--------------------------------------------------------------------------------
/include/base/alias_table.cuh:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  * @author Zhaocheng Zhu
 17 |  */
 18 | 
 19 | #pragma once
 20 | 
 21 | #include <vector>
 22 | #include <memory>
 23 | #include <queue>
 24 | #include <cuda_runtime.h>
 25 | 
 26 | #include "memory.h"
 27 | 
 28 | namespace graphvite {
 29 | 
 30 | template <class Float = float, class Index = size_t>
 31 | class AliasTable;
 32 | 
 33 | namespace gpu {
 34 | 
 35 | template <class Float, class Index>
 36 | __global__ void Sample(AliasTable<Float, Index> sampler, Memory<double, int> rand, Memory<Index, int> result);
 37 | 
 38 | }
 39 | 
 40 | /**
 41 |  * @brief CPU / GPU implementation of the alias table algorithm
 42 |  *
 43 |  * Generate a sample from any discrete distribution in O(1) time.
 44 |  *
 45 |  * @tparam _Float floating type of probability
 46 |  * @tparam _Index integral type of indexes
 47 |  */
 48 | template <class _Float, class _Index>
 49 | class AliasTable {
 50 | public:
 51 |     typedef _Float Float;
 52 |     typedef _Index Index;
 53 | 
 54 |     static const int kThreadPerBlock = 512;
 55 | 
 56 |     int device_id;
 57 |     Index count;
 58 |     cudaStream_t stream;
 59 |     Memory<Float, Index> prob_table;
 60 |     Memory<Index, Index> alias_table;
 61 | 
 62 |     /** @brief Construct an alias table
 63 |      * @param _device_id GPU id, -1 for CPU
 64 |      * @param _stream CUDA stream
 65 |      */
 66 |     AliasTable(int _device_id, cudaStream_t _stream = 0) :
 67 |             device_id(_device_id), count(0), stream(_stream), prob_table(device_id, 0, stream),
 68 |             alias_table(device_id, 0, stream) {}
 69 | 
 70 |     /** Shallow copy constructor */
 71 |     AliasTable(const AliasTable &a) :
 72 |             device_id(a.device_id), count(a.count), stream(a.stream), prob_table(a.prob_table),
 73 |             alias_table(a.alias_table) {}
 74 | 
 75 |     AliasTable &operator=(const AliasTable &) = delete;
 76 | 
 77 |     /** Reallocate the memory space */
 78 |     void reallocate(Index capacity) {
 79 |         prob_table.reallocate(capacity);
 80 |         alias_table.reallocate(capacity);
 81 |     }
 82 | 
 83 |     /** Initialize the table with a distribution */
 84 |     void build(const std::vector<Float> &_prob_table) {
 85 |         count = _prob_table.size();
 86 |         CHECK(count > 0) << "Invalid sampling distribution";
 87 |         prob_table.resize(count);
 88 |         alias_table.resize(count);
 89 | 
 90 |         memcpy(prob_table.host_ptr, _prob_table.data(), count * sizeof(Float));
 91 |         // single precision may cause considerable trunctation error
 92 |         double norm = 0;
 93 |         for (int i = 0; i < count; i++)
 94 |             norm += prob_table[i];
 95 |         norm = norm / count;
 96 |         for (int i = 0; i < count; i++)
 97 |             prob_table[i] /= norm;
 98 | 
 99 |         std::queue<Index> large, little;
100 |         for (int i = 0; i < count; i++) {
101 |             if (prob_table[i] < 1)
102 |                 little.push(i);
103 |             else
104 |                 large.push(i);
105 |         }
106 |         while (!little.empty() && !large.empty()) {
107 |             Index i = little.front(), j = large.front();
108 |             little.pop();
109 |             large.pop();
110 |             alias_table[i] = j;
111 |             prob_table[j] = prob_table[i] + prob_table[j] - 1;
112 |             if (prob_table[j] < 1)
113 |                 little.push(j);
114 |             else
115 |                 large.push(j);
116 |         }
117 |         // suppress some trunction error
118 |         while (!little.empty()) {
119 |             Index i = little.front();
120 |             little.pop();
121 |             alias_table[i] = i;
122 |         }
123 |         while (!large.empty()) {
124 |             Index i = large.front();
125 |             large.pop();
126 |             alias_table[i] = i;
127 |         }
128 |     }
129 | 
130 |     /** Copy the table to GPU */
131 |     void to_device() {
132 |         prob_table.to_device();
133 |         alias_table.to_device();
134 |     }
135 | 
136 |     /** Copy the table to GPU (asynchronous) */
137 |     void to_device_async() {
138 |         prob_table.to_device_async();
139 |         alias_table.to_device_async();
140 |     }
141 | 
142 |     /** Free GPU memory */
143 |     void clear() {
144 |         reallocate(0);
145 |     }
146 | 
147 |     /** Generate a sample on CPU / GPU */
148 |     __host__ __device__ inline Index sample(double rand1, double rand2) const {
149 |         Index index = rand1 * count;
150 |         Float prob = rand2;
151 |         return prob < prob_table[index] ? index : alias_table[index];
152 |     }
153 | 
154 |     /** Generate a batch of samples on GPU */
155 |     void device_sample(const Memory<double, int> &rand, Memory<Index, int> *result) {
156 |         int block_per_grid = (result->count + kThreadPerBlock - 1) / kThreadPerBlock;
157 |         gpu::Sample<Float, Index><<<block_per_grid, kThreadPerBlock, 0, stream>>>(*this, rand, *result);
158 |     }
159 | 
160 |     /**
161 |      * @param count size of the distribution
162 |      * @return GPU memory cost
163 |      */
164 |     static size_t gpu_memory_demand(int count) {
165 |         size_t demand = 0;
166 |         demand += decltype(prob_table)::gpu_memory_demand(count);
167 |         demand += decltype(alias_table)::gpu_memory_demand(count);
168 |         return demand;
169 |     }
170 | };
171 | 
172 | namespace gpu {
173 | 
174 | template <class Float, class Index>
175 | __global__ void Sample(AliasTable<Float, Index> sampler, Memory<double, int> random, Memory<Index, int> result) {
176 |     int thread_id = blockIdx.x * blockDim.x + threadIdx.x;
177 |     if (thread_id < result.count) {
178 |         Float rand1 = random[thread_id * 2];
179 |         Float rand2 = random[thread_id * 2 + 1];
180 |         result[thread_id] = sampler.sample(rand1, rand2);
181 |     }
182 | }
183 | 
184 | } // namespace gpu
185 | 
186 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/base/memory.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  * @author Zhaocheng Zhu
 17 |  */
 18 | 
 19 | #pragma once
 20 | 
 21 | #include <utility>
 22 | #include <cuda_runtime.h>
 23 | 
 24 | #include "util/debug.h"
 25 | 
 26 | namespace graphvite {
 27 | 
 28 | /**
 29 |  * @brief CPU / GPU memory space allocator
 30 |  * @tparam _T type of data
 31 |  * @tparam _Index integral type of indexes
 32 |  */
 33 | template<class _T, class _Index = size_t>
 34 | class Memory {
 35 | public:
 36 |     typedef _T Data;
 37 |     typedef _Index Index;
 38 | 
 39 |     int device_id;
 40 |     Index count = 0, capacity = 0;
 41 |     cudaStream_t stream;
 42 |     int *refer_count = nullptr;
 43 |     Data *host_ptr = nullptr, *device_ptr = nullptr;
 44 | 
 45 |     /**
 46 |      * @brief Construct a memory space
 47 |      * @param _device_id GPU id, -1 for CPU
 48 |      * @param _count number of data
 49 |      * @param _stream CUDA stream
 50 |      */
 51 |     Memory(int _device_id, Index _count = 0, cudaStream_t _stream = 0) :
 52 |             device_id(_device_id), stream(_stream) {
 53 |         resize(_count);
 54 |     }
 55 | 
 56 |     /** Shallow copy constructor */
 57 |     Memory(const Memory &m) :
 58 |             device_id(m.device_id), count(m.count), capacity(m.capacity), stream(m.stream), refer_count(m.refer_count),
 59 |             host_ptr(m.host_ptr), device_ptr(m.device_ptr) {
 60 |         if (capacity)
 61 |             (*refer_count)++;
 62 |     }
 63 | 
 64 |     Memory &operator=(const Memory &) = delete;
 65 | 
 66 |     ~Memory() { reallocate(0); }
 67 | 
 68 |     /** Swap two memory spaces */
 69 |     void swap(Memory &m) {
 70 |         std::swap(device_id, m.device_id);
 71 |         std::swap(count, m.count);
 72 |         std::swap(capacity, m.capacity);
 73 |         std::swap(stream, m.stream);
 74 |         std::swap(refer_count, m.refer_count);
 75 |         std::swap(host_ptr, m.host_ptr);
 76 |         std::swap(device_ptr, m.device_ptr);
 77 |     }
 78 | 
 79 |     __host__ __device__ Data &operator[](Index index) {
 80 | #ifdef __CUDA_ARCH__
 81 |         return device_ptr[index];
 82 | #else
 83 |         return host_ptr[index];
 84 | #endif
 85 |     }
 86 | 
 87 |     __host__ __device__ Data &operator[](Index index) const {
 88 | #ifdef __CUDA_ARCH__
 89 |         return device_ptr[index];
 90 | #else
 91 |         return host_ptr[index];
 92 | #endif
 93 |     }
 94 | 
 95 |     /** Copy data from another memory */
 96 |     void copy(const Memory &m) {
 97 |         resize(m.count);
 98 |         memcpy(host_ptr, m.host_ptr, count * sizeof(Data));
 99 |     }
100 | 
101 |     /** Copy data from a pointer */
102 |     void copy(void *ptr, Index _count) {
103 |         resize(_count);
104 |         memcpy(host_ptr, ptr, count * sizeof(Data));
105 |     }
106 | 
107 |     /** Reallocate the memory space */
108 |     void reallocate(Index _capacity) {
109 |         if (capacity && !--(*refer_count)) {
110 |             delete refer_count;
111 | #ifdef PINNED_MEMORY
112 |             CUDA_CHECK(cudaFreeHost(host_ptr));
113 | #else
114 |             delete [] host_ptr;
115 | #endif
116 |             if (device_id != -1) {
117 |                 CUDA_CHECK(cudaSetDevice(device_id));
118 |                 CUDA_CHECK(cudaFree(device_ptr));
119 |             }
120 |         }
121 |         capacity = _capacity;
122 |         if (capacity) {
123 |             refer_count = new int(1);
124 | #ifdef PINNED_MEMORY
125 |             CUDA_CHECK(cudaMallocHost(&host_ptr, capacity * sizeof(Data)));
126 | #else
127 |             host_ptr = new Data[capacity];
128 | #endif
129 |             if (device_id != -1) {
130 |                 CUDA_CHECK(cudaSetDevice(device_id));
131 |                 CUDA_CHECK(cudaMalloc(&device_ptr, capacity * sizeof(Data)));
132 |             }
133 |         }
134 |     }
135 | 
136 |     /** Resize the memory space. Reallocate only if the capacity is not enough. */
137 |     void resize(Index _count) {
138 |         if (_count > capacity || (capacity && *refer_count > 1))
139 |             reallocate(_count);
140 |         count = _count;
141 |     }
142 | 
143 |     /** Copy the memory space to GPU */
144 |     void to_device(Index copy_count = 0) {
145 |         if (count && device_id != -1) {
146 |             if (!copy_count)
147 |                 copy_count = count;
148 |             CUDA_CHECK(cudaSetDevice(device_id));
149 |             CUDA_CHECK(cudaMemcpyAsync(device_ptr, host_ptr, copy_count * sizeof(Data), cudaMemcpyHostToDevice, stream));
150 |             CUDA_CHECK(cudaStreamSynchronize(stream));
151 |         }
152 |     }
153 | 
154 |     /** Copy the memory space to GPU (asynchronous) */
155 |     void to_device_async(Index copy_count = 0) {
156 |         if (count && device_id != -1) {
157 |             if (!copy_count)
158 |                 copy_count = count;
159 |             CUDA_CHECK(cudaSetDevice(device_id));
160 |             CUDA_CHECK(cudaMemcpyAsync(device_ptr, host_ptr, copy_count * sizeof(Data), cudaMemcpyHostToDevice, stream));
161 |         }
162 |     }
163 | 
164 |     /** Copy the memory space back from GPU */
165 |     void to_host(Index copy_count = 0) {
166 |         if (count && device_id != -1) {
167 |             if (!copy_count)
168 |                 copy_count = count;
169 |             CUDA_CHECK(cudaSetDevice(device_id));
170 |             CUDA_CHECK(cudaMemcpyAsync(host_ptr, device_ptr, copy_count * sizeof(Data), cudaMemcpyDeviceToHost, stream));
171 |             CUDA_CHECK(cudaStreamSynchronize(stream));
172 |         }
173 |     }
174 | 
175 |     /** Copy the memory space back from GPU (asynchronous) */
176 |     void to_host_async(Index copy_count = 0) {
177 |         if (count && device_id != -1) {
178 |             if (!copy_count)
179 |                 copy_count = count;
180 |             CUDA_CHECK(cudaSetDevice(device_id));
181 |             CUDA_CHECK(cudaMemcpyAsync(host_ptr, device_ptr, copy_count * sizeof(Data), cudaMemcpyDeviceToHost, stream));
182 |         }
183 |     }
184 | 
185 |     /** Fill the memory space with data. Automatically resize the memory when necessary. */
186 |     void fill(const Data &data, Index _count = 0) {
187 |         if (_count)
188 |             resize(_count);
189 |         for (Index i = 0; i < count; i++)
190 |             host_ptr[i] = data;
191 |     }
192 | 
193 |     /** Gather data from a pool according to an index mapping. Automatically resize the memory when necessary. */
194 |     void gather(const std::vector<Data> &memory, const std::vector<Index> &mapping) {
195 |         if (!mapping.empty()) {
196 |             resize(mapping.size());
197 |             for (Index i = 0; i < count; i++)
198 |                 host_ptr[i] = memory[mapping[i]];
199 |         }
200 |         else {
201 |             resize(memory.size());
202 |             for (Index i = 0; i < count; i++)
203 |                 host_ptr[i] = memory[i];
204 |         }
205 |     }
206 | 
207 |     /** Scatter data to a pool according to an index mapping */
208 |     void scatter(std::vector<Data> &memory, const std::vector<Index> &mapping) {
209 |         if (!mapping.empty()) {
210 |             for (Index i = 0; i < count; i++)
211 |                 memory[mapping[i]] = host_ptr[i];
212 |         }
213 |         else {
214 |             for (Index i = 0; i < count; i++)
215 |                 memory[i] = host_ptr[i];
216 |         }
217 |     }
218 | 
219 |     /** Scatter data to a pool by addition, according to an index mapping */
220 |     void scatter_add(std::vector<Data> &memory, const std::vector<Index> &mapping) {
221 |         if (!mapping.empty()) {
222 |             for (Index i = 0; i < count; i++)
223 |                 memory[mapping[i]] += host_ptr[i];
224 |         }
225 |         else {
226 |             for (Index i = 0; i < count; i++)
227 |                 memory[i] += host_ptr[i];
228 |         }
229 |     }
230 | 
231 |     /** Scatter data to a pool by substraction, according to an index mapping */
232 |     void scatter_sub(std::vector<Data> &memory, const std::vector<Index> &mapping) {
233 |         if (!mapping.empty()) {
234 |             for (Index i = 0; i < count; i++)
235 |                 memory[mapping[i]] -= host_ptr[i];
236 |         }
237 |         else {
238 |             for (Index i = 0; i < count; i++)
239 |                 memory[i] -= host_ptr[i];
240 |         }
241 |     }
242 | 
243 |     /**
244 |      * @param capacity number of data
245 |      * @return GPU memory cost
246 |      */
247 |     static size_t gpu_memory_demand(int capacity) {
248 |         return capacity * sizeof(Data);
249 |     }
250 | };
251 | 
252 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/base/vector.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  * @author Zhaocheng Zhu
 17 |  */
 18 | 
 19 | #pragma once
 20 | 
 21 | #include <type_traits>
 22 | #include "util/gpu.cuh"
 23 | 
 24 | namespace graphvite {
 25 | 
 26 | /**
 27 |  * @brief Vector computation
 28 |  * @tparam _dim dimension
 29 |  * @tparam _Float floating type of data
 30 |  */
 31 | template<size_t _dim, class _Float = float>
 32 | class Vector {
 33 |      static_assert(std::is_floating_point<_Float>::value, "Vector can only be instantiated with floating point types");
 34 |     // static_assert(_dim % gpu::kWarpSize == 0, "`dim` should be divided by 32");
 35 | public:
 36 |     static const size_t dim = _dim;
 37 |     typedef size_t Index;
 38 |     typedef _Float Float;
 39 |     Float data[dim];
 40 | 
 41 |     /** Default constructor */
 42 |     Vector() = default;
 43 | 
 44 |     /** Construct a vector of repeat scalar */
 45 |     Vector(Float f) {
 46 | #pragma unroll
 47 |         for (Index i = 0; i < dim; i++)
 48 |             data[i] = f;
 49 |     }
 50 | 
 51 |     __host__ __device__ Float &operator[](Index index) {
 52 |         return data[index];
 53 |     }
 54 | 
 55 |     __host__ __device__ Float operator[](Index index) const {
 56 |         return data[index];
 57 |     }
 58 | 
 59 |     __host__ __device__ Vector &operator=(const Vector &v) {
 60 | #if __CUDA_ARCH__
 61 |         using namespace gpu;
 62 |         const int lane_id = threadIdx.x % kWarpSize;
 63 |         for (Index i = lane_id; i < dim; i += kWarpSize)
 64 | #else
 65 |         for (Index i = 0; i < dim; i++)
 66 | #endif
 67 |             data[i] = v[i];
 68 |         return *this;
 69 |     }
 70 | 
 71 |     Vector &operator =(Float f) {
 72 | #pragma unroll
 73 |         for (Index i = 0; i < dim; i++)
 74 |             data[i] = f;
 75 |         return *this;
 76 |     }
 77 | 
 78 |     Vector &operator +=(const Vector &v) {
 79 | #pragma unroll
 80 |         for (Index i = 0; i < dim; i++)
 81 |             data[i] += v[i];
 82 |         return *this;
 83 |     }
 84 | 
 85 | 
 86 |     Vector &operator -=(const Vector &v) {
 87 | #pragma unroll
 88 |         for (Index i = 0; i < dim; i++)
 89 |             data[i] -= v[i];
 90 |         return *this;
 91 |     }
 92 | 
 93 |     Vector &operator *=(const Vector &v) {
 94 | #pragma unroll
 95 |         for (Index i = 0; i < dim; i++)
 96 |             data[i] *= v[i];
 97 |         return *this;
 98 |     }
 99 | 
100 |     Vector &operator /=(const Vector &v) {
101 | #pragma unroll
102 |         for (Index i = 0; i < dim; i++)
103 |             data[i] /= v[i];
104 |         return *this;
105 |     }
106 | 
107 |     Vector &operator +=(Float f) {
108 | #pragma unroll
109 |         for (Index i = 0; i < dim; i++)
110 |             data[i] += f;
111 |         return *this;
112 |     }
113 | 
114 |     Vector &operator -=(Float f) {
115 | #pragma unroll
116 |         for (Index i = 0; i < dim; i++)
117 |             data[i] -= f;
118 |         return *this;
119 |     }
120 | 
121 |     Vector &operator *=(Float f) {
122 | #pragma unroll
123 |         for (Index i = 0; i < dim; i++)
124 |             data[i] *= f;
125 |         return *this;
126 |     }
127 | 
128 |     Vector &operator /=(Float f) {
129 | #pragma unroll
130 |         for (Index i = 0; i < dim; i++)
131 |             data[i] /= f;
132 |         return *this;
133 |     }
134 | 
135 |     Vector operator +(const Vector &v) {
136 |         Vector result;
137 | #pragma unroll
138 |         for (Index i = 0; i < dim; i++)
139 |             result[i] = (*this)[i] + v[i];
140 |         return result;
141 |     }
142 | 
143 |     Vector operator -(const Vector &v) {
144 |         Vector result;
145 | #pragma unroll
146 |         for (Index i = 0; i < dim; i++)
147 |             result[i] = (*this)[i] - v[i];
148 |         return result;
149 |     }
150 | 
151 |     Vector operator *(const Vector &v) {
152 |         Vector result;
153 | #pragma unroll
154 |         for (Index i = 0; i < dim; i++)
155 |             result[i] = (*this)[i] * v[i];
156 |         return result;
157 |     }
158 | 
159 |     Vector operator /(const Vector &v) {
160 |         Vector result;
161 | #pragma unroll
162 |         for (Index i = 0; i < dim; i++)
163 |             result[i] = (*this)[i] / v[i];
164 |         return result;
165 |     }
166 | 
167 |     Vector operator +(Float f) {
168 |         Vector result;
169 | #pragma unroll
170 |         for (Index i = 0; i < dim; i++)
171 |             result[i] = (*this)[i] + f;
172 |         return result;
173 |     }
174 | 
175 |     Vector operator -(Float f) {
176 |         Vector result;
177 | #pragma unroll
178 |         for (Index i = 0; i < dim; i++)
179 |             result[i] = (*this)[i] - f;
180 |         return result;
181 |     }
182 | 
183 |     Vector operator *(Float f) {
184 |         Vector result;
185 | #pragma unroll
186 |         for (Index i = 0; i < dim; i++)
187 |             result[i] = (*this)[i] * f;
188 |         return result;
189 |     }
190 | 
191 |     Vector operator /(Float f) {
192 |         Vector result;
193 | #pragma unroll
194 |         for (Index i = 0; i < dim; i++)
195 |             result[i] = (*this)[i] / f;
196 |         return result;
197 |     }
198 | 
199 |     friend Vector operator +(Float f, const Vector &v) {
200 |         Vector result;
201 | #pragma unroll
202 |         for (Index i = 0; i < dim; i++)
203 |             result[i] = v[i] + f;
204 |         return result;
205 |     }
206 | 
207 |     friend Vector operator -(Float f, const Vector &v) {
208 |         Vector result;
209 | #pragma unroll
210 |         for (Index i = 0; i < dim; i++)
211 |             result[i] = v[i] - f;
212 |         return result;
213 |     }
214 | 
215 |     friend Vector operator *(Float f, const Vector &v) {
216 |         Vector result;
217 | #pragma unroll
218 |         for (Index i = 0; i < dim; i++)
219 |             result[i] = v[i] * f;
220 |         return result;
221 |     }
222 | };
223 | 
224 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/core/graph.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  * @author Zhaocheng Zhu
 17 |  */
 18 | 
 19 | #pragma once
 20 | 
 21 | #define FILE_OFFSET_BITS 64
 22 | #include <cstdio>
 23 | #undef FILE_OFFSET_BITS
 24 | 
 25 | #include <tuple>
 26 | #include <string>
 27 | #include <vector>
 28 | #include <glog/logging.h>
 29 | 
 30 | #include "util/common.h"
 31 | #include "util/debug.h"
 32 | 
 33 | namespace graphvite {
 34 | 
 35 | /**
 36 |  * @brief General interface of graphs
 37 |  * @tparam _Index integral type of node indexes
 38 |  * @tparam _Attributes types of additional edge attributes
 39 |  *
 40 |  * @note To add a new graph, you need to
 41 |  * - derive a template graph class from GraphMixin
 42 |  * - implement all virtual functions for that class
 43 |  * - add python binding of instantiations of that class in extension.h & extension.cu
 44 |  */
 45 | template<class _Index, class ..._Attributes>
 46 | class GraphMixin {
 47 | public:
 48 |     typedef _Index Index;
 49 |     typedef std::tuple<Index, float, _Attributes...> VertexEdge;
 50 |     typedef std::tuple<Index, Index, float, _Attributes...> Edge;
 51 | 
 52 |     std::vector<std::vector<VertexEdge>> vertex_edges;
 53 |     std::vector<Edge> edges;
 54 |     std::vector<float> vertex_weights, edge_weights;
 55 |     std::vector<size_t> flat_offsets;
 56 | 
 57 |     Index num_vertex;
 58 |     size_t num_edge;
 59 | 
 60 | #define USING_GRAPH_MIXIN(type) \
 61 |     using typename type::VertexEdge; \
 62 |     using typename type::Edge; \
 63 |     using type::vertex_edges; \
 64 |     using type::edges; \
 65 |     using type::vertex_weights; \
 66 |     using type::edge_weights; \
 67 |     using type::num_vertex; \
 68 |     using type::num_edge; \
 69 |     using type::info
 70 | 
 71 |     GraphMixin() = default;
 72 |     GraphMixin(const GraphMixin &) = delete;
 73 |     GraphMixin &operator=(const GraphMixin &) = delete;
 74 | 
 75 |     /** Clear the graph and free CPU memory */
 76 |     virtual void clear() {
 77 |         num_vertex = 0;
 78 |         num_edge = 0;
 79 |         decltype(vertex_edges)().swap(vertex_edges);
 80 |         decltype(edges)().swap(edges);
 81 |         decltype(vertex_weights)().swap(vertex_weights);
 82 |         decltype(edge_weights)().swap(edge_weights);
 83 |         decltype(flat_offsets)().swap(flat_offsets);
 84 |     }
 85 | 
 86 |     /** Flatten the adjacency list to an edge list */
 87 |     virtual void flatten() {
 88 |         if (!edges.empty())
 89 |             return;
 90 | 
 91 |         size_t offset = 0;
 92 |         flat_offsets.resize(num_vertex);
 93 |         for (Index u = 0; u < num_vertex; u++) {
 94 |             for (auto &&vertex_edge : vertex_edges[u]) {
 95 |                 edges.push_back(std::tuple_cat(std::tie(u), vertex_edge));
 96 |                 edge_weights.push_back(std::get<1>(vertex_edge));
 97 |             }
 98 |             flat_offsets[u] = offset;
 99 |             offset += vertex_edges[u].size();
100 |         }
101 |     }
102 | 
103 |     virtual inline std::string name() const {
104 |         std::stringstream ss;
105 |         ss << "GraphMixin<" << pretty::type2name<Index>();
106 |         auto _ = {0, (ss << ", " << pretty::type2name<_Attributes>(), 0)...};
107 |         ss << ">";
108 |         return ss.str();
109 |     }
110 | 
111 |     virtual inline std::string graph_info() const {
112 |         std::stringstream ss;
113 |         ss << "#vertex: " << num_vertex << ", #edge: " << num_edge;
114 |         return ss.str();
115 |     }
116 | 
117 |     /** Return information about the graph */
118 |     std::string info() {
119 |         std::stringstream ss;
120 |         ss << name() << std::endl;
121 |         ss << pretty::header("Graph") << std::endl;
122 |         ss << graph_info();
123 |         return ss.str();
124 |     }
125 | };
126 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/instance/model/graph.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  * @author Zhaocheng Zhu, Shizhen Xu
 17 |  */
 18 | 
 19 | #pragma once
 20 | 
 21 | #include "core/optimizer.h"
 22 | #include "util/gpu.cuh"
 23 | 
 24 | namespace graphvite {
 25 | 
 26 | /**
 27 |  * @brief LINE model
 28 |  * @tparam _Vector vector type of embeddings
 29 |  *
 30 |  * Forward: dot(vertex, context)
 31 |  * Backward: gradient of forward function
 32 |  */
 33 | template<class _Vector>
 34 | class LINE {
 35 | public:
 36 |     static const size_t dim = _Vector::dim;
 37 |     typedef _Vector Vector;
 38 |     typedef typename _Vector::Float Float;
 39 | 
 40 |     __host__ __device__ static void forward(const Vector &vertex, const Vector &context, Float &output) {
 41 |         output = 0;
 42 |         FOR(i, dim)
 43 |             output += vertex[i] * context[i];
 44 |         output = SUM(output);
 45 |     }
 46 | 
 47 |     template<OptimizerType optimizer_type>
 48 |     __host__ __device__
 49 |     static void backward(Vector &vertex, Vector &context,
 50 |                          Float gradient, const Optimizer &optimizer, Float weight = 1) {
 51 |         auto update = get_update_function < Float, optimizer_type>();
 52 |         FOR(i, dim) {
 53 |             Float v = vertex[i];
 54 |             Float c = context[i];
 55 |             vertex[i] -= (optimizer.*update)(v, gradient * c, weight);
 56 |             context[i] -= (optimizer.*update)(c, gradient * v, weight);
 57 |         }
 58 |     }
 59 | 
 60 |     template<OptimizerType optimizer_type>
 61 |     __host__ __device__
 62 |     static void backward(Vector &vertex, Vector &context, Vector &vertex_moment1, Vector &context_moment1,
 63 |                          Float gradient, const Optimizer &optimizer, Float weight = 1) {
 64 |         auto update = get_update_function_1_moment < Float, optimizer_type>();
 65 |         FOR(i, dim) {
 66 |             Float v = vertex[i];
 67 |             Float c = context[i];
 68 |             vertex[i] -= (optimizer.*update)(v, gradient * c, vertex_moment1[i], weight);
 69 |             context[i] -= (optimizer.*update)(c, gradient * v, context_moment1[i], weight);
 70 |         }
 71 |     }
 72 | 
 73 |     template<OptimizerType optimizer_type>
 74 |     __host__ __device__
 75 |     static void backward(Vector &vertex, Vector &context, Vector &vertex_moment1, Vector &context_moment1,
 76 |                          Vector &vertex_moment2, Vector &context_moment2,
 77 |                          Float gradient, const Optimizer &optimizer, Float weight = 1) {
 78 |         auto update = get_update_function_2_moment < Float, optimizer_type>();
 79 |         FOR(i, dim) {
 80 |             Float v = vertex[i];
 81 |             Float c = context[i];
 82 |             vertex[i] -= (optimizer.*update)(v, gradient * c, vertex_moment1[i], vertex_moment2[i], weight);
 83 |             context[i] -= (optimizer.*update)(c, gradient * v, context_moment1[i], context_moment2[i], weight);
 84 |         }
 85 |     }
 86 | };
 87 | 
 88 | /**
 89 |  * @brief DeepWalk model
 90 |  * @tparam _Vector vector type of embeddings
 91 |  *
 92 |  * Forward: dot(vertex, context)
 93 |  * Backward: gradient of forward function
 94 |  */
 95 | template<class _Vector>
 96 | class DeepWalk : public LINE<_Vector> {};
 97 | 
 98 | /**
 99 |  * @brief node2vec model
100 |  * @tparam _Vector vector type of embeddings
101 |  *
102 |  * Forward: dot(vertex, context)
103 |  * Backward: gradient of forward function
104 |  */
105 | template<class _Vector>
106 | class Node2Vec : public LINE<_Vector> {};
107 | 
108 | }


--------------------------------------------------------------------------------
/include/instance/model/visualization.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  * @author Zhaocheng Zhu
17 |  */
18 | 
19 | #pragma once
20 | 
21 | #include "core/optimizer.h"
22 | #include "util/gpu.cuh"
23 | 
24 | namespace graphvite {
25 | 
26 | /**
27 |  * @brief LargeVis model
28 |  * @tparam _Vector vector type of embeddings
29 |  *
30 |  * Forward: L2_norm(head - tail) ^ 2
31 |  * Backward: gradient of forward function
32 |  */
33 | template<class _Vector>
34 | class LargeVis {
35 | public:
36 |     static const size_t dim = _Vector::dim;
37 |     typedef _Vector Vector;
38 |     typedef typename _Vector::Float Float;
39 | 
40 |     __host__ __device__
41 |     static void forward(const Vector &head, const Vector &tail, Float &output) {
42 |         output = 0;
43 |         FOR(i, dim)
44 |             output += (head[i] - tail[i]) * (head[i] - tail[i]);
45 |         output = SUM(output);
46 |     }
47 | 
48 |     template<OptimizerType optimizer_type>
49 |     __host__ __device__
50 |     static void backward(Vector &head, Vector &tail, Float gradient, const Optimizer &optimizer, Float weight = 1) {
51 |         auto update = get_update_function<Float, optimizer_type>();
52 |         FOR(i, dim) {
53 |             Float h = head[i];
54 |             Float t = tail[i];
55 |             head[i] -= (optimizer.*update)(h, gradient * (h - t), weight);
56 |             tail[i] -= (optimizer.*update)(t, gradient * (t - h), weight);
57 |         }
58 |     }
59 | 
60 |     template<OptimizerType optimizer_type>
61 |     __host__ __device__
62 |     static void backward(Vector &head, Vector &tail, Vector &head_moment1, Vector &tail_moment1,
63 |                          Float gradient, const Optimizer &optimizer, Float weight = 1) {
64 |         auto update = get_update_function_1_moment<Float, optimizer_type>();
65 |         FOR(i, dim) {
66 |             Float h = head[i];
67 |             Float t = tail[i];
68 |             head[i] -= (optimizer.*update)(h, gradient * (h - t), head_moment1[i], weight);
69 |             tail[i] -= (optimizer.*update)(t, gradient * (t - h), tail_moment1[i], weight);
70 |         }
71 |     }
72 | 
73 |     template<OptimizerType optimizer_type>
74 |     __host__ __device__
75 |     static void backward(Vector &head, Vector &tail, Vector &head_moment1, Vector &tail_moment1,
76 |                          Vector &head_moment2, Vector &tail_moment2,
77 |                          Float gradient, const Optimizer &optimizer, Float weight = 1) {
78 |         auto update = get_update_function_2_moment<Float, optimizer_type>();
79 |         FOR(i, dim) {
80 |             Float h = head[i];
81 |             Float t = tail[i];
82 |             head[i] -= (optimizer.*update)(h, gradient * (h - t), head_moment1[i], head_moment2[i], weight);
83 |             tail[i] -= (optimizer.*update)(t, gradient * (t - h), tail_moment1[i], tail_moment2[i], weight);
84 |         }
85 |     }
86 | };
87 | 
88 | }


--------------------------------------------------------------------------------
/include/util/common.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  * @author Zhaocheng Zhu
17 |  */
18 | 
19 | #pragma once
20 | 
21 | #include "io.h"
22 | #include "math.h"
23 | 
24 | namespace graphvite {
25 | 
26 | #define DEPRECATED(reason) __attribute__ ((deprecated(reason)))
27 | 
28 | const float kEpsilon = 1e-15;
29 | const int kAuto = 0;
30 | const size_t kMaxLineLength = 1 << 22;
31 | 
32 | constexpr size_t KiB(size_t x) {
33 |     return x << 10;
34 | }
35 | 
36 | constexpr size_t MiB(size_t x) {
37 |     return x << 20;
38 | }
39 | 
40 | constexpr size_t GiB(size_t x) {
41 |     return x << 30;
42 | }
43 | 
44 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/util/debug.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  * @author Zhaocheng Zhu
17 |  */
18 | 
19 | #pragma once
20 | 
21 | #include <curand.h>
22 | #include <cuda_runtime.h>
23 | #include <glog/logging.h>
24 | 
25 | namespace graphvite {
26 | 
27 | #define CUDA_CHECK(error) CudaCheck((error), __FILE__, __LINE__)
28 | #define CURAND_CHECK(error) CurandCheck((error), __FILE__, __LINE__)
29 | 
30 | inline void CudaCheck(cudaError_t error, const char *file_name, int line) {
31 |     CHECK(error == cudaSuccess)
32 |             << "CUDA error " << cudaGetErrorString(error) << " at " << file_name << ":" << line;
33 | }
34 | 
35 | inline void CurandCheck(curandStatus_t error, const char *file_name, int line) {
36 |     CHECK(error == CURAND_STATUS_SUCCESS)
37 |             << "CURAND error " << error << " at " << file_name << ":" << line;
38 | }
39 | 
40 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/util/gpu.cuh:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  * @author Zhaocheng Zhu, Shizhen Xu
17 |  */
18 | 
19 | #pragma once
20 | 
21 | namespace graphvite {
22 | 
23 | // helper macros for CPU-GPU agnostic code
24 | #if __CUDA_ARCH__
25 | 
26 | #define FOR(i, stop) \
27 |     const int lane_id = threadIdx.x % gpu::kWarpSize; \
28 |     for (int i = lane_id; i < (stop); i += gpu::kWarpSize)
29 | #define SUM(x) gpu::WarpBroadcast(gpu::WarpReduce(x), 0)
30 | 
31 | #else
32 | 
33 | #define FOR(i, stop) \
34 |     for (int i = 0; i < stop; i++)
35 | #define SUM(x) (x)
36 | 
37 | #endif
38 | 
39 | namespace gpu {
40 | 
41 | const int kBlockPerGrid = 8192;
42 | const int kThreadPerBlock = 512;
43 | const int kWarpSize = 32;
44 | const unsigned kFullMask = 0xFFFFFFFF;
45 | 
46 | template<class T>
47 | __device__ T WarpReduce(T value) {
48 | #pragma unroll
49 |     for (int delta = 1; delta < kWarpSize; delta *= 2)
50 | #if __CUDACC_VER_MAJOR__ >= 9
51 |         value += __shfl_down_sync(kFullMask, value, delta);
52 | #else
53 |         value += __shfl_down(value, delta);
54 | #endif
55 |     return value;
56 | }
57 | 
58 | template<class T>
59 | __device__ T WarpBroadcast(T value, int lane_id) {
60 | #if __CUDACC_VER_MAJOR__ >= 9
61 |     return __shfl_sync(kFullMask, value, lane_id);
62 | #else
63 |     return __shfl(value, lane_id);
64 | #endif
65 | }
66 | 
67 | } // namespace gpu
68 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/util/io.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  * @author Zhaocheng Zhu
 17 |  */
 18 | 
 19 | #pragma once
 20 | 
 21 | #include <sstream>
 22 | #include <glog/logging.h>
 23 | 
 24 | namespace graphvite {
 25 | 
 26 | void init_logging(int threshold = google::INFO, std::string dir = "", bool verbose = false) {
 27 |     static bool initialized = false;
 28 | 
 29 |     FLAGS_minloglevel = threshold;
 30 |     if (dir == "")
 31 |         FLAGS_logtostderr = true;
 32 |     else
 33 |         FLAGS_log_dir = dir;
 34 |     FLAGS_log_prefix = verbose;
 35 |     if (!initialized) {
 36 |         google::InitGoogleLogging("graphvite");
 37 |         initialized = true;
 38 |     }
 39 | }
 40 | 
 41 | namespace pretty {
 42 | 
 43 | template <class T>
 44 | std::string type2name();
 45 | 
 46 | template <>
 47 | std::string type2name<float>() { return "float32"; }
 48 | 
 49 | template <>
 50 | std::string type2name<double>() { return "float64"; }
 51 | 
 52 | template <>
 53 | std::string type2name<int>() { return "int32"; }
 54 | 
 55 | template <>
 56 | std::string type2name<unsigned int>() { return "uint32"; }
 57 | 
 58 | template <>
 59 | std::string type2name<long long>() { return "int64"; }
 60 | 
 61 | template <>
 62 | std::string type2name<unsigned long long>() { return "uint64"; }
 63 | 
 64 | std::string yes_no(bool x) {
 65 |     return x ? "yes" : "no";
 66 | }
 67 | 
 68 | std::string size_string(size_t size) {
 69 |     std::stringstream ss;
 70 |     ss.precision(3);
 71 |     if (size >= 1 << 30)
 72 |         ss << size / float(1 << 30) << " GiB";
 73 |     else if (size >= 1 << 20)
 74 |         ss << size / float(1 << 20) << " MiB";
 75 |     else if (size >= 1 << 10)
 76 |         ss << size / float(1 << 10) << " KiB";
 77 |     else
 78 |         ss << size << " B";
 79 |     return ss.str();
 80 | }
 81 | 
 82 | const size_t kLineWidth = 44;
 83 | std::string begin(kLineWidth, '<');
 84 | std::string end(kLineWidth, '>');
 85 | 
 86 | inline std::string block(const std::string &content) {
 87 |     std::stringstream ss;
 88 |     ss << begin << std::endl;
 89 |     ss << content << std::endl;
 90 |     ss << end << std::endl;
 91 |     return ss.str();
 92 | }
 93 | 
 94 | inline std::string header(const std::string &content) {
 95 |     std::stringstream ss;
 96 |     size_t padding = kLineWidth - content.length() - 2;
 97 |     std::string line(padding / 2, '-');
 98 |     ss << line << " " << content << " " << line;
 99 |     if (padding % 2 == 1)
100 |         ss << '-';
101 |     return ss.str();
102 | }
103 | 
104 | } // namespace pretty
105 | } // namespace graphvite


--------------------------------------------------------------------------------
/include/util/math.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  * @author Zhaocheng Zhu
17 |  */
18 | 
19 | #pragma once
20 | 
21 | #include <algorithm>
22 | #include <type_traits>
23 | 
24 | namespace graphvite {
25 | 
26 | #ifndef __CUDA_ARCH__
27 | using std::abs; // the template version of abs()
28 | #endif
29 | 
30 | template<class Float>
31 | __host__ __device__ Float sigmoid(Float x) {
32 |     return x > 0 ? 1 / (1 + exp(-x)) : exp(x) / (exp(x) + 1);
33 | }
34 | 
35 | template<class Float>
36 | __host__ __device__ Float safe_exp(Float x);
37 | 
38 | template<>
39 | __host__ __device__ float safe_exp(float x) {
40 |     static const float kLogitClip = 80;
41 | #if __CUDA_ARCH__
42 |     return exp(min(max(x, -kLogitClip), kLogitClip));
43 | #else
44 |     return std::exp(std::min(std::max(x, -kLogitClip), kLogitClip));
45 | #endif
46 | }
47 | 
48 | template<>
49 | __host__ __device__ double safe_exp(double x) {
50 |     static const double kLogitClip = 700;
51 | #if __CUDA_ARCH__
52 |     return exp(min(max(x, -kLogitClip), kLogitClip));
53 | #else
54 |     return std::exp(std::min(std::max(x, -kLogitClip), kLogitClip));
55 | #endif
56 | }
57 | 
58 | template<class Integer>
59 | __host__ __device__ Integer bit_floor(Integer x) {
60 |     static_assert(std::is_integral<Integer>::value, "bit_floor() can only be invoked with integral types");
61 | #pragma unroll
62 |     for (int i = 1; i < sizeof(Integer) * 8; i *= 2)
63 |         x |= x >> i;
64 |     return (x + 1) >> 1;
65 | }
66 | 
67 | template<class Integer>
68 | __host__ __device__ Integer bit_ceil(Integer x) {
69 |     static_assert(std::is_integral<Integer>::value, "bit_ceil() can only be invoked with integral types");
70 |     x--;
71 | #pragma unroll
72 |     for (int i = 1; i < sizeof(Integer) * 8; i *= 2)
73 |         x |= x >> i;
74 |     return x + 1;
75 | }
76 | 
77 | } // namespace graphvie


--------------------------------------------------------------------------------
/include/util/time.h:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  *
16 |  * @author Zhaocheng Zhu
17 |  */
18 | 
19 | #pragma once
20 | 
21 | #include <string>
22 | #include <chrono>
23 | #include <unordered_map>
24 | #include <glog/logging.h>
25 | 
26 | namespace graphvite {
27 | 
28 | #ifdef USE_TIMER
29 | class Timer {
30 | public:
31 | 	typedef std::chrono::system_clock::time_point time_point;
32 | 	typedef std::chrono::high_resolution_clock clock;
33 | 
34 | 	static std::unordered_map<std::string, int> occurrence;
35 | 
36 | 	const char *prompt;
37 | 	int log_frequency;
38 | 	time_point start;
39 | 
40 | 	Timer(const char *_prompt, int _log_frequency = 1)
41 | 		: prompt(_prompt), log_frequency(_log_frequency), start(clock::now()) {
42 | 		if (occurrence.find(prompt) == occurrence.end())
43 | 			occurrence[prompt] = 0;
44 | 	}
45 | 
46 | 	~Timer() {
47 | 		time_point end = clock::now();
48 | 		LOG_IF(INFO, ++occurrence[prompt] == 1) << prompt << ": " << (end - start).count() / 1.0e6 << " ms";
49 | 		occurrence[prompt] %= log_frequency;
50 | 	}
51 | };
52 | 
53 | std::unordered_map<std::string, int> Timer::occurrence;
54 | #else
55 | class Timer {
56 | public:
57 |     template<class... Args>
58 |     Timer(const Args &...args) {}
59 | };
60 | #endif
61 | 
62 | } // namespace graphvite


--------------------------------------------------------------------------------
/python/graphvite/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 MilaGraph. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Author: Zhaocheng Zhu
16 | 
17 | """GraphVite: graph embedding at high speed and large scale"""
18 | from __future__ import absolute_import, unicode_literals
19 | 
20 | import os
21 | import sys
22 | import imp
23 | import logging
24 | 
25 | from . import util
26 | 
27 | package_path = os.path.dirname(__file__)
28 | candidate_paths = [
29 |     os.path.realpath(os.path.join(package_path, "lib")),
30 |     os.path.realpath(os.path.join(package_path, "../../lib")),
31 |     os.path.realpath(os.path.join(package_path, "../../build/lib"))
32 | ]
33 | lib_file = imp.find_module("libgraphvite", candidate_paths)[1]
34 | lib_path = os.path.dirname(lib_file)
35 | with util.chdir(lib_path):
36 |     lib = imp.load_dynamic("libgraphvite", lib_file)
37 | 
38 | from libgraphvite import dtype, auto, __version__
39 | 
40 | from . import base
41 | from .base import init_logging
42 | cfg = base.load_global_config()
43 | base.init_logging(logging.INFO)
44 | 
45 | from . import helper
46 | from . import graph, solver, optimizer
47 | from . import dataset
48 | 
49 | module = sys.modules[__name__]
50 | module.__dict__.update(dtype.__members__)


--------------------------------------------------------------------------------
/python/graphvite/application/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 MilaGraph. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Author: Zhaocheng Zhu
16 | 
17 | """Application module of GraphVite"""
18 | from __future__ import absolute_import
19 | 
20 | from .application import Application, \
21 |     GraphApplication, WordGraphApplication, KnowledgeGraphApplication, VisualizationApplication
22 | 
23 | __all__ = [
24 |     "Application",
25 |     "GraphApplication", "WordGraphApplication", "KnowledgeGraphApplication", "VisualizationApplication"
26 | ]


--------------------------------------------------------------------------------
/python/graphvite/application/network.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 MilaGraph. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | #
 15 | # Author: Zhaocheng Zhu
 16 | 
 17 | """Neural network definitions for applications"""
 18 | from __future__ import absolute_import
 19 | 
 20 | import types
 21 | import numpy as np
 22 | 
 23 | import torch
 24 | from torch import nn
 25 | 
 26 | 
 27 | class NodeClassifier(nn.Module):
 28 |     """
 29 |     Node classification network for graphs
 30 |     """
 31 |     def __init__(self, embedding, num_class, normalization=False):
 32 |         super(NodeClassifier, self).__init__()
 33 |         if normalization:
 34 |             embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
 35 |         embedding = torch.as_tensor(embedding)
 36 |         self.embeddings = nn.Embedding.from_pretrained(embedding, freeze=True)
 37 |         self.linear = nn.Linear(embedding.size(1), num_class, bias=True)
 38 | 
 39 |     def forward(self, indexes):
 40 |         x = self.embeddings(indexes)
 41 |         x = self.linear(x)
 42 |         return x
 43 | 
 44 | 
 45 | class LinkPredictor(nn.Module):
 46 |     """
 47 |     Link prediction network for graphs / knowledge graphs
 48 |     """
 49 |     def __init__(self, score_function, *embeddings, **kwargs):
 50 |         super(LinkPredictor, self).__init__()
 51 |         if isinstance(score_function, types.FunctionType):
 52 |             self.score_function = score_function
 53 |         else:
 54 |             self.score_function = getattr(LinkPredictor, score_function)
 55 |         self.kwargs = kwargs
 56 |         self.embeddings = nn.ModuleList()
 57 |         for embedding in embeddings:
 58 |             embedding = torch.as_tensor(embedding)
 59 |             embedding = nn.Embedding.from_pretrained(embedding, freeze=True)
 60 |             self.embeddings.append(embedding)
 61 | 
 62 |     def forward(self, *indexes):
 63 |         assert len(indexes) == len(self.embeddings)
 64 |         vectors = []
 65 |         for index, embedding in zip(indexes, self.embeddings):
 66 |             vectors.append(embedding(index))
 67 |         return self.score_function(*vectors, **self.kwargs)
 68 | 
 69 |     @staticmethod
 70 |     def LINE(heads, tails):
 71 |         x = heads * tails
 72 |         score = x.sum(dim=1)
 73 |         return score
 74 | 
 75 |     DeepWalk = LINE
 76 | 
 77 |     @staticmethod
 78 |     def TransE(heads, relations, tails, margin=12):
 79 |         x = heads + relations - tails
 80 |         score = margin - x.norm(p=1, dim=1)
 81 |         return score
 82 | 
 83 |     @staticmethod
 84 |     def RotatE(heads, relations, tails, margin=12):
 85 |         dim = heads.size(1) // 2
 86 | 
 87 |         head_re, head_im = heads.view(-1, dim, 2).permute(2, 0, 1)
 88 |         tail_re, tail_im = tails.view(-1, dim, 2).permute(2, 0, 1)
 89 |         relations = relations[:, :dim]
 90 |         relation_re, relation_im = torch.cos(relations), torch.sin(relations)
 91 | 
 92 |         x_re = head_re * relation_re - head_im * relation_im - tail_re
 93 |         x_im = head_re * relation_im + head_im * relation_re - tail_im
 94 |         x = torch.stack([x_re, x_im], dim=0)
 95 |         score = margin - x.norm(p=2, dim=0).sum(dim=1)
 96 |         return score
 97 | 
 98 |     @staticmethod
 99 |     def DistMult(heads, relations, tails):
100 |         x = heads * relations * tails
101 |         score = x.sum(dim=1)
102 |         return score
103 | 
104 |     @staticmethod
105 |     def ComplEx(heads, relations, tails):
106 |         dim = heads.size(1) // 2
107 | 
108 |         head_re, head_im = heads.view(-1, dim, 2).permute(2, 0, 1)
109 |         tail_re, tail_im = tails.view(-1, dim, 2).permute(2, 0, 1)
110 |         relation_re, relation_im = relations.view(-1, dim, 2).permute(2, 0, 1)
111 | 
112 |         x_re = head_re * relation_re - head_im * relation_im
113 |         x_im = head_re * relation_im + head_im * relation_re
114 |         x = x_re * tail_re + x_im * tail_im
115 |         score = x.sum(dim=1)
116 |         return score
117 | 
118 |     @staticmethod
119 |     def SimplE(heads, relations, tails):
120 |         dim = heads.size(1) // 2
121 | 
122 |         tails = tails.view(-1, dim, 2).flip(2).view(-1, dim * 2)
123 | 
124 |         x = heads * relations * tails
125 |         score = x.sum(dim=1)
126 |         return score
127 | 
128 |     @staticmethod
129 |     def QuatE(heads, relations, tails):
130 |         dim = heads.size(1) // 4
131 | 
132 |         head_r, head_i, head_j, head_k = heads.view(-1, dim, 4).permute(2, 0, 1)
133 |         tail_r, tail_i, tail_j, tail_k = tails.view(-1, dim, 4).permute(2, 0, 1)
134 |         relation_r, relation_i, relation_j, relation_k = relations.view(-1, dim, 4).permute(2, 0, 1)
135 | 
136 |         relation_norm = relations.view(-1, dim, 4).norm(p=2, dim=2)
137 |         x_r = head_r * relation_r - head_i * relation_i - head_j * relation_j - head_k * relation_k
138 |         x_i = head_r * relation_i + head_i * relation_r + head_j * relation_k - head_k * relation_j
139 |         x_j = head_r * relation_j - head_i * relation_k + head_j * relation_r + head_k * relation_i
140 |         x_k = head_r * relation_k + head_i * relation_j - head_j * relation_i + head_k * relation_r
141 |         x = (x_r * tail_r + x_i * tail_i + x_j * tail_j + x_k * tail_k) / (relation_norm + 1e-15)
142 |         score = x.sum(dim=1)
143 |         return score


--------------------------------------------------------------------------------
/python/graphvite/base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 MilaGraph. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Author: Zhaocheng Zhu
16 | 
17 | from __future__ import absolute_import
18 | 
19 | import os
20 | import sys
21 | import yaml
22 | import logging
23 | from easydict import EasyDict
24 | 
25 | from . import lib, dtype
26 | from .util import recursive_default, assert_in
27 | 
28 | 
29 | root = os.path.expanduser("~/.graphvite")
30 | if not os.path.exists(root):
31 |     os.mkdir(root)
32 | 
33 | # default config
34 | default = EasyDict()
35 | default.backend = "graphvite"
36 | default.dataset_path = os.path.join(root, "dataset")
37 | default.float_type = dtype.float32
38 | default.index_type = dtype.uint32
39 | 
40 | 
41 | def load_global_config():
42 |     config_file = os.path.join(root, "config.yaml")
43 |     if os.path.exists(config_file):
44 |         with open(config_file, "r") as fin:
45 |             cfg = EasyDict(yaml.safe_load(fin))
46 |         cfg = recursive_default(cfg, default)
47 |     else:
48 |         cfg = default
49 | 
50 |     assert_in(["graphvite", "torch"], backend=cfg.backend)
51 |     if not os.path.exists(cfg.dataset_path):
52 |         os.mkdir(cfg.dataset_path)
53 |     if isinstance(cfg.float_type, str):
54 |         cfg.float_type = eval(cfg.float_type)
55 |     if isinstance(cfg.index_type, str):
56 |         cfg.index_type = eval(cfg.index_type)
57 | 
58 |     return cfg
59 | 
60 | 
61 | def init_logging(level=logging.INFO, dir="", verbose=False):
62 |     """
63 |     Init logging.
64 | 
65 |     Parameters:
66 |         level (int, optional): logging level, INFO, WARNING, ERROR or FATAL
67 |         dir (str, optional): log directory, leave empty for standard I/O
68 |         verbose (bool, optional): verbose mode
69 |     """
70 |     logger = logging.getLogger(__package__)
71 |     logger.level = level
72 |     if dir == "":
73 |         logger.handlers = [logging.StreamHandler(sys.stdout)]
74 |     else:
75 |         logger.handlers = [logging.FileHandler(os.path.join(dir, "log.txt"))]
76 | 
77 |     if level <= logging.INFO:
78 |         lib.init_logging(lib.INFO, dir, verbose)
79 |     elif level <= logging.WARNING:
80 |         lib.init_logging(lib.WARNING, dir, verbose)
81 |     elif level <= logging.ERROR:
82 |         lib.init_logging(lib.ERROR, dir, verbose)
83 |     else:
84 |         lib.init_logging(lib.FATAL, dir, verbose)


--------------------------------------------------------------------------------
/python/graphvite/graph.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 MilaGraph. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Author: Zhaocheng Zhu
16 | 
17 | """Graph module of GraphVite"""
18 | from __future__ import absolute_import
19 | 
20 | import sys
21 | 
22 | from . import lib, cfg
23 | from .helper import find_all_templates, make_helper_class
24 | 
25 | module = sys.modules[__name__]
26 | 
27 | for name in find_all_templates(lib.graph):
28 |     module.__dict__[name] = make_helper_class(lib.graph, name, module,
29 |                                               ["index_type"], [cfg.index_type])
30 | 
31 | __all__ = [
32 |     "Graph", "WordGraph", "KnowledgeGraph", "KNNGraph"
33 | ]


--------------------------------------------------------------------------------
/python/graphvite/helper.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 MilaGraph. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | #
 15 | # Author: Zhaocheng Zhu
 16 | 
 17 | """Helper functions for loading C++ extension"""
 18 | from __future__ import absolute_import, print_function
 19 | 
 20 | import re
 21 | 
 22 | from . import lib
 23 | lib.name2dtype = {n: t for t, n in lib.dtype2name.items()}
 24 | 
 25 | 
 26 | def signature(name, *args):
 27 |     strings = [name]
 28 |     for arg in args:
 29 |         if isinstance(arg, lib.dtype):
 30 |             strings.append(lib.dtype2name[arg])
 31 |         else:
 32 |             strings.append(str(arg))
 33 |     return "_".join(strings)
 34 | 
 35 | 
 36 | def find_all_names(module):
 37 |     pattern = re.compile("[^_]+")
 38 |     names = []
 39 |     for name in module.__dict__:
 40 |         if pattern.match(name):
 41 |             names.append(name)
 42 |     return names
 43 | 
 44 | 
 45 | def find_all_templates(module):
 46 |     pattern = re.compile("([^_]+)(?:_[^_]+)+")
 47 |     names = set()
 48 |     for full_name in module.__dict__:
 49 |         result = pattern.match(full_name)
 50 |         if result:
 51 |             names.add(result.group(1))
 52 |     return list(names)
 53 | 
 54 | 
 55 | def get_any_instantiation(module, name):
 56 |     pattern = re.compile("%s(?:_[^_]+)+" % name)
 57 |     for full_name in module.__dict__:
 58 |         if pattern.match(full_name):
 59 |             return getattr(module, full_name)
 60 | 
 61 | 
 62 | def get_instantiation_info(module, name, template_keys):
 63 |     pattern = re.compile("%s((?:_[^_]+)+)" % name)
 64 |     possible_parameters = []
 65 |     for full_name in module.__dict__:
 66 |         result = pattern.match(full_name)
 67 |         if result:
 68 |             possible_parameters.append(result.group(1).split("_")[1:])
 69 |     template_values = zip(*possible_parameters)
 70 | 
 71 |     infos = ["Instantiations:"]
 72 |     for key, values in zip(template_keys, template_values):
 73 |         values = list(set(values))
 74 |         if values[0] in lib.name2dtype:
 75 |             values = [lib.name2dtype[v] for v in values]
 76 |         else:
 77 |             values = sorted(eval(v) for v in values)
 78 |         values = [str(v) for v in values]
 79 |         infos.append("- **%s**: %s" % (key, ", ".join(values)))
 80 |     return "\n    ".join(infos)
 81 | 
 82 | 
 83 | class TemplateHelper(object):
 84 | 
 85 |     def __new__(cls, *args, **kwargs):
 86 |         args = list(args)
 87 |         parameters = []
 88 |         for i, key in enumerate(cls.template_keys):
 89 |             if args:
 90 |                 parameters.append(args.pop(0))
 91 |             elif key in kwargs:
 92 |                 parameters.append(kwargs.pop(key))
 93 |             else:
 94 |                 value = cls.template_values[i]
 95 |                 if value is None:
 96 |                     raise TypeError("Required argument `%s` (pos %d) not found" % (key, i))
 97 |                 else:
 98 |                     parameters.append(value)
 99 | 
100 |         full_name = signature(cls.name, *parameters)
101 |         if hasattr(cls.module, full_name):
102 |             return getattr(cls.module, full_name)(*args, **kwargs)
103 |         else:
104 |             strings = ["%s=%s" % (k, v) for k, v in zip(cls.template_keys, parameters)]
105 |             raise AttributeError("Can't find an instantiation of %s with %s" % (cls.name, ", ".join(strings)))
106 | 
107 | 
108 | def make_helper_class(module, name, target_module, template_keys, template_values):
109 |     InstanceClass = get_any_instantiation(module, name)
110 |     # copy all members so that autodoc can work
111 |     members = dict(InstanceClass.__dict__)
112 |     # add instantiation info to docstring
113 |     doc = InstanceClass.__doc__
114 |     indent = re.search("\n *", doc).group(0)
115 |     info = "\n" + get_instantiation_info(module, name, template_keys)
116 |     doc += info.replace("\n", indent)
117 |     members.update({
118 |         "module": module,
119 |         "name": name,
120 |         "__module__": target_module.__name__,
121 |         "__doc__": doc,
122 |         "template_keys": template_keys,
123 |         "template_values": template_values
124 |     })
125 |     TemplateClass = type(name, (TemplateHelper,), members)
126 |     return TemplateClass


--------------------------------------------------------------------------------
/python/graphvite/optimizer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 MilaGraph. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Author: Zhaocheng Zhu
16 | 
17 | """Optimizer module of GraphVite"""
18 | from __future__ import absolute_import
19 | 
20 | import sys
21 | 
22 | from . import lib, auto
23 | from .helper import find_all_names
24 | 
25 | module = sys.modules[__name__]
26 | 
27 | 
28 | class Optimizer(object):
29 |     """
30 |     Optimizer(type=auto, *args, **kwargs)
31 |     Create an optimizer instance of any type.
32 | 
33 |     Parameters:
34 |         type (str or auto): optimizer type,
35 |             can be 'SGD', 'Momentum', 'AdaGrad', 'RMSprop' or 'Adam'
36 |     """
37 |     def __new__(cls, type=auto, *args, **kwargs):
38 |         if type == auto:
39 |             return lib.optimizer.Optimizer(auto)
40 |         elif hasattr(lib.optimizer, type):
41 |             return getattr(lib.optimizer, type)(*args, **kwargs)
42 |         else:
43 |             raise ValueError("Unknown optimizer `%s`" % type)
44 | 
45 | 
46 | for name in find_all_names(lib.optimizer):
47 |     if name not in module.__dict__:
48 |          Class = getattr(lib.optimizer, name)
49 |          # transfer module ownership so that autodoc can work
50 |          Class.__module__ = Class.__module__.replace("libgraphvite", "graphvite")
51 |          module.__dict__[name] = Class
52 | 
53 | __all__ = [
54 |     "Optimizer",
55 |     "LRSchedule",
56 |     "SGD", "Momentum", "AdaGrad", "RMSprop", "Adam"
57 | ]


--------------------------------------------------------------------------------
/python/graphvite/solver.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 MilaGraph. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Author: Zhaocheng Zhu
16 | 
17 | """Solver module of GraphVite"""
18 | from __future__ import absolute_import
19 | 
20 | import sys
21 | 
22 | from . import lib, cfg
23 | from .helper import find_all_templates, make_helper_class
24 | 
25 | module = sys.modules[__name__]
26 | 
27 | for name in find_all_templates(lib.solver):
28 |     module.__dict__[name] = make_helper_class(lib.solver, name, module,
29 |                                               ["dim", "float_type", "index_type"],
30 |                                               [None, cfg.float_type, cfg.index_type])
31 | 
32 | __all__ = [
33 |     "GraphSolver", "KnowledgeGraphSolver", "VisualizationSolver"
34 | ]


--------------------------------------------------------------------------------
/python/graphvite/util.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2019 MilaGraph. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | #
 15 | # Author: Zhaocheng Zhu
 16 | 
 17 | """Util module of GraphVite"""
 18 | from __future__ import print_function, absolute_import
 19 | 
 20 | import os
 21 | import sys
 22 | import logging
 23 | import tempfile
 24 | from time import time
 25 | from functools import wraps
 26 | 
 27 | import numpy as np
 28 | 
 29 | logger = logging.getLogger(__name__)
 30 | 
 31 | 
 32 | def recursive_default(obj, default):
 33 |     if isinstance(default, dict):
 34 |         new_obj = {}
 35 |         for key in default:
 36 |             if key in obj:
 37 |                 new_obj[key] = recursive_default(obj[key], default[key])
 38 |             else:
 39 |                 new_obj[key] = default[key]
 40 |         return type(default)(new_obj)
 41 |     else:
 42 |         return obj
 43 | 
 44 | 
 45 | def recursive_map(obj, function):
 46 |     if isinstance(obj, dict):
 47 |         return type(obj)({k: recursive_map(v, function) for k, v in obj.items()})
 48 |     elif isinstance(obj, list):
 49 |         return type(obj)([recursive_map(x, function) for x in obj])
 50 |     else:
 51 |         return function(obj)
 52 | 
 53 | 
 54 | def assert_in(candidates, **kwargs):
 55 | 
 56 |     def readable_list(iterable):
 57 |         iterable = ["`%s`" % x for x in iterable]
 58 |         s = ", ".join(iterable[:-1])
 59 |         if s:
 60 |             s += " and "
 61 |         s += iterable[-1]
 62 |         return s
 63 | 
 64 |     for key, value in kwargs.items():
 65 |         assert value in candidates, \
 66 |             "Unknown %s `%s`, candidates are %s" % (key, value, readable_list(candidates))
 67 | 
 68 | 
 69 | class chdir(object):
 70 |     """
 71 |     Context manager for working directory.
 72 | 
 73 |     Parameters:
 74 |         dir (str): new working directory
 75 |     """
 76 |     def __init__(self, dir):
 77 |         self.dir = dir
 78 | 
 79 |     def __enter__(self):
 80 |         self.old_dir = os.getcwd()
 81 |         os.chdir(self.dir)
 82 | 
 83 |     def __exit__(self, *args):
 84 |         os.chdir(self.old_dir)
 85 | 
 86 | 
 87 | class SharedNDArray(np.memmap):
 88 |     """
 89 |     Shared numpy ndarray with serialization interface.
 90 |     This class can be used as a drop-in replacement for arguments in multiprocessing.
 91 | 
 92 |     Parameters:
 93 |         array (array-like): input data
 94 |     """
 95 |     def __new__(cls, array):
 96 |         if "linux" not in sys.platform:
 97 |             raise EnvironmentError("SharedNDArray only works on Linux")
 98 | 
 99 |         array = np.asarray(array)
100 |         file = tempfile.NamedTemporaryFile()
101 |         self = super(SharedNDArray, cls).__new__(cls, file, dtype=array.dtype, shape=array.shape)
102 |         # keep reference to the tmp file, otherwise it will be released
103 |         self.file = file
104 |         self[:] = array
105 |         return self
106 | 
107 |     @classmethod
108 |     def from_memmap(cls, *args, **kwargs):
109 |         return super(SharedNDArray, cls).__new__(cls, *args, **kwargs)
110 | 
111 |     def __reduce__(self):
112 |         order = "C" if self.flags["C_CONTIGUOUS"] else "F"
113 |         return self.__class__.from_memmap, (self.filename, self.dtype, self.mode, self.offset, self.shape, order)
114 | 
115 |     def __array_wrap__(self, arr, context=None):
116 |         arr = super(np.memmap, self).__array_wrap__(arr, context)
117 | 
118 |         if self is arr or type(self) is not SharedNDArray:
119 |             return arr
120 |         if arr.shape == ():
121 |             return arr[()]
122 | 
123 |         return arr.view(np.ndarray)
124 | 
125 | 
126 | class Monitor(object):
127 |     """
128 |     Function call monitor.
129 | 
130 |     Parameters:
131 |         name_style (str): style of displayed function name,
132 |             can be `full`, `class` or `func`
133 |     """
134 | 
135 |     def __init__(self, name_style="class"):
136 |         assert name_style in ["full", "class", "func"]
137 |         self.name_style = name_style
138 | 
139 |     def get_name(self, function, instance):
140 |         is_method = function.__code__.co_argcount > 0 and function.__code__.co_varnames[0] == "self"
141 |         if self.name_style == "func" or not is_method:
142 |             return "%s" % function.__name__
143 |         if self.name_style == "class":
144 |             return "%s.%s" % (instance.__class__.__name__, function.__name__)
145 |         if self.name_style == "full":
146 |             return "%s.%s.%s" % (instance.__module__, instance.__class__.__name__, function.__name__)
147 | 
148 |     def time(self, function):
149 |         """
150 |         Monitor the run time of a function.
151 | 
152 |         Parameters:
153 |             function (function): function to monitor
154 | 
155 |         Returns:
156 |             function: wrapped function
157 |         """
158 |         @wraps(function)
159 |         def wrapper(*args, **kwargs):
160 |             name = self.get_name(function, args[0])
161 |             start = time()
162 |             result = function(*args, **kwargs)
163 |             end = time()
164 |             logger.info("[time] %s: %g s" % (name, end - start))
165 |             return result
166 | 
167 |         return wrapper
168 | 
169 |     def call(self, function):
170 |         """
171 |         Monitor the arguments of a function.
172 | 
173 |         Parameters:
174 |             function (function): function to monitor
175 | 
176 |         Returns:
177 |             function: wrapped function
178 |         """
179 |         @wraps(function)
180 |         def wrapper(*args, **kwargs):
181 |             name = self.get_name(function, args[0])
182 |             strings = ["%s" % repr(arg) for arg in args]
183 |             strings += ["%s=%s" % (k, repr(v)) for k, v in kwargs.items()]
184 |             logger.info("[call] %s(%s)" % (name, ", ".join(strings)))
185 |             return function(*args, **kwargs)
186 | 
187 |         return wrapper
188 | 
189 |     def result(self, function):
190 |         """
191 |         Monitor the return value of a function.
192 | 
193 |         Parameters:
194 |             function (function): function to monitor
195 | 
196 |         Returns:
197 |             function: wrapped function
198 |         """
199 |         @wraps(function)
200 |         def wrapper(*args, **kwargs):
201 |             name = self.get_name(function, args[0])
202 |             strings = ["%s" % repr(arg) for arg in args]
203 |             strings += ["%s=%s" % (k, repr(v)) for k, v in kwargs.items()]
204 |             result = function(*args, **kwargs)
205 |             logger.info("[result] %s(%s) = %s" % (name, ", ".join(strings), result))
206 |             return result
207 | 
208 |         return wrapper
209 | 
210 | 
211 | monitor = Monitor()


--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2019 MilaGraph. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Author: Zhaocheng Zhu
16 | 
17 | from __future__ import print_function, absolute_import
18 | 
19 | import os
20 | from setuptools import setup, find_packages
21 | 
22 | from graphvite import __version__, lib_path, lib_file
23 | 
24 | name = "graphvite"
25 | faiss_file = os.path.join(lib_path, "libfaiss.so")
26 | project_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
27 | 
28 | # library files
29 | install_path = os.path.join(name, "lib")
30 | data_files = [(install_path, [lib_file, faiss_file])]
31 | # configuration files
32 | for path, dirs, files in os.walk(os.path.join(project_path, "config")):
33 |     install_path = os.path.join(name, os.path.relpath(path, project_path))
34 |     files = [os.path.join(path, file) for file in files]
35 |     data_files.append((install_path, files))
36 | 
37 | setup(
38 |     name=name,
39 |     version=__version__,
40 |     description="A general and high-performance graph embedding system for various applications",
41 |     packages=find_packages(),
42 |     data_files=data_files,
43 |     entry_points={"console_scripts": ["graphvite = graphvite.cmd:main"]},
44 |     zip_safe=False,
45 |     #install_requires=["numpy", "pyyaml", "easydict", "six", "future"],
46 |     #extras_requires={"app": ["imageio", "psutil", "scipy", "matplotlib", "torch", "torchvision", "nltk"]}
47 | )


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | if (WIN32)
 2 |     add_library(graphvite graphvite.cu)
 3 | else ()
 4 |     add_library(graphvite SHARED graphvite.cu)
 5 |     set_target_properties(graphvite PROPERTIES
 6 |             CXX_VISIBILITY_PRESET   "hidden"
 7 |             CUDA_VISIBILITY_PRESET  "hidden"
 8 |             LINK_FLAGS              "-flto -Wl,-rpath=$ORIGIN"
 9 |             OUTPUT_NAME             graphvite)
10 | 
11 |     target_link_libraries(graphvite pthread curand glog.so)
12 |     if (FAISS_LIBRARY)
13 |         target_link_libraries(graphvite faiss.so)
14 |     endif()
15 |     target_compile_options(graphvite PRIVATE "-Xcompiler=-fno-fat-lto-objects") # -flto
16 | endif ()


--------------------------------------------------------------------------------
/src/graphvite.cu:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2019 MilaGraph. All Rights Reserved.
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  *
 16 |  * @author Zhaocheng Zhu
 17 |  */
 18 | 
 19 | #include "util/common.h"
 20 | 
 21 | //#define USE_TIMER
 22 | //#define PINNED_MEMORY
 23 | 
 24 | #include "bind.h"
 25 | 
 26 | const std::string version = "0.2.2";
 27 | 
 28 | PYBIND11_MODULE(libgraphvite, module) {
 29 |     py::options options;
 30 |     options.disable_function_signatures();
 31 | 
 32 |     // optimizers
 33 |     auto optimizer = module.def_submodule("optimizer");
 34 |     pyLRSchedule(optimizer, "LRSchedule");
 35 |     pyOptimizer(optimizer, "Optimizer");
 36 |     pySGD(optimizer, "SGD");
 37 |     pyMomentum(optimizer, "Momentum");
 38 |     pyAdaGrad(optimizer, "AdaGrad");
 39 |     pyRMSprop(optimizer, "RMSprop");
 40 |     pyAdam(optimizer, "Adam");
 41 | 
 42 |     // graphs
 43 |     auto graph = module.def_submodule("graph");
 44 |     pyGraph<unsigned int>(graph, "Graph");
 45 |     pyWordGraph<unsigned int>(graph, "WordGraph");
 46 |     pyKnowledgeGraph<unsigned int>(graph, "KnowledgeGraph");
 47 |     pyKNNGraph<unsigned int>(graph, "KNNGraph");
 48 | 
 49 |     // solvers
 50 |     auto solver = module.def_submodule("solver");
 51 | 
 52 |     pyGraphSolver<128, float, unsigned int>(solver, "GraphSolver");
 53 | #ifndef FAST_COMPILE
 54 |     pyGraphSolver<32, float, unsigned int>(solver, "GraphSolver");
 55 |     pyGraphSolver<64, float, unsigned int>(solver, "GraphSolver");
 56 |     pyGraphSolver<96, float, unsigned int>(solver, "GraphSolver");
 57 |     pyGraphSolver<256, float, unsigned int>(solver, "GraphSolver");
 58 |     pyGraphSolver<512, float, unsigned int>(solver, "GraphSolver");
 59 | #endif
 60 | 
 61 |     pyKnowledgeGraphSolver<512, float, unsigned int>(solver, "KnowledgeGraphSolver");
 62 |     pyKnowledgeGraphSolver<1024, float, unsigned int>(solver, "KnowledgeGraphSolver");
 63 |     pyKnowledgeGraphSolver<2048, float, unsigned int>(solver, "KnowledgeGraphSolver");
 64 | #ifndef FAST_COMPILE
 65 |     pyKnowledgeGraphSolver<32, float, unsigned int>(solver, "KnowledgeGraphSolver");
 66 |     pyKnowledgeGraphSolver<64, float, unsigned int>(solver, "KnowledgeGraphSolver");
 67 |     pyKnowledgeGraphSolver<96, float, unsigned int>(solver, "KnowledgeGraphSolver");
 68 |     pyKnowledgeGraphSolver<128, float, unsigned int>(solver, "KnowledgeGraphSolver");
 69 |     pyKnowledgeGraphSolver<256, float, unsigned int>(solver, "KnowledgeGraphSolver");
 70 | #endif
 71 | 
 72 |     pyVisualizationSolver<2, float, unsigned int>(solver, "VisualizationSolver");
 73 | #ifndef FAST_COMPILE
 74 |     pyVisualizationSolver<3, float, unsigned int>(solver, "VisualizationSolver");
 75 | #endif
 76 | 
 77 |     // interface
 78 |     py::enum_<DType> pyDType(module, "dtype");
 79 |     pyDType.value("uint32", DType::uint32)
 80 |            .value("uint64", DType::uint64)
 81 |            .value("float32", DType::float32)
 82 |            .value("float64", DType::float64);
 83 |     module.attr("dtype2name") = dtype2name;
 84 | 
 85 |     // glog
 86 |     module.def("init_logging", graphvite::init_logging, py::no_gil(),
 87 |                py::arg("threshhold") = google::INFO, py::arg("dir") = "", py::arg("verbose") = false);
 88 |     module.attr("INFO") = google::INFO;
 89 |     module.attr("WARNING") = google::WARNING;
 90 |     module.attr("ERROR") = google::ERROR;
 91 |     module.attr("FATAL") = google::FATAL;
 92 | 
 93 |     // io
 94 |     auto io = module.def_submodule("io");
 95 |     io.def("size_string", graphvite::pretty::size_string, py::no_gil(), py::arg("size"));
 96 |     io.def("yes_no", graphvite::pretty::yes_no, py::no_gil(), py::arg("x"));
 97 |     io.def("block", graphvite::pretty::block, py::no_gil(), py::arg("content"));
 98 |     io.def("header", graphvite::pretty::header, py::no_gil(), py::arg("content"));
 99 | 
100 |     module.attr("auto") = graphvite::kAuto;
101 |     module.def("KiB", graphvite::KiB, py::no_gil(), py::arg("size"));
102 |     module.def("MiB", graphvite::MiB, py::no_gil(), py::arg("size"));
103 |     module.def("GiB", graphvite::GiB, py::no_gil(), py::arg("size"));
104 | 
105 |     module.attr("__version__") = version;
106 | }


--------------------------------------------------------------------------------