├── .circleci ├── Dockerfile.cuda10.1 ├── Dockerfile.cuda10.2 ├── Dockerfile.cuda11.0 ├── Dockerfile.cuda11.1 ├── Dockerfile.cuda9.2 └── config.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── README.md ├── cmake ├── FindPackageHandleStandardArgs.cmake ├── FindPackageMessage.cmake ├── Finduv.cmake ├── MiscCheck.cmake ├── Options.cmake └── Sanitize.cmake ├── docs ├── cuda_gotchas.md ├── development.md ├── linux_support.md ├── shm.md └── thread_model.md ├── setup.py ├── tensorpipe ├── .clang-format ├── .clang-tidy ├── CMakeLists.txt ├── benchmark │ ├── CMakeLists.txt │ ├── benchmark_pipe.cc │ ├── benchmark_transport.cc │ ├── channel_registry.cc │ ├── channel_registry.h │ ├── measurements.h │ ├── options.cc │ ├── options.h │ ├── registry.h │ ├── transport_registry.cc │ └── transport_registry.h ├── channel │ ├── basic │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── factory.cc │ │ └── factory.h │ ├── channel.h │ ├── channel_boilerplate.h │ ├── channel_impl_boilerplate.h │ ├── cma │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── factory.cc │ │ └── factory.h │ ├── context.h │ ├── context_boilerplate.h │ ├── context_impl_boilerplate.h │ ├── cuda_basic │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── constants.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── factory.cc │ │ └── factory.h │ ├── cuda_gdr │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── constants.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── error.h │ │ ├── factory.cc │ │ └── factory.h │ ├── cuda_ipc │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── constants.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── factory.cc │ │ └── factory.h │ ├── cuda_xth │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── factory.cc │ │ └── factory.h │ ├── error.cc │ 
├── error.h │ ├── helpers.cc │ ├── helpers.h │ ├── mpt │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── factory.cc │ │ ├── factory.h │ │ └── nop_types.h │ └── xth │ │ ├── channel_impl.cc │ │ ├── channel_impl.h │ │ ├── context_impl.cc │ │ ├── context_impl.h │ │ ├── factory.cc │ │ └── factory.h ├── common │ ├── address.cc │ ├── address.h │ ├── allocator.cc │ ├── allocator.h │ ├── buffer.h │ ├── busy_polling_loop.h │ ├── callback.h │ ├── cpu_buffer.h │ ├── cuda.h │ ├── cuda_buffer.cc │ ├── cuda_buffer.h │ ├── cuda_lib.h │ ├── cuda_loop.cc │ ├── cuda_loop.h │ ├── deferred_executor.h │ ├── defs.h │ ├── device.h │ ├── dl.h │ ├── epoll_loop.cc │ ├── epoll_loop.h │ ├── error.cc │ ├── error.h │ ├── error_macros.h │ ├── fd.cc │ ├── fd.h │ ├── ibv.cc │ ├── ibv.h │ ├── ibv_lib.h │ ├── memory.h │ ├── nop.h │ ├── nvml_lib.h │ ├── optional.h │ ├── queue.h │ ├── ringbuffer.h │ ├── ringbuffer_read_write_ops.h │ ├── ringbuffer_role.h │ ├── shm_ringbuffer.h │ ├── shm_segment.cc │ ├── shm_segment.h │ ├── socket.cc │ ├── socket.h │ ├── state_machine.h │ ├── stream_read_write_ops.h │ ├── strings.h │ ├── system.cc │ └── system.h ├── config.h.in ├── config_cuda.h.in ├── core │ ├── context.cc │ ├── context.h │ ├── context_impl.cc │ ├── context_impl.h │ ├── error.cc │ ├── error.h │ ├── listener.cc │ ├── listener.h │ ├── listener_impl.cc │ ├── listener_impl.h │ ├── message.h │ ├── nop_types.h │ ├── pipe.cc │ ├── pipe.h │ ├── pipe_impl.cc │ └── pipe_impl.h ├── misc │ ├── CMakeLists.txt │ └── dump_state_machine.cc ├── python │ ├── CMakeLists.txt │ └── tensorpipe.cc ├── tensorpipe.h ├── tensorpipe_cuda.h ├── test │ ├── CMakeLists.txt │ ├── channel │ │ ├── basic │ │ │ └── basic_test.cc │ │ ├── channel_test.cc │ │ ├── channel_test.h │ │ ├── channel_test_cpu.cc │ │ ├── channel_test_cpu.h │ │ ├── channel_test_cuda.cc │ │ ├── channel_test_cuda.h │ │ ├── channel_test_cuda_multi_gpu.cc │ │ ├── channel_test_cuda_xdtt.cc │ │ ├── cma │ │ │ ├── 
CMakeLists.txt │ │ │ ├── cma_test.cc │ │ │ ├── docker_tests.sh │ │ │ ├── probe.cc │ │ │ └── probe_report_checker.py │ │ ├── cuda_basic │ │ │ └── cuda_basic_test.cc │ │ ├── cuda_gdr │ │ │ └── cuda_gdr_test.cc │ │ ├── cuda_helpers.h │ │ ├── cuda_ipc │ │ │ └── cuda_ipc_test.cc │ │ ├── cuda_xth │ │ │ └── cuda_xth_test.cc │ │ ├── kernel.cu │ │ ├── kernel.cuh │ │ ├── mpt │ │ │ └── mpt_test.cc │ │ └── xth │ │ │ └── xth_test.cc │ ├── common │ │ ├── cuda_test.cc │ │ ├── defs_test.cc │ │ ├── epoll_loop_test.cc │ │ ├── ringbuffer_test.cc │ │ ├── shm_ringbuffer_test.cc │ │ ├── shm_segment_test.cc │ │ └── system_test.cc │ ├── core │ │ ├── context_test.cc │ │ ├── listener_test.cc │ │ ├── pipe_cuda_test.cc │ │ ├── pipe_test.cc │ │ └── pipe_test.h │ ├── peer_group.h │ ├── python │ │ └── tensorpipe.py │ ├── test.cc │ ├── test_environment.cc │ ├── test_environment.h │ └── transport │ │ ├── connection_test.cc │ │ ├── context_test.cc │ │ ├── ibv │ │ ├── connection_test.cc │ │ ├── context_test.cc │ │ ├── ibv_test.cc │ │ ├── ibv_test.h │ │ └── sockaddr_test.cc │ │ ├── listener_test.cc │ │ ├── shm │ │ ├── connection_test.cc │ │ ├── listener_test.cc │ │ ├── reactor_test.cc │ │ ├── shm_test.cc │ │ ├── shm_test.h │ │ └── sockaddr_test.cc │ │ ├── transport_test.h │ │ └── uv │ │ ├── connection_test.cc │ │ ├── context_test.cc │ │ ├── loop_test.cc │ │ ├── sockaddr_test.cc │ │ ├── uv_test.cc │ │ └── uv_test.h └── transport │ ├── connection.h │ ├── connection_boilerplate.h │ ├── connection_impl_boilerplate.h │ ├── context.h │ ├── context_boilerplate.h │ ├── context_impl_boilerplate.h │ ├── error.cc │ ├── error.h │ ├── ibv │ ├── connection_impl.cc │ ├── connection_impl.h │ ├── constants.h │ ├── context_impl.cc │ ├── context_impl.h │ ├── error.cc │ ├── error.h │ ├── factory.cc │ ├── factory.h │ ├── listener_impl.cc │ ├── listener_impl.h │ ├── reactor.cc │ ├── reactor.h │ ├── sockaddr.cc │ ├── sockaddr.h │ ├── utility.cc │ └── utility.h │ ├── listener.h │ ├── listener_boilerplate.h │ ├── 
listener_impl_boilerplate.h │ ├── shm │ ├── connection_impl.cc │ ├── connection_impl.h │ ├── context_impl.cc │ ├── context_impl.h │ ├── factory.cc │ ├── factory.h │ ├── listener_impl.cc │ ├── listener_impl.h │ ├── reactor.cc │ ├── reactor.h │ ├── sockaddr.cc │ └── sockaddr.h │ └── uv │ ├── connection_impl.cc │ ├── connection_impl.h │ ├── context_impl.cc │ ├── context_impl.h │ ├── error.cc │ ├── error.h │ ├── factory.cc │ ├── factory.h │ ├── listener_impl.cc │ ├── listener_impl.h │ ├── loop.cc │ ├── loop.h │ ├── sockaddr.cc │ ├── sockaddr.h │ ├── utility.cc │ ├── utility.h │ └── uv.h └── third_party └── README.md /.circleci/Dockerfile.cuda10.1: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.1-devel-ubuntu18.04 2 | 3 | # Install APT packages. 4 | RUN apt-get update && \ 5 | apt-get install -y build-essential cmake 6 | 7 | COPY . /tensorpipe 8 | 9 | WORKDIR /tensorpipe 10 | -------------------------------------------------------------------------------- /.circleci/Dockerfile.cuda10.2: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-devel-ubuntu18.04 2 | 3 | # Install APT packages. 4 | RUN apt-get update && \ 5 | apt-get install -y build-essential cmake 6 | 7 | COPY . /tensorpipe 8 | 9 | WORKDIR /tensorpipe 10 | -------------------------------------------------------------------------------- /.circleci/Dockerfile.cuda11.0: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.0-devel-ubuntu18.04 2 | 3 | # Install APT packages. 4 | RUN apt-get update && \ 5 | apt-get install -y build-essential cmake 6 | 7 | COPY . 
/tensorpipe 8 | 9 | WORKDIR /tensorpipe 10 | -------------------------------------------------------------------------------- /.circleci/Dockerfile.cuda11.1: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.1-devel-ubuntu18.04 2 | 3 | # Install APT packages. 4 | RUN apt-get update && \ 5 | apt-get install -y build-essential cmake 6 | 7 | COPY . /tensorpipe 8 | 9 | WORKDIR /tensorpipe 10 | -------------------------------------------------------------------------------- /.circleci/Dockerfile.cuda9.2: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.2-devel-ubuntu18.04 2 | 3 | # Install APT packages. 4 | RUN apt-get update && \ 5 | apt-get install -y build-essential cmake 6 | 7 | COPY . /tensorpipe 8 | 9 | WORKDIR /tensorpipe 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .DS_Store 3 | /build/ 4 | /cmake-build-debug/ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/pybind11"] 2 | path = third_party/pybind11 3 | url = https://github.com/pybind/pybind11.git 4 | [submodule "third_party/libuv"] 5 | path = third_party/libuv 6 | url = https://github.com/libuv/libuv.git 7 | branch = v1.x 8 | [submodule "third_party/googletest"] 9 | path = third_party/googletest 10 | url = https://github.com/google/googletest.git 11 | [submodule "third_party/libnop"] 12 | path = third_party/libnop 13 | url = https://github.com/google/libnop.git 14 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 
2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | cmake_minimum_required(VERSION 3.5 FATAL_ERROR) 8 | 9 | project(tensorpipe LANGUAGES C CXX) 10 | 11 | set(CMAKE_CXX_STANDARD 14) 12 | 13 | list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") 14 | 15 | # Expose build options. 16 | include(Options) 17 | 18 | # Define sanitizer option, if specified. 19 | include(Sanitize) 20 | 21 | # Misc checks to cope with various compiler modes. 22 | include(MiscCheck) 23 | 24 | add_subdirectory(tensorpipe) 25 | 26 | install(EXPORT TensorpipeTargets 27 | DESTINATION share/cmake/Tensorpipe 28 | FILE TensorpipeTargets.cmake) 29 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to TensorPipe 2 | 3 | We want to make contributing to this project as easy and transparent as 4 | possible. 5 | 6 | ## Our Development Process 7 | 8 | This project's source-of-truth is the version in Facebook's internal codebase, 9 | which is continuously synced with the GitHub mirror using 10 | [ShipIt](https://github.com/facebook/fbshipit). Pull requests on GitHub are 11 | copied over using ImportIt (a companion tool for ShipIt). 12 | 13 | ## Pull Requests 14 | 15 | We actively welcome your pull requests. 16 | 17 | 1. Fork the repo and create your branch from `main`. 18 | 2. If you've added code that should be tested, add tests. 19 | 3. If you've changed APIs, update the documentation. 20 | 4. Ensure the test suite passes. 21 | 5. Make sure your code lints. 22 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 23 | 24 | ## Contributor License Agreement ("CLA") 25 | 26 | In order to accept your pull request, we need you to submit a CLA. 
You only 27 | need to do this once to work on any of Facebook's open source projects. 28 | 29 | Complete your CLA here: <https://code.facebook.com/cla> 30 | 31 | ## Issues 32 | 33 | We use GitHub issues to track public bugs. Please ensure your description is 34 | clear and has sufficient instructions to be able to reproduce the issue. 35 | 36 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the 37 | safe disclosure of security bugs. In those cases, please go through the process 38 | outlined on that page and do not file a public issue. 39 | 40 | ## Coding Style 41 | 42 | This source code is formatted using `clang-format`, with project-specific rules 43 | recorded in the `.clang-format` file. 44 | 45 | ## License 46 | 47 | By contributing to TensorPipe, you agree that your contributions will be 48 | licensed under the LICENSE.txt file in the root directory of this source tree. 49 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For TensorPipe software 4 | 5 | Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Meta nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /cmake/FindPackageMessage.cmake: -------------------------------------------------------------------------------- 1 | # Copyright 2000-2020 Kitware, Inc. and Contributors 2 | # All rights reserved. 3 | # 4 | # Distributed under the OSI-approved BSD 3-Clause License. See 5 | # https://cmake.org/licensing for details. 6 | 7 | #.rst: 8 | # FindPackageMessage 9 | # ------------------ 10 | # 11 | # 12 | # 13 | # FIND_PACKAGE_MESSAGE( "message for user" "find result details") 14 | # 15 | # This macro is intended to be used in FindXXX.cmake modules files. It 16 | # will print a message once for each unique find result. This is useful 17 | # for telling the user where a package was found. The first argument 18 | # specifies the name (XXX) of the package. The second argument 19 | # specifies the message to display. The third argument lists details 20 | # about the find result so that if they change the message will be 21 | # displayed again. The macro also obeys the QUIET argument to the 22 | # find_package command. 
23 | # 24 | # Example: 25 | # 26 | # :: 27 | # 28 | # if(X11_FOUND) 29 | # FIND_PACKAGE_MESSAGE(X11 "Found X11: ${X11_X11_LIB}" 30 | # "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]") 31 | # else() 32 | # ... 33 | # endif() 34 | 35 | function(FIND_PACKAGE_MESSAGE pkg msg details) 36 | # Avoid printing a message repeatedly for the same find result. 37 | if(NOT ${pkg}_FIND_QUIETLY) 38 | string(REPLACE "\n" "" details "${details}") 39 | set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg}) 40 | if(NOT "${details}" STREQUAL "${${DETAILS_VAR}}") 41 | # The message has not yet been printed. 42 | message(STATUS "${msg}") 43 | 44 | # Save the find details in the cache to avoid printing the same 45 | # message again. 46 | set("${DETAILS_VAR}" "${details}" 47 | CACHE INTERNAL "Details about finding ${pkg}") 48 | endif() 49 | endif() 50 | endfunction() 51 | -------------------------------------------------------------------------------- /cmake/Finduv.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # 8 | # Finduv 9 | # ------ 10 | # 11 | # Imported Targets 12 | # ^^^^^^^^^^^^^^^^ 13 | # 14 | # An imported target named ``uv::uv`` is provided if libuv has been found. 15 | # 16 | # Result Variables 17 | # ^^^^^^^^^^^^^^^^ 18 | # 19 | # This module defines the following variables: 20 | # 21 | # ``uv_FOUND`` 22 | # True if libuv was found, false otherwise. 23 | # ``uv_LIBRARY_DIRS`` 24 | # The path(s) to uv libraries. 25 | # ``uv_VERSION`` 26 | # The version of libuv found. 
27 | # 28 | 29 | find_package(PkgConfig QUIET) 30 | 31 | if((NOT TP_BUILD_LIBUV) AND PkgConfig_FOUND) 32 | pkg_check_modules(uv QUIET IMPORTED_TARGET GLOBAL libuv) 33 | if(uv_FOUND) 34 | add_library(uv::uv ALIAS PkgConfig::uv) 35 | endif() 36 | endif() 37 | 38 | if(NOT uv_FOUND) 39 | set(uv_VERSION "1.41.0") 40 | set(uv_LIBRARY_DIRS "submodule") 41 | 42 | set(libuv_DIR ${PROJECT_SOURCE_DIR}/third_party/libuv) 43 | add_subdirectory(${libuv_DIR} 44 | ${PROJECT_BINARY_DIR}/third_party/libuv 45 | EXCLUDE_FROM_ALL) 46 | 47 | # This hack duplicates the `uv_a` target, so that we can call 48 | # install(TARGETS ... EXPORT) on it, which is not possible when the target is 49 | # defined in a subdirectory in CMake 3.5. 50 | get_target_property(_uv_sources uv_a SOURCES) 51 | set(_uv_sources_abs) 52 | foreach(_uv_src ${_uv_sources}) 53 | list(APPEND _uv_sources_abs "${libuv_DIR}/${_uv_src}") 54 | endforeach() 55 | 56 | add_library(tensorpipe_uv STATIC ${_uv_sources_abs}) 57 | if(BUILD_SHARED_LIBS) 58 | set_target_properties(tensorpipe_uv PROPERTIES POSITION_INDEPENDENT_CODE 1) 59 | endif() 60 | 61 | get_target_property(_link_libs uv_a LINK_LIBRARIES) 62 | target_link_libraries(tensorpipe_uv PRIVATE ${_link_libs}) 63 | 64 | get_target_property(_include_dirs uv_a INCLUDE_DIRECTORIES) 65 | target_include_directories(tensorpipe_uv PRIVATE ${_include_dirs}) 66 | target_include_directories(tensorpipe_uv PUBLIC $<BUILD_INTERFACE:${libuv_DIR}/include>) 67 | 68 | get_target_property(_compile_definitions uv_a COMPILE_DEFINITIONS) 69 | target_compile_definitions(tensorpipe_uv PRIVATE ${_compile_definitions}) 70 | 71 | get_target_property(_compile_options uv_a COMPILE_OPTIONS) 72 | target_compile_options(tensorpipe_uv PRIVATE ${_compile_options}) 73 | 74 | install(TARGETS tensorpipe_uv 75 | EXPORT TensorpipeTargets 76 | ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR}) 77 | 78 | add_library(uv::uv ALIAS tensorpipe_uv) 79 | endif() 80 | 81 | include(FindPackageHandleStandardArgs) 82 | find_package_handle_standard_args(uv 83 | 
REQUIRED_VARS uv_VERSION 84 | VERSION_VAR uv_VERSION) 85 | -------------------------------------------------------------------------------- /cmake/MiscCheck.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | include(CheckCXXSourceCompiles) 8 | include(CMakePushCheckState) 9 | 10 | # We use the [[nodiscard]] attribute, which GCC 5 complains about. 11 | # Silence this warning if GCC 5 is used. 12 | if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 13 | if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6) 14 | add_definitions("-Wno-attributes") 15 | endif() 16 | endif() 17 | -------------------------------------------------------------------------------- /cmake/Options.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | if(CMAKE_SYSTEM_NAME STREQUAL "Linux") 8 | set(LINUX ON) 9 | else() 10 | set(LINUX OFF) 11 | endif() 12 | 13 | macro(TP_CONDITIONAL_BACKEND name docstring condition) 14 | # No clue why this monstrosity is needed. But cmake_dependent_option has it, 15 | # and the code doesn't seem to work without it. 
16 | string(REGEX REPLACE " +" ";" TP_CONDITIONAL_BACKEND_CONDITION "${condition}") 17 | if(${TP_CONDITIONAL_BACKEND_CONDITION}) 18 | set(TP_CONDITIONAL_BACKEND_CAN_ENABLE ON) 19 | else() 20 | set(TP_CONDITIONAL_BACKEND_CAN_ENABLE OFF) 21 | endif() 22 | set(${name} ${TP_CONDITIONAL_BACKEND_CAN_ENABLE} CACHE BOOL ${docstring}) 23 | if(${name} AND NOT ${TP_CONDITIONAL_BACKEND_CAN_ENABLE}) 24 | message(FATAL_ERROR "${name} was explicitly set, but that can't be honored") 25 | endif() 26 | endmacro() 27 | 28 | # Try to auto-detect the presence of some libraries in order to enable/disable 29 | # the transports/channels that make use of them. 30 | # TODO Add CUDA to this list, in order to fix the TODO below 31 | 32 | # TODO: Default to ON if CUDA available. 33 | option(TP_USE_CUDA "Enable support for CUDA tensors" OFF) 34 | 35 | # Optional features 36 | option(TP_BUILD_BENCHMARK "Build benchmarks" OFF) 37 | option(TP_BUILD_MISC "Build misc tools" OFF) 38 | option(TP_BUILD_PYTHON "Build python bindings" OFF) 39 | option(TP_BUILD_TESTING "Build tests" OFF) 40 | 41 | # Whether to build a static or shared library 42 | if(BUILD_SHARED_LIBS) 43 | set(TP_STATIC_OR_SHARED SHARED CACHE STRING "") 44 | else() 45 | set(TP_STATIC_OR_SHARED STATIC CACHE STRING "") 46 | endif() 47 | mark_as_advanced(TP_STATIC_OR_SHARED) 48 | 49 | # Force to build libuv from the included submodule 50 | option(TP_BUILD_LIBUV "Build libuv from source" OFF) 51 | 52 | # Directories 53 | include(GNUInstallDirs) 54 | set(TP_INSTALL_LIBDIR ${CMAKE_INSTALL_LIBDIR} CACHE STRING "Directory in which to install libraries") 55 | mark_as_advanced(TP_INSTALL_LIBDIR) 56 | set(TP_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE STRING "Directory in which to install public headers") 57 | mark_as_advanced(TP_INSTALL_INCLUDEDIR) 58 | -------------------------------------------------------------------------------- /cmake/Sanitize.cmake: -------------------------------------------------------------------------------- 1 | 
# Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | if(SANITIZE) 8 | add_definitions("-fsanitize=${SANITIZE}") 9 | add_definitions("-fno-omit-frame-pointer") 10 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=${SANITIZE}") 11 | endif() 12 | -------------------------------------------------------------------------------- /docs/development.md: -------------------------------------------------------------------------------- 1 | # Development 2 | 3 | TensorPipe uses CMake for its build system. 4 | 5 | ## Dependencies 6 | 7 | To build TensorPipe, you need: 8 | 9 | * C++14 compatible compiler (GCC >= 5.5 or Clang >= 6) 10 | 11 | ## Clone the repository 12 | 13 | Example: 14 | 15 | ``` shell 16 | git clone --recursive https://github.com/pytorch/tensorpipe 17 | ``` 18 | 19 | If you have updated an already cloned repository, make sure that the 20 | submodules are up to date: 21 | 22 | ``` shell 23 | git submodule sync 24 | git submodule update --init 25 | ``` 26 | 27 | It is imperative to check out the submodules before running CMake. 28 | 29 | Find the list of submodules and a description of what they're used for 30 | on [this page][third_party]. 31 | 32 | [third_party]: https://github.com/pytorch/tensorpipe/tree/main/third_party 33 | 34 | ## Using CMake 35 | 36 | Example: 37 | 38 | ``` shell 39 | mkdir build 40 | cd build 41 | cmake ../ -DCMAKE_BUILD_TYPE=Debug -DSANITIZE=thread 42 | make 43 | ``` 44 | 45 | You can specify CMake variables by passing them as arguments to the `cmake` command. 46 | 47 | Useful CMake variables: 48 | 49 | * `CMAKE_C_COMPILER` -- Define which C compiler to use. 50 | * `CMAKE_CXX_COMPILER` -- Define which C++ compiler to use. 51 | * `CMAKE_C_FLAGS` -- Additional flags for the C compiler. 
52 | * `CMAKE_CXX_FLAGS` -- Additional flags for the C++ compiler. 53 | * `CMAKE_BUILD_TYPE` -- For example: `release`, `debug`. 54 | 55 | Useful TensorPipe specific variables: 56 | 57 | * `SANITIZE` -- configure the sanitizer to use (if any); for 58 | example: `address` or `thread`, to run with `asan` or `tsan`, 59 | respectively. 60 | 61 | ## Ninja 62 | 63 | To make CMake output something other than the default `Makefile`, see 64 | [`cmake-generators(7)`][cmake-generators]. We like to use the 65 | [Ninja][ninja] generator because it works well for incremental builds. 66 | On the command line, specify `-GNinja` to use it. 67 | 68 | [cmake-generators]: https://cmake.org/cmake/help/v3.4/manual/cmake-generators.7.html 69 | [ninja]: https://en.wikipedia.org/wiki/Ninja_(build_system) 70 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 
7 | 8 | import os 9 | import subprocess 10 | import sys 11 | from pathlib import Path 12 | 13 | from setuptools import Extension, setup 14 | from setuptools.command.build_ext import build_ext 15 | 16 | 17 | class CMakeBuild(build_ext): 18 | def run(self): 19 | for ext in self.extensions: 20 | self.build_extension(ext) 21 | 22 | def build_extension(self, ext): 23 | if not os.path.exists(self.build_temp): 24 | os.makedirs(self.build_temp) 25 | 26 | source_path = Path(__file__).parent.resolve() 27 | output_path = Path(self.get_ext_fullpath(ext.name)).parent.resolve() 28 | build_type = "Debug" if self.debug else "Release" 29 | 30 | cmake_cmd = [ 31 | "cmake", 32 | f"{source_path}", 33 | f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={output_path}", 34 | f"-DPYTHON_EXECUTABLE={sys.executable}", 35 | f"-DCMAKE_BUILD_TYPE={build_type}", 36 | "-DCMAKE_C_COMPILER=clang-6.0", 37 | "-DCMAKE_CXX_COMPILER=clang++-6.0", 38 | "-DCMAKE_POSITION_INDEPENDENT_CODE=true", 39 | "-DTP_BUILD_PYTHON=true", 40 | ] 41 | 42 | for opt in os.environ: 43 | if opt.startswith("TP_"): 44 | cmake_cmd.append(f"-D{opt}={os.environ[opt]}") 45 | 46 | make_cmd = ["make", "-j", "pytensorpipe"] 47 | 48 | subprocess.check_call(cmake_cmd, cwd=self.build_temp) 49 | subprocess.check_call(make_cmd, cwd=self.build_temp) 50 | 51 | 52 | setup( 53 | name="tensorpipe", 54 | version="0.0.0", 55 | author="Facebook AI Research", 56 | ext_modules=[Extension("pytensorpipe", sources=[])], 57 | cmdclass={"build_ext": CMakeBuild}, 58 | zip_safe=False, 59 | ) 60 | -------------------------------------------------------------------------------- /tensorpipe/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveAssignments: false 5 | AlignConsecutiveDeclarations: false 6 | AlignEscapedNewlinesLeft: true 7 | AlignOperands: false 8 | AlignTrailingComments: false 9 | 
AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: Empty 13 | AllowShortIfStatementsOnASingleLine: false 14 | AllowShortLoopsOnASingleLine: false 15 | AlwaysBreakAfterReturnType: None 16 | AlwaysBreakBeforeMultilineStrings: true 17 | AlwaysBreakTemplateDeclarations: true 18 | BinPackArguments: false 19 | BinPackParameters: false 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: false 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: false 26 | AfterObjCDeclaration: false 27 | AfterStruct: false 28 | AfterUnion: false 29 | BeforeCatch: false 30 | BeforeElse: false 31 | IndentBraces: false 32 | BreakBeforeBinaryOperators: None 33 | BreakBeforeBraces: Attach 34 | BreakBeforeTernaryOperators: true 35 | BreakConstructorInitializersBeforeComma: false 36 | BreakAfterJavaFieldAnnotations: false 37 | BreakStringLiterals: false 38 | ColumnLimit: 80 39 | CommentPragmas: '^ IWYU pragma:' 40 | CompactNamespaces: false 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | ConstructorInitializerIndentWidth: 4 43 | ContinuationIndentWidth: 4 44 | Cpp11BracedListStyle: true 45 | DerivePointerAlignment: false 46 | DisableFormat: false 47 | ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] 48 | IncludeCategories: 49 | - Regex: '^<.*\.h(pp)?>' 50 | Priority: 1 51 | - Regex: '^<.*' 52 | Priority: 2 53 | - Regex: '.*' 54 | Priority: 3 55 | IndentCaseLabels: true 56 | IndentWidth: 2 57 | IndentWrappedFunctionNames: false 58 | KeepEmptyLinesAtTheStartOfBlocks: false 59 | MacroBlockBegin: '' 60 | MacroBlockEnd: '' 61 | MaxEmptyLinesToKeep: 1 62 | NamespaceIndentation: None 63 | ObjCBlockIndentWidth: 2 64 | ObjCSpaceAfterProperty: false 65 | ObjCSpaceBeforeProtocolList: false 66 | PenaltyBreakBeforeFirstCallParameter: 1 67 | PenaltyBreakComment: 300 68 | PenaltyBreakFirstLessLess: 120 69 | PenaltyBreakString: 1000 70 | 
PenaltyExcessCharacter: 1000000 71 | PenaltyReturnTypeOnItsOwnLine: 2000000 72 | PointerAlignment: Left 73 | ReflowComments: true 74 | SortIncludes: true 75 | SpaceAfterCStyleCast: false 76 | SpaceBeforeAssignmentOperators: true 77 | SpaceBeforeParens: ControlStatements 78 | SpaceInEmptyParentheses: false 79 | SpacesBeforeTrailingComments: 1 80 | SpacesInAngles: false 81 | SpacesInContainerLiterals: true 82 | SpacesInCStyleCastParentheses: false 83 | SpacesInParentheses: false 84 | SpacesInSquareBrackets: false 85 | Standard: Cpp11 86 | TabWidth: 8 87 | UseTab: Never 88 | ... 89 | -------------------------------------------------------------------------------- /tensorpipe/.clang-tidy: -------------------------------------------------------------------------------- 1 | --- 2 | InheritParentConfig: true 3 | Checks: ' 4 | readability-identifier-naming, 5 | readability-inconsistent-declaration-parameter-name, 6 | readability-named-parameter, 7 | ' 8 | FormatStyle: file 9 | CheckOptions: 10 | # Names of classes (and structs?) 11 | - { key: readability-identifier-naming.ClassCase, value: CamelCase } 12 | # Names of enums and enum classes 13 | - { key: readability-identifier-naming.EnumCase, value: CamelCase } 14 | # Names of members and methods of classes (and structs?) 
15 | - { key: readability-identifier-naming.MemberCase, value: camelBack } 16 | - { key: readability-identifier-naming.PrivateMemberCase, value: camelBack } 17 | - { key: readability-identifier-naming.PrivateMemberSuffix, value: '_' } 18 | - { key: readability-identifier-naming.ProtectedMemberCase, value: camelBack } 19 | - { key: readability-identifier-naming.ProtectedMemberSuffix, value: '_' } 20 | - { key: readability-identifier-naming.MethodCase, value: camelBack } 21 | # Names of parameters and local variables 22 | - { key: readability-identifier-naming.LocalVariableCase, value: camelBack } 23 | - { key: readability-identifier-naming.ParameterCase, value: camelBack } 24 | # Names of constants 25 | - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase } 26 | - { key: readability-identifier-naming.GlobalConstantPrefix, value: 'k' } 27 | # FIXME scoped enums are only supported in clang-tidy 12. 28 | # Names of (non-class) enum members 29 | # - { key: readability-identifier-naming.EnumConstantCase, value: UPPER_CASE } 30 | # Names of enum class members 31 | # - { key: readability-identifier-naming.ScopedEnumConstantCase, value: CamelCase } 32 | # - { key: readability-identifier-naming.ScopedEnumConstantPrefix, value: 'k' } 33 | # Names of template parameters 34 | - { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase } 35 | # Names of global functions 36 | - { key: readability-identifier-naming.FunctionCase, value: camelBack } 37 | # Names of namespaces 38 | - { key: readability-identifier-naming.NamespaceCase, value: lower_case } 39 | ... 40 | -------------------------------------------------------------------------------- /tensorpipe/benchmark/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # TODO: Make those separate CMake projects. 8 | 9 | add_executable(benchmark_transport benchmark_transport.cc options.cc transport_registry.cc) 10 | target_link_libraries(benchmark_transport PRIVATE tensorpipe) 11 | 12 | add_executable(benchmark_pipe benchmark_pipe.cc options.cc transport_registry.cc channel_registry.cc) 13 | target_link_libraries(benchmark_pipe PRIVATE tensorpipe tensorpipe_cuda) 14 | -------------------------------------------------------------------------------- /tensorpipe/benchmark/channel_registry.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | TP_DEFINE_SHARED_REGISTRY( 15 | TensorpipeChannelRegistry, 16 | tensorpipe::channel::Context); 17 | 18 | // BASIC 19 | 20 | std::shared_ptr makeBasicChannel() { 21 | return tensorpipe::channel::basic::create(); 22 | } 23 | 24 | TP_REGISTER_CREATOR(TensorpipeChannelRegistry, basic, makeBasicChannel); 25 | 26 | // CMA 27 | 28 | #if TENSORPIPE_HAS_CMA_CHANNEL 29 | std::shared_ptr makeCmaChannel() { 30 | return tensorpipe::channel::cma::create(); 31 | } 32 | 33 | TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cma, makeCmaChannel); 34 | #endif // TENSORPIPE_HAS_CMA_CHANNEL 35 | 36 | // MPT (registered under the key "mpt"; this creator always throws because the channel cannot be built without arguments) 37 | 38 | std::shared_ptr makeMptChannel() { 39 | throw std::runtime_error("mpt channel requires arguments"); 40 | } 41 | 42 | TP_REGISTER_CREATOR(TensorpipeChannelRegistry, mpt, makeMptChannel); 43 | 44 | // XTH 45 | 46 | std::shared_ptr makeXthChannel() { 47 | return tensorpipe::channel::xth::create(); 48 | } 49 | 50 | 
TP_REGISTER_CREATOR(TensorpipeChannelRegistry, xth, makeXthChannel); 51 | 52 | // CUDA XTH 53 | 54 | std::shared_ptr makeCudaXthChannel() { 55 | return tensorpipe::channel::cuda_xth::create(); 56 | } 57 | 58 | TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_xth, makeCudaXthChannel); 59 | 60 | // CUDA BASIC 61 | 62 | std::shared_ptr makeCudaBasicChannel() { 63 | return tensorpipe::channel::cuda_basic::create( 64 | tensorpipe::channel::basic::create()); 65 | } 66 | 67 | TP_REGISTER_CREATOR( 68 | TensorpipeChannelRegistry, 69 | cuda_basic, 70 | makeCudaBasicChannel); 71 | 72 | // CUDA IPC 73 | 74 | #if TENSORPIPE_HAS_CUDA_IPC_CHANNEL 75 | std::shared_ptr makeCudaIpcChannel() { 76 | return tensorpipe::channel::cuda_ipc::create(); 77 | } 78 | 79 | TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_ipc, makeCudaIpcChannel); 80 | #endif // TENSORPIPE_HAS_CUDA_IPC_CHANNEL 81 | 82 | // CUDA GDR 83 | 84 | #if TENSORPIPE_HAS_CUDA_GDR_CHANNEL 85 | std::shared_ptr makeCudaGdrChannel() { 86 | return tensorpipe::channel::cuda_gdr::create(); 87 | } 88 | 89 | TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_gdr, makeCudaGdrChannel); 90 | #endif // TENSORPIPE_HAS_CUDA_GDR_CHANNEL 91 | 92 | void validateChannelContext( 93 | std::shared_ptr context) { 94 | if (!context) { 95 | auto keys = TensorpipeChannelRegistry().keys(); 96 | std::cout 97 | << "The channel you passed in is not supported. The following channels are valid: "; 98 | for (const auto& key : keys) { 99 | std::cout << key << ", "; 100 | } 101 | std::cout << "\n"; 102 | exit(EXIT_FAILURE); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /tensorpipe/benchmark/channel_registry.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | TP_DECLARE_SHARED_REGISTRY( 15 | TensorpipeChannelRegistry, 16 | tensorpipe::channel::Context); 17 | 18 | void validateChannelContext( 19 | std::shared_ptr context); 20 | -------------------------------------------------------------------------------- /tensorpipe/benchmark/measurements.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace benchmark { 17 | 18 | class Measurements { 19 | using clock = std::chrono::high_resolution_clock; 20 | using nanoseconds = std::chrono::nanoseconds; 21 | 22 | public: 23 | void markStart() { 24 | start_ = clock::now(); 25 | } 26 | 27 | void markStop(size_t count = 1) { 28 | samples_.push_back((clock::now() - start_) / count); 29 | } 30 | 31 | void sort() { 32 | std::sort(samples_.begin(), samples_.end()); 33 | } 34 | 35 | void reserve(size_t capacity) { 36 | samples_.reserve(capacity); 37 | } 38 | 39 | size_t size() const { 40 | return samples_.size(); 41 | } 42 | 43 | nanoseconds sum() const { 44 | nanoseconds sum{0}; 45 | for (const auto& sample : samples_) { 46 | sum += sample; 47 | } 48 | return sum; 49 | } 50 | 51 | nanoseconds percentile(float f) const { 52 | return samples_[static_cast(f * samples_.size())]; 53 | } 54 | 55 | private: 56 | clock::time_point start_; 57 | std::vector samples_; 58 | }; 59 | 60 | } // namespace benchmark 61 | } // namespace tensorpipe 62 | 
-------------------------------------------------------------------------------- /tensorpipe/benchmark/options.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace benchmark { 18 | 19 | enum class TensorType { 20 | kCpu, 21 | kCuda, 22 | }; 23 | 24 | struct Options { 25 | std::string mode; // server or client 26 | std::string transport; // shm or uv 27 | std::string channel; // basic 28 | std::string address; // address for listen or connect 29 | int numRoundTrips{0}; // number of write/read pairs 30 | size_t numPayloads{0}; 31 | size_t payloadSize{0}; 32 | size_t numTensors{0}; 33 | size_t tensorSize{0}; 34 | TensorType tensorType{TensorType::kCpu}; 35 | size_t metadataSize{0}; 36 | size_t cudaSyncPeriod{1}; 37 | }; 38 | 39 | struct Options parseOptions(int argc, char** argv); 40 | 41 | } // namespace benchmark 42 | } // namespace tensorpipe 43 | -------------------------------------------------------------------------------- /tensorpipe/benchmark/transport_registry.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | TP_DEFINE_SHARED_REGISTRY( 14 | TensorpipeTransportRegistry, 15 | tensorpipe::transport::Context); 16 | 17 | // IBV 18 | 19 | #if TENSORPIPE_HAS_IBV_TRANSPORT 20 | std::shared_ptr makeIbvContext() { 21 | return tensorpipe::transport::ibv::create(); 22 | } 23 | 24 | TP_REGISTER_CREATOR(TensorpipeTransportRegistry, ibv, makeIbvContext); 25 | #endif // TENSORPIPE_HAS_IBV_TRANSPORT 26 | 27 | // SHM 28 | 29 | #if TENSORPIPE_HAS_SHM_TRANSPORT 30 | std::shared_ptr makeShmContext() { 31 | return tensorpipe::transport::shm::create(); 32 | } 33 | 34 | TP_REGISTER_CREATOR(TensorpipeTransportRegistry, shm, makeShmContext); 35 | #endif // TENSORPIPE_HAS_SHM_TRANSPORT 36 | 37 | // UV 38 | 39 | std::shared_ptr makeUvContext() { 40 | return tensorpipe::transport::uv::create(); 41 | } 42 | 43 | TP_REGISTER_CREATOR(TensorpipeTransportRegistry, uv, makeUvContext); 44 | 45 | void validateTransportContext( 46 | std::shared_ptr context) { 47 | if (!context) { 48 | auto keys = TensorpipeTransportRegistry().keys(); 49 | std::cout 50 | << "The transport you passed in is not supported. The following transports are valid: "; 51 | for (const auto& key : keys) { 52 | std::cout << key << ", "; 53 | } 54 | std::cout << "\n"; 55 | exit(EXIT_FAILURE); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /tensorpipe/benchmark/transport_registry.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | TP_DECLARE_SHARED_REGISTRY( 15 | TensorpipeTransportRegistry, 16 | tensorpipe::transport::Context); 17 | 18 | void validateTransportContext( 19 | std::shared_ptr context); 20 | -------------------------------------------------------------------------------- /tensorpipe/channel/basic/context_impl.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace channel { 18 | namespace basic { 19 | 20 | std::shared_ptr ContextImpl::create() { 21 | std::unordered_map deviceDescriptors = { 22 | {Device{kCpuDeviceType, 0}, "any"}}; 23 | return std::make_shared(std::move(deviceDescriptors)); 24 | } 25 | 26 | ContextImpl::ContextImpl( 27 | std::unordered_map deviceDescriptors) 28 | : ContextImplBoilerplate( 29 | std::move(deviceDescriptors)) {} 30 | 31 | std::shared_ptr ContextImpl::createChannel( 32 | std::vector> connections, 33 | Endpoint /* unused */) { 34 | TP_DCHECK_EQ(numConnectionsNeeded(), connections.size()); 35 | return createChannelInternal(std::move(connections[0])); 36 | } 37 | 38 | void ContextImpl::handleErrorImpl() {} 39 | 40 | void ContextImpl::joinImpl() {} 41 | 42 | bool ContextImpl::inLoop() const { 43 | return loop_.inLoop(); 44 | }; 45 | 46 | void ContextImpl::deferToLoop(std::function fn) { 47 | loop_.deferToLoop(std::move(fn)); 48 | }; 49 | 50 | } // namespace basic 51 | } // namespace channel 52 | } // namespace tensorpipe 53 | -------------------------------------------------------------------------------- /tensorpipe/channel/basic/context_impl.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | namespace tensorpipe { 18 | namespace channel { 19 | namespace basic { 20 | 21 | class ChannelImpl; 22 | 23 | class ContextImpl final 24 | : public ContextImplBoilerplate { 25 | public: 26 | static std::shared_ptr create(); 27 | 28 | explicit ContextImpl( 29 | std::unordered_map deviceDescriptors); 30 | 31 | std::shared_ptr createChannel( 32 | std::vector> connections, 33 | Endpoint endpoint); 34 | 35 | // Implement the DeferredExecutor interface. 36 | bool inLoop() const override; 37 | void deferToLoop(std::function fn) override; 38 | 39 | protected: 40 | // Implement the entry points called by ContextImplBoilerplate. 41 | void handleErrorImpl() override; 42 | void joinImpl() override; 43 | 44 | private: 45 | OnDemandDeferredExecutor loop_; 46 | }; 47 | 48 | } // namespace basic 49 | } // namespace channel 50 | } // namespace tensorpipe 51 | -------------------------------------------------------------------------------- /tensorpipe/channel/basic/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace basic { 18 | 19 | std::shared_ptr create() { 20 | return std::make_shared>(); 21 | } 22 | 23 | } // namespace basic 24 | } // namespace channel 25 | } // namespace tensorpipe 26 | -------------------------------------------------------------------------------- /tensorpipe/channel/basic/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace basic { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace basic 22 | } // namespace channel 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/channel/channel.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | // Channels are an out of band mechanism to transfer data between 19 | // processes. Examples include a direct address space to address space 20 | // memory copy on the same machine, or a GPU-to-GPU memory copy. 21 | // 22 | // Construction of a channel happens as follows. 
23 | // 24 | // 1) During initialization of a pipe, the connecting peer sends its 25 | // list of channel contexts and their device descriptors. The 26 | // device descriptor is used to determine whether or not a 27 | // channel can be used by a pair of peers. 28 | // 2) The listening side of the pipe compares the list it received 29 | // with its own list to determine the list of channels that should be used 30 | // for the peers. 31 | // 3) For every channel that should be constructed, the listening 32 | // side registers a slot with its low level listener. These slots 33 | // uniquely identify inbound connections on this listener (by 34 | // sending a word-sized identifier immediately after connecting) 35 | // and can be used to construct new connections. These slots are 36 | // sent to the connecting side of the pipe, which then attempts 37 | // to establish a new connection for every token. 38 | // 4) At this time, we have a new control connection for every 39 | // channel that is about to be constructed. Both sides of the 40 | // pipe can now create the channel instance using the newly 41 | // created connection. Further initialization that needs to 42 | // happen is deferred to the channel implementation. We assume the 43 | // channel is usable from the moment it is constructed. 44 | // 45 | namespace tensorpipe { 46 | namespace channel { 47 | 48 | using TSendCallback = std::function; 49 | using TRecvCallback = std::function; 50 | 51 | // Abstract base class for channel classes. 52 | class Channel { 53 | public: 54 | // Send memory region to peer. 55 | virtual void send(Buffer buffer, size_t length, TSendCallback callback) = 0; 56 | 57 | // Receive memory region from peer. 58 | virtual void recv(Buffer buffer, size_t length, TRecvCallback callback) = 0; 59 | 60 | // Tell the channel what its identifier is. 61 | // 62 | // This is only supposed to be called from the high-level pipe. It will only 63 | // be used for logging and debugging purposes. 
64 | virtual void setId(std::string id) = 0; 65 | 66 | // Put the channel in a terminal state, aborting pending operations and 67 | // rejecting future ones, and release its resources. This may be carried out 68 | // asynchronously, in background. 69 | virtual void close() = 0; 70 | 71 | virtual ~Channel() = default; 72 | }; 73 | 74 | } // namespace channel 75 | } // namespace tensorpipe 76 | -------------------------------------------------------------------------------- /tensorpipe/channel/cma/context_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace tensorpipe { 23 | namespace channel { 24 | namespace cma { 25 | 26 | class ChannelImpl; 27 | 28 | class ContextImpl final 29 | : public ContextImplBoilerplate { 30 | public: 31 | static std::shared_ptr create(); 32 | 33 | explicit ContextImpl( 34 | std::unordered_map deviceDescriptors); 35 | 36 | std::shared_ptr createChannel( 37 | std::vector> connections, 38 | Endpoint endpoint); 39 | 40 | size_t numConnectionsNeeded() const override; 41 | 42 | // Implement the DeferredExecutor interface. 43 | bool inLoop() const override; 44 | void deferToLoop(std::function fn) override; 45 | 46 | using copy_request_callback_fn = std::function; 47 | 48 | void requestCopy( 49 | pid_t remotePid, 50 | void* remotePtr, 51 | void* localPtr, 52 | size_t length, 53 | copy_request_callback_fn fn); 54 | 55 | protected: 56 | // Implement the entry points called by ContextImplBoilerplate. 
57 | void handleErrorImpl() override; 58 | void joinImpl() override; 59 | 60 | private: 61 | OnDemandDeferredExecutor loop_; 62 | 63 | struct CopyRequest { 64 | pid_t remotePid; 65 | void* remotePtr; 66 | void* localPtr; 67 | size_t length; 68 | copy_request_callback_fn callback; 69 | }; 70 | 71 | std::thread thread_; 72 | Queue> requests_{std::numeric_limits::max()}; 73 | 74 | // This is atomic because it may be accessed from outside the loop. 75 | std::atomic nextRequestId_{0}; 76 | 77 | void handleCopyRequests(); 78 | }; 79 | 80 | } // namespace cma 81 | } // namespace channel 82 | } // namespace tensorpipe 83 | -------------------------------------------------------------------------------- /tensorpipe/channel/cma/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cma { 18 | 19 | std::shared_ptr create() { 20 | return std::make_shared>(); 21 | } 22 | 23 | } // namespace cma 24 | } // namespace channel 25 | } // namespace tensorpipe 26 | -------------------------------------------------------------------------------- /tensorpipe/channel/cma/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cma { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace cma 22 | } // namespace channel 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_basic/constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | namespace channel { 15 | namespace cuda_basic { 16 | 17 | // FIXME Avoid this anonymous namespace and use inline variables in C++-17. 18 | namespace { 19 | 20 | // Define all three (redundant) values to make them explicit and avoid 21 | // misunderstandings due to miscalculations. 22 | static constexpr size_t kStagingAreaSize = 16 * 1024 * 1024; 23 | static constexpr size_t kSlotSize = 1024 * 1024; 24 | static constexpr size_t kNumSlots = 16; 25 | 26 | static_assert(kStagingAreaSize == kSlotSize * kNumSlots, ""); 27 | 28 | } // namespace 29 | 30 | } // namespace cuda_basic 31 | } // namespace channel 32 | } // namespace tensorpipe 33 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_basic/context_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace tensorpipe { 22 | namespace channel { 23 | namespace cuda_basic { 24 | 25 | class ChannelImpl; 26 | 27 | class ContextImpl final 28 | : public ContextImplBoilerplate { 29 | public: 30 | static std::shared_ptr create( 31 | std::shared_ptr cpuContext); 32 | 33 | ContextImpl( 34 | CudaLib cudaLib, 35 | std::shared_ptr cpuContext, 36 | std::unordered_map deviceDescriptors); 37 | 38 | std::shared_ptr createChannel( 39 | std::vector> connections, 40 | Endpoint endpoint); 41 | 42 | size_t numConnectionsNeeded() const override; 43 | 44 | bool canCommunicateWithRemote( 45 | const std::string& localDeviceDescriptor, 46 | const std::string& remoteDeviceDescriptor) const override; 47 | 48 | const CudaLib& getCudaLib(); 49 | Allocator& getCudaHostSendAllocator(int deviceIdx); 50 | Allocator& getCudaHostRecvAllocator(int deviceIdx); 51 | 52 | // Implement the DeferredExecutor interface. 53 | bool inLoop() const override; 54 | void deferToLoop(std::function fn) override; 55 | 56 | protected: 57 | // Implement the entry points called by ContextImplBoilerplate. 58 | void handleErrorImpl() override; 59 | void joinImpl() override; 60 | void setIdImpl() override; 61 | 62 | private: 63 | OnDemandDeferredExecutor loop_; 64 | 65 | const CudaLib cudaLib_; 66 | 67 | const std::shared_ptr cpuContext_; 68 | // TODO: Lazy initialization of cuda loop. 
69 | CudaLoop cudaLoop_; 70 | 71 | struct CudaHostAllocator { 72 | CudaPinnedBuffer buffer; 73 | Allocator allocator; 74 | }; 75 | optional cudaHostSendAllocator_; 76 | optional cudaHostRecvAllocator_; 77 | }; 78 | 79 | } // namespace cuda_basic 80 | } // namespace channel 81 | } // namespace tensorpipe 82 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_basic/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_basic { 18 | 19 | std::shared_ptr create(std::shared_ptr cpuContext) { 20 | return std::make_shared>( 21 | std::move(cpuContext)); 22 | } 23 | 24 | } // namespace cuda_basic 25 | } // namespace channel 26 | } // namespace tensorpipe 27 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_basic/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_basic { 18 | 19 | std::shared_ptr create(std::shared_ptr cpuContext); 20 | 21 | } // namespace cuda_basic 22 | } // namespace channel 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_gdr/constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | namespace channel { 15 | namespace cuda_gdr { 16 | 17 | namespace { 18 | 19 | // We should probably allow these to be user-configured. But, for now, we'll set 20 | // them to the lowest value they can have, the rationale being that this way 21 | // they will always be valid. 22 | constexpr uint8_t kPortNum = 1; 23 | constexpr uint8_t kGlobalIdentifierIndex = 0; 24 | 25 | // FIXME Instead of hardcoding the next three values, we could use 26 | // ibv_query_device to obtain max_cqe, max_qp_wr and max_srq_wr and deduce from 27 | // them the maximum allowed values for these parameters. 28 | 29 | constexpr uint32_t kNumRecvs = 1024; 30 | constexpr uint32_t kNumSends = 1024; 31 | 32 | // How many elements the completion queue should be able to hold. These elements 33 | // will be either the completed receive requests of the SRQ, or the completed 34 | // send requests from a connection's queue pair. We can bound the former value 35 | // but not the latter, so we try to add some margin. 
36 | constexpr int kCompletionQueueSize = kNumRecvs + kNumSends; 37 | 38 | // How many work completions to poll from the completion queue at each reactor 39 | // iteration. 40 | constexpr int kNumPolledWorkCompletions = 32; 41 | 42 | } // namespace 43 | 44 | } // namespace cuda_gdr 45 | } // namespace channel 46 | } // namespace tensorpipe 47 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_gdr/error.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_gdr { 18 | 19 | class IbvError final : public BaseError { 20 | public: 21 | explicit IbvError(std::string error) : error_(error) {} 22 | 23 | std::string what() const override { 24 | return error_; 25 | } 26 | 27 | private: 28 | std::string error_; 29 | }; 30 | 31 | } // namespace cuda_gdr 32 | } // namespace channel 33 | } // namespace tensorpipe 34 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_gdr/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_gdr { 18 | 19 | std::shared_ptr create( 20 | optional> gpuIdxToNicName) { 21 | return std::make_shared>( 22 | std::move(gpuIdxToNicName)); 23 | } 24 | 25 | } // namespace cuda_gdr 26 | } // namespace channel 27 | } // namespace tensorpipe 28 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_gdr/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | namespace tensorpipe { 18 | namespace channel { 19 | namespace cuda_gdr { 20 | 21 | std::shared_ptr create( 22 | optional> gpuIdxToNicName = nullopt); 23 | 24 | } // namespace cuda_gdr 25 | } // namespace channel 26 | } // namespace tensorpipe 27 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_ipc/constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | namespace channel { 15 | namespace cuda_ipc { 16 | 17 | // FIXME Avoid this anonymous namespace and use inline variables in C++-17. 18 | namespace { 19 | 20 | // Define all three (redundant) values to make them explicit and avoid 21 | // misunderstandings due to miscalculations. 
22 | static constexpr size_t kStagingAreaSize = 32 * 1024 * 1024; 23 | static constexpr size_t kSlotSize = 8 * 1024 * 1024; 24 | static constexpr size_t kNumSlots = 4; 25 | 26 | static_assert(kStagingAreaSize == kSlotSize * kNumSlots, ""); 27 | 28 | } // namespace 29 | 30 | } // namespace cuda_ipc 31 | } // namespace channel 32 | } // namespace tensorpipe 33 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_ipc/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_ipc { 18 | 19 | std::shared_ptr create() { 20 | return std::make_shared>(); 21 | } 22 | 23 | } // namespace cuda_ipc 24 | } // namespace channel 25 | } // namespace tensorpipe 26 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_ipc/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_ipc { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace cuda_ipc 22 | } // namespace channel 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_xth/context_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace channel { 18 | namespace cuda_xth { 19 | 20 | class ChannelImpl; 21 | 22 | class ContextImpl final 23 | : public ContextImplBoilerplate { 24 | public: 25 | static std::shared_ptr create(); 26 | 27 | ContextImpl( 28 | CudaLib cudaLib, 29 | std::unordered_map deviceDescriptors); 30 | 31 | std::shared_ptr createChannel( 32 | std::vector> connections, 33 | Endpoint endpoint); 34 | 35 | size_t numConnectionsNeeded() const override; 36 | 37 | const CudaLib& getCudaLib(); 38 | 39 | // Implement the DeferredExecutor interface. 40 | bool inLoop() const override; 41 | void deferToLoop(std::function fn) override; 42 | 43 | protected: 44 | // Implement the entry points called by ContextImplBoilerplate. 
45 | void handleErrorImpl() override; 46 | void joinImpl() override; 47 | 48 | private: 49 | OnDemandDeferredExecutor loop_; 50 | 51 | const CudaLib cudaLib_; 52 | }; 53 | 54 | } // namespace cuda_xth 55 | } // namespace channel 56 | } // namespace tensorpipe 57 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_xth/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_xth { 18 | 19 | std::shared_ptr create() { 20 | return std::make_shared>(); 21 | } 22 | 23 | } // namespace cuda_xth 24 | } // namespace channel 25 | } // namespace tensorpipe 26 | -------------------------------------------------------------------------------- /tensorpipe/channel/cuda_xth/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace cuda_xth { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace cuda_xth 22 | } // namespace channel 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/channel/error.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. 
and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace tensorpipe { 15 | namespace channel { 16 | 17 | std::string ContextClosedError::what() const { 18 | return "context closed"; 19 | } 20 | 21 | std::string ChannelClosedError::what() const { 22 | return "channel closed"; 23 | } 24 | 25 | std::string ContextNotViableError::what() const { 26 | return "context not viable"; 27 | } 28 | 29 | } // namespace channel 30 | } // namespace tensorpipe 31 | -------------------------------------------------------------------------------- /tensorpipe/channel/error.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | 18 | class ContextClosedError final : public BaseError { 19 | public: 20 | ContextClosedError() {} 21 | 22 | std::string what() const override; 23 | }; 24 | 25 | class ChannelClosedError final : public BaseError { 26 | public: 27 | ChannelClosedError() {} 28 | 29 | std::string what() const override; 30 | }; 31 | 32 | class ContextNotViableError final : public BaseError { 33 | public: 34 | ContextNotViableError() {} 35 | 36 | std::string what() const override; 37 | }; 38 | 39 | } // namespace channel 40 | } // namespace tensorpipe 41 | -------------------------------------------------------------------------------- /tensorpipe/channel/helpers.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. 
and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace channel { 18 | 19 | std::string saveDescriptor(const AbstractNopHolder& object) { 20 | const size_t len = object.getSize(); 21 | std::string out(len, '\0'); 22 | NopWriter writer( 23 | const_cast(reinterpret_cast(out.data())), len); 24 | 25 | nop::Status status = object.write(writer); 26 | TP_THROW_ASSERT_IF(status.has_error()) 27 | << "Error saving descriptor: " << status.GetErrorMessage(); 28 | 29 | return out; 30 | } 31 | 32 | void loadDescriptor(AbstractNopHolder& object, const std::string& in) { 33 | const size_t len = in.size(); 34 | NopReader reader(reinterpret_cast(in.data()), len); 35 | 36 | nop::Status status = object.read(reader); 37 | TP_THROW_ASSERT_IF(status.has_error()) 38 | << "Error loading descriptor: " << status.GetErrorMessage(); 39 | } 40 | 41 | } // namespace channel 42 | } // namespace tensorpipe 43 | -------------------------------------------------------------------------------- /tensorpipe/channel/helpers.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | // Note: never include this file from headers! 
12 | 13 | #include 14 | 15 | #include 16 | 17 | namespace tensorpipe { 18 | namespace channel { 19 | 20 | std::string saveDescriptor(const AbstractNopHolder& object); 21 | 22 | void loadDescriptor(AbstractNopHolder& object, const std::string& in); 23 | 24 | } // namespace channel 25 | } // namespace tensorpipe 26 | -------------------------------------------------------------------------------- /tensorpipe/channel/mpt/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace mpt { 18 | 19 | std::shared_ptr create( 20 | std::vector> contexts, 21 | std::vector> listeners) { 22 | return std::make_shared>( 23 | std::move(contexts), std::move(listeners)); 24 | } 25 | 26 | } // namespace mpt 27 | } // namespace channel 28 | } // namespace tensorpipe 29 | -------------------------------------------------------------------------------- /tensorpipe/channel/mpt/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | namespace tensorpipe { 18 | namespace channel { 19 | namespace mpt { 20 | 21 | std::shared_ptr create( 22 | std::vector> contexts, 23 | std::vector> listeners); 24 | 25 | } // namespace mpt 26 | } // namespace channel 27 | } // namespace tensorpipe 28 | -------------------------------------------------------------------------------- /tensorpipe/channel/mpt/nop_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | namespace tensorpipe { 19 | namespace channel { 20 | namespace mpt { 21 | 22 | struct LaneAdvertisement { 23 | // This pointless constructor is needed to work around a bug in GCC 5.5 (and 24 | // possibly other versions). It appears to be needed in the nop types that are 25 | // used inside std::vectors. 
26 | LaneAdvertisement() {} 27 | 28 | std::string address; 29 | uint64_t registrationId; 30 | NOP_STRUCTURE(LaneAdvertisement, address, registrationId); 31 | }; 32 | 33 | struct ServerHello { 34 | std::vector laneAdvertisements; 35 | NOP_STRUCTURE(ServerHello, laneAdvertisements); 36 | }; 37 | 38 | struct ClientHello { 39 | uint64_t registrationId; 40 | NOP_STRUCTURE(ClientHello, registrationId); 41 | }; 42 | 43 | using Packet = nop::Variant; 44 | 45 | } // namespace mpt 46 | } // namespace channel 47 | } // namespace tensorpipe 48 | -------------------------------------------------------------------------------- /tensorpipe/channel/xth/context_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace tensorpipe { 23 | namespace channel { 24 | namespace xth { 25 | 26 | class ChannelImpl; 27 | 28 | class ContextImpl final 29 | : public ContextImplBoilerplate { 30 | public: 31 | static std::shared_ptr create(); 32 | 33 | explicit ContextImpl( 34 | std::unordered_map deviceDescriptors); 35 | 36 | std::shared_ptr createChannel( 37 | std::vector> connections, 38 | Endpoint endpoint); 39 | 40 | size_t numConnectionsNeeded() const override; 41 | 42 | // Implement the DeferredExecutor interface. 
43 | bool inLoop() const override; 44 | void deferToLoop(std::function fn) override; 45 | 46 | using copy_request_callback_fn = std::function; 47 | 48 | void requestCopy( 49 | void* remotePtr, 50 | void* localPtr, 51 | size_t length, 52 | copy_request_callback_fn fn); 53 | 54 | protected: 55 | // Implement the entry points called by ContextImplBoilerplate. 56 | void handleErrorImpl() override; 57 | void joinImpl() override; 58 | 59 | private: 60 | OnDemandDeferredExecutor loop_; 61 | 62 | struct CopyRequest { 63 | void* remotePtr; 64 | void* localPtr; 65 | size_t length; 66 | copy_request_callback_fn callback; 67 | }; 68 | 69 | std::thread thread_; 70 | Queue> requests_; 71 | 72 | // This is atomic because it may be accessed from outside the loop. 73 | std::atomic nextRequestId_{0}; 74 | 75 | void handleCopyRequests(); 76 | }; 77 | 78 | } // namespace xth 79 | } // namespace channel 80 | } // namespace tensorpipe 81 | -------------------------------------------------------------------------------- /tensorpipe/channel/xth/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace xth { 18 | 19 | std::shared_ptr create() { 20 | return std::make_shared>(); 21 | } 22 | 23 | } // namespace xth 24 | } // namespace channel 25 | } // namespace tensorpipe 26 | -------------------------------------------------------------------------------- /tensorpipe/channel/xth/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace channel { 17 | namespace xth { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace xth 22 | } // namespace channel 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/common/address.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | 15 | std::tuple splitSchemeOfURL(const std::string& url) { 16 | std::string::size_type endOfScheme = url.find("://"); 17 | if (endOfScheme == std::string::npos) { 18 | TP_THROW_EINVAL() << "url has no scheme: " << url; 19 | } 20 | return std::make_tuple( 21 | url.substr(0, endOfScheme), url.substr(endOfScheme + 3)); 22 | } 23 | 24 | } // namespace tensorpipe 25 | -------------------------------------------------------------------------------- /tensorpipe/common/address.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | 15 | std::tuple splitSchemeOfURL(const std::string& url); 16 | 17 | } 18 | -------------------------------------------------------------------------------- /tensorpipe/common/allocator.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace tensorpipe { 15 | 16 | Allocator::Allocator(uint8_t* data, size_t numChunks, size_t chunkSize) 17 | : numChunks_(numChunks), 18 | chunkSize_(chunkSize), 19 | data_(data), 20 | chunkAvailable_(numChunks, true) {} 21 | 22 | Allocator::~Allocator() { 23 | close(); 24 | } 25 | 26 | void Allocator::alloc(size_t size, TAllocCallback callback) { 27 | TP_DCHECK(size <= chunkSize_); 28 | pendingAllocations_.push_back(std::move(callback)); 29 | processAllocations(); 30 | } 31 | 32 | size_t Allocator::getChunkLength() const { 33 | return chunkSize_; 34 | } 35 | 36 | void Allocator::close() { 37 | if (closed_) { 38 | return; 39 | } 40 | closed_ = true; 41 | processAllocations(); 42 | } 43 | 44 | void Allocator::processAllocations() { 45 | while (!pendingAllocations_.empty()) { 46 | auto& callback = pendingAllocations_.front(); 47 | if (closed_) { 48 | callback(TP_CREATE_ERROR(AllocatorClosedError), nullptr); 49 | } else { 50 | TChunk ptr = getAvailableChunk(); 51 | if (!ptr) { 52 | break; 53 | } 54 | callback(Error::kSuccess, std::move(ptr)); 55 | } 56 | pendingAllocations_.pop_front(); 57 | } 58 | } 59 | 60 | Allocator::TChunk Allocator::getAvailableChunk() { 61 | for (size_t curChunk = 0; curChunk < numChunks_; ++curChunk) { 62 | if (chunkAvailable_[curChunk]) { 63 | chunkAvailable_[curChunk] = false; 64 | 
++allocatedChunks_; 65 | return TChunk(data_ + curChunk * chunkSize_, [this](uint8_t* ptr) { 66 | releaseChunk(ptr); 67 | }); 68 | } 69 | } 70 | 71 | return nullptr; 72 | } 73 | 74 | void Allocator::releaseChunk(uint8_t* ptr) { 75 | size_t chunkId = (ptr - data_) / chunkSize_; 76 | chunkAvailable_[chunkId] = true; 77 | --allocatedChunks_; 78 | processAllocations(); 79 | } 80 | 81 | } // namespace tensorpipe 82 | -------------------------------------------------------------------------------- /tensorpipe/common/allocator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | namespace tensorpipe { 20 | 21 | class AllocatorClosedError final : public BaseError { 22 | std::string what() const override { 23 | return "allocator closed"; 24 | } 25 | }; 26 | 27 | class Allocator { 28 | public: 29 | // Note: this is a std::shared_ptr semantically. A shared_ptr with 30 | // array type is supported in C++17 and higher. 
31 | using TChunk = std::shared_ptr; 32 | using TAllocCallback = std::function; 33 | 34 | explicit Allocator(uint8_t* data, size_t numChunks, size_t chunkSize); 35 | 36 | ~Allocator(); 37 | 38 | void alloc(size_t size, TAllocCallback callback); 39 | size_t getChunkLength() const; 40 | 41 | void close(); 42 | 43 | private: 44 | const size_t numChunks_; 45 | const size_t chunkSize_; 46 | uint8_t* const data_; 47 | std::vector chunkAvailable_; 48 | size_t allocatedChunks_{0}; 49 | std::deque pendingAllocations_; 50 | bool closed_{false}; 51 | 52 | void processAllocations(); 53 | TChunk getAvailableChunk(); 54 | void releaseChunk(uint8_t* ptr); 55 | }; 56 | 57 | } // namespace tensorpipe 58 | -------------------------------------------------------------------------------- /tensorpipe/common/busy_polling_loop.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | namespace tensorpipe { 20 | 21 | class BusyPollingLoop : public EventLoopDeferredExecutor { 22 | protected: 23 | virtual bool pollOnce() = 0; 24 | 25 | virtual bool readyToClose() = 0; 26 | 27 | void stopBusyPolling() { 28 | closed_ = true; 29 | // No need to wake up the thread, since it is busy-waiting. 
30 | } 31 | 32 | void eventLoop() override { 33 | while (!closed_ || !readyToClose()) { 34 | if (pollOnce()) { 35 | // continue 36 | } else if (deferredFunctionCount_ > 0) { 37 | deferredFunctionCount_ -= runDeferredFunctionsFromEventLoop(); 38 | } else { 39 | std::this_thread::yield(); 40 | } 41 | } 42 | } 43 | 44 | void wakeupEventLoopToDeferFunction() override { 45 | ++deferredFunctionCount_; 46 | // No need to wake up the thread, since it is busy-waiting. 47 | } 48 | 49 | private: 50 | std::atomic closed_{false}; 51 | 52 | std::atomic deferredFunctionCount_{0}; 53 | }; 54 | 55 | } // namespace tensorpipe 56 | -------------------------------------------------------------------------------- /tensorpipe/common/cpu_buffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | 15 | struct CpuBuffer { 16 | void* ptr{nullptr}; 17 | 18 | Device getDevice() const { 19 | return Device{kCpuDeviceType, 0}; 20 | } 21 | }; 22 | 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/common/cuda_buffer.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace tensorpipe { 15 | 16 | Device CudaBuffer::getDevice() const { 17 | static CudaLib cudaLib = []() { 18 | Error error; 19 | CudaLib lib; 20 | std::tie(error, lib) = CudaLib::create(); 21 | TP_THROW_ASSERT_IF(error) 22 | << "Cannot get CUDA device for pointer because libcuda could not be loaded: " 23 | << error.what(); 24 | return lib; 25 | }(); 26 | 27 | return Device{kCudaDeviceType, cudaDeviceForPointer(cudaLib, ptr)}; 28 | } 29 | 30 | } // namespace tensorpipe 31 | -------------------------------------------------------------------------------- /tensorpipe/common/cuda_buffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | 17 | struct CudaBuffer { 18 | void* ptr{nullptr}; 19 | cudaStream_t stream{cudaStreamDefault}; 20 | 21 | Device getDevice() const; 22 | }; 23 | 24 | } // namespace tensorpipe 25 | -------------------------------------------------------------------------------- /tensorpipe/common/cuda_loop.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
// Identifies a device by a type string and an index within that type.
struct Device {
  // Device type; this file defines the constants kCpuDeviceType and
  // kCudaDeviceType for it.
  std::string type;
  // Index of the device among those of the same type. It has a default value
  // so that a default-constructed Device is fully initialized: toString(),
  // operator== and the std::hash specialization all read this field, and
  // reading an uninitialized int is undefined behavior.
  int index{0};

  // This pointless constructor is needed to work around a bug in GCC 5.5 (and
  // possibly other versions). It appears to be needed in the nop types that
  // are used inside nop::Optional.
  Device() {}

  Device(std::string type, int index) : type(std::move(type)), index(index) {}

  // Formats the device as "<type>:<index>", e.g. "cuda:1".
  std::string toString() const {
    std::stringstream ss;
    ss << type << ":" << index;
    return ss.str();
  }

  // Two devices are equal when both their type and their index match.
  bool operator==(const Device& other) const {
    return type == other.type && index == other.index;
  }
};
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | namespace tensorpipe { 26 | 27 | class DlError final : public BaseError { 28 | public: 29 | explicit DlError(char* error) : error_(error) {} 30 | 31 | std::string what() const override { 32 | return error_; 33 | } 34 | 35 | private: 36 | std::string error_; 37 | }; 38 | 39 | class DynamicLibraryHandle { 40 | public: 41 | DynamicLibraryHandle() = default; 42 | 43 | static std::tuple create( 44 | const char* filename, 45 | int flags) { 46 | void* ptr = ::dlopen(filename, flags); 47 | if (ptr == nullptr) { 48 | return std::make_tuple( 49 | TP_CREATE_ERROR(DlError, ::dlerror()), DynamicLibraryHandle()); 50 | } 51 | return std::make_tuple(Error::kSuccess, DynamicLibraryHandle(ptr)); 52 | } 53 | 54 | bool hasValue() const { 55 | return ptr_ != nullptr; 56 | } 57 | 58 | std::tuple loadSymbol(const char* name) { 59 | // Since dlsym doesn't return a specific value to signal errors (because 60 | // NULL is a valid return value), we need to detect errors by calling 61 | // dlerror and checking whether it returns a string or not (i.e., NULL). But 62 | // in order to do so, we must first reset the error, in case one was already 63 | // recorded. 
64 | ::dlerror(); 65 | void* ptr = ::dlsym(ptr_.get(), name); 66 | char* err = ::dlerror(); 67 | if (err != nullptr) { 68 | return std::make_tuple(TP_CREATE_ERROR(DlError, err), nullptr); 69 | } 70 | return std::make_tuple(Error::kSuccess, ptr); 71 | } 72 | 73 | std::tuple getFilename() { 74 | struct link_map* linkMap; 75 | int rv = ::dlinfo(ptr_.get(), RTLD_DI_LINKMAP, &linkMap); 76 | if (rv < 0) { 77 | return std::make_tuple( 78 | TP_CREATE_ERROR(DlError, ::dlerror()), std::string()); 79 | } 80 | std::array path; 81 | char* resolvedPath = ::realpath(linkMap->l_name, path.data()); 82 | if (resolvedPath == nullptr) { 83 | return std::make_tuple( 84 | TP_CREATE_ERROR(SystemError, "realpath", errno), std::string()); 85 | } 86 | TP_DCHECK(resolvedPath == path.data()); 87 | return std::make_tuple(Error::kSuccess, std::string(path.data())); 88 | } 89 | 90 | private: 91 | struct Deleter { 92 | void operator()(void* ptr) { 93 | int res = ::dlclose(ptr); 94 | TP_THROW_ASSERT_IF(res != 0) << "dlclose() failed: " << ::dlerror(); 95 | } 96 | }; 97 | 98 | DynamicLibraryHandle(void* ptr) : ptr_(ptr, Deleter{}) {} 99 | 100 | std::unique_ptr ptr_; 101 | }; 102 | 103 | } // namespace tensorpipe 104 | -------------------------------------------------------------------------------- /tensorpipe/common/error.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace tensorpipe { 17 | 18 | const Error Error::kSuccess = Error(); 19 | 20 | std::string Error::what() const { 21 | TP_DCHECK(error_); 22 | std::ostringstream ss; 23 | ss << error_->what() << " (this error originated at " << file_ << ":" << line_ 24 | << ")"; 25 | return ss.str(); 26 | } 27 | 28 | std::string SystemError::what() const { 29 | std::ostringstream ss; 30 | ss << syscall_ << ": " << strerror(error_); 31 | return ss.str(); 32 | } 33 | 34 | int SystemError::errorCode() const { 35 | return error_; 36 | } 37 | 38 | std::string ShortReadError::what() const { 39 | std::ostringstream ss; 40 | ss << "short read: got " << actual_ << " bytes while expecting to read " 41 | << expected_ << " bytes"; 42 | return ss.str(); 43 | } 44 | 45 | std::string ShortWriteError::what() const { 46 | std::ostringstream ss; 47 | ss << "short write: wrote " << actual_ << " bytes while expecting to write " 48 | << expected_ << " bytes"; 49 | return ss.str(); 50 | } 51 | 52 | std::string EOFError::what() const { 53 | return "eof"; 54 | } 55 | 56 | } // namespace tensorpipe 57 | -------------------------------------------------------------------------------- /tensorpipe/common/error_macros.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #define TP_CREATE_ERROR(typ, ...) 
\ 15 | (Error( \ 16 | std::make_shared(__VA_ARGS__), \ 17 | TP_TRIM_FILENAME(__FILE__), \ 18 | __LINE__)) 19 | -------------------------------------------------------------------------------- /tensorpipe/common/fd.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | namespace tensorpipe { 18 | 19 | ssize_t Fd::read(void* buf, size_t count) { 20 | ssize_t rv = -1; 21 | for (;;) { 22 | rv = ::read(fd_, buf, count); 23 | if (rv == -1 && errno == EINTR) { 24 | continue; 25 | } 26 | break; 27 | } 28 | return rv; 29 | } 30 | 31 | // Proxy to write(2) with EINTR retry. 32 | ssize_t Fd::write(const void* buf, size_t count) { 33 | ssize_t rv = -1; 34 | for (;;) { 35 | rv = ::write(fd_, buf, count); 36 | if (rv == -1 && errno == EINTR) { 37 | continue; 38 | } 39 | break; 40 | } 41 | return rv; 42 | } 43 | 44 | // Call read and throw if it doesn't complete. 45 | Error Fd::readFull(void* buf, size_t count) { 46 | auto rv = read(buf, count); 47 | if (rv == -1) { 48 | return TP_CREATE_ERROR(SystemError, "read", errno); 49 | } 50 | if (rv != count) { 51 | return TP_CREATE_ERROR(ShortReadError, count, rv); 52 | } 53 | return Error::kSuccess; 54 | } 55 | 56 | // Call write and throw if it doesn't complete. 
57 | Error Fd::writeFull(const void* buf, size_t count) { 58 | auto rv = write(buf, count); 59 | if (rv == -1) { 60 | return TP_CREATE_ERROR(SystemError, "write", errno); 61 | } 62 | if (rv != count) { 63 | return TP_CREATE_ERROR(ShortWriteError, count, rv); 64 | } 65 | return Error::kSuccess; 66 | } 67 | 68 | } // namespace tensorpipe 69 | -------------------------------------------------------------------------------- /tensorpipe/common/fd.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | 18 | namespace tensorpipe { 19 | 20 | class Fd { 21 | public: 22 | Fd() = default; 23 | 24 | explicit Fd(int fd) : fd_(fd) {} 25 | 26 | virtual ~Fd() { 27 | reset(); 28 | } 29 | 30 | // Disable copy constructor. 31 | Fd(const Fd&) = delete; 32 | 33 | // Disable copy assignment. 34 | Fd& operator=(const Fd&) = delete; 35 | 36 | // Custom move constructor. 37 | Fd(Fd&& other) noexcept { 38 | std::swap(fd_, other.fd_); 39 | } 40 | 41 | // Custom move assignment. 42 | Fd& operator=(Fd&& other) noexcept { 43 | std::swap(fd_, other.fd_); 44 | return *this; 45 | } 46 | 47 | // Return underlying file descriptor. 48 | int fd() const { 49 | return fd_; 50 | } 51 | 52 | bool hasValue() const { 53 | return fd_ >= 0; 54 | } 55 | 56 | void reset() { 57 | if (hasValue()) { 58 | ::close(fd_); 59 | fd_ = -1; 60 | } 61 | } 62 | 63 | // Proxy to read(2) with EINTR retry. 64 | ssize_t read(void* buf, size_t count); 65 | 66 | // Proxy to write(2) with EINTR retry. 67 | ssize_t write(const void* buf, size_t count); 68 | 69 | // Call read and return error if it doesn't exactly read `count` bytes. 
70 | Error readFull(void* buf, size_t count); 71 | 72 | // Call write and return error if it doesn't exactly write `count` bytes. 73 | Error writeFull(const void* buf, size_t count); 74 | 75 | // Call `readFull` with trivially copyable type. Throws on errors. 76 | template 77 | T readOrThrow() { 78 | T tmp; 79 | static_assert(std::is_trivially_copyable::value, "!"); 80 | auto err = readFull(&tmp, sizeof(T)); 81 | if (err) { 82 | throw std::runtime_error(err.what()); 83 | } 84 | return tmp; 85 | } 86 | 87 | // Call `writeFull` with trivially copyable type. Throws on errors. 88 | template 89 | void writeOrThrow(const T& t) { 90 | static_assert(std::is_trivially_copyable::value, "!"); 91 | auto err = writeFull(&t, sizeof(T)); 92 | if (err) { 93 | throw std::runtime_error(err.what()); 94 | } 95 | } 96 | 97 | // Call `readFull` with trivially copyable type. 98 | template 99 | Error read(T* t) { 100 | static_assert(std::is_trivially_copyable::value, "!"); 101 | return readFull(t, sizeof(T)); 102 | } 103 | 104 | // Call `writeFull` with trivially copyable type. 105 | template 106 | Error write(const T& t) { 107 | static_assert(std::is_trivially_copyable::value, "!"); 108 | return writeFull(&t, sizeof(T)); 109 | } 110 | 111 | protected: 112 | int fd_{-1}; 113 | }; 114 | 115 | } // namespace tensorpipe 116 | -------------------------------------------------------------------------------- /tensorpipe/common/memory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace tensorpipe { 20 | 21 | class MmappedPtr { 22 | MmappedPtr(uint8_t* ptr, size_t length) { 23 | ptr_ = decltype(ptr_)(ptr, Deleter{length}); 24 | } 25 | 26 | public: 27 | MmappedPtr() = default; 28 | 29 | static std::tuple create( 30 | size_t length, 31 | int prot, 32 | int flags, 33 | int fd) { 34 | void* ptr; 35 | ptr = ::mmap(nullptr, length, prot, flags, fd, 0); 36 | if (ptr == MAP_FAILED) { 37 | return std::make_tuple( 38 | TP_CREATE_ERROR(SystemError, "mmap", errno), MmappedPtr()); 39 | } 40 | return std::make_tuple( 41 | Error::kSuccess, MmappedPtr(reinterpret_cast(ptr), length)); 42 | } 43 | 44 | uint8_t* ptr() { 45 | return ptr_.get(); 46 | } 47 | 48 | const uint8_t* ptr() const { 49 | return ptr_.get(); 50 | } 51 | 52 | size_t getLength() const { 53 | return ptr_.get_deleter().length; 54 | } 55 | 56 | void reset() { 57 | ptr_.reset(); 58 | } 59 | 60 | private: 61 | struct Deleter { 62 | size_t length; 63 | 64 | void operator()(void* ptr) { 65 | int ret = ::munmap(ptr, length); 66 | TP_THROW_SYSTEM_IF(ret != 0, errno); 67 | } 68 | }; 69 | 70 | std::unique_ptr ptr_{nullptr, Deleter{}}; 71 | }; 72 | 73 | } // namespace tensorpipe 74 | -------------------------------------------------------------------------------- /tensorpipe/common/queue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | 17 | template 18 | class Queue { 19 | public: 20 | explicit Queue(int capacity = 1) : capacity_(capacity) {} 21 | 22 | void push(T t) { 23 | std::unique_lock lock(mutex_); 24 | while (items_.size() >= capacity_) { 25 | cv_.wait(lock); 26 | } 27 | items_.push_back(std::move(t)); 28 | cv_.notify_all(); 29 | } 30 | 31 | T pop() { 32 | std::unique_lock lock(mutex_); 33 | while (items_.size() == 0) { 34 | cv_.wait(lock); 35 | } 36 | T t(std::move(items_.front())); 37 | items_.pop_front(); 38 | cv_.notify_all(); 39 | return t; 40 | } 41 | 42 | private: 43 | std::mutex mutex_; 44 | std::condition_variable cv_; 45 | const int capacity_; 46 | std::deque items_; 47 | }; 48 | 49 | } // namespace tensorpipe 50 | -------------------------------------------------------------------------------- /tensorpipe/common/shm_ringbuffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | 17 | /// Creates ringbuffer on shared memory. 18 | /// 19 | /// `minRbByteSize` is the minimum size of the data section of the RingBuffer. 
20 | /// 21 | template 22 | std::tuple> 23 | createShmRingBuffer(size_t minRbByteSize) { 24 | Error error; 25 | ShmSegment headerSegment; 26 | RingBufferHeader* header; 27 | std::tie(error, headerSegment, header) = 28 | ShmSegment::create>(minRbByteSize); 29 | if (error) { 30 | return std::make_tuple( 31 | std::move(error), ShmSegment(), ShmSegment(), RingBuffer()); 32 | } 33 | 34 | ShmSegment dataSegment; 35 | uint8_t* data; 36 | std::tie(error, dataSegment, data) = 37 | ShmSegment::create(header->kDataPoolByteSize); 38 | if (error) { 39 | return std::make_tuple( 40 | std::move(error), ShmSegment(), ShmSegment(), RingBuffer()); 41 | } 42 | 43 | // Note: cannot use implicit construction from initializer list on GCC 5.5: 44 | // "converting to XYZ from initializer list would use explicit constructor". 45 | return std::make_tuple( 46 | Error::kSuccess, 47 | std::move(headerSegment), 48 | std::move(dataSegment), 49 | RingBuffer(header, data)); 50 | } 51 | 52 | template 53 | std::tuple> 54 | loadShmRingBuffer(Fd headerFd, Fd dataFd) { 55 | Error error; 56 | ShmSegment headerSegment; 57 | RingBufferHeader* header; 58 | std::tie(error, headerSegment, header) = 59 | ShmSegment::load>(std::move(headerFd)); 60 | if (error) { 61 | return std::make_tuple( 62 | std::move(error), ShmSegment(), ShmSegment(), RingBuffer()); 63 | } 64 | constexpr auto kHeaderSize = sizeof(RingBufferHeader); 65 | if (unlikely(kHeaderSize != headerSegment.getSize())) { 66 | TP_THROW_SYSTEM(EPERM) << "Header segment of unexpected size"; 67 | } 68 | 69 | ShmSegment dataSegment; 70 | uint8_t* data; 71 | std::tie(error, dataSegment, data) = 72 | ShmSegment::load(std::move(dataFd)); 73 | if (error) { 74 | return std::make_tuple( 75 | std::move(error), ShmSegment(), ShmSegment(), RingBuffer()); 76 | } 77 | if (unlikely(header->kDataPoolByteSize != dataSegment.getSize())) { 78 | TP_THROW_SYSTEM(EPERM) << "Data segment of unexpected size"; 79 | } 80 | 81 | return std::make_tuple( 82 | Error::kSuccess, 83 | 
std::move(headerSegment), 84 | std::move(dataSegment), 85 | RingBuffer(header, data)); 86 | } 87 | 88 | } // namespace tensorpipe 89 | -------------------------------------------------------------------------------- /tensorpipe/common/strings.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | 17 | inline std::string joinStrs(const std::vector& strs) { 18 | if (strs.empty()) { 19 | return ""; 20 | } 21 | std::ostringstream oss; 22 | oss << strs[0]; 23 | for (size_t idx = 1; idx < strs.size(); idx++) { 24 | oss << ", " << strs[idx]; 25 | } 26 | return oss.str(); 27 | } 28 | 29 | template 30 | std::string formatMatrix(const std::vector>& matrix) { 31 | std::ostringstream oss; 32 | oss << "{"; 33 | for (size_t rowIdx = 0; rowIdx < matrix.size(); rowIdx++) { 34 | if (rowIdx > 0) { 35 | oss << ", "; 36 | } 37 | oss << "{"; 38 | for (size_t colIdx = 0; colIdx < matrix[rowIdx].size(); colIdx++) { 39 | if (colIdx > 0) { 40 | oss << ", "; 41 | } 42 | oss << matrix[rowIdx][colIdx]; 43 | } 44 | oss << "}"; 45 | } 46 | oss << "}"; 47 | return oss.str(); 48 | } 49 | 50 | // Since text manipulation is hard, let's use this to double-check our results. 
51 | inline bool isValidUuid(const std::string& uuid) { 52 | // Check it's in this format: 53 | // aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee 54 | // |0 |5 |10 |15 |20 |25 |30 |35 55 | if (uuid.size() != 36) { 56 | return false; 57 | } 58 | for (int i = 0; i < uuid.size(); i++) { 59 | if (i == 8 || i == 13 || i == 18 || i == 23) { 60 | if (uuid[i] != '-') { 61 | return false; 62 | } 63 | } else { 64 | if (!((uuid[i] >= '0' && uuid[i] <= '9') || 65 | (uuid[i] >= 'a' && uuid[i] <= 'f'))) { 66 | return false; 67 | } 68 | } 69 | } 70 | return true; 71 | } 72 | 73 | } // namespace tensorpipe 74 | -------------------------------------------------------------------------------- /tensorpipe/config.h.in: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #cmakedefine01 TENSORPIPE_HAS_SHM_TRANSPORT 12 | #cmakedefine01 TENSORPIPE_HAS_IBV_TRANSPORT 13 | 14 | #cmakedefine01 TENSORPIPE_HAS_CMA_CHANNEL 15 | -------------------------------------------------------------------------------- /tensorpipe/config_cuda.h.in: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #cmakedefine01 TENSORPIPE_HAS_CUDA_IPC_CHANNEL 12 | #cmakedefine01 TENSORPIPE_HAS_CUDA_GDR_CHANNEL 13 | -------------------------------------------------------------------------------- /tensorpipe/core/context.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | namespace tensorpipe { 19 | 20 | Context::Context(ContextOptions opts) 21 | : impl_(std::make_shared(std::move(opts))) { 22 | impl_->init(); 23 | } 24 | 25 | void Context::registerTransport( 26 | int64_t priority, 27 | std::string transport, 28 | std::shared_ptr context) { 29 | impl_->registerTransport(priority, std::move(transport), std::move(context)); 30 | } 31 | 32 | void Context::registerChannel( 33 | int64_t priority, 34 | std::string channel, 35 | std::shared_ptr context) { 36 | impl_->registerChannel(priority, std::move(channel), std::move(context)); 37 | } 38 | 39 | std::shared_ptr Context::listen( 40 | const std::vector& urls) { 41 | return impl_->listen(urls); 42 | } 43 | 44 | std::shared_ptr Context::connect( 45 | const std::string& url, 46 | PipeOptions opts) { 47 | return impl_->connect(url, std::move(opts)); 48 | } 49 | 50 | void Context::close() { 51 | impl_->close(); 52 | } 53 | 54 | void Context::join() { 55 | impl_->join(); 56 | } 57 | 58 | Context::~Context() { 59 | join(); 60 | } 61 | 62 | } // namespace tensorpipe 63 | -------------------------------------------------------------------------------- /tensorpipe/core/context.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 
3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #include 19 | 20 | namespace tensorpipe { 21 | 22 | class ContextImpl; 23 | class Listener; 24 | class Pipe; 25 | 26 | class ContextOptions { 27 | public: 28 | // The name should be a semantically meaningful description of this context. 29 | // It will only be used for logging and debugging purposes, to identify the 30 | // endpoints of a pipe. 31 | ContextOptions&& name(std::string name) && { 32 | name_ = std::move(name); 33 | return std::move(*this); 34 | } 35 | 36 | private: 37 | std::string name_; 38 | 39 | friend ContextImpl; 40 | }; 41 | 42 | class PipeOptions { 43 | public: 44 | // The name should be a semantically meaningful description of the context 45 | // that the pipe is connecting to. It will only be used for logging and 46 | // debugging purposes, to identify the endpoints of a pipe. 47 | PipeOptions&& remoteName(std::string remoteName) && { 48 | remoteName_ = std::move(remoteName); 49 | return std::move(*this); 50 | } 51 | 52 | private: 53 | std::string remoteName_; 54 | 55 | friend ContextImpl; 56 | }; 57 | 58 | class Context final { 59 | public: 60 | explicit Context(ContextOptions opts = ContextOptions()); 61 | 62 | void registerTransport( 63 | int64_t priority, 64 | std::string transport, 65 | std::shared_ptr context); 66 | 67 | void registerChannel( 68 | int64_t priority, 69 | std::string channel, 70 | std::shared_ptr context); 71 | 72 | std::shared_ptr listen(const std::vector& urls); 73 | 74 | std::shared_ptr connect( 75 | const std::string& url, 76 | PipeOptions opts = PipeOptions()); 77 | 78 | // Put the context in a terminal state, in turn closing all of its pipes and 79 | // listeners, and release its resources. 
This may be done asynchronously, in 80 | // background. 81 | void close(); 82 | 83 | // Wait for all resources to be released and all background activity to stop. 84 | void join(); 85 | 86 | ~Context(); 87 | 88 | private: 89 | // The implementation is managed by a shared_ptr because each child object 90 | // will also hold a shared_ptr to it. However, its lifetime is tied to the one 91 | // of this public object since when the latter is destroyed the implementation 92 | // is closed and joined. 93 | const std::shared_ptr impl_; 94 | }; 95 | 96 | } // namespace tensorpipe 97 | -------------------------------------------------------------------------------- /tensorpipe/core/error.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | 15 | std::string LogicError::what() const { 16 | std::ostringstream ss; 17 | ss << "logic error: " << reason_; 18 | return ss.str(); 19 | } 20 | 21 | std::string ContextClosedError::what() const { 22 | return "context closed"; 23 | } 24 | 25 | std::string ListenerClosedError::what() const { 26 | return "listener closed"; 27 | } 28 | 29 | std::string PipeClosedError::what() const { 30 | return "pipe closed"; 31 | } 32 | 33 | } // namespace tensorpipe 34 | -------------------------------------------------------------------------------- /tensorpipe/core/error.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | 17 | class LogicError final : public BaseError { 18 | public: 19 | explicit LogicError(std::string reason) : reason_(std::move(reason)) {} 20 | 21 | std::string what() const override; 22 | 23 | private: 24 | const std::string reason_; 25 | }; 26 | 27 | class ContextClosedError final : public BaseError { 28 | public: 29 | explicit ContextClosedError() {} 30 | 31 | std::string what() const override; 32 | }; 33 | 34 | class ListenerClosedError final : public BaseError { 35 | public: 36 | explicit ListenerClosedError() {} 37 | 38 | std::string what() const override; 39 | }; 40 | 41 | class PipeClosedError final : public BaseError { 42 | public: 43 | explicit PipeClosedError() {} 44 | 45 | std::string what() const override; 46 | }; 47 | 48 | } // namespace tensorpipe 49 | -------------------------------------------------------------------------------- /tensorpipe/core/listener.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | namespace tensorpipe { 20 | 21 | Listener::Listener( 22 | ConstructorToken /* unused */, 23 | std::shared_ptr context, 24 | std::string id, 25 | const std::vector& urls) 26 | : impl_(std::make_shared( 27 | std::move(context), 28 | std::move(id), 29 | urls)) { 30 | impl_->init(); 31 | } 32 | 33 | void Listener::close() { 34 | impl_->close(); 35 | } 36 | 37 | Listener::~Listener() { 38 | close(); 39 | } 40 | 41 | void Listener::accept(accept_callback_fn fn) { 42 | impl_->accept(std::move(fn)); 43 | } 44 | 45 | const std::map& Listener::addresses() const { 46 | return impl_->addresses(); 47 | } 48 | 49 | const std::string& Listener::address(const std::string& transport) const { 50 | return impl_->address(transport); 51 | } 52 | 53 | std::string Listener::url(const std::string& transport) const { 54 | return impl_->url(transport); 55 | } 56 | 57 | } // namespace tensorpipe 58 | -------------------------------------------------------------------------------- /tensorpipe/core/listener.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | namespace tensorpipe { 20 | 21 | class ContextImpl; 22 | class ListenerImpl; 23 | class Pipe; 24 | 25 | // The listener. 26 | // 27 | // Listeners are used to produce pipes. Depending on the type of the 28 | // context, listeners may use a variety of addresses to listen on. For 29 | // example, for TCP/IP sockets they listen on an IPv4 or IPv6 address, 30 | // for Unix domain sockets they listen on a path, etcetera. 
31 | // 32 | // A pipe can only be accepted from this listener after it has been 33 | // fully established. This means that both its connection and all its 34 | // side channels have been established. 35 | // 36 | class Listener final { 37 | // Use the passkey idiom to allow make_shared to call what should be a private 38 | // constructor. See https://abseil.io/tips/134 for more information. 39 | struct ConstructorToken {}; 40 | 41 | public: 42 | Listener( 43 | ConstructorToken token, 44 | std::shared_ptr context, 45 | std::string id, 46 | const std::vector& urls); 47 | 48 | // 49 | // Entry points for user code 50 | // 51 | 52 | using accept_callback_fn = 53 | std::function)>; 54 | 55 | void accept(accept_callback_fn fn); 56 | 57 | // Returns map with the materialized address of listeners by transport. 58 | // 59 | // If you don't bind a transport listener to a specific port or address, it 60 | // may generate its address automatically. Then, in order to connect to the 61 | // listener, the user must use a separate mechanism to communicate the 62 | // materialized address to whoever wants to connect. 63 | // 64 | const std::map& addresses() const; 65 | 66 | // Returns materialized address for specific transport. 67 | // 68 | // See `addresses()` for more information. 69 | // 70 | const std::string& address(const std::string& transport) const; 71 | 72 | // Returns URL with materialized address for specific transport. 73 | // 74 | // See `addresses()` for more information. 75 | // 76 | std::string url(const std::string& transport) const; 77 | 78 | // Put the listener in a terminal state, aborting its pending operations and 79 | // rejecting future ones, and release its resources. This may be carried out 80 | // asynchronously, in background. Since the pipes may occasionally use the 81 | // listener to open new connections, closing a listener may trigger errors 82 | // in the pipes. 
83 | void close(); 84 | 85 | ~Listener(); 86 | 87 | private: 88 | // Using a shared_ptr allows us to detach the lifetime of the implementation 89 | // from the public object's one and perform the destruction asynchronously. 90 | const std::shared_ptr impl_; 91 | 92 | // Allow context to access constructor token. 93 | friend ContextImpl; 94 | }; 95 | 96 | } // namespace tensorpipe 97 | -------------------------------------------------------------------------------- /tensorpipe/core/nop_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace tensorpipe { 25 | 26 | struct SpontaneousConnection { 27 | std::string contextName; 28 | NOP_STRUCTURE(SpontaneousConnection, contextName); 29 | }; 30 | 31 | struct RequestedConnection { 32 | uint64_t registrationId; 33 | NOP_STRUCTURE(RequestedConnection, registrationId); 34 | }; 35 | 36 | NOP_EXTERNAL_STRUCTURE(Device, type, index); 37 | 38 | struct Brochure { 39 | std::unordered_map transportDomainDescriptors; 40 | std::unordered_map> 41 | channelDeviceDescriptors; 42 | NOP_STRUCTURE(Brochure, transportDomainDescriptors, channelDeviceDescriptors); 43 | }; 44 | 45 | struct BrochureAnswer { 46 | std::string transport; 47 | std::string address; 48 | std::unordered_map transportRegistrationIds; 49 | std::string transportDomainDescriptor; 50 | std::unordered_map> channelRegistrationIds; 51 | std::unordered_map> 52 | channelDeviceDescriptors; 53 | std::unordered_map, std::string> 54 | channelForDevicePair; 55 | NOP_STRUCTURE( 56 | BrochureAnswer, 57 | transport, 58 | 
address, 59 | transportRegistrationIds, 60 | transportDomainDescriptor, 61 | channelRegistrationIds, 62 | channelDeviceDescriptors, 63 | channelForDevicePair); 64 | }; 65 | 66 | NOP_EXTERNAL_STRUCTURE(Descriptor::Payload, length, metadata); 67 | NOP_EXTERNAL_STRUCTURE( 68 | Descriptor::Tensor, 69 | length, 70 | sourceDevice, 71 | targetDevice, 72 | metadata); 73 | NOP_EXTERNAL_STRUCTURE(Descriptor, metadata, payloads, tensors); 74 | 75 | struct DescriptorReply { 76 | std::vector targetDevices; 77 | NOP_STRUCTURE(DescriptorReply, targetDevices); 78 | }; 79 | 80 | using Packet = nop::Variant; 81 | 82 | } // namespace tensorpipe 83 | -------------------------------------------------------------------------------- /tensorpipe/core/pipe.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | namespace tensorpipe { 18 | 19 | Pipe::Pipe( 20 | ConstructorToken /* unused */, 21 | std::shared_ptr context, 22 | std::string id, 23 | std::string remoteName, 24 | const std::string& url) 25 | : impl_(std::make_shared( 26 | std::move(context), 27 | std::move(id), 28 | std::move(remoteName), 29 | url)) { 30 | impl_->init(); 31 | } 32 | 33 | Pipe::Pipe(ConstructorToken /* unused */, std::shared_ptr impl) 34 | : impl_(std::move(impl)) {} 35 | 36 | const std::string& Pipe::getRemoteName() { 37 | return impl_->getRemoteName(); 38 | } 39 | 40 | Pipe::~Pipe() { 41 | close(); 42 | } 43 | 44 | void Pipe::close() { 45 | impl_->close(); 46 | } 47 | 48 | void Pipe::readDescriptor(read_descriptor_callback_fn fn) { 49 | impl_->readDescriptor(std::move(fn)); 50 | } 51 | 52 | void Pipe::read(Allocation allocation, read_callback_fn fn) { 53 | impl_->read(std::move(allocation), std::move(fn)); 54 | } 55 | 56 | void Pipe::write(Message message, write_callback_fn fn) { 57 | impl_->write(std::move(message), std::move(fn)); 58 | } 59 | 60 | } // namespace tensorpipe 61 | -------------------------------------------------------------------------------- /tensorpipe/misc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | add_executable(dump_state_machine dump_state_machine.cc) 8 | find_package(Clang REQUIRED) 9 | target_include_directories(dump_state_machine PRIVATE ${CLANG_INCLUDE_DIRS}) 10 | target_link_libraries(dump_state_machine PRIVATE 11 | clangTooling 12 | clangBasic 13 | clangASTMatchers) 14 | -------------------------------------------------------------------------------- /tensorpipe/python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | if(NOT (COMMAND pybind11_add_module)) 8 | add_subdirectory( 9 | ${PROJECT_SOURCE_DIR}/third_party/pybind11 10 | ${PROJECT_BINARY_DIR}/third_party/pybind11 11 | EXCLUDE_FROM_ALL) 12 | endif() 13 | 14 | set(PYBIND11_CPP_STANDARD -std=c++14) 15 | pybind11_add_module(pytensorpipe tensorpipe.cc) 16 | target_link_libraries(pytensorpipe PRIVATE tensorpipe) 17 | -------------------------------------------------------------------------------- /tensorpipe/tensorpipe.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | // High-level API 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | #include 24 | 25 | // Transports 26 | 27 | #include 28 | #include 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #if TENSORPIPE_HAS_SHM_TRANSPORT 35 | #include 36 | #endif // TENSORPIPE_HAS_SHM_TRANSPORT 37 | 38 | #if TENSORPIPE_HAS_IBV_TRANSPORT 39 | #include 40 | #include 41 | #include 42 | #endif // TENSORPIPE_HAS_IBV_TRANSPORT 43 | 44 | // Channels 45 | 46 | #include 47 | #include 48 | 49 | #include 50 | #include 51 | #include 52 | 53 | #if TENSORPIPE_HAS_CMA_CHANNEL 54 | #include 55 | #endif // TENSORPIPE_HAS_CMA_CHANNEL 56 | -------------------------------------------------------------------------------- /tensorpipe/tensorpipe_cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | // High-level API 14 | 15 | #include 16 | 17 | // Channels 18 | 19 | #include 20 | #include 21 | 22 | #if TENSORPIPE_HAS_CUDA_GDR_CHANNEL 23 | #include 24 | #endif // TENSORPIPE_HAS_CUDA_GDR_CHANNEL 25 | 26 | #if TENSORPIPE_HAS_CUDA_IPC_CHANNEL 27 | #include 28 | #endif // TENSORPIPE_HAS_CUDA_IPC_CHANNEL 29 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/basic/basic_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | #include 11 | 12 | namespace { 13 | 14 | class BasicChannelTestHelper : public CpuChannelTestHelper { 15 | protected: 16 | std::shared_ptr makeContextInternal( 17 | std::string id) override { 18 | auto context = tensorpipe::channel::basic::create(); 19 | context->setId(std::move(id)); 20 | return context; 21 | } 22 | }; 23 | 24 | BasicChannelTestHelper helper; 25 | 26 | } // namespace 27 | 28 | INSTANTIATE_TEST_CASE_P(Basic, ChannelTestSuite, ::testing::Values(&helper)); 29 | 30 | INSTANTIATE_TEST_CASE_P(Basic, CpuChannelTestSuite, ::testing::Values(&helper)); 31 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/channel_test_cpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | class CpuDataWrapper : public DataWrapper { 19 | public: 20 | explicit CpuDataWrapper(size_t length) : vector_(length) {} 21 | 22 | explicit CpuDataWrapper(std::vector v) : vector_(v) {} 23 | 24 | tensorpipe::Buffer buffer() const override { 25 | return tensorpipe::CpuBuffer{.ptr = const_cast(vector_.data())}; 26 | } 27 | 28 | size_t bufferLength() const override { 29 | return vector_.size(); 30 | } 31 | 32 | std::vector unwrap() override { 33 | return vector_; 34 | } 35 | 36 | private: 37 | std::vector vector_; 38 | }; 39 | 40 | class CpuChannelTestHelper : public ChannelTestHelper { 41 | public: 42 | std::unique_ptr makeDataWrapper(size_t length) override { 43 | return std::make_unique(length); 44 | } 45 | 46 | std::unique_ptr makeDataWrapper( 47 | std::vector v) override { 48 | return std::make_unique(std::move(v)); 49 | } 50 | }; 51 | 52 | class CpuChannelTestSuite 53 | : public ::testing::TestWithParam {}; 54 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/channel_test_cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | class CudaDataWrapper : public DataWrapper { 20 | public: 21 | // Non-copyable. 22 | CudaDataWrapper(const CudaDataWrapper&) = delete; 23 | CudaDataWrapper& operator=(const CudaDataWrapper&) = delete; 24 | // Non-movable. 
25 | CudaDataWrapper(CudaDataWrapper&& other) = delete; 26 | CudaDataWrapper& operator=(CudaDataWrapper&& other) = delete; 27 | 28 | explicit CudaDataWrapper(size_t length) : length_(length) { 29 | if (length_ > 0) { 30 | TP_CUDA_CHECK(cudaSetDevice(0)); 31 | TP_CUDA_CHECK(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); 32 | TP_CUDA_CHECK(cudaMalloc(&cudaPtr_, length_)); 33 | } 34 | } 35 | 36 | explicit CudaDataWrapper(std::vector v) : CudaDataWrapper(v.size()) { 37 | if (length_ > 0) { 38 | TP_CUDA_CHECK(cudaMemcpyAsync( 39 | cudaPtr_, v.data(), length_, cudaMemcpyDefault, stream_)); 40 | } 41 | } 42 | 43 | tensorpipe::Buffer buffer() const override { 44 | return tensorpipe::CudaBuffer{ 45 | .ptr = cudaPtr_, 46 | .stream = stream_, 47 | }; 48 | } 49 | 50 | size_t bufferLength() const override { 51 | return length_; 52 | } 53 | 54 | std::vector unwrap() override { 55 | std::vector v(length_); 56 | if (length_ > 0) { 57 | TP_CUDA_CHECK(cudaStreamSynchronize(stream_)); 58 | TP_CUDA_CHECK(cudaMemcpy(v.data(), cudaPtr_, length_, cudaMemcpyDefault)); 59 | } 60 | return v; 61 | } 62 | 63 | ~CudaDataWrapper() override { 64 | if (length_ > 0) { 65 | TP_CUDA_CHECK(cudaFree(cudaPtr_)); 66 | TP_CUDA_CHECK(cudaStreamDestroy(stream_)); 67 | } 68 | } 69 | 70 | private: 71 | void* cudaPtr_{nullptr}; 72 | size_t length_{0}; 73 | cudaStream_t stream_{cudaStreamDefault}; 74 | }; 75 | 76 | class CudaChannelTestHelper : public ChannelTestHelper { 77 | public: 78 | std::unique_ptr makeDataWrapper(size_t length) override { 79 | return std::make_unique(length); 80 | } 81 | 82 | std::unique_ptr makeDataWrapper( 83 | std::vector v) override { 84 | return std::make_unique(std::move(v)); 85 | } 86 | }; 87 | 88 | class CudaChannelTestSuite 89 | : public ::testing::TestWithParam {}; 90 | 91 | class CudaMultiGPUChannelTestSuite 92 | : public ::testing::TestWithParam {}; 93 | 94 | class CudaXDTTChannelTestSuite 95 | : public ::testing::TestWithParam {}; 96 | 
-------------------------------------------------------------------------------- /tensorpipe/test/channel/cma/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | add_executable(tensorpipe_channel_cma_probe 8 | probe.cc 9 | ) 10 | 11 | target_link_libraries(tensorpipe_channel_cma_probe PRIVATE 12 | tensorpipe 13 | ) 14 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/cma/cma_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | #include 11 | 12 | namespace { 13 | 14 | class CmaChannelTestHelper : public CpuChannelTestHelper { 15 | protected: 16 | std::shared_ptr makeContextInternal( 17 | std::string id) override { 18 | auto context = tensorpipe::channel::cma::create(); 19 | context->setId(std::move(id)); 20 | return context; 21 | } 22 | }; 23 | 24 | CmaChannelTestHelper helper; 25 | 26 | } // namespace 27 | 28 | INSTANTIATE_TEST_CASE_P(Cma, ChannelTestSuite, ::testing::Values(&helper)); 29 | 30 | INSTANTIATE_TEST_CASE_P(Cma, CpuChannelTestSuite, ::testing::Values(&helper)); 31 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/cma/probe_report_checker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 
4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | import json 9 | import sys 10 | 11 | if __name__ == "__main__": 12 | if len(sys.argv) < 1: 13 | raise RuntimeError() 14 | if len(sys.argv) != 4: 15 | print( 16 | f"Usage: {sys.argv[0]} [first report] [second report] [supposed to work]", 17 | file=sys.stderr, 18 | ) 19 | sys.exit(0) 20 | 21 | with open(sys.argv[1], "rb") as f: 22 | first_report = json.load(f) 23 | with open(sys.argv[2], "rb") as f: 24 | second_report = json.load(f) 25 | supposed_to_work = int(sys.argv[3]) 26 | 27 | worked_in_practice = ( 28 | first_report["syscall_success"] == 1 and second_report["syscall_success"] == 1 29 | ) 30 | if worked_in_practice != supposed_to_work: 31 | raise RuntimeError( 32 | f"The syscall didn't behave as the test expected it to. It " 33 | f"{'succeeded' if worked_in_practice else 'failed'} whereas it was " 34 | f"supposed to {'succeed' if supposed_to_work else 'fail'}." 35 | ) 36 | 37 | detected_as_working = ( 38 | first_report["viability"] == 1 39 | and second_report["viability"] == 1 40 | and first_report["device_descriptor"] == second_report["device_descriptor"] 41 | ) 42 | if detected_as_working != worked_in_practice: 43 | print( 44 | f"The CMA autodetection didn't correctly predict the behavior of the " 45 | f"syscall. It determined it would " 46 | f"{'succeed' if detected_as_working else 'fail'} whereas it actually " 47 | f"{'succeeded' if worked_in_practice else 'failed'}.", 48 | file=sys.stderr, 49 | ) 50 | sys.exit(1) 51 | 52 | sys.exit(0) 53 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/cuda_basic/cuda_basic_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace { 16 | 17 | class CudaBasicChannelTestHelper : public CudaChannelTestHelper { 18 | protected: 19 | std::shared_ptr makeContextInternal( 20 | std::string id) override { 21 | auto cpuContext = tensorpipe::channel::basic::create(); 22 | auto context = 23 | tensorpipe::channel::cuda_basic::create(std::move(cpuContext)); 24 | context->setId(std::move(id)); 25 | return context; 26 | } 27 | 28 | public: 29 | std::shared_ptr makePeerGroup() override { 30 | return std::make_shared(); 31 | } 32 | }; 33 | 34 | CudaBasicChannelTestHelper helper; 35 | 36 | class CudaBasicChannelTestSuite : public ChannelTestSuite {}; 37 | 38 | } // namespace 39 | 40 | class CannotCommunicateCpuToCpuTest : public ChannelTestCase { 41 | public: 42 | void run(ChannelTestHelper* /* unused */) override { 43 | ForkedThreadPeerGroup pg; 44 | pg.spawn( 45 | [&]() { 46 | auto cpuContext = tensorpipe::channel::basic::create(); 47 | auto ctx = 48 | tensorpipe::channel::cuda_basic::create(std::move(cpuContext)); 49 | auto deviceDescriptors = ctx->deviceDescriptors(); 50 | auto it = deviceDescriptors.find( 51 | tensorpipe::Device{tensorpipe::kCpuDeviceType, 0}); 52 | EXPECT_FALSE(it == deviceDescriptors.end()); 53 | auto descriptor = it->second; 54 | EXPECT_FALSE(ctx->canCommunicateWithRemote(descriptor, descriptor)); 55 | }, 56 | [&]() { 57 | // Do nothing. 
58 | }); 59 | } 60 | }; 61 | 62 | CHANNEL_TEST(CudaBasicChannelTestSuite, CannotCommunicateCpuToCpu); 63 | 64 | INSTANTIATE_TEST_CASE_P( 65 | CudaBasic, 66 | ChannelTestSuite, 67 | ::testing::Values(&helper)); 68 | 69 | INSTANTIATE_TEST_CASE_P( 70 | CudaBasic, 71 | CudaChannelTestSuite, 72 | ::testing::Values(&helper)); 73 | 74 | INSTANTIATE_TEST_CASE_P( 75 | CudaBasic, 76 | CudaMultiGPUChannelTestSuite, 77 | ::testing::Values(&helper)); 78 | 79 | INSTANTIATE_TEST_CASE_P( 80 | CudaBasic, 81 | CudaXDTTChannelTestSuite, 82 | ::testing::Values(&helper)); 83 | 84 | INSTANTIATE_TEST_CASE_P( 85 | CudaBasic, 86 | CudaBasicChannelTestSuite, 87 | ::testing::Values(&helper)); 88 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/cuda_gdr/cuda_gdr_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace { 15 | 16 | class CudaGdrChannelTestHelper : public CudaChannelTestHelper { 17 | protected: 18 | std::shared_ptr makeContextInternal( 19 | std::string id) override { 20 | auto context = tensorpipe::channel::cuda_gdr::create(); 21 | context->setId(std::move(id)); 22 | return context; 23 | } 24 | 25 | public: 26 | std::shared_ptr makePeerGroup() override { 27 | return std::make_shared(); 28 | } 29 | }; 30 | 31 | CudaGdrChannelTestHelper helper; 32 | 33 | } // namespace 34 | 35 | INSTANTIATE_TEST_CASE_P(CudaGdr, ChannelTestSuite, ::testing::Values(&helper)); 36 | 37 | INSTANTIATE_TEST_CASE_P( 38 | CudaGdr, 39 | CudaChannelTestSuite, 40 | ::testing::Values(&helper)); 41 | 42 | INSTANTIATE_TEST_CASE_P( 43 | CudaGdr, 44 | CudaMultiGPUChannelTestSuite, 45 | ::testing::Values(&helper)); 46 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace { 15 | 16 | class CudaIpcChannelTestHelper : public CudaChannelTestHelper { 17 | protected: 18 | std::shared_ptr makeContextInternal( 19 | std::string id) override { 20 | auto context = tensorpipe::channel::cuda_ipc::create(); 21 | context->setId(std::move(id)); 22 | return context; 23 | } 24 | 25 | public: 26 | std::shared_ptr makePeerGroup() override { 27 | return std::make_shared(); 28 | } 29 | }; 30 | 31 | CudaIpcChannelTestHelper helper; 32 | 33 | class CudaIpcChannelTestSuite : public ChannelTestSuite {}; 34 | 35 | } // namespace 36 | 37 | class CannotCommunicateInSameProcessTest : public ChannelTestCase { 38 | public: 39 | void run(ChannelTestHelper* /* unused */) override { 40 | ForkedThreadPeerGroup pg; 41 | pg.spawn( 42 | [&]() { 43 | auto ctx = tensorpipe::channel::cuda_ipc::create(); 44 | auto deviceDescriptors = ctx->deviceDescriptors(); 45 | EXPECT_GT(deviceDescriptors.size(), 0); 46 | auto descriptor = deviceDescriptors.begin()->second; 47 | // From within a given process, the device descriptors will be the 48 | // same. 49 | EXPECT_FALSE(ctx->canCommunicateWithRemote(descriptor, descriptor)); 50 | }, 51 | [&]() { 52 | // Do nothing. 
53 | }); 54 | } 55 | }; 56 | 57 | CHANNEL_TEST(CudaIpcChannelTestSuite, CannotCommunicateInSameProcess); 58 | 59 | INSTANTIATE_TEST_CASE_P(CudaIpc, ChannelTestSuite, ::testing::Values(&helper)); 60 | 61 | INSTANTIATE_TEST_CASE_P( 62 | CudaIpc, 63 | CudaChannelTestSuite, 64 | ::testing::Values(&helper)); 65 | 66 | INSTANTIATE_TEST_CASE_P( 67 | CudaIpc, 68 | CudaMultiGPUChannelTestSuite, 69 | ::testing::Values(&helper)); 70 | 71 | INSTANTIATE_TEST_CASE_P( 72 | CudaIpc, 73 | CudaIpcChannelTestSuite, 74 | ::testing::Values(&helper)); 75 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/cuda_xth/cuda_xth_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace { 15 | 16 | class CudaXthChannelTestHelper : public CudaChannelTestHelper { 17 | protected: 18 | std::shared_ptr makeContextInternal( 19 | std::string id) override { 20 | auto context = tensorpipe::channel::cuda_xth::create(); 21 | context->setId(std::move(id)); 22 | return context; 23 | } 24 | 25 | public: 26 | std::shared_ptr makePeerGroup() override { 27 | return std::make_shared(); 28 | } 29 | }; 30 | 31 | CudaXthChannelTestHelper helper; 32 | 33 | } // namespace 34 | 35 | INSTANTIATE_TEST_CASE_P(CudaXth, ChannelTestSuite, ::testing::Values(&helper)); 36 | 37 | INSTANTIATE_TEST_CASE_P( 38 | CudaXth, 39 | CudaChannelTestSuite, 40 | ::testing::Values(&helper)); 41 | 42 | INSTANTIATE_TEST_CASE_P( 43 | CudaXth, 44 | CudaMultiGPUChannelTestSuite, 45 | ::testing::Values(&helper)); 46 | -------------------------------------------------------------------------------- /tensorpipe/test/channel/kernel.cu: 
--------------------------------------------------------------------------------
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include

// Device kernel: every thread walks the buffer in a grid-stride loop and, for
// each element it owns, repeatedly folds in another element of the buffer plus
// the iteration counter. The long inner loop is what makes the kernel slow;
// the resulting buffer contents are not meaningful.
__global__ void _slowKernel(char* ptr, int sz) {
  const int first = blockIdx.x * blockDim.x + threadIdx.x;
  const int stride = gridDim.x * blockDim.x;
  for (int pos = first; pos < sz; pos += stride) {
    for (int round = 0; round < 100000; ++round) {
      ptr[pos] += ptr[(pos + 1007) % sz] + round;
    }
  }
}

// Host entry point: enqueues the slow kernel on `stream` with a fixed
// 128 x 128 launch configuration over the first `kSize` bytes of `ptr`.
void slowKernel(void* ptr, int kSize, cudaStream_t stream) {
  char* const bytes = static_cast<char*>(ptr);
  _slowKernel<<<128, 128, 0, stream>>>(bytes, kSize);
}

--------------------------------------------------------------------------------
/tensorpipe/test/channel/kernel.cuh:
--------------------------------------------------------------------------------
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include

// Launches a deliberately slow kernel on `stream` that overwrites the first
// `kSize` bytes of `ptr` with garbage. Tests use it to check that CUDA
// channels synchronize properly with pending work on a stream.
void slowKernel(void* ptr, int kSize, cudaStream_t stream);

-------------------------------------------------------------------------------- /tensorpipe/test/channel/xth/xth_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | #include 11 | 12 | namespace { 13 | 14 | class XthChannelTestHelper : public CpuChannelTestHelper { 15 | protected: 16 | std::shared_ptr makeContextInternal( 17 | std::string id) override { 18 | auto context = tensorpipe::channel::xth::create(); 19 | context->setId(std::move(id)); 20 | return context; 21 | } 22 | }; 23 | 24 | XthChannelTestHelper helper; 25 | 26 | } // namespace 27 | 28 | INSTANTIATE_TEST_CASE_P(Xth, ChannelTestSuite, ::testing::Values(&helper)); 29 | 30 | INSTANTIATE_TEST_CASE_P(Xth, CpuChannelTestSuite, ::testing::Values(&helper)); 31 | -------------------------------------------------------------------------------- /tensorpipe/test/common/cuda_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | namespace { 19 | 20 | tensorpipe::CudaLib getCudaLib() { 21 | tensorpipe::Error error; 22 | tensorpipe::CudaLib cudaLib; 23 | std::tie(error, cudaLib) = tensorpipe::CudaLib::create(); 24 | EXPECT_FALSE(error) << error.what(); 25 | return cudaLib; 26 | } 27 | 28 | } // namespace 29 | 30 | // This tests whether we can retrieve the index of the device on which a pointer 31 | // resides under "normal" circumstances (in the same context where it was 32 | // allocated, or in a "fresh" thread). 
33 | TEST(Cuda, DeviceForPointer) { 34 | if (TestEnvironment::numCudaDevices() < 2) { 35 | GTEST_SKIP() << "Skipping test requiring >=2 CUDA devices."; 36 | } 37 | 38 | ForkedThreadPeerGroup pg; 39 | pg.spawn( 40 | [&]() { 41 | TP_CUDA_CHECK(cudaSetDevice(1)); 42 | void* ptr; 43 | TP_CUDA_CHECK(cudaMalloc(&ptr, 1024)); 44 | 45 | EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1); 46 | 47 | std::string ptrStr( 48 | reinterpret_cast(&ptr), 49 | reinterpret_cast(&ptr) + sizeof(void*)); 50 | pg.send(PeerGroup::kClient, ptrStr); 51 | }, 52 | [&]() { 53 | std::string ptrStr = pg.recv(PeerGroup::kClient); 54 | void* ptr = *reinterpret_cast(&ptrStr[0]); 55 | 56 | EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1); 57 | }); 58 | } 59 | 60 | // This tests whether we can retrieve the index of the device on which a pointer 61 | // resided after we've explicitly set the current device to an invalid value. 62 | // This is known to cause problems in recent versions of CUDA, possibly because 63 | // of a bug. 
64 | TEST(Cuda, DeviceForPointerAfterReset) { 65 | if (TestEnvironment::numCudaDevices() < 2) { 66 | GTEST_SKIP() << "Skipping test requiring >=2 CUDA devices."; 67 | } 68 | 69 | ForkedThreadPeerGroup pg; 70 | pg.spawn( 71 | [&]() { 72 | TP_CUDA_CHECK(cudaSetDevice(1)); 73 | void* ptr; 74 | TP_CUDA_CHECK(cudaMalloc(&ptr, 1024)); 75 | 76 | TP_CUDA_CHECK(cudaSetDevice(0)); 77 | 78 | EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1); 79 | 80 | std::string ptrStr( 81 | reinterpret_cast(&ptr), 82 | reinterpret_cast(&ptr) + sizeof(void*)); 83 | pg.send(PeerGroup::kClient, ptrStr); 84 | }, 85 | [&]() { 86 | std::string ptrStr = pg.recv(PeerGroup::kClient); 87 | void* ptr = *reinterpret_cast(&ptrStr[0]); 88 | 89 | TP_CUDA_CHECK(cudaSetDevice(0)); 90 | 91 | EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1); 92 | }); 93 | } 94 | -------------------------------------------------------------------------------- /tensorpipe/test/common/defs_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | TEST(Defs, Exception) { 14 | EXPECT_THROW(TP_THROW_EINVAL(), std::invalid_argument); 15 | EXPECT_THROW(TP_THROW_EINVAL() << "hola", std::invalid_argument); 16 | EXPECT_THROW(TP_THROW_EINVAL() << "adioshola", std::invalid_argument); 17 | EXPECT_THROW(TP_THROW_SYSTEM(ENODATA) << "adioshola", std::system_error); 18 | EXPECT_THROW(TP_THROW_SYSTEM(EBUSY), std::system_error); 19 | EXPECT_THROW(TP_THROW_SYSTEM(EBUSY) << "my message", std::system_error); 20 | } 21 | -------------------------------------------------------------------------------- /tensorpipe/test/common/system_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | using namespace tensorpipe; 14 | 15 | TEST(Pow2, isPow2) { 16 | for (uint64_t i = 0; i < 63; ++i) { 17 | EXPECT_TRUE(isPow2(1ull << i)); 18 | } 19 | 20 | EXPECT_FALSE(isPow2(3)); 21 | EXPECT_FALSE(isPow2(5)); 22 | EXPECT_FALSE(isPow2(10)); 23 | EXPECT_FALSE(isPow2(15)); 24 | EXPECT_TRUE(isPow2(16)); 25 | EXPECT_FALSE(isPow2(17)); 26 | EXPECT_FALSE(isPow2(18)); 27 | EXPECT_FALSE(isPow2(25)); 28 | EXPECT_FALSE(isPow2(1028)); 29 | } 30 | 31 | TEST(Pow2, nextPow2) { 32 | for (uint64_t i = 0; i < 63; ++i) { 33 | uint64_t p2 = 1ull << i; 34 | uint64_t nextP2 = 1ull << (i + 1); 35 | EXPECT_EQ(nextPow2(p2), p2); 36 | EXPECT_EQ(nextPow2(p2 + 1), nextP2); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /tensorpipe/test/core/listener_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | using namespace tensorpipe; 20 | 21 | TEST(Listener, ClosingAbortsOperations) { 22 | auto context = std::make_shared(); 23 | 24 | context->registerTransport(0, "uv", transport::uv::create()); 25 | context->registerChannel(0, "basic", channel::basic::create()); 26 | 27 | { 28 | auto listener = context->listen({"uv://127.0.0.1"}); 29 | 30 | std::promise donePromise; 31 | listener->accept( 32 | [&](const Error& error, std::shared_ptr /* unused */) { 33 | EXPECT_TRUE(error); 34 | donePromise.set_value(); 35 | }); 36 | listener->close(); 37 | donePromise.get_future().get(); 38 | } 39 | 40 | context->join(); 41 | } 42 | -------------------------------------------------------------------------------- /tensorpipe/test/test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | // One-time init to use EPIPE errors instead of SIGPIPE 12 | namespace { 13 | 14 | struct Initializer { 15 | explicit Initializer() { 16 | signal(SIGPIPE, SIG_IGN); 17 | } 18 | }; 19 | 20 | Initializer initializer; 21 | 22 | } // namespace 23 | -------------------------------------------------------------------------------- /tensorpipe/test/test_environment.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #if TP_USE_CUDA 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #endif // TP_USE_CUDA 19 | 20 | int TestEnvironment::numCudaDevices() { 21 | static int count = -1; 22 | if (count == -1) { 23 | #if TP_USE_CUDA 24 | pid_t pid = fork(); 25 | TP_THROW_SYSTEM_IF(pid < 0, errno) << "Failed to fork"; 26 | if (pid == 0) { 27 | int res; 28 | TP_CUDA_CHECK(cudaGetDeviceCount(&res)); 29 | std::exit(res); 30 | } else { 31 | int status; 32 | TP_THROW_SYSTEM_IF(waitpid(pid, &status, 0) < 0, errno) 33 | << "Failed to wait for child process"; 34 | TP_THROW_ASSERT_IF(!WIFEXITED(status)); 35 | count = WEXITSTATUS(status); 36 | } 37 | #else // TP_USE_CUDA 38 | count = 0; 39 | #endif // TP_USE_CUDA 40 | } 41 | 42 | return count; 43 | } 44 | -------------------------------------------------------------------------------- /tensorpipe/test/test_environment.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | class TestEnvironment { 12 | public: 13 | static int numCudaDevices(); 14 | }; 15 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/context_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | using namespace tensorpipe; 14 | using namespace tensorpipe::transport; 15 | 16 | TEST_P(TransportTest, Context_Basics) { 17 | auto context = GetParam()->getContext(); 18 | auto addr = GetParam()->defaultAddr(); 19 | 20 | { 21 | std::mutex mutex; 22 | std::condition_variable cv; 23 | std::vector> connections; 24 | 25 | // Listener runs callback for every new connection. 26 | auto listener = context->listen(addr); 27 | listener->accept( 28 | [&](const Error& error, std::shared_ptr connection) { 29 | ASSERT_FALSE(error) << error.what(); 30 | std::lock_guard lock(mutex); 31 | connections.push_back(std::move(connection)); 32 | cv.notify_one(); 33 | }); 34 | 35 | // Connect to listener. 36 | auto conn = context->connect(listener->addr()); 37 | 38 | // Wait for new connection 39 | { 40 | std::unique_lock lock(mutex); 41 | while (connections.empty()) { 42 | cv.wait(lock); 43 | } 44 | } 45 | } 46 | 47 | context->join(); 48 | } 49 | 50 | TEST_P(TransportTest, Context_DomainDescriptor) { 51 | auto context = GetParam()->getContext(); 52 | 53 | { 54 | const auto& domainDescriptor = context->domainDescriptor(); 55 | EXPECT_FALSE(domainDescriptor.empty()); 56 | } 57 | 58 | context->join(); 59 | } 60 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/ibv/context_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | namespace { 15 | 16 | class IbvTransportContextTest : public TransportTest {}; 17 | 18 | IbvTransportTestHelper helper; 19 | 20 | } // namespace 21 | 22 | using namespace tensorpipe; 23 | 24 | // Linux-only because OSX machines on CircleCI cannot resolve their hostname 25 | #ifdef __linux__ 26 | TEST_P(IbvTransportContextTest, LookupHostnameAddress) { 27 | Error error; 28 | std::string addr; 29 | std::tie(error, addr) = transport::ibv::lookupAddrForHostname(); 30 | EXPECT_FALSE(error) << error.what(); 31 | EXPECT_NE(addr, ""); 32 | } 33 | #endif 34 | 35 | // Interface name conventions change based on platform. Linux uses "lo", OSX 36 | // uses lo0, Windows uses integers. 37 | #ifdef __linux__ 38 | #define LOOPBACK_INTERFACE "lo" 39 | #elif __APPLE__ 40 | #define LOOPBACK_INTERFACE "lo0" 41 | #endif 42 | 43 | #ifdef LOOPBACK_INTERFACE 44 | TEST_P(IbvTransportContextTest, LookupInterfaceAddress) { 45 | Error error; 46 | std::string addr; 47 | std::tie(error, addr) = 48 | transport::ibv::lookupAddrForIface(LOOPBACK_INTERFACE); 49 | EXPECT_FALSE(error) << error.what(); 50 | EXPECT_NE(addr, ""); 51 | } 52 | #endif 53 | 54 | INSTANTIATE_TEST_CASE_P( 55 | Ibv, 56 | IbvTransportContextTest, 57 | ::testing::Values(&helper)); 58 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/ibv/ibv_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | namespace { 12 | 13 | IbvTransportTestHelper helper; 14 | 15 | } // namespace 16 | 17 | INSTANTIATE_TEST_CASE_P(Ibv, TransportTest, ::testing::Values(&helper)); 18 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/ibv/ibv_test.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | class IbvTransportTestHelper : public TransportTestHelper { 15 | protected: 16 | std::shared_ptr getContextInternal() 17 | override { 18 | return tensorpipe::transport::ibv::create(); 19 | } 20 | 21 | public: 22 | std::string defaultAddr() override { 23 | return "127.0.0.1"; 24 | } 25 | }; 26 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/shm/listener_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace tensorpipe; 20 | using namespace tensorpipe::transport; 21 | 22 | namespace { 23 | 24 | class ShmListenerTest : public TransportTest {}; 25 | 26 | SHMTransportTestHelper helper; 27 | 28 | std::string generateUniqueAddr() { 29 | const ::testing::TestInfo* const testInfo = 30 | ::testing::UnitTest::GetInstance()->current_test_info(); 31 | std::ostringstream ss; 32 | ss << "tensorpipe_test_" << testInfo->test_suite_name() << "." 33 | << testInfo->name() << "_" << ::getpid(); 34 | return ss.str(); 35 | } 36 | 37 | } // namespace 38 | 39 | TEST_P(ShmListenerTest, ExplicitAbstractSocketName) { 40 | std::string expectedAddr = generateUniqueAddr(); 41 | std::shared_ptr ctx = GetParam()->getContext(); 42 | std::shared_ptr listener = ctx->listen(expectedAddr); 43 | std::string actualAddr = listener->addr(); 44 | ASSERT_EQ(actualAddr, expectedAddr); 45 | std::shared_ptr outgoingConnection = ctx->connect(actualAddr); 46 | std::promise prom; 47 | listener->accept( 48 | [&](const Error& error, std::shared_ptr /* unused */) { 49 | EXPECT_FALSE(error) << error.what(); 50 | prom.set_value(); 51 | }); 52 | std::future_status res = prom.get_future().wait_for(std::chrono::seconds(1)); 53 | ASSERT_NE(res, std::future_status::timeout); 54 | } 55 | 56 | TEST_P(ShmListenerTest, AutobindAbstractSocketName) { 57 | std::shared_ptr ctx = GetParam()->getContext(); 58 | std::shared_ptr listener = ctx->listen(""); 59 | std::string addr = listener->addr(); 60 | ASSERT_NE(addr, ""); 61 | // Since Linux 2.3.15 (Aug 1999) the address is in this format, see unix(7). 
62 | ASSERT_THAT(addr, ::testing::MatchesRegex("[0-9a-f]{5}")); 63 | std::shared_ptr outgoingConnection = ctx->connect(addr); 64 | std::promise prom; 65 | listener->accept( 66 | [&](const Error& error, std::shared_ptr /* unused */) { 67 | EXPECT_FALSE(error) << error.what(); 68 | prom.set_value(); 69 | }); 70 | std::future_status res = prom.get_future().wait_for(std::chrono::seconds(1)); 71 | ASSERT_NE(res, std::future_status::timeout); 72 | } 73 | 74 | INSTANTIATE_TEST_CASE_P(Shm, ShmListenerTest, ::testing::Values(&helper)); 75 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/shm/shm_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | namespace { 12 | 13 | SHMTransportTestHelper helper; 14 | 15 | } // namespace 16 | 17 | INSTANTIATE_TEST_CASE_P(Shm, TransportTest, ::testing::Values(&helper)); 18 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/shm/shm_test.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | class SHMTransportTestHelper : public TransportTestHelper { 17 | protected: 18 | std::shared_ptr getContextInternal() 19 | override { 20 | return tensorpipe::transport::shm::create(); 21 | } 22 | 23 | public: 24 | std::string defaultAddr() override { 25 | return ""; 26 | } 27 | }; 28 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/shm/sockaddr_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | using namespace tensorpipe::transport; 14 | 15 | TEST(ShmSockaddr, FromToString) { 16 | auto addr = shm::Sockaddr::createAbstractUnixAddr("foo"); 17 | ASSERT_EQ(addr.str(), std::string("foo")); 18 | } 19 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/uv/connection_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | namespace { 14 | 15 | class UVTransportConnectionTest : public TransportTest {}; 16 | 17 | UVTransportTestHelper helper; 18 | 19 | } // namespace 20 | 21 | using namespace tensorpipe; 22 | using namespace tensorpipe::transport; 23 | 24 | TEST_P(UVTransportConnectionTest, LargeWrite) { 25 | constexpr int kMsgSize = 16 * 1024 * 1024; 26 | std::string msg(kMsgSize, 0x42); 27 | 28 | testConnection( 29 | [&](std::shared_ptr conn) { 30 | doWrite(conn, msg.c_str(), msg.length(), [&, conn](const Error& error) { 31 | ASSERT_FALSE(error) << error.what(); 32 | peers_->done(PeerGroup::kServer); 33 | }); 34 | peers_->join(PeerGroup::kServer); 35 | }, 36 | [&](std::shared_ptr conn) { 37 | doRead( 38 | conn, [&, conn](const Error& error, const void* data, size_t len) { 39 | ASSERT_FALSE(error) << error.what(); 40 | ASSERT_EQ(len, msg.length()); 41 | const char* cdata = (const char*)data; 42 | for (int i = 0; i < len; ++i) { 43 | const char c = cdata[i]; 44 | ASSERT_EQ(c, msg[i]) << "Wrong value at position " << i 45 | << " of " << msg.length(); 46 | } 47 | peers_->done(PeerGroup::kClient); 48 | }); 49 | peers_->join(PeerGroup::kClient); 50 | }); 51 | } 52 | 53 | INSTANTIATE_TEST_CASE_P( 54 | Uv, 55 | UVTransportConnectionTest, 56 | ::testing::Values(&helper)); 57 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/uv/context_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | namespace { 15 | 16 | class UVTransportContextTest : public TransportTest {}; 17 | 18 | UVTransportTestHelper helper; 19 | 20 | } // namespace 21 | 22 | using namespace tensorpipe; 23 | 24 | // Linux-only because OSX machines on CircleCI cannot resolve their hostname 25 | #ifdef __linux__ 26 | TEST_P(UVTransportContextTest, LookupHostnameAddress) { 27 | Error error; 28 | std::string addr; 29 | std::tie(error, addr) = transport::uv::lookupAddrForHostname(); 30 | EXPECT_FALSE(error) << error.what(); 31 | EXPECT_NE(addr, ""); 32 | } 33 | #endif 34 | 35 | // Interface name conventions change based on platform. Linux uses "lo", OSX 36 | // uses lo0, Windows uses integers. 37 | #ifdef __linux__ 38 | #define LOOPBACK_INTERFACE "lo" 39 | #elif __APPLE__ 40 | #define LOOPBACK_INTERFACE "lo0" 41 | #endif 42 | 43 | #ifdef LOOPBACK_INTERFACE 44 | TEST_P(UVTransportContextTest, LookupInterfaceAddress) { 45 | Error error; 46 | std::string addr; 47 | std::tie(error, addr) = transport::uv::lookupAddrForIface(LOOPBACK_INTERFACE); 48 | EXPECT_FALSE(error) << error.what(); 49 | EXPECT_NE(addr, ""); 50 | } 51 | #endif 52 | 53 | TEST_P(UVTransportContextTest, LookupAddressLikeNccl) { 54 | Error error; 55 | std::string addr; 56 | std::tie(error, addr) = transport::uv::lookupAddrLikeNccl(); 57 | EXPECT_FALSE(error) << error.what(); 58 | EXPECT_NE(addr, ""); 59 | } 60 | 61 | INSTANTIATE_TEST_CASE_P(Uv, UVTransportContextTest, ::testing::Values(&helper)); 62 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/uv/loop_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | using namespace tensorpipe::transport::uv; 14 | 15 | namespace test { 16 | namespace transport { 17 | namespace uv { 18 | 19 | TEST(UvLoop, Defer) { 20 | Loop loop; 21 | 22 | { 23 | // Defer function on event loop thread. 24 | std::promise prom; 25 | loop.deferToLoop([&] { prom.set_value(std::this_thread::get_id()); }); 26 | ASSERT_NE(std::this_thread::get_id(), prom.get_future().get()); 27 | } 28 | 29 | loop.join(); 30 | } 31 | 32 | } // namespace uv 33 | } // namespace transport 34 | } // namespace test 35 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/uv/uv_test.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | namespace { 12 | 13 | UVTransportTestHelper helper; 14 | 15 | } // namespace 16 | 17 | INSTANTIATE_TEST_CASE_P(Uv, TransportTest, ::testing::Values(&helper)); 18 | -------------------------------------------------------------------------------- /tensorpipe/test/transport/uv/uv_test.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | class UVTransportTestHelper : public TransportTestHelper { 15 | protected: 16 | std::shared_ptr getContextInternal() 17 | override { 18 | return tensorpipe::transport::uv::create(); 19 | } 20 | 21 | public: 22 | std::string defaultAddr() override { 23 | return "127.0.0.1"; 24 | } 25 | }; 26 | -------------------------------------------------------------------------------- /tensorpipe/transport/connection.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | namespace tensorpipe { 19 | namespace transport { 20 | 21 | class Connection { 22 | public: 23 | using read_callback_fn = 24 | std::function; 25 | 26 | virtual void read(read_callback_fn fn) = 0; 27 | 28 | virtual void read(void* ptr, size_t length, read_callback_fn fn) = 0; 29 | 30 | using write_callback_fn = std::function; 31 | 32 | virtual void write(const void* ptr, size_t length, write_callback_fn fn) = 0; 33 | 34 | // 35 | // Helper functions for reading/writing nop objects. 36 | // 37 | 38 | // Read and parse a nop object. 39 | // 40 | // This function may be overridden by a subclass. 41 | // 42 | // For example, the shm transport may be able to bypass reading into a 43 | // temporary buffer and instead read directly from its peer's 44 | // ring buffer. This saves an allocation and a memory copy. 45 | // 46 | using read_nop_callback_fn = std::function; 47 | 48 | virtual void read(AbstractNopHolder& object, read_nop_callback_fn fn) = 0; 49 | 50 | // Serialize and write nop object. 51 | // 52 | // This function may be overridden by a subclass.
53 | // 54 | // For example, the shm transport may be able to bypass serialization 55 | // into a temporary buffer and instead serialize directly into 56 | // its peer's ring buffer. This saves an allocation and a memory copy. 57 | // 58 | virtual void write(const AbstractNopHolder& object, write_callback_fn fn) = 0; 59 | 60 | // Tell the connection what its identifier is. 61 | // 62 | // This is only supposed to be called from the high-level pipe or from 63 | // channels. It will only be used for logging and debugging purposes. 64 | virtual void setId(std::string id) = 0; 65 | 66 | virtual void close() = 0; 67 | 68 | virtual ~Connection() = default; 69 | }; 70 | 71 | } // namespace transport 72 | } // namespace tensorpipe 73 | -------------------------------------------------------------------------------- /tensorpipe/transport/context.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | namespace tensorpipe { 15 | namespace transport { 16 | 17 | class Connection; 18 | class Listener; 19 | 20 | class Context { 21 | public: 22 | virtual std::shared_ptr connect(std::string addr) = 0; 23 | 24 | virtual std::shared_ptr listen(std::string addr) = 0; 25 | 26 | // Return whether the context is able to operate correctly. 27 | // 28 | // Some transport types may be unable to perform as intended under 29 | // some circumstances (e.g., specialized hardware unavailable, lack 30 | // of permissions). They can report it through this method in order 31 | // for the core context to avoid registering them in the first place. 32 | // 33 | virtual bool isViable() const = 0; 34 | 35 | // Return string to describe the domain for this context.
36 | // 37 | // Two processes with a context of the same type can connect to each 38 | // other if one side's domain descriptor is "accepted" by the other 39 | // one, using the canCommunicateWithRemote method below. That method 40 | // must be symmetric, and unless overridden defaults to string 41 | // comparison. 42 | // 43 | // For example, for a transport that leverages TCP/IP, this may be 44 | // as simple as the address family (assuming we can route between 45 | // any two processes). For a transport that leverages shared memory, 46 | // this descriptor must uniquely identify the machine, such that 47 | // only co-located processes generate the same domain descriptor. 48 | // 49 | virtual const std::string& domainDescriptor() const = 0; 50 | 51 | // Compare local and remote domain descriptor for compatibility. 52 | // 53 | // Determine whether a connection can be opened between this context 54 | // and a remote one that has the given domain descriptor. This 55 | // function needs to be symmetric: if we called this method on the 56 | // remote context with the local descriptor we should get the same 57 | // answer. Unless overridden it defaults to string comparison. 58 | // 59 | virtual bool canCommunicateWithRemote( 60 | const std::string& remoteDomainDescriptor) const { 61 | return domainDescriptor() == remoteDomainDescriptor; 62 | } 63 | 64 | // Tell the context what its identifier is. 65 | // 66 | // This is only supposed to be called from the high-level context or from 67 | // channel contexts. It will only be used for logging and debugging purposes.
68 | virtual void setId(std::string id) = 0; 69 | 70 | virtual void close() = 0; 71 | 72 | virtual void join() = 0; 73 | 74 | virtual ~Context() = default; 75 | }; 76 | 77 | } // namespace transport 78 | } // namespace tensorpipe 79 | -------------------------------------------------------------------------------- /tensorpipe/transport/error.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | namespace tensorpipe { 12 | namespace transport { 13 | 14 | std::string ContextClosedError::what() const { 15 | return "context closed"; 16 | } 17 | 18 | std::string ListenerClosedError::what() const { 19 | return "listener closed"; 20 | } 21 | 22 | std::string ConnectionClosedError::what() const { 23 | return "connection closed"; 24 | } 25 | 26 | std::string ContextNotViableError::what() const { 27 | return "context not viable"; 28 | } 29 | 30 | } // namespace transport 31 | } // namespace tensorpipe 32 | -------------------------------------------------------------------------------- /tensorpipe/transport/error.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace transport { 17 | 18 | class ContextClosedError final : public BaseError { 19 | public: 20 | ContextClosedError() {} 21 | 22 | std::string what() const override; 23 | }; 24 | 25 | class ListenerClosedError final : public BaseError { 26 | public: 27 | ListenerClosedError() {} 28 | 29 | std::string what() const override; 30 | }; 31 | 32 | class ConnectionClosedError final : public BaseError { 33 | public: 34 | ConnectionClosedError() {} 35 | 36 | std::string what() const override; 37 | }; 38 | 39 | class ContextNotViableError final : public BaseError { 40 | public: 41 | ContextNotViableError() {} 42 | 43 | std::string what() const override; 44 | }; 45 | 46 | } // namespace transport 47 | } // namespace tensorpipe 48 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/context_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | namespace tensorpipe { 21 | namespace transport { 22 | namespace ibv { 23 | 24 | class ConnectionImpl; 25 | class ListenerImpl; 26 | 27 | class ContextImpl final 28 | : public ContextImplBoilerplate { 29 | public: 30 | static std::shared_ptr create(); 31 | 32 | ContextImpl(IbvLib ibvLib, IbvDeviceList deviceList); 33 | 34 | // Implement the DeferredExecutor interface. 
35 | bool inLoop() const override; 36 | void deferToLoop(std::function fn) override; 37 | 38 | void registerDescriptor( 39 | int fd, 40 | int events, 41 | std::shared_ptr h); 42 | 43 | void unregisterDescriptor(int fd); 44 | 45 | Reactor& getReactor(); 46 | 47 | protected: 48 | // Implement the entry points called by ContextImplBoilerplate. 49 | void handleErrorImpl() override; 50 | void joinImpl() override; 51 | 52 | private: 53 | Reactor reactor_; 54 | EpollLoop loop_{this->reactor_}; 55 | }; 56 | 57 | } // namespace ibv 58 | } // namespace transport 59 | } // namespace tensorpipe 60 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/error.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | 17 | namespace tensorpipe { 18 | namespace transport { 19 | namespace ibv { 20 | 21 | std::string IbvError::what() const { 22 | return error_; 23 | } 24 | 25 | std::string GetaddrinfoError::what() const { 26 | std::ostringstream ss; 27 | ss << "getaddrinfo: " << gai_strerror(error_); 28 | return ss.str(); 29 | } 30 | 31 | std::string NoAddrFoundError::what() const { 32 | return "no address found"; 33 | } 34 | 35 | } // namespace ibv 36 | } // namespace transport 37 | } // namespace tensorpipe 38 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/error.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace transport { 17 | namespace ibv { 18 | 19 | class IbvError final : public BaseError { 20 | public: 21 | explicit IbvError(std::string error) : error_(error) {} 22 | 23 | std::string what() const override; 24 | 25 | private: 26 | std::string error_; 27 | }; 28 | 29 | class GetaddrinfoError final : public BaseError { 30 | public: 31 | explicit GetaddrinfoError(int error) : error_(error) {} 32 | 33 | std::string what() const override; 34 | 35 | private: 36 | int error_; 37 | }; 38 | 39 | class NoAddrFoundError final : public BaseError { 40 | public: 41 | NoAddrFoundError() {} 42 | 43 | std::string what() const override; 44 | }; 45 | 46 | } // namespace ibv 47 | } // namespace transport 48 | } // namespace tensorpipe 49 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace transport { 18 | namespace ibv { 19 | 20 | std::shared_ptr create() { 21 | return std::make_shared< 22 | ContextBoilerplate>(); 23 | } 24 | 25 | } // namespace ibv 26 | } // namespace transport 27 | } // namespace tensorpipe 28 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace transport { 17 | namespace ibv { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace ibv 22 | } // namespace transport 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/listener_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | namespace tensorpipe { 25 | namespace transport { 26 | namespace ibv { 27 | 28 | class ConnectionImpl; 29 | class ContextImpl; 30 | 31 | class ListenerImpl final 32 | : public ListenerImplBoilerplate, 33 | public EpollLoop::EventHandler { 34 | public: 35 | // Create a listener that listens on the specified address. 36 | ListenerImpl( 37 | ConstructorToken token, 38 | std::shared_ptr context, 39 | std::string id, 40 | std::string addr); 41 | 42 | // Implementation of EventHandler. 43 | void handleEventsFromLoop(int events) override; 44 | 45 | protected: 46 | // Implement the entry points called by ListenerImplBoilerplate. 47 | void initImplFromLoop() override; 48 | void acceptImplFromLoop(accept_callback_fn fn) override; 49 | std::string addrImplFromLoop() const override; 50 | void handleErrorImpl() override; 51 | 52 | private: 53 | Socket socket_; 54 | Sockaddr sockaddr_; 55 | std::deque fns_; 56 | }; 57 | 58 | } // namespace ibv 59 | } // namespace transport 60 | } // namespace tensorpipe 61 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/sockaddr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | namespace tensorpipe { 19 | namespace transport { 20 | namespace ibv { 21 | 22 | class Sockaddr final : public tensorpipe::Sockaddr { 23 | public: 24 | static Sockaddr createInetSockAddr(const std::string& str); 25 | 26 | Sockaddr(const struct sockaddr* addr, socklen_t addrlen) { 27 | TP_ARG_CHECK(addr != nullptr); 28 | TP_ARG_CHECK_LE(addrlen, sizeof(addr_)); 29 | // Ensure the sockaddr_storage is zeroed, because we don't always 30 | // write to all fields in the `sockaddr_[in|in6]` structures. 31 | std::memset(&addr_, 0, sizeof(addr_)); 32 | std::memcpy(&addr_, addr, addrlen); 33 | addrlen_ = addrlen; 34 | } 35 | 36 | inline const struct sockaddr* addr() const override { 37 | return reinterpret_cast(&addr_); 38 | } 39 | 40 | inline struct sockaddr* addr() { 41 | return reinterpret_cast(&addr_); 42 | } 43 | 44 | inline socklen_t addrlen() const override { 45 | return addrlen_; 46 | } 47 | 48 | std::string str() const; 49 | 50 | private: 51 | struct sockaddr_storage addr_; 52 | socklen_t addrlen_; 53 | }; 54 | 55 | } // namespace ibv 56 | } // namespace transport 57 | } // namespace tensorpipe 58 | -------------------------------------------------------------------------------- /tensorpipe/transport/ibv/utility.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace transport { 18 | namespace ibv { 19 | 20 | std::tuple lookupAddrForIface(std::string iface); 21 | 22 | std::tuple lookupAddrForHostname(); 23 | 24 | } // namespace ibv 25 | } // namespace transport 26 | } // namespace tensorpipe 27 | -------------------------------------------------------------------------------- /tensorpipe/transport/listener.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | 18 | namespace tensorpipe { 19 | namespace transport { 20 | 21 | class Listener { 22 | public: 23 | using accept_callback_fn = std::function< 24 | void(const Error& error, std::shared_ptr connection)>; 25 | 26 | virtual void accept(accept_callback_fn fn) = 0; 27 | 28 | // Return address that this listener is listening on. 29 | // This may be required if the listening address is not known up 30 | // front, or dynamically populated by the operating system (e.g. by 31 | // letting the operating system pick a TCP port to listen on). 32 | virtual std::string addr() const = 0; 33 | 34 | // Tell the listener what its identifier is. 35 | // 36 | // This is only supposed to be called from the high-level listener or from 37 | // channel contexts. It will only be used for logging and debugging purposes.
38 | virtual void setId(std::string id) = 0; 39 | 40 | virtual void close() = 0; 41 | 42 | virtual ~Listener() = default; 43 | }; 44 | 45 | } // namespace transport 46 | } // namespace tensorpipe 47 | -------------------------------------------------------------------------------- /tensorpipe/transport/shm/context_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace tensorpipe { 20 | namespace transport { 21 | namespace shm { 22 | 23 | class ConnectionImpl; 24 | class ListenerImpl; 25 | 26 | class ContextImpl final 27 | : public ContextImplBoilerplate { 28 | public: 29 | static std::shared_ptr create(); 30 | 31 | explicit ContextImpl(std::string domainDescriptor); 32 | 33 | // Implement the DeferredExecutor interface. 34 | bool inLoop() const override; 35 | void deferToLoop(std::function fn) override; 36 | 37 | void registerDescriptor( 38 | int fd, 39 | int events, 40 | std::shared_ptr h); 41 | 42 | void unregisterDescriptor(int fd); 43 | 44 | using TToken = uint32_t; 45 | using TFunction = std::function; 46 | 47 | TToken addReaction(TFunction fn); 48 | 49 | void removeReaction(TToken token); 50 | 51 | std::tuple reactorFds(); 52 | 53 | protected: 54 | // Implement the entry points called by ContextImplBoilerplate. 
55 | void handleErrorImpl() override; 56 | void joinImpl() override; 57 | 58 | private: 59 | Reactor reactor_; 60 | EpollLoop loop_{this->reactor_}; 61 | }; 62 | 63 | } // namespace shm 64 | } // namespace transport 65 | } // namespace tensorpipe 66 | -------------------------------------------------------------------------------- /tensorpipe/transport/shm/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace transport { 18 | namespace shm { 19 | 20 | std::shared_ptr create() { 21 | return std::make_shared< 22 | ContextBoilerplate>(); 23 | } 24 | 25 | } // namespace shm 26 | } // namespace transport 27 | } // namespace tensorpipe 28 | -------------------------------------------------------------------------------- /tensorpipe/transport/shm/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace transport { 17 | namespace shm { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace shm 22 | } // namespace transport 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/transport/shm/listener_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace tensorpipe { 21 | namespace transport { 22 | namespace shm { 23 | 24 | class ConnectionImpl; 25 | class ContextImpl; 26 | 27 | class ListenerImpl final 28 | : public ListenerImplBoilerplate, 29 | public EpollLoop::EventHandler { 30 | public: 31 | // Create a listener that listens on the specified address. 32 | ListenerImpl( 33 | ConstructorToken token, 34 | std::shared_ptr context, 35 | std::string id, 36 | std::string addr); 37 | 38 | // Implementation of EventHandler. 39 | void handleEventsFromLoop(int events) override; 40 | 41 | protected: 42 | // Implement the entry points called by ListenerImplBoilerplate. 
43 | void initImplFromLoop() override; 44 | void acceptImplFromLoop(accept_callback_fn fn) override; 45 | std::string addrImplFromLoop() const override; 46 | void handleErrorImpl() override; 47 | 48 | private: 49 | Socket socket_; 50 | Sockaddr sockaddr_; 51 | std::deque fns_; 52 | }; 53 | 54 | } // namespace shm 55 | } // namespace transport 56 | } // namespace tensorpipe 57 | -------------------------------------------------------------------------------- /tensorpipe/transport/shm/sockaddr.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | 19 | namespace tensorpipe { 20 | namespace transport { 21 | namespace shm { 22 | // Builds an abstract-namespace UNIX socket address from `name`. An empty name yields an address that carries only sun_family; otherwise the name is stored after a leading NUL byte, truncated to what fits in sun_path. 23 | Sockaddr Sockaddr::createAbstractUnixAddr(const std::string& name) { 24 | struct sockaddr_un sun; 25 | sun.sun_family = AF_UNIX; 26 | std::memset(&sun.sun_path, 0, sizeof(sun.sun_path)); 27 | // There are three "modes" for binding UNIX domain sockets: 28 | // - if len(path) == 0: it autobinds to an abstract address 29 | // - if len(path) > 0 and path[0] == 0: it uses an explicit abstract address 30 | // - if len(path) > 0 and path[0] != 0: it uses a concrete filesystem path 31 | if (name.empty()) { 32 | return Sockaddr( 33 | reinterpret_cast(&sun), sizeof(sun.sun_family)); 34 | } else { 35 | constexpr size_t offset = 1; 36 | const size_t len = std::min(sizeof(sun.sun_path) - offset, name.size()); 37 | std::memcpy(&sun.sun_path[offset], name.data(), len); // Byte copy, not strncpy: addrlen below counts all `len` bytes, so bytes after an embedded NUL must be kept. 38 | 39 | // Note: instead of using sizeof(sun) we compute the addrlen from 40 | // the string length of the abstract socket name. If we use 41 | // sizeof(sun), lsof shows all the trailing NUL characters. 
42 | return Sockaddr( 43 | reinterpret_cast(&sun), 44 | sizeof(sun.sun_family) + offset + len); 45 | } 46 | } 47 | 48 | Sockaddr::Sockaddr(const struct sockaddr* addr, socklen_t addrlen) { 49 | TP_ARG_CHECK(addr != nullptr); 50 | TP_ARG_CHECK_LE(addrlen, sizeof(addr_)); 51 | std::memset(&addr_, 0, sizeof(addr_)); 52 | std::memcpy(&addr_, addr, addrlen); 53 | addrlen_ = addrlen; 54 | } 55 | 56 | std::string Sockaddr::str() const { 57 | TP_DCHECK_GE(addrlen_, sizeof(sockaddr_un::sun_family)); 58 | if (addrlen_ == sizeof(sockaddr_un::sun_family)) { 59 | return ""; 60 | } else { 61 | const struct sockaddr_un* sun{ 62 | reinterpret_cast(&addr_)}; 63 | TP_DCHECK_EQ(sun->sun_path[0], '\0'); 64 | constexpr size_t offset = 1; 65 | const size_t len = addrlen_ - sizeof(sun->sun_family) - offset; 66 | return std::string(&sun->sun_path[offset], len); 67 | } 68 | } 69 | 70 | } // namespace shm 71 | } // namespace transport 72 | } // namespace tensorpipe 73 | -------------------------------------------------------------------------------- /tensorpipe/transport/shm/sockaddr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace tensorpipe { 24 | namespace transport { 25 | namespace shm { 26 | 27 | class Sockaddr final : public tensorpipe::Sockaddr { 28 | public: 29 | static Sockaddr createAbstractUnixAddr(const std::string& name); 30 | 31 | inline const struct sockaddr* addr() const override { 32 | return reinterpret_cast(&addr_); 33 | } 34 | 35 | inline socklen_t addrlen() const override { 36 | return addrlen_; 37 | } 38 | 39 | std::string str() const; 40 | 41 | explicit Sockaddr(const struct sockaddr* addr, socklen_t addrlen); 42 | 43 | private: 44 | struct sockaddr_storage addr_; 45 | socklen_t addrlen_; 46 | }; 47 | 48 | } // namespace shm 49 | } // namespace transport 50 | } // namespace tensorpipe 51 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/connection_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace tensorpipe { 23 | namespace transport { 24 | namespace uv { 25 | 26 | class ContextImpl; 27 | class ListenerImpl; 28 | 29 | class ConnectionImpl final : public ConnectionImplBoilerplate< 30 | ContextImpl, 31 | ListenerImpl, 32 | ConnectionImpl> { 33 | public: 34 | // Create a connection that is already connected (e.g. from a listener). 
35 | ConnectionImpl( 36 | ConstructorToken token, 37 | std::shared_ptr context, 38 | std::string id, 39 | std::unique_ptr handle); 40 | 41 | // Create a connection that connects to the specified address. 42 | ConnectionImpl( 43 | ConstructorToken token, 44 | std::shared_ptr context, 45 | std::string id, 46 | std::string addr); 47 | 48 | protected: 49 | // Implement the entry points called by ConnectionImplBoilerplate. 50 | void initImplFromLoop() override; 51 | void readImplFromLoop(read_callback_fn fn) override; 52 | void readImplFromLoop(void* ptr, size_t length, read_callback_fn fn) override; 53 | void writeImplFromLoop(const void* ptr, size_t length, write_callback_fn fn) 54 | override; 55 | void handleErrorImpl() override; 56 | 57 | private: 58 | // Called when libuv is about to read data from connection. 59 | void allocCallbackFromLoop(uv_buf_t* buf); 60 | 61 | // Called when libuv has read data from connection. 62 | void readCallbackFromLoop(ssize_t nread, const uv_buf_t* buf); 63 | 64 | // Called when libuv has written data to connection. 65 | void writeCallbackFromLoop(int status); 66 | 67 | // Called when libuv has closed the handle. 68 | void closeCallbackFromLoop(); 69 | 70 | const std::unique_ptr handle_; 71 | optional sockaddr_; 72 | 73 | std::deque readOperations_; 74 | std::deque writeOperations_; 75 | }; 76 | 77 | } // namespace uv 78 | } // namespace transport 79 | } // namespace tensorpipe 80 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/context_impl.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace transport { 17 | namespace uv { 18 | 19 | namespace { 20 | 21 | // Prepend descriptor with transport name so it's easy to 22 | // disambiguate descriptors when debugging. 23 | const std::string kDomainDescriptorPrefix{"uv:"}; 24 | 25 | std::string generateDomainDescriptor() { 26 | return kDomainDescriptorPrefix + "*"; 27 | } 28 | 29 | } // namespace 30 | 31 | std::shared_ptr ContextImpl::create() { 32 | return std::make_shared(); 33 | } 34 | 35 | ContextImpl::ContextImpl() 36 | : ContextImplBoilerplate( 37 | generateDomainDescriptor()) {} 38 | 39 | void ContextImpl::handleErrorImpl() { 40 | loop_.close(); 41 | } 42 | 43 | void ContextImpl::joinImpl() { 44 | loop_.join(); 45 | } 46 | 47 | bool ContextImpl::inLoop() const { 48 | return loop_.inLoop(); 49 | }; 50 | 51 | void ContextImpl::deferToLoop(std::function fn) { 52 | loop_.deferToLoop(std::move(fn)); 53 | }; 54 | 55 | std::unique_ptr ContextImpl::createHandle() { 56 | return std::make_unique(loop_.ptr(), loop_); 57 | }; 58 | 59 | } // namespace uv 60 | } // namespace transport 61 | } // namespace tensorpipe 62 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/context_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace tensorpipe { 22 | namespace transport { 23 | namespace uv { 24 | 25 | class ConnectionImpl; 26 | class ListenerImpl; 27 | 28 | class ContextImpl final 29 | : public ContextImplBoilerplate { 30 | public: 31 | static std::shared_ptr create(); 32 | 33 | ContextImpl(); 34 | 35 | // Implement the DeferredExecutor interface. 36 | bool inLoop() const override; 37 | void deferToLoop(std::function fn) override; 38 | 39 | std::unique_ptr createHandle(); 40 | 41 | protected: 42 | // Implement the entry points called by ContextImplBoilerplate. 43 | void handleErrorImpl() override; 44 | void joinImpl() override; 45 | 46 | private: 47 | Loop loop_; 48 | }; 49 | 50 | } // namespace uv 51 | } // namespace transport 52 | } // namespace tensorpipe 53 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/error.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | 13 | namespace tensorpipe { 14 | namespace transport { 15 | namespace uv { 16 | 17 | std::string UVError::what() const { 18 | return formatUvError(error_); 19 | } 20 | 21 | std::string NoAddrFoundError::what() const { 22 | return "no address found"; 23 | } 24 | 25 | } // namespace uv 26 | } // namespace transport 27 | } // namespace tensorpipe 28 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/error.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. 
and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace transport { 17 | namespace uv { 18 | 19 | class UVError final : public BaseError { 20 | public: 21 | explicit UVError(int error) : error_(error) {} 22 | 23 | std::string what() const override; 24 | 25 | private: 26 | int error_; 27 | }; 28 | 29 | class NoAddrFoundError final : public BaseError { 30 | public: 31 | NoAddrFoundError() {} 32 | 33 | std::string what() const override; 34 | }; 35 | 36 | } // namespace uv 37 | } // namespace transport 38 | } // namespace tensorpipe 39 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/factory.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace tensorpipe { 17 | namespace transport { 18 | namespace uv { 19 | 20 | std::shared_ptr create() { 21 | return std::make_shared< 22 | ContextBoilerplate>(); 23 | } 24 | 25 | } // namespace uv 26 | } // namespace transport 27 | } // namespace tensorpipe 28 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/factory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | namespace tensorpipe { 16 | namespace transport { 17 | namespace uv { 18 | 19 | std::shared_ptr create(); 20 | 21 | } // namespace uv 22 | } // namespace transport 23 | } // namespace tensorpipe 24 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/listener_impl.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace tensorpipe { 21 | namespace transport { 22 | namespace uv { 23 | 24 | ListenerImpl::ListenerImpl( 25 | ConstructorToken token, 26 | std::shared_ptr context, 27 | std::string id, 28 | std::string addr) 29 | : ListenerImplBoilerplate( 30 | token, 31 | std::move(context), 32 | std::move(id)), 33 | handle_(context_->createHandle()), 34 | sockaddr_(Sockaddr::createInetSockAddr(addr)) {} 35 | 36 | void ListenerImpl::initImplFromLoop() { 37 | context_->enroll(*this); 38 | 39 | TP_VLOG(9) << "Listener " << id_ << " is initializing in loop"; 40 | 41 | TP_THROW_ASSERT_IF(context_->closed()); 42 | handle_->initFromLoop(); 43 | auto rv = handle_->bindFromLoop(sockaddr_); 44 | TP_THROW_UV_IF(rv < 0, rv); 45 | handle_->armCloseCallbackFromLoop( 46 | [this]() { this->closeCallbackFromLoop(); }); 47 | handle_->listenFromLoop( 48 | [this](int status) { this->connectionCallbackFromLoop(status); }); 49 | 50 | sockaddr_ = handle_->sockNameFromLoop(); 51 | } 52 | 53 | void 
ListenerImpl::acceptImplFromLoop(accept_callback_fn fn) { 54 | callback_.arm(std::move(fn)); 55 | } 56 | 57 | std::string ListenerImpl::addrImplFromLoop() const { 58 | return sockaddr_.str(); 59 | } 60 | 61 | void ListenerImpl::connectionCallbackFromLoop(int status) { 62 | TP_DCHECK(context_->inLoop()); 63 | TP_VLOG(9) << "Listener " << id_ 64 | << " has an incoming connection ready to be accepted (" 65 | << formatUvError(status) << ")"; 66 | 67 | if (status != 0) { 68 | setError(TP_CREATE_ERROR(UVError, status)); 69 | return; 70 | } 71 | 72 | auto connection = context_->createHandle(); 73 | TP_THROW_ASSERT_IF(context_->closed()); 74 | connection->initFromLoop(); 75 | handle_->acceptFromLoop(*connection); 76 | callback_.trigger( 77 | Error::kSuccess, createAndInitConnection(std::move(connection))); 78 | } 79 | 80 | void ListenerImpl::closeCallbackFromLoop() { 81 | TP_VLOG(9) << "Listener " << id_ << " has finished closing its handle"; 82 | context_->unenroll(*this); 83 | } 84 | 85 | void ListenerImpl::handleErrorImpl() { 86 | callback_.triggerAll([&]() { 87 | return std::make_tuple(std::cref(error_), std::shared_ptr()); 88 | }); 89 | handle_->closeFromLoop(); 90 | // Do NOT unenroll here, as we must keep the UV handle alive until the close 91 | // callback fires. 92 | } 93 | 94 | } // namespace uv 95 | } // namespace transport 96 | } // namespace tensorpipe 97 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/listener_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace tensorpipe { 20 | namespace transport { 21 | namespace uv { 22 | 23 | class ConnectionImpl; 24 | class ContextImpl; 25 | 26 | class ListenerImpl final : public ListenerImplBoilerplate< 27 | ContextImpl, 28 | ListenerImpl, 29 | ConnectionImpl> { 30 | public: 31 | // Create a listener that listens on the specified address. 32 | ListenerImpl( 33 | ConstructorToken token, 34 | std::shared_ptr context, 35 | std::string id, 36 | std::string addr); 37 | 38 | protected: 39 | // Implement the entry points called by ListenerImplBoilerplate. 40 | void initImplFromLoop() override; 41 | void acceptImplFromLoop(accept_callback_fn fn) override; 42 | std::string addrImplFromLoop() const override; 43 | void handleErrorImpl() override; 44 | 45 | private: 46 | // Called by libuv if the listening socket can accept a new connection. Status 47 | // is 0 in case of success, < 0 otherwise. See `uv_connection_cb` for more 48 | // information. 49 | void connectionCallbackFromLoop(int status); 50 | 51 | // Called when libuv has closed the handle. 52 | void closeCallbackFromLoop(); 53 | 54 | const std::unique_ptr handle_; 55 | Sockaddr sockaddr_; 56 | 57 | // Once an accept callback fires, it becomes disarmed and must be rearmed. 58 | // Any firings that occur while the callback is disarmed are stashed and 59 | // triggered as soon as it's rearmed. With libuv we don't have the ability 60 | // to disable the lower-level callback when the user callback is disarmed. 61 | // So we'll keep getting notified of new connections even if we don't know 62 | // what to do with them and don't want them. Thus we must store them 63 | // somewhere. This is what RearmableCallback is for. 
64 | RearmableCallback> callback_; 65 | }; 66 | 67 | } // namespace uv 68 | } // namespace transport 69 | } // namespace tensorpipe 70 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/loop.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 7 | */ 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace tensorpipe { 15 | namespace transport { 16 | namespace uv { 17 | 18 | Loop::Loop() { 19 | int rv; 20 | rv = uv_loop_init(&loop_); 21 | TP_THROW_UV_IF(rv < 0, rv); 22 | rv = uv_async_init(&loop_, &async_, uvAsyncCb); 23 | TP_THROW_UV_IF(rv < 0, rv); 24 | async_.data = this; 25 | 26 | startThread("TP_UV_loop"); 27 | } 28 | 29 | void Loop::close() { 30 | if (!closed_.exchange(true)) { 31 | // It's fine to capture this because the loop won't be destroyed until join 32 | // has completed, and join won't complete until this operation is performed. 
33 | deferToLoop( 34 | [this]() { uv_unref(reinterpret_cast(&async_)); }); 35 | } 36 | } 37 | 38 | void Loop::join() { 39 | close(); 40 | 41 | if (!joined_.exchange(true)) { 42 | joinThread(); 43 | } 44 | } 45 | 46 | Loop::~Loop() noexcept { 47 | join(); 48 | } 49 | 50 | void Loop::wakeupEventLoopToDeferFunction() { 51 | auto rv = uv_async_send(&async_); 52 | TP_THROW_UV_IF(rv < 0, rv); 53 | } 54 | 55 | void Loop::eventLoop() { 56 | int rv; 57 | 58 | rv = uv_run(&loop_, UV_RUN_DEFAULT); 59 | TP_THROW_ASSERT_IF(rv > 0) 60 | << ": uv_run returned with active handles or requests"; 61 | } 62 | 63 | void Loop::cleanUpLoop() { 64 | int rv; 65 | 66 | uv_ref(reinterpret_cast(&async_)); 67 | uv_close(reinterpret_cast(&async_), nullptr); 68 | 69 | rv = uv_run(&loop_, UV_RUN_NOWAIT); 70 | TP_THROW_ASSERT_IF(rv > 0) 71 | << ": uv_run returned with active handles or requests"; 72 | 73 | // Release resources associated with loop. 74 | rv = uv_loop_close(&loop_); 75 | TP_THROW_UV_IF(rv < 0, rv); 76 | } 77 | 78 | void Loop::uvAsyncCb(uv_async_t* handle) { 79 | auto& loop = *reinterpret_cast(handle->data); 80 | loop.runDeferredFunctionsFromEventLoop(); 81 | } 82 | 83 | } // namespace uv 84 | } // namespace transport 85 | } // namespace tensorpipe 86 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/loop.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | namespace tensorpipe { 24 | namespace transport { 25 | namespace uv { 26 | // Wraps a libuv loop together with the async handle used to wake it up. 27 | class Loop final : public EventLoopDeferredExecutor { 28 | public: 29 | Loop(); 30 | // Raw pointer to the wrapped libuv loop. 31 | uv_loop_t* ptr() { 32 | return &loop_; 33 | } 34 | // Reads the closed flag; const so it can be queried through a const Loop. 35 | bool closed() const { 36 | return closed_; 37 | } 38 | // Sets the closed flag; only the first call has any further effect. 39 | void close(); 40 | // Calls close(), then joins the loop thread at most once. 41 | void join(); 42 | // Calls join(). 43 | ~Loop() noexcept; 44 | 45 | protected: 46 | // Event loop thread entry function. 47 | void eventLoop() override; 48 | 49 | // Clean up after event loop transitioned to on-demand. 50 | void cleanUpLoop() override; 51 | 52 | // Wake up the event loop. 53 | void wakeupEventLoopToDeferFunction() override; 54 | 55 | private: 56 | uv_loop_t loop_; 57 | uv_async_t async_; 58 | std::atomic closed_{false}; 59 | std::atomic joined_{false}; 60 | 61 | // This function is called by the event loop thread whenever 62 | // we have to run a number of deferred functions. 63 | static void uvAsyncCb(uv_async_t* handle); 64 | }; 65 | 66 | } // namespace uv 67 | } // namespace transport 68 | } // namespace tensorpipe 69 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/sockaddr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | namespace tensorpipe { 19 | namespace transport { 20 | namespace uv { 21 | 22 | class Sockaddr final : public tensorpipe::Sockaddr { 23 | public: 24 | static Sockaddr createInetSockAddr(const std::string& str); 25 | 26 | Sockaddr(const struct sockaddr* addr, socklen_t addrlen) { 27 | TP_ARG_CHECK(addr != nullptr); 28 | TP_ARG_CHECK_LE(addrlen, sizeof(addr_)); 29 | // Ensure the sockaddr_storage is zeroed, because we don't always 30 | // write to all fields in the `sockaddr_[in|in6]` structures. 31 | std::memset(&addr_, 0, sizeof(addr_)); 32 | std::memcpy(&addr_, addr, addrlen); 33 | addrlen_ = addrlen; 34 | } 35 | 36 | inline const struct sockaddr* addr() const override { 37 | return reinterpret_cast(&addr_); 38 | } 39 | 40 | inline struct sockaddr* addr() { 41 | return reinterpret_cast(&addr_); 42 | } 43 | 44 | inline socklen_t addrlen() const override { 45 | return addrlen_; 46 | } 47 | 48 | std::string str() const; 49 | 50 | private: 51 | struct sockaddr_storage addr_; 52 | socklen_t addrlen_; 53 | }; 54 | 55 | } // namespace uv 56 | } // namespace transport 57 | } // namespace tensorpipe 58 | -------------------------------------------------------------------------------- /tensorpipe/transport/uv/utility.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | namespace tensorpipe { 20 | namespace transport { 21 | namespace uv { 22 | 23 | std::tuple lookupAddrForIface(std::string iface); 24 | 25 | std::tuple lookupAddrForHostname(); 26 | 27 | // Try to replicate the same logic used by NCCL to find a node's own address. 28 | // Roughly, it returns the "first" usable address it can find, and prioritizes 29 | // the interfaces with an `ib` prefix and de-prioritizes those with a `docker` 30 | // or `lo` prefix. It can optionally return only IPv4 or only IPv6 addresses. 31 | std::tuple lookupAddrLikeNccl( 32 | optional familyFilter = nullopt); 33 | 34 | } // namespace uv 35 | } // namespace transport 36 | } // namespace tensorpipe 37 | -------------------------------------------------------------------------------- /third_party/README.md: -------------------------------------------------------------------------------- 1 | # third_party 2 | 3 | This directory includes dependencies as [submodules][submodules]. 4 | 5 | [submodules]: https://git-scm.com/book/en/v2/Git-Tools-Submodules 6 | 7 | ## Build dependencies 8 | 9 | * **libuv** is a multi-platform support library with a focus on asynchronous I/O. 10 | 11 | ## Test dependencies 12 | 13 | * **backward-cpp** is a beautiful stack trace pretty printer for C++. 14 | * **googletest** is a C++ test framework. 15 | --------------------------------------------------------------------------------