├── .gitignore ├── .pre-commit-config.yaml ├── include └── ucxpp │ ├── ucxpp.h │ ├── detail │ ├── noncopyable.h │ ├── serdes.h │ └── debug.h │ ├── error.h │ ├── config.h │ ├── worker.h │ ├── context.h │ ├── task.h │ ├── address.h │ ├── endpoint.h │ ├── awaitable.h │ └── memory.h ├── examples ├── include │ ├── worker_epoll.h │ ├── acceptor.h │ ├── connector.h │ ├── ep_transmission.h │ └── socket │ │ ├── tcp_listener.h │ │ ├── channel.h │ │ ├── event_loop.h │ │ └── tcp_connection.h ├── acceptor.cc ├── connector.cc ├── worker_epoll.cc ├── ep_transmission.cc ├── socket │ ├── channel.cc │ ├── tcp_listener.cc │ ├── tcp_connection.cc │ └── event_loop.cc ├── helloworld.cc └── perftest.cc ├── src ├── config.cc ├── awaitable.cc ├── address.cc ├── context.cc ├── worker.cc ├── endpoint.cc └── memory.cc ├── README.md ├── .github └── workflows │ ├── build.yml │ └── pages.yml ├── CMakeLists.txt ├── .clang-format └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .cache 3 | .vscode 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-clang-format 3 | rev: v15.0.4 4 | hooks: 5 | - id: clang-format -------------------------------------------------------------------------------- /include/ucxpp/ucxpp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ucxpp/address.h" 4 | #include "ucxpp/context.h" 5 | #include "ucxpp/endpoint.h" 6 | #include "ucxpp/task.h" 7 | #include "ucxpp/worker.h" -------------------------------------------------------------------------------- /examples/include/worker_epoll.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "socket/channel.h" 4 | #include 
"socket/event_loop.h" 5 | #include 6 | 7 | #include 8 | 9 | namespace ucxpp { 10 | 11 | void register_loop(std::shared_ptr worker, 12 | std::shared_ptr loop); 13 | 14 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/detail/noncopyable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace ucxpp { 4 | 5 | class noncopyable { 6 | public: 7 | noncopyable() = default; 8 | noncopyable(noncopyable &&) = default; 9 | noncopyable(noncopyable const &) = delete; 10 | noncopyable &operator=(noncopyable const &) = delete; 11 | noncopyable &operator=(noncopyable &&) = default; 12 | }; 13 | 14 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/include/acceptor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "socket/tcp_connection.h" 4 | #include "socket/tcp_listener.h" 5 | #include 6 | 7 | #include "ucxpp/address.h" 8 | #include "ucxpp/endpoint.h" 9 | #include "ucxpp/task.h" 10 | 11 | namespace ucxpp { 12 | 13 | class acceptor { 14 | std::shared_ptr worker_; 15 | std::shared_ptr listener_; 16 | local_address address_; 17 | 18 | public: 19 | acceptor(std::shared_ptr worker, 20 | std::shared_ptr listener); 21 | acceptor(acceptor &&) = default; 22 | acceptor &operator=(acceptor &&) = default; 23 | task> accept(); 24 | }; 25 | 26 | } // namespace ucxpp -------------------------------------------------------------------------------- /src/config.cc: -------------------------------------------------------------------------------- 1 | #include "ucxpp/config.h" 2 | 3 | #include "ucxpp/error.h" 4 | namespace ucxpp { 5 | 6 | config::config(char const *env_prefix, char const *filename) { 7 | check_ucs_status(::ucp_config_read(env_prefix, filename, &config_), 8 | "failed to read ucp config"); 9 | } 10 | 11 | config::~config() { 
::ucp_config_release(config_); } 12 | 13 | ucp_config_t *config::handle() const { return config_; } 14 | 15 | void config::modify(char const *name, char const *value) { 16 | check_ucs_status(::ucp_config_modify(config_, name, value), 17 | "failed to modify ucp config"); 18 | } 19 | 20 | void config::print() const { 21 | ::ucp_config_print(config_, stdout, nullptr, UCS_CONFIG_PRINT_CONFIG); 22 | } 23 | 24 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/acceptor.cc: -------------------------------------------------------------------------------- 1 | #include "acceptor.h" 2 | 3 | #include "ep_transmission.h" 4 | #include "socket/tcp_connection.h" 5 | 6 | #include "ucxpp/address.h" 7 | #include "ucxpp/endpoint.h" 8 | 9 | namespace ucxpp { 10 | 11 | acceptor::acceptor(std::shared_ptr worker, 12 | std::shared_ptr listener) 13 | : worker_(worker), listener_(listener), address_(worker->get_address()) {} 14 | 15 | task> acceptor::accept() { 16 | auto channel = co_await listener_->accept(); 17 | socket::tcp_connection connection(channel); 18 | auto endpoint = co_await from_tcp_connection(connection, worker_); 19 | co_await send_address(address_, connection); 20 | co_return endpoint; 21 | } 22 | 23 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/include/connector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "socket/tcp_connection.h" 4 | #include 5 | 6 | #include "ucxpp/address.h" 7 | #include "ucxpp/endpoint.h" 8 | #include "ucxpp/worker.h" 9 | 10 | namespace ucxpp { 11 | 12 | class connector { 13 | std::shared_ptr worker_; 14 | std::shared_ptr loop_; 15 | std::string hostname_; 16 | uint16_t port_; 17 | local_address address_; 18 | 19 | public: 20 | connector(std::shared_ptr worker, 21 | std::shared_ptr loop, 22 | std::string const &hostname, uint16_t port); 23 | 
connector(connector &&) = default; 24 | connector &operator=(connector &&) = default; 25 | task> connect(); 26 | }; 27 | 28 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/connector.cc: -------------------------------------------------------------------------------- 1 | #include "connector.h" 2 | 3 | #include "ep_transmission.h" 4 | 5 | #include "ucxpp/address.h" 6 | 7 | namespace ucxpp { 8 | 9 | connector::connector(std::shared_ptr worker, 10 | std::shared_ptr loop, 11 | std::string const &hostname, uint16_t port) 12 | : worker_(worker), loop_(loop), hostname_(hostname), port_(port), 13 | address_(worker->get_address()) {} 14 | 15 | task> connector::connect() { 16 | auto connection = 17 | co_await socket::tcp_connection::connect(loop_, hostname_, port_); 18 | co_await send_address(address_, *connection); 19 | auto endpoint = co_await from_tcp_connection(*connection, worker_); 20 | co_return endpoint; 21 | } 22 | 23 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/worker_epoll.cc: -------------------------------------------------------------------------------- 1 | #include "worker_epoll.h" 2 | 3 | #include "socket/channel.h" 4 | 5 | namespace ucxpp { 6 | 7 | void register_loop(std::shared_ptr worker, 8 | std::shared_ptr loop) { 9 | auto event_channel = 10 | std::make_shared(worker->event_fd(), loop); 11 | event_channel->set_event_loop(loop); 12 | event_channel->set_readable_callback([worker, event_channel]() { 13 | do { 14 | while (worker->progress()) { 15 | } 16 | } while (!worker->arm()); 17 | if (worker.use_count() > 2) { 18 | event_channel->wait_readable(); 19 | } else { 20 | event_channel->set_event_loop(nullptr); 21 | event_channel->set_readable_callback([]() {}); 22 | } 23 | }); 24 | event_channel->wait_readable(); 25 | } 26 | 27 | } // namespace ucxpp 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UCX++ 2 | 3 | [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 4 | 5 | [Documentation](https://liuhaohua.com/ucxpp/) | [Examples](https://liuhaohua.com/ucxpp/examples.html) 6 | 7 | This library takes the pain out of writing asynchronous UCX code. 8 | 9 | ## Quick Example 10 | 11 | See [helloworld.cc](./examples/helloworld.cc) for an API usage example. 12 | 13 | ## Building 14 | 15 | Requires a C++ compiler with C++20 support and the `ucx` development headers installed. 16 | 17 | ```bash 18 | git clone https://github.com/howardlau1999/ucxpp && cd ucxpp 19 | cmake -Bbuild -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR . 20 | cmake --build build 21 | 22 | # To install 23 | cmake --install build 24 | ``` 25 | 26 | ## Developing 27 | 28 | Install `clang-format` and `pre-commit`. 
29 | 30 | ```bash 31 | pip install pre-commit 32 | pre-commit install 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/include/ep_transmission.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "socket/tcp_connection.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace ucxpp { 10 | 11 | /** 12 | * @brief Accept a UCX endpoint from a remote peer by reading the peer's length-prefixed worker address from the TCP connection 13 | * 14 | * @param connection The TCP connection to read the remote address from 15 | * @param worker The UCX worker 16 | * @return task> A coroutine that returns the 17 | * accepted endpoint 18 | */ 19 | task> 20 | from_tcp_connection(socket::tcp_connection &connection, 21 | std::shared_ptr worker); 22 | 23 | /** 24 | * @brief Send the serialized local address to a remote peer 25 | * @param address The local worker address to serialize and send 26 | * @param connection The TCP connection to send the address over 27 | * @return task A coroutine 28 | */ 29 | task send_address(local_address const &address, 30 | socket::tcp_connection &connection); 31 | 32 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/include/socket/tcp_listener.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "socket/event_loop.h" 4 | #include 5 | 6 | #include "ucxpp/detail/noncopyable.h" 7 | 8 | namespace ucxpp { 9 | namespace socket { 10 | 11 | /** 12 | * @brief This class is used to listen for incoming TCP connections. 
13 | * 14 | */ 15 | class tcp_listener : public noncopyable { 16 | std::shared_ptr channel_; 17 | 18 | public: 19 | struct accept_awaitable { 20 | std::shared_ptr channel_; 21 | void *buffer_; 22 | int client_fd_; 23 | int do_io(); 24 | 25 | public: 26 | accept_awaitable(std::shared_ptr channel); 27 | bool await_ready(); 28 | void await_suspend(std::coroutine_handle<> h); 29 | std::shared_ptr await_resume(); 30 | }; 31 | tcp_listener(std::shared_ptr loop, std::string const &hostname, 32 | uint16_t port); 33 | accept_awaitable accept(); 34 | }; 35 | 36 | } // namespace socket 37 | } // namespace ucxpp -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | strategy: 13 | matrix: 14 | os: [ubuntu-22.04, ubuntu-20.04] 15 | runs-on: ${{ matrix.os }} 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Install ninja 19 | run: sudo apt update && sudo apt install -y ninja-build 20 | - name: Setup gcc 10 as default 21 | if: ${{ matrix.os == 'ubuntu-20.04' }} 22 | run: sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 23 | - name: Install UCX from source 24 | run: (curl -fsSL https://github.com/openucx/ucx/releases/download/v1.13.0/ucx-1.13.0.tar.gz | tar xz) && cd ucx-1.13.0 && ./configure --without-go --without-java && make -j$(nproc) && sudo make -j$(nproc) install 25 | - name: Configure 26 | run: cmake -GNinja -Bbuild . 
27 | - name: Build 28 | run: cmake --build build -------------------------------------------------------------------------------- /examples/include/socket/channel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace ucxpp { 8 | namespace socket { 9 | 10 | class event_loop; 11 | 12 | /** 13 | * @brief This class represents a pollable channel. 14 | * It holds a file descriptor, an optional event loop, and the readable/writable callbacks. 15 | */ 16 | class channel : public std::enable_shared_from_this { 17 | public: 18 | static std::function noop_callback; 19 | using callback_fn = std::function; 20 | 21 | private: 22 | int fd_; 23 | std::shared_ptr loop_; 24 | callback_fn readable_callback_; 25 | callback_fn writable_callback_; 26 | 27 | public: 28 | static void set_nonblocking(int fd); 29 | channel(int fd, std::shared_ptr loop = nullptr); 30 | int fd(); 31 | void set_event_loop(std::shared_ptr loop); 32 | void set_nonblocking(); 33 | void wait_readable(); 34 | void wait_writable(); 35 | void readable_callback(); 36 | void writable_callback(); 37 | void set_readable_callback(callback_fn &&callback); 38 | void set_writable_callback(callback_fn &&callback); 39 | std::shared_ptr loop(); 40 | ~channel(); 41 | }; 42 | 43 | } // namespace socket 44 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/include/socket/event_loop.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "socket/channel.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace ucxpp { 12 | namespace socket { 13 | 14 | /** 15 | * @brief This class is a loop that drives asynchronous I/O. 
16 | * 17 | */ 18 | class event_loop { 19 | int epoll_fd_; 20 | int close_event_fd_; 21 | const size_t max_events_; 22 | std::shared_mutex mutex_; 23 | std::unordered_map> channels_; 24 | std::vector events_; 25 | 26 | void register_channel(std::shared_ptr channel, 27 | struct epoll_event *event); 28 | 29 | public: 30 | event_loop(size_t max_events = 10); 31 | static std::shared_ptr new_loop(size_t max_events = 10); 32 | void poll(bool &close_triggered); 33 | void loop(); 34 | void close(); 35 | void register_read(std::shared_ptr channel); 36 | void register_write(std::shared_ptr channel); 37 | void deregister(socket::channel &channel); 38 | ~event_loop(); 39 | }; 40 | 41 | } // namespace socket 42 | } // namespace ucxpp -------------------------------------------------------------------------------- /src/awaitable.cc: -------------------------------------------------------------------------------- 1 | #include "ucxpp/awaitable.h" 2 | 3 | #include 4 | 5 | #include "ucxpp/endpoint.h" 6 | #include "ucxpp/worker.h" 7 | 8 | namespace ucxpp { 9 | 10 | ep_flush_awaitable::ep_flush_awaitable(std::shared_ptr endpoint) 11 | : endpoint_(endpoint) {} 12 | 13 | bool ep_flush_awaitable::await_ready() noexcept { 14 | auto send_param = build_param(); 15 | auto request = ::ucp_ep_flush_nbx(endpoint_->handle(), &send_param); 16 | return check_request_ready(request); 17 | } 18 | 19 | ep_close_awaitable::ep_close_awaitable(std::shared_ptr endpoint) 20 | : endpoint_(endpoint) {} 21 | 22 | bool ep_close_awaitable::await_ready() noexcept { 23 | auto send_param = build_param(); 24 | auto request = ::ucp_ep_close_nbx(endpoint_->handle(), &send_param); 25 | if (check_request_ready(request)) { 26 | return true; 27 | } 28 | endpoint_->close_request_ = request; 29 | return false; 30 | } 31 | 32 | worker_flush_awaitable::worker_flush_awaitable(std::shared_ptr worker) 33 | : worker_(worker) {} 34 | 35 | bool worker_flush_awaitable::await_ready() noexcept { 36 | auto send_param = build_param(); 
37 | auto request = ::ucp_worker_flush_nbx(worker_->handle(), &send_param); 38 | return check_request_ready(request); 39 | } 40 | 41 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/detail/serdes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace ucxpp { 10 | namespace detail { 11 | 12 | static inline uint16_t ntoh(uint16_t const &value) { return ::be16toh(value); } 13 | 14 | static inline uint32_t ntoh(uint32_t const &value) { return ::be32toh(value); } 15 | 16 | static inline uint64_t ntoh(uint64_t const &value) { return ::be64toh(value); } 17 | 18 | static inline uint16_t hton(uint16_t const &value) { return ::htobe16(value); } 19 | 20 | static inline uint32_t hton(uint32_t const &value) { return ::htobe32(value); } 21 | 22 | static inline uint64_t hton(uint64_t const &value) { return ::htobe64(value); } 23 | 24 | template ::value>::type> 26 | void serialize(T const &value, It &it) { 27 | T nvalue = hton(value); 28 | std::copy_n(reinterpret_cast(&nvalue), sizeof(T), it); 29 | } 30 | 31 | template ::value>::type> 33 | void deserialize(It &it, T &value) { 34 | std::copy_n(it, sizeof(T), reinterpret_cast(&value)); 35 | it += sizeof(T); 36 | value = ntoh(value); 37 | } 38 | 39 | } // namespace detail 40 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/error.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | namespace ucxpp { 12 | 13 | constexpr size_t kErrorStringBufferSize = 1024; 14 | 15 | static inline void throw_with(const char *message) { 16 | throw std::runtime_error(message); 17 | } 18 | 19 | template 20 | static inline void 
throw_with(const char *format, Args... args) { 21 | char buffer[kErrorStringBufferSize]; 22 | ::snprintf(buffer, sizeof(buffer), format, args...); 23 | throw std::runtime_error(buffer); 24 | } 25 | 26 | static inline void check_ucs_status(ucs_status_t status, const char *message) { 27 | if (status == UCS_OK) [[likely]] { 28 | return; 29 | } 30 | if (status == UCS_INPROGRESS) [[likely]] { 31 | return; 32 | } 33 | 34 | throw_with("%s: %s (status=%d)", message, ::ucs_status_string(status), 35 | status); 36 | } 37 | 38 | static inline void check_rc(int rc, const char *message) { 39 | if (rc != 0) [[unlikely]] { 40 | throw_with("%s: %s (rc=%d)", message, ::strerror(rc), rc); 41 | } 42 | } 43 | 44 | static inline void check_ptr(void *ptr, const char *message) { 45 | if (ptr == nullptr) [[unlikely]] { 46 | throw_with("%s: %s (errno=%d)", message, ::strerror(errno), errno); 47 | } 48 | } 49 | 50 | static inline void check_errno(int rc, const char *message) { 51 | if (rc < 0) [[unlikely]] { 52 | throw_with("%s: %s (errno=%d)", message, ::strerror(errno), errno); 53 | } 54 | } 55 | 56 | } // namespace ucxpp -------------------------------------------------------------------------------- /.github/workflows/pages.yml: -------------------------------------------------------------------------------- 1 | name: Doxygen Action 2 | 3 | # Controls when the action will run. 
Triggers the workflow on pushes 4 | # to the master branch or on manual dispatch 5 | on: 6 | push: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | concurrency: 11 | group: "pages" 12 | cancel-in-progress: true 13 | 14 | permissions: 15 | contents: read 16 | pages: write 17 | id-token: write 18 | 19 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 20 | jobs: 21 | # This workflow contains a single job called "deploy" 22 | 23 | deploy: 24 | # The type of runner that the job will run on 25 | runs-on: ubuntu-latest 26 | environment: 27 | name: github-pages 28 | url: ${{ steps.deployment.outputs.page_url }} 29 | # Steps represent a sequence of tasks that will be executed as part of the job 30 | steps: 31 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 32 | - uses: actions/checkout@v3 33 | - name: Doxygen Action 34 | uses: mattnotmitt/doxygen-action@v1 35 | with: 36 | # Path to Doxyfile 37 | doxyfile-path: "./Doxyfile" # default is ./Doxyfile 38 | # Working directory 39 | working-directory: "." # default is . 40 | - name: Setup Pages 41 | uses: actions/configure-pages@v1 42 | - name: Upload artifact 43 | uses: actions/upload-pages-artifact@v1 44 | with: 45 | # Upload only the html directory 46 | path: 'html' 47 | - name: Deploy to GitHub Pages 48 | id: deployment 49 | uses: actions/deploy-pages@main -------------------------------------------------------------------------------- /examples/include/socket/tcp_connection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "socket/channel.h" 4 | #include 5 | #include 6 | #include 7 | 8 | #include "ucxpp/task.h" 9 | 10 | namespace ucxpp { 11 | namespace socket { 12 | 13 | /** 14 | * @brief This class represents an established TCP connection. 
15 | * 16 | */ 17 | class tcp_connection { 18 | std::shared_ptr channel_; 19 | 20 | public: 21 | class rw_awaitable { 22 | std::shared_ptr channel_; 23 | void *buffer_; 24 | int n_; 25 | size_t length_; 26 | bool write_; 27 | int do_io(); 28 | 29 | public: 30 | rw_awaitable(std::shared_ptr channel, bool write, void *buffer, 31 | size_t length); 32 | bool await_ready(); 33 | void await_suspend(std::coroutine_handle<> h); 34 | int await_resume(); 35 | }; 36 | class connect_awaitable { 37 | int rc_; 38 | std::shared_ptr channel_; 39 | 40 | public: 41 | connect_awaitable(std::shared_ptr loop, 42 | std::string const &hostname, uint16_t port); 43 | bool await_ready(); 44 | void await_suspend(std::coroutine_handle<> h); 45 | std::shared_ptr await_resume(); 46 | }; 47 | static connect_awaitable connect(std::shared_ptr loop, 48 | std::string const &hostname, uint16_t port); 49 | tcp_connection(std::shared_ptr channel); 50 | rw_awaitable recv(void *buffer, size_t length); 51 | rw_awaitable send(const void *buffer, size_t length); 52 | }; 53 | 54 | } // namespace socket 55 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "ucxpp/detail/noncopyable.h" 6 | 7 | namespace ucxpp { 8 | 9 | /** 10 | * @brief Stores UCX configuration options. 11 | * 12 | */ 13 | class config : public noncopyable { 14 | ucp_config_t *config_; 15 | 16 | public: 17 | /** 18 | * @brief Construct a new config object 19 | * 20 | * @param env_prefix If non-NULL, the routine searches for the environment 21 | * variables that start with _UCX_ prefix. Otherwise, the routine 22 | * searches for the environment variables that start with UCX_ prefix. 23 | * @param filename If non-NULL, read configuration from the file defined by 24 | * filename. 
If the file does not exist, it will be ignored and no error 25 | * reported to the application. 26 | */ 27 | config(char const *env_prefix = nullptr, char const *filename = nullptr); 28 | 29 | /** 30 | * @brief Destroy the config object and release memory 31 | * 32 | */ 33 | ~config(); 34 | 35 | /** 36 | * @brief Get the native UCX configuration handle 37 | * 38 | * @return ucp_config_t* The native UCX configuration handle 39 | */ 40 | ucp_config_t *handle() const; 41 | 42 | /** 43 | * @brief Modify a configuration option 44 | * 45 | * @param name The name of the configuration option to modify 46 | * @param value The new value of the configuration option 47 | */ 48 | void modify(char const *name, char const *value); 49 | 50 | /** 51 | * @brief Print the configuration to the standard output 52 | * 53 | */ 54 | void print() const; 55 | }; 56 | 57 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/ep_transmission.cc: -------------------------------------------------------------------------------- 1 | #include "ep_transmission.h" 2 | 3 | #include 4 | 5 | namespace ucxpp { 6 | 7 | task> 8 | from_tcp_connection(socket::tcp_connection &conncetion, 9 | std::shared_ptr worker) { 10 | size_t address_length_read = 0; 11 | char address_length_buffer[sizeof(size_t)]; 12 | while (address_length_read < sizeof(size_t)) { 13 | int n = 14 | co_await conncetion.recv(address_length_buffer + address_length_read, 15 | sizeof(size_t) - address_length_read); 16 | if (n == 0) { 17 | throw std::runtime_error("failed to read address length"); 18 | } 19 | address_length_read += n; 20 | } 21 | size_t address_length; 22 | char *p = address_length_buffer; 23 | detail::deserialize(p, address_length); 24 | std::vector address_buffer(address_length); 25 | size_t address_read = 0; 26 | while (address_read < address_length) { 27 | int n = co_await conncetion.recv(&address_buffer[address_read], 28 | address_length - address_read); 29 | if 
(n == 0) { 30 | throw std::runtime_error("failed to read address"); 31 | } 32 | address_read += n; 33 | } 34 | auto remote_addr = remote_address(std::move(address_buffer)); 35 | co_return std::make_shared(worker, remote_addr); 36 | } 37 | 38 | task send_address(local_address const &address, 39 | socket::tcp_connection &connection) { 40 | auto buffer = address.serialize(); 41 | size_t sent = 0; 42 | while (sent < buffer.size()) { 43 | int n = co_await connection.send(&buffer[sent], buffer.size() - sent); 44 | if (n < 0) { 45 | throw std::runtime_error("send failed"); 46 | } 47 | sent += n; 48 | } 49 | co_return; 50 | } 51 | 52 | } // namespace ucxpp -------------------------------------------------------------------------------- /src/address.cc: -------------------------------------------------------------------------------- 1 | #include "ucxpp/address.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include "ucxpp/task.h" 12 | #include "ucxpp/worker.h" 13 | 14 | #include "ucxpp/detail/debug.h" 15 | #include "ucxpp/detail/serdes.h" 16 | 17 | namespace ucxpp { 18 | 19 | local_address::local_address(std::shared_ptr worker, 20 | ucp_address_t *address, size_t address_length) 21 | : worker_(worker), address_(address), address_length_(address_length) {} 22 | 23 | local_address::local_address(local_address &&other) 24 | : worker_(std::move(other.worker_)), 25 | address_(std::exchange(other.address_, nullptr)), 26 | address_length_(other.address_length_) {} 27 | 28 | local_address &local_address::operator=(local_address &&other) { 29 | worker_ = std::move(other.worker_); 30 | address_ = std::exchange(other.address_, nullptr); 31 | address_length_ = other.address_length_; 32 | return *this; 33 | } 34 | 35 | std::vector local_address::serialize() const { 36 | std::vector buffer; 37 | auto it = std::back_inserter(buffer); 38 | detail::serialize(address_length_, it); 39 | std::copy_n(reinterpret_cast(address_), 
address_length_, it); 40 | return buffer; 41 | } 42 | 43 | const ucp_address_t *local_address::get_address() const { return address_; } 44 | 45 | size_t local_address::get_length() const { return address_length_; } 46 | 47 | local_address::~local_address() { 48 | if (address_ == nullptr) [[unlikely]] { 49 | return; 50 | } 51 | ::ucp_worker_release_address(worker_->worker_, address_); 52 | } 53 | 54 | remote_address::remote_address(std::vector const &address) 55 | : address_(address) {} 56 | 57 | remote_address::remote_address(std::vector &&address) 58 | : address_(std::move(address)) {} 59 | 60 | const ucp_address_t *remote_address::get_address() const { 61 | return reinterpret_cast(address_.data()); 62 | } 63 | 64 | size_t remote_address::get_length() const { return address_.size(); } 65 | 66 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/socket/channel.cc: -------------------------------------------------------------------------------- 1 | #include "socket/channel.h" 2 | 3 | #include "socket/event_loop.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "ucxpp/error.h" 13 | 14 | #include "ucxpp/detail/debug.h" 15 | 16 | namespace ucxpp { 17 | namespace socket { 18 | 19 | channel::channel(int fd, std::shared_ptr loop) 20 | : fd_(fd), loop_(loop), readable_callback_(noop_callback), 21 | writable_callback_(noop_callback) {} 22 | 23 | int channel::fd() { return fd_; } 24 | 25 | std::function channel::noop_callback = []() {}; 26 | 27 | void channel::set_event_loop(std::shared_ptr loop) { loop_ = loop; } 28 | 29 | void channel::set_nonblocking(int fd) { 30 | int opts = fcntl(fd, F_GETFL); 31 | check_errno(opts, "failed to get fcntl flags"); 32 | opts |= O_NONBLOCK; 33 | check_errno(fcntl(fd, F_SETFL, opts), "failed to set fcntl flags"); 34 | } 35 | 36 | void channel::set_nonblocking() { set_nonblocking(fd_); } 37 | 38 | void 
channel::writable_callback() { 39 | loop_->deregister(*this); 40 | writable_callback_(); 41 | } 42 | 43 | void channel::readable_callback() { 44 | loop_->deregister(*this); 45 | readable_callback_(); 46 | } 47 | 48 | void channel::wait_readable() { 49 | loop_->register_read(this->shared_from_this()); 50 | } 51 | 52 | void channel::wait_writable() { 53 | loop_->register_write(this->shared_from_this()); 54 | } 55 | 56 | void channel::set_writable_callback(callback_fn &&callback) { 57 | writable_callback_ = callback; 58 | } 59 | 60 | void channel::set_readable_callback(callback_fn &&callback) { 61 | readable_callback_ = callback; 62 | } 63 | 64 | std::shared_ptr channel::loop() { return loop_; } 65 | 66 | channel::~channel() { 67 | if (!loop_) { 68 | return; 69 | } 70 | loop_->deregister(*this); 71 | assert(fd_ > 0); 72 | if (auto rc = ::close(fd_); rc != 0) [[unlikely]] { 73 | UCXPP_LOG_ERROR("failed to close fd %d: %s (errno=%d)", fd_, 74 | strerror(errno), errno); 75 | } else { 76 | UCXPP_LOG_TRACE("closed fd %d", fd_); 77 | } 78 | } 79 | 80 | } // namespace socket 81 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/detail/debug.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #ifndef SOURCE_PATH_LENGTH 6 | #define SOURCE_PATH_LENGTH 0 7 | #endif 8 | 9 | #define __UCXPP_FILENAME__ (&__FILE__[SOURCE_PATH_LENGTH]) 10 | 11 | namespace ucxpp { 12 | 13 | enum class LogLevel { 14 | TRACE, 15 | DEBUG, 16 | INFO, 17 | WARN, 18 | ERROR, 19 | }; 20 | } 21 | 22 | constexpr static inline ucxpp::LogLevel ucxpp_log_level = 23 | ucxpp::LogLevel::DEBUG; 24 | 25 | #define UCXPP_LOG_TRACE(msg, ...) 
\ 26 | do { \ 27 | if (ucxpp_log_level > ucxpp::LogLevel::TRACE) \ 28 | break; \ 29 | printf("[TRACE] [%s:%d] " msg "\n", __UCXPP_FILENAME__, \ 30 | __LINE__ __VA_OPT__(, ) __VA_ARGS__); \ 31 | } while (0) 32 | 33 | #define UCXPP_LOG_DEBUG(msg, ...) \ 34 | do { \ 35 | if (ucxpp_log_level > ucxpp::LogLevel::DEBUG) \ 36 | break; \ 37 | printf("[DEBUG] [%s:%d] " msg "\n", __UCXPP_FILENAME__, \ 38 | __LINE__ __VA_OPT__(, ) __VA_ARGS__); \ 39 | } while (0) 40 | 41 | #define UCXPP_LOG_INFO(msg, ...) \ 42 | do { \ 43 | if (ucxpp_log_level > ucxpp::LogLevel::INFO) \ 44 | break; \ 45 | printf("[INFO ] [%s:%d] " msg "\n", __UCXPP_FILENAME__, \ 46 | __LINE__ __VA_OPT__(, ) __VA_ARGS__); \ 47 | } while (0) 48 | 49 | #define UCXPP_LOG_ERROR(msg, ...) \ 50 | do { \ 51 | printf("[ERROR] [%s:%d] " msg "\n", __UCXPP_FILENAME__, \ 52 | __LINE__ __VA_OPT__(, ) __VA_ARGS__); \ 53 | } while (0) 54 | -------------------------------------------------------------------------------- /src/context.cc: -------------------------------------------------------------------------------- 1 | #include "ucxpp/context.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "ucxpp/awaitable.h" 10 | #include "ucxpp/config.h" 11 | #include "ucxpp/error.h" 12 | 13 | namespace ucxpp { 14 | 15 | context::builder::builder() : features_(0), print_config_(false) {} 16 | 17 | std::shared_ptr context::builder::build() { 18 | return std::make_shared(features_, print_config_, enable_mt_); 19 | } 20 | 21 | context::builder &context::builder::enable_print_config() { 22 | print_config_ = true; 23 | return *this; 24 | } 25 | 26 | context::builder &context::builder::enable_wakeup() { 27 | features_ |= UCP_FEATURE_WAKEUP; 28 | return *this; 29 | } 30 | 31 | context::builder &context::builder::enable_tag() { 32 | features_ |= UCP_FEATURE_TAG; 33 | return *this; 34 | } 35 | 36 | context::builder &context::builder::enable_stream() { 37 | features_ |= UCP_FEATURE_STREAM; 38 | return *this; 39 | } 40 
| 41 | context::builder &context::builder::enable_am() { 42 | features_ |= UCP_FEATURE_AM; 43 | return *this; 44 | } 45 | 46 | context::builder &context::builder::enable_rma() { 47 | features_ |= UCP_FEATURE_RMA; 48 | return *this; 49 | } 50 | 51 | context::builder &context::builder::enable_amo32() { 52 | features_ |= UCP_FEATURE_AMO32; 53 | return *this; 54 | } 55 | 56 | context::builder &context::builder::enable_amo64() { 57 | features_ |= UCP_FEATURE_AMO64; 58 | return *this; 59 | } 60 | 61 | context::builder &context::builder::enable_mt() { 62 | enable_mt_ = true; 63 | return *this; 64 | } 65 | 66 | context::context(uint64_t features, bool print_config, bool enable_mt) 67 | : features_(features) { 68 | config config; 69 | ucp_params_t ucp_params; 70 | ucp_params.field_mask = UCP_PARAM_FIELD_FEATURES; 71 | ucp_params.features = features; 72 | if (enable_mt) { 73 | ucp_params.field_mask |= UCP_PARAM_FIELD_MT_WORKERS_SHARED; 74 | ucp_params.mt_workers_shared = 1; 75 | } 76 | check_ucs_status(::ucp_init(&ucp_params, config.handle(), &context_), 77 | "failed to init ucp"); 78 | if (print_config) { 79 | config.print(); 80 | } 81 | } 82 | 83 | uint64_t context::features() const { return features_; } 84 | 85 | ucp_context_h context::handle() const { return context_; } 86 | 87 | context::~context() { ::ucp_cleanup(context_); } 88 | 89 | } // namespace ucxpp -------------------------------------------------------------------------------- /src/worker.cc: -------------------------------------------------------------------------------- 1 | #include "ucxpp/worker.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "ucxpp/address.h" 15 | #include "ucxpp/awaitable.h" 16 | #include "ucxpp/error.h" 17 | 18 | #include "ucxpp/detail/debug.h" 19 | 20 | namespace ucxpp { 21 | 22 | worker::worker(std::shared_ptr ctx) : ctx_(ctx), event_fd_(-1) { 23 | ucp_worker_params_t worker_params; 24 
| worker_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; 25 | worker_params.thread_mode = UCS_THREAD_MODE_SINGLE; 26 | check_ucs_status(::ucp_worker_create(ctx->context_, &worker_params, &worker_), 27 | "failed to create ucp worker"); 28 | if (ctx_->features() & UCP_FEATURE_WAKEUP) { 29 | check_ucs_status(::ucp_worker_get_efd(worker_, &event_fd_), 30 | "failed to get ucp worker event fd"); 31 | } 32 | } 33 | 34 | int worker::event_fd() const { 35 | assert(event_fd_ != -1); 36 | return event_fd_; 37 | } 38 | 39 | std::shared_ptr worker::context_ptr() const { return ctx_; } 40 | 41 | local_address worker::get_address() const { 42 | ucp_address_t *address; 43 | size_t address_length; 44 | check_ucs_status(::ucp_worker_get_address(worker_, &address, &address_length), 45 | "failed to get address"); 46 | return ucxpp::local_address(shared_from_this(), address, address_length); 47 | } 48 | 49 | ucp_worker_h worker::handle() const { return worker_; } 50 | 51 | bool worker::progress() const { return ::ucp_worker_progress(worker_); } 52 | 53 | void worker::wait() const { 54 | check_ucs_status(::ucp_worker_wait(worker_), "failed to wait worker"); 55 | } 56 | 57 | bool worker::arm() const { 58 | auto status = ::ucp_worker_arm(worker_); 59 | if (status == UCS_ERR_BUSY) { 60 | return false; 61 | } 62 | check_ucs_status(status, "failed to arm worker"); 63 | return true; 64 | } 65 | 66 | tag_recv_awaitable worker::tag_recv(void *buffer, size_t length, ucp_tag_t tag, 67 | ucp_tag_t tag_mask) const { 68 | return tag_recv_awaitable(worker_, buffer, length, tag, tag_mask); 69 | } 70 | 71 | void worker::fence() { 72 | check_ucs_status(::ucp_worker_fence(worker_), "failed to fence worker"); 73 | } 74 | 75 | worker_flush_awaitable worker::flush() { 76 | return worker_flush_awaitable(this->shared_from_this()); 77 | } 78 | 79 | worker::~worker() { ::ucp_worker_destroy(worker_); } 80 | 81 | } // namespace ucxpp 
-------------------------------------------------------------------------------- /include/ucxpp/worker.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "ucxpp/address.h" 11 | #include "ucxpp/awaitable.h" 12 | #include "ucxpp/context.h" 13 | 14 | namespace ucxpp { 15 | 16 | /** 17 | * @brief Abstraction for a UCX worker. 18 | * 19 | */ 20 | class worker : public std::enable_shared_from_this { 21 | friend class local_address; 22 | friend class endpoint; 23 | ucp_worker_h worker_; 24 | std::shared_ptr ctx_; 25 | int event_fd_; 26 | 27 | public: 28 | /** 29 | * @brief Construct a new worker object 30 | * 31 | * @param ctx UCX context 32 | */ 33 | worker(std::shared_ptr ctx); 34 | 35 | /** 36 | * @brief Get the event fd for the worker. The wakeup feature must be enabled 37 | * for this to work. 38 | * 39 | * @return int 40 | */ 41 | int event_fd() const; 42 | 43 | /** 44 | * @brief Get the worker's context object 45 | * 46 | * @return std::shared_ptr The worker's context object 47 | */ 48 | std::shared_ptr context_ptr() const; 49 | 50 | /** 51 | * @brief Get the worker's UCX address 52 | * 53 | * @return local_address The worker's UCX address 54 | */ 55 | local_address get_address() const; 56 | 57 | /** 58 | * @brief Get the worker's native UCX handle 59 | * 60 | * @return ucp_worker_h The worker's native UCX handle 61 | */ 62 | ucp_worker_h handle() const; 63 | 64 | /** 65 | * @brief Progress the worker 66 | * 67 | * @return true If progress was made 68 | * @return false If no progress was made 69 | */ 70 | bool progress() const; 71 | 72 | /** 73 | * @brief Wait for an event on the worker. It should be called only after a 74 | * call to progress() returns false. 75 | * 76 | */ 77 | void wait() const; 78 | 79 | /** 80 | * @brief Arm the worker for next event notification. 
81 | * 82 | * @return true If the worker was armed 83 | * @return false If the worker has pending events. In this case, the user must 84 | * call progress() until it returns false. 85 | */ 86 | bool arm() const; 87 | 88 | /** 89 | * @brief Tag receive to the buffer 90 | * 91 | * @param buffer The buffer to receive to 92 | * @param length The length of the buffer 93 | * @param tag The tag to receive with 94 | * @param tag_mask The bit mask for tag matching, 0 means accepting any tag 95 | * @return tag_recv_awaitable A coroutine that returns a pair of number of 96 | * bytes received and the sender tag upon completion 97 | */ 98 | tag_recv_awaitable tag_recv(void *buffer, size_t length, ucp_tag_t tag, 99 | ucp_tag_t tag_mask = 0xFFFFFFFFFFFFFFFF) const; 100 | 101 | /** 102 | * @brief Fence the worker. Operations issued on the worker before the fence 103 | * are ensured to complete before operations issued after the fence. 104 | * 105 | */ 106 | void fence(); 107 | 108 | /** 109 | * @brief Flush the worker 110 | * 111 | * @return worker_flush_awaitable A coroutine that returns when the worker is 112 | * flushed 113 | */ 114 | worker_flush_awaitable flush(); 115 | 116 | ~worker(); 117 | }; 118 | 119 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/context.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | namespace ucxpp { 18 | 19 | /** 20 | * @brief Abstraction of a UCX context 21 | * 22 | */ 23 | class context : public std::enable_shared_from_this { 24 | friend class worker; 25 | friend class local_memory_handle; 26 | ucp_context_h context_; 27 | uint64_t features_; 28 | 29 | public: 30 | /** 31 | * @brief Context builder 32 | * 33 | */ 34 | class builder { 35 | uint64_t 
features_; 36 | bool print_config_; 37 | bool enable_mt_; 38 | 39 | public: 40 | builder(); 41 | /** 42 | * @brief Build and return a context object 43 | * 44 | * @return std::shared_ptr The built context object 45 | */ 46 | std::shared_ptr build(); 47 | 48 | /** 49 | * @brief Print the config to stdout when building context 50 | * 51 | * @return builder 52 | */ 53 | builder &enable_print_config(); 54 | 55 | /** 56 | * @brief Enable the wakeup feature 57 | * 58 | * @return builder& 59 | */ 60 | builder &enable_wakeup(); 61 | 62 | /** 63 | * @brief Enable tag-related operations 64 | * 65 | * @return builder& 66 | */ 67 | builder &enable_tag(); 68 | 69 | /** 70 | * @brief Enable stream-related operations 71 | * 72 | * @return builder& 73 | */ 74 | builder &enable_stream(); 75 | 76 | /** 77 | * @brief Enable active message feature 78 | * 79 | * @return builder& 80 | */ 81 | builder &enable_am(); 82 | 83 | /** 84 | * @brief Enable remote memory access feature 85 | * 86 | * @return builder& 87 | */ 88 | builder &enable_rma(); 89 | 90 | /** 91 | * @brief Enable atomic memory operations with 32-bit operands 92 | * 93 | * @return builder& 94 | */ 95 | builder &enable_amo32(); 96 | 97 | /** 98 | * @brief Enable atomic memory operations with 64-bit operands 99 | * 100 | * @return builder& 101 | */ 102 | builder &enable_amo64(); 103 | 104 | /** 105 | * @brief Enable multi-threading 106 | * 107 | * @return builder& 108 | */ 109 | builder &enable_mt(); 110 | }; 111 | 112 | /** 113 | * @brief Construct a new context object 114 | * 115 | * @param features Feature flags 116 | * @param print_config Print the config to stdout 117 | * @param enable_mt Enable multi-threading 118 | */ 119 | context(uint64_t features, bool print_config, bool enable_mt); 120 | 121 | /** 122 | * @brief Get the features of the context 123 | * 124 | * @return uint64_t Feature flags 125 | */ 126 | uint64_t features() const; 127 | 128 | /** 129 | * @brief Get the native UCX handle of the context 130 | * 131 | 
* @return ucp_context_h The native UCX handle 132 | */ 133 | ucp_context_h handle() const; 134 | 135 | /** 136 | * @brief Destroy the context object and release resources 137 | * 138 | */ 139 | ~context(); 140 | }; 141 | 142 | } // namespace ucxpp -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.3 FATAL_ERROR) 2 | project(ucxpp) 3 | set(CMAKE_CXX_STANDARD 20) 4 | 5 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") 6 | find_package(Threads REQUIRED) 7 | find_package(ucx REQUIRED) 8 | 9 | if (NOT CMAKE_BUILD_TYPE) 10 | set(CMAKE_BUILD_TYPE RelWithDebInfo) 11 | endif () 12 | 13 | string(LENGTH "${CMAKE_SOURCE_DIR}/" SOURCE_PATH_LENGTH) 14 | add_definitions("-DSOURCE_PATH_LENGTH=${SOURCE_PATH_LENGTH}") 15 | 16 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) 17 | option(UCXPP_BUILD_EXAMPLES "Build examples" ON) 18 | else() 19 | option(UCXPP_BUILD_EXAMPLES "Build examples" OFF) 20 | endif() 21 | 22 | set(UCXPP_SOURCE_FILES 23 | src/awaitable.cc 24 | src/context.cc 25 | src/worker.cc 26 | src/endpoint.cc 27 | src/address.cc 28 | src/memory.cc 29 | src/config.cc 30 | ) 31 | 32 | add_library(ucxpp STATIC ${UCXPP_SOURCE_FILES}) 33 | set(UCXPP_LINK_LIBRARIES ucx::ucp ucx::uct ucx::ucs Threads::Threads) 34 | list(APPEND 35 | UCXPP_COMPILE_OPTIONS 36 | PUBLIC 37 | -flto 38 | PRIVATE 39 | -fno-rtti 40 | -Wall 41 | -Wextra 42 | -pedantic 43 | -Werror 44 | ) 45 | list(APPEND 46 | UCXPP_LINK_OPTIONS 47 | PUBLIC 48 | -flto 49 | PRIVATE 50 | -fno-rtti 51 | ) 52 | 53 | if (CMAKE_CXX_COMPILER_ID MATCHES Clang) 54 | list(APPEND 55 | UCXPP_COMPILE_OPTIONS 56 | PUBLIC 57 | -stdlib=libc++ 58 | PUBLIC 59 | -fcoroutines-ts 60 | ) 61 | list(APPEND 62 | UCXPP_LINK_OPTIONS 63 | PUBLIC 64 | -stdlib=libc++ 65 | ) 66 | elseif (CMAKE_CXX_COMPILER_ID MATCHES GNU) 67 | if (CMAKE_CXX_COMPILER_VERSION 
VERSION_LESS "11") 68 | list(APPEND 69 | UCXPP_COMPILE_OPTIONS 70 | PUBLIC 71 | -fcoroutines 72 | ) 73 | endif () 74 | endif () 75 | if (UCXPP_COMPILE_OPTIONS) 76 | target_compile_options(ucxpp ${UCXPP_COMPILE_OPTIONS}) 77 | endif () 78 | if (UCXPP_LINK_OPTIONS) 79 | target_link_options(ucxpp ${UCXPP_LINK_OPTIONS}) 80 | endif () 81 | target_link_libraries(ucxpp ${UCXPP_LINK_LIBRARIES}) 82 | target_include_directories(ucxpp PUBLIC include) 83 | 84 | set(UCXPP_EXAMPLES helloworld perftest) 85 | if (UCXPP_BUILD_EXAMPLES) 86 | set(UCXPP_EXAMPLES_LIB_SOURCE_FILES 87 | examples/socket/channel.cc 88 | examples/socket/event_loop.cc 89 | examples/socket/tcp_connection.cc 90 | examples/socket/tcp_listener.cc 91 | examples/acceptor.cc 92 | examples/connector.cc 93 | examples/worker_epoll.cc 94 | examples/ep_transmission.cc 95 | ) 96 | add_library(ucxpp_examples STATIC ${UCXPP_EXAMPLES_LIB_SOURCE_FILES}) 97 | target_include_directories(ucxpp_examples PUBLIC examples/include) 98 | target_link_libraries(ucxpp_examples PUBLIC ucxpp) 99 | target_compile_options(ucxpp_examples ${UCXPP_COMPILE_OPTIONS}) 100 | target_link_options(ucxpp_examples ${UCXPP_LINK_OPTIONS}) 101 | foreach (EXAMPLE ${UCXPP_EXAMPLES}) 102 | add_executable(${EXAMPLE} examples/${EXAMPLE}.cc) 103 | target_link_libraries(${EXAMPLE} ucm ucxpp_examples) 104 | target_compile_options(${EXAMPLE} ${UCXPP_COMPILE_OPTIONS}) 105 | target_link_options(${EXAMPLE} ${UCXPP_LINK_OPTIONS}) 106 | endforeach () 107 | endif () 108 | 109 | include(GNUInstallDirs) 110 | install(TARGETS ucxpp EXPORT ucxpp ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 111 | install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ucxpp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 112 | -------------------------------------------------------------------------------- /include/ucxpp/task.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 
#include 9 | 10 | #include "ucxpp/detail/debug.h" 11 | 12 | namespace ucxpp { 13 | 14 | template class value_returner { 15 | public: 16 | std::promise promise_; 17 | void return_value(T &&value) { promise_.set_value(std::forward(value)); } 18 | }; 19 | 20 | template <> class value_returner { 21 | public: 22 | std::promise promise_; 23 | void return_void() { promise_.set_value(); } 24 | }; 25 | 26 | template 27 | struct promise_base : public value_returner { 28 | std::suspend_never initial_suspend() { return {}; } 29 | auto final_suspend() noexcept { 30 | struct awaiter { 31 | std::coroutine_handle<> release_detached_; 32 | bool await_ready() noexcept { return false; } 33 | std::coroutine_handle<> 34 | await_suspend(CoroutineHandle suspended) noexcept { 35 | if (suspended.promise().continuation_) { 36 | return suspended.promise().continuation_; 37 | } else { 38 | if (release_detached_) { 39 | release_detached_.destroy(); 40 | } 41 | return std::noop_coroutine(); 42 | } 43 | } 44 | void await_resume() noexcept {} 45 | }; 46 | return awaiter{release_detached_}; 47 | } 48 | 49 | std::coroutine_handle<> continuation_; 50 | std::coroutine_handle<> release_detached_; 51 | }; 52 | 53 | template struct task { 54 | struct promise_type 55 | : public promise_base> { 56 | task get_return_object() { 57 | return std::coroutine_handle::from_promise(*this); 58 | } 59 | void unhandled_exception() { 60 | this->promise_.set_exception(std::current_exception()); 61 | } 62 | promise_type() : future_(this->promise_.get_future()) {} 63 | std::future &get_future() { return future_; } 64 | void set_detached_task(std::coroutine_handle h) { 65 | this->release_detached_ = h; 66 | } 67 | std::future future_; 68 | }; 69 | 70 | struct task_awaiter { 71 | std::coroutine_handle h_; 72 | task_awaiter(std::coroutine_handle h) : h_(h) {} 73 | bool await_ready() { return h_.done(); } 74 | auto await_suspend(std::coroutine_handle<> suspended) { 75 | h_.promise().continuation_ = suspended; 76 | } 77 | 
auto await_resume() { return h_.promise().future_.get(); } 78 | }; 79 | 80 | using coroutine_handle_type = std::coroutine_handle; 81 | 82 | auto operator co_await() const { return task_awaiter(h_); } 83 | 84 | ~task() { 85 | if (!detached_) { 86 | if (!h_.done()) { 87 | h_.promise().set_detached_task(h_); 88 | get_future().get(); 89 | } else { 90 | h_.destroy(); 91 | } 92 | } 93 | } 94 | task(task &&other) 95 | : h_(std::exchange(other.h_, nullptr)), 96 | detached_(std::exchange(other.detached_, true)) {} 97 | task(coroutine_handle_type h) : h_(h), detached_(false) {} 98 | coroutine_handle_type h_; 99 | bool detached_; 100 | operator coroutine_handle_type() const { return h_; } 101 | std::future &get_future() const { return h_.promise().get_future(); } 102 | void detach() { 103 | assert(!detached_); 104 | h_.promise().set_detached_task(h_); 105 | detached_ = true; 106 | } 107 | }; 108 | 109 | } // namespace ucxpp -------------------------------------------------------------------------------- /src/endpoint.cc: -------------------------------------------------------------------------------- 1 | #include "ucxpp/endpoint.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include "ucxpp/address.h" 12 | #include "ucxpp/awaitable.h" 13 | #include "ucxpp/error.h" 14 | 15 | #include "ucxpp/detail/debug.h" 16 | #include "ucxpp/detail/serdes.h" 17 | 18 | namespace ucxpp { 19 | 20 | void endpoint::error_cb(void *ep, ucp_ep_h ep_h, ucs_status_t status) { 21 | UCXPP_LOG_ERROR("Endpoint error: ep=%p ep_h=%p status=%s", ep, 22 | reinterpret_cast(ep_h), ::ucs_status_string(status)); 23 | auto ep_ptr = reinterpret_cast(ep); 24 | if (!ep_ptr->close_request_) { 25 | auto request = ::ucp_ep_close_nb(ep_h, UCP_EP_CLOSE_MODE_FLUSH); 26 | if (UCS_PTR_IS_ERR(request)) { 27 | UCXPP_LOG_ERROR( 28 | "ep=%p ep_h=%p close failed: %s", ep, reinterpret_cast(ep_h), 29 | ::ucs_status_string(UCS_PTR_STATUS(ep_ptr->close_request_))); 30 | 
ep_ptr->close_request_ = nullptr; 31 | ep_ptr->ep_ = nullptr; 32 | } else if (UCS_PTR_IS_PTR(request)) { 33 | ep_ptr->close_request_ = request; 34 | } else { 35 | ep_ptr->close_request_ = nullptr; 36 | ep_ptr->ep_ = nullptr; 37 | } 38 | } 39 | } 40 | 41 | endpoint::endpoint(std::shared_ptr worker, remote_address const &peer) 42 | : worker_(worker), peer_(peer) { 43 | ucp_ep_params_t ep_params; 44 | ep_params.field_mask = 45 | UCP_EP_PARAM_FIELD_REMOTE_ADDRESS | UCP_EP_PARAM_FIELD_ERR_HANDLER; 46 | ep_params.address = peer.get_address(); 47 | ep_params.err_handler.cb = &error_cb; 48 | ep_params.err_handler.arg = this; 49 | check_ucs_status(::ucp_ep_create(worker_->worker_, &ep_params, &ep_), 50 | "failed to create ep"); 51 | } 52 | 53 | std::shared_ptr endpoint::worker_ptr() const { return worker_; } 54 | 55 | void endpoint::print() const { ::ucp_ep_print_info(ep_, stdout); } 56 | 57 | ucp_ep_h endpoint::handle() const { return ep_; } 58 | 59 | const remote_address &endpoint::get_address() const { return peer_; } 60 | 61 | stream_send_awaitable endpoint::stream_send(void const *buffer, 62 | size_t length) const { 63 | return stream_send_awaitable(ep_, buffer, length); 64 | } 65 | 66 | stream_recv_awaitable endpoint::stream_recv(void *buffer, size_t length) const { 67 | return stream_recv_awaitable(ep_, buffer, length); 68 | } 69 | 70 | tag_send_awaitable endpoint::tag_send(void const *buffer, size_t length, 71 | ucp_tag_t tag) const { 72 | return tag_send_awaitable(ep_, buffer, length, tag); 73 | } 74 | 75 | ep_flush_awaitable endpoint::flush() const { 76 | return ep_flush_awaitable(this->shared_from_this()); 77 | } 78 | 79 | task endpoint::close() { 80 | co_await ep_close_awaitable(this->shared_from_this()); 81 | ep_ = nullptr; 82 | co_return; 83 | } 84 | 85 | void endpoint::close_cb(void *request, ucs_status_t status, void *user_data) { 86 | UCXPP_LOG_DEBUG("endpoint closed request=%p status=%s user_data=%p", request, 87 | ::ucs_status_string(status), user_data); 
88 | ::ucp_request_free(request); 89 | } 90 | 91 | endpoint::~endpoint() { 92 | if (ep_ != nullptr && close_request_ == nullptr) { 93 | ucp_request_param_t param; 94 | param.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK; 95 | param.cb.send = &close_cb; 96 | ::ucp_ep_close_nbx(ep_, ¶m); 97 | } 98 | } 99 | 100 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/address.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "ucxpp/task.h" 9 | 10 | #include "ucxpp/detail/noncopyable.h" 11 | namespace ucxpp { 12 | 13 | class worker; 14 | 15 | /** 16 | * @brief Represents a local UCX address. 17 | * 18 | */ 19 | class local_address : public noncopyable { 20 | std::shared_ptr worker_; 21 | ucp_address_t *address_; 22 | size_t address_length_; 23 | friend class endpoint; 24 | 25 | public: 26 | /** 27 | * @brief Construct a new local address object 28 | * 29 | * @param worker UCX worker 30 | * @param address UCP address 31 | * @param address_length UCP address length 32 | */ 33 | local_address(std::shared_ptr worker, ucp_address_t *address, 34 | size_t address_length); 35 | 36 | /** 37 | * @brief Construct a new local address object 38 | * 39 | * @param other Another local address object to move from 40 | */ 41 | local_address(local_address &&other); 42 | 43 | /** 44 | * @brief Move assignment operator 45 | * 46 | * @param other Another local address object to move from 47 | * @return local_address& This object 48 | */ 49 | local_address &operator=(local_address &&other); 50 | 51 | /** 52 | * @brief Serialize the address to a buffer ready to be sent to a remote peer 53 | * 54 | * @return std::vector The serialized address 55 | */ 56 | std::vector serialize() const; 57 | 58 | /** 59 | * @brief Get the UCP address 60 | * 61 | * @return const ucp_address_t* The UCP address 62 | */ 63 | const ucp_address_t 
*get_address() const; 64 | 65 | /** 66 | * @brief Get the length of the address 67 | * 68 | * @return size_t The address length 69 | */ 70 | size_t get_length() const; 71 | 72 | /** 73 | * @brief Destroy the local address object and release the buffer 74 | * 75 | */ 76 | ~local_address(); 77 | }; 78 | 79 | /** 80 | * @brief Represents a remote UCX address. 81 | * 82 | */ 83 | class remote_address { 84 | std::vector address_; 85 | 86 | public: 87 | /** 88 | * @brief Construct a new remote address object 89 | * 90 | * @param address The received address buffer 91 | */ 92 | remote_address(std::vector const &address); 93 | 94 | /** 95 | * @brief Construct a new remote address object 96 | * 97 | * @param address Another remote address object to move from 98 | */ 99 | remote_address(std::vector &&address); 100 | 101 | /** 102 | * @brief Move construct a new remote address object 103 | * 104 | * @param other Another remote address object to move from 105 | */ 106 | remote_address(remote_address &&other) = default; 107 | 108 | /** 109 | * @brief Construct a new remote address object 110 | * 111 | * @param other Another remote address object to copy from 112 | */ 113 | remote_address(remote_address const &other) = default; 114 | 115 | /** 116 | * @brief Copy assignment operator 117 | * 118 | * @param other Another remote address object to copy from 119 | * @return remote_address& This object 120 | */ 121 | remote_address &operator=(remote_address const &other) = default; 122 | 123 | /** 124 | * @brief Move assignment operator 125 | * 126 | * @param other Another remote address object to move from 127 | * @return remote_address& This object 128 | */ 129 | remote_address &operator=(remote_address &&other) = default; 130 | 131 | /** 132 | * @brief Get the UCP address 133 | * 134 | * @return const ucp_address_t* The UCP address 135 | */ 136 | const ucp_address_t *get_address() const; 137 | 138 | /** 139 | * @brief Get the length of the address 140 | * 141 | * @return size_t The 
length of the address 142 | */ 143 | size_t get_length() const; 144 | }; 145 | 146 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/endpoint.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include "ucxpp/address.h" 12 | #include "ucxpp/awaitable.h" 13 | #include "ucxpp/error.h" 14 | #include "ucxpp/memory.h" 15 | #include "ucxpp/task.h" 16 | #include "ucxpp/worker.h" 17 | 18 | #include "ucxpp/detail/noncopyable.h" 19 | 20 | namespace ucxpp { 21 | 22 | /** 23 | * @brief Abstraction for a UCX endpoint. 24 | * 25 | */ 26 | class endpoint : public noncopyable, 27 | public std::enable_shared_from_this { 28 | friend class worker; 29 | friend class local_memory_handle; 30 | friend class remote_memory_handle; 31 | friend class ep_close_awaitable; 32 | std::shared_ptr worker_; 33 | ucp_ep_h ep_; 34 | void *close_request_; 35 | remote_address peer_; 36 | 37 | public: 38 | /** 39 | * @brief Construct a new endpoint object 40 | * 41 | * @param worker UCX worker 42 | * @param peer Remote UCX address 43 | */ 44 | endpoint(std::shared_ptr worker, remote_address const &peer); 45 | 46 | /** 47 | * @brief Error handler for all endpoints 48 | * 49 | * @param ep endpoint object 50 | * @param ep_h UCX endpoint handle 51 | * @param status error status 52 | */ 53 | static void error_cb(void *ep, ucp_ep_h ep_h, ucs_status_t status); 54 | 55 | /** 56 | * @brief Get the worker object 57 | * 58 | * @return std::shared_ptr The endpoint's worker 59 | */ 60 | std::shared_ptr worker_ptr() const; 61 | 62 | /** 63 | * @brief Print the endpoint's information 64 | * 65 | */ 66 | void print() const; 67 | 68 | /** 69 | * @brief Get the endpoint's native UCX handle 70 | * 71 | * @return ucp_ep_h The endpoint's native UCX handle 72 | */ 73 | ucp_ep_h handle() const; 74 | 75 | /** 76 | * 
@brief Get the endpoint's remote address 77 | * 78 | * @return remote_address The endpoint's remote address 79 | */ 80 | const remote_address &get_address() const; 81 | 82 | /** 83 | * @brief Stream send the buffer 84 | * 85 | * @param buffer The buffer to send 86 | * @param length The length of the buffer 87 | * @return stream_send_awaitable A coroutine that returns upon completion 88 | */ 89 | stream_send_awaitable stream_send(void const *buffer, size_t length) const; 90 | 91 | /** 92 | * @brief Stream receive to the buffer 93 | * 94 | * @param buffer The buffer to receive to 95 | * @param length The length of the buffer 96 | * @return stream_recv_awaitable A coroutine that returns number of bytes 97 | * received upon completion 98 | */ 99 | stream_recv_awaitable stream_recv(void *buffer, size_t length) const; 100 | 101 | /** 102 | * @brief Tag send the buffer 103 | * 104 | * @param buffer The buffer to send 105 | * @param length The length of the buffer 106 | * @param tag The tag to send with 107 | * @return tag_send_awaitable A coroutine that returns upon completion 108 | */ 109 | tag_send_awaitable tag_send(void const *buffer, size_t length, 110 | ucp_tag_t tag) const; 111 | 112 | /** 113 | * @brief Flush the endpoint 114 | * 115 | * @return ep_flush_awaitable A coroutine that returns upon completion 116 | */ 117 | ep_flush_awaitable flush() const; 118 | 119 | /** 120 | * @brief Close the endpoint. You should not use the endpoint after calling 121 | * this function. 122 | * 123 | * @return task A coroutine that returns upon completion 124 | */ 125 | task close(); 126 | 127 | /** 128 | * @brief Endpoint close callback 129 | * 130 | * @param request UCX request handle 131 | * @param status UCX status 132 | * @param user_data User data 133 | */ 134 | static void close_cb(void *request, ucs_status_t status, void *user_data); 135 | 136 | /** 137 | * @brief Destroy the endpoint object. If the endpoint is not closed yet, it 138 | * will be closed. 
139 | * 140 | */ 141 | ~endpoint(); 142 | }; 143 | 144 | } // namespace ucxpp 145 | 146 | /** \example helloworld.cc 147 | * This is an example of how to use the endpoint class. 148 | */ -------------------------------------------------------------------------------- /examples/socket/tcp_listener.cc: -------------------------------------------------------------------------------- 1 | #include "socket/tcp_listener.h" 2 | 3 | #include "socket/channel.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "ucxpp/error.h" 13 | 14 | #include "ucxpp/detail/debug.h" 15 | 16 | namespace ucxpp { 17 | namespace socket { 18 | static inline void *get_in_addr(struct sockaddr *sa) { 19 | if (sa->sa_family == AF_INET) { 20 | return &(((struct sockaddr_in *)sa)->sin_addr); 21 | } 22 | 23 | return &(((struct sockaddr_in6 *)sa)->sin6_addr); 24 | } 25 | 26 | static inline uint16_t get_in_port(struct sockaddr *sa) { 27 | if (sa->sa_family == AF_INET) { 28 | return (((struct sockaddr_in *)sa)->sin_port); 29 | } 30 | 31 | return (((struct sockaddr_in6 *)sa)->sin6_port); 32 | } 33 | 34 | std::string get_in_addr_string(struct addrinfo *ai) { 35 | char s[INET6_ADDRSTRLEN]; 36 | inet_ntop(ai->ai_family, get_in_addr(ai->ai_addr), s, sizeof(s)); 37 | return s; 38 | } 39 | 40 | std::string get_in_addr_string(struct sockaddr_storage *ss) { 41 | char s[INET6_ADDRSTRLEN]; 42 | inet_ntop(ss->ss_family, get_in_addr(reinterpret_cast(ss)), 43 | s, sizeof(s)); 44 | return s; 45 | } 46 | 47 | int tcp_listener::accept_awaitable::do_io() { 48 | struct sockaddr_storage client_addr = {}; 49 | socklen_t client_addr_len = sizeof(client_addr); 50 | int client_fd = ::accept4(channel_->fd(), 51 | reinterpret_cast(&client_addr), 52 | &client_addr_len, SOCK_CLOEXEC | SOCK_NONBLOCK); 53 | if (client_fd < 0) { 54 | if (errno == EAGAIN || errno == EWOULDBLOCK) { 55 | return client_fd; 56 | } 57 | } 58 | check_errno(client_fd, "failed to accept"); 59 | auto 
const &client_ip = get_in_addr_string(&client_addr); 60 | UCXPP_LOG_DEBUG( 61 | "accepted connection from %s:%d fd=%d", client_ip.c_str(), 62 | get_in_port(reinterpret_cast(&client_addr)), 63 | client_fd); 64 | return client_fd; 65 | } 66 | 67 | tcp_listener::accept_awaitable::accept_awaitable( 68 | std::shared_ptr channel) 69 | : channel_(channel), client_fd_(-1) {} 70 | 71 | bool tcp_listener::accept_awaitable::await_ready() { 72 | client_fd_ = do_io(); 73 | return client_fd_ > 0; 74 | } 75 | 76 | void tcp_listener::accept_awaitable::await_suspend(std::coroutine_handle<> h) { 77 | channel_->set_readable_callback([h]() { h.resume(); }); 78 | channel_->wait_readable(); 79 | } 80 | 81 | std::shared_ptr tcp_listener::accept_awaitable::await_resume() { 82 | if (client_fd_ < 0) { 83 | client_fd_ = do_io(); 84 | } 85 | check_errno(client_fd_, "could not accept after readable"); 86 | auto channel_ptr = std::make_shared(client_fd_, channel_->loop()); 87 | channel_ptr->set_nonblocking(); 88 | return channel_ptr; 89 | } 90 | 91 | tcp_listener::tcp_listener(std::shared_ptr loop, 92 | std::string const &hostname, uint16_t port) { 93 | std::string port_str = std::to_string(port); 94 | int fd = ::socket(AF_INET, SOCK_STREAM, 0); 95 | 96 | check_errno(fd, "failed to create socket"); 97 | { 98 | int32_t yes = 1; 99 | check_rc(::setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)), 100 | "failed to set reuse address"); 101 | } 102 | 103 | struct addrinfo hints, *servinfo, *p; 104 | ::bzero(&hints, sizeof(hints)); 105 | hints.ai_family = AF_INET; 106 | hints.ai_socktype = SOCK_STREAM; 107 | hints.ai_flags = AI_PASSIVE; 108 | 109 | if (auto rc = getaddrinfo(hostname.empty() ? 
nullptr : hostname.c_str(), 110 | port_str.c_str(), &hints, &servinfo); 111 | rc != 0) { 112 | throw_with("getaddrinfo: %s", gai_strerror(rc)); 113 | } 114 | 115 | for (p = servinfo; p != nullptr; p = p->ai_next) { 116 | try { 117 | auto const &ip = get_in_addr_string(p); 118 | UCXPP_LOG_DEBUG("binding %s:%d", ip.c_str(), port); 119 | check_errno(::bind(fd, p->ai_addr, p->ai_addrlen), "failed to bind"); 120 | } catch (std::runtime_error &e) { 121 | UCXPP_LOG_ERROR("%s", e.what()); 122 | continue; 123 | } 124 | break; 125 | } 126 | ::freeaddrinfo(servinfo); 127 | check_ptr(p, "failed to bind"); 128 | check_errno(::listen(fd, 128), "failed to listen"); 129 | channel_ = std::make_shared(fd, loop); 130 | channel_->set_nonblocking(); 131 | UCXPP_LOG_DEBUG("acceptor fd %d listening on %d", fd, port); 132 | } 133 | 134 | tcp_listener::accept_awaitable tcp_listener::accept() { 135 | return accept_awaitable{channel_}; 136 | } 137 | 138 | } // namespace socket 139 | } // namespace ucxpp -------------------------------------------------------------------------------- /examples/socket/tcp_connection.cc: -------------------------------------------------------------------------------- 1 | #include "socket/tcp_connection.h" 2 | 3 | #include "socket/channel.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "ucxpp/error.h" 11 | 12 | #include "ucxpp/detail/debug.h" 13 | 14 | namespace ucxpp { 15 | namespace socket { 16 | 17 | tcp_connection::tcp_connection(std::shared_ptr channel) 18 | : channel_(channel) {} 19 | 20 | tcp_connection::connect_awaitable 21 | tcp_connection::connect(std::shared_ptr loop, 22 | const std::string &hostname, uint16_t port) { 23 | return connect_awaitable(loop, hostname, port); 24 | } 25 | 26 | tcp_connection::connect_awaitable::connect_awaitable( 27 | std::shared_ptr loop, std::string const &hostname, 28 | uint16_t port) 29 | : rc_(-1) { 30 | struct addrinfo hints, *servinfo, *p; 31 | auto const port_str = 
std::to_string(port); 32 | bzero(&hints, sizeof(hints)); 33 | hints.ai_family = AF_UNSPEC; 34 | hints.ai_socktype = SOCK_STREAM; 35 | if (auto rc = 36 | ::getaddrinfo(hostname.c_str(), port_str.c_str(), &hints, &servinfo); 37 | rc != 0) { 38 | throw_with("failed to getaddrinfo: %s", ::gai_strerror(rc)); 39 | } 40 | for (p = servinfo; p != nullptr; p = p->ai_next) { 41 | int fd = ::socket(p->ai_family, p->ai_socktype, p->ai_protocol); 42 | if (fd == -1) { 43 | UCXPP_LOG_ERROR("failed to create socket: %s(errno=%d)", strerror(errno), 44 | errno); 45 | continue; 46 | } 47 | auto channel = std::make_shared(fd, loop); 48 | try { 49 | channel->set_nonblocking(); 50 | } catch (std::runtime_error &e) { 51 | UCXPP_LOG_ERROR("%s", e.what()); 52 | continue; 53 | } 54 | rc_ = ::connect(channel->fd(), p->ai_addr, p->ai_addrlen); 55 | if (rc_ < 0) { 56 | if (errno != EINPROGRESS) { 57 | UCXPP_LOG_ERROR("failed to connect: %s(errno=%d)", strerror(errno), 58 | errno); 59 | continue; 60 | } 61 | } 62 | channel_ = channel; 63 | break; 64 | } 65 | freeaddrinfo(servinfo); 66 | if (p == nullptr) { 67 | throw_with("failed to connect"); 68 | } 69 | } 70 | 71 | bool tcp_connection::connect_awaitable::await_ready() { return rc_ == 0; } 72 | 73 | void tcp_connection::connect_awaitable::await_suspend( 74 | std::coroutine_handle<> h) { 75 | channel_->set_writable_callback([h]() { h.resume(); }); 76 | channel_->wait_writable(); 77 | } 78 | 79 | std::shared_ptr 80 | tcp_connection::connect_awaitable::await_resume() { 81 | int err = 0; 82 | socklen_t len = sizeof(err); 83 | int status = ::getsockopt(channel_->fd(), SOL_SOCKET, SO_ERROR, &err, &len); 84 | check_errno(status, "failed to get socket error"); 85 | check_rc(err, "failed to connect"); 86 | UCXPP_LOG_DEBUG("fd %d connected", channel_->fd()); 87 | return std::make_shared(channel_); 88 | } 89 | 90 | tcp_connection::rw_awaitable tcp_connection::recv(void *buffer, size_t length) { 91 | return rw_awaitable(channel_, false, buffer, length); 
92 | } 93 | 94 | tcp_connection::rw_awaitable tcp_connection::send(const void *buffer, 95 | size_t length) { 96 | return rw_awaitable(channel_, true, const_cast(buffer), length); 97 | } 98 | 99 | tcp_connection::rw_awaitable::rw_awaitable(std::shared_ptr channel, 100 | bool write, void *buffer, 101 | size_t length) 102 | : channel_(channel), buffer_(buffer), n_(-1), length_(length), 103 | write_(write) {} 104 | 105 | int tcp_connection::rw_awaitable::do_io() { 106 | int n = -1; 107 | if (write_) { 108 | n = ::write(channel_->fd(), buffer_, length_); 109 | } else { 110 | n = ::read(channel_->fd(), buffer_, length_); 111 | } 112 | return n; 113 | } 114 | 115 | bool tcp_connection::rw_awaitable::await_ready() { 116 | n_ = do_io(); 117 | if (n_ >= 0) { 118 | return true; 119 | } 120 | if (errno == EAGAIN || errno == EWOULDBLOCK) { 121 | return false; 122 | } else { 123 | check_errno(n_, "failed to read write"); 124 | } 125 | return false; 126 | } 127 | 128 | void tcp_connection::rw_awaitable::await_suspend(std::coroutine_handle<> h) { 129 | auto &&callback = [h]() { h.resume(); }; 130 | if (write_) { 131 | channel_->set_writable_callback(callback); 132 | channel_->wait_writable(); 133 | } else { 134 | channel_->set_readable_callback(callback); 135 | channel_->wait_readable(); 136 | } 137 | } 138 | 139 | int tcp_connection::rw_awaitable::await_resume() { 140 | if (n_ < 0) { 141 | n_ = do_io(); 142 | check_errno(n_, "failed to io after readable or writable"); 143 | } 144 | return n_; 145 | } 146 | 147 | } // namespace socket 148 | } // namespace ucxpp -------------------------------------------------------------------------------- /src/memory.cc: -------------------------------------------------------------------------------- 1 | #include "ucxpp/memory.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "ucxpp/endpoint.h" 11 | #include "ucxpp/error.h" 12 | 13 | namespace ucxpp { 14 | 15 | 
local_memory_handle::local_memory_handle(std::shared_ptr ctx, 16 | ucp_mem_h mem) 17 | : ctx_(ctx), mem_(mem) {} 18 | 19 | local_memory_handle::local_memory_handle(local_memory_handle &&other) 20 | : ctx_(std::move(other.ctx_)), mem_(std::exchange(other.mem_, nullptr)) {} 21 | 22 | local_memory_handle 23 | local_memory_handle::register_mem(std::shared_ptr ctx, void *address, 24 | size_t length) { 25 | ucp_mem_h mem; 26 | ucp_mem_map_params_t map_params; 27 | map_params.address = address; 28 | map_params.length = length; 29 | map_params.field_mask = 30 | UCP_MEM_MAP_PARAM_FIELD_ADDRESS | UCP_MEM_MAP_PARAM_FIELD_LENGTH; 31 | check_ucs_status(::ucp_mem_map(ctx->context_, &map_params, &mem), 32 | "failed to map memory"); 33 | 34 | return local_memory_handle(ctx, mem); 35 | } 36 | 37 | std::pair 38 | local_memory_handle::allocate_mem(std::shared_ptr ctx, size_t length) { 39 | ucp_mem_h mem; 40 | ucp_mem_attr_t attr; 41 | ucp_mem_map_params_t map_params; 42 | map_params.address = nullptr; 43 | map_params.length = length; 44 | map_params.flags = UCP_MEM_MAP_ALLOCATE; 45 | map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS | 46 | UCP_MEM_MAP_PARAM_FIELD_LENGTH | 47 | UCP_MEM_MAP_PARAM_FIELD_FLAGS; 48 | check_ucs_status(::ucp_mem_map(ctx->context_, &map_params, &mem), 49 | "failed to map memory"); 50 | attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS; 51 | check_ucs_status(::ucp_mem_query(mem, &attr), "failed to get memory address"); 52 | return std::make_pair(attr.address, local_memory_handle(ctx, mem)); 53 | } 54 | 55 | packed_memory_rkey local_memory_handle::pack_rkey() const { 56 | void *buffer; 57 | size_t length; 58 | check_ucs_status(::ucp_rkey_pack(ctx_->context_, mem_, &buffer, &length), 59 | "failed to pack memory"); 60 | return packed_memory_rkey(buffer, length); 61 | } 62 | 63 | ucp_mem_h local_memory_handle::handle() const { return mem_; } 64 | 65 | local_memory_handle::~local_memory_handle() { 66 | if (mem_ != nullptr) { 67 | 
(void)::ucp_mem_unmap(ctx_->context_, mem_); 68 | } 69 | } 70 | 71 | packed_memory_rkey::packed_memory_rkey(void *buffer, size_t length) 72 | : buffer_(buffer), length_(length) {} 73 | 74 | packed_memory_rkey::packed_memory_rkey(packed_memory_rkey &&other) 75 | : buffer_(std::exchange(other.buffer_, nullptr)), 76 | length_(std::exchange(other.length_, 0)) {} 77 | 78 | void *packed_memory_rkey::get_buffer() { return buffer_; } 79 | 80 | void const *packed_memory_rkey::get_buffer() const { return buffer_; } 81 | 82 | size_t packed_memory_rkey::get_length() const { return length_; } 83 | 84 | packed_memory_rkey::~packed_memory_rkey() { 85 | if (buffer_ != nullptr) { 86 | ::ucp_rkey_buffer_release(buffer_); 87 | } 88 | } 89 | 90 | remote_memory_handle::remote_memory_handle(std::shared_ptr endpoint, 91 | void const *packed_rkey_buffer) 92 | : endpoint_(endpoint), ep_(endpoint->handle()) { 93 | check_ucs_status( 94 | ::ucp_ep_rkey_unpack(endpoint_->handle(), packed_rkey_buffer, &rkey_), 95 | "failed to unpack memory"); 96 | } 97 | 98 | remote_memory_handle::remote_memory_handle(remote_memory_handle &&other) 99 | : endpoint_(std::move(other.endpoint_)), 100 | ep_(std::exchange(other.ep_, nullptr)), 101 | rkey_(std::exchange(other.rkey_, nullptr)) {} 102 | 103 | std::shared_ptr remote_memory_handle::endpoint_ptr() const { 104 | return endpoint_; 105 | } 106 | 107 | ucp_rkey_h remote_memory_handle::handle() const { return rkey_; } 108 | 109 | rma_put_awaitable remote_memory_handle::put(void const *buffer, size_t length, 110 | uint64_t raddr) const { 111 | return rma_put_awaitable(ep_, buffer, length, raddr, rkey_); 112 | } 113 | 114 | rma_get_awaitable remote_memory_handle::get(void *buffer, size_t length, 115 | uint64_t raddr) const { 116 | return rma_get_awaitable(ep_, buffer, length, raddr, rkey_); 117 | } 118 | 119 | rma_put_awaitable remote_memory_handle::write(void const *buffer, size_t length, 120 | uint64_t raddr) const { 121 | return rma_put_awaitable(ep_, buffer, 
/**
 * @brief Render an epoll event mask as text for trace logging.
 *
 * Recognised bits are listed in the fixed order EPOLLIN, EPOLLPRI, EPOLLOUT,
 * EPOLLERR, EPOLLHUP and joined with " | ". Other bits are ignored.
 *
 * @param events Bit mask of epoll events
 * @return std::string The joined names; empty when no recognised bit is set
 */
static inline std::string events_string(int events) {
  struct flag_name {
    int bit;
    const char *label;
  };
  static constexpr flag_name k_flags[] = {
      {EPOLLIN, "EPOLLIN"}, {EPOLLPRI, "EPOLLPRI"}, {EPOLLOUT, "EPOLLOUT"},
      {EPOLLERR, "EPOLLERR"}, {EPOLLHUP, "EPOLLHUP"},
  };

  std::string joined;
  for (auto const &flag : k_flags) {
    if ((events & flag.bit) == 0) {
      continue;
    }
    // Every label is non-empty, so "joined is non-empty" means "not first".
    if (!joined.empty()) {
      joined += " | ";
    }
    joined += flag.label;
  }
  return joined;
}
| EPOLLERR; 59 | event.data.fd = close_event_fd_; 60 | check_errno(::epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, close_event_fd_, &event), 61 | "failed to add close event fd to epoll"); 62 | } 63 | 64 | std::shared_ptr event_loop::new_loop(size_t max_events) { 65 | return std::make_shared(max_events); 66 | } 67 | 68 | void event_loop::register_channel(std::shared_ptr channel, 69 | struct epoll_event *event) { 70 | assert(epoll_fd_ > 0); 71 | UCXPP_LOG_TRACE("epoll add fd=%d events=%s", channel->fd(), 72 | events_string(event->events).c_str()); 73 | { 74 | std::lock_guard lock(mutex_); 75 | channels_.insert(std::make_pair(channel->fd(), channel)); 76 | } 77 | auto rc = ::epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, channel->fd(), event); 78 | try { 79 | check_errno(rc, "failed to add fd to epoll"); 80 | } catch (...) { 81 | std::lock_guard lock(mutex_); 82 | channels_.erase(channel->fd()); 83 | throw; 84 | } 85 | } 86 | 87 | void event_loop::register_read(std::shared_ptr channel) { 88 | struct epoll_event event; 89 | event.data.fd = channel->fd(); 90 | event.events = EPOLLIN | EPOLLPRI; 91 | register_channel(channel, &event); 92 | } 93 | 94 | void event_loop::register_write(std::shared_ptr channel) { 95 | struct epoll_event event; 96 | event.data.fd = channel->fd(); 97 | event.events = EPOLLOUT; 98 | register_channel(channel, &event); 99 | } 100 | 101 | void event_loop::deregister(socket::channel &channel) { 102 | assert(epoll_fd_ > 0); 103 | struct epoll_event event; 104 | ::bzero(&event, sizeof(event)); 105 | auto rc = ::epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, channel.fd(), &event); 106 | if (rc < 0 && errno != ENOENT) { 107 | check_errno(rc, "failed to remove fd from epoll"); 108 | } 109 | { 110 | std::lock_guard lock(mutex_); 111 | channels_.erase(channel.fd()); 112 | } 113 | } 114 | 115 | void event_loop::poll(bool &close_triggered) { 116 | int nr_events = ::epoll_wait(epoll_fd_, &events_[0], max_events_, 1); 117 | if (nr_events < 0 && errno == EINTR) [[unlikely]] { 118 | return; 
119 | } 120 | check_errno(nr_events, "failed to epoll wait"); 121 | for (int i = 0; i < nr_events; ++i) { 122 | auto &event = events_[i]; 123 | auto fd = event.data.fd; 124 | UCXPP_LOG_TRACE("fd: %d events: %s", fd, 125 | events_string(event.events).c_str()); 126 | if (event.data.fd == close_event_fd_) { 127 | close_triggered = true; 128 | continue; 129 | } 130 | auto channel = [&]() { 131 | std::shared_lock lock(mutex_); 132 | auto it = channels_.find(fd); 133 | assert(it != channels_.end()); 134 | return it->second.lock(); 135 | }(); 136 | if (channel) { 137 | if (event.events & EPOLLIN || event.events & EPOLLERR) { 138 | channel->readable_callback(); 139 | if (event.events & EPOLLERR) { 140 | std::lock_guard lock(mutex_); 141 | channels_.erase(fd); 142 | } 143 | } 144 | if (event.events & EPOLLOUT || event.events & EPOLLERR) { 145 | channel->writable_callback(); 146 | if (event.events & EPOLLERR) { 147 | std::lock_guard lock(mutex_); 148 | channels_.erase(fd); 149 | } 150 | } 151 | } else { 152 | std::lock_guard lock(mutex_); 153 | channels_.erase(fd); 154 | } 155 | } 156 | } 157 | 158 | void event_loop::loop() { 159 | bool close_triggered = false; 160 | while (!close_triggered) { 161 | poll(close_triggered); 162 | } 163 | } 164 | 165 | void event_loop::close() { 166 | uint64_t one = 1; 167 | check_errno(::write(close_event_fd_, &one, sizeof(one)), 168 | "failed to write event fd"); 169 | } 170 | 171 | event_loop::~event_loop() { 172 | if (close_event_fd_ > 0) { 173 | if (auto rc = ::close(close_event_fd_); rc != 0) { 174 | UCXPP_LOG_ERROR("failed to close event fd %d: %s (errno=%d)", 175 | close_event_fd_, strerror(errno), errno); 176 | } else { 177 | UCXPP_LOG_TRACE("closed event fd %d", close_event_fd_); 178 | } 179 | } 180 | if (epoll_fd_ > 0) { 181 | if (auto rc = ::close(epoll_fd_); rc != 0) { 182 | UCXPP_LOG_ERROR("failed to close epoll fd %d: %s (errno=%d)", epoll_fd_, 183 | strerror(errno), errno); 184 | } else { 185 | UCXPP_LOG_TRACE("closed epoll fd 
%d", epoll_fd_); 186 | } 187 | } 188 | } 189 | 190 | } // namespace socket 191 | 192 | } // namespace ucxpp -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: LLVM 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: Align 6 | AlignArrayOfStructures: None 7 | AlignConsecutiveMacros: None 8 | AlignConsecutiveAssignments: None 9 | AlignConsecutiveBitFields: None 10 | AlignConsecutiveDeclarations: None 11 | AlignEscapedNewlines: Right 12 | AlignOperands: Align 13 | AlignTrailingComments: true 14 | AllowAllArgumentsOnNextLine: true 15 | AllowAllParametersOfDeclarationOnNextLine: true 16 | AllowShortEnumsOnASingleLine: true 17 | AllowShortBlocksOnASingleLine: Never 18 | AllowShortCaseLabelsOnASingleLine: false 19 | AllowShortFunctionsOnASingleLine: All 20 | AllowShortLambdasOnASingleLine: All 21 | AllowShortIfStatementsOnASingleLine: Never 22 | AllowShortLoopsOnASingleLine: false 23 | AlwaysBreakAfterDefinitionReturnType: None 24 | AlwaysBreakAfterReturnType: None 25 | AlwaysBreakBeforeMultilineStrings: false 26 | AlwaysBreakTemplateDeclarations: MultiLine 27 | AttributeMacros: 28 | - __capability 29 | BinPackArguments: true 30 | BinPackParameters: true 31 | BraceWrapping: 32 | AfterCaseLabel: false 33 | AfterClass: false 34 | AfterControlStatement: Never 35 | AfterEnum: false 36 | AfterFunction: false 37 | AfterNamespace: false 38 | AfterObjCDeclaration: false 39 | AfterStruct: false 40 | AfterUnion: false 41 | AfterExternBlock: false 42 | BeforeCatch: false 43 | BeforeElse: false 44 | BeforeLambdaBody: false 45 | BeforeWhile: false 46 | IndentBraces: false 47 | SplitEmptyFunction: true 48 | SplitEmptyRecord: true 49 | SplitEmptyNamespace: true 50 | BreakBeforeBinaryOperators: None 51 | BreakBeforeConceptDeclarations: true 52 | BreakBeforeBraces: Attach 53 | BreakBeforeInheritanceComma: false 54 
| BreakInheritanceList: BeforeColon 55 | BreakBeforeTernaryOperators: true 56 | BreakConstructorInitializersBeforeComma: false 57 | BreakConstructorInitializers: BeforeColon 58 | BreakAfterJavaFieldAnnotations: false 59 | BreakStringLiterals: true 60 | ColumnLimit: 80 61 | CommentPragmas: '^ IWYU pragma:' 62 | QualifierAlignment: Leave 63 | CompactNamespaces: false 64 | ConstructorInitializerIndentWidth: 4 65 | ContinuationIndentWidth: 4 66 | Cpp11BracedListStyle: true 67 | DeriveLineEnding: true 68 | DerivePointerAlignment: false 69 | DisableFormat: false 70 | EmptyLineAfterAccessModifier: Never 71 | EmptyLineBeforeAccessModifier: LogicalBlock 72 | ExperimentalAutoDetectBinPacking: false 73 | PackConstructorInitializers: BinPack 74 | BasedOnStyle: '' 75 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 76 | AllowAllConstructorInitializersOnNextLine: true 77 | FixNamespaceComments: true 78 | ForEachMacros: 79 | - foreach 80 | - Q_FOREACH 81 | - BOOST_FOREACH 82 | IfMacros: 83 | - KJ_IF_MAYBE 84 | IncludeBlocks: Regroup 85 | IncludeCategories: 86 | - Regex: '^"(ucxpp)/(detail)' 87 | Priority: 4 88 | CaseSensitive: false 89 | - Regex: '^("|<)(ucxpp)/' 90 | Priority: 3 91 | CaseSensitive: false 92 | - Regex: '^<(ucp)/' 93 | Priority: 2 94 | CaseSensitive: false 95 | - Regex: '.*' 96 | Priority: 1 97 | CaseSensitive: false 98 | IncludeIsMainRegex: '(Test)?$' 99 | IncludeIsMainSourceRegex: '' 100 | IndentAccessModifiers: false 101 | IndentCaseLabels: false 102 | IndentCaseBlocks: false 103 | IndentGotoLabels: true 104 | IndentPPDirectives: None 105 | IndentExternBlock: AfterExternBlock 106 | IndentRequires: false 107 | IndentWidth: 2 108 | IndentWrappedFunctionNames: false 109 | InsertTrailingCommas: None 110 | JavaScriptQuotes: Leave 111 | JavaScriptWrapImports: true 112 | KeepEmptyLinesAtTheStartOfBlocks: true 113 | LambdaBodyIndentation: Signature 114 | MacroBlockBegin: '' 115 | MacroBlockEnd: '' 116 | MaxEmptyLinesToKeep: 1 117 | NamespaceIndentation: None 118 
| ObjCBinPackProtocolList: Auto 119 | ObjCBlockIndentWidth: 2 120 | ObjCBreakBeforeNestedBlockParam: true 121 | ObjCSpaceAfterProperty: false 122 | ObjCSpaceBeforeProtocolList: true 123 | PenaltyBreakAssignment: 2 124 | PenaltyBreakBeforeFirstCallParameter: 19 125 | PenaltyBreakComment: 300 126 | PenaltyBreakFirstLessLess: 120 127 | PenaltyBreakOpenParenthesis: 0 128 | PenaltyBreakString: 1000 129 | PenaltyBreakTemplateDeclaration: 10 130 | PenaltyExcessCharacter: 1000000 131 | PenaltyReturnTypeOnItsOwnLine: 60 132 | PenaltyIndentedWhitespace: 0 133 | PointerAlignment: Right 134 | PPIndentWidth: -1 135 | ReferenceAlignment: Pointer 136 | ReflowComments: true 137 | RemoveBracesLLVM: false 138 | SeparateDefinitionBlocks: Leave 139 | ShortNamespaceLines: 1 140 | SortIncludes: CaseSensitive 141 | SortJavaStaticImport: Before 142 | SortUsingDeclarations: true 143 | SpaceAfterCStyleCast: false 144 | SpaceAfterLogicalNot: false 145 | SpaceAfterTemplateKeyword: true 146 | SpaceBeforeAssignmentOperators: true 147 | SpaceBeforeCaseColon: false 148 | SpaceBeforeCpp11BracedList: false 149 | SpaceBeforeCtorInitializerColon: true 150 | SpaceBeforeInheritanceColon: true 151 | SpaceBeforeParens: ControlStatements 152 | SpaceBeforeParensOptions: 153 | AfterControlStatements: true 154 | AfterForeachMacros: true 155 | AfterFunctionDefinitionName: false 156 | AfterFunctionDeclarationName: false 157 | AfterIfMacros: true 158 | AfterOverloadedOperator: false 159 | BeforeNonEmptyParentheses: false 160 | SpaceAroundPointerQualifiers: Default 161 | SpaceBeforeRangeBasedForLoopColon: true 162 | SpaceInEmptyBlock: false 163 | SpaceInEmptyParentheses: false 164 | SpacesBeforeTrailingComments: 1 165 | SpacesInAngles: Never 166 | SpacesInConditionalStatement: false 167 | SpacesInContainerLiterals: true 168 | SpacesInCStyleCastParentheses: false 169 | SpacesInLineCommentPrefix: 170 | Minimum: 1 171 | Maximum: -1 172 | SpacesInParentheses: false 173 | SpacesInSquareBrackets: false 174 | 
SpaceBeforeSquareBrackets: false 175 | BitFieldColonSpacing: Both 176 | Standard: Latest 177 | StatementAttributeLikeMacros: 178 | - Q_EMIT 179 | StatementMacros: 180 | - Q_UNUSED 181 | - QT_REQUIRE_VERSION 182 | TabWidth: 8 183 | UseCRLF: false 184 | UseTab: Never 185 | WhitespaceSensitiveMacros: 186 | - STRINGIZE 187 | - PP_STRINGIZE 188 | - BOOST_PP_STRINGIZE 189 | - NS_SWIFT_NAME 190 | - CF_SWIFT_NAME 191 | ... 192 | 193 | -------------------------------------------------------------------------------- /examples/helloworld.cc: -------------------------------------------------------------------------------- 1 | #include "acceptor.h" 2 | #include "connector.h" 3 | #include "socket/channel.h" 4 | #include "worker_epoll.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "ucxpp/context.h" 12 | #include "ucxpp/endpoint.h" 13 | #include "ucxpp/memory.h" 14 | #include 15 | 16 | constexpr ucp_tag_t kTestTag = 0xFD709394UL; 17 | constexpr ucp_tag_t kBellTag = 0xbe11be11UL; 18 | 19 | ucxpp::task> 20 | receive_mr(std::shared_ptr ep) { 21 | using std::cout; 22 | using std::endl; 23 | uint64_t remote_addr; 24 | co_await ep->stream_recv(&remote_addr, sizeof(remote_addr)); 25 | remote_addr = ::be64toh(remote_addr); 26 | size_t rkey_length; 27 | co_await ep->stream_recv(&rkey_length, sizeof(rkey_length)); 28 | rkey_length = ::be64toh(rkey_length); 29 | std::vector rkey_buffer(rkey_length); 30 | size_t rkey_recved = 0; 31 | while (rkey_recved < rkey_length) { 32 | auto n = co_await ep->stream_recv(&rkey_buffer[rkey_recved], 33 | rkey_length - rkey_recved); 34 | rkey_recved += n; 35 | } 36 | co_return std::make_pair(remote_addr, 37 | ucxpp::remote_memory_handle(ep, rkey_buffer.data())); 38 | } 39 | 40 | ucxpp::task client(ucxpp::connector connector) { 41 | auto ep = co_await connector.connect(); 42 | ep->print(); 43 | char buffer[6]; 44 | 45 | /* Tag Send/Recv */ 46 | auto [n, sender_tag] = 47 | co_await ep->worker_ptr()->tag_recv(buffer, 
sizeof(buffer), kTestTag); 48 | std::cout << "Received " << n << " bytes from " << std::hex << sender_tag 49 | << std::dec << ": " << buffer << std::endl; 50 | std::copy_n("world", 6, buffer); 51 | co_await ep->tag_send(buffer, sizeof(buffer), kTestTag); 52 | 53 | /* Stream Send/Recv */ 54 | n = co_await ep->stream_recv(buffer, sizeof(buffer)); 55 | std::cout << "Received " << n << " bytes: " << buffer << std::endl; 56 | std::copy_n("world", 6, buffer); 57 | co_await ep->stream_send(buffer, sizeof(buffer)); 58 | 59 | /* RMA Get/Put */ 60 | auto local_mr = ucxpp::local_memory_handle::register_mem( 61 | ep->worker_ptr()->context_ptr(), buffer, sizeof(buffer)); 62 | auto [remote_addr, remote_mr] = co_await receive_mr(ep); 63 | std::cout << "Remote addr: 0x" << std::hex << remote_addr << std::dec 64 | << std::endl; 65 | co_await remote_mr.get(buffer, sizeof(buffer), remote_addr); 66 | std::cout << "Read from server: " << buffer << std::endl; 67 | std::copy_n("world", 6, buffer); 68 | co_await remote_mr.put(buffer, sizeof(buffer), remote_addr); 69 | std::cout << "Wrote to server: " << buffer << std::endl; 70 | size_t bell; 71 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 72 | 73 | /* Atomic */ 74 | uint64_t local_value = 1; 75 | uint64_t reply_value = 0; 76 | auto [atomic_raddr, atomic_mr] = co_await receive_mr(ep); 77 | 78 | /* Fetch and Add */ 79 | co_await atomic_mr.atomic_fetch_add(atomic_raddr, local_value, reply_value); 80 | std::cout << "Fetched and added on server: " << reply_value << std::endl; 81 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 82 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 83 | 84 | /* Compare and Swap */ 85 | local_value = reply_value + local_value; 86 | reply_value = 456; 87 | co_await atomic_mr.atomic_compare_swap(atomic_raddr, local_value, 88 | reply_value); 89 | std::cout << "Compared and swapped on server: " << reply_value << std::endl; 90 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 
91 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 92 | 93 | /* Swap */ 94 | local_value = 123; 95 | co_await atomic_mr.atomic_swap(atomic_raddr, local_value, reply_value); 96 | std::cout << "Swapped on server: " << reply_value << std::endl; 97 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 98 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 99 | 100 | /* Fetch and And */ 101 | local_value = 0xF; 102 | co_await atomic_mr.atomic_fetch_and(atomic_raddr, local_value, reply_value); 103 | std::cout << "Fetched and anded on server: " << reply_value << std::endl; 104 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 105 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 106 | 107 | /* Fetch and Or */ 108 | local_value = 0xF; 109 | co_await atomic_mr.atomic_fetch_or(atomic_raddr, local_value, reply_value); 110 | std::cout << "Fetched and ored on server: " << reply_value << std::endl; 111 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 112 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 113 | 114 | /* Fetch and Xor */ 115 | local_value = 0xF; 116 | co_await atomic_mr.atomic_fetch_xor(atomic_raddr, local_value, reply_value); 117 | std::cout << "Fetched and xored on server: " << reply_value << std::endl; 118 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 119 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 120 | 121 | co_await ep->flush(); 122 | co_await ep->close(); 123 | 124 | co_return; 125 | } 126 | 127 | ucxpp::task send_mr(std::shared_ptr ep, void *address, 128 | ucxpp::local_memory_handle const &local_mr) { 129 | auto packed_rkey = local_mr.pack_rkey(); 130 | auto rkey_length = ::htobe64(packed_rkey.get_length()); 131 | auto remote_addr = ::htobe64(reinterpret_cast(address)); 132 | co_await ep->stream_send(&remote_addr, sizeof(remote_addr)); 133 | co_await ep->stream_send(&rkey_length, sizeof(rkey_length)); 134 | co_await 
ep->stream_send(packed_rkey.get_buffer(), packed_rkey.get_length()); 135 | co_return; 136 | } 137 | 138 | ucxpp::task handle_endpoint(std::shared_ptr ep) { 139 | ep->print(); 140 | char buffer[6] = "Hello"; 141 | 142 | /* Tag Send/Recv */ 143 | co_await ep->tag_send(buffer, sizeof(buffer), kTestTag); 144 | auto [n, sender_tag] = 145 | co_await ep->worker_ptr()->tag_recv(buffer, sizeof(buffer), kTestTag); 146 | std::cout << "Received " << n << " bytes from " << std::hex << sender_tag 147 | << std::dec << ": " << buffer << std::endl; 148 | 149 | /* Stream Send/Recv */ 150 | std::copy_n("Hello", 6, buffer); 151 | co_await ep->stream_send(buffer, sizeof(buffer)); 152 | n = co_await ep->stream_recv(buffer, sizeof(buffer)); 153 | std::cout << "Received " << n << " bytes: " << buffer << std::endl; 154 | 155 | /* RMA Get/Put */ 156 | std::copy_n("Hello", 6, buffer); 157 | auto local_mr = ucxpp::local_memory_handle::register_mem( 158 | ep->worker_ptr()->context_ptr(), buffer, sizeof(buffer)); 159 | co_await send_mr(ep, buffer, local_mr); 160 | 161 | size_t bell; 162 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 163 | std::cout << "Written by client: " << buffer << std::endl; 164 | 165 | /* Atomic */ 166 | uint64_t value = 42; 167 | auto atomic_mr = ucxpp::local_memory_handle::register_mem( 168 | ep->worker_ptr()->context_ptr(), &value, sizeof(value)); 169 | co_await send_mr(ep, &value, atomic_mr); 170 | 171 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 172 | std::cout << "Fetched and added by client: " << value << std::endl; 173 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 174 | 175 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 176 | std::cout << "Compared and Swapped by client: " << value << std::endl; 177 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 178 | 179 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 180 | std::cout << "Swapped by client: " << value << 
std::endl; 181 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 182 | 183 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 184 | std::cout << "Fetched and Anded by client: " << value << std::endl; 185 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 186 | 187 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 188 | std::cout << "Fetched and Ored by client: " << value << std::endl; 189 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 190 | 191 | co_await ep->worker_ptr()->tag_recv(&bell, sizeof(bell), kBellTag); 192 | std::cout << "Fetched and Xored by client: " << value << std::endl; 193 | co_await ep->tag_send(&bell, sizeof(bell), kBellTag); 194 | 195 | co_await ep->flush(); 196 | co_await ep->close(); 197 | 198 | co_return; 199 | } 200 | 201 | ucxpp::task server(ucxpp::acceptor acceptor) { 202 | while (true) { 203 | auto ep = co_await acceptor.accept(); 204 | handle_endpoint(ep).detach(); 205 | } 206 | co_return; 207 | } 208 | 209 | int main(int argc, char *argv[]) { 210 | auto ctx = ucxpp::context::builder() 211 | .enable_stream() 212 | .enable_tag() 213 | .enable_rma() 214 | .enable_amo64() 215 | .enable_wakeup() 216 | .build(); 217 | auto loop = ucxpp::socket::event_loop::new_loop(); 218 | auto worker = std::make_shared(ctx); 219 | ucxpp::register_loop(worker, loop); 220 | bool close_triggered; 221 | if (argc == 2) { 222 | auto listener = std::make_shared( 223 | loop, "0.0.0.0", std::stoi(argv[1])); 224 | auto acceptor = ucxpp::acceptor(worker, listener); 225 | server(std::move(acceptor)).detach(); 226 | } else if (argc == 3) { 227 | auto connector = 228 | ucxpp::connector(worker, loop, argv[1], std::stoi(argv[2])); 229 | client(std::move(connector)).detach(); 230 | } else { 231 | std::cout << "Usage: " << argv[0] << " " << std::endl; 232 | } 233 | while (worker.use_count() > 1) { 234 | loop->poll(close_triggered); 235 | } 236 | loop->close(); 237 | loop->poll(close_triggered); 238 | return 0; 239 | 
} -------------------------------------------------------------------------------- /examples/perftest.cc: -------------------------------------------------------------------------------- 1 | #include "acceptor.h" 2 | #include "connector.h" 3 | #include "worker_epoll.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | enum class test_category { 22 | tag, 23 | stream, 24 | rma, 25 | }; 26 | 27 | enum class test_type { 28 | bw, 29 | lat, 30 | }; 31 | 32 | struct perf_context { 33 | std::pair test = {test_category::tag, 34 | test_type::bw}; 35 | size_t concurrency = 1; 36 | size_t iterations = 10000000; 37 | size_t message_size = 8; 38 | size_t warmup_iterations = 10000; 39 | bool epoll = false; 40 | std::string server_address; 41 | uint16_t server_port = 8888; 42 | std::optional core; 43 | }; 44 | 45 | constexpr ucp_tag_t k_test_tag = 0xFD709394; 46 | 47 | bool g_connected = false; 48 | 49 | static size_t g_counter = 0; 50 | static size_t g_last_counter = 0; 51 | auto g_last_tick = std::chrono::steady_clock::now(); 52 | decltype(g_last_tick) g_start; 53 | 54 | void reset_report() { 55 | g_counter = 0; 56 | g_last_counter = 0; 57 | g_last_tick = std::chrono::steady_clock::now(); 58 | } 59 | 60 | void print_report(perf_context const &perf, bool final = false) { 61 | auto counter = g_counter; 62 | auto tick = std::chrono::steady_clock::now(); 63 | auto elapsed = tick - g_last_tick; 64 | if (final) [[unlikely]] { 65 | std::chrono::duration total_elapsed = tick - g_start; 66 | ::fprintf(stdout, "----- Finished -----\n"); 67 | ::fprintf(stdout, "Total elapsed: %.6fs\n", total_elapsed.count()); 68 | ::fprintf(stdout, "Average IOPS: %zu\n", 69 | static_cast(counter / total_elapsed.count())); 70 | ::fprintf(stdout, "Average BW: %.3fMB/s\n", 71 | counter * perf.message_size / total_elapsed.count() / 1024 / 72 | 
1024); 73 | } else if (elapsed.count() > 1000000000) [[unlikely]] { 74 | ::fprintf(stdout, "%zu,%zu\n", counter, 75 | (counter - g_last_counter) * 1000000000 / elapsed.count()); 76 | g_last_counter = counter; 77 | g_last_tick = tick; 78 | } 79 | } 80 | 81 | static void bind_cpu(int core) { 82 | cpu_set_t cpuset; 83 | if (auto rc = 84 | ::pthread_getaffinity_np(::pthread_self(), sizeof(cpuset), &cpuset); 85 | rc != 0) { 86 | ::perror("failed to get original affinity"); 87 | return; 88 | } 89 | if (!CPU_ISSET(core, &cpuset)) { 90 | ::fprintf(stderr, "core %d is not in affinity mask\n", core); 91 | return; 92 | } 93 | CPU_ZERO(&cpuset); 94 | CPU_SET(core, &cpuset); 95 | if (auto rc = 96 | ::pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); 97 | rc != 0) { 98 | ::perror("failed to set affinity"); 99 | return; 100 | } 101 | } 102 | 103 | ucxpp::task sender(std::shared_ptr ep, 104 | size_t &iterations, bool warmup, 105 | perf_context const &perf) { 106 | auto [buffer, local_mr] = ucxpp::local_memory_handle::allocate_mem( 107 | ep->worker_ptr()->context_ptr(), perf.message_size); 108 | auto total_iterations = warmup ? 
perf.warmup_iterations : perf.iterations; 109 | while (iterations < total_iterations) { 110 | switch (perf.test.first) { 111 | case test_category::stream: { 112 | co_await ep->stream_send(buffer, perf.message_size); 113 | } break; 114 | default: { 115 | co_await ep->tag_send(buffer, perf.message_size, k_test_tag); 116 | } break; 117 | } 118 | iterations++; 119 | print_report(perf); 120 | } 121 | } 122 | 123 | ucxpp::task client(ucxpp::connector connector, perf_context const &perf) { 124 | auto ep = co_await connector.connect(); 125 | ep->print(); 126 | g_connected = true; 127 | 128 | ::fprintf(stderr, "Warming up...\n"); 129 | { 130 | auto tasks = std::vector>(); 131 | for (size_t i = 0; i < perf.concurrency; ++i) { 132 | tasks.emplace_back(sender(ep, g_counter, true, perf)); 133 | } 134 | for (auto &task : tasks) { 135 | co_await task; 136 | } 137 | } 138 | 139 | reset_report(); 140 | g_start = std::chrono::steady_clock::now(); 141 | ::fprintf(stderr, "Running...\n"); 142 | { 143 | auto tasks = std::vector>(); 144 | for (size_t i = 0; i < perf.concurrency; ++i) { 145 | tasks.emplace_back(sender(ep, g_counter, false, perf)); 146 | } 147 | for (auto &task : tasks) { 148 | co_await task; 149 | } 150 | } 151 | print_report(perf, true); 152 | 153 | co_await ep->flush(); 154 | co_await ep->close(); 155 | 156 | co_return; 157 | } 158 | 159 | ucxpp::task receiver(std::shared_ptr ep, 160 | size_t &iterations, bool warmup, 161 | perf_context const &perf) { 162 | auto [buffer, local_mr] = ucxpp::local_memory_handle::allocate_mem( 163 | ep->worker_ptr()->context_ptr(), perf.message_size); 164 | auto total_iterations = warmup ? 
perf.warmup_iterations : perf.iterations; 165 | switch (perf.test.first) { 166 | case test_category::stream: { 167 | auto total_bytes = perf.message_size * total_iterations; 168 | while (iterations < total_bytes) { 169 | auto n = co_await ep->stream_recv(buffer, perf.message_size); 170 | iterations += n; 171 | } 172 | } break; 173 | default: { 174 | while (iterations < total_iterations) { 175 | co_await ep->worker_ptr()->tag_recv(buffer, perf.message_size, 176 | k_test_tag); 177 | iterations++; 178 | print_report(perf); 179 | } 180 | break; 181 | } 182 | } 183 | } 184 | 185 | ucxpp::task server(ucxpp::acceptor acceptor, perf_context const &perf) { 186 | auto ep = co_await acceptor.accept(); 187 | ep->print(); 188 | g_connected = true; 189 | 190 | ::fprintf(stderr, "Warming up...\n"); 191 | { 192 | auto tasks = std::vector>(); 193 | for (size_t i = 0; i < perf.concurrency; ++i) { 194 | tasks.emplace_back(receiver(ep, g_counter, true, perf)); 195 | } 196 | for (auto &task : tasks) { 197 | co_await task; 198 | } 199 | } 200 | 201 | ::fprintf(stderr, "Running...\n"); 202 | reset_report(); 203 | g_start = std::chrono::steady_clock::now(); 204 | { 205 | std::vector> tasks; 206 | for (size_t i = 0; i < perf.concurrency; ++i) { 207 | tasks.emplace_back(receiver(ep, g_counter, false, perf)); 208 | } 209 | for (auto &task : tasks) { 210 | co_await task; 211 | } 212 | } 213 | if (perf.test.first == test_category::stream) { 214 | g_counter = perf.iterations; 215 | } 216 | print_report(perf, true); 217 | 218 | co_await ep->flush(); 219 | co_await ep->close(); 220 | 221 | co_return; 222 | } 223 | /* Write the command-line help to stderr; argv0 is substituted for the program name. The defaults quoted in the text must match the initialisers in perf_context (iterations defaults to 10000000). */ 224 | void print_usage(char const *argv0) { 225 | ::fprintf(stderr, 226 | "Usage: %s [options] \n-c\tSpecify the core\n" 227 | "-t\tSpecify test type (tag, stream)\n" 228 | "-o\tSpecifies concurrent requests (default: 1)\n" 229 | "-n\tSpecifies number of iterations (default: 10000000)\n" 230 | "-s\tSpecifies message size (default: 8)\n" 231 | "-w\tSpecifies number of warmup iterations 
(default: 10000)\n" 232 | "-e\tUse epoll for worker progress (default: false)\n" 233 | "-p\tServer port (default 8888)\n", 234 | argv0); 235 | } 236 | /* Entry point: parse the command line into perf_context, build the UCX context and worker, optionally pin the thread. Option values are fetched with args.at(++i) so that an option given as the last argument raises std::out_of_range instead of reading past the end of the vector. */ 237 | int main(int argc, char *argv[]) { 238 | std::ios_base::sync_with_stdio(false); 239 | auto args = std::vector(argv + 1, argv + argc); 240 | perf_context perf; 241 | for (size_t i = 0; i < args.size(); ++i) { 242 | if (args[i] == "-h") { 243 | print_usage(argv[0]); 244 | return 0; 245 | } else if (args[i] == "-t") { 246 | if (args.at(++i) == "tag") { 247 | perf.test.first = test_category::tag; 248 | } else if (args[i] == "stream") { 249 | perf.test.first = test_category::stream; 250 | } else { 251 | ::fprintf(stderr, "Unknown test type: %s\n", args[i].c_str()); 252 | return 1; 253 | } 254 | } else if (args[i] == "-c") { 255 | perf.core = std::stoi(args.at(++i)); 256 | } else if (args[i] == "-o") { 257 | perf.concurrency = std::stoul(args.at(++i)); 258 | } else if (args[i] == "-s") { 259 | perf.message_size = std::stoul(args.at(++i)); 260 | } else if (args[i] == "-n") { 261 | perf.iterations = std::stoul(args.at(++i)); 262 | } else if (args[i] == "-w") { 263 | perf.warmup_iterations = std::stoul(args.at(++i)); 264 | } else if (args[i] == "-e") { 265 | perf.epoll = true; 266 | } else if (args[i] == "-p") { 267 | perf.server_port = std::stoul(args.at(++i)); 268 | } else if (args[i][0] == '-') { 269 | ::fprintf(stderr, "unknown option: %s\n", args[i].c_str()); 270 | return 1; 271 | } else { 272 | perf.server_address = args[i]; 273 | } 274 | } 275 | auto ctx = [&]() { 276 | auto builder = ucxpp::context::builder(); 277 | if (perf.test.first == test_category::stream) { 278 | builder.enable_stream(); 279 | } else { 280 | builder.enable_tag(); 281 | } 282 | if (perf.epoll) { 283 | builder.enable_wakeup(); 284 | } 285 | return builder.build(); 286 | }(); 287 | auto loop = ucxpp::socket::event_loop::new_loop(); 288 | auto worker = [&]() { return std::make_shared(ctx); }(); 289 | if (perf.core.has_value()) { 290 | bind_cpu(perf.core.value()); 
291 | } else { 292 | ::fprintf(stderr, 293 | "Warning: no core specified, using all cores available\n"); 294 | } 295 | 296 | if (perf.server_address.empty()) { 297 | auto listener = std::make_shared( 298 | loop, "0.0.0.0", perf.server_port); 299 | auto acceptor = ucxpp::acceptor(worker, listener); 300 | server(std::move(acceptor), perf).detach(); 301 | } else { 302 | auto connector = 303 | ucxpp::connector(worker, loop, perf.server_address, perf.server_port); 304 | client(std::move(connector), perf).detach(); 305 | } 306 | 307 | if (!perf.epoll) { 308 | bool dummy; 309 | while (!g_connected) { 310 | loop->poll(dummy); 311 | } 312 | loop->close(); 313 | loop = nullptr; 314 | while (worker.use_count() > 1) { 315 | worker->progress(); 316 | } 317 | } else { 318 | ucxpp::register_loop(worker, loop); 319 | bool dummy = false; 320 | while (worker.use_count() > 1) { 321 | loop->poll(dummy); 322 | } 323 | loop->close(); 324 | loop->poll(dummy); 325 | } 326 | 327 | return 0; 328 | } 329 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /include/ucxpp/awaitable.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include "ucxpp/error.h" 17 | 18 | #include "ucxpp/detail/debug.h" 19 | 20 | namespace ucxpp { 21 | 22 | class base_awaitable { 23 | protected: 24 | std::coroutine_handle<> h_; 25 | ucs_status_t status_; 26 | base_awaitable() : h_(nullptr), status_(UCS_OK) {} 27 | bool check_request_ready(ucs_status_ptr_t request) { 28 | if (UCS_PTR_IS_PTR(request)) [[unlikely]] { 29 | status_ = UCS_INPROGRESS; 30 | return false; 31 | } else if (UCS_PTR_IS_ERR(request)) [[unlikely]] { 32 | status_ = UCS_PTR_STATUS(request); 33 | UCXPP_LOG_ERROR("%s", ::ucs_status_string(status_)); 34 | return true; 35 | } 36 | 37 | status_ = UCS_OK; 38 | return true; 39 | } 40 | }; 41 | 42 | /* Common awaitable class for send-like callbacks */ 43 | template class send_awaitable : public 
base_awaitable { 44 | public: 45 | static void send_cb(void *request, ucs_status_t status, void *user_data) { 46 | auto self = reinterpret_cast(user_data); 47 | self->status_ = status; 48 | ::ucp_request_free(request); 49 | self->h_.resume(); 50 | } 51 | 52 | ucp_request_param_t build_param() { 53 | ucp_request_param_t send_param; 54 | send_param.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | 55 | UCP_OP_ATTR_FIELD_USER_DATA | 56 | UCP_OP_ATTR_FLAG_MULTI_SEND; 57 | send_param.cb.send = &send_cb; 58 | send_param.user_data = this; 59 | return send_param; 60 | } 61 | 62 | bool await_suspend(std::coroutine_handle<> h) { 63 | h_ = h; 64 | return status_ == UCS_INPROGRESS; 65 | } 66 | 67 | void await_resume() const { check_ucs_status(status_, "operation failed"); } 68 | }; 69 | 70 | class stream_send_awaitable : public send_awaitable { 71 | ucp_ep_h ep_; 72 | void const *buffer_; 73 | size_t length_; 74 | friend class send_awaitable; 75 | 76 | public: 77 | stream_send_awaitable(ucp_ep_h ep, void const *buffer, size_t length) 78 | : ep_(ep), buffer_(buffer), length_(length) {} 79 | 80 | bool await_ready() noexcept { 81 | auto send_param = build_param(); 82 | auto request = ::ucp_stream_send_nbx(ep_, buffer_, length_, &send_param); 83 | return check_request_ready(request); 84 | } 85 | }; 86 | 87 | class tag_send_awaitable : public send_awaitable { 88 | ucp_ep_h ep_; 89 | ucp_tag_t tag_; 90 | void const *buffer_; 91 | size_t length_; 92 | friend class send_awaitable; 93 | 94 | public: 95 | tag_send_awaitable(ucp_ep_h ep, void const *buffer, size_t length, 96 | ucp_tag_t tag) 97 | : ep_(ep), tag_(tag), buffer_(buffer), length_(length) {} 98 | 99 | bool await_ready() noexcept { 100 | auto send_param = build_param(); 101 | auto request = ::ucp_tag_send_nbx(ep_, buffer_, length_, tag_, &send_param); 102 | return check_request_ready(request); 103 | } 104 | }; 105 | 106 | class rma_put_awaitable : public send_awaitable { 107 | ucp_ep_h ep_; 108 | void const *buffer_; 109 | size_t 
length_; 110 | uint64_t remote_addr_; 111 | ucp_rkey_h rkey_; 112 | friend class send_awaitable; 113 | 114 | public: 115 | rma_put_awaitable(ucp_ep_h ep, void const *buffer, size_t length, 116 | uint64_t remote_addr, ucp_rkey_h rkey) 117 | : ep_(ep), buffer_(buffer), length_(length), remote_addr_(remote_addr), 118 | rkey_(rkey) {} 119 | 120 | bool await_ready() noexcept { 121 | auto send_param = build_param(); 122 | auto request = 123 | ::ucp_put_nbx(ep_, buffer_, length_, remote_addr_, rkey_, &send_param); 124 | return check_request_ready(request); 125 | } 126 | }; 127 | 128 | class rma_get_awaitable : public send_awaitable { 129 | ucp_ep_h ep_; 130 | void *buffer_; 131 | size_t length_; 132 | uint64_t remote_addr_; 133 | ucp_rkey_h rkey_; 134 | friend class send_awaitable; 135 | 136 | public: 137 | rma_get_awaitable(ucp_ep_h ep, void *buffer, size_t length, 138 | uint64_t remote_addr, ucp_rkey_h rkey) 139 | : ep_(ep), buffer_(buffer), length_(length), remote_addr_(remote_addr), 140 | rkey_(rkey) {} 141 | 142 | bool await_ready() noexcept { 143 | auto send_param = build_param(); 144 | auto request = 145 | ::ucp_get_nbx(ep_, buffer_, length_, remote_addr_, rkey_, &send_param); 146 | return check_request_ready(request); 147 | } 148 | }; 149 | 150 | template 151 | class rma_atomic_awaitable : public send_awaitable> { 152 | static_assert(sizeof(T) == 4 || sizeof(T) == 8, "Only 4-byte and 8-byte " 153 | "integers are supported"); 154 | ucp_ep_h ep_; 155 | ucp_atomic_op_t const op_; 156 | void const *buffer_; 157 | uint64_t remote_addr_; 158 | ucp_rkey_h rkey_; 159 | void *reply_buffer_; 160 | friend class send_awaitable>; 161 | 162 | public: 163 | rma_atomic_awaitable(ucp_ep_h ep, ucp_atomic_op_t const op, 164 | void const *buffer, uint64_t remote_addr, 165 | ucp_rkey_h rkey, void *reply_buffer = nullptr) 166 | : ep_(ep), op_(op), buffer_(buffer), remote_addr_(remote_addr), 167 | rkey_(rkey), reply_buffer_(reply_buffer) { 168 | if (op == UCP_ATOMIC_OP_SWAP || op == 
UCP_ATOMIC_OP_CSWAP) { 169 | assert(reply_buffer != nullptr); 170 | } 171 | } 172 | 173 | bool await_ready() noexcept { 174 | auto send_param = this->build_param(); 175 | send_param.op_attr_mask |= UCP_OP_ATTR_FIELD_DATATYPE; 176 | send_param.datatype = ucp_dt_make_contig(sizeof(T)); 177 | if (reply_buffer_ != nullptr) { 178 | send_param.op_attr_mask |= UCP_OP_ATTR_FIELD_REPLY_BUFFER; 179 | send_param.reply_buffer = reply_buffer_; 180 | } 181 | auto request = ::ucp_atomic_op_nbx(ep_, op_, buffer_, 1, remote_addr_, 182 | rkey_, &send_param); 183 | return this->check_request_ready(request); 184 | } 185 | }; 186 | 187 | /* These awaitables are not on "hot" path so they can hold a shared_ptr */ 188 | class endpoint; 189 | class ep_flush_awaitable : public send_awaitable { 190 | std::shared_ptr endpoint_; 191 | friend class send_awaitable; 192 | 193 | public: 194 | ep_flush_awaitable(std::shared_ptr endpoint); 195 | bool await_ready() noexcept; 196 | }; 197 | 198 | class ep_close_awaitable : public send_awaitable { 199 | std::shared_ptr endpoint_; 200 | friend class send_awaitable; 201 | 202 | public: 203 | ep_close_awaitable(std::shared_ptr endpoint); 204 | bool await_ready() noexcept; 205 | }; 206 | 207 | class worker; 208 | class worker_flush_awaitable : public send_awaitable { 209 | std::shared_ptr worker_; 210 | friend class send_awaitable; 211 | 212 | public: 213 | worker_flush_awaitable(std::shared_ptr worker); 214 | 215 | bool await_ready() noexcept; 216 | }; 217 | 218 | /* Common awaitable class for stream-recv-like callbacks. worker_ and request_ carry default initialisers because the three-argument constructor sets neither and the cancellable constructor does not set request_; cancel() tests request_ against nullptr, so it must never be indeterminate. */ 219 | class stream_recv_awaitable : base_awaitable { 220 | private: 221 | ucp_ep_h ep_; 222 | ucp_worker_h worker_ = nullptr; 223 | size_t received_; 224 | void *buffer_; 225 | size_t length_; 226 | void *request_ = nullptr; /* set by await_ready() only when the receive is still in progress */ 227 | 228 | public: 229 | stream_recv_awaitable(ucp_ep_h ep, void *buffer, size_t length) 230 | : ep_(ep), received_(0), buffer_(buffer), length_(length) {} 231 | 232 | stream_recv_awaitable(ucp_ep_h ep, ucp_worker_h worker, void 
*buffer, 233 | size_t length, stream_recv_awaitable *&cancel) 234 | : ep_(ep), worker_(worker), received_(0), buffer_(buffer), 235 | length_(length) { 236 | cancel = this; 237 | } 238 | 239 | static void stream_recv_cb(void *request, ucs_status_t status, 240 | size_t received, void *user_data) { 241 | auto self = reinterpret_cast(user_data); 242 | self->status_ = status; 243 | self->received_ = received; 244 | ::ucp_request_free(request); 245 | self->h_.resume(); 246 | } 247 | 248 | bool await_ready() noexcept { 249 | ucp_request_param_t stream_recv_param; 250 | stream_recv_param.op_attr_mask = 251 | UCP_OP_ATTR_FIELD_CALLBACK | UCP_OP_ATTR_FIELD_USER_DATA; 252 | stream_recv_param.cb.recv_stream = &stream_recv_cb; 253 | stream_recv_param.user_data = this; 254 | auto request = ::ucp_stream_recv_nbx(ep_, buffer_, length_, &received_, 255 | &stream_recv_param); 256 | 257 | if (!check_request_ready(request)) { 258 | request_ = request; 259 | return false; 260 | } 261 | 262 | return true; 263 | } 264 | 265 | bool await_suspend(std::coroutine_handle<> h) { 266 | h_ = h; 267 | return status_ == UCS_INPROGRESS; 268 | } 269 | 270 | size_t await_resume() const { 271 | check_ucs_status(status_, "operation failed"); 272 | return received_; 273 | } 274 | 275 | /* NOTE(review): unlike tag_recv_awaitable::cancel(), request_ is not reset to nullptr after cancelling here, and the visible await_resume() does not clear it either - confirm this is intentional. */ void cancel() { 276 | if (request_ != nullptr) { 277 | ::ucp_request_cancel(worker_, request_); 278 | } 279 | } 280 | }; 281 | 282 | /* Common awaitable class for tag-recv-like callbacks. await_ready() posts the receive with ucp_tag_recv_nbx(); tag_recv_cb() records the result and resumes the stored coroutine handle; await_resume() returns the received length and the sender tag. */ 283 | class tag_recv_awaitable : public base_awaitable { 284 | 285 | private: 286 | ucp_worker_h worker_; 287 | /* Request returned by ucp_tag_recv_nbx() when check_request_ready() returned false; nullptr otherwise. */ void *request_; 288 | void *buffer_; 289 | size_t length_; 290 | ucp_tag_t tag_; 291 | ucp_tag_t tag_mask_; 292 | ucp_tag_recv_info_t recv_info_; 293 | 294 | public: 295 | /* Stores the receive parameters only; nothing is posted until await_ready() runs. */ tag_recv_awaitable(ucp_worker_h worker, void *buffer, size_t length, 296 | ucp_tag_t tag, ucp_tag_t tag_mask) 297 | : worker_(worker), request_(nullptr), buffer_(buffer), length_(length), 298 | tag_(tag), tag_mask_(tag_mask) {} 299 | 300 | /* Same as the constructor above, and additionally writes the address of this awaitable into the caller-provided pointer reference `cancel`. */
tag_recv_awaitable(ucp_worker_h worker, void *buffer, size_t length, 301 | ucp_tag_t tag, ucp_tag_t tag_mask, 302 | tag_recv_awaitable *&cancel) 303 | : tag_recv_awaitable(worker, buffer, length, tag, tag_mask) { 304 | cancel = this; 305 | } 306 | 307 | /* Completion callback registered in await_ready(): user_data is the awaitable. Records the status and the length and sender tag from tag_info, frees the request, then resumes the stored coroutine handle. */ static void tag_recv_cb(void *request, ucs_status_t status, 308 | ucp_tag_recv_info_t const *tag_info, 309 | void *user_data) { 310 | auto self = reinterpret_cast(user_data); 311 | self->status_ = status; 312 | self->recv_info_.length = tag_info->length; 313 | self->recv_info_.sender_tag = tag_info->sender_tag; 314 | ::ucp_request_free(request); 315 | self->h_.resume(); 316 | } 317 | 318 | /* Posts the receive with callback, user data and receive-info fields set. Returns true when check_request_ready(request) returns true; otherwise remembers the request (used by cancel()) and returns false. */ bool await_ready() noexcept { 319 | ucp_request_param_t tag_recv_param; 320 | tag_recv_param.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK | 321 | UCP_OP_ATTR_FIELD_USER_DATA | 322 | UCP_OP_ATTR_FIELD_RECV_INFO; 323 | tag_recv_param.cb.recv = &tag_recv_cb; 324 | tag_recv_param.user_data = this; 325 | tag_recv_param.recv_info.tag_info = &recv_info_; 326 | 327 | auto request = ::ucp_tag_recv_nbx(worker_, buffer_, length_, tag_, 328 | tag_mask_, &tag_recv_param); 329 | 330 | if (!check_request_ready(request)) { 331 | request_ = request; 332 | return false; 333 | } 334 | return true; 335 | } 336 | 337 | /* Stores the coroutine handle for tag_recv_cb() and returns true only while status_ is still UCS_INPROGRESS. */ bool await_suspend(std::coroutine_handle<> h) { 338 | h_ = h; 339 | return status_ == UCS_INPROGRESS; 340 | } 341 | 342 | /* Clears request_, passes status_ to check_ucs_status() (defined elsewhere), then returns the pair (recv_info_.length, recv_info_.sender_tag). */ std::pair await_resume() { 343 | request_ = nullptr; 344 | check_ucs_status(status_, "error in ucp_tag_recv_nbx"); 345 | return std::make_pair(recv_info_.length, recv_info_.sender_tag); 346 | } 347 | 348 | /* If a request is recorded, asks UCX to cancel it via ucp_request_cancel() and forgets it. */ void cancel() { 349 | if (request_) { 350 | ::ucp_request_cancel(worker_, request_); 351 | request_ = nullptr; 352 | } 353 | } 354 | }; 355 | 356 | } // namespace ucxpp -------------------------------------------------------------------------------- /include/ucxpp/memory.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 
"ucxpp/awaitable.h" 7 | #include "ucxpp/context.h" 8 | 9 | #include "ucxpp/detail/noncopyable.h" 10 | 11 | namespace ucxpp { 12 | 13 | class endpoint; 14 | 15 | /** 16 | * @brief A serializable UCX memory handle ready to send to peer. 17 | * The (buffer, length) constructor is private; local_memory_handle is declared a friend and can therefore call it. 18 | */ 19 | class packed_memory_rkey : public noncopyable { 20 | friend class local_memory_handle; 21 | void *buffer_; 22 | size_t length_; 23 | packed_memory_rkey(void *buffer, size_t length); 24 | 25 | public: 26 | packed_memory_rkey(packed_memory_rkey &&other); 27 | /* NOTE(review): the accessors below presumably expose buffer_ and length_; their definitions are not in this header - confirm against memory.cc. */ void *get_buffer(); 28 | void const *get_buffer() const; 29 | size_t get_length() const; 30 | ~packed_memory_rkey(); 31 | }; 32 | 33 | /** 34 | * @brief Represents a registered local memory region. 35 | * 36 | */ 37 | class local_memory_handle : public noncopyable { 38 | std::shared_ptr ctx_; 39 | ucp_mem_h mem_; 40 | 41 | public: 42 | /** 43 | * @brief Construct a new local memory handle object 44 | * 45 | * @param ctx UCX context 46 | * @param mem UCX memory handle 47 | */ 48 | local_memory_handle(std::shared_ptr ctx, ucp_mem_h mem); 49 | 50 | /** 51 | * @brief Construct a new local memory handle object 52 | * 53 | * @param other Another local memory handle to move from 54 | */ 55 | local_memory_handle(local_memory_handle &&other); 56 | 57 | /** 58 | * @brief Allocate and register a local memory region 59 | * 60 | * @param ctx UCX context 61 | * @param length Desired length of the memory region 62 | * @return std::pair A pair of a pointer to the 63 | * allocated memory region and the local memory handle 64 | */ 65 | static std::pair 66 | allocate_mem(std::shared_ptr ctx, size_t length); 67 | /** @brief Register a local memory region with the given context. NOTE(review): presumably registers the caller-provided range starting at address and spanning length, without allocating - confirm against memory.cc. @param ctx UCX context @param address Start of the region @param length Length of the region @return local_memory_handle The handle for the region */ static local_memory_handle register_mem(std::shared_ptr ctx, 68 | void *address, size_t length); 69 | 70 | /** 71 | * @brief Get the native UCX memory handle 72 | * 73 | * @return ucp_mem_h 74 | */ 75 | ucp_mem_h handle() const; 76 | 77 | /** 78 | * @brief Pack the information needed for remote access to the memory region. 79 | * It is intended to be sent to the remote peer. 
80 | * 81 | * @return packed_memory_rkey The packed memory handle 82 | */ 83 | packed_memory_rkey pack_rkey() const; 84 | 85 | /** 86 | * @brief Destroy the local memory handle object. The memory region will be 87 | * deregistered. 88 | * 89 | */ 90 | ~local_memory_handle(); 91 | }; 92 | 93 | /** 94 | * @brief Represents a remote memory region. Note that this does not contain the 95 | * remote address. It should be kept by the user. 96 | * 97 | */ 98 | class remote_memory_handle : public noncopyable { 99 | std::shared_ptr endpoint_; 100 | ucp_ep_h ep_; 101 | ucp_rkey_h rkey_; 102 | 103 | public: 104 | /** 105 | * @brief Construct a new remote memory handle object. All subsequent remote 106 | * memory access will happen on the given endpoint. 107 | * 108 | * @param endpoint UCX endpoint 109 | * @param packed_rkey_buffer Packed remote key buffer received from remote 110 | * peer 111 | */ 112 | remote_memory_handle(std::shared_ptr endpoint, 113 | void const *packed_rkey_buffer); 114 | 115 | /** 116 | * @brief Construct a new remote memory handle object 117 | * 118 | * @param other Another remote memory handle to move from 119 | */ 120 | remote_memory_handle(remote_memory_handle &&other); 121 | 122 | /** 123 | * @brief Write to the remote memory region 124 | * 125 | * @param buffer Local buffer to write from 126 | * @param length Length of the buffer 127 | * @param remote_addr Remote address to write to 128 | * @return rma_put_awaitable A coroutine that returns upon completion 129 | */ 130 | rma_put_awaitable put(void const *buffer, size_t length, 131 | uint64_t remote_addr) const; 132 | 133 | /** 134 | * @brief Read from the remote memory region 135 | * 136 | * @param buffer Local buffer to read into 137 | * @param length Length of the buffer 138 | * @param remote_addr Remote address to read from 139 | * @return rma_get_awaitable A coroutine that returns upon completion 140 | */ 141 | rma_get_awaitable get(void *buffer, size_t length, 142 | uint64_t remote_addr) 
const; 143 | 144 | /** 145 | * \copydoc endpoint::put 146 | * 147 | * NOTE(review): same signature as put() in this class and presumably an alias of it; the copydoc target above names the endpoint class rather than this one - confirm it resolves as intended. 148 | */ 148 | rma_put_awaitable write(void const *buffer, size_t length, 149 | uint64_t remote_addr) const; 150 | 151 | /** 152 | * \copydoc endpoint::get 153 | * 154 | * NOTE(review): same signature as get() in this class and presumably an alias of it; the copydoc target above names the endpoint class rather than this one - confirm it resolves as intended. 155 | */ 155 | rma_get_awaitable read(void *buffer, size_t length, 156 | uint64_t remote_addr) const; 157 | 158 | /** 159 | * @brief Get the memory region's endpoint object 160 | * 161 | * @return std::shared_ptr The memory region's endpoint object 162 | */ 163 | std::shared_ptr endpoint_ptr() const; 164 | 165 | /** 166 | * @brief Get the native UCX rkey handle 167 | * 168 | * @return ucp_rkey_h The native UCX rkey handle 169 | */ 170 | ucp_rkey_h handle() const; 171 | 172 | /** 173 | * @brief Atomically fetch and add a value to the remote memory region 174 | * 175 | * @tparam T The type of the value to add, should be of 4 bytes or 8 bytes 176 | * long 177 | * @param remote_addr The remote address to add to 178 | * @param delta The value to add 179 | * @param old_value A reference to a variable to store the old value 180 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 181 | * The old value is placed in old_value 182 | * 183 | * NOTE(review): the awaitable is constructed from the addresses of delta and old_value, so presumably both must stay valid until the awaitable has completed - confirm against rma_atomic_awaitable. The other atomic_* members below pass their operands the same way. 184 | */ 183 | template 184 | rma_atomic_awaitable atomic_fetch_add(uint64_t remote_addr, T const &delta, 185 | T &old_value) const { 186 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_ADD, &delta, remote_addr, 187 | rkey_, &old_value); 188 | } 189 | 190 | /** 191 | * @brief Atomically fetch and AND a value to the remote memory region 192 | * 193 | * @tparam T The type of the value to AND, should be of 4 bytes or 8 bytes 194 | * long 195 | * @param remote_addr The remote address to AND to 196 | * @param bits The other operand of the AND operation 197 | * @param old_value A reference to a variable to store the old value 198 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 
199 | * The old value is placed in old_value 200 | */ 201 | template 202 | rma_atomic_awaitable atomic_fetch_and(uint64_t remote_addr, T const &bits, 203 | T &old_value) const { 204 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_AND, &bits, remote_addr, 205 | rkey_, &old_value); 206 | } 207 | 208 | /** 209 | * @brief Atomically fetch and OR a value to the remote memory region 210 | * 211 | * @tparam T The type of the value to OR, should be of 4 bytes or 8 bytes 212 | * long 213 | * @param remote_addr The remote address to OR to 214 | * @param bits The other operand of the OR operation 215 | * @param old_value A reference to a variable to store the old value 216 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 217 | * The old value is placed in old_value 218 | */ 219 | template 220 | rma_atomic_awaitable atomic_fetch_or(uint64_t remote_addr, T const &bits, 221 | T &old_value) const { 222 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_OR, &bits, remote_addr, 223 | rkey_, &old_value); 224 | } 225 | 226 | /** 227 | * @brief Atomically fetch and XOR a value to the remote memory region 228 | * 229 | * @tparam T The type of the value to XOR, should be of 4 bytes or 8 bytes 230 | * long 231 | * @param remote_addr The remote address to XOR to 232 | * @param bits The other operand of the XOR operation 233 | * @param old_value A reference to a variable to store the old value 234 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 
235 | * The old value is placed in old_value 236 | */ 237 | template 238 | rma_atomic_awaitable atomic_fetch_xor(uint64_t remote_addr, T const &bits, 239 | T &old_value) const { 240 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_XOR, &bits, remote_addr, 241 | rkey_, &old_value); 242 | } 243 | 244 | /** 245 | * @brief Atomically add to a value in the remote memory region 246 | * 247 | * @tparam T The type of the value to add, should be of 4 bytes or 8 bytes 248 | * long 249 | * @param remote_addr The remote address to add to 250 | * @param delta The value to add 251 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 252 | */ 253 | template 254 | rma_atomic_awaitable atomic_add(uint64_t remote_addr, 255 | T const &delta) const { 256 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_ADD, &delta, remote_addr, 257 | rkey_); 258 | } 259 | 260 | /** 261 | * @brief Atomically AND a value in the remote memory region 262 | * 263 | * @tparam T The type of the value to AND, should be of 4 bytes or 8 bytes 264 | * long 265 | * @param remote_addr The remote address to AND to 266 | * @param bits The other operand of the AND operation 267 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 268 | */ 269 | template 270 | rma_atomic_awaitable atomic_and(uint64_t remote_addr, 271 | T const &bits) const { 272 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_AND, &bits, remote_addr, 273 | rkey_); 274 | } 275 | 276 | /** 277 | * @brief Atomically OR to a value in the remote memory region 278 | * 279 | * @tparam T The type of the value to OR, should be of 4 bytes or 8 bytes long 280 | * @param remote_addr The remote address to OR to 281 | * @param bits The other operand of the OR operation 282 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 
283 | */ 284 | template 285 | rma_atomic_awaitable atomic_or(uint64_t remote_addr, T const &bits) const { 286 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_OR, &bits, remote_addr, 287 | rkey_); 288 | } 289 | 290 | /** 291 | * @brief Atomically XOR a value to the remote memory region 292 | * 293 | * @tparam T The type of the value to XOR, should be of 4 bytes or 8 bytes 294 | * long 295 | * @param remote_addr The remote address to XOR to 296 | * @param bits The other operand of the XOR operation 297 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 298 | * 299 | */ 300 | template 301 | rma_atomic_awaitable atomic_xor(uint64_t remote_addr, 302 | T const &bits) const { 303 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_XOR, &bits, remote_addr, 304 | rkey_); 305 | } 306 | 307 | /** 308 | * @brief Atomically swap a value in the remote memory region 309 | * 310 | * @tparam T The type of the value to swap, should be of 4 bytes or 8 bytes 311 | * long 312 | * @param remote_addr The remote address to swap 313 | * @param new_value The new value to swap in 314 | * @param old_value A reference to a variable to store the old value 315 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 
316 | * The old value is placed in old_value 317 | */ 318 | template 319 | rma_atomic_awaitable atomic_swap(uint64_t remote_addr, T const &new_value, 320 | T &old_value) const { 321 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_SWAP, &new_value, 322 | remote_addr, rkey_, &old_value); 323 | } 324 | 325 | /** 326 | * @brief Atomically compare and swap a value in the remote memory region 327 | * 328 | * @tparam T The type of the value to swap, should be of 4 bytes or 8 bytes 329 | * long 330 | * @param raddr The remote address to swap 331 | * @param expected The expected value to compare against 332 | * @param desired_and_old A reference to a variable to store the desired new 333 | * value and the old value swapped out 334 | * @return rma_atomic_awaitable A coroutine that returns upon completion. 335 | * The old value is placed in desired_and_old 336 | */ 337 | template 338 | rma_atomic_awaitable atomic_compare_swap(uint64_t raddr, T const &expected, 339 | T &desired_and_old) const { 340 | return rma_atomic_awaitable(ep_, UCP_ATOMIC_OP_CSWAP, &expected, raddr, 341 | rkey_, &desired_and_old); 342 | } 343 | 344 | /** 345 | * @brief Destroy the remote memory handle object and the associated rkey 346 | * handle 347 | * 348 | */ 349 | ~remote_memory_handle(); 350 | }; 351 | 352 | } // namespace ucxpp --------------------------------------------------------------------------------