├── Makefile.am ├── autogen.sh ├── src ├── team_lib │ ├── ucx │ │ ├── allgather │ │ │ ├── allgather.h │ │ │ └── allgather_ring.c │ │ ├── alltoallv │ │ │ └── alltoallv.h │ │ ├── fanout │ │ │ ├── fanout.h │ │ │ └── fanout_linear.c │ │ ├── fanin │ │ │ ├── fanin.h │ │ │ └── fanin_linear.c │ │ ├── reduce │ │ │ ├── reduce.h │ │ │ └── reduce_linear.c │ │ ├── barrier │ │ │ └── barrier.h │ │ ├── bcast │ │ │ ├── bcast.h │ │ │ ├── bcast_linear.c │ │ │ └── bcast_knomial.c │ │ ├── alltoall │ │ │ ├── alltoall.h │ │ │ └── alltoall_linear_shift.c │ │ ├── allreduce │ │ │ ├── allreduce.c │ │ │ ├── allreduce.h │ │ │ └── allreduce_knomial.h │ │ ├── xccl_ucx_team.h │ │ ├── Makefile.am │ │ ├── xccl_ucx_ep.h │ │ ├── xccl_ucx_tag.h │ │ ├── xccl_ucx_context.h │ │ └── xccl_ucx_team.c │ ├── multirail │ │ ├── Makefile.am │ │ └── xccl_mrail_lib.h │ ├── hmc │ │ ├── Makefile.am │ │ └── xccl_hmc_lib.h │ ├── nccl │ │ ├── Makefile.am │ │ ├── xccl_nccl_collective.h │ │ └── xccl_nccl_lib.h │ ├── sharp │ │ ├── Makefile.am │ │ ├── xccl_sharp_collective.h │ │ ├── xccl_sharp_map.h │ │ └── xccl_sharp_lib.h │ ├── mpod │ │ ├── Makefile.am │ │ ├── xccl_mpod_allreduce.c │ │ ├── xccl_mpod_barrier.c │ │ ├── xccl_mpod_alltoallv.c │ │ ├── xccl_mpod_cpu.c │ │ ├── xccl_mpod_nccl_reqs.c │ │ ├── xccl_mpod_bcast.c │ │ └── xccl_mpod_allgather.c │ └── hier │ │ ├── Makefile.am │ │ ├── xccl_hier_sbgp.h │ │ ├── xccl_hier_context.h │ │ ├── xccl_hier_task_schedule.h │ │ ├── xccl_hier_lib.h │ │ └── xccl_hier_team.h ├── core │ ├── xccl_collective.h │ ├── xccl_lib.h │ ├── xccl_context.h │ ├── xccl_team_lib.c │ ├── xccl_tasks_queue.h │ ├── xccl_mm.h │ ├── xccl_global_opts.h │ ├── xccl_progress_queue.h │ ├── xccl_finalize.c │ ├── xccl_lock_free_tasks_queue.h │ ├── xccl_team.h │ ├── xccl_global_opts.c │ ├── xccl_progress_queue.c │ ├── xccl_tasks_queue.c │ ├── xccl_query.c │ ├── xccl_schedule.c │ ├── xccl_schedule.h │ ├── xccl_mm.c │ ├── xccl_collective.c │ ├── xccl_ucs.h │ ├── xccl_lib.c │ ├── xccl_team.c │ └── 
xccl_lock_free_tasks_queue.c ├── utils │ ├── cuda │ │ ├── kernels │ │ │ ├── xccl_cuda_kernel.cu │ │ │ └── Makefile.am │ │ ├── Makefile.am │ │ └── cuda_mem_component.h │ ├── xccl_log.h │ ├── utils.h │ └── mem_component.h ├── api │ ├── xccl_version.h │ ├── xccl_status.h │ ├── xccl_def.h │ └── xccl_tls.h └── Makefile.am ├── .gitignore ├── test ├── test_mpi.h ├── test.c ├── test_utils.h ├── test_mpi_barrier.c ├── test_mpi_create_team_nb.c ├── Makefile.am ├── test_mpi_allreduce.c ├── test_mpi_reduce.c ├── test_mpi_cuda_allreduce.c ├── test_mpi_allgather.c ├── test_mpi_cuda_reduce.c ├── test_mpi_bcast.c ├── test_mpi_fanout_get.c ├── test_mpi_alltoall.c └── test_mt.c ├── cudalt.py ├── m4 ├── hmc.m4 ├── nccl.m4 ├── sharp.m4 └── ucx.m4 ├── LICENSE └── .github └── workflows └── main.yml /Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2020 Mellanox Technologies. All rights reserved. 3 | # $HEADER$ 4 | # 5 | SUBDIRS = src 6 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | rm -rf autom4te.cache 3 | mkdir -p config/m4 config/aux 4 | autoreconf -f --install || exit 1 5 | rm -rf autom4te.cache 6 | exit 0 7 | -------------------------------------------------------------------------------- /src/team_lib/ucx/allgather/allgather.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLGATHER_H_ 2 | #define ALLGATHER_H_ 3 | #include "../xccl_ucx_lib.h" 4 | 5 | xccl_status_t xccl_ucx_allgather_ring_start(xccl_ucx_collreq_t *req); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/team_lib/ucx/alltoallv/alltoallv.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLTOALLV_H_ 2 | #define ALLTOALLV_H_ 3 | #include "../xccl_ucx_lib.h" 4 
| 5 | xccl_status_t xccl_ucx_alltoallv_pairwise_start(xccl_ucx_collreq_t *req); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/team_lib/ucx/fanout/fanout.h: -------------------------------------------------------------------------------- 1 | #ifndef TEAM_UCX_FANOUT_H_ 2 | #define TEAM_UCX_FANOUT_H_ 3 | #include "../xccl_ucx_lib.h" 4 | 5 | xccl_status_t xccl_ucx_fanout_linear_start(xccl_ucx_collreq_t *req); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/team_lib/ucx/fanin/fanin.h: -------------------------------------------------------------------------------- 1 | #ifndef FANIN_H_ 2 | #define FANIN_H_ 3 | #include "../xccl_ucx_lib.h" 4 | 5 | xccl_status_t xccl_ucx_fanin_linear_start(xccl_ucx_collreq_t *req); 6 | xccl_status_t xccl_ucx_fanin_linear_progress(xccl_ucx_collreq_t *req); 7 | #endif 8 | -------------------------------------------------------------------------------- /src/team_lib/ucx/reduce/reduce.h: -------------------------------------------------------------------------------- 1 | #ifndef REDUCE_H_ 2 | #define REDUCE_H_ 3 | #include "../xccl_ucx_lib.h" 4 | 5 | xccl_status_t xccl_ucx_reduce_linear_start(xccl_ucx_collreq_t *req); 6 | xccl_status_t xccl_ucx_reduce_knomial_start(xccl_ucx_collreq_t *req); 7 | #endif 8 | -------------------------------------------------------------------------------- /src/team_lib/ucx/barrier/barrier.h: -------------------------------------------------------------------------------- 1 | #ifndef BARRIER_H_ 2 | #define BARRIER_H_ 3 | #include "../xccl_ucx_lib.h" 4 | 5 | xccl_status_t xccl_ucx_barrier_knomial_start(xccl_ucx_collreq_t *req); 6 | xccl_status_t xccl_ucx_barrier_knomial_progress(xccl_ucx_collreq_t *req); 7 | #endif 8 | -------------------------------------------------------------------------------- /src/team_lib/ucx/bcast/bcast.h: 
-------------------------------------------------------------------------------- 1 | #ifndef TEAM_UCX_BCAST_H_ 2 | #define TEAM_UCX_BCAST_H_ 3 | #include "../xccl_ucx_lib.h" 4 | 5 | xccl_status_t xccl_ucx_bcast_linear_start(xccl_ucx_collreq_t *req); 6 | xccl_status_t xccl_ucx_bcast_knomial_start(xccl_ucx_collreq_t *req); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/team_lib/ucx/alltoall/alltoall.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLTOALL_H_ 2 | #define ALLTOALL_H_ 3 | #include "../xccl_ucx_lib.h" 4 | 5 | xccl_status_t xccl_ucx_alltoall_pairwise_start(xccl_ucx_collreq_t *req); 6 | 7 | xccl_status_t xccl_ucx_alltoall_linear_shift_start(xccl_ucx_collreq_t *req); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.lo 3 | *.la 4 | Makefile.in 5 | config.* 6 | m4/ 7 | *.a 8 | aclocal.m4 9 | configure 10 | depcomp 11 | install-sh 12 | libtool 13 | ltmain.sh 14 | autom4te.cache/ 15 | libltdl/ 16 | *.out 17 | missing 18 | .vscode 19 | compile 20 | build* 21 | *.code-workspace 22 | config 23 | 24 | -------------------------------------------------------------------------------- /src/core/xccl_collective.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_COLLECTIVE_H_ 7 | #define XCCL_COLLECTIVE_H_ 8 | 9 | #include 10 | #include 11 | 12 | typedef struct xccl_coll_req { 13 | xccl_tl_coll_req_t *req; 14 | } xccl_coll_req_t; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/team_lib/ucx/allreduce/allreduce.c: -------------------------------------------------------------------------------- 1 | #include "allreduce.h" 2 | 3 | const char* xccl_allreduce_alg_names[] = { 4 | [XCCL_UCX_ALLREDUCE_ALG_KNOMIAL] = "knomial", 5 | [XCCL_UCX_ALLREDUCE_ALG_SRA] = "sra", 6 | [XCCL_UCX_ALLREDUCE_ALG_AUTO] = "auto", 7 | }; 8 | 9 | const xccl_ucx_coll_start_fn_p xccl_ucx_allreduce_start[] = { 10 | [XCCL_UCX_ALLREDUCE_ALG_KNOMIAL] = xccl_ucx_allreduce_knomial_start, 11 | [XCCL_UCX_ALLREDUCE_ALG_SRA] = xccl_ucx_allreduce_sra_start 12 | }; 13 | -------------------------------------------------------------------------------- /src/utils/cuda/kernels/xccl_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | __global__ void dummy_kernel(volatile int *stop) { 5 | int should_stop; 6 | do { 7 | should_stop = *stop; 8 | } while(!should_stop); 9 | return; 10 | } 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | cudaError_t xccl_cuda_dummy_kernel(int *stop, cudaStream_t stream) 17 | { 18 | dummy_kernel<<<1, 1, 0, stream>>>(stop); 19 | return cudaGetLastError(); 20 | } 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /src/core/xccl_lib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_LIB_H_ 7 | #define XCCL_LIB_H_ 8 | 9 | #include "config.h" 10 | #include 11 | 12 | typedef struct xccl_lib_config { 13 | const char *tls; 14 | } xccl_lib_config_t; 15 | 16 | typedef struct xccl_team_lib xccl_team_lib_t; 17 | typedef struct xccl_lib { 18 | int n_libs_opened; 19 | int libs_array_size; 20 | xccl_team_lib_t **libs; 21 | } xccl_lib_t; 22 | 23 | extern xccl_lib_t xccl_static_lib; 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/api/xccl_version.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | 7 | /** 8 | * Construct a XCCL version identifier from major and minor version numbers. 9 | */ 10 | #define XCCL_VERSION(_major, _minor) \ 11 | (((_major) << XCCL_VERSION_MAJOR_SHIFT) | \ 12 | ((_minor) << XCCL_VERSION_MINOR_SHIFT)) 13 | #define XCCL_VERSION_MAJOR_SHIFT 24 14 | #define XCCL_VERSION_MINOR_SHIFT 16 15 | 16 | 17 | /** 18 | * XCCL API version is 1.0 19 | */ 20 | #define XCCL_API_MAJOR 1 21 | #define XCCL_API_MINOR 0 22 | #define XCCL_API_VERSION XCCL_VERSION(1, 0) 23 | -------------------------------------------------------------------------------- /src/team_lib/multirail/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2009-2020 Mellanox Technologies. All rights reserved. 
3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | if HAVE_TEAM_MRAIL 11 | sources = \ 12 | xccl_mrail_lib.h \ 13 | xccl_mrail_lib.c 14 | 15 | component_noinst = 16 | component_install = xccl_team_lib_mrail.la 17 | 18 | xccl_team_lib_mrail_la_SOURCES =$(sources) 19 | xccl_team_lib_mrail_la_CPPFLAGS = $(AM_CPPFLAGS) 20 | xccl_team_lib_mrail_la_LDFLAGS = -module -avoid-version 21 | xccl_team_lib_mrail_la_LIBADD = $(XCCL_TOP_BUILDDIR)/src/libxccl.la 22 | pkglib_LTLIBRARIES = $(component_install) 23 | endif 24 | -------------------------------------------------------------------------------- /src/team_lib/hmc/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2009-2020 Mellanox Technologies. All rights reserved. 3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | if HAVE_HMC 11 | sources = \ 12 | xccl_hmc_lib.c \ 13 | xccl_hmc_lib.h 14 | 15 | component_noinst = 16 | component_install = xccl_team_lib_hmc.la 17 | 18 | xccl_team_lib_hmc_la_SOURCES =$(sources) 19 | xccl_team_lib_hmc_la_CPPFLAGS = $(AM_CPPFLAGS) $(HMC_CPPFLAGS) 20 | xccl_team_lib_hmc_la_LDFLAGS = -module -avoid-version $(HMC_LDFLAGS) 21 | xccl_team_lib_hmc_la_LIBADD = $(XCCL_TOP_BUILDDIR)/src/libxccl.la 22 | pkglib_LTLIBRARIES = $(component_install) 23 | endif 24 | -------------------------------------------------------------------------------- /src/core/xccl_context.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_CONTEXT_H_ 7 | #define XCCL_CONTEXT_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | typedef struct xccl_context { 14 | xccl_lib_t *lib; 15 | xccl_context_params_t params; 16 | xccl_tl_context_t **tl_ctx; 17 | int n_tl_ctx; 18 | } xccl_context_t; 19 | 20 | typedef struct xccl_context_config { 21 | xccl_lib_t *lib; 22 | xccl_tl_context_config_t **configs; 23 | int n_tl_cfg; 24 | }xccl_context_config_t; 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/core/xccl_team_lib.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | ucs_config_field_t xccl_team_lib_config_table[] = { 4 | {"LOG_LEVEL", "warn", 5 | "XCCL logging level. Messages with a level higher or equal to the selected " 6 | "will be printed.\n" 7 | "Possible values are: fatal, error, warn, info, debug, trace, data, func, poll.", 8 | ucs_offsetof(xccl_team_lib_config_t, log_component), 9 | UCS_CONFIG_TYPE_LOG_COMP}, 10 | 11 | {"PRIORITY", "-1", 12 | "XCCL team lib priority.\n" 13 | "Possible values are: [1,inf]", 14 | ucs_offsetof(xccl_team_lib_config_t, priority), 15 | UCS_CONFIG_TYPE_INT}, 16 | 17 | {NULL} 18 | }; 19 | 20 | 21 | ucs_config_field_t xccl_tl_context_config_table[] = { 22 | 23 | {NULL} 24 | }; 25 | -------------------------------------------------------------------------------- /src/utils/cuda/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2009-2020 Mellanox Technologies. All rights reserved. 
3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | if HAVE_CUDA 11 | SUBDIRS = kernels 12 | 13 | sources = \ 14 | cuda_mem_component.c \ 15 | cuda_mem_component.h 16 | 17 | component_noinst = 18 | component_install = xccl_cuda_mem_component.la 19 | 20 | xccl_cuda_mem_component_la_SOURCES =$(sources) 21 | xccl_cuda_mem_component_la_CPPFLAGS = $(AM_CPPFLAGS) $(CUDA_CPPFLAGS) 22 | xccl_cuda_mem_component_la_LDFLAGS = -module -avoid-version $(CUDA_LDFLAGS) 23 | xccl_cuda_mem_component_la_LIBADD = kernels/libxccl_cuda_kernels.la 24 | pkglib_LTLIBRARIES = $(component_install) 25 | endif 26 | -------------------------------------------------------------------------------- /src/team_lib/nccl/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2020 Mellanox Technologies. All rights reserved. 3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | if HAVE_NCCL 11 | sources = \ 12 | xccl_nccl_lib.c \ 13 | xccl_nccl_collective.c \ 14 | xccl_nccl_lib.h 15 | 16 | component_noinst = 17 | component_install = xccl_team_lib_nccl.la 18 | 19 | xccl_team_lib_nccl_la_SOURCES =$(sources) 20 | xccl_team_lib_nccl_la_CPPFLAGS = $(AM_CPPFLAGS) $(CUDA_CPPFLAGS) $(NCCL_CPPFLAGS) $(CPPFLAGS) 21 | xccl_team_lib_nccl_la_LDFLAGS = -module -avoid-version $(CUDA_LDFLAGS) $(NCCL_LDFLAGS) 22 | xccl_team_lib_nccl_la_LIBADD = $(XCCL_TOP_BUILDDIR)/src/libxccl.la 23 | 24 | pkglib_LTLIBRARIES = $(component_install) 25 | endif 26 | -------------------------------------------------------------------------------- /src/team_lib/sharp/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2009-2020 Mellanox Technologies. All rights reserved. 
3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | if HAVE_SHARP 11 | sources = \ 12 | xccl_sharp_lib.c \ 13 | xccl_sharp_lib.h \ 14 | xccl_sharp_map.h \ 15 | xccl_sharp_collective.c \ 16 | xccl_sharp_collective.h 17 | 18 | component_noinst = 19 | component_install = xccl_team_lib_sharp.la 20 | 21 | xccl_team_lib_sharp_la_SOURCES =$(sources) 22 | xccl_team_lib_sharp_la_CPPFLAGS = $(AM_CPPFLAGS) $(SHARP_CPPFLAGS) 23 | xccl_team_lib_sharp_la_LDFLAGS = -module -avoid-version $(SHARP_LDFLAGS) 24 | xccl_team_lib_sharp_la_LIBADD = $(XCCL_TOP_BUILDDIR)/src/libxccl.la 25 | 26 | pkglib_LTLIBRARIES = $(component_install) 27 | endif 28 | -------------------------------------------------------------------------------- /src/core/xccl_tasks_queue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef TASKS_QUEUE_H_ 7 | #define TASKS_QUEUE_H_ 8 | 9 | #include 10 | #include 11 | #include "xccl_schedule.h" 12 | #include "xccl_progress_queue.h" 13 | 14 | typedef struct xccl_tasks_queue { 15 | ucs_list_link_t list; 16 | } xccl_tasks_queue_t; 17 | 18 | xccl_status_t tasks_queue_init(xccl_progress_queue_t *handle); 19 | 20 | xccl_status_t tasks_queue_insert(xccl_progress_queue_t *handle, xccl_coll_task_t *task); 21 | 22 | xccl_status_t tasks_queue_progress(xccl_progress_queue_t *handle); 23 | 24 | xccl_status_t tasks_queue_destroy(xccl_progress_queue_t *handle); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/team_lib/ucx/allreduce/allreduce.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLREDUCE_H_ 2 | #define ALLREDUCE_H_ 3 | 4 | #include "../xccl_ucx_lib.h" 5 | 6 | typedef enum { 7 | XCCL_UCX_ALLREDUCE_ALG_KNOMIAL, 8 | XCCL_UCX_ALLREDUCE_ALG_SRA, 9 | 
XCCL_UCX_ALLREDUCE_ALG_AUTO, 10 | XCCL_UCX_ALLREDUCE_ALG_LAST 11 | } xccl_ucx_allreduce_alg_t; 12 | 13 | extern const xccl_ucx_coll_start_fn_p xccl_ucx_allreduce_start[]; 14 | extern const char* xccl_allreduce_alg_names[]; 15 | 16 | xccl_status_t xccl_ucx_allreduce_knomial_start(xccl_ucx_collreq_t *req); 17 | xccl_status_t xccl_ucx_allreduce_knomial_progress(xccl_ucx_collreq_t *req); 18 | 19 | xccl_status_t xccl_ucx_allreduce_sra_start(xccl_ucx_collreq_t *req); 20 | xccl_status_t xccl_ucx_allreduce_sra_progress(xccl_ucx_collreq_t *req); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/core/xccl_mm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef XCCL_MM_H_ 7 | #define XCCL_MM_H_ 8 | 9 | #include 10 | #include 11 | 12 | typedef struct xccl_tl_mem_handle { 13 | xccl_tl_id_t id; 14 | } xccl_tl_mem_handle_t; 15 | 16 | typedef struct xccl_mem_handle { 17 | xccl_team_t *team; 18 | xccl_tl_mem_h handles[1]; 19 | } xccl_mem_handle_t; 20 | 21 | static inline xccl_tl_mem_h xccl_mem_handle_by_tl_id(xccl_mem_h memh, xccl_tl_id_t id) 22 | { 23 | int i; 24 | 25 | for (i=0; iteam->n_teams; i++) { 26 | if (memh->handles[i]->id == id) { 27 | return memh->handles[i]; 28 | } 29 | } 30 | 31 | return NULL; 32 | } 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/team_lib/mpod/Makefile.am: -------------------------------------------------------------------------------- 1 | if HAVE_MPOD 2 | sources = \ 3 | uthash.h \ 4 | xccl_mpod_lib.h \ 5 | xccl_mpod_lib.c \ 6 | xccl_mpod_nccl_reqs.c \ 7 | xccl_mpod_barrier.c \ 8 | xccl_mpod_cpu.c \ 9 | xccl_mpod_allreduce.c \ 10 | xccl_mpod_allreduce_split.c \ 11 | xccl_mpod_allreduce_coalesce.c \ 12 | xccl_mpod_allreduce_replicate.c \ 13 | xccl_mpod_bcast.c \ 
14 | xccl_mpod_allgather.c \ 15 | xccl_mpod_alltoall.c \ 16 | xccl_mpod_alltoallv.c 17 | 18 | component_noinst = 19 | component_install = xccl_team_lib_mpod.la 20 | 21 | xccl_team_lib_mpod_la_SOURCES = $(sources) 22 | xccl_team_lib_mpod_la_CPPFLAGS = $(AM_CPPFLAGS) $(CPPFLAGS) -I../../../external 23 | xccl_team_lib_mpod_la_LDFLAGS = -module -avoid-version 24 | xccl_team_lib_mpod_la_LIBADD = $(XCCL_TOP_BUILDDIR)/src/libxccl.la 25 | 26 | pkglib_LTLIBRARIES = $(component_install) 27 | endif 28 | -------------------------------------------------------------------------------- /src/team_lib/mpod/xccl_mpod_allreduce.c: -------------------------------------------------------------------------------- 1 | #include "xccl_mpod_lib.h" 2 | 3 | xccl_status_t xccl_mpod_allreduce_init(xccl_mpod_coll_req_t *req) 4 | { 5 | xccl_status_t status = XCCL_OK; 6 | 7 | char *str = getenv("XCCL_MPOD_ALLREDUCE_ALGORITHM"); 8 | if (str == NULL) { 9 | str = "replicate"; 10 | } 11 | 12 | if (!strcmp(str, "replicate")) { 13 | status = xccl_mpod_allreduce_init_replicate(req); 14 | xccl_mpod_err_pop(status, fn_fail); 15 | } else if (!strcmp(str, "split")) { 16 | status = xccl_mpod_allreduce_init_split(req); 17 | xccl_mpod_err_pop(status, fn_fail); 18 | } else { 19 | status = xccl_mpod_allreduce_init_coalesce(req); 20 | xccl_mpod_err_pop(status, fn_fail); 21 | } 22 | 23 | fn_exit: 24 | return status; 25 | fn_fail: 26 | goto fn_exit; 27 | } 28 | -------------------------------------------------------------------------------- /src/team_lib/ucx/xccl_ucx_team.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | #ifndef XCCL_UCX_TEAM_H_ 6 | #define XCCL_UCX_TEAM_H_ 7 | #include "xccl_ucx_lib.h" 8 | 9 | typedef struct xccl_ucx_team_t { 10 | xccl_tl_team_t super; 11 | uint16_t ctx_id; 12 | uint16_t seq_num; 13 | int max_addrlen; 14 | ucp_ep_h *ucp_eps; 15 | xccl_ep_range_t range; 16 | void *nb_create_req; 17 | } xccl_ucx_team_t; 18 | 19 | xccl_status_t xccl_ucx_team_create_post(xccl_tl_context_t *context, 20 | xccl_team_params_t *params, 21 | xccl_tl_team_t **team); 22 | xccl_status_t xccl_ucx_team_create_test(xccl_tl_team_t *team); 23 | xccl_status_t xccl_ucx_team_destroy(xccl_tl_team_t *team); 24 | #endif 25 | -------------------------------------------------------------------------------- /test/test_mpi.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_MPI_H 2 | #define TEST_MPI_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define STR(x) # x 12 | #define XCCL_CHECK(_call) if (XCCL_OK != (_call)) { \ 13 | fprintf(stderr, "*** XCCL TEST FAIL: %s\n", STR(_call)); \ 14 | MPI_Abort(MPI_COMM_WORLD, -1); \ 15 | } 16 | 17 | extern xccl_team_h xccl_world_team; 18 | 19 | extern xccl_context_h team_ctx; 20 | 21 | int xccl_mpi_test_init(int argc, char **argv, 22 | xccl_collective_cap_t coll_types, unsigned thread_mode); 23 | 24 | void xccl_mpi_test_finalize(void); 25 | 26 | int xccl_mpi_create_comm_nb(MPI_Comm comm, xccl_team_h *team); 27 | 28 | int xccl_mpi_create_comm(MPI_Comm comm, xccl_team_h *team); 29 | 30 | void xccl_mpi_test_progress(void); 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/team_lib/hier/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2009-2020 Mellanox Technologies. All rights reserved. 
3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | sources = \ 11 | xccl_hier_lib.h \ 12 | xccl_hier_lib.c \ 13 | xccl_hier_context.h \ 14 | xccl_hier_context.c \ 15 | xccl_hier_team.h \ 16 | xccl_hier_team.c \ 17 | xccl_hier_sbgp.h \ 18 | xccl_hier_sbgp.c \ 19 | xccl_hier_schedule.h \ 20 | xccl_hier_schedule.c \ 21 | xccl_hier_schedule_allreduce.c \ 22 | xccl_hier_schedule_alltoall.c \ 23 | xccl_hier_schedule_barrier.c \ 24 | xccl_hier_schedule_bcast.c 25 | 26 | component_noinst = 27 | component_install = xccl_team_lib_hier.la 28 | 29 | xccl_team_lib_hier_la_SOURCES =$(sources) 30 | xccl_team_lib_hier_la_CPPFLAGS = $(AM_CPPFLAGS) 31 | xccl_team_lib_hier_la_LDFLAGS = -module -avoid-version 32 | xccl_team_lib_hier_la_LIBADD = $(XCCL_TOP_BUILDDIR)/src/libxccl.la 33 | pkglib_LTLIBRARIES = $(component_install) 34 | 35 | -------------------------------------------------------------------------------- /src/core/xccl_global_opts.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_GLOBAL_OPTS_H_ 7 | #define XCCL_GLOBAL_OPTS_H_ 8 | 9 | #include "config.h" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | typedef struct xccl_config { 19 | /* Log level above which log messages will be printed*/ 20 | ucs_log_component_config_t log_component; 21 | 22 | /* Team libraries path */ 23 | char *team_lib_path; 24 | 25 | /* Size of internal memory component cache */ 26 | size_t mem_component_cache_size; 27 | 28 | /* Maximum number of concurrent active tasks without slowing down performance */ 29 | int lf_tasks_queue_size; 30 | } xccl_config_t; 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /test/test.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | #include 7 | #include 8 | 9 | int main(int argc, char **argv) { 10 | 11 | xccl_lib_config_t lib_config = { 12 | .field_mask = XCCL_LIB_CONFIG_FIELD_TEAM_USAGE, 13 | .team_usage = XCCL_USAGE_SW_COLLECTIVES | 14 | XCCL_USAGE_HW_COLLECTIVES, 15 | }; 16 | xccl_lib_h lib; 17 | xccl_lib_init(lib_config, &lib); 18 | 19 | xccl_context_params_t team_ctx_params = { 20 | .field_mask = XCCL_CONTEXT_CONFIG_FIELD_TEAM_LIB_NAME | 21 | XCCL_CONTEXT_CONFIG_FIELD_THREAD_MODE | 22 | XCCL_CONTEXT_CONFIG_FIELD_COMPLETION_TYPE, 23 | .team_lib_name = "ucx", 24 | .thread_mode = XCCL_LIB_THREAD_SINGLE, 25 | .completion_type = XCCL_TEAM_COMPLETION_BLOCKING, 26 | }; 27 | xccl_context_h team_ctx; 28 | xccl_context_create(lib, team_ctx_params, &team_ctx); 29 | return 0; 30 | } 31 | -------------------------------------------------------------------------------- /src/api/xccl_status.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. 
ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | 7 | #ifndef XCCL_STATUS_H_ 8 | #define XCCL_STATUS_H_ 9 | 10 | typedef enum { 11 | /* Operation completed successfully */ 12 | XCCL_OK = 0, 13 | 14 | /* Operation is queued and still in progress */ 15 | XCCL_INPROGRESS = 1, 16 | 17 | /* Failure codes */ 18 | XCCL_ERR_NO_MESSAGE = -1, 19 | XCCL_ERR_NO_RESOURCE = -2, 20 | XCCL_ERR_NO_MEMORY = -4, 21 | XCCL_ERR_INVALID_PARAM = -5, 22 | XCCL_ERR_UNREACHABLE = -6, 23 | XCCL_ERR_NOT_IMPLEMENTED = -8, 24 | XCCL_ERR_MESSAGE_TRUNCATED = -9, 25 | XCCL_ERR_NO_PROGRESS = -10, 26 | XCCL_ERR_BUFFER_TOO_SMALL = -11, 27 | XCCL_ERR_NO_ELEM = -12, 28 | XCCL_ERR_UNSUPPORTED = -22, 29 | XCCL_ERR_LAST = -100 30 | } xccl_status_t; 31 | #endif 32 | -------------------------------------------------------------------------------- /src/core/xccl_progress_queue.h: -------------------------------------------------------------------------------- 1 | #ifndef XCCL_PROGRESS_QUEUE 2 | #define XCCL_PROGRESS_QUEUE 3 | #include 4 | #include 5 | 6 | typedef struct xccl_tl_context xccl_tl_context_t; 7 | typedef struct xccl_coll_task xccl_coll_task_t; 8 | typedef struct xccl_progress_queue xccl_progress_queue_t; 9 | 10 | typedef struct progress_queue_api{ 11 | xccl_status_t (*progress_queue_enqueue)(xccl_progress_queue_t*, xccl_coll_task_t*); 12 | xccl_status_t (*progress_queue_progress_tasks)(xccl_progress_queue_t*); 13 | xccl_status_t (*progress_queue_destroy)(xccl_progress_queue_t*); 14 | } progress_queue_api_t; 15 | 16 | struct xccl_progress_queue { 17 | void* ctx; 18 | progress_queue_api_t api; 19 | }; 20 | 21 | xccl_status_t xccl_ctx_progress_queue_init(xccl_progress_queue_t **q, unsigned thread_mode); 22 | xccl_status_t xccl_task_enqueue(xccl_progress_queue_t *q, xccl_coll_task_t *task); 23 | xccl_status_t xccl_ctx_progress_queue(xccl_tl_context_t *tl_ctx); 24 | xccl_status_t xccl_ctx_progress_queue_destroy(xccl_progress_queue_t *q); 25 | #endif 26 | 
-------------------------------------------------------------------------------- /src/team_lib/hmc/xccl_hmc_lib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | #ifndef XCCL_TEAM_LIB_HMC_H_ 6 | #define XCCL_TEAM_LIB_HMC_H_ 7 | #include "xccl_team_lib.h" 8 | #include 9 | 10 | typedef struct xccl_tl_hmc_context_config { 11 | xccl_tl_context_config_t super; 12 | ucs_config_names_array_t devices; 13 | } xccl_tl_hmc_context_config_t; 14 | 15 | typedef struct xccl_team_lib_hmc { 16 | xccl_team_lib_t super; 17 | } xccl_team_lib_hmc_t; 18 | extern xccl_team_lib_hmc_t xccl_team_lib_hmc; 19 | 20 | typedef struct xccl_hmc_context { 21 | xccl_tl_context_t super; 22 | hmc_ctx_h hmc_ctx; 23 | } xccl_hmc_context_t; 24 | 25 | typedef struct xccl_hmc_team { 26 | xccl_tl_team_t super; 27 | hmc_comm_h hmc_comm; 28 | } xccl_hmc_team_t; 29 | 30 | typedef struct xccl_hmc_coll_req { 31 | xccl_tl_coll_req_t super; 32 | xccl_hmc_team_t *team; 33 | void *handle; 34 | void *buf; 35 | size_t len; 36 | int root; 37 | } xccl_hmc_coll_req_t; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/core/xccl_finalize.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | 7 | #include "config.h" 8 | #include 9 | #include "xccl_team_lib.h" 10 | #include "xccl_global_opts.h" 11 | #include "utils/mem_component.h" 12 | #include 13 | #include 14 | #include 15 | 16 | extern xccl_config_t xccl_lib_global_config; 17 | extern ucs_config_field_t xccl_lib_global_config_table[]; 18 | 19 | xccl_status_t xccl_team_lib_finalize(xccl_team_lib_h lib) 20 | { 21 | if (lib->team_lib_close) { 22 | lib->team_lib_close(lib); 23 | } 24 | dlclose(lib->dl_handle); 25 | return XCCL_OK; 26 | } 27 | 28 | __attribute__((destructor)) 29 | static void xccl_destructor(void) 30 | { 31 | int i; 32 | 33 | ucs_config_parser_release_opts(&xccl_lib_global_config, xccl_lib_global_config_table); 34 | xccl_mem_component_finalize(); 35 | 36 | for (i=0; i 2 | 3 | typedef enum test_mem_type { 4 | TEST_MEM_TYPE_HOST = 0, 5 | TEST_MEM_TYPE_CUDA = 1 6 | } test_mem_type_t; 7 | 8 | typedef enum test_memcpy_kind { 9 | TEST_MEMCPY_H2H, 10 | TEST_MEMCPY_H2D, 11 | TEST_MEMCPY_D2H, 12 | TEST_MEMCPY_D2D 13 | } test_memcpy_kind_t; 14 | 15 | void test_print_header(xccl_collective_type_t coll_type, 16 | test_mem_type_t mtype, 17 | int test_count_start, 18 | int test_count_finish); 19 | 20 | xccl_status_t test_xccl_set_device(test_mem_type_t mtype); 21 | 22 | xccl_status_t test_xccl_mem_alloc(void **ptr, size_t size, test_mem_type_t mtype); 23 | 24 | xccl_status_t test_xccl_mem_free(void *ptr, test_mem_type_t mtype); 25 | 26 | xccl_status_t test_xccl_memcpy(void *dst, void *src, size_t size, test_memcpy_kind_t kind); 27 | 28 | xccl_status_t test_xccl_memset(void *ptr, int value, size_t size, test_mem_type_t mtype); 29 | 30 | xccl_status_t test_xccl_memcmp(void *ptr1, test_mem_type_t ptr1_mtype, 31 | void *ptr2, test_mem_type_t ptr2_mtype, 32 | size_t size, int *result); 33 | -------------------------------------------------------------------------------- /src/core/xccl_lock_free_tasks_queue.h: -------------------------------------------------------------------------------- 1 | /* 
2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef LF_TASKS_QUEUE_H_ 7 | #define LF_TASKS_QUEUE_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "xccl_global_opts.h" 14 | #include "xccl_schedule.h" 15 | #include "xccl_progress_queue.h" 16 | 17 | 18 | #define LINE_SIZE 8 19 | #define NUM_POOLS 2 20 | 21 | extern xccl_config_t xccl_lib_global_config; 22 | 23 | typedef struct xccl_lf_tasks_queue { 24 | ucs_spinlock_t locked_queue_lock; 25 | xccl_coll_task_t*** tasks; 26 | uint32_t which_pool; 27 | ucs_list_link_t locked_queue; 28 | uint32_t tasks_countrs[2]; 29 | } xccl_lf_tasks_queue_t; 30 | 31 | xccl_status_t lf_tasks_queue_init(xccl_progress_queue_t *handle); 32 | 33 | xccl_status_t lf_tasks_queue_insert(xccl_progress_queue_t *handle, xccl_coll_task_t *task); 34 | 35 | xccl_status_t lf_tasks_queue_progress(xccl_progress_queue_t *handle); 36 | 37 | xccl_status_t lf_tasks_queue_destroy(xccl_progress_queue_t *handle); 38 | 39 | #endif -------------------------------------------------------------------------------- /src/utils/cuda/cuda_mem_component.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | 7 | #ifndef XCCL_CUDA_MEM_COMPONENT_H_ 8 | #define XCCL_CUDA_MEM_COMPONENT_H_ 9 | 10 | #include 11 | #include 12 | 13 | typedef struct xccl_cuda_mem_component_stream_request { 14 | xccl_mem_component_stream_request_t super; 15 | int is_free; 16 | int stop_request; 17 | void *dev_stop_request; 18 | cudaEvent_t event; 19 | } xccl_cuda_mem_component_stream_request_t; 20 | 21 | typedef struct xccl_cuda_mc_event { 22 | xccl_mc_event_t super; 23 | int is_free; 24 | cudaEvent_t cuda_event; 25 | } xccl_cuda_mc_event_t; 26 | 27 | typedef struct xccl_cuda_mem_component { 28 | xccl_mem_component_t super; 29 | cudaStream_t stream; 30 | xccl_cuda_mem_component_stream_request_t *stream_requests; 31 | xccl_cuda_mc_event_t *events; 32 | } xccl_cuda_mem_component_t; 33 | 34 | #endif -------------------------------------------------------------------------------- /src/utils/cuda/kernels/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2009-2020 Mellanox Technologies. All rights reserved. 
3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | NVCC = nvcc 11 | NVCCFLAGS = "${UCS_CPPFLAGS} -I${XCCL_TOP_SRCDIR}/src -I${XCCL_TOP_SRCDIR}/src/core" --compiler-options -fno-rtti,-fno-exceptions 12 | NV_ARCH_FLAGS = -arch=sm_50 \ 13 | -gencode=arch=compute_37,code=sm_37 \ 14 | -gencode=arch=compute_50,code=sm_50 \ 15 | -gencode=arch=compute_52,code=sm_52 \ 16 | -gencode=arch=compute_60,code=sm_60 \ 17 | -gencode=arch=compute_61,code=sm_61 \ 18 | -gencode=arch=compute_70,code=sm_70 \ 19 | -gencode=arch=compute_70,code=compute_70 20 | LINK = $(LIBTOOL) --mode=link $(CC) -o $@ 21 | 22 | .cu.o: 23 | $(NVCC) -c $< -o $@ $(NVCCFLAGS) $(NV_ARCH_FLAGS) 24 | 25 | .cu.lo: 26 | $(top_srcdir)/cudalt.py $@ $(NVCC) -c $< $(NVCCFLAGS) $(NV_ARCH_FLAGS) 27 | 28 | comp_noinst = libxccl_cuda_kernels.la 29 | 30 | libxccl_cuda_kernels_la_SOURCES = xccl_cuda_reduce.cu \ 31 | xccl_cuda_reduce_multi.cu \ 32 | xccl_cuda_kernel.cu 33 | libxccl_cuda_kernels_la_CPPFLAGS = 34 | 35 | noinst_LTLIBRARIES = $(comp_noinst) 36 | -------------------------------------------------------------------------------- /src/core/xccl_team.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_TEAM_H_ 7 | #define XCCL_TEAM_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define XCCL_CHECK_TEAM(_team) \ 15 | do { \ 16 | if (_team->status != XCCL_OK) { \ 17 | xccl_error("team %p is used before team_create is completed", _team); \ 18 | return XCCL_ERR_INVALID_PARAM; \ 19 | } \ 20 | } while(0) 21 | 22 | typedef struct xccl_team { 23 | xccl_context_t *ctx; 24 | int coll_team_id[XCCL_COLL_LAST][UCS_MEMORY_TYPE_LAST]; 25 | int n_teams; 26 | int last_team_create_posted; 27 | xccl_status_t status; 28 | xccl_team_params_t params; 29 | xccl_tl_team_t *tl_teams[1]; 30 | } xccl_team_t; 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/team_lib/hier/xccl_hier_sbgp.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | 7 | #ifndef XCCL_HIER_SBGP_H_ 8 | #define XCCL_HIER_SBGP_H_ 9 | 10 | #include "xccl_team_lib.h" 11 | typedef enum sbgp_type_t { 12 | SBGP_UNDEF = 0, 13 | SBGP_NUMA, 14 | SBGP_SOCKET, 15 | SBGP_NODE, 16 | SBGP_NODE_LEADERS, 17 | SBGP_SOCKET_LEADERS, 18 | SBGP_NUMA_LEADERS, 19 | SBGP_FLAT, 20 | SBGP_LAST 21 | } sbgp_type_t; 22 | 23 | typedef enum sbgp_status_t { 24 | SBGP_DISABLED = 0, 25 | SBGP_ENABLED, 26 | SBGP_NOT_EXISTS, 27 | } sbgp_status_t; 28 | 29 | typedef struct xccl_hier_team xccl_hier_team_t; 30 | typedef struct sbgp_t { 31 | sbgp_type_t type; 32 | sbgp_status_t status; 33 | int group_size; 34 | int group_rank; 35 | int *rank_map; 36 | xccl_hier_team_t *hier_team; 37 | } sbgp_t; 38 | 39 | extern char* sbgp_type_str[SBGP_LAST]; 40 | xccl_status_t sbgp_create(xccl_hier_team_t *team, sbgp_type_t type); 41 | xccl_status_t sbgp_cleanup(sbgp_t *sbgp); 42 | 43 | static inline int sbgp_rank2team(sbgp_t *sbgp, int rank) 44 | { 45 | return sbgp->rank_map[rank]; 46 | } 47 | 48 | int 
xccl_hier_compare_proc_data(const void* a, const void* b); 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/team_lib/sharp/xccl_sharp_collective.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef XCCL_TEAM_SHARP_COLLECTIVE_H_ 7 | #define XCCL_TEAM_SHARP_COLLECTIVE_H_ 8 | 9 | #include "xccl_sharp_lib.h" 10 | 11 | typedef struct xccl_sharp_coll_req { 12 | xccl_tl_coll_req_t super; 13 | xccl_sharp_team_t *team; 14 | struct sharp_coll_reduce_spec reduce_spec; 15 | struct sharp_coll_comm *sharp_comm; 16 | void *handle; 17 | xccl_sharp_buf_t *sharp_buf; 18 | xccl_collective_type_t coll_type; 19 | int (*start)(struct xccl_sharp_coll_req* req); 20 | xccl_sharp_rcache_region_t *src_rregion; 21 | xccl_sharp_rcache_region_t *dst_rregion; 22 | } xccl_sharp_coll_req_t; 23 | 24 | xccl_status_t xccl_sharp_collective_init(xccl_coll_op_args_t *coll_args, 25 | xccl_tl_coll_req_t **request, 26 | xccl_tl_team_t *team); 27 | xccl_status_t xccl_sharp_collective_post(xccl_tl_coll_req_t *request); 28 | xccl_status_t xccl_sharp_collective_wait(xccl_tl_coll_req_t *request); 29 | xccl_status_t xccl_sharp_collective_test(xccl_tl_coll_req_t *request); 30 | xccl_status_t xccl_sharp_collective_finalize(xccl_tl_coll_req_t *request); 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /test/test_mpi_barrier.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #define _BSD_SOURCE 7 | #include "test_mpi.h" 8 | #include 9 | #include 10 | #include 11 | 12 | static inline void 13 | do_barrier(xccl_team_h team) { 14 | xccl_coll_req_h request; 15 | xccl_coll_op_args_t coll = { 16 | .field_mask = 0, 17 | .coll_type = XCCL_BARRIER, 18 | .alg.set_by_user = 0, 19 | .tag = 123, //todo 20 | }; 21 | XCCL_CHECK(xccl_collective_init(&coll, &request, team)); 22 | XCCL_CHECK(xccl_collective_post(request)); 23 | while (XCCL_OK != xccl_collective_test(request)) { 24 | xccl_context_progress(team_ctx); 25 | } 26 | XCCL_CHECK(xccl_collective_finalize(request)); 27 | } 28 | 29 | int main (int argc, char **argv) { 30 | int rank, size, i, sleep_us; 31 | 32 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_BARRIER, XCCL_THREAD_MODE_SINGLE)); 33 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 34 | MPI_Comm_size(MPI_COMM_WORLD, &size); 35 | 36 | srand(time(NULL)); 37 | for (i=0; i 2 | 3 | xccl_config_t xccl_lib_global_config = { 4 | .log_component = {UCS_LOG_LEVEL_WARN, "XCCL"}, 5 | .team_lib_path = "", 6 | .mem_component_cache_size = 4096, 7 | .lf_tasks_queue_size = 128 8 | }; 9 | 10 | ucs_config_field_t xccl_lib_global_config_table[] = { 11 | {"LOG_LEVEL", "warn", 12 | "XCCL logging level. Messages with a level higher or equal to the selected " 13 | "will be printed.\n" 14 | "Possible values are: fatal, error, warn, info, debug, trace, data, func, poll.", 15 | ucs_offsetof(xccl_config_t, log_component), 16 | UCS_CONFIG_TYPE_LOG_COMP}, 17 | 18 | {"TEAM_LIB_PATH", "", 19 | "Specifies team libraries location", 20 | ucs_offsetof(xccl_config_t, team_lib_path), 21 | UCS_CONFIG_TYPE_STRING}, 22 | 23 | {"MEM_COMPONENT_CACHE_SIZE", "4096", 24 | "Size of memory component preallocated buffer size", 25 | ucs_offsetof(xccl_config_t, mem_component_cache_size), 26 | UCS_CONFIG_TYPE_MEMUNITS}, 27 | 28 | {"LF_TASKS_QUEUE_SIZE", "128", 29 | "Maximum number of active tasks that will be simultaneously in the progress queue. 
Any number of tasks beyond this size will suffer from slower performance", 30 | ucs_offsetof(xccl_config_t, lf_tasks_queue_size), 31 | UCS_CONFIG_TYPE_INT}, 32 | 33 | NULL 34 | }; 35 | extern ucs_list_link_t ucs_config_global_list; 36 | UCS_CONFIG_REGISTER_TABLE(xccl_lib_global_config_table, "XCCL global", NULL, 37 | xccl_lib_global_config, &ucs_config_global_list) 38 | -------------------------------------------------------------------------------- /src/core/xccl_progress_queue.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | xccl_status_t xccl_ctx_progress_queue_init(xccl_progress_queue_t **q, unsigned thread_mode) { 9 | xccl_progress_queue_t *pq = (xccl_progress_queue_t*) malloc(sizeof(xccl_progress_queue_t)); 10 | xccl_status_t status; 11 | switch (thread_mode) { 12 | case XCCL_THREAD_MODE_SINGLE: 13 | status = tasks_queue_init(pq); 14 | break; 15 | case XCCL_THREAD_MODE_MULTIPLE: 16 | status = lf_tasks_queue_init(pq); 17 | break; 18 | default: 19 | status = XCCL_ERR_INVALID_PARAM; 20 | break; 21 | } 22 | if (status != XCCL_OK) { 23 | return status; 24 | } 25 | *q = pq; 26 | return XCCL_OK; 27 | } 28 | 29 | xccl_status_t xccl_task_enqueue(xccl_progress_queue_t *q, 30 | xccl_coll_task_t *task) { 31 | task->was_progressed = 0; 32 | return (q->api.progress_queue_enqueue)(q,task); 33 | } 34 | 35 | xccl_status_t xccl_ctx_progress_queue(xccl_tl_context_t *tl_ctx) { 36 | return (tl_ctx->pq->api.progress_queue_progress_tasks)(tl_ctx->pq); 37 | } 38 | 39 | xccl_status_t xccl_ctx_progress_queue_destroy(xccl_progress_queue_t *q) { 40 | xccl_status_t status = (q->api.progress_queue_destroy)(q); 41 | if (status != XCCL_OK) { 42 | return status; 43 | } 44 | free(q); 45 | return XCCL_OK; 46 | } 47 | -------------------------------------------------------------------------------- /src/utils/xccl_log.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef XCCL_UTILS_LOG_H_ 7 | #define XCCL_UTILS_LOG_H_ 8 | 9 | #include "config.h" 10 | #include "xccl_global_opts.h" 11 | 12 | extern xccl_config_t xccl_lib_global_config; 13 | #define xccl_log_component(_level, _fmt, ...) \ 14 | do { \ 15 | ucs_log_component(_level, &xccl_lib_global_config.log_component, _fmt, ## __VA_ARGS__); \ 16 | } while (0) 17 | 18 | #define xccl_error(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_ERROR, _fmt, ## __VA_ARGS__) 19 | #define xccl_warn(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_WARN, _fmt, ## __VA_ARGS__) 20 | #define xccl_info(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_INFO, _fmt, ## __VA_ARGS__) 21 | #define xccl_debug(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_DEBUG, _fmt, ## __VA_ARGS__) 22 | #define xccl_trace(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_TRACE, _fmt, ## __VA_ARGS__) 23 | #define xccl_trace_req(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_TRACE_REQ, _fmt, ## __VA_ARGS__) 24 | #define xccl_trace_data(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_TRACE_DATA, _fmt, ## __VA_ARGS__) 25 | #define xccl_trace_async(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_TRACE_ASYNC, _fmt, ## __VA_ARGS__) 26 | #define xccl_trace_func(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_TRACE_FUNC, "%s(" _fmt ")", __FUNCTION__, ## __VA_ARGS__) 27 | #define xccl_trace_poll(_fmt, ...) xccl_log_component(UCS_LOG_LEVEL_TRACE_POLL, _fmt, ## __VA_ARGS__) 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /src/api/xccl_def.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | 7 | #ifndef XCCL_DEF_H_ 8 | #define XCCL_DEF_H_ 9 | 10 | #include 11 | #include 12 | 13 | typedef struct xccl_lib* xccl_lib_h; 14 | 15 | /** 16 | * @ingroup XCCL_LIB_CONFIG 17 | * @brief XCCL configuration descriptor 18 | * 19 | * This descriptor defines the configuration for @ref xccl_lib_h 20 | * "XCCL team library". The configuration is loaded from the run-time 21 | * environment (using configuration files of environment variables) 22 | * using @ref xccl_lib_config_read "xccl_lib_config_read" routine and can be printed 23 | * using @ref xccl_lib_config_print "xccl_lib_config_print" routine. In addition, 24 | * application is responsible to release the descriptor using 25 | * @ref xccl_lib_config_release "xccl_lib_config_release" routine. 26 | */ 27 | typedef struct xccl_lib_config xccl_lib_config_t; 28 | 29 | typedef struct xccl_context_config xccl_context_config_t; 30 | 31 | /** 32 | * @ingroup XCCL_TEAM 33 | * @brief XCCL team handle 34 | * 35 | * @todo add description here 36 | */ 37 | typedef struct xccl_team* xccl_team_h; 38 | 39 | /** 40 | * @ingroup XCCL_TEAM_CONTEXT 41 | * @brief XCCL team context handle 42 | * 43 | * @todo add description here 44 | */ 45 | typedef struct xccl_context* xccl_context_h; 46 | 47 | /** 48 | * @ingroup XCCL_COLL 49 | * @brief XCCL collective requst handle 50 | * 51 | * @todo add description here 52 | */ 53 | 54 | typedef struct xccl_coll_req* xccl_coll_req_h; 55 | 56 | typedef struct xccl_mem_handle* xccl_mem_h; 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /src/core/xccl_tasks_queue.c: -------------------------------------------------------------------------------- 1 | #include "xccl_tasks_queue.h" 2 | 3 | xccl_status_t tasks_queue_init(xccl_progress_queue_t *handle) { 4 | handle->ctx = (void *) malloc(sizeof(xccl_tasks_queue_t)); 5 | xccl_tasks_queue_t *ctx = (xccl_tasks_queue_t *) handle->ctx; 6 | ucs_list_head_init(&ctx->list); 7 | 8 | 
handle->api.progress_queue_enqueue = &tasks_queue_insert; 9 | handle->api.progress_queue_progress_tasks = &tasks_queue_progress; 10 | handle->api.progress_queue_destroy = &tasks_queue_destroy; 11 | return XCCL_OK; 12 | } 13 | 14 | xccl_status_t tasks_queue_insert(xccl_progress_queue_t *handle, xccl_coll_task_t *task) { 15 | xccl_tasks_queue_t *ctx = (xccl_tasks_queue_t *) handle->ctx; 16 | ucs_list_add_tail(&ctx->list, &task->list_elem); 17 | return XCCL_OK; 18 | } 19 | 20 | xccl_status_t tasks_queue_progress(xccl_progress_queue_t *handle) { 21 | xccl_tasks_queue_t *ctx = (xccl_tasks_queue_t *) handle->ctx; 22 | xccl_coll_task_t *task, *tmp; 23 | xccl_status_t status; 24 | ucs_list_for_each_safe(task, tmp, &ctx->list, list_elem) 25 | { 26 | if (task->progress) { 27 | if (0 < task->progress(task)) { 28 | return status; 29 | } 30 | } 31 | if (XCCL_TASK_STATE_COMPLETED == task->state) { 32 | xccl_event_manager_notify(&task->em, XCCL_EVENT_COMPLETED); 33 | ucs_list_del(&task->list_elem); 34 | } 35 | } 36 | return XCCL_OK; 37 | } 38 | 39 | xccl_status_t tasks_queue_destroy(xccl_progress_queue_t *handle) { 40 | xccl_tasks_queue_t *ctx = (xccl_tasks_queue_t *) handle->ctx; 41 | free(ctx); 42 | return XCCL_OK; 43 | } 44 | -------------------------------------------------------------------------------- /test/test_mpi_create_team_nb.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #include "test_mpi.h" 7 | 8 | int main (int argc, char **argv) { 9 | int rank, size; 10 | int sbuf, rbuf; 11 | 12 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_ALLREDUCE, XCCL_THREAD_MODE_SINGLE)); 13 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 14 | MPI_Comm_size(MPI_COMM_WORLD, &size); 15 | 16 | MPI_Comm split_comm; 17 | xccl_team_h split_team; 18 | MPI_Comm_split(MPI_COMM_WORLD, rank % 2, rank, &split_comm); 19 | int split_rank, split_size; 20 | MPI_Comm_rank(split_comm, &split_rank); 21 | MPI_Comm_size(split_comm, &split_size); 22 | 23 | if (split_rank % 2 == 0) { 24 | MPI_Sendrecv(&sbuf, 1, MPI_INT, (split_rank + 1) % split_size, 123, 25 | &rbuf, 1, MPI_INT, (split_rank + split_size - 1) % split_size, 123, 26 | split_comm, MPI_STATUS_IGNORE); 27 | xccl_mpi_create_comm_nb(split_comm, &split_team); 28 | } else { 29 | xccl_mpi_create_comm_nb(split_comm, &split_team); 30 | MPI_Sendrecv(&sbuf, 1, MPI_INT, (split_rank + 1) % split_size, 123, 31 | &rbuf, 1, MPI_INT, (split_rank + split_size - 1) % split_size, 123, 32 | split_comm, MPI_STATUS_IGNORE); 33 | } 34 | while (XCCL_INPROGRESS == xccl_team_create_test(split_team)) {;}; 35 | xccl_team_destroy(split_team); 36 | MPI_Comm_free(&split_comm); 37 | if (0 == rank) { 38 | printf("Correctness check: %s\n", "PASS"); 39 | } 40 | 41 | xccl_mpi_test_finalize(); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /test/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2020 Mellanox Technologies. All rights reserved. 
3 | # $COPYRIGHT$ 4 | # 5 | # Additional copyrights may follow 6 | # 7 | # $HEADER$ 8 | # 9 | 10 | bin_PROGRAMS = test_mpi_allreduce \ 11 | test_mpi_bcast \ 12 | test_mpi_barrier \ 13 | test_mpi_reduce \ 14 | test_mpi_fanout_get \ 15 | test_mpi_create_team_nb \ 16 | test_mpi_alltoall \ 17 | test_mpi_alltoallv \ 18 | test_mpi_allgather \ 19 | test_mpi_mt 20 | if HAVE_CUDA 21 | bin_PROGRAMS += test_mpi_cuda_allreduce \ 22 | test_mpi_cuda_reduce \ 23 | test_mpi_cuda_alltoall 24 | endif 25 | 26 | test_mpi_allreduce_SOURCES=test_mpi_allreduce.c test_mpi.c 27 | test_mpi_reduce_SOURCES=test_mpi_reduce.c test_mpi.c 28 | test_mpi_bcast_SOURCES=test_mpi_bcast.c test_mpi.c test_utils.c 29 | test_mpi_barrier_SOURCES=test_mpi_barrier.c test_mpi.c 30 | test_mpi_fanout_get_SOURCES=test_mpi_fanout_get.c test_mpi.c 31 | test_mpi_create_team_nb_SOURCES=test_mpi_create_team_nb.c test_mpi.c 32 | test_mpi_alltoall_SOURCES=test_mpi_alltoall.c test_mpi.c 33 | test_mpi_alltoallv_SOURCES=test_mpi_alltoallv.c test_mpi.c test_utils.c 34 | test_mpi_allgather_SOURCES=test_mpi_allgather.c test_mpi.c test_utils.c 35 | test_mpi_mt_SOURCES=test_mt.c test_mpi.c 36 | if HAVE_CUDA 37 | test_mpi_cuda_allreduce_SOURCES=test_mpi_cuda_allreduce.c test_mpi.c 38 | test_mpi_cuda_reduce_SOURCES=test_mpi_cuda_reduce.c test_mpi.c 39 | test_mpi_cuda_alltoall_SOURCES=test_mpi_cuda_alltoall.c test_mpi.c 40 | endif 41 | 42 | CC=mpicc 43 | CFLAGS+=-I${includedir} -std=c11 44 | LDFLAGS=-L$(libdir) -lxccl 45 | 46 | if HAVE_CUDA 47 | CPPFLAGS+=$(CUDA_CPPFLAGS) 48 | CFLAGS+=$(CUDA_CFLAGS) -DXCCL_TEST_WITH_CUDA 49 | LDFLAGS+=$(CUDA_LDFLAGS) -lcudart 50 | endif 51 | -------------------------------------------------------------------------------- /src/team_lib/mpod/xccl_mpod_barrier.c: -------------------------------------------------------------------------------- 1 | #include "xccl_mpod_lib.h" 2 | 3 | static xccl_status_t barrier_post(xccl_mpod_coll_req_t *req) 4 | { 5 | xccl_status_t status = XCCL_OK; 6 | 7 | status = 
req->team->context->lib.ucx->collective_post(req->chunks[0].real_req.ucx_flat); 8 | xccl_mpod_err_pop(status, fn_fail); 9 | 10 | fn_exit: 11 | return status; 12 | fn_fail: 13 | goto fn_exit; 14 | } 15 | 16 | static xccl_status_t barrier_test(xccl_mpod_coll_req_t *req) 17 | { 18 | xccl_status_t status = req->team->context->lib.ucx->collective_test(req->chunks[0].real_req.ucx_flat); 19 | 20 | return status; 21 | } 22 | 23 | static xccl_status_t barrier_finalize(xccl_mpod_coll_req_t *req) 24 | { 25 | xccl_status_t status = XCCL_OK; 26 | 27 | status = req->team->context->lib.ucx->collective_finalize(req->chunks[0].real_req.ucx_flat); 28 | xccl_mpod_err_pop(status, fn_fail); 29 | 30 | free(req->chunks); 31 | 32 | fn_exit: 33 | return status; 34 | fn_fail: 35 | goto fn_exit; 36 | } 37 | 38 | xccl_status_t xccl_mpod_barrier_init(xccl_mpod_coll_req_t *req) 39 | { 40 | xccl_status_t status = XCCL_OK; 41 | 42 | req->chunks = (xccl_mpod_chunk_s *) malloc(sizeof(xccl_mpod_chunk_s)); 43 | req->num_chunks = 1; 44 | 45 | status = req->team->context->lib.ucx->collective_init(&req->coll_args, &req->chunks[0].real_req.ucx_flat, 46 | req->team->team.ucx_flat); 47 | xccl_mpod_err_pop(status, fn_fail); 48 | 49 | req->collective_post = barrier_post; 50 | req->collective_test = barrier_test; 51 | req->collective_finalize = barrier_finalize; 52 | 53 | fn_exit: 54 | return status; 55 | fn_fail: 56 | goto fn_exit; 57 | } 58 | -------------------------------------------------------------------------------- /src/team_lib/mpod/xccl_mpod_alltoallv.c: -------------------------------------------------------------------------------- 1 | #include "xccl_mpod_lib.h" 2 | 3 | static xccl_status_t alltoallv_post(xccl_mpod_coll_req_t *req) 4 | { 5 | xccl_status_t status = XCCL_OK; 6 | 7 | status = req->team->context->lib.ucx->collective_post(req->chunks[0].real_req.ucx_flat); 8 | xccl_mpod_err_pop(status, fn_fail); 9 | 10 | fn_exit: 11 | return status; 12 | fn_fail: 13 | goto fn_exit; 14 | } 15 | 16 | 
static xccl_status_t alltoallv_test(xccl_mpod_coll_req_t *req) 17 | { 18 | xccl_status_t status = req->team->context->lib.ucx->collective_test(req->chunks[0].real_req.ucx_flat); 19 | 20 | return status; 21 | } 22 | 23 | static xccl_status_t alltoallv_finalize(xccl_mpod_coll_req_t *req) 24 | { 25 | xccl_status_t status = XCCL_OK; 26 | 27 | status = req->team->context->lib.ucx->collective_finalize(req->chunks[0].real_req.ucx_flat); 28 | xccl_mpod_err_pop(status, fn_fail); 29 | 30 | free(req->chunks); 31 | 32 | fn_exit: 33 | return status; 34 | fn_fail: 35 | goto fn_exit; 36 | } 37 | 38 | xccl_status_t xccl_mpod_alltoallv_init(xccl_mpod_coll_req_t *req) 39 | { 40 | xccl_status_t status = XCCL_OK; 41 | 42 | req->chunks = (xccl_mpod_chunk_s *) malloc(sizeof(xccl_mpod_chunk_s)); 43 | req->num_chunks = 1; 44 | 45 | status = req->team->context->lib.ucx->collective_init(&req->coll_args, &req->chunks[0].real_req.ucx_flat, 46 | req->team->team.ucx_flat); 47 | xccl_mpod_err_pop(status, fn_fail); 48 | 49 | req->collective_post = alltoallv_post; 50 | req->collective_test = alltoallv_test; 51 | req->collective_finalize = alltoallv_finalize; 52 | 53 | fn_exit: 54 | return status; 55 | fn_fail: 56 | goto fn_exit; 57 | } 58 | -------------------------------------------------------------------------------- /src/api/xccl_tls.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_TLS_H_ 7 | #define XCCL_TLS_H_ 8 | 9 | #include 10 | #include 11 | 12 | typedef enum xccl_tl_id { 13 | XCCL_TL_NULL = 0, 14 | XCCL_TL_UCX = UCS_BIT(0), 15 | XCCL_TL_HIER = UCS_BIT(1), 16 | XCCL_TL_SHARP = UCS_BIT(2), 17 | XCCL_TL_HMC = UCS_BIT(3), 18 | XCCL_TL_SHMSEG = UCS_BIT(4), 19 | XCCL_TL_MRAIL = UCS_BIT(5), 20 | XCCL_TL_NCCL = UCS_BIT(6), 21 | XCCL_TL_MPOD = UCS_BIT(7), 22 | XCCL_TL_LAST, 23 | XCCL_TL_ALL = (XCCL_TL_LAST << 1) - 3 24 | } xccl_tl_id_t; 25 | 26 | static inline 27 | const char* xccl_tl_str(xccl_tl_id_t tl_id) 28 | { 29 | switch(tl_id) { 30 | case XCCL_TL_UCX: 31 | return "ucx"; 32 | case XCCL_TL_HIER: 33 | return "hier"; 34 | case XCCL_TL_MRAIL: 35 | return "mrail"; 36 | case XCCL_TL_SHARP: 37 | return "sharp"; 38 | case XCCL_TL_HMC: 39 | return "hmc"; 40 | case XCCL_TL_SHMSEG: 41 | return "shmseg"; 42 | case XCCL_TL_NCCL: 43 | return "nccl"; 44 | case XCCL_TL_MPOD: 45 | return "mpod"; 46 | default: 47 | break; 48 | } 49 | return NULL; 50 | } 51 | 52 | static inline 53 | xccl_tl_id_t xccl_tls_str_to_bitmap(const char *tls_str) 54 | { 55 | xccl_tl_id_t tls = XCCL_TL_NULL; 56 | uint64_t i; 57 | 58 | if (!tls_str) { 59 | return tls; 60 | } 61 | 62 | for (i = 1; i < XCCL_TL_LAST; i = i << 1) { 63 | if (strstr(tls_str, xccl_tl_str((xccl_tl_id_t)i))) { 64 | tls = (xccl_tl_id_t)(tls | i); 65 | } 66 | } 67 | 68 | return tls; 69 | } 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /src/team_lib/nccl/xccl_nccl_collective.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_NCCL_COLLECTIVE_H_ 7 | #define XCCL_NCCL_COLLECTIVE_H_ 8 | 9 | #include 10 | #include 11 | 12 | #define ncclOpUnsupported (ncclNumOps + 1) 13 | #define ncclDataTypeUnsupported (ncclNumTypes + 1) 14 | 15 | typedef xccl_status_t (*xccl_nccl_collective_start_fn)(xccl_tl_coll_req_t *req); 16 | 17 | typedef struct xccl_nccl_coll_req { 18 | xccl_tl_coll_req_t super; 19 | xccl_coll_op_args_t args; 20 | xccl_nccl_team_t *team; 21 | xccl_nccl_collective_start_fn coll_start; 22 | cudaEvent_t completed; 23 | } xccl_nccl_coll_req_t; 24 | 25 | xccl_status_t 26 | xccl_nccl_collective_init_base(xccl_coll_op_args_t *coll_args, 27 | xccl_nccl_coll_req_t **request, 28 | xccl_nccl_team_t *team); 29 | 30 | xccl_status_t 31 | xccl_nccl_allreduce_init(xccl_coll_op_args_t *coll_args, 32 | xccl_nccl_coll_req_t *request, 33 | xccl_nccl_team_t *team); 34 | 35 | xccl_status_t 36 | xccl_nccl_alltoall_init(xccl_coll_op_args_t *coll_args, 37 | xccl_nccl_coll_req_t *request, 38 | xccl_nccl_team_t *team); 39 | 40 | xccl_status_t 41 | xccl_nccl_alltoallv_init(xccl_coll_op_args_t *coll_args, 42 | xccl_nccl_coll_req_t *request, 43 | xccl_nccl_team_t *team); 44 | 45 | xccl_status_t 46 | xccl_nccl_allgather_init(xccl_coll_op_args_t *coll_args, 47 | xccl_nccl_coll_req_t *request, 48 | xccl_nccl_team_t *team); 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/team_lib/ucx/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2009-2020 Mellanox Technologies. All rights reserved. 
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

if HAVE_UCX
allgather = allgather/allgather.h \
	allgather/allgather_ring.c

allreduce = allreduce/allreduce.h \
	allreduce/allreduce_knomial.h \
	allreduce/allreduce.c \
	allreduce/allreduce_knomial.c \
	allreduce/allreduce_sra.c

alltoall = alltoall/alltoall.h \
	alltoall/alltoall_pairwise.c \
	alltoall/alltoall_linear_shift.c

alltoallv = alltoallv/alltoallv.h \
	alltoallv/alltoallv_pairwise.c

# fix: these headers live in their sub-directories (cf. allgather above and
# the source tree: reduce/reduce.h, fanin/fanin.h, ...); the bare paths
# "reduce.h" etc. do not exist and break "make dist"
reduce = reduce/reduce.h \
	reduce/reduce_linear.c \
	reduce/reduce_knomial.c

fanin = fanin/fanin.h \
	fanin/fanin_linear.c

fanout = fanout/fanout.h \
	fanout/fanout_linear.c

bcast = bcast/bcast.h \
	bcast/bcast_linear.c\
	bcast/bcast_knomial.c

barrier = barrier/barrier.h \
	barrier/barrier_knomial.c

sources = \
	xccl_ucx_tag.h \
	xccl_ucx_team.h \
	xccl_ucx_team.c \
	xccl_ucx_context.h \
	xccl_ucx_context.c \
	xccl_ucx_lib.h \
	xccl_ucx_ep.h \
	xccl_ucx_lib.c \
	xccl_ucx_sendrecv.h \
	${allgather} \
	${allreduce} \
	${alltoall} \
	${alltoallv} \
	${reduce} \
	${fanin}\
	${fanout}\
	${bcast} \
	${barrier}

component_noinst =
component_install = xccl_team_lib_ucx.la

xccl_team_lib_ucx_la_LIBADD = $(XCCL_TOP_BUILDDIR)/src/libxccl.la
xccl_team_lib_ucx_la_SOURCES =$(sources)
xccl_team_lib_ucx_la_CPPFLAGS = $(AM_CPPFLAGS) $(UCX_CPPFLAGS)
xccl_team_lib_ucx_la_LDFLAGS = -module -avoid-version $(UCX_LDFLAGS) $(UCX_LIBADD)

pkglib_LTLIBRARIES = $(component_install)
endif
--------------------------------------------------------------------------------
/src/team_lib/mpod/xccl_mpod_cpu.c:
--------------------------------------------------------------------------------
#include "xccl_mpod_lib.h"

static xccl_status_t
cpu_post(xccl_mpod_coll_req_t *req) 4 | { 5 | xccl_status_t status = XCCL_OK; 6 | 7 | status = req->team->context->lib.ucx->collective_finalize(req->chunks[0].real_req.ucx_flat); 8 | xccl_mpod_err_pop(status, fn_fail); 9 | 10 | fn_exit: 11 | return status; 12 | fn_fail: 13 | goto fn_exit; 14 | } 15 | 16 | static xccl_status_t cpu_test(xccl_mpod_coll_req_t *req) 17 | { 18 | xccl_status_t status = XCCL_OK; 19 | 20 | status = req->team->context->lib.ucx->collective_test(req->chunks[0].real_req.ucx_flat); 21 | xccl_mpod_err_pop(status, fn_fail); 22 | 23 | fn_exit: 24 | return status; 25 | fn_fail: 26 | goto fn_exit; 27 | } 28 | 29 | static xccl_status_t cpu_finalize(xccl_mpod_coll_req_t *req) 30 | { 31 | xccl_status_t status = XCCL_OK; 32 | 33 | status = req->team->context->lib.ucx->collective_finalize(req->chunks[0].real_req.ucx_flat); 34 | xccl_mpod_err_pop(status, fn_fail); 35 | 36 | free(req->chunks); 37 | 38 | fn_exit: 39 | return status; 40 | fn_fail: 41 | goto fn_exit; 42 | } 43 | 44 | xccl_status_t xccl_mpod_cpu_init(xccl_mpod_coll_req_t *req) 45 | { 46 | xccl_status_t status = XCCL_OK; 47 | 48 | req->chunks = (xccl_mpod_chunk_s *) malloc(sizeof(xccl_mpod_chunk_s)); 49 | req->num_chunks = 1; 50 | 51 | status = req->team->context->lib.ucx->collective_init(&req->coll_args, &req->chunks[0].real_req.ucx_flat, 52 | req->team->team.ucx_flat); 53 | xccl_mpod_err_pop(status, fn_fail); 54 | 55 | req->collective_post = cpu_post; 56 | req->collective_test = cpu_test; 57 | req->collective_finalize = cpu_finalize; 58 | 59 | fn_exit: 60 | return status; 61 | fn_fail: 62 | goto fn_exit; 63 | } 64 | -------------------------------------------------------------------------------- /src/utils/utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #ifndef XCCL_UTILS_H_ 7 | #define XCCL_UTILS_H_ 8 | #include "api/xccl.h" 9 | #include "reduce.h" 10 | #include 11 | 12 | static inline void 13 | xccl_oob_allreduce(void *sbuf, void *rbuf, size_t count, xccl_dt_t dt, xccl_op_t op, 14 | xccl_tl_team_t *team, xccl_oob_collectives_t oob) 15 | { 16 | size_t team_size = oob.size; 17 | void *tmp; 18 | size_t len = count*xccl_dt_size(dt); 19 | int i; 20 | tmp = malloc(team_size*len); 21 | xccl_oob_allgather(sbuf, tmp, len, &oob); 22 | xccl_dt_reduce(tmp, (void*)((ptrdiff_t)tmp + len), rbuf, count, dt, op); 23 | for (i=2; icount == 0) { 47 | return NULL; 48 | } 49 | for (i=0; icount; i++) { 50 | total_len += strlen(array->names[i]) + 1; 51 | } 52 | str = (char*)malloc(total_len); 53 | if (!str) { 54 | return NULL; 55 | } 56 | strcpy(str, array->names[0]); 57 | for (i=1; icount; i++) { 58 | strcat(str, ","); 59 | strcat(str, array->names[i]); 60 | } 61 | return str; 62 | } 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /src/team_lib/hier/xccl_hier_context.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | #ifndef XCCL_HIER_CONTEXT_H_ 6 | #define XCCL_HIER_CONTEXT_H_ 7 | 8 | #include "xccl_hier_lib.h" 9 | #include "api/xccl_tls.h" 10 | #include 11 | 12 | typedef struct xccl_hier_proc_data { 13 | unsigned long node_hash; 14 | int node_id; 15 | int socketid; //if process is bound to a socket 16 | int pid; 17 | } xccl_hier_proc_data_t; 18 | 19 | typedef struct xccl_hier_tl_t { 20 | xccl_context_h xccl_ctx; 21 | int enabled; 22 | } xccl_hier_tl_t; 23 | 24 | typedef struct xccl_hier_context { 25 | xccl_tl_context_t super; 26 | xccl_hier_proc_data_t local_proc; // local proc data 27 | xccl_hier_proc_data_t *procs; // data for all processes 28 | xccl_hier_tl_t tls[ucs_ilog2(XCCL_TL_LAST-1)+1]; 29 | int nnodes; 30 | int max_ppn; 31 | int min_ppn; 32 | int max_n_sockets; 33 | int node_leader_rank_id; 34 | int use_sm_get_bcast; 35 | int bcast_pipeline_depth; 36 | size_t bcast_sm_get_thresh; 37 | size_t bcast_pipeline_thresh; 38 | } xccl_hier_context_t; 39 | 40 | xccl_status_t xccl_hier_create_context(xccl_team_lib_t *lib, 41 | xccl_context_params_t *params, 42 | xccl_tl_context_config_t *config, 43 | xccl_tl_context_t **context); 44 | xccl_status_t xccl_hier_destroy_context(xccl_tl_context_t *context); 45 | xccl_status_t xccl_hier_context_progress(xccl_tl_context_t *team_ctx); 46 | #endif 47 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | # 4 | 5 | SUBDIRS = \ 6 | . 
\ 7 | team_lib/hier \ 8 | team_lib/multirail 9 | 10 | if HAVE_UCX 11 | SUBDIRS += team_lib/ucx 12 | endif 13 | if HAVE_SHARP 14 | SUBDIRS += team_lib/sharp 15 | endif 16 | if HAVE_HMC 17 | SUBDIRS += team_lib/hmc 18 | endif 19 | if HAVE_NCCL 20 | SUBDIRS += team_lib/nccl 21 | endif 22 | if HAVE_MPOD 23 | SUBDIRS += team_lib/mpod 24 | endif 25 | 26 | if HAVE_CUDA 27 | SUBDIRS += utils/cuda 28 | endif 29 | 30 | lib_LTLIBRARIES = libxccl.la 31 | noinst_LIBRARIES = 32 | 33 | libxccl_la_CPPFLAGS = $(AM_CPPFLAGS) $(UCS_CPPFLAGS) 34 | libxccl_la_CFLAGS = 35 | libxccl_la_LDFLAGS = -ldl -version-info $(SOVERSION) --no-undefined $(UCS_LDFLAGS) 36 | 37 | nobase_dist_libxccl_la_HEADERS = \ 38 | api/xccl.h \ 39 | api/xccl_def.h \ 40 | api/xccl_version.h \ 41 | api/xccl_status.h \ 42 | api/xccl_tls.h 43 | 44 | noinst_HEADERS = core/xccl_team_lib.h 45 | 46 | libxccl_la_SOURCES = \ 47 | core/xccl_init.c \ 48 | core/xccl_finalize.c \ 49 | core/xccl_lib.c \ 50 | core/xccl_context.c \ 51 | core/xccl_team.c \ 52 | core/xccl_collective.c \ 53 | core/xccl_lock_free_tasks_queue.c \ 54 | core/xccl_tasks_queue.c \ 55 | core/xccl_mm.c \ 56 | core/xccl_query.c \ 57 | core/xccl_global_opts.c \ 58 | core/xccl_team_lib.c \ 59 | core/xccl_progress_queue.c \ 60 | core/xccl_schedule.c \ 61 | utils/socketid.c \ 62 | utils/mem_component.c 63 | 64 | libxccl_ladir = $(includedir) 65 | 66 | install-exec-hook: 67 | cp -f $(XCCL_TOP_BUILDDIR)/src/libxccl.la $(libdir)/ 68 | perl -pi -e "s/installed=no/installed=yes/" $(libdir)/libxccl.la 69 | -------------------------------------------------------------------------------- /src/team_lib/mpod/xccl_mpod_nccl_reqs.c: -------------------------------------------------------------------------------- 1 | #include "xccl_mpod_lib.h" 2 | 3 | xccl_status_t xccl_mpod_nccl_req_init(xccl_mpod_coll_req_t *mpod_req, 4 | xccl_coll_op_args_t *coll_args, 5 | xccl_mpod_nccl_req_s *nccl_req) 6 | { 7 | xccl_status_t status = XCCL_OK; 8 | 9 | nccl_req->mpod_req = mpod_req; 10 | 
11 | status = mpod_req->team->context->lib.nccl->collective_init(coll_args, &nccl_req->r, mpod_req->team->team.nccl); 12 | xccl_mpod_err_pop(status, fn_fail); 13 | 14 | fn_exit: 15 | return status; 16 | fn_fail: 17 | goto fn_exit; 18 | } 19 | 20 | xccl_status_t xccl_mpod_nccl_req_finalize(xccl_mpod_nccl_req_s *nccl_req) 21 | { 22 | xccl_status_t status = XCCL_OK; 23 | 24 | status = nccl_req->mpod_req->team->context->lib.nccl->collective_finalize(nccl_req->r); 25 | xccl_mpod_err_pop(status, fn_fail); 26 | 27 | fn_exit: 28 | return status; 29 | fn_fail: 30 | goto fn_exit; 31 | } 32 | 33 | xccl_status_t xccl_mpod_nccl_req_post(xccl_mpod_nccl_req_s *nccl_req) 34 | { 35 | xccl_status_t status = XCCL_OK; 36 | 37 | status = nccl_req->mpod_req->team->context->lib.nccl->collective_post(nccl_req->r); 38 | xccl_mpod_err_pop(status, fn_fail); 39 | 40 | fn_exit: 41 | return status; 42 | fn_fail: 43 | goto fn_exit; 44 | } 45 | 46 | xccl_status_t xccl_mpod_nccl_req_wait(xccl_mpod_nccl_req_s *nccl_req) 47 | { 48 | xccl_status_t status = XCCL_OK; 49 | 50 | do { 51 | status = xccl_mpod_nccl_req_test(nccl_req); 52 | } while (status == XCCL_INPROGRESS); 53 | 54 | return status; 55 | } 56 | 57 | xccl_status_t xccl_mpod_nccl_req_test(xccl_mpod_nccl_req_s *nccl_req) 58 | { 59 | xccl_status_t status = XCCL_OK; 60 | 61 | status = nccl_req->mpod_req->team->context->lib.nccl->collective_test(nccl_req->r); 62 | xccl_mpod_err_pop(status, fn_fail); 63 | 64 | fn_exit: 65 | return status; 66 | fn_fail: 67 | goto fn_exit; 68 | } 69 | -------------------------------------------------------------------------------- /test/test_mpi_allreduce.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #include "test_mpi.h" 7 | 8 | int main (int argc, char **argv) { 9 | const int count = 32; 10 | xccl_coll_req_h request; 11 | int rank, size, i, status = 0, status_global; 12 | int sbuf[count], rbuf[count], rbuf_mpi[count]; 13 | 14 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_ALLREDUCE, XCCL_THREAD_MODE_SINGLE)); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); 17 | 18 | for (i=0; i 10 | 11 | static inline xccl_status_t close_eps(ucp_ep_h *eps, int n_eps, ucp_worker_h worker) 12 | { 13 | void *close_req; 14 | ucs_status_t status; 15 | int i; 16 | for(i = 0; i < n_eps; i++){ 17 | if (!eps[i]) { 18 | continue; 19 | } 20 | close_req = ucp_ep_close_nb(eps[i], UCP_EP_CLOSE_MODE_FLUSH); 21 | if (UCS_PTR_IS_ERR(close_req)) { 22 | fprintf(stderr, "failed to start ep close, ep %p", eps[i]); 23 | } 24 | status = UCS_PTR_STATUS(close_req); 25 | if (status != UCS_OK) { 26 | while (status != UCS_OK) { 27 | ucp_worker_progress(worker); 28 | status = ucp_request_check_status(close_req); 29 | } 30 | ucp_request_free(close_req); 31 | } 32 | } 33 | return status; 34 | } 35 | 36 | static inline xccl_status_t 37 | connect_ep(xccl_team_lib_ucx_context_t *ctx, xccl_ucx_team_t *team, 38 | char *addr_array, size_t max_addrlen, int rank) 39 | { 40 | ucp_address_t *address = (ucp_address_t*)(addr_array + max_addrlen*rank); 41 | ucp_ep_params_t ep_params; 42 | ucs_status_t status; 43 | ucp_ep_h *ep; 44 | if (ctx->ucp_eps) { 45 | ep = &ctx->ucp_eps[xccl_range_to_rank(team->range, rank)]; 46 | } else { 47 | ep = &team->ucp_eps[rank]; 48 | } 49 | if (*ep) { 50 | return XCCL_OK; 51 | } 52 | ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; 53 | ep_params.address = address; 54 | 55 | status = ucp_ep_create(ctx->ucp_worker, &ep_params, ep); 56 | 57 | if (UCS_OK != status) { 58 | fprintf(stderr, "UCX returned connect error: %s\n", 59 | ucs_status_string(status)); 60 | return XCCL_ERR_UNREACHABLE; 61 | } 62 | return XCCL_OK; 63 | } 
64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /src/team_lib/ucx/xccl_ucx_tag.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | #ifndef TEAM_UCX_TAG_H_ 7 | #define TEAM_UCX_TAG_H_ 8 | /* 9 | * UCX tag structure: 10 | * 11 | * 0123456 7 01234567 01234567 01234567 01234567 01234567 01234567 01234567 12 | * | | | | 13 | * RESERVED (7) | MCASTbit(1) | message tag (16) | source rank (24) | context id (16) 14 | */ 15 | #define TEAM_UCX_RESERVED_BITS 7 16 | #define TEAM_UCX_SCOLL_BITS 1 17 | #define TEAM_UCX_TAG_BITS 16 18 | #define TEAM_UCX_RANK_BITS 24 19 | #define TEAM_UCX_CONTEXT_BITS 16 20 | 21 | #define TEAM_UCX_RESERVED_BITS_OFFSET (TEAM_UCX_CONTEXT_BITS + \ 22 | TEAM_UCX_RANK_BITS + \ 23 | TEAM_UCX_TAG_BITS + \ 24 | TEAM_UCX_SCOLL_BITS) 25 | #define TEAM_UCX_SCOLL_BITS_OFFSET (TEAM_UCX_CONTEXT_BITS + \ 26 | TEAM_UCX_RANK_BITS + TEAM_UCX_TAG_BITS) 27 | #define TEAM_UCX_TAG_BITS_OFFSET (TEAM_UCX_CONTEXT_BITS + TEAM_UCX_RANK_BITS) 28 | #define TEAM_UCX_RANK_BITS_OFFSET (TEAM_UCX_CONTEXT_BITS) 29 | #define TEAM_UCX_CONTEXT_BITS_OFFSET 0 30 | 31 | #define TEAM_UCX_MAX_CTAG ((((uint64_t)1) << TEAM_UCX_CTAG_BITS) - 1) 32 | #define TEAM_UCX_MAX_TAG ((((uint64_t)1) << TEAM_UCX_TAG_BITS) - 1) 33 | #define TEAM_UCX_MAX_RANK ((((uint64_t)1) << TEAM_UCX_RANK_BITS) - 1) 34 | #define TEAM_UCX_MAX_CONTEXT ((((uint64_t)1) << TEAM_UCX_CONTEXT_BITS) - 1) 35 | 36 | #define TEAM_UCX_TAG_MASK (TEAM_UCX_MAX_TAG << TEAM_UCX_TAG_BITS_OFFSET) 37 | #define TEAM_UCX_RANK_MASK (TEAM_UCX_MAX_RANK << TEAM_UCX_RANK_BITS_OFFSET) 38 | #define TEAM_UCX_CONTEXT_MASK (TEAM_UCX_MAX_CONTEXT << TEAM_UCX_CONTEXT_BITS_OFFSET) 39 | #define TEAM_UCX_TAG_SENDER_MASK ((((uint64_t)1) << \ 40 | (TEAM_UCX_CONTEXT_BITS + TEAM_UCX_RANK_BITS)) - 1) 41 | 42 | #endif 43 | 
-------------------------------------------------------------------------------- /src/team_lib/ucx/xccl_ucx_context.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | #ifndef XCCL_UCX_CONTEXT_H_ 6 | #define XCCL_UCX_CONTEXT_H_ 7 | #include "xccl_ucx_lib.h" 8 | 9 | typedef struct xccl_team_lib_ucx_context { 10 | xccl_tl_context_t super; 11 | ucp_ep_h *ucp_eps; 12 | ucp_address_t *worker_address; 13 | int ucx_inited; 14 | ucp_context_h ucp_context; 15 | ucp_worker_h ucp_worker; 16 | size_t ucp_addrlen; 17 | int next_cid; 18 | unsigned num_to_probe; 19 | unsigned barrier_kn_radix; 20 | unsigned bcast_kn_radix; 21 | unsigned allreduce_kn_radix; 22 | unsigned allreduce_alg_id; 23 | unsigned reduce_kn_radix; 24 | unsigned alltoall_pairwise_chunk; 25 | int alltoall_pairwise_reverse; 26 | unsigned alltoall_pairwise_barrier; 27 | int block_stream; 28 | unsigned pre_mem_map; 29 | } xccl_team_lib_ucx_context_t; 30 | 31 | xccl_status_t xccl_ucx_create_context(xccl_team_lib_t *lib, 32 | xccl_context_params_t *params, 33 | xccl_tl_context_config_t *config, 34 | xccl_tl_context_t **context); 35 | xccl_status_t xccl_ucx_destroy_context(xccl_tl_context_t *context); 36 | 37 | void xccl_ucx_mem_map(void *addr, size_t length, ucs_memory_type_t mem_type, 38 | xccl_team_lib_ucx_context_t *ctx); 39 | #define TEAM_UCX_CTX(_team) (ucs_derived_of((_team)->super.ctx, xccl_team_lib_ucx_context_t)) 40 | #define TEAM_UCX_CTX_REQ(_req) (ucs_derived_of((_req)->team->ctx, xccl_team_lib_ucx_context_t)) 41 | #define TEAM_UCX_WORKER(_team) TEAM_UCX_CTX(_team)->ucp_worker 42 | 43 | enum { 44 | XCCL_TEAM_UCX_NO_PRE_MAP = 0, 45 | XCCL_TEAM_UCX_COLL_INIT_PRE_MAP = 1, 46 | XCCL_TEAM_UCX_ALLOC_PRE_MAP = 2, 47 | XCCL_TEAM_UCX_COLL_INIT_AND_ALLOC_PRE_MAP = 3, 48 | }; 49 | #endif 50 | 
-------------------------------------------------------------------------------- /src/team_lib/hier/xccl_hier_task_schedule.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | 7 | #ifndef XCCL_HIER_TASK_SCHEDULE_H 8 | #define XCCL_HIER_TASK_SCHEDULE_H 9 | 10 | #include 11 | #include "xccl_team_lib.h" 12 | #include "xccl_hier_team.h" 13 | 14 | typedef struct xccl_coll_args xccl_coll_args_t; 15 | 16 | typedef struct xccl_hier_task { 17 | xccl_coll_task_t super; 18 | xccl_coll_op_args_t xccl_coll; 19 | xccl_hier_pair_t *pair; 20 | xccl_coll_req_h req; 21 | void *scratch; 22 | } xccl_hier_task_t; 23 | 24 | typedef struct xccl_seq_schedule { 25 | xccl_schedule_t super; 26 | xccl_tl_coll_req_t req; 27 | int dep; 28 | xccl_hier_task_t *tasks; 29 | } xccl_seq_schedule_t; 30 | 31 | xccl_status_t hier_task_progress_handler(xccl_coll_task_t *task); 32 | void hier_task_completed_handler(xccl_coll_task_t *task); 33 | 34 | xccl_status_t build_allreduce_task_schedule(xccl_hier_team_t *team, 35 | xccl_coll_op_args_t coll, 36 | xccl_hier_allreduce_spec_t spec, 37 | xccl_seq_schedule_t **sched); 38 | xccl_status_t build_alltoall_task_schedule(xccl_hier_team_t *team, 39 | xccl_coll_op_args_t coll, 40 | xccl_hier_alltoall_spec_t spec, 41 | xccl_seq_schedule_t **sched); 42 | xccl_status_t build_bcast_task_schedule(xccl_hier_team_t *team, 43 | xccl_coll_op_args_t coll, 44 | xccl_hier_bcast_spec_t spec, 45 | xccl_seq_schedule_t **sched); 46 | xccl_status_t build_barrier_task_schedule(xccl_hier_team_t *team, 47 | xccl_coll_op_args_t coll, 48 | xccl_hier_barrier_spec_t spec, 49 | xccl_seq_schedule_t **sched); 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/core/xccl_query.c: -------------------------------------------------------------------------------- 1 | /** 2 
| * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | xccl_status_t xccl_get_tl_list(xccl_lib_h lib, xccl_tl_id_t **tls, 13 | unsigned *tl_count) 14 | { 15 | int i; 16 | 17 | *tl_count = lib->n_libs_opened; 18 | (*tls) = (xccl_tl_id_t*)malloc((*tl_count) * sizeof(xccl_tl_id_t)); 19 | for(i = 0; i < *tl_count; i++) { 20 | (*tls)[i] = lib->libs[i]->id; 21 | } 22 | 23 | return XCCL_OK; 24 | } 25 | 26 | void xccl_free_tl_list(xccl_tl_id_t *tls) 27 | { 28 | free(tls); 29 | } 30 | 31 | xccl_status_t xccl_tl_query(xccl_lib_h lib, xccl_tl_id_t *tl_id, 32 | xccl_tl_attr_t *tl_attr) 33 | { 34 | xccl_team_lib_t *tl = NULL; 35 | xccl_status_t status; 36 | int i; 37 | 38 | for(i = 0; i < lib->n_libs_opened; i++){ 39 | if (lib->libs[i]->id == *tl_id) { 40 | tl = lib->libs[i]; 41 | break; 42 | } 43 | } 44 | 45 | if (tl == NULL) { 46 | xccl_warn("Wrong tl_id specified (%d)", *tl_id); 47 | return XCCL_ERR_INVALID_PARAM; 48 | } 49 | 50 | if (tl->team_lib_query == NULL) { 51 | xccl_warn("TL %s is not supported", xccl_tl_str(*tl_id)); 52 | return XCCL_ERR_NOT_IMPLEMENTED; 53 | } 54 | 55 | status = tl->team_lib_query(tl, tl_attr); 56 | 57 | return status; 58 | } 59 | 60 | void xccl_free_tl_attr(xccl_tl_attr_t *attr) { 61 | if (attr->field_mask & XCCL_TL_ATTR_FILED_DEVICES) { 62 | free(attr->devices); 63 | } 64 | } 65 | 66 | xccl_status_t xccl_ctx_query(xccl_context_h ctx, xccl_ctx_attr_t *attr) 67 | { 68 | int i; 69 | if (attr->field_mask & XCCL_CTX_ATTR_FIELD_SUPPORTED_COLLS) { 70 | attr->supported_colls = 0; 71 | for (i = 0; i < ctx->n_tl_ctx; i++) { 72 | attr->supported_colls |= ctx->tl_ctx[i]->lib->params.coll_types; 73 | } 74 | } 75 | return XCCL_OK; 76 | } 77 | 78 | void xccl_free_ctx_attr(xccl_ctx_attr_t *attr) 79 | { 80 | } 81 | -------------------------------------------------------------------------------- /src/core/xccl_schedule.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void xccl_event_manager_init(xccl_event_manager_t *em) 6 | { 7 | int i; 8 | for (i = 0; i < XCCL_EVENT_LAST; i++) { 9 | em->listeners_size[i] = 0; 10 | } 11 | } 12 | 13 | void xccl_event_manager_subscribe(xccl_event_manager_t *em, 14 | xccl_event_t event, 15 | xccl_coll_task_t *task) 16 | { 17 | em->listeners[event][em->listeners_size[event]] = task; 18 | em->listeners_size[event]++; 19 | } 20 | 21 | void xccl_event_manager_notify(xccl_event_manager_t *em, 22 | xccl_event_t event) 23 | { 24 | xccl_coll_task_t *task; 25 | int i; 26 | 27 | for (i = 0; i < em->listeners_size[event]; i++) { 28 | task = em->listeners[event][i]; 29 | task->handlers[event](task); 30 | } 31 | } 32 | 33 | void xccl_coll_task_init(xccl_coll_task_t *task) 34 | { 35 | task->state = XCCL_TASK_STATE_NOT_READY; 36 | xccl_event_manager_init(&task->em); 37 | task->busy = 0; 38 | } 39 | 40 | void schedule_completed_handler(xccl_coll_task_t *task) 41 | { 42 | xccl_schedule_t *self = (xccl_schedule_t*)task; 43 | self->n_completed_tasks += 1; 44 | if (self->n_completed_tasks == self->n_tasks) { 45 | self->super.state = XCCL_TASK_STATE_COMPLETED; 46 | } 47 | } 48 | 49 | void xccl_schedule_init(xccl_schedule_t *schedule, xccl_tl_context_t *tl_ctx) 50 | { 51 | xccl_coll_task_init(&schedule->super); 52 | schedule->super.handlers[XCCL_EVENT_COMPLETED] = schedule_completed_handler; 53 | schedule->n_completed_tasks = 0; 54 | schedule->tl_ctx = tl_ctx; 55 | schedule->n_tasks = 0; 56 | } 57 | 58 | void xccl_schedule_add_task(xccl_schedule_t *schedule, xccl_coll_task_t *task) 59 | { 60 | xccl_event_manager_subscribe(&task->em, XCCL_EVENT_COMPLETED, &schedule->super); 61 | task->schedule = schedule; 62 | schedule->n_tasks++; 63 | } 64 | 65 | void xccl_schedule_start(xccl_schedule_t *schedule) 66 | { 67 | schedule->super.state = XCCL_TASK_STATE_INPROGRESS; 68 | 
xccl_event_manager_notify(&schedule->super.em, XCCL_EVENT_SCHEDULE_STARTED); 69 | } 70 | -------------------------------------------------------------------------------- /m4/hmc.m4: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) Mellanox Technologies Ltd. 2001-2019. ALL RIGHTS RESERVED. 3 | # See file LICENSE for terms. 4 | # 5 | 6 | AC_DEFUN([XCCL_CHECK_HMC],[ 7 | 8 | AS_IF([test "x$hmc_checked" != "xyes"],[ 9 | 10 | hmc_happy="no" 11 | 12 | AC_ARG_WITH([hmc], 13 | [AS_HELP_STRING([--with-hmc=(DIR)], [Enable the use of HMC (default is guess).])], 14 | [], [with_hmc=guess]) 15 | 16 | AS_IF([test "x$with_hmc" != "xno"], 17 | [save_CPPFLAGS="$CPPFLAGS" 18 | save_CFLAGS="$CFLAGS" 19 | save_LDFLAGS="$LDFLAGS" 20 | 21 | AS_IF([test ! -z "$with_hmc" -a "x$with_hmc" != "xyes" -a "x$with_hmc" != "xguess"], 22 | [ 23 | xccl_check_hmc_dir="$with_hmc" 24 | AS_IF([test -d "$with_hmc/lib64"],[libsuff="64"],[libsuff=""]) 25 | xccl_check_hmc_libdir="$with_hmc/lib$libsuff" 26 | CPPFLAGS="-I$with_hmc/include $save_CPPFLAGS" 27 | LDFLAGS="-L$xccl_check_hmc_libdir $save_LDFLAGS" 28 | ]) 29 | AS_IF([test ! -z "$with_hmc_libdir" -a "x$with_hmc_libdir" != "xyes"], 30 | [xccl_check_hmc_libdir="$with_hmc_libdir" 31 | LDFLAGS="-L$xccl_check_hmc_libdir $save_LDFLAGS"]) 32 | 33 | AC_CHECK_HEADERS([hmc.h], 34 | [AC_CHECK_LIB([hmc] , [hmc_init], 35 | [hmc_happy="yes"], 36 | [AC_MSG_WARN([HMC is not detected. 
Disable.]) 37 | hmc_happy="no"]) 38 | ], [hmc_happy="no"]) 39 | 40 | CFLAGS="$save_CFLAGS" 41 | CPPFLAGS="$save_CPPFLAGS" 42 | LDFLAGS="$save_LDFLAGS" 43 | 44 | AS_IF([test "x$hmc_happy" = "xyes"], 45 | [ 46 | AC_SUBST(HMC_CPPFLAGS, "-I$xccl_check_hmc_dir/include/ ") 47 | AC_SUBST(HMC_LDFLAGS, "-L$xccl_check_hmc_dir/lib -lhmc") 48 | ], 49 | [ 50 | AS_IF([test "x$with_hmc" != "xguess"], 51 | [AC_MSG_ERROR([HMC support is requested but HMC packages cannot be found])], 52 | [AC_MSG_WARN([HMC not found])]) 53 | ]) 54 | 55 | ], 56 | [AC_MSG_WARN([HMC was explicitly disabled])]) 57 | 58 | hmc_checked=yes 59 | AM_CONDITIONAL([HAVE_HMC], [test "x$hmc_happy" != xno]) 60 | ]) 61 | 62 | ]) 63 | -------------------------------------------------------------------------------- /m4/nccl.m4: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | # See file LICENSE for terms. 4 | # 5 | 6 | AC_DEFUN([XCCL_CHECK_NCCL],[ 7 | 8 | AS_IF([test "x$nccl_checked" != "xyes"],[ 9 | 10 | nccl_happy="no" 11 | 12 | AC_ARG_WITH([nccl], 13 | [AS_HELP_STRING([--with-nccl=(DIR)], [Enable the use of NCCL (default is guess).])], 14 | [], [with_nccl=guess]) 15 | 16 | AS_IF([test "x$with_nccl" != "xno"], 17 | [save_CPPFLAGS="$CPPFLAGS" 18 | save_CFLAGS="$CFLAGS" 19 | save_LDFLAGS="$LDFLAGS" 20 | 21 | AS_IF([test ! -z "$with_nccl" -a "x$with_nccl" != "xyes" -a "x$with_nccl" != "xguess"], 22 | [ 23 | xccl_check_nccl_dir="$with_nccl" 24 | xccl_check_nccl_libdir="$with_nccl/lib" 25 | CPPFLAGS="-I$with_nccl/include $save_CPPFLAGS $CUDA_CPPFLAGS" 26 | LDFLAGS="-L$xccl_check_nccl_libdir $save_LDFLAGS $CUDA_LDFLAGS" 27 | ]) 28 | AS_IF([test ! 
-z "$with_nccl_libdir" -a "x$with_nccl_libdir" != "xyes"], 29 | [xccl_check_nccl_libdir="$with_nccl_libdir" 30 | LDFLAGS="-L$xccl_check_nccl_libdir $save_LDFLAGS $CUDA_LDFLAGS"]) 31 | 32 | AC_CHECK_HEADERS([nccl.h], 33 | [AC_CHECK_LIB([nccl] , [ncclCommInitRank], 34 | [nccl_happy="yes"], 35 | [AC_MSG_WARN([NCCL is not detected. Disable.]) 36 | nccl_happy="no"]) 37 | ], [nccl_happy="no"]) 38 | 39 | 40 | AS_IF([test "x$nccl_happy" = "xyes"], 41 | [ 42 | AC_SUBST(NCCL_CPPFLAGS, "-I$xccl_check_nccl_dir/include/ ") 43 | AC_SUBST(NCCL_LDFLAGS, "-lnccl -L$xccl_check_nccl_dir/lib") 44 | ], 45 | [ 46 | AS_IF([test "x$with_nccl" != "xguess"], 47 | [AC_MSG_ERROR([NCCL support is requested but NCCL packages cannot be found])], 48 | [AC_MSG_WARN([NCCL not found])]) 49 | ]) 50 | CFLAGS="$save_CFLAGS" 51 | CPPFLAGS="$save_CPPFLAGS" 52 | LDFLAGS="$save_LDFLAGS" 53 | 54 | ], 55 | [AC_MSG_WARN([NCCL was explicitly disabled])]) 56 | 57 | nccl_checked=yes 58 | AM_CONDITIONAL([HAVE_NCCL], [test "x$nccl_happy" != xno]) 59 | ]) 60 | 61 | ]) 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2015 UT-Battelle, LLC. All rights reserved. 2 | Copyright (C) 2014-2020 Mellanox Technologies Ltd. All rights reserved. 3 | Copyright (C) 2014-2015 The University of Houston System. All rights reserved. 4 | Copyright (C) 2015 The University of Tennessee and The University 5 | of Tennessee Research Foundation. All rights reserved. 6 | Copyright (C) 2016 ARM Ltd. All rights reserved. 7 | Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved. 8 | Copyright (C) 2016-2017 Advanced Micro Devices, Inc. All rights reserved. 9 | Copyright (C) 2019 UChicago Argonne, LLC. All rights reserved. 10 | Copyright (c) 2018-2019 NVIDIA CORPORATION. All rights reserved. 
11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions 14 | are met: 15 | 16 | 1. Redistributions of source code must retain the above copyright 17 | notice, this list of conditions and the following disclaimer. 18 | 2. Redistributions in binary form must reproduce the above copyright 19 | notice, this list of conditions and the following disclaimer in the 20 | documentation and/or other materials provided with the distribution. 21 | 3. Neither the name of the copyright holder nor the names of its 22 | contributors may be used to endorse or promote products derived from 23 | this software without specific prior written permission. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 31 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 32 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 33 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 34 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 35 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
36 | -------------------------------------------------------------------------------- /src/core/xccl_schedule.h: -------------------------------------------------------------------------------- 1 | #ifndef XCCL_SCHEDULE_H 2 | #define XCCL_SCHEDULE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define MAX_LISTENERS 16 9 | 10 | typedef enum { 11 | XCCL_EVENT_COMPLETED = 0, 12 | XCCL_EVENT_SCHEDULE_STARTED, 13 | XCCL_EVENT_LAST 14 | } xccl_event_t; 15 | 16 | typedef enum { 17 | XCCL_TASK_STATE_NOT_READY, 18 | XCCL_TASK_STATE_INPROGRESS, 19 | XCCL_TASK_STATE_COMPLETED 20 | } xccl_task_state_t; 21 | 22 | typedef struct xccl_coll_task xccl_coll_task_t; 23 | 24 | typedef void (*xccl_task_event_handler_p)(xccl_coll_task_t *task); 25 | 26 | typedef struct xccl_event_manager { 27 | xccl_coll_task_t *listeners[XCCL_EVENT_LAST][MAX_LISTENERS]; 28 | int listeners_size[XCCL_EVENT_LAST]; 29 | } xccl_event_manager_t; 30 | 31 | typedef struct xccl_coll_task { 32 | xccl_event_manager_t em; 33 | xccl_task_state_t state; 34 | xccl_task_event_handler_p handlers[XCCL_EVENT_LAST]; 35 | xccl_status_t (*progress)(struct xccl_coll_task *self); 36 | struct xccl_schedule *schedule; 37 | volatile int busy; 38 | /* used for progress queue */ 39 | xccl_coll_task_t* next; 40 | ucs_list_link_t list_elem; 41 | int was_progressed; 42 | } xccl_coll_task_t; 43 | 44 | typedef struct xccl_tl_context xccl_tl_context_t; 45 | typedef struct xccl_schedule { 46 | xccl_coll_task_t super; 47 | int n_completed_tasks; 48 | int n_tasks; 49 | xccl_tl_context_t *tl_ctx; 50 | } xccl_schedule_t; 51 | 52 | void xccl_event_manager_init(xccl_event_manager_t *em); 53 | void xccl_event_manager_subscribe(xccl_event_manager_t *em, 54 | xccl_event_t event, 55 | xccl_coll_task_t *task); 56 | void xccl_event_manager_notify(xccl_event_manager_t *em, xccl_event_t event); 57 | void xccl_coll_task_init(xccl_coll_task_t *task); 58 | void schedule_completed_handler(xccl_coll_task_t *task); 59 | void 
xccl_schedule_init(xccl_schedule_t *schedule, xccl_tl_context_t *tl_ctx); 60 | void xccl_schedule_add_task(xccl_schedule_t *schedule, xccl_coll_task_t *task); 61 | void xccl_schedule_start(xccl_schedule_t *schedule); 62 | xccl_status_t xccl_schedule_progress(xccl_schedule_t *schedule); 63 | 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /src/core/xccl_mm.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | xccl_status_t xccl_global_mem_map_start(xccl_team_h team, xccl_mem_map_params_t params, 4 | xccl_mem_h *memh_p) 5 | { 6 | XCCL_CHECK_TEAM(team); 7 | xccl_status_t status; 8 | int i; 9 | xccl_team_lib_t *tl; 10 | xccl_mem_handle_t *memh = calloc(1, sizeof(*memh) + 11 | sizeof(xccl_tl_mem_h)*(team->n_teams-1)); 12 | for (i=0; in_teams; i++) { 13 | tl = team->tl_teams[i]->ctx->lib; 14 | if (tl->global_mem_map_start) { 15 | if (XCCL_OK != (status = tl->global_mem_map_start( 16 | team->tl_teams[i], params, &memh->handles[i]))) { 17 | goto error; 18 | } 19 | memh->handles[i]->id = tl->id; 20 | } 21 | } 22 | memh->team = team; 23 | *memh_p = memh; 24 | return XCCL_OK; 25 | error: 26 | *memh_p = NULL; 27 | free(memh); 28 | return status; 29 | } 30 | 31 | xccl_status_t xccl_global_mem_map_test(xccl_mem_h memh_p) 32 | { 33 | int all_done = 1; 34 | xccl_mem_handle_t *memh = memh_p; 35 | xccl_status_t status; 36 | xccl_team_lib_t *tl; 37 | int i; 38 | 39 | for (i=0; iteam->n_teams; i++) { 40 | tl = memh->team->tl_teams[i]->ctx->lib; 41 | if (memh->handles[i]) { 42 | assert(tl->global_mem_map_test); 43 | status = tl->global_mem_map_test(memh->handles[i]); 44 | if (XCCL_INPROGRESS == status) { 45 | all_done = 0; 46 | } else if (XCCL_OK != status) { 47 | return status; 48 | } 49 | } 50 | } 51 | return all_done == 1 ? 
XCCL_OK : XCCL_INPROGRESS; 52 | } 53 | 54 | xccl_status_t xccl_global_mem_unmap(xccl_mem_h memh_p) 55 | { 56 | xccl_mem_handle_t *memh = memh_p; 57 | xccl_status_t status; 58 | int i; 59 | xccl_team_lib_t *tl; 60 | 61 | for (i=0; iteam->n_teams; i++) { 62 | tl = memh->team->tl_teams[i]->ctx->lib; 63 | if (memh->handles[i]) { 64 | assert(tl->global_mem_unmap); 65 | if (XCCL_OK != (status = tl->global_mem_unmap(memh->handles[i]))) { 66 | return status; 67 | } 68 | } 69 | } 70 | free(memh); 71 | return XCCL_OK; 72 | } 73 | -------------------------------------------------------------------------------- /src/team_lib/ucx/allreduce/allreduce_knomial.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLREDUCE_KNOMIAL_H_ 2 | #define ALLREDUCE_KNOMIAL_H_ 3 | 4 | enum { 5 | KN_BASE, 6 | KN_PROXY, 7 | KN_EXTRA 8 | }; 9 | 10 | #define CALC_POW_K_SUP(_size, _radix, _pow_k_sup, _full_tree_size) do{ \ 11 | int pk = 1; \ 12 | int fs = _radix; \ 13 | while (fs < _size) { \ 14 | pk++; fs*=_radix; \ 15 | } \ 16 | _pow_k_sup = pk; \ 17 | _full_tree_size = (fs != _size) ? fs/_radix : fs; \ 18 | if ((fs != _size) && (_size / _full_tree_size == 1)) \ 19 | _pow_k_sup--; \ 20 | }while(0) 21 | 22 | #define KN_RECURSIVE_SETUP(__radix, __myrank, __size, __pow_k_sup, \ 23 | __full_tree_size, __n_full_subtrees, \ 24 | __full_size, __node_type) do{ \ 25 | CALC_POW_K_SUP(__size, __radix, __pow_k_sup, __full_tree_size); \ 26 | __n_full_subtrees = __size / __full_tree_size; \ 27 | __full_size = __n_full_subtrees*__full_tree_size; \ 28 | __node_type = __myrank >= __full_size ? KN_EXTRA : \ 29 | (__size > __full_size && __myrank < __size - __full_size ? 
\ 30 | KN_PROXY : KN_BASE); \ 31 | }while(0) 32 | 33 | #define KN_RECURSIVE_GET_PROXY(__myrank, __full_size) (__myrank - __full_size) 34 | #define KN_RECURSIVE_GET_EXTRA(__myrank, __full_size) (__myrank + __full_size) 35 | 36 | enum { 37 | PHASE_0, 38 | PHASE_1, 39 | PHASE_EXTRA, 40 | PHASE_PROXY, 41 | }; 42 | 43 | #define CHECK_PHASE(_p) case _p: goto _p; break; 44 | #define GOTO_PHASE(_phase) do{ \ 45 | switch (_phase) { \ 46 | CHECK_PHASE(PHASE_EXTRA); \ 47 | CHECK_PHASE(PHASE_PROXY); \ 48 | CHECK_PHASE(PHASE_1); \ 49 | case PHASE_0: break; \ 50 | }; \ 51 | } while(0) 52 | 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /src/team_lib/sharp/xccl_sharp_map.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef XCCL_SHARP_MAP_H_ 7 | #define XCCL_SHARP_MAP_H_ 8 | 9 | #include 10 | #include 11 | 12 | int xccl_to_sharp_dtype[XCCL_DT_LAST_PREDEFINED]; 13 | int xccl_to_sharp_reduce_op[XCCL_OP_LAST_PREDEFINED]; 14 | 15 | static void map_xccl_to_sharp_dtype() 16 | { 17 | int dt; 18 | for (dt = 0; dt < XCCL_DT_LAST_PREDEFINED; dt++) { 19 | xccl_to_sharp_dtype[dt] = SHARP_DTYPE_NULL; 20 | } 21 | xccl_to_sharp_dtype[XCCL_DT_INT16] = SHARP_DTYPE_SHORT; 22 | xccl_to_sharp_dtype[XCCL_DT_INT32] = SHARP_DTYPE_INT; 23 | xccl_to_sharp_dtype[XCCL_DT_INT64] = SHARP_DTYPE_LONG; 24 | xccl_to_sharp_dtype[XCCL_DT_UINT16] = SHARP_DTYPE_UNSIGNED_SHORT; 25 | xccl_to_sharp_dtype[XCCL_DT_UINT32] = SHARP_DTYPE_UNSIGNED; 26 | xccl_to_sharp_dtype[XCCL_DT_UINT64] = SHARP_DTYPE_UNSIGNED_LONG; 27 | xccl_to_sharp_dtype[XCCL_DT_FLOAT16] = SHARP_DTYPE_FLOAT_SHORT; 28 | xccl_to_sharp_dtype[XCCL_DT_FLOAT32] = SHARP_DTYPE_FLOAT; 29 | xccl_to_sharp_dtype[XCCL_DT_FLOAT64] = SHARP_DTYPE_DOUBLE; 30 | } 31 | 32 | static void map_xccl_to_sharp_reduce_op_type() 33 | { 34 | int op; 35 | for (op 
= 0; op < XCCL_OP_LAST_PREDEFINED; op++) { 36 | xccl_to_sharp_reduce_op[op] = SHARP_OP_NULL; 37 | } 38 | xccl_to_sharp_reduce_op[XCCL_OP_MAX] = SHARP_OP_MAX; 39 | xccl_to_sharp_reduce_op[XCCL_OP_MIN] = SHARP_OP_MIN; 40 | xccl_to_sharp_reduce_op[XCCL_OP_SUM] = SHARP_OP_SUM; 41 | xccl_to_sharp_reduce_op[XCCL_OP_PROD] = SHARP_OP_PROD; 42 | /* TODO: not supported? 43 | xccl_to_sharp_reduce_op[XCCL_OP_AND] = SHARP_OP_AND; 44 | xccl_to_sharp_reduce_op[XCCL_OP_OR] = SHARP_OP_OR; 45 | xccl_to_sharp_reduce_op[XCCL_OP_XOR] = SHARP_OP_XOR;*/ 46 | xccl_to_sharp_reduce_op[XCCL_OP_LAND] = SHARP_OP_LAND; 47 | xccl_to_sharp_reduce_op[XCCL_OP_LOR] = SHARP_OP_LOR; 48 | xccl_to_sharp_reduce_op[XCCL_OP_LXOR] = SHARP_OP_LXOR; 49 | xccl_to_sharp_reduce_op[XCCL_OP_BAND] = SHARP_OP_BAND; 50 | xccl_to_sharp_reduce_op[XCCL_OP_BOR] = SHARP_OP_BOR; 51 | xccl_to_sharp_reduce_op[XCCL_OP_BXOR] = SHARP_OP_BXOR; 52 | xccl_to_sharp_reduce_op[XCCL_OP_MAXLOC] = SHARP_OP_MAXLOC; 53 | xccl_to_sharp_reduce_op[XCCL_OP_MINLOC] = SHARP_OP_MINLOC; 54 | } 55 | 56 | #endif -------------------------------------------------------------------------------- /test/test_mpi_reduce.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | #include "test_mpi.h" 7 | 8 | int main (int argc, char **argv) { 9 | size_t msg_size; 10 | xccl_coll_req_h request; 11 | int rank, size, i, status = 0, status_global, count, r; 12 | int *sbuf, *rbuf, *rbuf_mpi; 13 | 14 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_ALLREDUCE, XCCL_THREAD_MODE_SINGLE)); 15 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); 17 | 18 | count = argc > 1 ? 
atoi(argv[1]) : 32; 19 | msg_size = count*sizeof(int); 20 | sbuf = (int*)malloc(msg_size); 21 | rbuf = (int*)malloc(msg_size); 22 | rbuf_mpi = (int*)malloc(msg_size); 23 | 24 | for (r=0; r]]) 47 | ], 48 | [ 49 | AS_IF([test "x$with_sharp" != "xguess"], 50 | [AC_MSG_ERROR([SHARP support is requested but SHARP packages cannot be found])], 51 | [AC_MSG_WARN([SHARP not found])]) 52 | ]) 53 | CFLAGS="$save_CFLAGS" 54 | CPPFLAGS="$save_CPPFLAGS" 55 | LDFLAGS="$save_LDFLAGS" 56 | 57 | ], 58 | [AC_MSG_WARN([SHARP was explicitly disabled])]) 59 | 60 | sharp_checked=yes 61 | AM_CONDITIONAL([HAVE_SHARP], [test "x$sharp_happy" != xno]) 62 | ]) 63 | 64 | ]) 65 | -------------------------------------------------------------------------------- /src/team_lib/hier/xccl_hier_lib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef XCCL_TEAM_LIB_HIER_H_ 7 | #define XCCL_TEAM_LIB_HIER_H_ 8 | 9 | #include "xccl_team_lib.h" 10 | 11 | typedef struct xccl_team_lib_hier_config { 12 | xccl_team_lib_config_t super; 13 | } xccl_team_lib_hier_config_t; 14 | 15 | typedef struct xccl_tl_hier_context_config { 16 | xccl_tl_context_config_t super; 17 | ucs_config_names_array_t devices; 18 | int enable_sharp; 19 | int enable_shmseg; 20 | int enable_hmc; 21 | int enable_nccl; 22 | unsigned bcast_pipeline_depth; 23 | int bcast_sm_get; 24 | int node_leader_rank_id; 25 | size_t bcast_sm_get_thresh; 26 | size_t bcast_pipeline_thresh; 27 | } xccl_tl_hier_context_config_t; 28 | 29 | typedef struct xccl_team_lib_hier { 30 | xccl_team_lib_t super; 31 | xccl_team_lib_hier_config_t config; 32 | xccl_lib_h tl_lib; 33 | } xccl_team_lib_hier_t; 34 | extern xccl_team_lib_hier_t xccl_team_lib_hier; 35 | 36 | #define xccl_team_hier_log_component(_level, _fmt, ...) 
\ 37 | do { \ 38 | ucs_log_component(_level, &xccl_team_lib_hier.config.super.log_component, _fmt, ## __VA_ARGS__); \ 39 | } while (0) 40 | 41 | #define xccl_hier_error(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_ERROR, _fmt, ## __VA_ARGS__) 42 | #define xccl_hier_warn(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_WARN, _fmt, ## __VA_ARGS__) 43 | #define xccl_hier_info(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_INFO, _fmt, ## __VA_ARGS__) 44 | #define xccl_hier_debug(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_DEBUG, _fmt, ## __VA_ARGS__) 45 | #define xccl_hier_trace(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_TRACE, _fmt, ## __VA_ARGS__) 46 | #define xccl_hier_trace_req(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_TRACE_REQ, _fmt, ## __VA_ARGS__) 47 | #define xccl_hier_trace_data(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_TRACE_DATA, _fmt, ## __VA_ARGS__) 48 | #define xccl_hier_trace_async(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_TRACE_ASYNC, _fmt, ## __VA_ARGS__) 49 | #define xccl_hier_trace_func(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_TRACE_FUNC, "%s(" _fmt ")", __FUNCTION__, ## __VA_ARGS__) 50 | #define xccl_hier_trace_poll(_fmt, ...) xccl_team_hier_log_component(UCS_LOG_LEVEL_TRACE_POLL, _fmt, ## __VA_ARGS__) 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/team_lib/hier/xccl_hier_team.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | #ifndef XCCL_HIER_TEAM_H_ 6 | #define XCCL_HIER_TEAM_H_ 7 | #include "xccl_hier_lib.h" 8 | #include "xccl_hier_sbgp.h" 9 | #include "xccl_hier_context.h" 10 | 11 | typedef struct xccl_hier_pair { 12 | xccl_team_h team; 13 | sbgp_t *sbgp; 14 | } xccl_hier_pair_t; 15 | 16 | typedef enum { 17 | XCCL_HIER_PAIR_NODE_UCX, 18 | XCCL_HIER_PAIR_SOCKET_UCX, 19 | XCCL_HIER_PAIR_NODE_LEADERS_UCX, 20 | XCCL_HIER_PAIR_SOCKET_LEADERS_UCX, 21 | XCCL_HIER_PAIR_NODE_SHMSEG, 22 | XCCL_HIER_PAIR_SOCKET_SHMSEG, 23 | XCCL_HIER_PAIR_SOCKET_LEADERS_SHMSEG, 24 | XCCL_HIER_PAIR_NODE_LEADERS_SHARP, 25 | XCCL_HIER_PAIR_NODE_LEADERS_HMC, 26 | XCCL_HIER_PAIR_NODE_NCCL, 27 | XCCL_HIER_PAIR_FLAT_UCX, 28 | XCCL_HIER_PAIR_LAST, 29 | } xccl_hier_pair_type_t; 30 | 31 | typedef struct xccl_hier_team { 32 | xccl_tl_team_t super; 33 | sbgp_t sbgps[SBGP_LAST]; 34 | xccl_hier_pair_t *pairs[XCCL_HIER_PAIR_LAST]; 35 | int node_leader_rank; 36 | int no_socket; 37 | } xccl_hier_team_t; 38 | 39 | xccl_status_t xccl_hier_team_create_post(xccl_tl_context_t *context, 40 | xccl_team_params_t *config, 41 | xccl_tl_team_t **team); 42 | xccl_status_t xccl_hier_team_create_test(xccl_tl_team_t *team); 43 | xccl_status_t xccl_hier_team_destroy(xccl_tl_team_t *team); 44 | 45 | 46 | static inline int xccl_hier_team_rank2ctx(xccl_hier_team_t *team, int rank) 47 | { 48 | return xccl_range_to_rank(team->super.params.range, rank); 49 | } 50 | 51 | static inline int sbgp_rank2ctx(sbgp_t *sbgp, int rank) 52 | { 53 | return xccl_range_to_rank(sbgp->hier_team->super.params.range, 54 | sbgp_rank2team(sbgp, rank)); 55 | } 56 | 57 | static inline int is_rank_on_local_node(int rank, xccl_hier_team_t *team) 58 | { 59 | xccl_hier_context_t *ctx = ucs_derived_of(team->super.ctx, xccl_hier_context_t); 60 | return ctx->procs[xccl_hier_team_rank2ctx(team, rank)].node_hash 61 | == ctx->local_proc.node_hash; 62 | } 63 | 64 | static inline int is_rank_on_local_socket(int rank, xccl_hier_team_t *team) 65 | { 66 | xccl_hier_context_t 
*ctx = ucs_derived_of(team->super.ctx, xccl_hier_context_t); 67 | if (ctx->local_proc.socketid < 0) { 68 | return 0; 69 | } 70 | xccl_hier_proc_data_t *proc = &ctx->procs[xccl_hier_team_rank2ctx(team, rank)]; 71 | return proc->node_hash == ctx->local_proc.node_hash && 72 | proc->socketid == ctx->local_proc.socketid; 73 | } 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /src/team_lib/ucx/fanin/fanin_linear.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "xccl_ucx_lib.h" 3 | #include "fanin.h" 4 | #include "xccl_ucx_sendrecv.h" 5 | #include 6 | #include 7 | 8 | xccl_status_t xccl_ucx_fanin_linear_progress(xccl_ucx_collreq_t *req) 9 | { 10 | xccl_tl_team_t *team = req->team; 11 | int group_rank = team->params.oob.rank; 12 | int group_size = team->params.oob.size; 13 | xccl_ucx_request_t **reqs = req->fanin_linear.reqs; 14 | if (req->args.root == group_rank) { 15 | if (req->fanin_linear.step == ((group_rank + 1) % group_size)) { 16 | xccl_ucx_recv_nb(NULL, 0, UCS_MEMORY_TYPE_UNKNOWN, 17 | req->fanin_linear.step, (xccl_ucx_team_t*)team, 18 | req->tag, &reqs[0]); 19 | req->fanin_linear.step = ((req->fanin_linear.step + 1) % group_size); 20 | } 21 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 22 | if (req->fanin_linear.step != group_rank) { 23 | xccl_ucx_recv_nb(NULL, 0, req->fanin_linear.step, 24 | UCS_MEMORY_TYPE_UNKNOWN, 25 | (xccl_ucx_team_t*)team, req->tag, &reqs[0]); 26 | req->fanin_linear.step = 27 | ((req->fanin_linear.step + 1) % group_size); 28 | } else { 29 | goto completion; 30 | } 31 | } 32 | } else { 33 | if (req->fanin_linear.step == 0) { 34 | xccl_ucx_send_nb(NULL, 0, UCS_MEMORY_TYPE_UNKNOWN, 35 | req->args.root, (xccl_ucx_team_t*)team, 36 | req->tag, &reqs[0]); 37 | req->fanin_linear.step = 1; 38 | } 39 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 40 | goto completion; 41 | } 42 
| } 43 | return XCCL_OK; 44 | 45 | completion: 46 | /* fprintf(stderr, "Complete fanin, level %d frag %d and full coll arg\n", */ 47 | /* COLL_ID_IN_SCHEDULE(bcol_args), bcol_args->next_frag-1); */ 48 | req->complete = XCCL_OK; 49 | return XCCL_OK; 50 | } 51 | 52 | xccl_status_t xccl_ucx_fanin_linear_start(xccl_ucx_collreq_t *req) 53 | { 54 | size_t data_size = req->args.buffer_info.len; 55 | int group_rank = req->team->params.oob.rank; 56 | int group_size = req->team->params.oob.size; 57 | memset(req->fanin_linear.reqs, 0, sizeof(req->fanin_linear.reqs)); 58 | if (req->args.root == group_rank) { 59 | req->fanin_linear.step = (group_rank + 1) % group_size; 60 | } else { 61 | req->fanin_linear.step = 0; 62 | } 63 | req->progress = xccl_ucx_fanin_linear_progress; 64 | return xccl_ucx_fanin_linear_progress(req); 65 | } 66 | -------------------------------------------------------------------------------- /src/team_lib/ucx/fanout/fanout_linear.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "xccl_ucx_lib.h" 3 | #include "fanout.h" 4 | #include "xccl_ucx_sendrecv.h" 5 | #include 6 | #include 7 | 8 | xccl_status_t xccl_ucx_fanout_linear_progress(xccl_ucx_collreq_t *req) 9 | { 10 | xccl_tl_team_t *team = req->team; 11 | int group_rank = team->params.oob.rank; 12 | int group_size = team->params.oob.size; 13 | xccl_ucx_request_t **reqs = req->fanout_linear.reqs; 14 | 15 | if (req->args.root == group_rank) { 16 | if (req->fanout_linear.step == ((group_rank + 1) % group_size)) { 17 | xccl_ucx_send_nb(NULL, 0, UCS_MEMORY_TYPE_UNKNOWN, 18 | req->fanout_linear.step, 19 | (xccl_ucx_team_t*)team, req->tag, &reqs[0]); 20 | req->fanout_linear.step = ((req->fanout_linear.step + 1) % group_size); 21 | } 22 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 23 | if (req->fanout_linear.step != group_rank) { 24 | xccl_ucx_send_nb(NULL, 0, UCS_MEMORY_TYPE_UNKNOWN, 25 | req->fanout_linear.step, 26 | 
(xccl_ucx_team_t*)team, req->tag, &reqs[0]); 27 | req->fanout_linear.step = 28 | ((req->fanout_linear.step + 1) % group_size); 29 | } else { 30 | goto completion; 31 | } 32 | } 33 | 34 | } else { 35 | if (req->fanout_linear.step == 0) { 36 | xccl_ucx_recv_nb(NULL, 0, UCS_MEMORY_TYPE_UNKNOWN, req->args.root, 37 | (xccl_ucx_team_t*)team, req->tag, &reqs[0]); 38 | req->fanout_linear.step = 1; 39 | } 40 | if (UCS_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 41 | goto completion; 42 | } 43 | } 44 | return XCCL_OK; 45 | 46 | completion: 47 | /* fprintf(stderr, "Complete fanout, level %d frag %d and full coll arg\n", */ 48 | /* COLL_ID_IN_SCHEDULE(bcol_args), bcol_args->next_frag-1); */ 49 | req->complete = XCCL_OK; 50 | return XCCL_OK; 51 | } 52 | 53 | xccl_status_t xccl_ucx_fanout_linear_start(xccl_ucx_collreq_t *req) 54 | { 55 | size_t data_size = req->args.buffer_info.len; 56 | int group_rank = req->team->params.oob.rank; 57 | int group_size = req->team->params.oob.size; 58 | memset(req->fanout_linear.reqs, 0, sizeof(req->fanout_linear.reqs)); 59 | req->fanout_linear.step = 0; 60 | if (req->args.root == group_rank) { 61 | req->fanout_linear.step = (group_rank + 1) % group_size; 62 | } 63 | req->progress = xccl_ucx_fanout_linear_progress; 64 | return xccl_ucx_fanout_linear_progress(req); 65 | } 66 | -------------------------------------------------------------------------------- /src/core/xccl_collective.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | 7 | #include "config.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | xccl_status_t xccl_collective_init(xccl_coll_op_args_t *coll_args, 17 | xccl_coll_req_h *request, xccl_team_h team) 18 | { 19 | int tl_team_id; 20 | xccl_tl_team_t *tl_team; 21 | xccl_team_lib_t *lib; 22 | xccl_coll_req_t *xccl_req; 23 | xccl_status_t status; 24 | ucs_memory_type_t mtype; 25 | 26 | 27 | XCCL_CHECK_TEAM(team); 28 | mtype = UCS_MEMORY_TYPE_HOST; 29 | if ((coll_args->coll_type == XCCL_BCAST) || 30 | (coll_args->coll_type == XCCL_ALLREDUCE) || 31 | (coll_args->coll_type == XCCL_REDUCE) || 32 | (coll_args->coll_type == XCCL_ALLTOALL) || 33 | (coll_args->coll_type == XCCL_ALLTOALLV) || 34 | (coll_args->coll_type == XCCL_ALLGATHER)) { 35 | status = xccl_mem_component_type(coll_args->buffer_info.src_buffer, 36 | &mtype); 37 | if (status != XCCL_OK) { 38 | xccl_error("memtype detection error"); 39 | return XCCL_ERR_INVALID_PARAM; 40 | } 41 | } 42 | 43 | tl_team_id = team->coll_team_id[coll_args->coll_type][mtype]; 44 | if (tl_team_id < 0) { 45 | xccl_error("no teams supported col %d memory type %s found", 46 | coll_args->coll_type, 47 | ucs_memory_type_names[mtype]); 48 | return XCCL_ERR_UNSUPPORTED; 49 | } 50 | tl_team = team->tl_teams[tl_team_id]; 51 | lib = tl_team->ctx->lib; 52 | xccl_trace("collective init: coll %d, team %s, memory type %s", 53 | coll_args->coll_type, lib->name, ucs_memory_type_names[mtype]); 54 | 55 | xccl_req = (xccl_coll_req_t*)malloc(sizeof(xccl_coll_req_t)); 56 | if (xccl_req == NULL) { 57 | return XCCL_ERR_NO_MEMORY; 58 | } 59 | 60 | status = lib->collective_init(coll_args, &xccl_req->req, tl_team); 61 | if (status != XCCL_OK) { 62 | free(xccl_req); 63 | return status; 64 | } 65 | 66 | *request = xccl_req; 67 | return status; 68 | } 69 | 70 | xccl_status_t xccl_collective_post(xccl_coll_req_h request) 71 | { 72 | return request->req->lib->collective_post(request->req); 73 | } 74 | 75 | 
xccl_status_t xccl_collective_wait(xccl_coll_req_h request) 76 | { 77 | return request->req->lib->collective_wait(request->req); 78 | } 79 | 80 | xccl_status_t xccl_collective_test(xccl_coll_req_h request) 81 | { 82 | return request->req->lib->collective_test(request->req); 83 | } 84 | 85 | xccl_status_t xccl_collective_finalize(xccl_coll_req_h request) 86 | { 87 | xccl_status_t status; 88 | 89 | status = request->req->lib->collective_finalize(request->req); 90 | free(request); 91 | return status; 92 | } 93 | -------------------------------------------------------------------------------- /test/test_mpi_cuda_allreduce.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | 7 | #include 8 | #include "test_mpi.h" 9 | 10 | int main (int argc, char **argv) { 11 | const int count = 32; 12 | const int msg_size = count * sizeof(int); 13 | xccl_coll_req_h request; 14 | int rank, size, i, status = 0, status_global; 15 | int *sbuf_host, *sbuf_cuda, *rbuf_cuda, *rbuf_host, *rbuf_mpi; 16 | char *local_rank; 17 | cudaStream_t stream; 18 | 19 | local_rank = getenv("OMPI_COMM_WORLD_LOCAL_RANK"); 20 | if (local_rank) { 21 | cudaSetDevice(atoi(local_rank)); 22 | } 23 | cudaStreamCreate(&stream); 24 | 25 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_ALLREDUCE, XCCL_THREAD_MODE_SINGLE)); 26 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 27 | MPI_Comm_size(MPI_COMM_WORLD, &size); 28 | 29 | sbuf_host = (int*)malloc(msg_size); 30 | rbuf_mpi = (int*)malloc(msg_size); 31 | rbuf_host = (int*)malloc(msg_size); 32 | cudaMalloc((void**)&sbuf_cuda, msg_size); 33 | cudaMalloc((void**)&rbuf_cuda, msg_size); 34 | 35 | for (i=0; i 10 | #include 11 | #include 12 | #include "xccl_team_lib.h" 13 | 14 | typedef struct xccl_team_lib_sharp_config { 15 | xccl_team_lib_config_t super; 16 | int enable_rcache; 17 | size_t zcopy_thresh; 18 
| unsigned bcopy_buf_num; 19 | } xccl_team_lib_sharp_config_t; 20 | 21 | typedef struct xccl_tl_sharp_context_config { 22 | xccl_tl_context_config_t super; 23 | char *device; 24 | } xccl_tl_sharp_context_config_t; 25 | 26 | typedef struct xccl_team_lib_sharp { 27 | xccl_team_lib_t super; 28 | xccl_team_lib_sharp_config_t config; 29 | } xccl_team_lib_sharp_t; 30 | 31 | extern xccl_team_lib_sharp_t xccl_team_lib_sharp; 32 | 33 | #define xccl_team_sharp_log_component(_level, _fmt, ...) \ 34 | do { \ 35 | ucs_log_component(_level, &xccl_team_lib_sharp.config.super.log_component, _fmt, ## __VA_ARGS__); \ 36 | } while (0) 37 | 38 | #define xccl_sharp_error(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_ERROR, _fmt, ## __VA_ARGS__) 39 | #define xccl_sharp_warn(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_WARN, _fmt, ## __VA_ARGS__) 40 | #define xccl_sharp_info(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_INFO, _fmt, ## __VA_ARGS__) 41 | #define xccl_sharp_debug(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_DEBUG, _fmt, ## __VA_ARGS__) 42 | #define xccl_sharp_trace(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_TRACE, _fmt, ## __VA_ARGS__) 43 | #define xccl_sharp_trace_req(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_TRACE_REQ, _fmt, ## __VA_ARGS__) 44 | #define xccl_sharp_trace_data(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_TRACE_DATA, _fmt, ## __VA_ARGS__) 45 | #define xccl_sharp_trace_async(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_TRACE_ASYNC, _fmt, ## __VA_ARGS__) 46 | #define xccl_sharp_trace_func(_fmt, ...) xccl_team_sharp_log_component(UCS_LOG_LEVEL_TRACE_FUNC, "%s(" _fmt ")", __FUNCTION__, ## __VA_ARGS__) 47 | #define xccl_sharp_trace_poll(_fmt, ...) 
xccl_team_sharp_log_component(UCS_LOG_LEVEL_TRACE_POLL, _fmt, ## __VA_ARGS__) 48 | 49 | typedef struct xccl_sharp_rcache_region { 50 | ucs_rcache_region_t super; 51 | void *memh; 52 | } xccl_sharp_rcache_region_t; 53 | 54 | typedef struct xccl_sharp_context { 55 | xccl_tl_context_t super; 56 | struct sharp_coll_context *sharp_context; 57 | ucs_rcache_t *rcache; 58 | } xccl_sharp_context_t; 59 | 60 | typedef struct xccl_sharp_buf { 61 | void *buf; 62 | void *mr; 63 | void *orig_src_buf; 64 | void *orig_dst_buf; 65 | int used; 66 | } xccl_sharp_buf_t; 67 | 68 | typedef struct xccl_sharp_team { 69 | xccl_tl_team_t super; 70 | struct sharp_coll_comm *sharp_comm; 71 | xccl_sharp_buf_t *bufs; 72 | } xccl_sharp_team_t; 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /test/test_mpi_allgather.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 5 | */ 6 | 7 | #include "test_mpi.h" 8 | #include "test_utils.h" 9 | 10 | int main (int argc, char **argv) { 11 | int rank, size, i, r, count, 12 | status = 0, status_global; 13 | int *sbuf, *rbuf, *sbuf_mpi, *rbuf_mpi; 14 | xccl_coll_req_h request; 15 | test_mem_type_t mtype; 16 | int not_equal; 17 | 18 | mtype = argc > 2 ? atoi(argv[2]) : TEST_MEM_TYPE_HOST; 19 | XCCL_CHECK(test_xccl_set_device(mtype)); 20 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_ALLGATHER, XCCL_THREAD_MODE_SINGLE)); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &size); 23 | count = argc > 1 ? 
atoi(argv[1]) : 32; 24 | count = size*((count + size - 1)/size); 25 | if (rank == 0) { 26 | test_print_header(XCCL_ALLGATHER, mtype, count, count); 27 | } 28 | 29 | XCCL_CHECK(test_xccl_mem_alloc((void**)&sbuf, count*sizeof(int), mtype)); 30 | XCCL_CHECK(test_xccl_mem_alloc((void**)&rbuf, count*sizeof(int), mtype)); 31 | sbuf_mpi = (int*)malloc(count*sizeof(int)); 32 | rbuf_mpi = (int*)malloc(count*sizeof(int)); 33 | 34 | for (i=0; i 6 | #include 7 | 8 | xccl_status_t xccl_ucx_bcast_linear_progress(xccl_ucx_collreq_t *req) 9 | { 10 | xccl_tl_team_t *team = req->team; 11 | void *data_buffer = req->args.buffer_info.dst_buffer; 12 | size_t data_size = req->args.buffer_info.len; 13 | int group_rank = team->params.oob.rank; 14 | int group_size = team->params.oob.size; 15 | xccl_ucx_request_t **reqs = req->bcast_linear.reqs; 16 | 17 | if (req->args.root == group_rank) { 18 | if (req->bcast_linear.step == ((group_rank + 1) % group_size)) { 19 | xccl_ucx_send_nb(data_buffer, data_size, req->dst_mem_type, 20 | req->bcast_linear.step, 21 | (xccl_ucx_team_t*)team, req->tag, &reqs[0]); 22 | req->bcast_linear.step = ((req->bcast_linear.step + 1) % group_size); 23 | } 24 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 25 | if (req->bcast_linear.step != group_rank) { 26 | xccl_ucx_send_nb(data_buffer, data_size, req->dst_mem_type, 27 | req->bcast_linear.step, (xccl_ucx_team_t*)team, 28 | req->tag, &reqs[0]); 29 | req->bcast_linear.step = 30 | ((req->bcast_linear.step + 1) % group_size); 31 | } else { 32 | goto completion; 33 | } 34 | } 35 | 36 | } else { 37 | if (req->bcast_linear.step == 0) { 38 | xccl_ucx_recv_nb(data_buffer, data_size, req->dst_mem_type, 39 | req->args.root, (xccl_ucx_team_t*)team, req->tag, 40 | &reqs[0]); 41 | req->bcast_linear.step = 1; 42 | } 43 | if (UCS_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 44 | goto completion; 45 | } 46 | } 47 | return XCCL_OK; 48 | 49 | completion: 50 | /* fprintf(stderr, "Complete bcast, 
level %d frag %d and full coll arg\n", */ 51 | /* COLL_ID_IN_SCHEDULE(bcol_args), bcol_args->next_frag-1); */ 52 | req->complete = XCCL_OK; 53 | return XCCL_OK; 54 | } 55 | 56 | xccl_status_t xccl_ucx_bcast_linear_start(xccl_ucx_collreq_t *req) 57 | { 58 | size_t data_size = req->args.buffer_info.len; 59 | int group_rank = req->team->params.oob.rank; 60 | int group_size = req->team->params.oob.size; 61 | 62 | xccl_ucx_trace("linear bcast start"); 63 | memset(req->bcast_linear.reqs, 0, sizeof(req->bcast_linear.reqs)); 64 | req->bcast_linear.step = 0; 65 | if (req->args.root == group_rank) { 66 | req->bcast_linear.step = (group_rank + 1) % group_size; 67 | if (req->args.buffer_info.src_buffer != 68 | req->args.buffer_info.dst_buffer) { 69 | xccl_ucx_send_recv(req->args.buffer_info.src_buffer, data_size, 70 | req->src_mem_type, group_rank, req->tag, 71 | req->args.buffer_info.dst_buffer, data_size, 72 | req->dst_mem_type, group_rank, req->tag, 73 | (xccl_ucx_team_t *)req->team); 74 | } 75 | } 76 | req->progress = xccl_ucx_bcast_linear_progress; 77 | return xccl_ucx_bcast_linear_progress(req); 78 | } 79 | -------------------------------------------------------------------------------- /src/team_lib/nccl/xccl_nccl_lib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 
4 | */ 5 | 6 | #ifndef XCCL_TEAM_LIB_NCCL_H_ 7 | #define XCCL_TEAM_LIB_NCCL_H_ 8 | 9 | #include 10 | #include 11 | #include "xccl_team_lib.h" 12 | 13 | typedef struct xccl_team_lib_nccl_config { 14 | xccl_team_lib_config_t super; 15 | int enable_allreduce; 16 | int enable_alltoall; 17 | int enable_alltoallv; 18 | int enable_allgather; 19 | int enable_bcast; 20 | } xccl_team_lib_nccl_config_t; 21 | 22 | typedef struct xccl_tl_nccl_context_config { 23 | xccl_tl_context_config_t super; 24 | char *device; 25 | } xccl_tl_nccl_context_config_t; 26 | 27 | typedef struct xccl_team_lib_nccl { 28 | xccl_team_lib_t super; 29 | xccl_team_lib_nccl_config_t config; 30 | } xccl_team_lib_nccl_t; 31 | 32 | extern xccl_team_lib_nccl_t xccl_team_lib_nccl; 33 | 34 | #define xccl_team_nccl_log_component(_level, _fmt, ...) \ 35 | do { \ 36 | ucs_log_component(_level, &xccl_team_lib_nccl.config.super.log_component, _fmt, ## __VA_ARGS__); \ 37 | } while (0) 38 | 39 | #define xccl_nccl_error(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_ERROR, _fmt, ## __VA_ARGS__) 40 | #define xccl_nccl_warn(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_WARN, _fmt, ## __VA_ARGS__) 41 | #define xccl_nccl_info(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_INFO, _fmt, ## __VA_ARGS__) 42 | #define xccl_nccl_debug(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_DEBUG, _fmt, ## __VA_ARGS__) 43 | #define xccl_nccl_trace(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_TRACE, _fmt, ## __VA_ARGS__) 44 | #define xccl_nccl_trace_req(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_TRACE_REQ, _fmt, ## __VA_ARGS__) 45 | #define xccl_nccl_trace_data(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_TRACE_DATA, _fmt, ## __VA_ARGS__) 46 | #define xccl_nccl_trace_async(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_TRACE_ASYNC, _fmt, ## __VA_ARGS__) 47 | #define xccl_nccl_trace_func(_fmt, ...) 
xccl_team_nccl_log_component(UCS_LOG_LEVEL_TRACE_FUNC, "%s(" _fmt ")", __FUNCTION__, ## __VA_ARGS__) 48 | #define xccl_nccl_trace_poll(_fmt, ...) xccl_team_nccl_log_component(UCS_LOG_LEVEL_TRACE_POLL, _fmt, ## __VA_ARGS__) 49 | 50 | typedef struct xccl_nccl_context { 51 | xccl_tl_context_t super; 52 | } xccl_nccl_context_t; 53 | 54 | typedef struct xccl_nccl_team { 55 | xccl_tl_team_t super; 56 | int team_size; 57 | ncclComm_t nccl_comm; 58 | cudaStream_t stream; 59 | } xccl_nccl_team_t; 60 | 61 | #define CUDACHECK(cmd) do { \ 62 | cudaError_t e = cmd; \ 63 | if(cudaSuccess != e) { \ 64 | xccl_nccl_error("CUDA error %s:%d '%d' %s", \ 65 | __FILE__,__LINE__, e, cudaGetErrorName(e)); \ 66 | return XCCL_ERR_NO_MESSAGE; \ 67 | } \ 68 | } while(0) 69 | 70 | #define NCCLCHECK(cmd) do { \ 71 | ncclResult_t e = cmd; \ 72 | if(ncclSuccess != e) { \ 73 | xccl_nccl_error("NCCL error %s:%d '%d' %s", \ 74 | __FILE__,__LINE__, e, ncclGetErrorString(e)); \ 75 | return XCCL_ERR_NO_MESSAGE; \ 76 | } \ 77 | } while(0) 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /test/test_mpi_cuda_reduce.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | 7 | #include 8 | #include "test_mpi.h" 9 | 10 | int main (int argc, char **argv) { 11 | const int count = 32; 12 | const int msg_size = count * sizeof(int); 13 | xccl_coll_req_h request; 14 | int rank, size, i, status = 0, status_global; 15 | int *sbuf_host, *sbuf_cuda, *rbuf_cuda, *rbuf_host, *rbuf_mpi; 16 | char *local_rank; 17 | int r; 18 | cudaStream_t stream; 19 | 20 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_ALLREDUCE, XCCL_THREAD_MODE_SINGLE)); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &size); 23 | 24 | local_rank = getenv("OMPI_COMM_WORLD_LOCAL_RANK"); 25 | if (local_rank) { 26 | cudaSetDevice(atoi(local_rank)); 27 | } 28 | cudaStreamCreate(&stream); 29 | 30 | sbuf_host = (int*)malloc(msg_size); 31 | rbuf_mpi = (int*)malloc(msg_size); 32 | rbuf_host = (int*)malloc(msg_size); 33 | cudaMalloc((void**)&sbuf_cuda, msg_size); 34 | cudaMalloc((void**)&rbuf_cuda, msg_size); 35 | 36 | for (r=0; r 1 ? atoi(argv[1]) : 32; 18 | mtype = argc > 2 ? 
atoi(argv[2]) : TEST_MEM_TYPE_HOST; 19 | 20 | XCCL_CHECK(test_xccl_set_device(mtype)); 21 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_BCAST, XCCL_THREAD_MODE_SINGLE)); 22 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 23 | MPI_Comm_size(MPI_COMM_WORLD, &size); 24 | if (rank == 0) { 25 | test_print_header(XCCL_BCAST, mtype, count, count); 26 | } 27 | 28 | XCCL_CHECK(test_xccl_mem_alloc((void**)&buf, count*sizeof(int), 29 | mtype)); 30 | XCCL_CHECK(test_xccl_mem_alloc((void**)&buf_mpi, count*sizeof(int), 31 | TEST_MEM_TYPE_HOST)); 32 | for (r=0; r 7 | #include 8 | 9 | 10 | xccl_status_t xccl_ucx_allgather_ring_progress(xccl_ucx_collreq_t *req) 11 | { 12 | int group_rank = req->team->params.oob.rank; 13 | int group_size = req->team->params.oob.size; 14 | int data_size = req->args.buffer_info.len / group_size; 15 | int sendto = (group_rank + 1) % group_size; 16 | int recvfrom = (group_rank - 1 + group_size) % group_size; 17 | xccl_ucx_team_t *team = ucs_derived_of(req->team, xccl_ucx_team_t); 18 | int max_polls = TEAM_UCX_CTX(team)->num_to_probe; 19 | int n_polls = 0; 20 | int step; 21 | ptrdiff_t sbuf; 22 | ptrdiff_t rbuf; 23 | xccl_status_t status; 24 | int cidx; 25 | 26 | while ((n_polls++ < max_polls) && (req->allgather_ring.step < group_size - 1)) { 27 | step = req->allgather_ring.step; 28 | if ((req->allgather_ring.reqs[0] == NULL) && 29 | (req->allgather_ring.reqs[1] == NULL)) { 30 | sbuf = (ptrdiff_t)req->args.buffer_info.dst_buffer + 31 | ((group_rank-step+group_size)%group_size)*data_size; 32 | xccl_ucx_send_nb((void*)sbuf, data_size, req->dst_mem_type, 33 | sendto, team, req->tag, 34 | &req->allgather_ring.reqs[0]); 35 | rbuf = (ptrdiff_t)req->args.buffer_info.dst_buffer + 36 | ((group_rank-step-1+group_size)%group_size)*data_size; 37 | xccl_ucx_recv_nb((void*)rbuf, data_size, req->dst_mem_type, 38 | recvfrom, team, req->tag, 39 | &req->allgather_ring.reqs[1]); 40 | } 41 | status = xccl_ucx_req_test(team, req->allgather_ring.reqs, 2, &cidx, 1, 2); 42 
| if (status == XCCL_OK) { 43 | req->allgather_ring.step += 1; 44 | n_polls = 0; 45 | } 46 | } 47 | 48 | if (req->allgather_ring.step < group_size - 1) { 49 | return XCCL_OK; 50 | } 51 | 52 | if (XCCL_INPROGRESS == xccl_ucx_testall(team, req->allgather_ring.reqs, 2)) { 53 | return XCCL_OK; 54 | } 55 | req->complete = XCCL_OK; 56 | return XCCL_OK; 57 | } 58 | 59 | xccl_status_t xccl_ucx_allgather_ring_start(xccl_ucx_collreq_t *req) 60 | { 61 | int group_rank = req->team->params.oob.rank; 62 | int group_size = req->team->params.oob.size; 63 | int data_size = req->args.buffer_info.len / group_size; 64 | ptrdiff_t sbuf = (ptrdiff_t)req->args.buffer_info.src_buffer; 65 | ptrdiff_t rbuf = (ptrdiff_t)req->args.buffer_info.dst_buffer; 66 | 67 | if (sbuf != rbuf || group_rank) { 68 | xccl_ucx_send_recv((void*)(sbuf), data_size, 69 | req->src_mem_type, 70 | group_rank, req->tag, 71 | (void*)(rbuf + data_size*group_rank), data_size, 72 | req->dst_mem_type, 73 | group_rank, req->tag, 74 | (xccl_ucx_team_t*)req->team); 75 | } 76 | req->allgather_ring.step = 0; 77 | req->allgather_ring.reqs[0] = NULL; 78 | req->allgather_ring.reqs[1] = NULL; 79 | req->progress = xccl_ucx_allgather_ring_progress; 80 | 81 | return xccl_ucx_allgather_ring_progress(req); 82 | } 83 | -------------------------------------------------------------------------------- /test/test_mpi_fanout_get.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #define _BSD_SOURCE 7 | #include "test_mpi.h" 8 | #include 9 | 10 | int main (int argc, char **argv) { 11 | int rank, size, i, r, count, 12 | status = 0, status_global; 13 | int *buf, *buf_mpi; 14 | xccl_coll_req_h request; 15 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_FANOUT_GET, XCCL_THREAD_MODE_SINGLE)); 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 17 | MPI_Comm_size(MPI_COMM_WORLD, &size); 18 | 19 | count = argc > 1 ? atoi(argv[1]) : 32; 20 | buf = (int*)malloc(count*sizeof(int)); 21 | buf_mpi = (int*)malloc(count*sizeof(int)); 22 | 23 | const int max_iters = 5; 24 | int iters; 25 | for (r = 0; r 11 | #include 12 | 13 | #define ucs_ilog2(_n) \ 14 | ( \ 15 | __builtin_constant_p(_n) ? ( \ 16 | (_n) < 1 ? 0 : \ 17 | (_n) & (1ULL << 63) ? 63 : \ 18 | (_n) & (1ULL << 62) ? 62 : \ 19 | (_n) & (1ULL << 61) ? 61 : \ 20 | (_n) & (1ULL << 60) ? 60 : \ 21 | (_n) & (1ULL << 59) ? 59 : \ 22 | (_n) & (1ULL << 58) ? 58 : \ 23 | (_n) & (1ULL << 57) ? 57 : \ 24 | (_n) & (1ULL << 56) ? 56 : \ 25 | (_n) & (1ULL << 55) ? 55 : \ 26 | (_n) & (1ULL << 54) ? 54 : \ 27 | (_n) & (1ULL << 53) ? 53 : \ 28 | (_n) & (1ULL << 52) ? 52 : \ 29 | (_n) & (1ULL << 51) ? 51 : \ 30 | (_n) & (1ULL << 50) ? 50 : \ 31 | (_n) & (1ULL << 49) ? 49 : \ 32 | (_n) & (1ULL << 48) ? 48 : \ 33 | (_n) & (1ULL << 47) ? 47 : \ 34 | (_n) & (1ULL << 46) ? 46 : \ 35 | (_n) & (1ULL << 45) ? 45 : \ 36 | (_n) & (1ULL << 44) ? 44 : \ 37 | (_n) & (1ULL << 43) ? 43 : \ 38 | (_n) & (1ULL << 42) ? 42 : \ 39 | (_n) & (1ULL << 41) ? 41 : \ 40 | (_n) & (1ULL << 40) ? 40 : \ 41 | (_n) & (1ULL << 39) ? 39 : \ 42 | (_n) & (1ULL << 38) ? 38 : \ 43 | (_n) & (1ULL << 37) ? 37 : \ 44 | (_n) & (1ULL << 36) ? 36 : \ 45 | (_n) & (1ULL << 35) ? 35 : \ 46 | (_n) & (1ULL << 34) ? 34 : \ 47 | (_n) & (1ULL << 33) ? 33 : \ 48 | (_n) & (1ULL << 32) ? 32 : \ 49 | (_n) & (1ULL << 31) ? 31 : \ 50 | (_n) & (1ULL << 30) ? 30 : \ 51 | (_n) & (1ULL << 29) ? 29 : \ 52 | (_n) & (1ULL << 28) ? 28 : \ 53 | (_n) & (1ULL << 27) ? 
27 : \ 54 | (_n) & (1ULL << 26) ? 26 : \ 55 | (_n) & (1ULL << 25) ? 25 : \ 56 | (_n) & (1ULL << 24) ? 24 : \ 57 | (_n) & (1ULL << 23) ? 23 : \ 58 | (_n) & (1ULL << 22) ? 22 : \ 59 | (_n) & (1ULL << 21) ? 21 : \ 60 | (_n) & (1ULL << 20) ? 20 : \ 61 | (_n) & (1ULL << 19) ? 19 : \ 62 | (_n) & (1ULL << 18) ? 18 : \ 63 | (_n) & (1ULL << 17) ? 17 : \ 64 | (_n) & (1ULL << 16) ? 16 : \ 65 | (_n) & (1ULL << 15) ? 15 : \ 66 | (_n) & (1ULL << 14) ? 14 : \ 67 | (_n) & (1ULL << 13) ? 13 : \ 68 | (_n) & (1ULL << 12) ? 12 : \ 69 | (_n) & (1ULL << 11) ? 11 : \ 70 | (_n) & (1ULL << 10) ? 10 : \ 71 | (_n) & (1ULL << 9) ? 9 : \ 72 | (_n) & (1ULL << 8) ? 8 : \ 73 | (_n) & (1ULL << 7) ? 7 : \ 74 | (_n) & (1ULL << 6) ? 6 : \ 75 | (_n) & (1ULL << 5) ? 5 : \ 76 | (_n) & (1ULL << 4) ? 4 : \ 77 | (_n) & (1ULL << 3) ? 3 : \ 78 | (_n) & (1ULL << 2) ? 2 : \ 79 | (_n) & (1ULL << 1) ? 1 : \ 80 | (_n) & (1ULL << 0) ? 0 : \ 81 | 0 \ 82 | ) : \ 83 | (sizeof(_n) <= 4) ? \ 84 | __ucs_ilog2_u32((uint32_t)(_n)) : \ 85 | __ucs_ilog2_u64((uint64_t)(_n)) \ 86 | ) 87 | 88 | /* Returns the number of 1-bits in x */ 89 | #define ucs_popcount(_n) \ 90 | ((sizeof(_n) <= 4) ? 
__builtin_popcount((uint32_t)(_n)) : __builtin_popcountl(_n)) 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /src/team_lib/mpod/xccl_mpod_bcast.c: -------------------------------------------------------------------------------- 1 | #include "xccl_mpod_lib.h" 2 | 3 | #define INTER_POD_INITIATED (0) 4 | #define INTRA_POD_INITIATED (1) 5 | 6 | static xccl_status_t bcast_post(xccl_mpod_coll_req_t *req) 7 | { 8 | xccl_status_t status = XCCL_OK; 9 | 10 | if (req->team->slice_id == req->coll_args.root % req->team->pod_size) { 11 | status = req->team->context->lib.ucx->collective_post(req->chunks[0].real_req.ucx_slice); 12 | xccl_mpod_err_pop(status, fn_fail); 13 | 14 | req->chunks[0].phase_id = INTER_POD_INITIATED; 15 | } else { 16 | status = xccl_mpod_nccl_req_post(&req->chunks[0].real_req.nccl[0]); 17 | xccl_mpod_err_pop(status, fn_fail); 18 | 19 | req->chunks[0].phase_id = INTRA_POD_INITIATED; 20 | } 21 | 22 | fn_exit: 23 | return status; 24 | fn_fail: 25 | goto fn_exit; 26 | } 27 | 28 | static xccl_status_t bcast_test(xccl_mpod_coll_req_t *req) 29 | { 30 | xccl_status_t status = XCCL_OK; 31 | 32 | if (req->chunks[0].phase_id == INTER_POD_INITIATED) { 33 | status = req->team->context->lib.ucx->collective_test(req->chunks[0].real_req.ucx_slice); 34 | if (status == XCCL_INPROGRESS) { 35 | goto fn_exit; 36 | } else { 37 | xccl_mpod_err_pop(status, fn_fail); 38 | 39 | status = xccl_mpod_nccl_req_post(&req->chunks[0].real_req.nccl[0]); 40 | xccl_mpod_err_pop(status, fn_fail); 41 | 42 | req->chunks[0].phase_id = INTRA_POD_INITIATED; 43 | } 44 | } 45 | 46 | if (req->chunks[0].phase_id == INTRA_POD_INITIATED) { 47 | status = xccl_mpod_nccl_req_test(&req->chunks[0].real_req.nccl[0]); 48 | if (status == XCCL_INPROGRESS) { 49 | goto fn_exit; 50 | } else { 51 | xccl_mpod_err_pop(status, fn_fail); 52 | } 53 | } 54 | 55 | fn_exit: 56 | return status; 57 | fn_fail: 58 | goto fn_exit; 59 | } 60 | 61 | static xccl_status_t 
bcast_finalize(xccl_mpod_coll_req_t *req) 62 | { 63 | xccl_status_t status = XCCL_OK; 64 | 65 | if (req->team->slice_id == req->coll_args.root % req->team->pod_size) { 66 | status = req->team->context->lib.ucx->collective_finalize(req->chunks[0].real_req.ucx_slice); 67 | xccl_mpod_err_pop(status, fn_fail); 68 | } 69 | 70 | status = xccl_mpod_nccl_req_finalize(&req->chunks[0].real_req.nccl[0]); 71 | xccl_mpod_err_pop(status, fn_fail); 72 | 73 | free(req->chunks); 74 | 75 | fn_exit: 76 | return status; 77 | fn_fail: 78 | goto fn_exit; 79 | } 80 | 81 | xccl_status_t xccl_mpod_bcast_init(xccl_mpod_coll_req_t *req) 82 | { 83 | xccl_status_t status = XCCL_OK; 84 | 85 | req->chunks = (xccl_mpod_chunk_s *) malloc(sizeof(xccl_mpod_chunk_s)); 86 | req->num_chunks = 1; 87 | 88 | if (req->team->slice_id == req->coll_args.root % req->team->pod_size) { 89 | xccl_coll_op_args_t ucx_coll_args = req->coll_args; 90 | ucx_coll_args.root = req->coll_args.root / req->team->pod_size; 91 | status = req->team->context->lib.ucx->collective_init(&ucx_coll_args, &req->chunks[0].real_req.ucx_slice, 92 | req->team->team.ucx_slice); 93 | xccl_mpod_err_pop(status, fn_fail); 94 | } 95 | 96 | xccl_coll_op_args_t nccl_coll_args = req->coll_args; 97 | nccl_coll_args.root = req->coll_args.root % req->team->pod_size; 98 | status = xccl_mpod_nccl_req_init(req, &nccl_coll_args, &req->chunks[0].real_req.nccl[0]); 99 | xccl_mpod_err_pop(status, fn_fail); 100 | 101 | req->collective_post = bcast_post; 102 | req->collective_test = bcast_test; 103 | req->collective_finalize = bcast_finalize; 104 | 105 | fn_exit: 106 | return status; 107 | fn_fail: 108 | goto fn_exit; 109 | } 110 | -------------------------------------------------------------------------------- /m4/ucx.m4: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | # See file LICENSE for terms. 
4 | # 5 | 6 | AC_DEFUN([XCCL_CHECK_UCX],[ 7 | UCX_MIN_REQUIRED_MAJOR=1 8 | UCX_MIN_REQUIRED_MINOR=10 9 | AS_IF([test "x$ucx_checked" != "xyes"],[ 10 | ucx_happy="no" 11 | 12 | AC_ARG_WITH([ucx], 13 | [AS_HELP_STRING([--with-ucx=(DIR)], [Enable the use of UCX (default is guess).])], 14 | [], [with_ucx=guess]) 15 | 16 | AS_IF([test "x$with_ucx" != "xno"], 17 | [ 18 | save_CPPFLAGS="$CPPFLAGS" 19 | save_CFLAGS="$CFLAGS" 20 | save_LDFLAGS="$LDFLAGS" 21 | 22 | AS_IF([test ! -z "$with_ucx" -a "x$with_ucx" != "xyes" -a "x$with_ucx" != "xguess"], 23 | [ 24 | check_ucx_dir="$with_ucx" 25 | check_ucx_libdir="$with_ucx/lib" 26 | CPPFLAGS="-I$with_ucx/include $save_CPPFLAGS" 27 | LDFLAGS="-L$check_ucx_libdir $save_LDFLAGS" 28 | ]) 29 | 30 | AS_IF([test "x$check_ucx_dir" = "x" -a "x$HPCX_UCX_DIR" != "x"], 31 | [ 32 | check_ucx_dir="$HPCX_UCX_DIR" 33 | check_ucx_libdir="$HPCX_UCX_DIR/lib" 34 | CPPFLAGS="-I$check_ucx_dir/include $save_CPPFLAGS" 35 | LDFLAGS="-L$check_ucx_libdir $save_LDFLAGS" 36 | ]) 37 | 38 | AS_IF([test ! 
-z "$with_ucx_libdir" -a "x$with_ucx_libdir" != "xyes"], 39 | [ 40 | check_ucx_libdir="$with_ucx_libdir" 41 | LDFLAGS="-L$check_ucx_libdir $save_LDFLAGS" 42 | ]) 43 | 44 | AC_CHECK_HEADERS([ucp/api/ucp.h], 45 | [ 46 | AC_CHECK_LIB([ucp], [ucp_tag_send_nb], 47 | [ 48 | ucx_happy="yes" 49 | ], 50 | [ 51 | ucx_happy="no" 52 | ], [-luct -lucm -lucs]) 53 | ], 54 | [ 55 | ucx_happy="no" 56 | ]) 57 | 58 | AS_IF([test "x$ucx_happy" = "xyes"], 59 | [ 60 | AS_IF([test "x$check_ucx_dir" != "x"], 61 | [ 62 | AC_MSG_RESULT([UCX dir: $check_ucx_dir]) 63 | AC_SUBST(UCX_CPPFLAGS, "-I$check_ucx_dir/include/") 64 | AC_SUBST(UCS_CPPFLAGS, "-I$check_ucx_dir/include/") 65 | ucx_major=$(cat $check_ucx_dir/include/ucp/api/ucp_version.h | grep -Po "UCP_API_MAJOR\s+\K\d+") 66 | ucx_minor=$(cat $check_ucx_dir/include/ucp/api/ucp_version.h | grep -Po "UCP_API_MINOR\s+\K\d+") 67 | AC_MSG_RESULT([Detected UCX version: ${ucx_major}.${ucx_minor}]) 68 | AS_IF([test $ucx_major -eq 1 && test $ucx_minor -lt ${UCX_MIN_REQUIRED_MINOR}], 69 | [ 70 | AC_MSG_ERROR([Required UCX version: ${UCX_MIN_REQUIRED_MAJOR}.${UCX_MIN_REQUIRED_MINOR}]) 71 | ucx_happy=no 72 | ], []) 73 | ]) 74 | 75 | AS_IF([test "x$check_ucx_libdir" != "x"], 76 | [ 77 | AC_SUBST(UCX_LDFLAGS, "-L$check_ucx_libdir") 78 | AC_SUBST(UCS_LDFLAGS, "-L$check_ucx_libdir") 79 | ]) 80 | 81 | AC_SUBST(UCX_LIBADD, "-lucp -lucs -lucm") 82 | AC_SUBST(UCS_LIBADD, "-lucs") 83 | ], 84 | [ 85 | AS_IF([test "x$with_ucx" != "xguess"], 86 | [ 87 | AC_MSG_ERROR([UCX support is requested but UCX packages cannot be found]) 88 | ], 89 | [ 90 | AC_MSG_WARN([UCX not found]) 91 | ]) 92 | ]) 93 | 94 | CFLAGS="$save_CFLAGS" 95 | CPPFLAGS="$save_CPPFLAGS" 96 | LDFLAGS="$save_LDFLAGS" 97 | 98 | ], 99 | [ 100 | AC_MSG_WARN([UCX was explicitly disabled]) 101 | ]) 102 | 103 | ucx_checked=yes 104 | AM_CONDITIONAL([HAVE_UCX], [test "x$ucx_happy" != xno]) 105 | ]) 106 | ]) 107 | -------------------------------------------------------------------------------- 
/src/team_lib/multirail/xccl_mrail_lib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * See file LICENSE for terms. 4 | */ 5 | 6 | #ifndef XCCL_TEAM_LIB_MRAIL_H_ 7 | #define XCCL_TEAM_LIB_MRAIL_H_ 8 | 9 | #include 10 | #include 11 | 12 | #define MAX_TLS_NUMBER 16 13 | 14 | typedef struct xccl_team_lib_mrail_config { 15 | xccl_team_lib_config_t super; 16 | xccl_tl_id_t replicated_tl_id; 17 | unsigned replicas_num; 18 | unsigned threads_num; 19 | unsigned thread_poll_cnt; 20 | } xccl_team_lib_mrail_config_t; 21 | 22 | typedef struct xccl_tl_mrail_context_config { 23 | xccl_tl_context_config_t super; 24 | ucs_config_names_array_t devices; 25 | } xccl_tl_mrail_context_config_t; 26 | 27 | typedef struct xccl_mrail_progress_thread { 28 | ucs_list_link_t list; 29 | pthread_t tid; 30 | pthread_mutex_t mutex; 31 | pthread_cond_t cond; 32 | unsigned poll_cnt; 33 | int close; 34 | } xccl_mrail_progress_thread_t; 35 | 36 | typedef struct xccl_mrail_progress_request { 37 | ucs_list_link_t list; 38 | xccl_context_h ctx; 39 | xccl_coll_req_h req; 40 | xccl_status_t completed; 41 | } xccl_mrail_progress_request_t; 42 | 43 | typedef struct xccl_team_lib_mrail { 44 | xccl_team_lib_t super; 45 | xccl_team_lib_mrail_config_t config; 46 | xccl_mrail_progress_thread_t threads[MAX_TLS_NUMBER]; 47 | } xccl_team_lib_mrail_t; 48 | extern xccl_team_lib_mrail_t xccl_team_lib_mrail; 49 | 50 | typedef struct xccl_mrail_context { 51 | xccl_tl_context_t super; 52 | /* tls array holds n_tls pointers to some other team library */ 53 | xccl_context_h tls[MAX_TLS_NUMBER]; 54 | xccl_lib_h tl; 55 | /* number of times team library is replicated, 56 | * typically equal to number of HW resources available 57 | */ 58 | size_t n_tls; 59 | } xccl_mrail_context_t; 60 | 61 | //TODO: how many teams allowed per context? Do we need more than 1 team? 
62 | typedef struct xccl_mrail_team { 63 | xccl_tl_team_t super; 64 | /* teams array holds n_teams pointers to some other team library teams */ 65 | xccl_team_h teams[MAX_TLS_NUMBER]; 66 | size_t n_teams; 67 | } xccl_mrail_team_t; 68 | 69 | typedef struct xccl_mrail_coll_req { 70 | xccl_tl_coll_req_t super; 71 | xccl_mrail_team_t team; 72 | xccl_mrail_progress_request_t reqs[MAX_TLS_NUMBER]; 73 | size_t n_reqs; 74 | } xccl_mrail_coll_req_t; 75 | 76 | #define xccl_team_mrail_log_component(_level, _fmt, ...) \ 77 | do { \ 78 | ucs_log_component(_level, &xccl_team_lib_mrail.config.super.log_component, _fmt, ## __VA_ARGS__); \ 79 | } while (0) 80 | 81 | #define xccl_mrail_error(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_ERROR, _fmt, ## __VA_ARGS__) 82 | #define xccl_mrail_warn(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_WARN, _fmt, ## __VA_ARGS__) 83 | #define xccl_mrail_info(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_INFO, _fmt, ## __VA_ARGS__) 84 | #define xccl_mrail_debug(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_DEBUG, _fmt, ## __VA_ARGS__) 85 | #define xccl_mrail_trace(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_TRACE, _fmt, ## __VA_ARGS__) 86 | #define xccl_mrail_trace_req(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_TRACE_REQ, _fmt, ## __VA_ARGS__) 87 | #define xccl_mrail_trace_data(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_TRACE_DATA, _fmt, ## __VA_ARGS__) 88 | #define xccl_mrail_trace_async(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_TRACE_ASYNC, _fmt, ## __VA_ARGS__) 89 | #define xccl_mrail_trace_func(_fmt, ...) xccl_team_mrail_log_component(UCS_LOG_LEVEL_TRACE_FUNC, "%s(" _fmt ")", __FUNCTION__, ## __VA_ARGS__) 90 | #define xccl_mrail_trace_poll(_fmt, ...) 
xccl_team_mrail_log_component(UCS_LOG_LEVEL_TRACE_POLL, _fmt, ## __VA_ARGS__) 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /src/team_lib/ucx/alltoall/alltoall_linear_shift.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "xccl_ucx_lib.h" 3 | #include "alltoall.h" 4 | #include "xccl_ucx_sendrecv.h" 5 | #include "utils/mem_component.h" 6 | #include 7 | #include 8 | 9 | static inline int get_peer(int rank, int size, int step) 10 | { 11 | return (step - rank + size)%size; 12 | } 13 | 14 | xccl_status_t xccl_ucx_alltoall_linear_shift_progress(xccl_ucx_collreq_t *req) 15 | { 16 | ptrdiff_t sbuf = (ptrdiff_t)req->args.buffer_info.src_buffer; 17 | ptrdiff_t rbuf = (ptrdiff_t)req->args.buffer_info.dst_buffer; 18 | xccl_ucx_team_t *team = ucs_derived_of(req->team, xccl_ucx_team_t); 19 | int max_polls = TEAM_UCX_CTX(team)->num_to_probe; 20 | int group_rank = team->super.params.oob.rank; 21 | int group_size = team->super.params.oob.size; 22 | size_t data_size = req->args.buffer_info.len; 23 | int n_polls = 0; 24 | 25 | int completed_idx, peer; 26 | void *send_buf, *recv_buf; 27 | xccl_status_t status; 28 | 29 | while ((n_polls++ < max_polls) && 30 | (req->alltoall_linear_shift.step < group_size)) { 31 | status = xccl_ucx_req_test(team, req->alltoall_linear_shift.reqs, 2, 32 | &completed_idx, 1, 2); 33 | if (status == XCCL_OK) { 34 | peer = get_peer(group_rank, group_size, req->alltoall_linear_shift.step); 35 | if (peer != group_rank) { 36 | xccl_ucx_send_recv((void*)(sbuf + peer*data_size), data_size, 37 | req->src_mem_type, group_rank, req->tag, 38 | req->alltoall_linear_shift.scratch, data_size, 39 | req->src_mem_type, group_rank, req->tag, team); 40 | xccl_ucx_send_nb(req->alltoall_linear_shift.scratch, data_size, 41 | req->src_mem_type, peer, team, req->tag, 42 | &req->alltoall_linear_shift.reqs[0]); 43 | xccl_ucx_recv_nb((void*)(rbuf + 
peer*data_size), data_size, 44 | req->dst_mem_type, peer, team, req->tag, 45 | &req->alltoall_linear_shift.reqs[1]); 46 | } else { 47 | if (sbuf != rbuf) { 48 | xccl_ucx_send_recv((void*)(sbuf + peer*data_size), data_size, 49 | req->src_mem_type, group_rank, req->tag, 50 | (void*)(rbuf + peer*data_size), data_size, 51 | req->dst_mem_type, group_rank, req->tag, 52 | team); 53 | } 54 | } 55 | n_polls = 0; 56 | req->alltoall_linear_shift.step++; 57 | } 58 | } 59 | 60 | if (req->alltoall_linear_shift.step < group_size) { 61 | return XCCL_OK; 62 | } 63 | 64 | if (xccl_ucx_testall(team, req->alltoall_linear_shift.reqs, 2) == XCCL_INPROGRESS) { 65 | return XCCL_OK; 66 | } 67 | 68 | req->complete = XCCL_OK; 69 | if (req->alltoall_linear_shift.scratch) { 70 | xccl_mem_component_free(req->alltoall_linear_shift.scratch, 71 | req->src_mem_type); 72 | } 73 | 74 | return XCCL_OK; 75 | } 76 | 77 | xccl_status_t xccl_ucx_alltoall_linear_shift_start(xccl_ucx_collreq_t *req) 78 | { 79 | size_t data_size = req->args.buffer_info.len; 80 | xccl_mem_component_alloc(&req->alltoall_linear_shift.scratch, 81 | data_size, req->src_mem_type); 82 | 83 | req->alltoall_linear_shift.reqs[0] = NULL; 84 | req->alltoall_linear_shift.reqs[1] = NULL; 85 | req->alltoall_linear_shift.step = 0; 86 | req->progress = xccl_ucx_alltoall_linear_shift_progress; 87 | 88 | return xccl_ucx_alltoall_linear_shift_progress(req); 89 | } 90 | -------------------------------------------------------------------------------- /src/utils/mem_component.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | 7 | #ifndef XCCL_MEM_COMPONENT_H_ 8 | #define XCCL_MEM_COMPONENT_H_ 9 | 10 | #include "api/xccl.h" 11 | #include 12 | 13 | typedef struct xccl_mem_component_buf { 14 | void *buf; 15 | size_t size; 16 | int used; 17 | } xccl_mem_component_buf_t; 18 | 19 | typedef struct xccl_mem_component_stream_request { 20 | ucs_memory_type_t mem_type; 21 | } xccl_mem_component_stream_request_t; 22 | 23 | /* mc stands for mem component */ 24 | typedef struct xccl_mc_event { 25 | ucs_memory_type_t mem_type; 26 | } xccl_mc_event_t; 27 | 28 | typedef struct xccl_mem_component { 29 | xccl_status_t (*open)(); 30 | xccl_status_t (*mem_alloc)(void **ptr, size_t len); 31 | xccl_status_t (*mem_free)(void *ptr); 32 | xccl_status_t (*mem_type)(void *ptr, ucs_memory_type_t *mem_type); 33 | xccl_status_t (*reduce)(void *sbuf1, void *sbuf2, void *target, 34 | size_t count, xccl_dt_t dtype, xccl_op_t op); 35 | xccl_status_t (*reduce_multi)(void *sbuf1, void *sbuf2, void *rbuf, 36 | size_t count, size_t size, size_t stride, 37 | xccl_dt_t dtype, xccl_op_t op); 38 | xccl_status_t (*memcpy_async)(void *sbuf, void *dbuf, size_t size, xccl_stream_t *stream); 39 | xccl_status_t (*event_record)(xccl_stream_t *stream, 40 | xccl_mc_event_t **event); 41 | xccl_status_t (*event_query)(xccl_mc_event_t *event); 42 | xccl_status_t (*event_free)(xccl_mc_event_t *event); 43 | xccl_status_t (*start_stream_activity)(xccl_stream_t *stream, 44 | xccl_mem_component_stream_request_t **req); 45 | xccl_status_t (*finish_stream_activity)(xccl_mem_component_stream_request_t *req); 46 | void (*close)(); 47 | void *dlhandle; 48 | xccl_mem_component_buf_t cache; 49 | } xccl_mem_component_t; 50 | 51 | 52 | xccl_status_t xccl_mem_component_init(const char* components_path); 53 | 54 | xccl_status_t xccl_mem_component_alloc(void **ptr, size_t len, 55 | ucs_memory_type_t mem_type); 56 | 57 | xccl_status_t xccl_mem_component_free(void *ptr, ucs_memory_type_t mem_type); 58 | 59 | xccl_status_t 
xccl_mem_component_type(void *ptr, ucs_memory_type_t *mem_type); 60 | 61 | xccl_status_t xccl_mem_component_reduce(void *sbuf1, void *sbuf2, void *target, 62 | size_t count, xccl_dt_t dtype, 63 | xccl_op_t op, ucs_memory_type_t mem_type); 64 | 65 | xccl_status_t xccl_mem_component_memcpy_async(void *sbuf, void *dbuf, size_t size, 66 | xccl_stream_t *stream); 67 | 68 | xccl_status_t xccl_mem_component_start_acitivity(xccl_stream_t *stream, 69 | xccl_mem_component_stream_request_t **req); 70 | 71 | xccl_status_t xccl_mem_component_finish_acitivity(xccl_mem_component_stream_request_t *req); 72 | 73 | /* 74 | * Performs reduction of multiple vectors and stores result to rbuf 75 | * rbuf = sbuf1 + sbuf2{0} + sbuf2{1} + sbuf2{count-1} 76 | * count - number of vectors in sbuf2 77 | * size - size of each verctor 78 | * stride - offset between vectors in sbuf2 79 | */ 80 | 81 | xccl_status_t 82 | xccl_mem_component_reduce_multi(void *sbuf1, void *sbuf2, void *rbuf, size_t count, 83 | size_t size, size_t stride, xccl_dt_t dtype, 84 | xccl_op_t op, ucs_memory_type_t mem_type); 85 | 86 | xccl_status_t xccl_mc_event_record(xccl_stream_t *stream, 87 | xccl_mc_event_t **event); 88 | 89 | xccl_status_t xccl_mc_event_query(xccl_mc_event_t *event); 90 | 91 | xccl_status_t xccl_mc_event_free(xccl_mc_event_t *event); 92 | 93 | void xccl_mem_component_free_cache(); 94 | 95 | void xccl_mem_component_finalize(); 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | HPCX_PKG_NAME: hpcx-v2.5.0-gcc-MLNX_OFED_LINUX-4.7-1.0.0.1-ubuntu18.04-x86_64 7 | HPCX_PKG_LINK: http://www.mellanox.com/downloads/hpc/hpc-x/v2.5 8 | OPEN_UCX_LINK: https://github.com/openucx/ucx 9 | OPEN_UCX_BRANCH: v1.10.x 10 | jobs: 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: 
Get HPCX 17 | run: wget "${HPCX_PKG_LINK}/${HPCX_PKG_NAME}.tbz" -P /tmp 18 | - name: Unpack HPCX 19 | run: cd /tmp && tar xjf "${HPCX_PKG_NAME}.tbz" 20 | - name: Get UCX 21 | run: git clone ${OPEN_UCX_LINK} /tmp/ucx && cd /tmp/ucx && git checkout ${OPEN_UCX_BRANCH} 22 | - name: Build UCX 23 | run: cd /tmp/ucx && ./autogen.sh && ./contrib/configure-release-mt --without-java --disable-numa --prefix $PWD/install && make -j install 24 | - uses: actions/checkout@v1 25 | - name: Build 26 | run: | 27 | source /tmp/${HPCX_PKG_NAME}/hpcx-init.sh; hpcx_load 28 | ./autogen.sh 29 | ./configure --prefix=$PWD/install --with-ucx=/tmp/ucx/install 30 | make -j`nproc` install 31 | - name: Build XCCL tests 32 | run: | 33 | source /tmp/${HPCX_PKG_NAME}/hpcx-init.sh; hpcx_load 34 | export LD_LIBRARY_PATH=/tmp/ucx/install/lib:$LD_LIBRARY_PATH 35 | export LIBRARY_PATH=/tmp/ucx/install/lib:$LIBRARY_PATH 36 | export CPATH=/tmp/ucx/install/lib:$CPATH 37 | make -C test 38 | - name: Test run 39 | run: | 40 | source /tmp/${HPCX_PKG_NAME}/hpcx-mt-init.sh; hpcx_load 41 | export LD_LIBRARY_PATH=/tmp/ucx/install/lib:$LD_LIBRARY_PATH 42 | export UCX_SOCKADDR_CM_ENABLE=n 43 | mpirun --oversubscribe -x XCCL_TEST_TLS=hier -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_allreduce 44 | mpirun --oversubscribe -x XCCL_TEST_TLS=hier -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_bcast 45 | mpirun --oversubscribe -x XCCL_TEST_TLS=hier -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_barrier 46 | 47 | mpirun --oversubscribe -x XCCL_TEAM_HIER_NODE_LEADER_RANK_ID=3 -x XCCL_TEST_TLS=hier -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_allreduce 48 | mpirun --oversubscribe -x XCCL_TEAM_HIER_NODE_LEADER_RANK_ID=4 -x XCCL_TEST_TLS=hier -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_bcast 49 | mpirun --oversubscribe -x XCCL_TEAM_HIER_NODE_LEADER_RANK_ID=5 -x XCCL_TEST_TLS=hier -np 8 -H localhost:8 --bind-to 
none -mca coll ^hcoll ./test/test_mpi_barrier 50 | 51 | mpirun --oversubscribe -x XCCL_TEAM_UCX_ALLREDUCE_ALG_ID=0 -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_allreduce 52 | mpirun --oversubscribe -x XCCL_TEAM_UCX_ALLREDUCE_ALG_ID=1 -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_allreduce 53 | mpirun --oversubscribe -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_bcast 54 | mpirun --oversubscribe -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_barrier 55 | mpirun --oversubscribe -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_alltoall 56 | mpirun --oversubscribe -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_alltoallv 57 | mpirun --oversubscribe -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_allgather 58 | mpirun -x XCCL_TEAM_UCX_ALLTOALL_PAIRWISE_CHUNK=0 --oversubscribe -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_alltoall 59 | mpirun -x XCCL_TEAM_UCX_ALLTOALL_PAIRWISE_CHUNK=0 --oversubscribe -x XCCL_TEST_TLS=ucx -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_alltoallv 60 | mpirun --oversubscribe -x XCCL_TEST_TLS=hier -x XCCL_TEST_ITERS=500 -x XCCL_TEST_NTHREADS=4 -x XCCL_TEST_CHECK=1 -np 8 -H localhost:8 --bind-to none -mca coll ^hcoll ./test/test_mpi_mt 61 | -------------------------------------------------------------------------------- /test/test_mpi_alltoall.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #include "test_mpi.h" 7 | 8 | int run_test(void *sbuf, void *rbuf, void *rbuf_mpi, int count, int rank, int size) 9 | { 10 | xccl_coll_req_h request; 11 | MPI_Request mpi_req; 12 | int status, status_global, completed; 13 | int i = 0, j; 14 | 15 | status = 0; 16 | xccl_coll_op_args_t coll = { 17 | .field_mask = 0, 18 | .coll_type = XCCL_ALLTOALL, 19 | .buffer_info = { 20 | .src_buffer = sbuf, 21 | .dst_buffer = rbuf, 22 | .len = count*sizeof(int), 23 | }, 24 | .alg.set_by_user = 0, 25 | .tag = 123, //todo 26 | }; 27 | 28 | XCCL_CHECK(xccl_collective_init(&coll, &request, xccl_world_team)); 29 | XCCL_CHECK(xccl_collective_post(request)); 30 | while (XCCL_OK != xccl_collective_test(request)) { 31 | xccl_context_progress(team_ctx); 32 | } 33 | XCCL_CHECK(xccl_collective_finalize(request)); 34 | 35 | if (sbuf != rbuf) { 36 | MPI_Ialltoall(sbuf, count, MPI_INT, rbuf_mpi, count, MPI_INT, MPI_COMM_WORLD, &mpi_req); 37 | } else { 38 | MPI_Ialltoall(MPI_IN_PLACE, count, MPI_INT, rbuf_mpi, count, MPI_INT, MPI_COMM_WORLD, &mpi_req); 39 | } 40 | completed = 0; 41 | while (!completed) { 42 | MPI_Test(&mpi_req, &completed, MPI_STATUS_IGNORE); 43 | xccl_mpi_test_progress(); 44 | } 45 | 46 | if (0 != memcmp(rbuf, rbuf_mpi, size*count*sizeof(int))) { 47 | fprintf(stderr, "RST CHECK FAILURE at rank %d, count %d\n", rank, count); 48 | status = 1; 49 | } 50 | 51 | MPI_Allreduce(&status, &status_global, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 52 | return status_global; 53 | } 54 | 55 | int main (int argc, char **argv) 56 | { 57 | const int iters = 5; 58 | size_t msglen_min, msglen_max; 59 | int count_max, count_min, count, 60 | rank, size, i, status_global; 61 | int *sbuf, *rbuf, *rbuf_mpi; 62 | msglen_min = argc > 1 ? atoi(argv[1]) : 4; 63 | msglen_max = argc > 2 ? 
atoi(argv[2]) : 1024; 64 | if (msglen_max < msglen_min) { 65 | fprintf(stderr, "Incorrect msglen settings\n"); 66 | return -1; 67 | } 68 | count_max = (msglen_max + sizeof(int) - 1)/sizeof(int); 69 | count_min = (msglen_min + sizeof(int) - 1)/sizeof(int); 70 | 71 | XCCL_CHECK(xccl_mpi_test_init(argc, argv, XCCL_COLL_CAP_ALLTOALL, XCCL_THREAD_MODE_SINGLE)); 72 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 73 | MPI_Comm_size(MPI_COMM_WORLD, &size); 74 | 75 | sbuf = malloc(count_max*size*sizeof(int)); 76 | rbuf = malloc(count_max*size*sizeof(int)); 77 | rbuf_mpi = malloc(count_max*size*sizeof(int)); 78 | for (i=0; i 8 | #include 9 | 10 | xccl_status_t xccl_ucx_reduce_linear_progress(xccl_ucx_collreq_t *req) 11 | { 12 | xccl_tl_team_t *team = req->team; 13 | void *data_buffer = req->args.buffer_info.dst_buffer; 14 | size_t data_size = req->args.buffer_info.len; 15 | int group_rank = team->params.oob.rank; 16 | int group_size = team->params.oob.size; 17 | void *scratch = req->reduce_linear.scratch; 18 | xccl_ucx_request_t **reqs = req->reduce_linear.reqs; 19 | 20 | if (req->args.root == group_rank) { 21 | if (req->reduce_linear.step == ((group_rank + 1) % group_size)) { 22 | xccl_ucx_recv_nb(scratch, data_size, req->src_mem_type, 23 | req->reduce_linear.step, 24 | (xccl_ucx_team_t*)team, req->tag, &reqs[0]); 25 | req->reduce_linear.step = ((req->reduce_linear.step + 1) % group_size); 26 | } 27 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 28 | xccl_mem_component_reduce(scratch, 29 | data_buffer, 30 | data_buffer, 31 | req->args.reduce_info.count, 32 | req->args.reduce_info.dt, 33 | req->args.reduce_info.op, 34 | req->src_mem_type); 35 | 36 | if (req->reduce_linear.step != group_rank) { 37 | xccl_ucx_recv_nb(scratch, data_size, req->src_mem_type, 38 | req->reduce_linear.step, 39 | (xccl_ucx_team_t*)team, req->tag, &reqs[0]); 40 | req->reduce_linear.step = 41 | ((req->reduce_linear.step + 1) % group_size); 42 | } else { 43 | goto completion; 44 | } 45 | 
} 46 | } else { 47 | if (req->reduce_linear.step == 0) { 48 | xccl_ucx_send_nb(req->args.buffer_info.src_buffer, data_size, 49 | req->src_mem_type, req->args.root, 50 | (xccl_ucx_team_t*)team, req->tag, &reqs[0]); 51 | req->reduce_linear.step = 1; 52 | } 53 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 1)) { 54 | goto completion; 55 | } 56 | } 57 | return XCCL_OK; 58 | 59 | completion: 60 | /* fprintf(stderr, "Complete reduce, level %d frag %d and full coll arg\n", */ 61 | /* COLL_ID_IN_SCHEDULE(bcol_args), bcol_args->next_frag-1); */ 62 | req->complete = XCCL_OK; 63 | if (req->reduce_linear.scratch) { 64 | xccl_mem_component_free(req->reduce_linear.scratch, req->src_mem_type); 65 | } 66 | return XCCL_OK; 67 | } 68 | 69 | xccl_status_t xccl_ucx_reduce_linear_start(xccl_ucx_collreq_t *req) 70 | { 71 | size_t data_size = req->args.buffer_info.len; 72 | int group_rank = req->team->params.oob.rank; 73 | int group_size = req->team->params.oob.size; 74 | 75 | memset(req->reduce_linear.reqs, 0, sizeof(req->reduce_linear.reqs)); 76 | req->reduce_linear.step = 0; 77 | if (req->args.root == group_rank) { 78 | xccl_mem_component_alloc(&req->reduce_linear.scratch, 79 | data_size, 80 | req->src_mem_type); 81 | xccl_ucx_send_recv(req->args.buffer_info.src_buffer, data_size, 82 | req->src_mem_type, group_rank, req->tag, 83 | req->args.buffer_info.dst_buffer, data_size, 84 | req->dst_mem_type, group_rank, req->tag, 85 | (xccl_ucx_team_t *)req->team); 86 | req->reduce_linear.step = (group_rank + 1) % group_size; 87 | } else { 88 | req->reduce_linear.scratch = NULL; 89 | } 90 | req->progress = xccl_ucx_reduce_linear_progress; 91 | return xccl_ucx_reduce_linear_progress(req); 92 | } 93 | -------------------------------------------------------------------------------- /src/core/xccl_lib.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "utils/xccl_log.h" 6 | #include 
"utils/mem_component.h" 7 | #include 8 | 9 | extern ucs_list_link_t ucs_config_global_list; 10 | static ucs_config_field_t xccl_lib_config_table[] = { 11 | 12 | {NULL} 13 | }; 14 | UCS_CONFIG_REGISTER_TABLE(xccl_lib_config_table, "XCCL", NULL, 15 | xccl_lib_config_t, &ucs_config_global_list); 16 | 17 | #define CHECK_LIB_CONFIG_CAP(_cap, _CAP_FIELD) do{ \ 18 | if ((params->field_mask & XCCL_LIB_PARAM_FIELD_ ## _CAP_FIELD) && \ 19 | !(params-> _cap & tl->params. _cap)) { \ 20 | xccl_info("Disqualifying team %s due to %s cap", \ 21 | tl->name, UCS_PP_QUOTE(_CAP_FIELD)); \ 22 | continue; \ 23 | } \ 24 | } while(0) 25 | 26 | 27 | static void xccl_lib_filter(const xccl_lib_params_t *params, xccl_lib_t *lib) 28 | { 29 | int i; 30 | int n_libs = xccl_static_lib.n_libs_opened; 31 | lib->libs = (xccl_team_lib_t**)malloc(sizeof(xccl_team_lib_t*)*n_libs); 32 | lib->n_libs_opened = 0; 33 | for (i=0; ilibs[lib->n_libs_opened++] = tl; 41 | } 42 | } 43 | 44 | xccl_status_t xccl_lib_init(const xccl_lib_params_t *params, 45 | const xccl_lib_config_t *config, 46 | xccl_lib_h *xccl_lib) 47 | { 48 | xccl_lib_t *lib; 49 | 50 | if (xccl_static_lib.n_libs_opened == 0) { 51 | return XCCL_ERR_NO_MESSAGE; 52 | } 53 | 54 | lib = malloc(sizeof(*lib)); 55 | if (lib == NULL) { 56 | return XCCL_ERR_NO_MEMORY; 57 | } 58 | 59 | xccl_lib_filter(params, lib); 60 | if (lib->n_libs_opened == 0) { 61 | xccl_error("XCCL lib init: no plugins left after filtering by params\n"); 62 | return XCCL_ERR_NO_MESSAGE; 63 | } 64 | 65 | *xccl_lib = lib; 66 | //TODO: move to appropriate place 67 | //ucs_config_parser_warn_unused_env_vars_once("XCCL_"); 68 | return XCCL_OK; 69 | } 70 | 71 | xccl_status_t xccl_lib_config_read(const char *env_prefix, 72 | const char *filename, 73 | xccl_lib_config_t **config_p){ 74 | xccl_lib_config_t *config; 75 | xccl_status_t status; 76 | char full_prefix[128] = "XCCL_"; 77 | 78 | config = malloc(sizeof(*config)); 79 | if (config == NULL) { 80 | status = XCCL_ERR_NO_MEMORY; 81 | goto 
err; 82 | } 83 | 84 | if ((env_prefix != NULL) && (strlen(env_prefix) > 0)) { 85 | snprintf(full_prefix, sizeof(full_prefix), "%s_%s", env_prefix, "XCCL_"); 86 | } 87 | 88 | status = ucs_config_parser_fill_opts(config, xccl_lib_config_table, full_prefix, 89 | NULL, 0); 90 | if (status != UCS_OK) { 91 | goto err_free; 92 | } 93 | 94 | *config_p = config; 95 | return XCCL_OK; 96 | 97 | err_free: 98 | free(config); 99 | err: 100 | return status; 101 | } 102 | 103 | void xccl_lib_config_release(xccl_lib_config_t *config) 104 | { 105 | free(config); 106 | } 107 | 108 | void xccl_lib_config_print(const xccl_lib_config_t *config, FILE *stream, 109 | const char *title, ucs_config_print_flags_t print_flags) 110 | { 111 | ucs_config_parser_print_opts(stream, title, config, xccl_lib_config_table, 112 | NULL, "XCCL_", print_flags); 113 | } 114 | 115 | void xccl_lib_cleanup(xccl_lib_h lib_p) 116 | { 117 | if (lib_p->libs) { 118 | free(lib_p->libs); 119 | } 120 | xccl_mem_component_free_cache(); 121 | free(lib_p); 122 | } 123 | -------------------------------------------------------------------------------- /src/team_lib/mpod/xccl_mpod_allgather.c: -------------------------------------------------------------------------------- 1 | #include "xccl_mpod_lib.h" 2 | 3 | #define INTRA_POD_ALLGATHER_INITIATED (0) 4 | #define INTER_POD_ALLGATHER_INITIATED (1) 5 | #define INTRA_POD_BCAST_INITIATED (2) 6 | 7 | static xccl_status_t allgather_post(xccl_mpod_coll_req_t *req) 8 | { 9 | xccl_status_t status = XCCL_OK; 10 | 11 | status = xccl_mpod_nccl_req_post(&req->chunks[0].real_req.nccl[0]); 12 | xccl_mpod_err_pop(status, fn_fail); 13 | 14 | req->chunks[0].phase_id = INTRA_POD_ALLGATHER_INITIATED; 15 | 16 | fn_exit: 17 | return status; 18 | fn_fail: 19 | goto fn_exit; 20 | } 21 | 22 | static xccl_status_t allgather_test(xccl_mpod_coll_req_t *req) 23 | { 24 | xccl_status_t status = XCCL_OK; 25 | 26 | switch (req->chunks[0].phase_id) { 27 | case INTRA_POD_ALLGATHER_INITIATED: 28 | status = 
xccl_mpod_nccl_req_test(&req->chunks[0].real_req.nccl[0]); 29 | if (status == XCCL_OK) { 30 | if (req->team->slice_id == 0) { 31 | status = req->team->context->lib.ucx->collective_post(req->chunks[0].real_req.ucx_slice); 32 | xccl_mpod_err_pop(status, fn_fail); 33 | } 34 | 35 | status = XCCL_INPROGRESS; 36 | req->chunks[0].phase_id = INTER_POD_ALLGATHER_INITIATED; 37 | } 38 | break; 39 | 40 | case INTER_POD_ALLGATHER_INITIATED: 41 | if (req->team->slice_id == 0) { 42 | status = req->team->context->lib.ucx->collective_test(req->chunks[0].real_req.ucx_slice); 43 | } 44 | if (status == XCCL_OK) { 45 | status = xccl_mpod_nccl_req_post(&req->chunks[0].real_req.nccl[1]); 46 | xccl_mpod_err_pop(status, fn_fail); 47 | 48 | status = XCCL_INPROGRESS; 49 | req->chunks[0].phase_id = INTRA_POD_BCAST_INITIATED; 50 | } 51 | break; 52 | 53 | case INTRA_POD_BCAST_INITIATED: 54 | status = xccl_mpod_nccl_req_test(&req->chunks[0].real_req.nccl[1]); 55 | break; 56 | } 57 | 58 | fn_exit: 59 | return status; 60 | fn_fail: 61 | goto fn_exit; 62 | } 63 | 64 | static xccl_status_t allgather_finalize(xccl_mpod_coll_req_t *req) 65 | { 66 | xccl_status_t status = XCCL_OK; 67 | 68 | status = xccl_mpod_nccl_req_finalize(&req->chunks[0].real_req.nccl[0]); 69 | xccl_mpod_err_pop(status, fn_fail); 70 | 71 | if (req->team->slice_id == 0) { 72 | status = req->team->context->lib.ucx->collective_finalize(req->chunks[0].real_req.ucx_slice); 73 | xccl_mpod_err_pop(status, fn_fail); 74 | } 75 | 76 | status = xccl_mpod_nccl_req_finalize(&req->chunks[0].real_req.nccl[1]); 77 | xccl_mpod_err_pop(status, fn_fail); 78 | 79 | free(req->chunks); 80 | 81 | fn_exit: 82 | return status; 83 | fn_fail: 84 | goto fn_exit; 85 | } 86 | 87 | xccl_status_t xccl_mpod_allgather_init(xccl_mpod_coll_req_t *req) 88 | { 89 | xccl_status_t status = XCCL_OK; 90 | 91 | req->chunks = (xccl_mpod_chunk_s *) malloc(sizeof(xccl_mpod_chunk_s)); 92 | req->num_chunks = 1; 93 | 94 | /* phase 1 */ 95 | xccl_coll_op_args_t nccl_coll_args = 
req->coll_args; 96 | nccl_coll_args.buffer_info.len /= req->team->num_pods; 97 | status = xccl_mpod_nccl_req_init(req, &nccl_coll_args, &req->chunks[0].real_req.nccl[0]); 98 | xccl_mpod_err_pop(status, fn_fail); 99 | 100 | /* phase 2 */ 101 | if (req->team->slice_id == 0) { 102 | xccl_coll_op_args_t ucx_coll_args = req->coll_args; 103 | ucx_coll_args.buffer_info.src_buffer = req->coll_args.buffer_info.dst_buffer; 104 | status = req->team->context->lib.ucx->collective_init(&ucx_coll_args, &req->chunks[0].real_req.ucx_slice, 105 | req->team->team.ucx_slice); 106 | xccl_mpod_err_pop(status, fn_fail); 107 | } 108 | 109 | /* phase 3 */ 110 | nccl_coll_args = req->coll_args; 111 | nccl_coll_args.coll_type = XCCL_BCAST; 112 | nccl_coll_args.root = 0; 113 | nccl_coll_args.buffer_info.src_buffer = nccl_coll_args.buffer_info.dst_buffer; 114 | status = xccl_mpod_nccl_req_init(req, &nccl_coll_args, &req->chunks[0].real_req.nccl[1]); 115 | xccl_mpod_err_pop(status, fn_fail); 116 | 117 | req->collective_post = allgather_post; 118 | req->collective_test = allgather_test; 119 | req->collective_finalize = allgather_finalize; 120 | 121 | fn_exit: 122 | return status; 123 | fn_fail: 124 | goto fn_exit; 125 | } 126 | -------------------------------------------------------------------------------- /src/team_lib/ucx/bcast/bcast_knomial.c: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include "xccl_ucx_lib.h" 3 | #include "bcast.h" 4 | #include "xccl_ucx_sendrecv.h" 5 | #include 6 | #include 7 | 8 | #define CALC_DIST(_size, _radix, _dist) do{ \ 9 | _dist = 1; \ 10 | while (_dist*_radix < _size) { \ 11 | _dist*=_radix; \ 12 | } \ 13 | }while(0) 14 | 15 | xccl_status_t xccl_ucx_bcast_knomial_progress(xccl_ucx_collreq_t *req) 16 | { 17 | xccl_tl_team_t *team = req->team; 18 | void *data_buffer = req->args.buffer_info.dst_buffer; 19 | size_t data_size = req->args.buffer_info.len; 20 | int group_rank = team->params.oob.rank; 21 | 
int group_size = team->params.oob.size; 22 | int root = req->args.root; 23 | int radix = req->bcast_kn.radix; 24 | xccl_ucx_request_t **reqs = req->bcast_kn.reqs; 25 | int vrank = (group_rank - root + group_size) % group_size; 26 | int dist = req->bcast_kn.dist; 27 | int i, vpeer, peer, vroot_at_level, root_at_level, pos; 28 | 29 | if (req->bcast_kn.active_reqs) { 30 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 31 | req->bcast_kn.active_reqs)) { 32 | req->bcast_kn.active_reqs = 0; 33 | } else { 34 | return XCCL_OK; 35 | } 36 | } 37 | 38 | while (dist >= 1) { 39 | if (vrank % dist == 0) { 40 | pos = (vrank/dist) % radix; 41 | } else { 42 | pos = -1; 43 | } 44 | if (pos == 0) { 45 | for (i=radix-1; i>=1; i--) { 46 | vpeer = vrank + i*dist; 47 | if (vpeer < group_size) { 48 | peer = (vpeer + root) % group_size; 49 | xccl_ucx_send_nb(data_buffer, data_size, req->dst_mem_type, 50 | peer, (xccl_ucx_team_t*)team, req->tag, 51 | &reqs[req->bcast_kn.active_reqs++]); 52 | } 53 | } 54 | } else if (pos > 0) { 55 | vroot_at_level = vrank - pos*dist; 56 | root_at_level = (vroot_at_level + root) % group_size; 57 | xccl_ucx_recv_nb(data_buffer, data_size, req->dst_mem_type, 58 | root_at_level, (xccl_ucx_team_t*)team, req->tag, 59 | &reqs[req->bcast_kn.active_reqs++]); 60 | assert(req->bcast_kn.active_reqs == 1); 61 | } 62 | dist /= radix; 63 | 64 | if (req->bcast_kn.active_reqs) { 65 | if (XCCL_OK == xccl_ucx_testall((xccl_ucx_team_t *)team, reqs, 66 | req->bcast_kn.active_reqs)) { 67 | req->bcast_kn.active_reqs = 0; 68 | } else { 69 | req->bcast_kn.dist = dist; 70 | return XCCL_OK; 71 | } 72 | } 73 | } 74 | req->complete = XCCL_OK; 75 | return XCCL_OK; 76 | } 77 | 78 | xccl_status_t xccl_ucx_bcast_knomial_start(xccl_ucx_collreq_t *req) 79 | { 80 | size_t data_size = req->args.buffer_info.len; 81 | int group_rank = req->team->params.oob.rank; 82 | int group_size = req->team->params.oob.size; 83 | xccl_ucx_debug("knomial bcast start: group_size %d, group_rank 
%d," 84 | "data_size %zd", 85 | group_size, group_rank, data_size); 86 | memset(req->bcast_kn.reqs, 0, sizeof(req->bcast_kn.reqs)); 87 | req->bcast_kn.radix = TEAM_UCX_CTX_REQ(req)->bcast_kn_radix; 88 | if (req->bcast_kn.radix > req->team->params.oob.size) { 89 | req->bcast_kn.radix = req->team->params.oob.size; 90 | } 91 | 92 | req->bcast_kn.active_reqs = 0; 93 | CALC_DIST(group_size, req->bcast_kn.radix, req->bcast_kn.dist); 94 | if (req->args.root == group_rank) { 95 | if (req->args.buffer_info.src_buffer != 96 | req->args.buffer_info.dst_buffer) { 97 | xccl_ucx_send_recv(req->args.buffer_info.src_buffer, data_size, 98 | req->src_mem_type, group_rank, req->tag, 99 | req->args.buffer_info.dst_buffer, data_size, 100 | req->dst_mem_type, group_rank, req->tag, 101 | (xccl_ucx_team_t *)req->team); 102 | } 103 | } 104 | req->progress = xccl_ucx_bcast_knomial_progress; 105 | return xccl_ucx_bcast_knomial_progress(req); 106 | } 107 | -------------------------------------------------------------------------------- /test/test_mt.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #include "test_mpi.h" 7 | #include 8 | 9 | int use_mpi = 0; 10 | void* do_allreduce(void *arg) { 11 | const int count = 32; 12 | xccl_coll_req_h request; 13 | int rank, size, i, status = 0, status_global, j; 14 | xccl_team_h team = (xccl_team_h)arg; 15 | int sbuf[count], rbuf[count]; 16 | int iters = 10000; 17 | int check = 0; 18 | char *var = getenv("XCCL_TEST_ITERS"); 19 | if (var) iters = atoi(var); 20 | var = getenv("XCCL_TEST_CHECK"); 21 | if (var) check = atoi(var); 22 | 23 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); 25 | 26 | for (i=0; i 10 | #include 11 | #include 12 | #include 13 | 14 | static int compare_teams_by_priority(const void* t1, const void* t2) 15 | { 16 | const xccl_tl_team_t** team1 = (const xccl_tl_team_t**)t1; 17 | const xccl_tl_team_t** team2 = (const xccl_tl_team_t**)t2; 18 | return (*team2)->ctx->lib->priority - (*team1)->ctx->lib->priority; 19 | } 20 | 21 | 22 | xccl_status_t xccl_team_create_post(xccl_context_h context, 23 | xccl_team_params_t *params, 24 | xccl_team_t **xccl_team) 25 | { 26 | int i; 27 | int n_ctx = context->n_tl_ctx; 28 | xccl_collective_type_t c; 29 | xccl_team_t *team; 30 | xccl_tl_context_t *tl_ctx; 31 | xccl_status_t status; 32 | 33 | *xccl_team = NULL; 34 | if (context->n_tl_ctx < 1) { 35 | xccl_error("No library contexts available"); 36 | return XCCL_ERR_NO_MESSAGE; 37 | } 38 | 39 | team = (xccl_team_t*)malloc(sizeof(*team) + 40 | sizeof(xccl_tl_team_t*)*(n_ctx-1)); 41 | team->ctx = context; 42 | team->n_teams = 0; 43 | memcpy(&team->params, params, sizeof(xccl_team_params_t)); 44 | for (i=0; in_tl_ctx; i++) { 45 | tl_ctx = context->tl_ctx[i]; 46 | status = tl_ctx->lib->team_create_post(tl_ctx, params, 47 | &team->tl_teams[team->n_teams]); 48 | if (status != XCCL_OK) { 49 | continue; 50 | } 51 | status = tl_ctx->lib->team_create_test(team->tl_teams[team->n_teams]); 52 | team->n_teams++; 53 | if (status == XCCL_INPROGRESS) { 54 | /* workaround to fix oob allgather 
issue if multiple teams use it 55 | simultaneously*/ 56 | break; 57 | } 58 | } 59 | if (team->n_teams == 0) { 60 | xccl_warn("no teams were opened"); 61 | return XCCL_ERR_NO_MESSAGE; 62 | } 63 | team->last_team_create_posted = i; 64 | team->status = XCCL_INPROGRESS; 65 | *xccl_team = team; 66 | return XCCL_OK; 67 | } 68 | 69 | xccl_status_t xccl_team_create_test(xccl_team_t *team) 70 | { 71 | int i, c, m; 72 | xccl_tl_context_t *tl_ctx; 73 | xccl_status_t status; 74 | 75 | tl_ctx = team->ctx->tl_ctx[team->n_teams - 1]; 76 | status = tl_ctx->lib->team_create_test(team->tl_teams[team->n_teams-1]); 77 | if (status != XCCL_OK) { 78 | return status; 79 | } 80 | 81 | for (i = team->last_team_create_posted + 1; i < team->ctx->n_tl_ctx; i++) { 82 | tl_ctx = team->ctx->tl_ctx[i]; 83 | status = tl_ctx->lib->team_create_post(tl_ctx, &team->params, 84 | &team->tl_teams[team->n_teams]); 85 | team->last_team_create_posted = i; 86 | if (status != XCCL_OK) { 87 | continue; 88 | } 89 | status = tl_ctx->lib->team_create_test(team->tl_teams[team->n_teams]); 90 | team->n_teams++; 91 | if (status == XCCL_INPROGRESS) { 92 | /* workaround to fix oob allgather issue if multiple teams use it 93 | simultaneously*/ 94 | return XCCL_INPROGRESS; 95 | } 96 | } 97 | qsort(team->tl_teams, team->n_teams, sizeof(xccl_tl_team_t*), 98 | compare_teams_by_priority); 99 | for (m = 0; m < UCS_MEMORY_TYPE_LAST; m++) { 100 | for (c = 0; c < XCCL_COLL_LAST; c++) { 101 | team->coll_team_id[c][m] = -1; 102 | for (i=0; in_teams; i++) { 103 | if ((team->tl_teams[i]->ctx->lib->params.coll_types & UCS_BIT(c)) && 104 | (team->tl_teams[i]->ctx->lib->mem_types & UCS_BIT(m))) { 105 | team->coll_team_id[c][m] = i; 106 | break; 107 | } 108 | } 109 | } 110 | } 111 | team->status = XCCL_OK; 112 | /* TODO: check if some teams are never used after selection and clean them up */ 113 | return XCCL_OK; 114 | } 115 | 116 | void xccl_team_destroy(xccl_team_t *team) 117 | { 118 | xccl_tl_context_t *tl_ctx; 119 | int i; 120 | 121 
| if (team->status != XCCL_OK) { 122 | xccl_error("team %p is used before team_create is completed", team); 123 | return; 124 | } 125 | 126 | for (i=0; in_teams; i++) { 127 | tl_ctx = team->tl_teams[i]->ctx; 128 | tl_ctx->lib->team_destroy(team->tl_teams[i]); 129 | } 130 | free(team); 131 | } 132 | -------------------------------------------------------------------------------- /src/core/xccl_lock_free_tasks_queue.c: -------------------------------------------------------------------------------- 1 | #include "xccl_lock_free_tasks_queue.h" 2 | 3 | xccl_status_t lf_tasks_queue_init(xccl_progress_queue_t *handle) { 4 | handle->ctx = (void *) malloc(sizeof(xccl_lf_tasks_queue_t)); 5 | xccl_lf_tasks_queue_t *ctx = (xccl_lf_tasks_queue_t *) handle->ctx; 6 | 7 | ctx->tasks = (xccl_coll_task_t ***) calloc(NUM_POOLS, sizeof(xccl_coll_task_t **)); 8 | if (ctx->tasks == NULL) { 9 | return XCCL_ERR_NO_MEMORY; 10 | } 11 | ctx->tasks[0] = (xccl_coll_task_t **) calloc(LINE_SIZE, sizeof(xccl_coll_task_t *)); 12 | if (ctx->tasks[0] == NULL) { 13 | return XCCL_ERR_NO_MEMORY; 14 | } 15 | ctx->tasks[1] = (xccl_coll_task_t **) calloc(LINE_SIZE, sizeof(xccl_coll_task_t *)); 16 | if (ctx->tasks[1] == NULL) { 17 | return XCCL_ERR_NO_MEMORY; 18 | } 19 | 20 | ucs_spinlock_init(&(ctx->locked_queue_lock), 0); 21 | ucs_list_head_init(&ctx->locked_queue); 22 | ctx->which_pool = 0; 23 | ctx->tasks_countrs[0] = 0; 24 | ctx->tasks_countrs[1] = 0; 25 | 26 | handle->api.progress_queue_enqueue = &lf_tasks_queue_insert; 27 | handle->api.progress_queue_progress_tasks = &lf_tasks_queue_progress; 28 | handle->api.progress_queue_destroy = &lf_tasks_queue_destroy; 29 | return XCCL_OK; 30 | } 31 | 32 | 33 | xccl_status_t lf_tasks_queue_insert(xccl_progress_queue_t *handle, xccl_coll_task_t *task) { 34 | xccl_lf_tasks_queue_t *ctx = (xccl_lf_tasks_queue_t *) handle->ctx; 35 | int i, j; 36 | xccl_status_t status; 37 | int which_pool = task->was_progressed ^(ctx->which_pool & 1); 38 | for (i = 0; i < 
LINE_SIZE; i++) { 39 | if (__sync_bool_compare_and_swap(&(ctx->tasks[which_pool][i]), 0, task)) { 40 | ucs_atomic_add32(&ctx->tasks_countrs[which_pool], 1); 41 | return XCCL_OK; 42 | } 43 | } 44 | ucs_spin_lock(&ctx->locked_queue_lock); 45 | ucs_list_add_tail(&ctx->locked_queue, &task->list_elem); 46 | ucs_spin_unlock(&ctx->locked_queue_lock); 47 | return XCCL_OK; 48 | } 49 | 50 | xccl_status_t lf_tasks_queue_pop(xccl_lf_tasks_queue_t *ctx, xccl_coll_task_t **popped_task_ptr, int is_first_call) { 51 | int i, j; 52 | int curr_which_pool = ctx->which_pool; 53 | int which_pool = curr_which_pool & 1; 54 | xccl_coll_task_t *popped_task = NULL; 55 | if (ctx->tasks_countrs[which_pool]) { 56 | for (i = 0; i < LINE_SIZE; i++) { 57 | popped_task = ctx->tasks[which_pool][i]; 58 | if (popped_task) { 59 | if (__sync_bool_compare_and_swap(&(ctx->tasks[which_pool][i]), popped_task, 0)) { 60 | ucs_atomic_sub32(&ctx->tasks_countrs[which_pool], 1); 61 | *popped_task_ptr = popped_task; 62 | popped_task->was_progressed = 1; 63 | return XCCL_OK; 64 | } else { 65 | i = -1; 66 | break; 67 | } 68 | } 69 | } 70 | } 71 | if (is_first_call) { 72 | /* TODO: Change atomics to UCS once release v1.9.1 is out */ 73 | ucs_atomic_cswap32(&ctx->which_pool, curr_which_pool, curr_which_pool + 1); 74 | return lf_tasks_queue_pop(ctx, popped_task_ptr, 0); 75 | } 76 | popped_task = NULL; 77 | ucs_spin_lock(&ctx->locked_queue_lock); 78 | if (!ucs_list_is_empty(&ctx->locked_queue)) { 79 | popped_task = ucs_list_extract_head(&ctx->locked_queue, xccl_coll_task_t, list_elem); 80 | } 81 | ucs_spin_unlock(&ctx->locked_queue_lock); 82 | if (popped_task != NULL) { 83 | popped_task->was_progressed = 1; 84 | } 85 | *popped_task_ptr = popped_task; 86 | return XCCL_OK; 87 | } 88 | 89 | xccl_status_t lf_tasks_queue_progress(xccl_progress_queue_t *handle) { 90 | xccl_lf_tasks_queue_t *ctx = (xccl_lf_tasks_queue_t *) handle->ctx; 91 | xccl_coll_task_t *task; 92 | xccl_status_t status = lf_tasks_queue_pop(ctx, &task, 1); 
93 | if (status != XCCL_OK) { 94 | return status; 95 | } 96 | if (task) { 97 | if (task->progress) { 98 | if (0 < task->progress(task)) { 99 | return status; 100 | } 101 | } 102 | if (XCCL_TASK_STATE_COMPLETED == task->state) { 103 | xccl_event_manager_notify(&task->em, XCCL_EVENT_COMPLETED); 104 | } else { 105 | return lf_tasks_queue_insert(handle, task); 106 | } 107 | } 108 | return XCCL_OK; 109 | } 110 | 111 | xccl_status_t lf_tasks_queue_destroy(xccl_progress_queue_t *handle) { 112 | xccl_lf_tasks_queue_t *ctx = (xccl_lf_tasks_queue_t *) handle->ctx; 113 | int i; 114 | for (i = 0; i < NUM_POOLS; i++) { 115 | free(ctx->tasks[i]); 116 | } 117 | free(ctx->tasks); 118 | ucs_spinlock_destroy(&ctx->locked_queue_lock); 119 | free(ctx); 120 | return XCCL_OK; 121 | } 122 | -------------------------------------------------------------------------------- /src/team_lib/ucx/xccl_ucx_team.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) Mellanox Technologies Ltd. 2001-2020. ALL RIGHTS RESERVED. 3 | * 4 | * See file LICENSE for terms. 
5 | */ 6 | #include "config.h" 7 | #include "xccl_ucx_context.h" 8 | #include "xccl_ucx_team.h" 9 | #include "xccl_ucx_ep.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | struct xccl_ucx_nb_create_req { 18 | int phase; 19 | void *scratch; 20 | void *allgather_req; 21 | }; 22 | 23 | xccl_status_t xccl_ucx_team_create_post(xccl_tl_context_t *context, 24 | xccl_team_params_t *params, 25 | xccl_tl_team_t **team) 26 | { 27 | xccl_status_t status = XCCL_OK; 28 | xccl_team_lib_ucx_context_t *ctx = 29 | ucs_derived_of(context, xccl_team_lib_ucx_context_t); 30 | int max_cid = 0, max_addrlen = 0, size = params->oob.size, 31 | rank = params->oob.rank; 32 | xccl_ucx_team_t *ucx_team; 33 | int *tmp; 34 | int local_addrlen, i, sbuf[2]; 35 | char* addr_array; 36 | struct xccl_ucx_nb_create_req *nb_req = malloc(sizeof(*nb_req)); 37 | ucx_team = (xccl_ucx_team_t*)malloc(sizeof(xccl_ucx_team_t)); 38 | XCCL_TEAM_SUPER_INIT(ucx_team->super, context, params); 39 | nb_req->phase = 0; 40 | ucx_team->nb_create_req = nb_req; 41 | ucx_team->range = params->range; 42 | local_addrlen = (int)ctx->ucp_addrlen; 43 | tmp = (int*)malloc(size*sizeof(int)*2); 44 | sbuf[0] = local_addrlen; 45 | sbuf[1] = ctx->next_cid; 46 | xccl_oob_allgather_nb(sbuf, tmp, 2*sizeof(int), ¶ms->oob, 47 | &nb_req->allgather_req); 48 | nb_req->scratch = tmp; 49 | *team = &ucx_team->super; 50 | return XCCL_OK; 51 | } 52 | 53 | xccl_status_t xccl_ucx_team_create_test(xccl_tl_team_t *team) 54 | { 55 | xccl_status_t status = XCCL_OK; 56 | xccl_team_lib_ucx_context_t *ctx = 57 | ucs_derived_of(team->ctx, xccl_team_lib_ucx_context_t); 58 | xccl_oob_collectives_t oob = team->params.oob; 59 | int max_cid = 0, size = oob.size, 60 | rank = oob.rank; 61 | xccl_ucx_team_t *ucx_team = ucs_derived_of(team, xccl_ucx_team_t); 62 | int *tmp; 63 | int local_addrlen, i, sbuf[2]; 64 | char* addr_array; 65 | struct xccl_ucx_nb_create_req *nb_req = 66 | (struct xccl_ucx_nb_create_req 
*)ucx_team->nb_create_req; 67 | if (NULL == nb_req) { 68 | return XCCL_OK; 69 | } else if (XCCL_INPROGRESS == oob.req_test(nb_req->allgather_req)) { 70 | return XCCL_INPROGRESS; 71 | } 72 | oob.req_free(nb_req->allgather_req); 73 | 74 | switch (nb_req->phase) { 75 | case 0: 76 | tmp = (int*)nb_req->scratch; 77 | ucx_team->max_addrlen = 0; 78 | for (i=0; i ucx_team->max_addrlen) ucx_team->max_addrlen = tmp[2*i]; 80 | if (tmp[2*i+1] > max_cid) max_cid = tmp[2*i+1]; 81 | } 82 | free(tmp); 83 | 84 | ucx_team->ctx_id = (uint16_t)max_cid; // TODO check overflow 85 | ucx_team->seq_num = 0; 86 | ctx->next_cid = max_cid + 1; // this is only a tmp solution to max_cid 87 | // need another alg for cid allocatoin or 88 | // and interface to get from user 89 | addr_array = (char*)malloc(size*ucx_team->max_addrlen); 90 | xccl_oob_allgather_nb(ctx->worker_address, addr_array, 91 | ucx_team->max_addrlen, &oob, &nb_req->allgather_req); 92 | nb_req->phase = 1; 93 | nb_req->scratch = addr_array; 94 | return XCCL_INPROGRESS; 95 | case 1: 96 | addr_array = (char*)nb_req->scratch; 97 | if (!ctx->ucp_eps) { 98 | ucx_team->ucp_eps = (ucp_ep_h*)calloc(size, sizeof(ucp_ep_h)); 99 | } else { 100 | ucx_team->ucp_eps = NULL; 101 | } 102 | 103 | for (i=0; imax_addrlen, i))) { 106 | status = XCCL_ERR_NO_MESSAGE; 107 | goto cleanup; 108 | } 109 | } 110 | break; 111 | } 112 | 113 | cleanup: 114 | free(addr_array); 115 | free(nb_req); 116 | ucx_team->nb_create_req = NULL; 117 | return status; 118 | } 119 | 120 | xccl_status_t xccl_ucx_team_destroy(xccl_tl_team_t *team) 121 | { 122 | xccl_ucx_team_t *ucx_team = ucs_derived_of(team, xccl_ucx_team_t); 123 | xccl_team_lib_ucx_context_t *ctx = ucs_derived_of(team->ctx, xccl_team_lib_ucx_context_t); 124 | void *tmp; 125 | 126 | if (ucx_team->ucp_eps) { 127 | close_eps(ucx_team->ucp_eps, team->params.oob.size, ctx->ucp_worker); 128 | tmp = malloc(team->params.oob.size); 129 | xccl_oob_allgather(tmp, tmp, 1, &team->params.oob); 130 | free(tmp); 131 | 
free(ucx_team->ucp_eps); 132 | } 133 | free(ucx_team); 134 | return XCCL_OK; 135 | } 136 | --------------------------------------------------------------------------------