├── .github └── pull_request_template.md ├── .gitignore ├── CHANGES ├── COPYRIGHT ├── Doxyfile.in ├── Makefile.am ├── README.md ├── autogen.sh ├── configure.ac ├── examples ├── Makefile.mk ├── contig.c ├── fuf.c ├── hindexed.c ├── hindexed_block.c ├── hvector.c ├── indexed.c ├── indexed_block.c ├── iov.c ├── matrix_util.c ├── matrix_util.h ├── resized.c ├── subarray.c └── vector.c ├── m4 ├── aclocal_am.m4 ├── aclocal_cc.m4 ├── aclocal_check_visibility.m4 ├── aclocal_coverage.m4 ├── aclocal_libs.m4 ├── aclocal_make.m4 ├── aclocal_runlog.m4 ├── aclocal_subcfg.m4 ├── aclocal_util.m4 ├── ax_prefix_config_h.m4 └── ax_tls.m4 ├── maint ├── Version.base.m4 ├── clmake ├── code-cleanup.bash ├── gentests.py ├── hooks │ └── pre-commit ├── release.py ├── version.m4 ├── yaksa.pc.in └── yutils.py ├── src ├── Makefile.mk ├── backend │ ├── Makefile.mk │ ├── cuda │ │ ├── Makefile.mk │ │ ├── cudalt.sh │ │ ├── genpup.py │ │ ├── hooks │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_cuda_init_hooks.c │ │ │ ├── yaksuri_cudai_info_hooks.c │ │ │ └── yaksuri_cudai_type_hooks.c │ │ ├── include │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_cuda_post.h │ │ │ ├── yaksuri_cuda_pre.h │ │ │ ├── yaksuri_cudai.h │ │ │ └── yaksuri_cudai_base.h │ │ ├── md │ │ │ ├── Makefile.mk │ │ │ └── yaksuri_cudai_md.c │ │ ├── pup │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_cudai_event.c │ │ │ ├── yaksuri_cudai_get_ptr_attr.c │ │ │ └── yaksuri_cudai_pup.c │ │ ├── stub │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_cuda_post.h │ │ │ └── yaksuri_cuda_pre.h │ │ └── subconfigure.m4 │ ├── gencomm.py │ ├── hip │ │ ├── Makefile.mk │ │ ├── genpup.py │ │ ├── hiplt.sh │ │ ├── hooks │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_hip_init_hooks.c │ │ │ ├── yaksuri_hipi_info_hooks.c │ │ │ └── yaksuri_hipi_type_hooks.c │ │ ├── include │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_hip_post.h │ │ │ ├── yaksuri_hip_pre.h │ │ │ ├── yaksuri_hipi.h │ │ │ └── yaksuri_hipi_base.h │ │ ├── md │ │ │ ├── Makefile.mk │ │ │ └── yaksuri_hipi_md.c │ │ ├── pup │ │ │ ├── Makefile.mk 
│ │ │ ├── yaksuri_hipi_event.c │ │ │ ├── yaksuri_hipi_get_ptr_attr.c │ │ │ └── yaksuri_hipi_pup.c │ │ ├── stub │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_hip_post.h │ │ │ └── yaksuri_hip_pre.h │ │ └── subconfigure.m4 │ ├── seq │ │ ├── Makefile.mk │ │ ├── genpup.py │ │ ├── hooks │ │ │ ├── Makefile.mk │ │ │ └── yaksuri_seq_hooks.c │ │ ├── include │ │ │ ├── Makefile.mk │ │ │ ├── yaksuri_seq_post.h │ │ │ ├── yaksuri_seq_pre.h │ │ │ └── yaksuri_seqi.h │ │ └── pup │ │ │ ├── Makefile.mk │ │ │ └── yaksuri_seq_pup.c │ ├── src │ │ ├── Makefile.mk │ │ ├── yaksur_hooks.c │ │ ├── yaksur_post.h │ │ ├── yaksur_pre.h │ │ ├── yaksur_pup.c │ │ ├── yaksur_request.c │ │ ├── yaksuri.h │ │ └── yaksuri_progress.c │ └── ze │ │ ├── Makefile.mk │ │ ├── genpup.py │ │ ├── hooks │ │ ├── Makefile.mk │ │ ├── yaksuri_ze_init_hooks.c │ │ ├── yaksuri_zei_info_hooks.c │ │ └── yaksuri_zei_type_hooks.c │ │ ├── include │ │ ├── Makefile.mk │ │ ├── yaksuri_ze_post.h │ │ ├── yaksuri_ze_pre.h │ │ ├── yaksuri_zei.h │ │ └── yaksuri_zei_md.h │ │ ├── md │ │ ├── Makefile.mk │ │ └── yaksuri_zei_md.c │ │ ├── pup │ │ ├── Makefile.mk │ │ ├── inline.py │ │ ├── yaksuri_zei_event.c │ │ ├── yaksuri_zei_get_ptr_attr.c │ │ └── yaksuri_zei_pup.c │ │ ├── stub │ │ ├── Makefile.mk │ │ ├── yaksuri_ze_post.h │ │ └── yaksuri_ze_pre.h │ │ └── subconfigure.m4 ├── external │ ├── Makefile.mk │ ├── yuthash.h │ └── yutlist.h ├── frontend │ ├── Makefile.mk │ ├── bounds │ │ ├── Makefile.mk │ │ └── yaksa_bounds.c │ ├── flatten │ │ ├── Makefile.mk │ │ ├── yaksa_flatten.c │ │ ├── yaksa_flatten_size.c │ │ └── yaksa_unflatten.c │ ├── include │ │ ├── Makefile.mk │ │ ├── yaksa.h.in │ │ └── yaksi.h │ ├── info │ │ ├── Makefile.mk │ │ └── yaksa_info.c │ ├── init │ │ ├── Makefile.mk │ │ └── yaksa_init.c │ ├── iov │ │ ├── Makefile.mk │ │ ├── yaksa_iov.c │ │ ├── yaksa_iov_len.c │ │ └── yaksa_iov_len_max.c │ ├── pup │ │ ├── Makefile.mk │ │ ├── yaksa_ipack.c │ │ ├── yaksa_iunpack.c │ │ ├── yaksa_pack.c │ │ ├── yaksa_pack_stream.c │ │ ├── yaksa_request.c 
│ │ ├── yaksa_unpack.c │ │ ├── yaksa_unpack_stream.c │ │ ├── yaksi_ipack.c │ │ ├── yaksi_ipack_backend.c │ │ ├── yaksi_ipack_element.c │ │ ├── yaksi_iunpack.c │ │ ├── yaksi_iunpack_backend.c │ │ ├── yaksi_iunpack_element.c │ │ └── yaksi_request.c │ └── types │ │ ├── Makefile.mk │ │ ├── yaksa_blkindx.c │ │ ├── yaksa_contig.c │ │ ├── yaksa_dup.c │ │ ├── yaksa_free.c │ │ ├── yaksa_indexed.c │ │ ├── yaksa_resized.c │ │ ├── yaksa_struct.c │ │ ├── yaksa_subarray.c │ │ ├── yaksa_vector.c │ │ └── yaksi_type.c └── util │ ├── Makefile.mk │ ├── yaksu.h │ ├── yaksu_atomics.c │ ├── yaksu_atomics.h │ ├── yaksu_base.h │ ├── yaksu_buffer_pool.c │ ├── yaksu_buffer_pool.h │ ├── yaksu_handle_pool.c │ └── yaksu_handle_pool.h └── test ├── Makefile.mk ├── dtpools ├── Makefile.mk ├── README └── src │ ├── Makefile.mk │ ├── dtpools.c │ ├── dtpools.h │ ├── dtpools_attr.c │ ├── dtpools_custom.c │ ├── dtpools_desc.c │ ├── dtpools_init_verify.c │ ├── dtpools_internal.h │ └── dtpools_misc.c ├── flatten ├── Makefile.mk └── flatten.c ├── iov ├── Makefile.mk └── iov.c ├── pack ├── Makefile.mk ├── pack-common.c ├── pack-common.h ├── pack-cuda.c ├── pack-hip.c ├── pack-ze.c └── pack.c ├── runtests.py ├── simple ├── Makefile.mk ├── lbub.c ├── simple_test.c ├── test_contig.c └── threaded_test.c └── testlist /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Pull Request Description 2 | 3 | 8 | 9 | 13 | 14 | 15 | 16 | ## Expected Impact 17 | 18 | ## Author Checklist 19 | * [ ] Reference appropriate issues (with "Fixes" or "See" as appropriate) 20 | * [ ] Commits are self-contained and do not do two things at once 21 | * [ ] Commit message is of the form: `module: short description` and follows [good practice](https://chris.beams.io/posts/git-commit/) 22 | * [ ] Add comments such that someone without knowledge of the code could understand 23 | * [ ] Have read and agree to the Yaksa CLA terms 
(https://github.com/pmodels/yaksa/wiki/Yaksa-Contributor-License-Agreement) 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # generic files 2 | .libs 3 | .tmp 4 | .deps 5 | .dirstamp 6 | Makefile 7 | Makefile.in 8 | aclocal.m4 9 | autom4te.cache 10 | config.log 11 | config.status 12 | configure 13 | stamp-h1 14 | libtool 15 | *~ 16 | *.la 17 | *.lo 18 | *.o 19 | __pycache__ 20 | 21 | # m4 temporary files 22 | ar-lib 23 | compile 24 | config.guess 25 | config.sub 26 | depcomp 27 | install-sh 28 | libtool.m4 29 | ltmain.sh 30 | ltoptions.m4 31 | ltsugar.m4 32 | ltversion.m4 33 | lt~obsolete.m4 34 | missing 35 | 36 | # directory specific files 37 | /Doxyfile 38 | /src/frontend/include/yaksa.h 39 | /src/frontend/include/yaksa_config.h 40 | /src/frontend/include/yaksa_config.h.in 41 | /maint/Version 42 | /maint/yaksa.pc 43 | /maint/yutils.pyc 44 | /test/m4 45 | /test/yaksa_test_config.h.in 46 | /test/yaksa_test_config.h 47 | /test/flatten/testlist.gen 48 | /test/flatten/testlist.threads.gen 49 | /test/iov/testlist.gen 50 | /test/iov/testlist.threads.gen 51 | /test/pack/testlist.gen 52 | /test/pack/testlist.threads.gen 53 | /test/pack/testlist.blocking.gen 54 | /test/pack/testlist.stream.gen 55 | /test/simple/testlist.gen 56 | /README 57 | 58 | # autogenerated files 59 | /src/backend/gencomm.pyc 60 | /src/backend/cuda/pup/Makefile.populate_pupfns.mk 61 | /src/backend/cuda/pup/Makefile.pup.mk 62 | /src/backend/cuda/pup/yaksuri_cudai_populate_pupfns* 63 | /src/backend/cuda/pup/yaksuri_cudai_pup.h 64 | /src/backend/cuda/pup/yaksuri_cudai_pup_* 65 | /src/backend/ze/pup/Makefile.populate_pupfns.mk 66 | /src/backend/ze/pup/Makefile.pup.mk 67 | /src/backend/ze/pup/yaksuri_zei_populate_pupfns* 68 | /src/backend/ze/pup/yaksuri_zei_pup.h 69 | /src/backend/ze/pup/yaksuri_zei_pup_* 70 | /src/backend/ze/pup/yaksuri_zei_pup_kernels.cl 71 | 
/src/backend/ze/pup/yaksuri_zei_pup_kernels.ll 72 | /src/backend/ze/pup/yaksuri_zei_pup_kernels.c 73 | /src/backend/ze/hooks/yaksuri_zei_init_kernels.c 74 | /src/backend/ze/hooks/yaksuri_zei_finalize_kernels.c 75 | /src/backend/hip/pup/Makefile.populate_pupfns.mk 76 | /src/backend/hip/pup/Makefile.pup.mk 77 | /src/backend/hip/pup/yaksuri_hipi_populate_pupfns* 78 | /src/backend/hip/pup/yaksuri_hipi_pup.h 79 | /src/backend/hip/pup/yaksuri_hipi_pup_* 80 | /src/backend/seq/pup/Makefile.populate_pupfns.mk 81 | /src/backend/seq/pup/Makefile.pup.mk 82 | /src/backend/seq/pup/yaksuri_seqi_populate_pupfns* 83 | /src/backend/seq/pup/yaksuri_seqi_pup.h 84 | /src/backend/seq/pup/yaksuri_seqi_pup_* 85 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | =============================================================================== 2 | Changes in 0.3 3 | =============================================================================== 4 | 5 | # Default to detecting the CUDA device capabilities at configure 6 | time. If no device is found on the build system, build all "major" 7 | CUDA capabilities to cut down on build time and library size. 
(thanks 8 | to Jeff Hammond for contributing) 9 | 10 | # Add support for mixed memory types (thanks to ParTec AG for 11 | contributing) 12 | 13 | # Add HIP backend for stream APIs 14 | 15 | # Add automatic HIP SM detection 16 | 17 | # Add automatic CUDA SM detection 18 | 19 | # Add support for user-specified CUDA compiler 20 | 21 | # Add support in --ze-native option to compile for multiple devices 22 | 23 | # Add support for --pup-max-nesting < 2 in genpup.py 24 | 25 | # Add support for --ze-revision-id to pass to ocloc compiler 26 | 27 | # Other bug fixes and code cleanup 28 | 29 | =============================================================================== 30 | Changes in 0.2 31 | =============================================================================== 32 | 33 | # Add support for reduction operations (e.g. sum, prod, min, max, ...) 34 | 35 | # Add support for AMD GPUs via HIP backend 36 | 37 | # Add "nogpu" info hint to avoid unnecessary pointer attribute queries 38 | 39 | # Add stream-based pack/unpack APIs 40 | 41 | # Add blocking pack/unpack APIs 42 | 43 | # Add support for NVIDIA HPC SDK compilers 44 | 45 | # Improve compile time for Level Zero kernels 46 | 47 | # Extend tests to support subdevices (tiles) of Intel GPUs 48 | 49 | # Many bug fixes and code cleanups 50 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright (C) 2020, UChicago Argonne, LLC 2 | All Rights Reserved 3 | Software Name: Yaksa 4 | By: Argonne National Laboratory 5 | 6 | OPEN SOURCE LICENSE 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | 14 | 2. 
Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the 17 | distribution. 18 | 19 | 3. Neither the names of the copyright holder nor the names of its 20 | contributors may be used to endorse or promote products derived 21 | from this software without specific prior written permission. 22 | 23 | 24 | ******************************************************************************** 25 | DISCLAIMER 26 | 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | 39 | ******************************************************************************** 40 | 41 | EXTERNAL CONTRIBUTIONS 42 | 43 | Portions of this code have been contributed under the above license by: 44 | 45 | * Intel Corporation 46 | * ParTec AG 47 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | ACLOCAL_AMFLAGS = -I m4 7 | 8 | EXTRA_DIST = autogen.sh README.md 9 | EXTRA_PROGRAMS = 10 | 11 | .PHONY: doxygen 12 | 13 | pkgconfigdir = $(libdir)/pkgconfig 14 | if EMBEDDED_BUILD 15 | pkgconfig_DATA = 16 | else 17 | pkgconfig_DATA = maint/yaksa.pc 18 | endif !EMBEDDED_BUILD 19 | 20 | noinst_HEADERS = 21 | include_HEADERS = 22 | noinst_PROGRAMS = 23 | nodist_noinst_SCRIPTS = 24 | dist_noinst_SCRIPTS = autogen.sh 25 | 26 | if EMBEDDED_BUILD 27 | noinst_LTLIBRARIES = libyaksa.la 28 | else 29 | lib_LTLIBRARIES = libyaksa.la 30 | endif !EMBEDDED_BUILD 31 | 32 | libyaksa_la_SOURCES = 33 | AM_CPPFLAGS = 34 | 35 | if EMBEDDED_BUILD 36 | libyaksa_la_LDFLAGS = -avoid-version 37 | else 38 | libyaksa_la_LDFLAGS = -version-info @libyaksa_so_version@ 39 | endif !EMBEDDED_BUILD 40 | 41 | include $(top_srcdir)/src/Makefile.mk 42 | 43 | testlists= 44 | include $(top_srcdir)/test/Makefile.mk 45 | include $(top_srcdir)/examples/Makefile.mk 46 | 47 | examples: $(example_list) 48 | 49 | doxygen: 50 | mkdir -p doc 51 | doxygen Doxyfile 52 | 53 | testing: 54 | @$(top_srcdir)/test/runtests.py --summary=$(top_builddir)/test/summary.junit.xml \ 55 | $(testlists) 56 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | ## 3 | ## Copyright (C) by Argonne National Laboratory 4 | ## See COPYRIGHT in top-level directory 5 | ## 6 | 7 | ######################################################################## 8 | ## Utility functions 9 | ######################################################################## 10 | 11 | echo_n() { 12 | # "echo -n" isn't portable, must portably implement with printf 13 | printf "%s" "$*" 14 | } 15 | 16 | error() { 17 | echo "===> ERROR: $@" 18 | exit 1 # a bare "exit" would propagate echo's status (0) and hide the failure 19 | } 20 | 21 | 22 | ######################################################################## 23 | ## Parse user environment and arguments 24 | ######################################################################## 25 | 26 | genpup_args= 27 | gentests_args= 28 | 29 | if test -n "$YAKSA_AUTOGEN_PUP_NESTING" ; then 30 | genpup_args=$YAKSA_AUTOGEN_PUP_NESTING 31 | fi 32 | 33 | for arg in "$@" ; do 34 | case $arg in 35 | -pup-max-nesting=*|--pup-max-nesting=*) 36 | genpup_args="$genpup_args $arg" 37 | ;; 38 | -skip-test-complex) 39 | gentests_args="$gentests_args $arg" 40 | ;; 41 | *) 42 | error "unknown argument $arg" 43 | ;; 44 | esac 45 | done 46 | 47 | ######################################################################## 48 | ## Generating required files 49 | ######################################################################## 50 | 51 | # backend pup functions 52 | for x in seq cuda ze hip ; do 53 | echo_n "generating backend pup functions for ${x}... " 54 | ./src/backend/${x}/genpup.py ${genpup_args} 55 | if test "$?" = "0" ; then 56 | echo "done" 57 | else 58 | echo "failed" 59 | exit 1 60 | fi 61 | done 62 | 63 | # tests 64 | ./maint/gentests.py ${gentests_args} 65 | if test "$?"
!= "0" ; then 66 | echo "test generation failed" 67 | exit 1 68 | fi 69 | 70 | 71 | ######################################################################## 72 | ## Autotools 73 | ######################################################################## 74 | 75 | # generate configure files 76 | echo 77 | echo "=== generating configure files in main directory ===" 78 | autoreconf -vif 79 | if test "$?" = "0" ; then 80 | echo "=== done === " 81 | else 82 | echo "=== failed === " 83 | exit 1 84 | fi 85 | echo 86 | 87 | 88 | ######################################################################## 89 | ## Building maint/Version 90 | ######################################################################## 91 | 92 | # build a substitute maint/Version script now that we store the single copy of 93 | # this information in an m4 file for autoconf's benefit 94 | echo_n "Generating a helper maint/Version... " 95 | if autom4te -l M4sugar maint/Version.base.m4 > maint/Version ; then 96 | echo "done" 97 | else 98 | echo "error" 99 | error "unable to correctly generate maint/Version shell helper" 100 | fi 101 | -------------------------------------------------------------------------------- /examples/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/examples 7 | 8 | EXTRA_PROGRAMS += $(example_list) 9 | 10 | example_list = examples/contig \ 11 | examples/vector \ 12 | examples/hvector \ 13 | examples/indexed_block \ 14 | examples/hindexed_block \ 15 | examples/indexed \ 16 | examples/hindexed \ 17 | examples/resized \ 18 | examples/subarray \ 19 | examples/fuf \ 20 | examples/iov 21 | 22 | example_utils = examples/matrix_util.c 23 | 24 | examples_contig_SOURCES = examples/contig.c $(example_utils) 25 | examples_contig_LDADD = libyaksa.la 26 | examples_vector_SOURCES = examples/vector.c 
$(example_utils) 27 | examples_vector_LDADD = libyaksa.la 28 | examples_hvector_SOURCES = examples/hvector.c $(example_utils) 29 | examples_hvector_LDADD = libyaksa.la 30 | examples_indexed_block_SOURCES = examples/indexed_block.c $(example_utils) 31 | examples_indexed_block_LDADD = libyaksa.la 32 | examples_hindexed_block_SOURCES = examples/hindexed_block.c $(example_utils) 33 | examples_hindexed_block_LDADD = libyaksa.la 34 | examples_indexed_SOURCES = examples/indexed.c $(example_utils) 35 | examples_indexed_LDADD = libyaksa.la 36 | examples_hindexed_SOURCES = examples/hindexed.c $(example_utils) 37 | examples_hindexed_LDADD = libyaksa.la 38 | examples_resized_SOURCES = examples/resized.c $(example_utils) 39 | examples_resized_LDADD = libyaksa.la 40 | examples_subarray_SOURCES = examples/subarray.c $(example_utils) 41 | examples_subarray_LDADD = libyaksa.la 42 | examples_fuf_SOURCES = examples/fuf.c $(example_utils) 43 | examples_fuf_LDADD = libyaksa.la 44 | examples_iov_SOURCES = examples/iov.c $(example_utils) 45 | examples_iov_LDADD = libyaksa.la 46 | -------------------------------------------------------------------------------- /examples/contig.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include "matrix_util.h" 9 | 10 | int main() 11 | { 12 | int rc; 13 | int input_matrix[SIZE]; 14 | int pack_buf[SIZE]; 15 | int unpack_buf[SIZE]; 16 | yaksa_type_t contig; 17 | 18 | yaksa_init(NULL); /* before any yaksa API is called the library 19 | * must be initialized */ 20 | 21 | init_matrix(input_matrix, ROWS, COLS); 22 | set_matrix(pack_buf, ROWS, COLS, 0); 23 | set_matrix(unpack_buf, ROWS, COLS, 0); 24 | 25 | rc = yaksa_type_create_contig(SIZE, YAKSA_TYPE__INT, NULL, &contig); 26 | assert(rc == YAKSA_SUCCESS); 27 | 28 | /* pack */ 29 | yaksa_request_t request; 30 | uintptr_t 
actual_pack_bytes; 31 | rc = yaksa_ipack(input_matrix, 1, contig, 0, pack_buf, SIZE * sizeof(int), &actual_pack_bytes, 32 | NULL, YAKSA_OP__REPLACE, &request); 33 | assert(rc == YAKSA_SUCCESS); 34 | rc = yaksa_request_wait(request); 35 | assert(rc == YAKSA_SUCCESS); 36 | 37 | /* unpack */ 38 | uintptr_t actual_unpack_bytes; 39 | rc = yaksa_iunpack(pack_buf, SIZE * sizeof(int), unpack_buf, 1, contig, 0, &actual_unpack_bytes, 40 | NULL, YAKSA_OP__REPLACE, &request); 41 | assert(rc == YAKSA_SUCCESS); 42 | rc = yaksa_request_wait(request); 43 | assert(rc == YAKSA_SUCCESS); 44 | 45 | print_matrix(input_matrix, ROWS, COLS, "input_matrix="); 46 | print_matrix(pack_buf, ROWS, COLS, "pack_buf="); 47 | print_matrix(unpack_buf, ROWS, COLS, "unpack_buf="); 48 | 49 | yaksa_type_free(contig); 50 | yaksa_finalize(); 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /examples/fuf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "matrix_util.h" 10 | 11 | #define BLKLEN (4) 12 | 13 | int main() 14 | { 15 | int rc; 16 | int input_matrix[SIZE]; 17 | int pack_buf[SIZE]; 18 | int unpack_buf[SIZE]; 19 | yaksa_type_t indexed_block; 20 | intptr_t array_of_displacements[ROWS] = { 21 | 4, 12, 20, 28, 22 | 32, 40, 48, 56 23 | }; 24 | 25 | yaksa_init(NULL); /* before any yaksa API is called the library 26 | * must be initialized */ 27 | 28 | init_matrix(input_matrix, ROWS, COLS); 29 | set_matrix(pack_buf, ROWS, COLS, 0); 30 | set_matrix(unpack_buf, ROWS, COLS, 0); 31 | 32 | rc = yaksa_type_create_indexed_block(ROWS, BLKLEN, array_of_displacements, YAKSA_TYPE__INT, 33 | NULL, &indexed_block); 34 | assert(rc == YAKSA_SUCCESS); 35 | 36 | uintptr_t flatten_size; 37 | yaksa_flatten_size(indexed_block, &flatten_size); 38 | 39 | void *flatten_type 
= malloc(flatten_size); 40 | yaksa_flatten(indexed_block, flatten_type); 41 | yaksa_type_free(indexed_block); 42 | 43 | yaksa_type_t unflatten_type; 44 | rc = yaksa_unflatten(&unflatten_type, flatten_type); 45 | assert(rc == YAKSA_SUCCESS); 46 | 47 | uintptr_t size; 48 | yaksa_type_get_size(unflatten_type, &size); 49 | 50 | yaksa_request_t request; 51 | uintptr_t actual_pack_bytes; 52 | rc = yaksa_ipack(input_matrix, 1, unflatten_type, 0, pack_buf, ROWS * BLKLEN * sizeof(int), 53 | &actual_pack_bytes, NULL, YAKSA_OP__REPLACE, &request); 54 | assert(rc == YAKSA_SUCCESS); 55 | rc = yaksa_request_wait(request); 56 | assert(rc == YAKSA_SUCCESS); 57 | 58 | uintptr_t actual_unpack_bytes; 59 | rc = yaksa_iunpack(pack_buf, size, unpack_buf, 1, unflatten_type, 0, &actual_unpack_bytes, 60 | NULL, YAKSA_OP__REPLACE, &request); 61 | assert(rc == YAKSA_SUCCESS); 62 | rc = yaksa_request_wait(request); 63 | assert(rc == YAKSA_SUCCESS); 64 | 65 | print_matrix(input_matrix, ROWS, COLS, "input_matrix="); 66 | print_matrix(unpack_buf, ROWS, COLS, "unpack_buf="); 67 | 68 | yaksa_type_free(unflatten_type); 69 | free(flatten_type); 70 | yaksa_finalize(); 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /examples/hindexed.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include "matrix_util.h" 9 | 10 | int main() 11 | { 12 | int rc; 13 | int input_matrix[SIZE]; 14 | int pack_buf[SIZE]; 15 | int unpack_buf[SIZE]; 16 | yaksa_type_t hindexed; 17 | intptr_t array_of_blocklengths[ROWS - 1] = { 18 | 1, 2, 2, 4, 4, 4, 4 19 | }; 20 | intptr_t array_of_displacements[ROWS - 1] = { 21 | 9 * sizeof(int), 22 | 18 * sizeof(int), 26 * sizeof(int), 23 | 36 * sizeof(int), 44 * sizeof(int), 52 * sizeof(int), 60 * sizeof(int) 24 | }; 25 | 26 | yaksa_init(NULL); /* before 
any yaksa API is called the library 27 | * must be initialized */ 28 | 29 | init_matrix(input_matrix, ROWS, COLS); 30 | set_matrix(pack_buf, ROWS, COLS, 0); 31 | set_matrix(unpack_buf, ROWS, COLS, 0); 32 | 33 | rc = yaksa_type_create_hindexed(ROWS - 1, array_of_blocklengths, array_of_displacements, 34 | YAKSA_TYPE__INT, NULL, &hindexed); 35 | assert(rc == YAKSA_SUCCESS); 36 | 37 | yaksa_request_t request; 38 | uintptr_t actual_pack_bytes; 39 | rc = yaksa_ipack(input_matrix, 1, hindexed, 0, pack_buf, 21 * sizeof(int), &actual_pack_bytes, 40 | NULL, YAKSA_OP__REPLACE, &request); 41 | assert(rc == YAKSA_SUCCESS); 42 | rc = yaksa_request_wait(request); 43 | assert(rc == YAKSA_SUCCESS); 44 | 45 | uintptr_t actual_unpack_bytes; 46 | rc = yaksa_iunpack(pack_buf, 21 * sizeof(int), unpack_buf, 1, hindexed, 0, &actual_unpack_bytes, 47 | NULL, YAKSA_OP__REPLACE, &request); 48 | assert(rc == YAKSA_SUCCESS); 49 | rc = yaksa_request_wait(request); 50 | assert(rc == YAKSA_SUCCESS); 51 | 52 | print_matrix(input_matrix, ROWS, COLS, "input="); 53 | print_matrix(pack_buf, ROWS, COLS, "pack_buf="); 54 | print_matrix(unpack_buf, ROWS, COLS, "unpack_buf="); 55 | 56 | yaksa_type_free(hindexed); 57 | yaksa_finalize(); 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /examples/hindexed_block.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "matrix_util.h" 10 | 11 | /* 12 | * struct pair is defined in matrix_util.h 13 | * each element is of type pair = (long, int) 14 | */ 15 | 16 | #define BLKLEN (4) 17 | 18 | int main() 19 | { 20 | int rc; 21 | int input_matrix[SIZE]; 22 | int pack_buf[SIZE]; 23 | int unpack_buf[SIZE]; 24 | yaksa_type_t hindexed_block; 25 | intptr_t array_of_displacements[ROWS] = { 26 | 4 * sizeof(int), 12 * 
sizeof(int), 20 * sizeof(int), 28 * sizeof(int), 27 | 32 * sizeof(int), 40 * sizeof(int), 48 * sizeof(int), 56 * sizeof(int) 28 | }; 29 | 30 | yaksa_init(NULL); /* before any yaksa API is called the library 31 | * must be initialized */ 32 | 33 | init_matrix(input_matrix, ROWS, COLS); 34 | set_matrix(pack_buf, ROWS, COLS, 0); 35 | set_matrix(unpack_buf, ROWS, COLS, 0); 36 | 37 | rc = yaksa_type_create_hindexed_block(ROWS, BLKLEN, array_of_displacements, YAKSA_TYPE__INT, 38 | NULL, &hindexed_block); 39 | assert(rc == YAKSA_SUCCESS); 40 | 41 | yaksa_request_t request; 42 | uintptr_t actual_pack_bytes; 43 | rc = yaksa_ipack(input_matrix, 1, hindexed_block, 0, pack_buf, ROWS * BLKLEN * sizeof(int), 44 | &actual_pack_bytes, NULL, YAKSA_OP__REPLACE, &request); 45 | assert(rc == YAKSA_SUCCESS); 46 | rc = yaksa_request_wait(request); 47 | assert(rc == YAKSA_SUCCESS); 48 | 49 | uintptr_t actual_unpack_bytes; 50 | rc = yaksa_iunpack(pack_buf, ROWS * BLKLEN * sizeof(int), unpack_buf, 1, hindexed_block, 0, 51 | &actual_unpack_bytes, NULL, YAKSA_OP__REPLACE, &request); 52 | assert(rc == YAKSA_SUCCESS); 53 | rc = yaksa_request_wait(request); 54 | assert(rc == YAKSA_SUCCESS); 55 | 56 | print_matrix(input_matrix, ROWS, COLS, "input_matrix="); 57 | print_matrix(unpack_buf, ROWS, COLS, "unpack_buf="); 58 | 59 | yaksa_type_free(hindexed_block); 60 | yaksa_finalize(); 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /examples/hvector.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include "matrix_util.h" 9 | 10 | int main() 11 | { 12 | int rc; 13 | int input_matrix[SIZE]; 14 | int pack_buf[SIZE]; 15 | int unpack_buf[SIZE]; 16 | yaksa_type_t hvector; 17 | 18 | yaksa_init(NULL); /* before any yaksa API is called the library 19 | * must be initialized */ 
20 | 21 | init_matrix(input_matrix, ROWS, COLS); 22 | set_matrix(pack_buf, ROWS, COLS, 0); 23 | set_matrix(unpack_buf, ROWS, COLS, 0); 24 | 25 | rc = yaksa_type_create_hvector(ROWS, 1, COLS * sizeof(int), YAKSA_TYPE__INT, NULL, &hvector); 26 | assert(rc == YAKSA_SUCCESS); 27 | 28 | yaksa_request_t request; 29 | uintptr_t actual_pack_bytes; 30 | 31 | rc = yaksa_ipack(input_matrix, 1, hvector, 0, pack_buf, ROWS * sizeof(int), &actual_pack_bytes, 32 | NULL, YAKSA_OP__REPLACE, &request); 33 | assert(rc == YAKSA_SUCCESS); 34 | rc = yaksa_request_wait(request); 35 | assert(rc == YAKSA_SUCCESS); 36 | 37 | uintptr_t actual_unpack_bytes; 38 | rc = yaksa_iunpack(pack_buf, ROWS * sizeof(int), unpack_buf, 1, hvector, 0, 39 | &actual_unpack_bytes, NULL, YAKSA_OP__REPLACE, &request); 40 | assert(rc == YAKSA_SUCCESS); 41 | rc = yaksa_request_wait(request); 42 | assert(rc == YAKSA_SUCCESS); 43 | 44 | print_matrix(input_matrix, ROWS, ROWS, "input_matrix="); 45 | print_matrix(unpack_buf, ROWS, ROWS, "unpack_buf="); 46 | 47 | set_matrix(unpack_buf, ROWS, COLS, 0); 48 | 49 | /* pack second column */ 50 | rc = yaksa_ipack(input_matrix + 1, 1, hvector, 0, pack_buf, ROWS * sizeof(int), 51 | &actual_pack_bytes, NULL, YAKSA_OP__REPLACE, &request); 52 | assert(rc == YAKSA_SUCCESS); 53 | rc = yaksa_request_wait(request); 54 | assert(rc == YAKSA_SUCCESS); 55 | 56 | rc = yaksa_iunpack(pack_buf, ROWS * sizeof(int), unpack_buf + 1, 1, hvector, 0, 57 | &actual_unpack_bytes, NULL, YAKSA_OP__REPLACE, &request); 58 | assert(rc == YAKSA_SUCCESS); 59 | rc = yaksa_request_wait(request); 60 | assert(rc == YAKSA_SUCCESS); 61 | 62 | print_matrix(unpack_buf, ROWS, ROWS, "unpack_buf+1="); 63 | yaksa_type_free(hvector); 64 | 65 | /* matrix transposition using hvector */ 66 | yaksa_type_t vector; 67 | 68 | rc = yaksa_type_create_vector(ROWS, 1, COLS, YAKSA_TYPE__INT, NULL, &vector); 69 | assert(rc == YAKSA_SUCCESS); 70 | 71 | rc = yaksa_type_create_hvector(COLS, 1, sizeof(int), vector, NULL, &hvector); 72 | 
assert(rc == YAKSA_SUCCESS); 73 | 74 | set_matrix(pack_buf, ROWS, COLS, 0); 75 | set_matrix(unpack_buf, ROWS, COLS, 0); 76 | 77 | rc = yaksa_ipack(input_matrix, 1, hvector, 0, pack_buf, 256, &actual_pack_bytes, 78 | NULL, YAKSA_OP__REPLACE, &request); 79 | assert(rc == YAKSA_SUCCESS); 80 | rc = yaksa_request_wait(request); 81 | assert(rc == YAKSA_SUCCESS); 82 | 83 | rc = yaksa_iunpack(pack_buf, 256, unpack_buf, 1, hvector, 0, &actual_unpack_bytes, 84 | NULL, YAKSA_OP__REPLACE, &request); 85 | assert(rc == YAKSA_SUCCESS); 86 | rc = yaksa_request_wait(request); 87 | assert(rc == YAKSA_SUCCESS); 88 | 89 | fprintf(stdout, "\nMatrix transposition:\n\n"); 90 | print_matrix(pack_buf, ROWS, COLS, "pack_buf="); 91 | print_matrix(unpack_buf, ROWS, ROWS, "unpack_buf="); 92 | 93 | yaksa_type_free(vector); 94 | yaksa_type_free(hvector); 95 | yaksa_finalize(); 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /examples/indexed.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include "matrix_util.h" 9 | 10 | int main() 11 | { 12 | int rc; 13 | int input_matrix[SIZE]; 14 | int pack_buf[SIZE]; 15 | int unpack_buf[SIZE]; 16 | yaksa_type_t indexed; 17 | intptr_t array_of_blocklengths[ROWS - 1] = { 18 | 1, 2, 2, 4, 4, 4, 4 19 | }; 20 | intptr_t array_of_displacements[ROWS - 1] = { 21 | 9, 22 | 18, 26, 23 | 36, 44, 52, 60 24 | }; 25 | 26 | yaksa_init(NULL); /* before any yaksa API is called the library 27 | * must be initialized */ 28 | 29 | init_matrix(input_matrix, ROWS, COLS); 30 | set_matrix(pack_buf, ROWS, COLS, 0); 31 | set_matrix(unpack_buf, ROWS, COLS, 0); 32 | 33 | rc = yaksa_type_create_indexed(ROWS - 1, array_of_blocklengths, array_of_displacements, 34 | YAKSA_TYPE__INT, NULL, &indexed); 35 | assert(rc == YAKSA_SUCCESS); 36 | 37 | 
yaksa_request_t request; 38 | uintptr_t actual_pack_bytes; 39 | rc = yaksa_ipack(input_matrix, 1, indexed, 0, pack_buf, 21 * sizeof(int), &actual_pack_bytes, 40 | NULL, YAKSA_OP__REPLACE, &request); 41 | assert(rc == YAKSA_SUCCESS); 42 | rc = yaksa_request_wait(request); 43 | assert(rc == YAKSA_SUCCESS); 44 | 45 | uintptr_t actual_unpack_bytes; 46 | rc = yaksa_iunpack(pack_buf, 21 * sizeof(int), unpack_buf, 1, indexed, 0, &actual_unpack_bytes, 47 | NULL, YAKSA_OP__REPLACE, &request); 48 | assert(rc == YAKSA_SUCCESS); 49 | rc = yaksa_request_wait(request); 50 | assert(rc == YAKSA_SUCCESS); 51 | 52 | print_matrix(input_matrix, ROWS, COLS, "input="); 53 | print_matrix(pack_buf, ROWS, COLS, "pack_buf="); 54 | print_matrix(unpack_buf, ROWS, COLS, "unpack_buf="); 55 | 56 | yaksa_type_free(indexed); 57 | yaksa_finalize(); 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /examples/indexed_block.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "matrix_util.h" 10 | 11 | #define BLKLEN (4) 12 | 13 | int main() 14 | { 15 | int rc; 16 | int input_matrix[SIZE]; 17 | int pack_buf[SIZE]; 18 | int unpack_buf[SIZE]; 19 | yaksa_type_t indexed_block; 20 | intptr_t array_of_displacements[ROWS] = { 21 | 4, 12, 20, 28, 22 | 32, 40, 48, 56 23 | }; 24 | 25 | yaksa_init(NULL); /* before any yaksa API is called the library 26 | * must be initialized */ 27 | 28 | init_matrix(input_matrix, ROWS, COLS); 29 | set_matrix(pack_buf, ROWS, COLS, 0); 30 | set_matrix(unpack_buf, ROWS, COLS, 0); 31 | 32 | rc = yaksa_type_create_indexed_block(ROWS, BLKLEN, array_of_displacements, YAKSA_TYPE__INT, 33 | NULL, &indexed_block); 34 | assert(rc == YAKSA_SUCCESS); 35 | 36 | yaksa_request_t request; 37 | uintptr_t actual_pack_bytes; 38 | rc = 
yaksa_ipack(input_matrix, 1, indexed_block, 0, pack_buf, ROWS * BLKLEN * sizeof(int), 39 | &actual_pack_bytes, NULL, YAKSA_OP__REPLACE, &request); 40 | assert(rc == YAKSA_SUCCESS); 41 | rc = yaksa_request_wait(request); 42 | assert(rc == YAKSA_SUCCESS); 43 | 44 | uintptr_t actual_unpack_bytes; 45 | rc = yaksa_iunpack(pack_buf, ROWS * BLKLEN * sizeof(int), unpack_buf, 1, indexed_block, 0, 46 | &actual_unpack_bytes, NULL, YAKSA_OP__REPLACE, &request); 47 | assert(rc == YAKSA_SUCCESS); 48 | rc = yaksa_request_wait(request); 49 | assert(rc == YAKSA_SUCCESS); 50 | 51 | print_matrix(input_matrix, ROWS, COLS, "input_matrix="); 52 | print_matrix(unpack_buf, ROWS, COLS, "unpack_buf="); 53 | 54 | yaksa_type_free(indexed_block); 55 | yaksa_finalize(); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /examples/iov.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "matrix_util.h" 10 | 11 | #define BLKLEN (4) 12 | 13 | int main() 14 | { 15 | int rc; 16 | int input_matrix[SIZE]; 17 | int pack_buf[SIZE]; 18 | int unpack_buf[SIZE]; 19 | yaksa_type_t indexed_block; 20 | intptr_t array_of_displacements[ROWS] = { 21 | 4, 12, 20, 28, 22 | 32, 40, 48, 56 23 | }; 24 | 25 | yaksa_init(NULL); /* before any yaksa API is called the library 26 | * must be initialized */ 27 | 28 | init_matrix(input_matrix, ROWS, COLS); 29 | set_matrix(pack_buf, ROWS, COLS, 0); 30 | set_matrix(unpack_buf, ROWS, COLS, 0); 31 | 32 | rc = yaksa_type_create_indexed_block(ROWS, BLKLEN, array_of_displacements, YAKSA_TYPE__INT, 33 | NULL, &indexed_block); 34 | assert(rc == YAKSA_SUCCESS); 35 | 36 | /* create an iov of the datatype */ 37 | uintptr_t num_iov; 38 | rc = yaksa_iov_len(1, indexed_block, &num_iov); 39 | assert(rc == YAKSA_SUCCESS); 40 | 41 | 
/*
 * Print a row-major rows x cols integer matrix to stdout.
 *
 * The description string is printed on its own line, then one line per
 * matrix row, then a blank line.  Every value is zero-padded to the number
 * of decimal digits of the largest index (rows * cols - 1), which is the
 * largest value init_matrix() stores.
 */
void print_matrix(int *matrix, int rows, int cols, const char *desc)
{
    /* Always use at least one digit: with a precision of 0, printf emits
     * no characters at all for the value 0, so a 1x1 matrix would print as
     * an empty field. */
    int digits = 1;
    for (int max = (rows * cols) - 1; max >= 10; max /= 10) {
        digits++;
    }

    fprintf(stdout, "%s\n", desc);
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            fprintf(stdout, "%.*d ", digits, matrix[(i * cols) + j]);
        }
        fprintf(stdout, "\n");
    }
    fprintf(stdout, "\n");
}

/*
 * Fill a row-major rows x cols matrix so that each element holds its own
 * linear index (0, 1, ..., rows * cols - 1).
 */
void init_matrix(int *matrix, int rows, int cols)
{
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            matrix[(i * cols) + j] = (i * cols) + j;
        }
    }
}

/*
 * Set every element of a row-major rows x cols matrix to val.
 */
void set_matrix(int *matrix, int rows, int cols, int val)
{
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            matrix[(i * cols) + j] = val;
        }
    }
}
uintptr_t actual_unpack_bytes; 42 | rc = yaksa_iunpack(pack_buf, 256, unpack_buf, 1, transpose, 0, &actual_unpack_bytes, 43 | NULL, YAKSA_OP__REPLACE, &request); 44 | assert(rc == YAKSA_SUCCESS); 45 | rc = yaksa_request_wait(request); 46 | assert(rc == YAKSA_SUCCESS); 47 | 48 | print_matrix(input_matrix, ROWS, COLS, "input_matrix="); 49 | print_matrix(pack_buf, ROWS, COLS, "pack_buf="); 50 | print_matrix(unpack_buf, ROWS, COLS, "unpack_buf="); 51 | 52 | yaksa_type_free(vector); 53 | yaksa_type_free(vector_resized); 54 | yaksa_type_free(transpose); 55 | yaksa_finalize(); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /examples/subarray.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "matrix_util.h" 10 | 11 | int main() 12 | { 13 | int rc; 14 | int input_matrix[SIZE]; 15 | int pack_buf[SIZE]; 16 | int unpack_buf[SIZE]; 17 | yaksa_type_t subarray; 18 | int ndims = 2; 19 | intptr_t array_of_sizes[2] = { ROWS, COLS }; 20 | intptr_t array_of_subsizes[2] = { 4, 4 }; 21 | intptr_t array_of_starts[2] = { 4, 4 }; 22 | yaksa_subarray_order_e order = YAKSA_SUBARRAY_ORDER__C; 23 | 24 | yaksa_init(NULL); /* before any yaksa API is called the library 25 | * must be initialized */ 26 | 27 | init_matrix(input_matrix, ROWS, COLS); 28 | set_matrix(pack_buf, ROWS, COLS, 0); 29 | set_matrix(unpack_buf, ROWS, COLS, 0); 30 | 31 | rc = yaksa_type_create_subarray(ndims, array_of_sizes, array_of_subsizes, 32 | array_of_starts, order, YAKSA_TYPE__INT, NULL, &subarray); 33 | assert(rc == YAKSA_SUCCESS); 34 | 35 | yaksa_request_t request; 36 | uintptr_t actual_pack_bytes; 37 | rc = yaksa_ipack(input_matrix, 1, subarray, 0, pack_buf, 16 * sizeof(int), &actual_pack_bytes, 38 | NULL, YAKSA_OP__REPLACE, &request); 39 | assert(rc == 
YAKSA_SUCCESS); 40 | rc = yaksa_request_wait(request); 41 | assert(rc == YAKSA_SUCCESS); 42 | 43 | uintptr_t actual_unpack_bytes; 44 | rc = yaksa_iunpack(pack_buf, 16 * sizeof(int), unpack_buf, 1, subarray, 0, &actual_unpack_bytes, 45 | NULL, YAKSA_OP__REPLACE, &request); 46 | assert(rc == YAKSA_SUCCESS); 47 | rc = yaksa_request_wait(request); 48 | assert(rc == YAKSA_SUCCESS); 49 | 50 | print_matrix(input_matrix, 8, 8, "input_matrix="); 51 | print_matrix(unpack_buf, 8, 8, "unpack_buf="); 52 | 53 | yaksa_type_free(subarray); 54 | yaksa_finalize(); 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /examples/vector.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include "matrix_util.h" 9 | 10 | int main() 11 | { 12 | int rc; 13 | int input_matrix[SIZE]; 14 | int pack_buf[SIZE]; 15 | int unpack_buf[SIZE]; 16 | yaksa_type_t vector; 17 | 18 | yaksa_init(NULL); /* before any yaksa API is called the library 19 | * must be initialized */ 20 | 21 | init_matrix(input_matrix, ROWS, COLS); 22 | set_matrix(pack_buf, ROWS, COLS, 0); 23 | set_matrix(unpack_buf, ROWS, COLS, 0); 24 | 25 | rc = yaksa_type_create_vector(ROWS, 1, COLS, YAKSA_TYPE__INT, NULL, &vector); 26 | assert(rc == YAKSA_SUCCESS); 27 | 28 | yaksa_request_t request; 29 | uintptr_t actual_pack_bytes; 30 | 31 | rc = yaksa_ipack(input_matrix, 1, vector, 0, pack_buf, ROWS * sizeof(int), &actual_pack_bytes, 32 | NULL, YAKSA_OP__REPLACE, &request); 33 | assert(rc == YAKSA_SUCCESS); 34 | rc = yaksa_request_wait(request); 35 | assert(rc == YAKSA_SUCCESS); 36 | 37 | uintptr_t actual_unpack_bytes; 38 | rc = yaksa_iunpack(pack_buf, ROWS * sizeof(int), unpack_buf, 1, vector, 0, &actual_unpack_bytes, 39 | NULL, YAKSA_OP__REPLACE, &request); 40 | assert(rc == YAKSA_SUCCESS); 41 | rc = 
dnl Macro to add --enable-coverage option (disabled by default) and add
dnl appropriate compiler flags to permit usage of gcov if that option is
dnl enabled.
dnl
dnl Sets "pac_cv_use_coverage=yes" and AC_DEFINEs USE_COVERAGE if coverage was
dnl successfully enabled.  Also creates an AM_CONDITIONAL with the name
dnl "BUILD_COVERAGE" that is true iff pac_cv_use_coverage=yes.
dnl
dnl Usage: PAC_ENABLE_COVERAGE
dnl
dnl Assumes that all of the compiler macros have already been invoked
dnl (AC_PROG_CC and friends).
AC_DEFUN([PAC_ENABLE_COVERAGE],[

AC_ARG_VAR([GCOV],[name/path for the gcov utility])
AC_CHECK_PROGS([GCOV],[gcov])

AC_ARG_ENABLE([coverage],
              [AS_HELP_STRING([--enable-coverage],
                              [Turn on coverage analysis using gcc and gcov])],
              [],[enable_coverage=no])

if test "$enable_coverage" = "yes" ; then
    # C: instrument only when the C compiler is GCC
    if test "$GCC" = "yes" ; then
        CFLAGS="$CFLAGS -fprofile-arcs -ftest-coverage"
        LIBS="$LIBS -lgcov"
    else
        AC_MSG_WARN([--enable-coverage only supported for GCC])
    fi
    # C++: same flags, applied only when C++ support is enabled
    if test "$enable_cxx" = "yes" ; then
        if test "$ac_cv_cxx_compiler_gnu" = "yes" ; then
            CXXFLAGS="$CXXFLAGS -fprofile-arcs -ftest-coverage"
            LIBS="$LIBS -lgcov"
        else
            AC_MSG_WARN([--enable-coverage only supported for GCC])
        fi
    fi
    # Add similar options for g77 so that the Fortran tests will also
    # be built with coverage instrumentation
    if test "$enable_f77" = yes ; then
        if test "$ac_cv_f77_compiler_gnu" = "yes" ; then
            FFLAGS="$FFLAGS -fprofile-arcs -ftest-coverage"
            LIBS="$LIBS -lgcov"
        else
            AC_MSG_WARN([--enable-coverage only supported for G77/GFORTRAN])
        fi
    fi
    if test "$enable_fc" = yes ; then
        if test "$ac_cv_fc_compiler_gnu" = "yes" ; then
            FCFLAGS="$FCFLAGS -fprofile-arcs -ftest-coverage"
            LIBS="$LIBS -lgcov"
        else
            AC_MSG_WARN([--enable-coverage only supported for GFORTRAN])
        fi
    fi
    # On some platforms (e.g., Mac Darwin), we must also *link*
    # with the -fprofile-arcs -ftest-coverage option.
    AC_MSG_CHECKING([whether compilation with coverage analysis enabled works])
    AC_LINK_IFELSE([AC_LANG_SOURCE([int main(int argc, char **argv){return 1;}])],
                   [AC_MSG_RESULT([yes])],
                   [AC_MSG_RESULT([no])
                    AC_MSG_ERROR([Unable to link programs when coverage analysis enabled])])

    # Test for the routines that we need to use to ensure that the
    # data files are (usually) written out
    # FIXME: Some versions of Linux provide usleep, but it rounds times
    # up to the next second (!)
    AC_CHECK_FUNCS([usleep])

    # NOTE: using a "pac_cv_" prefix but not caching because of xFLAGS "side effects"
    pac_cv_use_coverage=yes
    AC_DEFINE([USE_COVERAGE],[1],[Define if performing coverage tests])
fi
AM_CONDITIONAL([BUILD_COVERAGE],[test "X$pac_cv_use_coverage" = "Xyes"])
])
24 | # 25 | # This program is distributed in the hope that it will be useful, but 26 | # WITHOUT ANY WARRANTY; without even the implied warranty of 27 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 28 | # Public License for more details. 29 | # 30 | # You should have received a copy of the GNU General Public License along 31 | # with this program. If not, see <http://www.gnu.org/licenses/>. 32 | # 33 | # As a special exception, the respective Autoconf Macro's copyright owner 34 | # gives unlimited permission to copy, distribute and modify the configure 35 | # scripts that are the output of Autoconf when processing the Macro. You 36 | # need not follow the terms of the GNU General Public License when using 37 | # or distributing such scripts, even though portions of the text of the 38 | # Macro appear in them. The GNU General Public License (GPL) does govern 39 | # all other use of the material that constitutes the Autoconf Macro. 40 | # 41 | # This special exception to the GPL applies to versions of the Autoconf 42 | # Macro released by the Autoconf Archive. When you make and distribute a 43 | # modified version of the Autoconf Macro, you may extend this special 44 | # exception to the GPL to apply to your modified version as well. 45 | 46 | AC_DEFUN([AX_TLS], [ 47 | AC_MSG_CHECKING(for thread local storage specifier) 48 | AC_CACHE_VAL(ac_cv_tls, [ 49 | ax_tls_keywords="__thread __declspec(thread) none" 50 | for ax_tls_keyword in $ax_tls_keywords; do 51 | case $ax_tls_keyword in 52 | none) ac_cv_tls=none ; break ;; 53 | *) 54 | # MPICH modification: This was an AC_TRY_COMPILE before, but 55 | # Darwin with non-standard compilers will accept __thread at 56 | # compile time but fail to link due to an undefined 57 | # "__emutls_get_address" symbol unless -lgcc_eh is added to the 58 | # link line. 
59 | AC_LINK_IFELSE( 60 | [AC_LANG_PROGRAM([$ax_tls_keyword int bar = 5;],[++bar;])], 61 | [ac_cv_tls=$ax_tls_keyword ; break], 62 | [ac_cv_tls=none]) 63 | esac 64 | done 65 | ]) 66 | 67 | if test "$ac_cv_tls" != "none"; then 68 | # MPICH modification: this was "TLS" before instead of 69 | # "MPIU_TLS_SPECIFIER", but TLS had a reasonably high chance of conflicting 70 | # with a system library. 71 | AC_DEFINE_UNQUOTED([MPIU_TLS_SPECIFIER], $ac_cv_tls, [If the compiler supports a TLS storage class define it to that here]) 72 | fi 73 | AC_MSG_RESULT($ac_cv_tls) 74 | ]) 75 | -------------------------------------------------------------------------------- /maint/Version.base.m4: -------------------------------------------------------------------------------- 1 | dnl 2 | dnl this m4 file expects to be processed with "autom4te -l M4sugar" 3 | dnl 4 | m4_init 5 | 6 | dnl get the real version values 7 | m4_include([maint/version.m4])dnl 8 | 9 | dnl The m4sugar language switches the default diversion to "KILL", and causes 10 | dnl all normal output to be thrown away. We switch to the default (0) diversion 11 | dnl to restore output. 12 | m4_divert_push([])dnl 13 | dnl 14 | dnl now dump out a shell script header for those looking to change the version string 15 | # This shell script is no longer the canonical version number script, but rather 16 | # a byproduct of running ./maint/updatefiles using maint/Version.base.m4 as 17 | # input. The real version info is contained in maint/version.m4 instead. It is 18 | # intentionally not managed by the makefile system and may not be up to date at 19 | # all times w.r.t. the version.m4 file. 
#!/bin/bash
#
# Pre-commit hook, called by "git commit" with no arguments.  The hook
# exits with non-zero status after issuing an appropriate message if
# it wants to stop the commit.
#
# Two checks are performed:
#   1. no file with a non-ASCII name is being added (unless
#      hooks.allownonascii is set to true);
#   2. running maint/code-cleanup.bash on the staged C sources and
#      headers leaves them unchanged.
#
# To enable this hook, install it as .git/hooks/pre-commit.

if git rev-parse --verify HEAD >/dev/null 2>&1
then
    against=HEAD
else
    # Initial commit: diff against an empty tree object.  Ask git for the
    # id instead of hard-coding it so it is valid for the repository's
    # hash algorithm.
    against=$(git hash-object -t tree /dev/null)
fi

# If you want to allow non-ASCII filenames set this variable to true.
allownonascii=$(git config --bool hooks.allownonascii)

# Redirect output to stderr.
exec 1>&2

# Cross platform projects tend to avoid non-ASCII filenames; prevent
# them from being added to the repository. We exploit the fact that the
# printable range starts at the space character and ends with tilde.
if [ "$allownonascii" != "true" ] &&
    # Note that the use of brackets around a tr range is ok here, (it's
    # even required, for portability to Solaris 10's /usr/bin/tr), since
    # the square bracket bytes happen to fall in the designated range.
    test $(git diff --cached --name-only --diff-filter=A -z "$against" |
           LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0
then
    cat <<\EOF
Error: Attempt to add a non-ASCII file name.

This can cause problems if you want to work with people on other platforms.

To be portable it is advisable to rename the file.

If you know what you are doing you can disable this check using:

  git config hooks.allownonascii true
EOF
    exit 1
fi

MIRROR=/tmp/${USER}/yaksa-tmp-mirror
TMP_FILENAME=/tmp/${USER}/yaksa-tmp-file

# Checkout a copy of the current index into MIRROR
git checkout-index --prefix="$MIRROR/" -af

# Remove files from MIRROR which are no longer present in the index.
# Compare with $against (not HEAD) so this also works on the initial commit.
git diff-index --cached --name-only --diff-filter=D -z "$against" | \
    (cd "$MIRROR" && xargs -0 rm -f --)

# This will check the previous commit again when not amending a commit, but that
# should be ok if the patches are correct.  Fall back to $against when HEAD has
# no parent (initial or root commit), where HEAD~1 does not resolve.
if git rev-parse --verify HEAD~1 >/dev/null 2>&1
then
    base=HEAD~1
else
    base=$against
fi
filestring=$(git diff --cached --name-only --diff-filter=ACM "$base")

# Everything else happens in the temporary build tree; never run the cleanup
# script against the real working tree if the mirror is missing.
pushd "$MIRROR" > /dev/null || exit 1

ret=0

# This won't work if we ever have a file with a space in the name
for file in $filestring
do
    if [[ ($file == *.c || $file == *.h || $file == *.c.in || $file == *.h.in) &&
          !($file == *mpi.h.in || $file == *mpio.h.in || $file == *mpitestcxx.h || $file == src/mpid/ch3/* || $file == doc/*) ]]; then
        # keep a pristine copy, clean the mirrored file in place, then compare
        cp "${file}" "${TMP_FILENAME}"
        maint/code-cleanup.bash "${file}"
        # --no-index: compare the two files on disk; exits non-zero if they differ
        git --no-pager diff --no-index "${file}" "${TMP_FILENAME}"
        if [ $? != 0 ] ; then
            ret=1
        fi
    fi
done

# Leave the mirror before deleting it, and on every exit path.
popd > /dev/null

rm -rf "${MIRROR}" "${TMP_FILENAME}"

if [ $ret != 0 ] ; then
    RED='\033[0;31m'
    NC='\033[0m' # No Color
    echo -e "${RED}== CODE CLEANUP SCRIPT FAILED ==${NC}"
    exit $ret
fi
def display(OUTFILE, *argv):
    """Write the given string fragments to OUTFILE as one indented line.

    The current nesting depth is kept in ``display.indent``.  Each fragment
    containing '}' lowers the depth by one before anything is written; the
    indentation unit is then written once per level, followed by the
    fragments in order.  Each fragment containing '{' raises the depth by
    one as soon as it has been written, so it takes effect on later calls.
    """
    # closing braces dedent the line they appear on
    closers = [piece for piece in argv if piece.find('}') != -1]
    display.indent -= len(closers)

    for _level in range(display.indent):
        OUTFILE.write(" ")

    for piece in argv:
        OUTFILE.write(piece)
        # opening braces indent whatever is written afterwards
        if piece.find('{') != -1:
            display.indent += 1

# nesting depth shared by all display() calls
display.indent = 0
def generate_darrays(derived_types, darraylist, maxlevels):
    """Append every nesting of derived types up to ``maxlevels`` deep.

    ``darraylist`` is extended in place.  First the empty list is appended,
    then, for each depth from ``maxlevels`` down to 1, every ordered
    selection (with repetition) of that many entries of ``derived_types``
    is appended as a list.  Selections of one depth are produced in
    lexicographic order of their indices, the last position varying
    fastest.

    An empty ``derived_types`` contributes no selections, so only the empty
    list is appended.
    """
    from itertools import product

    darraylist.append([])
    for level in range(maxlevels, 0, -1):
        # product() enumerates in exactly the odometer order described above
        for combo in product(derived_types, repeat=level):
            darraylist.append(list(combo))
/src/backend/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | include $(top_srcdir)/src/backend/cuda/Makefile.mk 7 | include $(top_srcdir)/src/backend/seq/Makefile.mk 8 | include $(top_srcdir)/src/backend/src/Makefile.mk 9 | include $(top_srcdir)/src/backend/ze/Makefile.mk 10 | include $(top_srcdir)/src/backend/hip/Makefile.mk 11 | -------------------------------------------------------------------------------- /src/backend/cuda/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | EXTRA_DIST += $(top_srcdir)/src/backend/cuda/cudalt.sh 7 | 8 | if BUILD_CUDA_BACKEND 9 | include $(top_srcdir)/src/backend/cuda/include/Makefile.mk 10 | include $(top_srcdir)/src/backend/cuda/hooks/Makefile.mk 11 | include $(top_srcdir)/src/backend/cuda/md/Makefile.mk 12 | include $(top_srcdir)/src/backend/cuda/pup/Makefile.mk 13 | else 14 | include $(top_srcdir)/src/backend/cuda/stub/Makefile.mk 15 | endif !BUILD_CUDA_BACKEND 16 | 17 | .cu.lo: 18 | @if $(AM_V_P) ; then \ 19 | $(top_srcdir)/src/backend/cuda/cudalt.sh --verbose $@ \ 20 | $(NVCC) $(AM_CPPFLAGS) $(CUDA_GENCODE) -c $< ; \ 21 | else \ 22 | echo " NVCC $@" ; \ 23 | $(top_srcdir)/src/backend/cuda/cudalt.sh $@ $(NVCC) $(AM_CPPFLAGS) $(CUDA_GENCODE) -c $< ; \ 24 | fi 25 | -------------------------------------------------------------------------------- /src/backend/cuda/cudalt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ## 3 | ## Copyright (C) by Argonne National Laboratory 4 | ## See COPYRIGHT in top-level directory 5 | ## 6 | 7 | set -e 8 | 9 | verbose= 10 | if test "$1" = "--verbose" ; then 11 | verbose=1 12 | shift 13 | fi 14 | 15 | LO_FILEPATH="$1" 16 | 
O_FILEPATH="${LO_FILEPATH%%.lo}.o" 17 | shift # handle the rest of the arguments together with ${@} 18 | 19 | LO_DIR=$(dirname $O_FILEPATH) 20 | O_FILENAME=$(basename $O_FILEPATH) 21 | 22 | LOCAL_PIC_DIR=".libs/" 23 | LOCAL_NPIC_DIR="" 24 | PIC_DIR="$LO_DIR/$LOCAL_PIC_DIR" 25 | NPIC_DIR="$LO_DIR/$LOCAL_NPIC_DIR" 26 | 27 | PIC_FILEPATH="$PIC_DIR/$O_FILENAME" 28 | NPIC_FILEPATH="$NPIC_DIR/$O_FILENAME" 29 | LOCAL_PIC_FILEPATH="$LOCAL_PIC_DIR$O_FILENAME" 30 | LOCAL_NPIC_FILEPATH="$LOCAL_NPIC_DIR$O_FILENAME" 31 | 32 | if test ! -d "$PIC_DIR" ; then 33 | mkdir -p "$PIC_DIR" 34 | fi 35 | 36 | CMD="${@} -Xcompiler -fPIC -o $PIC_FILEPATH" 37 | if test "$verbose" ; then echo "$CMD" ; fi 38 | eval "$CMD" 39 | 40 | CMD="${@} -o $NPIC_FILEPATH" 41 | if test "$verbose" ; then echo "$CMD" ; fi 42 | eval "$CMD" 43 | 44 | # use the genrated libtool at top_srcdir 45 | LIBTOOL_VERSION=$(./libtool --version | head -n 1) 46 | 47 | cat > $LO_FILEPATH < 9 | #include 10 | #include 11 | 12 | int yaksuri_cudai_info_create_hook(yaksi_info_s * info) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | yaksuri_cudai_info_s *infopriv; 16 | 17 | infopriv = (yaksuri_cudai_info_s *) malloc(sizeof(yaksuri_cudai_info_s)); 18 | YAKSU_ERR_CHKANDJUMP(!infopriv, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 19 | 20 | /* set default values for info keys */ 21 | infopriv->iov_pack_threshold = YAKSURI_CUDAI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 22 | infopriv->iov_unpack_threshold = YAKSURI_CUDAI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 23 | infopriv->inbuf.is_valid = false; 24 | infopriv->outbuf.is_valid = false; 25 | 26 | info->backend.cuda.priv = (void *) infopriv; 27 | 28 | fn_exit: 29 | return rc; 30 | fn_fail: 31 | goto fn_exit; 32 | } 33 | 34 | int yaksuri_cudai_info_free_hook(yaksi_info_s * info) 35 | { 36 | free(info->backend.cuda.priv); 37 | 38 | return YAKSA_SUCCESS; 39 | } 40 | 41 | int yaksuri_cudai_info_keyval_append(yaksi_info_s * info, const char *key, const void *val, 42 | unsigned int vallen) 43 | { 44 | 
yaksuri_cudai_info_s *infopriv = (yaksuri_cudai_info_s *) info->backend.cuda.priv; 45 | /* NOTE(review): the two threshold keys below store the pointer value of val itself (cast, not dereferenced), whereas the *_ptr_attr keys copy bytes from the memory val points to; confirm which convention callers are expected to follow. Keys that match none of the four names are silently ignored. */ 46 | if (!strncmp(key, "yaksa_cuda_iov_pack_threshold", YAKSA_INFO_MAX_KEYLEN)) { 47 | assert(vallen == sizeof(uintptr_t)); 48 | infopriv->iov_pack_threshold = (uintptr_t) val; 49 | } else if (!strncmp(key, "yaksa_cuda_iov_unpack_threshold", YAKSA_INFO_MAX_KEYLEN)) { 50 | assert(vallen == sizeof(uintptr_t)); 51 | infopriv->iov_unpack_threshold = (uintptr_t) val; 52 | } else if (!strncmp(key, "yaksa_cuda_inbuf_ptr_attr", YAKSA_INFO_MAX_KEYLEN)) { 53 | assert(vallen == sizeof(struct cudaPointerAttributes)); 54 | infopriv->inbuf.is_valid = true; 55 | memcpy(&infopriv->inbuf.attr, val, sizeof(struct cudaPointerAttributes)); 56 | } else if (!strncmp(key, "yaksa_cuda_outbuf_ptr_attr", YAKSA_INFO_MAX_KEYLEN)) { 57 | assert(vallen == sizeof(struct cudaPointerAttributes)); 58 | infopriv->outbuf.is_valid = true; 59 | memcpy(&infopriv->outbuf.attr, val, sizeof(struct cudaPointerAttributes)); 60 | } 61 | 62 | return YAKSA_SUCCESS; 63 | } 64 | -------------------------------------------------------------------------------- /src/backend/cuda/hooks/yaksuri_cudai_type_hooks.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include "yaksi.h" 9 | #include "yaksu.h" 10 | #include "yaksuri_cudai.h" 11 | /* Recursively compute a type's element count: a builtin contributes type->num_contig, contig and hvector scale the child's count by their own count (and blocklength), and resized forwards the child's count unchanged. */ 12 | static uintptr_t get_num_elements(yaksi_type_s * type) 13 | { 14 | switch (type->kind) { 15 | case YAKSI_TYPE_KIND__BUILTIN: 16 | return type->num_contig; 17 | 18 | case YAKSI_TYPE_KIND__CONTIG: 19 | return type->u.contig.count * get_num_elements(type->u.contig.child); 20 | 21 | case YAKSI_TYPE_KIND__RESIZED: 22 | return get_num_elements(type->u.resized.child); 23 | 24 | case YAKSI_TYPE_KIND__HVECTOR: 25 | return type->u.hvector.count * type->u.hvector.blocklength * 26 | get_num_elements(type->u.hvector.child); 27 | 28 | 
case YAKSI_TYPE_KIND__BLKHINDX: 29 | return type->u.blkhindx.count * type->u.blkhindx.blocklength * 30 | get_num_elements(type->u.blkhindx.child); 31 | 32 | case YAKSI_TYPE_KIND__HINDEXED: 33 | { 34 | uintptr_t nelems = 0; 35 | for (int i = 0; i < type->u.hindexed.count; i++) 36 | nelems += type->u.hindexed.array_of_blocklengths[i]; 37 | nelems *= get_num_elements(type->u.hindexed.child); 38 | return nelems; 39 | } 40 | 41 | default: 42 | return 0; 43 | } 44 | } 45 | 46 | int yaksuri_cudai_type_create_hook(yaksi_type_s * type) 47 | { 48 | int rc = YAKSA_SUCCESS; 49 | 50 | type->backend.cuda.priv = malloc(sizeof(yaksuri_cudai_type_s)); 51 | YAKSU_ERR_CHKANDJUMP(!type->backend.cuda.priv, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 52 | 53 | yaksuri_cudai_type_s *cuda; 54 | cuda = (yaksuri_cudai_type_s *) type->backend.cuda.priv; 55 | 56 | cuda->num_elements = get_num_elements(type); 57 | cuda->md = NULL; 58 | pthread_mutex_init(&cuda->mdmutex, NULL); 59 | 60 | rc = yaksuri_cudai_populate_pupfns(type); 61 | YAKSU_ERR_CHECK(rc, fn_fail); 62 | 63 | fn_exit: 64 | return rc; 65 | fn_fail: 66 | goto fn_exit; 67 | } 68 | 69 | int yaksuri_cudai_type_free_hook(yaksi_type_s * type) 70 | { 71 | int rc = YAKSA_SUCCESS; 72 | yaksuri_cudai_type_s *cuda = (yaksuri_cudai_type_s *) type->backend.cuda.priv; 73 | cudaError_t cerr; 74 | 75 | pthread_mutex_destroy(&cuda->mdmutex); 76 | if (cuda->md) { 77 | if (type->kind == YAKSI_TYPE_KIND__BLKHINDX) { 78 | assert(cuda->md->u.blkhindx.array_of_displs); 79 | cerr = cudaFree((void *) cuda->md->u.blkhindx.array_of_displs); 80 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 81 | } else if (type->kind == YAKSI_TYPE_KIND__HINDEXED) { 82 | assert(cuda->md->u.hindexed.array_of_displs); 83 | cerr = cudaFree((void *) cuda->md->u.hindexed.array_of_displs); 84 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 85 | 86 | assert(cuda->md->u.hindexed.array_of_blocklengths); 87 | cerr = cudaFree((void *) 
cuda->md->u.hindexed.array_of_blocklengths); 88 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 89 | } 90 | 91 | cerr = cudaFree(cuda->md); 92 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 93 | } 94 | 95 | free(cuda); 96 | 97 | fn_exit: 98 | return rc; 99 | fn_fail: 100 | goto fn_exit; 101 | } 102 | -------------------------------------------------------------------------------- /src/backend/cuda/include/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/cuda/include 7 | 8 | noinst_HEADERS += \ 9 | src/backend/cuda/include/yaksuri_cuda_pre.h \ 10 | src/backend/cuda/include/yaksuri_cuda_post.h \ 11 | src/backend/cuda/include/yaksuri_cudai_base.h \ 12 | src/backend/cuda/include/yaksuri_cudai.h 13 | -------------------------------------------------------------------------------- /src/backend/cuda/include/yaksuri_cuda_post.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_CUDA_POST_H_INCLUDED 7 | #define YAKSURI_CUDA_POST_H_INCLUDED 8 | 9 | int yaksuri_cuda_init_hook(yaksur_gpudriver_hooks_s ** hooks); 10 | 11 | #endif /* YAKSURI_CUDA_POST_H_INCLUDED */ 12 | -------------------------------------------------------------------------------- /src/backend/cuda/include/yaksuri_cuda_pre.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_CUDA_PRE_H_INCLUDED 7 | #define YAKSURI_CUDA_PRE_H_INCLUDED 8 | 9 | /* This is a API header for the cuda device and should not include any 10 | * internal headers, except for yaksa_config.h, in order to 
get the 11 | * configure checks. */ 12 | 13 | typedef struct { 14 | void *priv; 15 | } yaksuri_cuda_type_s; 16 | 17 | typedef struct { 18 | void *priv; 19 | } yaksuri_cuda_info_s; 20 | 21 | #endif /* YAKSURI_CUDA_PRE_H_INCLUDED */ 22 | -------------------------------------------------------------------------------- /src/backend/cuda/include/yaksuri_cudai_base.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_CUDAI_BASE_H_INCLUDED 7 | #define YAKSURI_CUDAI_BASE_H_INCLUDED 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define YAKSURI_CUDAI_CUDA_ERR_CHECK(cerr) \ 15 | do { \ 16 | if (cerr != cudaSuccess) { \ 17 | fprintf(stderr, "CUDA Error (%s:%s,%d): %s\n", __func__, __FILE__, __LINE__, cudaGetErrorString(cerr)); \ 18 | } \ 19 | } while (0) 20 | 21 | typedef struct cudai_stream_s { 22 | bool created; 23 | cudaStream_t stream; 24 | } cudai_stream; 25 | 26 | typedef struct { 27 | int ndevices; 28 | cudai_stream *streams; /* array of lazily created streams, one for each device */ 29 | int **p2p; /* p2p[sdev][ddev] */ 30 | } yaksuri_cudai_global_s; 31 | extern yaksuri_cudai_global_s yaksuri_cudai_global; 32 | 33 | typedef struct yaksuri_cudai_md_s { 34 | union { 35 | struct { 36 | intptr_t count; 37 | intptr_t stride; 38 | struct yaksuri_cudai_md_s *child; 39 | } contig; 40 | struct { 41 | struct yaksuri_cudai_md_s *child; 42 | } resized; 43 | struct { 44 | intptr_t count; 45 | intptr_t blocklength; 46 | intptr_t stride; 47 | struct yaksuri_cudai_md_s *child; 48 | } hvector; 49 | struct { 50 | intptr_t count; 51 | intptr_t blocklength; 52 | intptr_t *array_of_displs; 53 | struct yaksuri_cudai_md_s *child; 54 | } blkhindx; 55 | struct { 56 | intptr_t count; 57 | intptr_t *array_of_blocklengths; 58 | intptr_t *array_of_displs; 59 | struct yaksuri_cudai_md_s *child; 60 | } hindexed; 61 | 
} u; 62 | 63 | uintptr_t extent; 64 | uintptr_t num_elements; 65 | } yaksuri_cudai_md_s; 66 | 67 | #endif /* YAKSURI_CUDAI_BASE_H_INCLUDED */ 68 | -------------------------------------------------------------------------------- /src/backend/cuda/md/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/cuda/md 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/cuda/md/yaksuri_cudai_md.c 10 | -------------------------------------------------------------------------------- /src/backend/cuda/pup/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/cuda/pup 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/cuda/pup/yaksuri_cudai_event.c \ 10 | src/backend/cuda/pup/yaksuri_cudai_get_ptr_attr.c 11 | 12 | include src/backend/cuda/pup/Makefile.pup.mk 13 | include src/backend/cuda/pup/Makefile.populate_pupfns.mk 14 | -------------------------------------------------------------------------------- /src/backend/cuda/pup/yaksuri_cudai_event.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include "yaksi.h" 10 | #include "yaksu.h" 11 | #include "yaksuri_cudai.h" 12 | /* Allocate an event wrapper, create a CUDA event and record it on the stream of `device` (temporarily switching the current device if needed), then return the wrapper through event_. rc stays YAKSA_SUCCESS unless the allocation or a CUDA call fails. NOTE(review): failure paths after the allocation do not free the wrapper. */ 13 | int yaksuri_cudai_event_record(int device, void **event_) 14 | { 15 | int rc = YAKSA_SUCCESS; 16 | cudaError_t cerr; 17 | yaksuri_cudai_event_s *event; 18 | 19 | event = (yaksuri_cudai_event_s *) malloc(sizeof(yaksuri_cudai_event_s)); 20 | YAKSU_ERR_CHKANDJUMP(!event, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); /* bail out before &event->event is formed from a NULL wrapper */ 21 | int cur_device; 22 | cerr = cudaGetDevice(&cur_device); 23 | 
YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 24 | 25 | if (cur_device != device) { 26 | cerr = cudaSetDevice(device); 27 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 28 | } 29 | 30 | cerr = cudaEventCreate(&event->event); 31 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 32 | 33 | cerr = cudaEventRecord(event->event, *yaksuri_cudai_get_stream(device)); 34 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 35 | 36 | if (cur_device != device) { 37 | cerr = cudaSetDevice(cur_device); 38 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 39 | } 40 | 41 | *event_ = event; 42 | 43 | fn_exit: 44 | return rc; 45 | fn_fail: 46 | goto fn_exit; 47 | } 48 | 49 | int yaksuri_cudai_event_query(void *event_, int *completed) 50 | { 51 | int rc = YAKSA_SUCCESS; 52 | yaksuri_cudai_event_s *event = (yaksuri_cudai_event_s *) event_; 53 | 54 | cudaError_t cerr = cudaEventQuery(event->event); 55 | if (cerr == cudaSuccess) { 56 | cerr = cudaEventDestroy(event->event); 57 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 58 | 59 | free(event); 60 | 61 | *completed = 1; 62 | } else if (cerr == cudaErrorNotReady) { 63 | *completed = 0; 64 | } else { 65 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 66 | } 67 | 68 | fn_exit: 69 | return rc; 70 | fn_fail: 71 | goto fn_exit; 72 | } 73 | 74 | int yaksuri_cudai_add_dependency(int device1, int device2) 75 | { 76 | int rc = YAKSA_SUCCESS; 77 | cudaError_t cerr; 78 | 79 | /* create a temporary event on the first device */ 80 | int cur_device; 81 | cerr = cudaGetDevice(&cur_device); 82 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 83 | 84 | if (cur_device != device1) { 85 | cerr = cudaSetDevice(device1); 86 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 87 | } 88 | 89 | cudaEvent_t event; 90 | cerr = cudaEventCreate(&event); 91 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 92 | 93 | cerr = cudaEventRecord(event, *yaksuri_cudai_get_stream(device1)); 94 | 
YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 95 | 96 | if (cur_device != device1) { 97 | cerr = cudaSetDevice(cur_device); 98 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 99 | } 100 | 101 | /* add a dependency on that event for the second device */ 102 | cerr = cudaStreamWaitEvent(*yaksuri_cudai_get_stream(device2), event, 0); 103 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 104 | 105 | /* destroy the temporary event */ 106 | cerr = cudaEventDestroy(event); 107 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 108 | 109 | fn_exit: 110 | return rc; 111 | fn_fail: 112 | goto fn_exit; 113 | } 114 | 115 | int yaksuri_cudai_launch_hostfn(void *stream, yaksur_hostfn_t fn, void *userData) 116 | { 117 | int rc = YAKSA_SUCCESS; 118 | cudaError_t cerr; 119 | cerr = cudaLaunchHostFunc(*(cudaStream_t *) stream, fn, userData); 120 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 121 | 122 | fn_exit: 123 | return rc; 124 | fn_fail: 125 | goto fn_exit; 126 | } 127 | -------------------------------------------------------------------------------- /src/backend/cuda/pup/yaksuri_cudai_get_ptr_attr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include "yaksuri_cudai.h" 9 | #include 10 | #include 11 | 12 | static void attr_convert(struct cudaPointerAttributes cattr, yaksur_ptr_attr_s * attr) 13 | { 14 | if (cattr.type == cudaMemoryTypeUnregistered) { 15 | attr->type = YAKSUR_PTR_TYPE__UNREGISTERED_HOST; 16 | attr->device = -1; 17 | } else if (cattr.type == cudaMemoryTypeHost) { 18 | attr->type = YAKSUR_PTR_TYPE__REGISTERED_HOST; 19 | attr->device = -1; 20 | } else if (cattr.type == cudaMemoryTypeManaged) { 21 | attr->type = YAKSUR_PTR_TYPE__MANAGED; 22 | attr->device = -1; 23 | } else { 24 | attr->type = YAKSUR_PTR_TYPE__GPU; 25 | attr->device = 
cattr.device; 26 | } 27 | } 28 | /* Fill inattr/outattr with the yaksa pointer attributes of inbuf/outbuf. When info is non-NULL and its cached entry for a buffer is marked valid, that cached CUDA attribute is converted; otherwise cudaPointerGetAttributes() is queried for the buffer. Returns rc, which starts as YAKSA_SUCCESS and is presumably set by YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP on a CUDA failure (macro not visible here). */ 29 | int yaksuri_cudai_get_ptr_attr(const void *inbuf, void *outbuf, yaksi_info_s * info, 30 | yaksur_ptr_attr_s * inattr, yaksur_ptr_attr_s * outattr) 31 | { 32 | int rc = YAKSA_SUCCESS; 33 | yaksuri_cudai_info_s *infopriv; 34 | 35 | if (info) { 36 | infopriv = (yaksuri_cudai_info_s *) info->backend.cuda.priv; 37 | } else { 38 | infopriv = NULL; 39 | } 40 | 41 | if (infopriv && infopriv->inbuf.is_valid) { 42 | attr_convert(infopriv->inbuf.attr, inattr); 43 | } else { 44 | struct cudaPointerAttributes attr; 45 | cudaError_t cerr = cudaPointerGetAttributes(&attr, inbuf); 46 | if (cerr == cudaErrorInvalidValue) { /* treated as unregistered host memory, not as a failure */ 47 | attr.type = cudaMemoryTypeUnregistered; 48 | attr.device = -1; 49 | cerr = cudaSuccess; 50 | } 51 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 52 | attr_convert(attr, inattr); 53 | } 54 | /* same lookup for the output buffer */ 55 | if (infopriv && infopriv->outbuf.is_valid) { 56 | attr_convert(infopriv->outbuf.attr, outattr); 57 | } else { 58 | struct cudaPointerAttributes attr; 59 | cudaError_t cerr = cudaPointerGetAttributes(&attr, outbuf); 60 | if (cerr == cudaErrorInvalidValue) { 61 | attr.type = cudaMemoryTypeUnregistered; 62 | attr.device = -1; 63 | cerr = cudaSuccess; 64 | } 65 | YAKSURI_CUDAI_CUDA_ERR_CHKANDJUMP(cerr, rc, fn_fail); 66 | attr_convert(attr, outattr); 67 | } 68 | 69 | fn_exit: 70 | return rc; 71 | fn_fail: 72 | goto fn_exit; 73 | } 74 | -------------------------------------------------------------------------------- /src/backend/cuda/stub/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/cuda/stub 7 | 8 | noinst_HEADERS += \ 9 | src/backend/cuda/stub/yaksuri_cuda_pre.h \ 10 | src/backend/cuda/stub/yaksuri_cuda_post.h 11 | -------------------------------------------------------------------------------- 
/src/backend/cuda/stub/yaksuri_cuda_post.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_CUDA_POST_H_INCLUDED 7 | #define YAKSURI_CUDA_POST_H_INCLUDED 8 | 9 | static int yaksuri_cuda_init_hook(yaksur_gpudriver_hooks_s ** hooks) ATTRIBUTE((unused)); 10 | static int yaksuri_cuda_init_hook(yaksur_gpudriver_hooks_s ** hooks) 11 | { 12 | *hooks = NULL; 13 | 14 | return YAKSA_SUCCESS; 15 | } 16 | 17 | #endif /* YAKSURI_CUDA_POST_H_INCLUDED */ 18 | -------------------------------------------------------------------------------- /src/backend/cuda/stub/yaksuri_cuda_pre.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_CUDA_PRE_H_INCLUDED 7 | #define YAKSURI_CUDA_PRE_H_INCLUDED 8 | 9 | /* This is a API header for the cuda device and should not include any 10 | * internal headers, except for yaksa_config.h, in order to get the 11 | * configure checks. 
*/ 12 | 13 | typedef int yaksuri_cuda_type_s; 14 | typedef int yaksuri_cuda_info_s; 15 | 16 | #endif /* YAKSURI_CUDA_PRE_H_INCLUDED */ 17 | -------------------------------------------------------------------------------- /src/backend/hip/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | EXTRA_DIST += $(top_srcdir)/src/backend/hip/hiplt.sh 7 | 8 | if BUILD_HIP_BACKEND 9 | include $(top_srcdir)/src/backend/hip/include/Makefile.mk 10 | include $(top_srcdir)/src/backend/hip/hooks/Makefile.mk 11 | include $(top_srcdir)/src/backend/hip/md/Makefile.mk 12 | include $(top_srcdir)/src/backend/hip/pup/Makefile.mk 13 | else 14 | include $(top_srcdir)/src/backend/hip/stub/Makefile.mk 15 | endif !BUILD_HIP_BACKEND 16 | 17 | .hip.lo: 18 | @if $(AM_V_P) ; then \ 19 | $(top_srcdir)/src/backend/hip/hiplt.sh --verbose $@ \ 20 | $(HIPCC) $(AM_CPPFLAGS) -g $(HIP_GENCODE) -c $< ; \ 21 | else \ 22 | echo " HIPCC $@" ; \ 23 | $(top_srcdir)/src/backend/hip/hiplt.sh $@ $(HIPCC) $(AM_CPPFLAGS) -g $(HIP_GENCODE) -c $< ; \ 24 | fi 25 | 26 | -------------------------------------------------------------------------------- /src/backend/hip/hiplt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ## 3 | ## Copyright (C) by Argonne National Laboratory 4 | ## See COPYRIGHT in top-level directory 5 | ## 6 | 7 | set -e 8 | 9 | verbose= 10 | if test "$1" = "--verbose" ; then 11 | verbose=1 12 | shift 13 | fi 14 | 15 | LO_FILEPATH="$1" 16 | O_FILEPATH="${LO_FILEPATH%%.lo}.o" 17 | shift # handle the rest of the arguments together with ${@} 18 | 19 | LO_DIR=$(dirname $O_FILEPATH) 20 | O_FILENAME=$(basename $O_FILEPATH) 21 | 22 | LOCAL_PIC_DIR=".libs/" 23 | LOCAL_NPIC_DIR="" 24 | PIC_DIR="$LO_DIR/$LOCAL_PIC_DIR" 25 | NPIC_DIR="$LO_DIR/$LOCAL_NPIC_DIR" 26 | 27 | PIC_FILEPATH="$PIC_DIR/$O_FILENAME" 
28 | NPIC_FILEPATH="$NPIC_DIR/$O_FILENAME" 29 | LOCAL_PIC_FILEPATH="$LOCAL_PIC_DIR$O_FILENAME" 30 | LOCAL_NPIC_FILEPATH="$LOCAL_NPIC_DIR$O_FILENAME" 31 | 32 | if test ! -d "$PIC_DIR" ; then 33 | mkdir -p "$PIC_DIR" 34 | fi 35 | 36 | CMD="${@} -fPIC -o $PIC_FILEPATH" 37 | if test "$verbose" ; then echo "$CMD" ; fi 38 | eval "$CMD" 39 | 40 | CMD="${@} -o $NPIC_FILEPATH" 41 | if test "$verbose" ; then echo "$CMD" ; fi 42 | eval "$CMD" 43 | 44 | LIBTOOL_VERSION=$(./libtool --version | head -n 1) 45 | 46 | cat > $LO_FILEPATH < 9 | #include 10 | #include 11 | 12 | int yaksuri_hipi_info_create_hook(yaksi_info_s * info) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | yaksuri_hipi_info_s *infopriv; 16 | 17 | infopriv = (yaksuri_hipi_info_s *) malloc(sizeof(yaksuri_hipi_info_s)); 18 | YAKSU_ERR_CHKANDJUMP(!infopriv, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 19 | 20 | /* set default values for info keys */ 21 | infopriv->iov_pack_threshold = YAKSURI_HIPI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 22 | infopriv->iov_unpack_threshold = YAKSURI_HIPI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 23 | infopriv->inbuf.is_valid = false; 24 | infopriv->outbuf.is_valid = false; 25 | 26 | info->backend.hip.priv = (void *) infopriv; 27 | 28 | fn_exit: 29 | return rc; 30 | fn_fail: 31 | goto fn_exit; 32 | } 33 | 34 | int yaksuri_hipi_info_free_hook(yaksi_info_s * info) 35 | { 36 | free(info->backend.hip.priv); 37 | 38 | return YAKSA_SUCCESS; 39 | } 40 | 41 | int yaksuri_hipi_info_keyval_append(yaksi_info_s * info, const char *key, const void *val, 42 | unsigned int vallen) 43 | { 44 | yaksuri_hipi_info_s *infopriv = (yaksuri_hipi_info_s *) info->backend.hip.priv; 45 | 46 | if (!strncmp(key, "yaksa_hip_iov_pack_threshold", YAKSA_INFO_MAX_KEYLEN)) { 47 | assert(vallen == sizeof(uintptr_t)); 48 | infopriv->iov_pack_threshold = (uintptr_t) val; 49 | } else if (!strncmp(key, "yaksa_hip_iov_unpack_threshold", YAKSA_INFO_MAX_KEYLEN)) { 50 | assert(vallen == sizeof(uintptr_t)); 51 | infopriv->iov_unpack_threshold = (uintptr_t) 
val; 52 | } else if (!strncmp(key, "yaksa_hip_inbuf_ptr_attr", YAKSA_INFO_MAX_KEYLEN)) { 53 | assert(vallen == sizeof(struct hipPointerAttribute_t)); 54 | infopriv->inbuf.is_valid = true; 55 | memcpy(&infopriv->inbuf.attr, val, sizeof(struct hipPointerAttribute_t)); 56 | } else if (!strncmp(key, "yaksa_hip_outbuf_ptr_attr", YAKSA_INFO_MAX_KEYLEN)) { 57 | assert(vallen == sizeof(struct hipPointerAttribute_t)); 58 | infopriv->outbuf.is_valid = true; 59 | memcpy(&infopriv->outbuf.attr, val, sizeof(struct hipPointerAttribute_t)); 60 | } 61 | 62 | return YAKSA_SUCCESS; 63 | } 64 | -------------------------------------------------------------------------------- /src/backend/hip/hooks/yaksuri_hipi_type_hooks.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include "yaksi.h" 9 | #include "yaksu.h" 10 | #include "yaksuri_hipi.h" 11 | 12 | static uintptr_t get_num_elements(yaksi_type_s * type) 13 | { 14 | switch (type->kind) { 15 | case YAKSI_TYPE_KIND__BUILTIN: 16 | return type->num_contig; 17 | 18 | case YAKSI_TYPE_KIND__CONTIG: 19 | return type->u.contig.count * get_num_elements(type->u.contig.child); 20 | 21 | case YAKSI_TYPE_KIND__RESIZED: 22 | return get_num_elements(type->u.resized.child); 23 | 24 | case YAKSI_TYPE_KIND__HVECTOR: 25 | return type->u.hvector.count * type->u.hvector.blocklength * 26 | get_num_elements(type->u.hvector.child); 27 | 28 | case YAKSI_TYPE_KIND__BLKHINDX: 29 | return type->u.blkhindx.count * type->u.blkhindx.blocklength * 30 | get_num_elements(type->u.blkhindx.child); 31 | 32 | case YAKSI_TYPE_KIND__HINDEXED: 33 | { 34 | uintptr_t nelems = 0; 35 | for (int i = 0; i < type->u.hindexed.count; i++) 36 | nelems += type->u.hindexed.array_of_blocklengths[i]; 37 | nelems *= get_num_elements(type->u.hindexed.child); 38 | return nelems; 39 | } 40 | 41 | default: 42 | return 0; 
43 | } 44 | } 45 | 46 | int yaksuri_hipi_type_create_hook(yaksi_type_s * type) 47 | { 48 | int rc = YAKSA_SUCCESS; 49 | 50 | type->backend.hip.priv = malloc(sizeof(yaksuri_hipi_type_s)); 51 | YAKSU_ERR_CHKANDJUMP(!type->backend.hip.priv, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 52 | 53 | yaksuri_hipi_type_s *hip; 54 | hip = (yaksuri_hipi_type_s *) type->backend.hip.priv; 55 | 56 | hip->num_elements = get_num_elements(type); 57 | hip->md = NULL; 58 | pthread_mutex_init(&hip->mdmutex, NULL); 59 | 60 | rc = yaksuri_hipi_populate_pupfns(type); 61 | YAKSU_ERR_CHECK(rc, fn_fail); 62 | 63 | fn_exit: 64 | return rc; 65 | fn_fail: 66 | goto fn_exit; 67 | } 68 | 69 | int yaksuri_hipi_type_free_hook(yaksi_type_s * type) 70 | { 71 | int rc = YAKSA_SUCCESS; 72 | yaksuri_hipi_type_s *hip = (yaksuri_hipi_type_s *) type->backend.hip.priv; 73 | hipError_t cerr; 74 | 75 | pthread_mutex_destroy(&hip->mdmutex); 76 | if (hip->md) { 77 | if (type->kind == YAKSI_TYPE_KIND__BLKHINDX) { 78 | assert(hip->md->u.blkhindx.array_of_displs); 79 | cerr = hipFree((void *) hip->md->u.blkhindx.array_of_displs); 80 | YAKSURI_HIPI_HIP_ERR_CHKANDJUMP(cerr, rc, fn_fail); 81 | } else if (type->kind == YAKSI_TYPE_KIND__HINDEXED) { 82 | assert(hip->md->u.hindexed.array_of_displs); 83 | cerr = hipFree((void *) hip->md->u.hindexed.array_of_displs); 84 | YAKSURI_HIPI_HIP_ERR_CHKANDJUMP(cerr, rc, fn_fail); 85 | 86 | assert(hip->md->u.hindexed.array_of_blocklengths); 87 | cerr = hipFree((void *) hip->md->u.hindexed.array_of_blocklengths); 88 | YAKSURI_HIPI_HIP_ERR_CHKANDJUMP(cerr, rc, fn_fail); 89 | } 90 | 91 | cerr = hipFree(hip->md); 92 | YAKSURI_HIPI_HIP_ERR_CHKANDJUMP(cerr, rc, fn_fail); 93 | } 94 | 95 | free(hip); 96 | 97 | fn_exit: 98 | return rc; 99 | fn_fail: 100 | goto fn_exit; 101 | } 102 | -------------------------------------------------------------------------------- /src/backend/hip/include/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## 
Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/hip/include 7 | 8 | noinst_HEADERS += \ 9 | src/backend/hip/include/yaksuri_hip_pre.h \ 10 | src/backend/hip/include/yaksuri_hip_post.h \ 11 | src/backend/hip/include/yaksuri_hipi_base.h \ 12 | src/backend/hip/include/yaksuri_hipi.h 13 | -------------------------------------------------------------------------------- /src/backend/hip/include/yaksuri_hip_post.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_HIP_POST_H_INCLUDED 7 | #define YAKSURI_HIP_POST_H_INCLUDED 8 | 9 | int yaksuri_hip_init_hook(yaksur_gpudriver_hooks_s ** hooks); 10 | 11 | #endif /* YAKSURI_HIP_POST_H_INCLUDED */ 12 | -------------------------------------------------------------------------------- /src/backend/hip/include/yaksuri_hip_pre.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_HIP_PRE_H_INCLUDED 7 | #define YAKSURI_HIP_PRE_H_INCLUDED 8 | 9 | /* This is a API header for the hip device and should not include any 10 | * internal headers, except for yaksa_config.h, in order to get the 11 | * configure checks. 
*/ 12 | 13 | typedef struct { 14 | void *priv; 15 | } yaksuri_hip_type_s; 16 | 17 | typedef struct { 18 | void *priv; 19 | } yaksuri_hip_info_s; 20 | 21 | #endif /* YAKSURI_HIP_PRE_H_INCLUDED */ 22 | -------------------------------------------------------------------------------- /src/backend/hip/include/yaksuri_hipi_base.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_HIPI_BASE_H_INCLUDED 7 | #define YAKSURI_HIPI_BASE_H_INCLUDED 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define YAKSURI_HIPI_HIP_ERR_CHECK(cerr) \ 15 | do { \ 16 | if (cerr != hipSuccess) { \ 17 | fprintf(stderr, "HIP Error (%s:%s,%d): %s\n", __func__, __FILE__, __LINE__, hipGetErrorString(cerr)); \ 18 | } \ 19 | } while (0) 20 | 21 | typedef struct { 22 | int ndevices; 23 | hipStream_t *stream; 24 | bool **p2p; 25 | } yaksuri_hipi_global_s; 26 | extern yaksuri_hipi_global_s yaksuri_hipi_global; 27 | 28 | typedef struct yaksuri_hipi_md_s { 29 | union { 30 | struct { 31 | intptr_t count; 32 | intptr_t stride; 33 | struct yaksuri_hipi_md_s *child; 34 | } contig; 35 | struct { 36 | struct yaksuri_hipi_md_s *child; 37 | } resized; 38 | struct { 39 | intptr_t count; 40 | intptr_t blocklength; 41 | intptr_t stride; 42 | struct yaksuri_hipi_md_s *child; 43 | } hvector; 44 | struct { 45 | intptr_t count; 46 | intptr_t blocklength; 47 | intptr_t *array_of_displs; 48 | struct yaksuri_hipi_md_s *child; 49 | } blkhindx; 50 | struct { 51 | intptr_t count; 52 | intptr_t *array_of_blocklengths; 53 | intptr_t *array_of_displs; 54 | struct yaksuri_hipi_md_s *child; 55 | } hindexed; 56 | } u; 57 | 58 | uintptr_t extent; 59 | uintptr_t num_elements; 60 | } yaksuri_hipi_md_s; 61 | 62 | #endif /* YAKSURI_HIPI_BASE_H_INCLUDED */ 63 | -------------------------------------------------------------------------------- 
/src/backend/hip/md/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/hip/md 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/hip/md/yaksuri_hipi_md.c 10 | -------------------------------------------------------------------------------- /src/backend/hip/pup/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/hip/pup 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/hip/pup/yaksuri_hipi_event.c \ 10 | src/backend/hip/pup/yaksuri_hipi_get_ptr_attr.c 11 | 12 | include src/backend/hip/pup/Makefile.pup.mk 13 | include src/backend/hip/pup/Makefile.populate_pupfns.mk 14 | 15 | 16 | #.chip.c: 17 | # @if $(AM_V_P) ; then \ 18 | # $(top_srcdir)/src/backend/hip/hiplt.sh --verbose $@ \ 19 | # $(HIPCC) $(AM_CPPFLAGS) $(HIP_GENCODE) -c $< ; \ 20 | # else \ 21 | # echo " HIPCC $@" ; \ 22 | # $(top_srcdir)/src/backend/hip/hiplt.sh $@ $(HIPCC) $(AM_CPPFLAGS) $(HIP_GENCODE) -c $< ; \ 23 | # fi 24 | 25 | #.cu: 26 | # @if $(AM_V_P) ; then \ 27 | # $(HIPCC) $(AM_CPPFLAGS) $(HIP_GENCODE) -c $< ; \ 28 | # else \ 29 | # echo " HIPCC $@" ; \ 30 | # $@ $(HIPCC) $(AM_CPPFLAGS) $(HIP_GENCODE) -c $< ; \ 31 | # fi 32 | 33 | 34 | -------------------------------------------------------------------------------- /src/backend/hip/pup/yaksuri_hipi_get_ptr_attr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include "yaksuri_hipi.h" 9 | #include 10 | #include 11 | 12 | #ifdef HIP_USE_MEMORYTYPE 13 | #define 
ATTRTYPE memoryType 14 | static hipError_t attr_convert(hipError_t cerr, struct hipPointerAttribute_t cattr, 15 | yaksur_ptr_attr_s * attr) 16 | { 17 | if (cerr == hipErrorInvalidValue) { 18 | /* attr.ATTRTYPE = hipMemoryTypeUnregistered; */ 19 | /* HIP does not seem to have something corresponding to cudaMemoryTypeUnregistered */ 20 | attr->type = YAKSUR_PTR_TYPE__UNREGISTERED_HOST; 21 | attr->device = -1; 22 | return hipSuccess; 23 | } 24 | 25 | if (cattr.ATTRTYPE == hipMemoryTypeHost) { 26 | attr->type = YAKSUR_PTR_TYPE__REGISTERED_HOST; 27 | attr->device = cattr.device; 28 | } else if (cattr.isManaged) { 29 | attr->type = YAKSUR_PTR_TYPE__MANAGED; 30 | attr->device = cattr.device; 31 | } else if (cattr.ATTRTYPE == hipMemoryTypeDevice) { 32 | attr->type = YAKSUR_PTR_TYPE__GPU; 33 | attr->device = cattr.device; 34 | } else { 35 | attr->type = YAKSUR_PTR_TYPE__UNREGISTERED_HOST; 36 | attr->device = -1; 37 | } 38 | 39 | return cerr; 40 | } 41 | #else 42 | #define ATTRTYPE type 43 | static hipError_t attr_convert(hipError_t cerr, struct hipPointerAttribute_t cattr, 44 | yaksur_ptr_attr_s * attr) 45 | { 46 | if (cattr.ATTRTYPE == hipMemoryTypeHost) { 47 | attr->type = YAKSUR_PTR_TYPE__REGISTERED_HOST; 48 | attr->device = cattr.device; 49 | } else if (cattr.ATTRTYPE == hipMemoryTypeManaged) { 50 | attr->type = YAKSUR_PTR_TYPE__MANAGED; 51 | attr->device = cattr.device; 52 | } else if (cattr.ATTRTYPE == hipMemoryTypeDevice) { 53 | attr->type = YAKSUR_PTR_TYPE__GPU; 54 | attr->device = cattr.device; 55 | } else if (cattr.ATTRTYPE == hipMemoryTypeUnregistered) { 56 | attr->type = YAKSUR_PTR_TYPE__UNREGISTERED_HOST; 57 | attr->device = -1; 58 | } 59 | 60 | return cerr; 61 | } 62 | #endif 63 | 64 | int yaksuri_hipi_get_ptr_attr(const void *inbuf, void *outbuf, yaksi_info_s * info, 65 | yaksur_ptr_attr_s * inattr, yaksur_ptr_attr_s * outattr) 66 | { 67 | int rc = YAKSA_SUCCESS; 68 | yaksuri_hipi_info_s *infopriv; 69 | 70 | if (info) { 71 | infopriv = (yaksuri_hipi_info_s *) 
/*
 * Stub HIP init hook from the hip/stub directory.
 *
 * Reports that no HIP driver hooks are available by storing NULL in
 * '*hooks', and always returns YAKSA_SUCCESS.
 *
 * NOTE(review): presumably this header is the one picked up when the
 * HIP backend is not built -- confirm against src/backend/hip/Makefile.mk.
 *
 * The separate declaration exists only to attach ATTRIBUTE((unused)) to
 * this static function; presumably that silences unused-function
 * warnings in translation units that include the header without
 * calling the hook -- confirm against the ATTRIBUTE macro definition.
 */
static int yaksuri_hip_init_hook(yaksur_gpudriver_hooks_s ** hooks) ATTRIBUTE((unused));
static int yaksuri_hip_init_hook(yaksur_gpudriver_hooks_s ** hooks)
{
    /* no driver: hand back an empty hook table */
    *hooks = NULL;

    return YAKSA_SUCCESS;
}
YAKSURI_HIP_POST_H_INCLUDED */ 18 | -------------------------------------------------------------------------------- /src/backend/hip/stub/yaksuri_hip_pre.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_HIP_PRE_H_INCLUDED 7 | #define YAKSURI_HIP_PRE_H_INCLUDED 8 | 9 | /* This is a API header for the hip device and should not include any 10 | * internal headers, except for yaksa_config.h, in order to get the 11 | * configure checks. */ 12 | 13 | typedef int yaksuri_hip_type_s; 14 | typedef int yaksuri_hip_info_s; 15 | 16 | #endif /* YAKSURI_HIP_PRE_H_INCLUDED */ 17 | -------------------------------------------------------------------------------- /src/backend/seq/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | EXTRA_DIST += $(top_srcdir)/src/backend/seq/genpup.py 7 | 8 | include $(top_srcdir)/src/backend/seq/include/Makefile.mk 9 | include $(top_srcdir)/src/backend/seq/hooks/Makefile.mk 10 | include $(top_srcdir)/src/backend/seq/pup/Makefile.mk 11 | 12 | nodist_noinst_SCRIPTS += \ 13 | $(top_srcdir)/src/backend/seq/genpup.py 14 | -------------------------------------------------------------------------------- /src/backend/seq/hooks/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/seq/hooks 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/seq/hooks/yaksuri_seq_hooks.c 10 | -------------------------------------------------------------------------------- /src/backend/seq/hooks/yaksuri_seq_hooks.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include "yaksuri_seqi.h" 9 | #include 10 | #include 11 | #include 12 | 13 | int yaksuri_seq_init_hook(void) 14 | { 15 | return YAKSA_SUCCESS; 16 | } 17 | 18 | int yaksuri_seq_finalize_hook(void) 19 | { 20 | return YAKSA_SUCCESS; 21 | } 22 | 23 | int yaksuri_seq_type_create_hook(yaksi_type_s * type) 24 | { 25 | int rc = YAKSA_SUCCESS; 26 | 27 | type->backend.seq.priv = malloc(sizeof(yaksuri_seqi_type_s)); 28 | YAKSU_ERR_CHKANDJUMP(!type->backend.seq.priv, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 29 | 30 | rc = yaksuri_seqi_populate_pupfns(type); 31 | YAKSU_ERR_CHECK(rc, fn_fail); 32 | 33 | fn_exit: 34 | return rc; 35 | fn_fail: 36 | goto fn_exit; 37 | } 38 | 39 | int yaksuri_seq_type_free_hook(yaksi_type_s * type) 40 | { 41 | int rc = YAKSA_SUCCESS; 42 | 43 | free(type->backend.seq.priv); 44 | 45 | return rc; 46 | } 47 | 48 | int yaksuri_seq_info_create_hook(yaksi_info_s * info) 49 | { 50 | int rc = YAKSA_SUCCESS; 51 | yaksuri_seqi_info_s *seq; 52 | 53 | seq = (yaksuri_seqi_info_s *) malloc(sizeof(yaksuri_seqi_info_s)); 54 | YAKSU_ERR_CHKANDJUMP(!seq, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 55 | 56 | /* set default values for info keys */ 57 | seq->iov_pack_threshold = YAKSURI_SEQI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 58 | seq->iov_unpack_threshold = YAKSURI_SEQI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 59 | 60 | info->backend.seq.priv = (void *) seq; 61 | 62 | fn_exit: 63 | return rc; 64 | fn_fail: 65 | goto fn_exit; 66 | } 67 | 68 | int yaksuri_seq_info_free_hook(yaksi_info_s * info) 69 | { 70 | free(info->backend.seq.priv); 71 | 72 | return YAKSA_SUCCESS; 73 | } 74 | 75 | int yaksuri_seq_info_keyval_append(yaksi_info_s * info, const char *key, const void *val, 76 | unsigned int vallen) 77 | { 78 | yaksuri_seqi_info_s *seq = 
(yaksuri_seqi_info_s *) info->backend.seq.priv; 79 | 80 | if (!strncmp(key, "yaksa_seq_iov_pack_threshold", YAKSA_INFO_MAX_KEYLEN)) { 81 | assert(vallen == sizeof(uintptr_t)); 82 | seq->iov_pack_threshold = (uintptr_t) val; 83 | } else if (!strncmp(key, "yaksa_seq_iov_unpack_threshold", YAKSA_INFO_MAX_KEYLEN)) { 84 | assert(vallen == sizeof(uintptr_t)); 85 | seq->iov_unpack_threshold = (uintptr_t) val; 86 | } 87 | 88 | return YAKSA_SUCCESS; 89 | } 90 | -------------------------------------------------------------------------------- /src/backend/seq/include/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/seq/include 7 | 8 | noinst_HEADERS += \ 9 | src/backend/seq/include/yaksuri_seqi.h \ 10 | src/backend/seq/include/yaksuri_seq_pre.h \ 11 | src/backend/seq/include/yaksuri_seq_post.h 12 | -------------------------------------------------------------------------------- /src/backend/seq/include/yaksuri_seq_post.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_SEQ_POST_H_INCLUDED 7 | #define YAKSURI_SEQ_POST_H_INCLUDED 8 | 9 | int yaksuri_seq_init_hook(void); 10 | int yaksuri_seq_finalize_hook(void); 11 | int yaksuri_seq_type_create_hook(yaksi_type_s * type); 12 | int yaksuri_seq_type_free_hook(yaksi_type_s * type); 13 | int yaksuri_seq_info_create_hook(yaksi_info_s * info); 14 | int yaksuri_seq_info_free_hook(yaksi_info_s * info); 15 | int yaksuri_seq_info_keyval_append(yaksi_info_s * info, const char *key, const void *val, 16 | unsigned int vallen); 17 | 18 | int yaksuri_seq_pup_is_supported(yaksi_type_s * type, yaksa_op_t op, bool * is_supported); 19 | int yaksuri_seq_ipack(const void *inbuf, void 
/*
 * Seq-backend private data for a yaksa type.
 *
 * yaksuri_seq_type_create_hook() allocates one of these, stores it in
 * type->backend.seq.priv and then calls yaksuri_seqi_populate_pupfns()
 * on the type.  NOTE(review): presumably that call is what fills in the
 * members below -- its body is not in this file.
 */
typedef struct yaksuri_seqi_type_s {
    /* Pack kernel for this type.  May be NULL: yaksuri_seq_pup_is_supported()
     * tests it and yaksuri_seq_ipack() asserts it before calling. */
    int (*pack) (const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type,
                 yaksa_op_t op);
    /* Unpack kernel for this type; yaksuri_seq_iunpack() asserts it is
     * non-NULL before calling. */
    int (*unpack) (const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type,
                   yaksa_op_t op);
    /* NOTE(review): looks like a human-readable kernel/type name --
     * confirm against the generated populate_pupfns code. */
    const char *name;
} yaksuri_seqi_type_s;
26 | } yaksuri_seqi_info_s; 27 | 28 | int yaksuri_seqi_populate_pupfns(yaksi_type_s * type); 29 | 30 | #endif /* YAKSURI_SEQI_H_INCLUDED */ 31 | -------------------------------------------------------------------------------- /src/backend/seq/pup/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/seq/pup 7 | 8 | include src/backend/seq/pup/Makefile.pup.mk 9 | include src/backend/seq/pup/Makefile.populate_pupfns.mk 10 | -------------------------------------------------------------------------------- /src/backend/seq/pup/yaksuri_seq_pup.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include "yaksi.h" 8 | #include "yaksuri_seqi.h" 9 | #include 10 | #include 11 | 12 | #define MAX_IOV_LENGTH (8192) 13 | 14 | int yaksuri_seq_pup_is_supported(yaksi_type_s * type, yaksa_op_t op, bool * is_supported) 15 | { 16 | int rc = YAKSA_SUCCESS; 17 | yaksuri_seqi_type_s *seq_type = (yaksuri_seqi_type_s *) type->backend.seq.priv; 18 | 19 | if (seq_type->pack || (type->is_contig && op == YAKSA_OP__REPLACE)) 20 | *is_supported = true; 21 | else 22 | *is_supported = false; 23 | 24 | return rc; 25 | } 26 | 27 | int yaksuri_seq_ipack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type, 28 | yaksi_info_s * info, yaksa_op_t op) 29 | { 30 | int rc = YAKSA_SUCCESS; 31 | yaksuri_seqi_type_s *seq_type = (yaksuri_seqi_type_s *) type->backend.seq.priv; 32 | 33 | uintptr_t iov_pack_threshold = YAKSURI_SEQI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 34 | if (info) { 35 | yaksuri_seqi_info_s *seq_info = (yaksuri_seqi_info_s *) info->backend.seq.priv; 36 | iov_pack_threshold = seq_info->iov_pack_threshold; 37 | } 38 | 39 | if (op 
== YAKSA_OP__REPLACE && type->is_contig) { 40 | memcpy(outbuf, (const char *) inbuf + type->true_lb, type->size * count); 41 | } else if (op == YAKSA_OP__REPLACE && type->size / type->num_contig >= iov_pack_threshold) { 42 | struct iovec iov[MAX_IOV_LENGTH]; 43 | char *dbuf = (char *) outbuf; 44 | uintptr_t offset = 0; 45 | while (offset < type->num_contig * count) { 46 | uintptr_t actual_iov_len; 47 | rc = yaksi_iov(inbuf, count, type, offset, iov, MAX_IOV_LENGTH, &actual_iov_len); 48 | YAKSU_ERR_CHECK(rc, fn_fail); 49 | 50 | for (uintptr_t i = 0; i < actual_iov_len; i++) { 51 | memcpy(dbuf, iov[i].iov_base, iov[i].iov_len); 52 | dbuf += iov[i].iov_len; 53 | } 54 | 55 | offset += actual_iov_len; 56 | } 57 | } else { 58 | assert(seq_type->pack); 59 | rc = seq_type->pack(inbuf, outbuf, count, type, op); 60 | YAKSU_ERR_CHECK(rc, fn_fail); 61 | } 62 | 63 | fn_exit: 64 | return rc; 65 | fn_fail: 66 | goto fn_exit; 67 | } 68 | 69 | int yaksuri_seq_iunpack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type, 70 | yaksi_info_s * info, yaksa_op_t op) 71 | { 72 | int rc = YAKSA_SUCCESS; 73 | yaksuri_seqi_type_s *seq_type = (yaksuri_seqi_type_s *) type->backend.seq.priv; 74 | 75 | uintptr_t iov_unpack_threshold = YAKSURI_SEQI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 76 | if (info) { 77 | yaksuri_seqi_info_s *seq_info = (yaksuri_seqi_info_s *) info->backend.seq.priv; 78 | iov_unpack_threshold = seq_info->iov_unpack_threshold; 79 | } 80 | 81 | if (op == YAKSA_OP__REPLACE && type->is_contig) { 82 | memcpy((char *) outbuf + type->true_lb, inbuf, type->size * count); 83 | } else if (op == YAKSA_OP__REPLACE && type->size / type->num_contig >= iov_unpack_threshold) { 84 | struct iovec iov[MAX_IOV_LENGTH]; 85 | const char *sbuf = (const char *) inbuf; 86 | uintptr_t offset = 0; 87 | 88 | while (offset < type->num_contig * count) { 89 | uintptr_t actual_iov_len; 90 | rc = yaksi_iov(outbuf, count, type, offset, iov, MAX_IOV_LENGTH, &actual_iov_len); 91 | YAKSU_ERR_CHECK(rc, 
fn_fail); 92 | 93 | for (uintptr_t i = 0; i < actual_iov_len; i++) { 94 | memcpy(iov[i].iov_base, sbuf, iov[i].iov_len); 95 | sbuf += iov[i].iov_len; 96 | } 97 | 98 | offset += actual_iov_len; 99 | } 100 | } else { 101 | assert(seq_type->unpack); 102 | rc = seq_type->unpack(inbuf, outbuf, count, type, op); 103 | YAKSU_ERR_CHECK(rc, fn_fail); 104 | } 105 | 106 | fn_exit: 107 | return rc; 108 | fn_fail: 109 | goto fn_exit; 110 | } 111 | -------------------------------------------------------------------------------- /src/backend/src/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/src 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/src/yaksuri_progress.c \ 10 | src/backend/src/yaksur_hooks.c \ 11 | src/backend/src/yaksur_pup.c \ 12 | src/backend/src/yaksur_request.c 13 | 14 | noinst_HEADERS += \ 15 | src/backend/src/yaksuri.h \ 16 | src/backend/src/yaksur_pre.h \ 17 | src/backend/src/yaksur_post.h 18 | -------------------------------------------------------------------------------- /src/backend/src/yaksur_post.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSUR_POST_H_INCLUDED 7 | #define YAKSUR_POST_H_INCLUDED 8 | 9 | #include "yaksuri_seq_post.h" 10 | #include "yaksuri_cuda_post.h" 11 | #include "yaksuri_ze_post.h" 12 | #include "yaksuri_hip_post.h" 13 | 14 | int yaksur_init_hook(yaksi_info_s * info); 15 | int yaksur_finalize_hook(void); 16 | int yaksur_type_create_hook(yaksi_type_s * type); 17 | int yaksur_type_free_hook(yaksi_type_s * type); 18 | int yaksur_request_create_hook(yaksi_request_s * request); 19 | int yaksur_request_free_hook(yaksi_request_s * request); 20 | int 
yaksur_info_create_hook(yaksi_info_s * info); 21 | int yaksur_info_free_hook(yaksi_info_s * info); 22 | int yaksur_info_keyval_append(yaksi_info_s * info, const char *key, const void *val, 23 | unsigned int vallen); 24 | 25 | int yaksur_ipack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type, 26 | yaksi_info_s * info, yaksa_op_t op, yaksi_request_s * request); 27 | int yaksur_iunpack(const void *inbuf, void *outbuf, uintptr_t count, yaksi_type_s * type, 28 | yaksi_info_s * info, yaksa_op_t op, yaksi_request_s * request); 29 | int yaksur_request_test(yaksi_request_s * request); 30 | int yaksur_request_wait(yaksi_request_s * request); 31 | 32 | #endif /* YAKSUR_POST_H_INCLUDED */ 33 | -------------------------------------------------------------------------------- /src/backend/src/yaksur_request.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include "yaksa.h" 8 | #include "yaksi.h" 9 | #include "yaksu.h" 10 | #include "yaksuri.h" 11 | 12 | int yaksur_request_test(yaksi_request_s * request) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | 16 | rc = yaksuri_progress_poke(); 17 | YAKSU_ERR_CHECK(rc, fn_fail); 18 | 19 | fn_exit: 20 | return rc; 21 | fn_fail: 22 | goto fn_exit; 23 | } 24 | 25 | int yaksur_request_wait(yaksi_request_s * request) 26 | { 27 | int rc = YAKSA_SUCCESS; 28 | 29 | while (yaksu_atomic_load(&request->cc)) { 30 | rc = yaksuri_progress_poke(); 31 | YAKSU_ERR_CHECK(rc, fn_fail); 32 | } 33 | 34 | fn_exit: 35 | return rc; 36 | fn_fail: 37 | goto fn_exit; 38 | } 39 | -------------------------------------------------------------------------------- /src/backend/src/yaksuri.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef 
/* forward declaration: the acquire/release callbacks below take the
 * owning request, whose full definition follows this struct */
struct yaksuri_request;

/*
 * One sub-request belonging to a yaksuri_request_s (see its 'subreqs'
 * member).  Sub-requests are chained through 'next'/'prev'.
 */
typedef struct yaksuri_subreq {
    /* discriminator for the union 'u' (presumably SINGLE_CHUNK selects
     * u.single and MULTI_CHUNK selects u.multiple -- confirm in
     * yaksuri_progress.c) */
    enum {
        YAKSURI_SUBREQ_KIND__SINGLE_CHUNK,
        YAKSURI_SUBREQ_KIND__MULTI_CHUNK,
    } kind;

    union {
        struct {
            /* opaque event handle for the single operation */
            void *event;
        } single;
        struct {
            /* arguments of the pack/unpack operation being carried out */
            const void *inbuf;
            void *outbuf;
            uintptr_t count;
            yaksi_type_s *type;
            yaksa_op_t op;

            /* NOTE(review): looks like the number of elements already
             * handed out as chunks -- confirm */
            uintptr_t issued_count;
            /* chunks of this sub-request (yaksuri_subreq_chunk_s carries
             * its own next/prev links) */
            yaksuri_subreq_chunk_s *chunks;

            /* callbacks that obtain a new chunk for this sub-request and
             * give one back; exact contract is defined by the code that
             * installs them */
            int (*acquire) (struct yaksuri_request * reqpriv, struct yaksuri_subreq * subreq,
                            struct yaksuri_subreq_chunk ** chunk);
            int (*release) (struct yaksuri_request * reqpriv, struct yaksuri_subreq * subreq,
                            struct yaksuri_subreq_chunk * chunk);
        } multiple;
    } u;

    /* GPU driver this sub-request is associated with */
    yaksuri_gpudriver_id_e gpudriver_id;

    struct yaksuri_subreq *next;
    struct yaksuri_subreq *prev;
} yaksuri_subreq_s;
National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/ze/hooks 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/ze/hooks/yaksuri_ze_init_hooks.c \ 10 | src/backend/ze/hooks/yaksuri_zei_type_hooks.c \ 11 | src/backend/ze/hooks/yaksuri_zei_info_hooks.c \ 12 | src/backend/ze/hooks/yaksuri_zei_init_kernels.c \ 13 | src/backend/ze/hooks/yaksuri_zei_finalize_kernels.c 14 | -------------------------------------------------------------------------------- /src/backend/ze/hooks/yaksuri_zei_info_hooks.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksuri_zei.h" 8 | #include 9 | #include 10 | #include 11 | 12 | int yaksuri_zei_info_create_hook(yaksi_info_s * info) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | yaksuri_zei_info_s *ze; 16 | 17 | ze = (yaksuri_zei_info_s *) malloc(sizeof(yaksuri_zei_info_s)); 18 | YAKSU_ERR_CHKANDJUMP(!ze, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 19 | 20 | /* set default values for info keys */ 21 | ze->yaksa_ze_use_copy_engine = 0; 22 | ze->iov_pack_threshold = YAKSURI_ZEI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 23 | ze->iov_unpack_threshold = YAKSURI_ZEI_INFO__DEFAULT_IOV_PUP_THRESHOLD; 24 | ze->inbuf.is_valid = false; 25 | ze->outbuf.is_valid = false; 26 | 27 | info->backend.ze.priv = (void *) ze; 28 | 29 | fn_exit: 30 | return rc; 31 | fn_fail: 32 | goto fn_exit; 33 | } 34 | 35 | int yaksuri_zei_info_free_hook(yaksi_info_s * info) 36 | { 37 | free(info->backend.ze.priv); 38 | 39 | return YAKSA_SUCCESS; 40 | } 41 | 42 | int yaksuri_zei_info_keyval_append(yaksi_info_s * info, const char *key, const void *val, 43 | unsigned int vallen) 44 | { 45 | yaksuri_zei_info_s *ze = (yaksuri_zei_info_s *) info->backend.ze.priv; 46 | 47 | typedef struct { 48 | ze_memory_allocation_properties_t prop; 49 | ze_device_handle_t 
device; 50 | } ze_alloc_attr_t; 51 | 52 | if (!strncmp(key, "yaksa_ze_use_copy_engine", YAKSA_INFO_MAX_KEYLEN)) { 53 | assert(vallen == sizeof(int)); 54 | ze->yaksa_ze_use_copy_engine = *(int *) val; 55 | } else if (!strncmp(key, "yaksa_ze_iov_unpack_threshold", YAKSA_INFO_MAX_KEYLEN)) { 56 | assert(vallen == sizeof(uintptr_t)); 57 | ze->iov_unpack_threshold = (uintptr_t) val; 58 | } else if (!strncmp(key, "yaksa_ze_inbuf_ptr_attr", YAKSA_INFO_MAX_KEYLEN)) { 59 | assert(vallen == sizeof(ze_alloc_attr_t)); 60 | ze->inbuf.is_valid = true; 61 | memcpy(&ze->inbuf.attr, val, sizeof(ze_alloc_attr_t)); 62 | } else if (!strncmp(key, "yaksa_ze_outbuf_ptr_attr", YAKSA_INFO_MAX_KEYLEN)) { 63 | assert(vallen == sizeof(ze_alloc_attr_t)); 64 | ze->outbuf.is_valid = true; 65 | memcpy(&ze->outbuf.attr, val, sizeof(ze_alloc_attr_t)); 66 | } 67 | 68 | return YAKSA_SUCCESS; 69 | } 70 | -------------------------------------------------------------------------------- /src/backend/ze/include/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/ze/include 7 | 8 | noinst_HEADERS += \ 9 | src/backend/ze/include/yaksuri_ze_pre.h \ 10 | src/backend/ze/include/yaksuri_ze_post.h \ 11 | src/backend/ze/include/yaksuri_zei.h \ 12 | src/backend/ze/include/yaksuri_zei_md.h 13 | -------------------------------------------------------------------------------- /src/backend/ze/include/yaksuri_ze_post.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_ZE_POST_H_INCLUDED 7 | #define YAKSURI_ZE_POST_H_INCLUDED 8 | 9 | int yaksuri_ze_init_hook(yaksur_gpudriver_hooks_s ** hooks); 10 | 11 | #endif /* YAKSURI_ZE_H_INCLUDED */ 12 | 
/*
 * Metadata node describing one level of a datatype for the ZE backend.
 *
 * Nodes form a chain through the 'child' pointer of whichever union
 * member is in use; each union member carries the parameters of one
 * datatype constructor.  The struct itself holds no tag saying which
 * member is active.
 *
 * NOTE(review): only plain C types (long / unsigned long) are used here,
 * presumably because this header is also compiled by ocloc for the
 * OpenCL kernels (the pup Makefile passes -I .../ze/include) -- confirm.
 */
typedef struct yaksuri_zei_md_s {
    union {
        /* contiguous: 'count' children placed 'stride' apart */
        struct {
            long count;
            long stride;
            struct yaksuri_zei_md_s *child;
        } contig;
        /* dup: wraps a child with no extra parameters */
        struct {
            struct yaksuri_zei_md_s *child;
        } dup;
        /* resized: wraps a child with no extra parameters */
        struct {
            struct yaksuri_zei_md_s *child;
        } resized;
        /* hvector: 'count' blocks of 'blocklength' children, 'stride' apart */
        struct {
            long count;
            long blocklength;
            long stride;
            struct yaksuri_zei_md_s *child;
        } hvector;
        /* block-hindexed: 'count' blocks of fixed 'blocklength' at the
         * given displacements */
        struct {
            long count;
            long blocklength;
            long *array_of_displs;
            struct yaksuri_zei_md_s *child;
        } blkhindx;
        /* hindexed: 'count' blocks, each with its own length and
         * displacement */
        struct {
            long count;
            long *array_of_blocklengths;
            long *array_of_displs;
            struct yaksuri_zei_md_s *child;
        } hindexed;
    } u;

    /* per-node type properties (units not visible here; presumably bytes
     * for extent/true_lb -- confirm in yaksuri_zei_md.c) */
    unsigned long extent;
    unsigned long num_elements;
    unsigned long true_lb;
} yaksuri_zei_md_s;
YAKSURI_ZEI_H_INCLUDED */ 48 | -------------------------------------------------------------------------------- /src/backend/ze/md/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/ze/md 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/ze/md/yaksuri_zei_md.c 10 | -------------------------------------------------------------------------------- /src/backend/ze/pup/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/ze/pup 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/backend/ze/pup/yaksuri_zei_event.c \ 10 | src/backend/ze/pup/yaksuri_zei_get_ptr_attr.c 11 | 12 | include src/backend/ze/pup/Makefile.pup.mk 13 | include src/backend/ze/pup/Makefile.populate_pupfns.mk 14 | 15 | ze_native_TARGET = @enable_ze_native@ 16 | 17 | if BUILD_ZE_NATIVE 18 | 19 | .cl.c: 20 | @echo " OCLOC (native) $<" ; \ 21 | ocloc compile -file $< -device $(ze_native_TARGET) -out_dir `dirname $@` -output_no_suffix -q -options "-I $(top_srcdir)/src/backend/ze/include -cl-std=CL2.0" @extra_ocloc_options@ && \ 22 | (test -f $(@:.c=) && mv $(@:.c=) $(@:.c=.bin) || true) && /bin/rm -f $(@:.c=.gen) && \ 23 | $(top_srcdir)/src/backend/ze/pup/inline.py $(@:.c=.bin) $@ $(top_srcdir) 1 24 | 25 | else 26 | 27 | if BUILD_ZE_NATIVE_MULTIPLE 28 | 29 | .cl.c: 30 | @echo " OCLOC (multiple native) $<" ; \ 31 | ocloc compile -file $< -device $(ze_native_TARGET) -q -options "-I $(top_srcdir)/src/backend/ze/include -cl-std=CL2.0" @extra_ocloc_options@ && \ 32 | mv `basename $(@:.c=.ar)` `dirname $@` && \ 33 | $(top_srcdir)/src/backend/ze/pup/inline.py $(@:.c=.ar) $@ $(top_srcdir) 2 34 | 35 | else 36 | 37 | .cl.c: 38 | 
#!/usr/bin/env python3
##
## Copyright (C) by Argonne National Laboratory
##     See COPYRIGHT in top-level directory
##

import sys, os

# inline.py [infile] [outfile] [topdir] [native: 0|1|2]
#
# Embeds the binary found at `infile` into the C source `outfile` as
#   const unsigned char <basename>_str[] = { ... };
#   const size_t <basename>_size = <number of bytes>;
# where <basename> is the output file name without its extension.
# A missing or empty input file yields an empty array and a size of 0.

infile = sys.argv[1]
outfile = sys.argv[2]
top_srcdir = sys.argv[3]
native = sys.argv[4]

base = os.path.basename(outfile)
basename = os.path.splitext(base)[0]

sys.path.append(top_srcdir + '/maint/')
import yutils

# Banner describing which kind of image was embedded; anything other than
# "1" or "2" is treated as SPIR-V, as before.
format_banner = {
    "2": "/* native format with multiple devices */\n\n",
    "1": "/* native format */\n\n",
}

# Read the whole image up front.  The previous byte-at-a-time loop called
# ord() on the first read unconditionally, which raises TypeError when the
# file exists but is empty.
data = b""
if os.path.exists(infile):
    with open(infile, 'rb') as bfile:
        data = bfile.read()

yutils.copyright_c(outfile)
with open(outfile, 'a') as OUTFILE:
    OUTFILE.write(format_banner.get(native, "/* SPIR-V format */\n\n"))
    # the generated declarations use size_t, which needs <stddef.h>
    OUTFILE.write("#include <stddef.h>\n\n")
    OUTFILE.write("const unsigned char %s_str[] = {\n" % basename)
    # iterating a bytes object yields ints, so hex(b) matches hex(ord(char))
    OUTFILE.write(", ".join(hex(b) for b in data))
    OUTFILE.write("};\n")
    OUTFILE.write("const size_t %s_size = %d;\n" % (basename, len(data)))
-------------------------------------------------------------------------------- /src/backend/ze/pup/yaksuri_zei_get_ptr_attr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include "yaksi.h" 8 | #include "yaksu.h" 9 | #include "yaksuri_zei.h" 10 | #include 11 | 12 | static inline int find_device_num(ze_device_handle_t device) 13 | { 14 | int device_num = -1; 15 | for (int i = 0; i < yaksuri_zei_global.ndevices; i++) 16 | if (yaksuri_zei_global.device[i] == device) { 17 | device_num = i; 18 | break; 19 | } 20 | if (device_num == -1) { 21 | /* subdevice */ 22 | for (int i = 0; i < yaksuri_zei_global.ndevices; i++) { 23 | yaksuri_zei_device_state_s *device_state = yaksuri_zei_global.device_states + i; 24 | for (int j = 0; j < device_state->nsubdevices; j++) { 25 | if (device == device_state->subdevices[j]) { 26 | return i; 27 | } 28 | } 29 | } 30 | } 31 | return device_num; 32 | } 33 | 34 | static inline void attr_convert(ze_memory_allocation_properties_t prop, ze_device_handle_t device, 35 | yaksur_ptr_attr_s * attr) 36 | { 37 | if (prop.type == ZE_MEMORY_TYPE_UNKNOWN) { 38 | attr->type = YAKSUR_PTR_TYPE__UNREGISTERED_HOST; 39 | attr->device = -1; 40 | } else if (prop.type == ZE_MEMORY_TYPE_HOST) { 41 | attr->type = YAKSUR_PTR_TYPE__REGISTERED_HOST; 42 | attr->device = -1; 43 | } else if (prop.type == ZE_MEMORY_TYPE_SHARED) { 44 | attr->type = YAKSUR_PTR_TYPE__MANAGED; 45 | attr->device = -1; 46 | if (device) 47 | attr->device = find_device_num(device); 48 | assert(!device || attr->device != -1); 49 | } else if (prop.type == ZE_MEMORY_TYPE_DEVICE) { 50 | attr->type = YAKSUR_PTR_TYPE__GPU; 51 | attr->device = find_device_num(device); 52 | assert(attr->device != -1); 53 | } else 54 | assert(0); 55 | } 56 | 57 | int yaksuri_zei_get_ptr_attr(const void *inbuf, void *outbuf, yaksi_info_s * info, 58 | 
yaksur_ptr_attr_s * inattr, yaksur_ptr_attr_s * outattr) 59 | { 60 | int rc = YAKSA_SUCCESS; 61 | ze_result_t zerr; 62 | yaksuri_zei_info_s *infopriv = NULL; 63 | ze_memory_allocation_properties_t prop = { 64 | .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES, 65 | .pNext = NULL, 66 | .type = 0, 67 | .id = 0, 68 | .pageSize = 0, 69 | }; 70 | ze_device_handle_t device = NULL; 71 | 72 | if (info) { 73 | infopriv = (yaksuri_zei_info_s *) info->backend.ze.priv; 74 | } 75 | 76 | if (infopriv && infopriv->inbuf.is_valid) { 77 | attr_convert(infopriv->inbuf.attr.prop, infopriv->inbuf.attr.device, inattr); 78 | } else { 79 | zerr = zeMemGetAllocProperties(yaksuri_zei_global.context, inbuf, &prop, &device); 80 | YAKSURI_ZEI_ZE_ERR_CHKANDJUMP(zerr, rc, fn_fail); 81 | attr_convert(prop, device, inattr); 82 | } 83 | 84 | if (infopriv && infopriv->outbuf.is_valid) { 85 | attr_convert(infopriv->outbuf.attr.prop, infopriv->outbuf.attr.device, outattr); 86 | } else { 87 | zerr = zeMemGetAllocProperties(yaksuri_zei_global.context, outbuf, &prop, &device); 88 | YAKSURI_ZEI_ZE_ERR_CHKANDJUMP(zerr, rc, fn_fail); 89 | attr_convert(prop, device, outattr); 90 | } 91 | 92 | fn_exit: 93 | return rc; 94 | fn_fail: 95 | goto fn_exit; 96 | } 97 | -------------------------------------------------------------------------------- /src/backend/ze/stub/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/backend/ze/stub 7 | 8 | noinst_HEADERS += \ 9 | src/backend/ze/stub/yaksuri_ze_pre.h \ 10 | src/backend/ze/stub/yaksuri_ze_post.h 11 | -------------------------------------------------------------------------------- /src/backend/ze/stub/yaksuri_ze_post.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See 
COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_ZE_POST_H_INCLUDED 7 | #define YAKSURI_ZE_POST_H_INCLUDED 8 | 9 | static int yaksuri_ze_init_hook(yaksur_gpudriver_hooks_s ** hooks) ATTRIBUTE((unused)); 10 | static int yaksuri_ze_init_hook(yaksur_gpudriver_hooks_s ** hooks) 11 | { 12 | *hooks = NULL; 13 | 14 | return YAKSA_SUCCESS; 15 | } 16 | 17 | #endif /* YAKSURI_ZE_POST_H_INCLUDED */ 18 | -------------------------------------------------------------------------------- /src/backend/ze/stub/yaksuri_ze_pre.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSURI_ZE_PRE_H_INCLUDED 7 | #define YAKSURI_ZE_PRE_H_INCLUDED 8 | 9 | /* This is a API header for the ze device and should not include any 10 | * internal headers, except for yaksa_config.h, in order to get the 11 | * configure checks. */ 12 | 13 | typedef int yaksuri_ze_type_s; 14 | typedef int yaksuri_ze_info_s; 15 | 16 | #endif /* YAKSURI_ZE_PRE_H_INCLUDED */ 17 | -------------------------------------------------------------------------------- /src/external/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/external 7 | 8 | noinst_HEADERS += \ 9 | src/external/yutlist.h \ 10 | src/external/yuthash.h 11 | -------------------------------------------------------------------------------- /src/frontend/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | include $(top_srcdir)/src/frontend/bounds/Makefile.mk 7 | include $(top_srcdir)/src/frontend/flatten/Makefile.mk 8 | include 
$(top_srcdir)/src/frontend/include/Makefile.mk 9 | include $(top_srcdir)/src/frontend/info/Makefile.mk 10 | include $(top_srcdir)/src/frontend/init/Makefile.mk 11 | include $(top_srcdir)/src/frontend/iov/Makefile.mk 12 | include $(top_srcdir)/src/frontend/pup/Makefile.mk 13 | include $(top_srcdir)/src/frontend/types/Makefile.mk 14 | -------------------------------------------------------------------------------- /src/frontend/bounds/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/bounds 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/frontend/bounds/yaksa_bounds.c 10 | -------------------------------------------------------------------------------- /src/frontend/bounds/yaksa_bounds.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksa.h" 7 | #include "yaksi.h" 8 | #include "yaksu.h" 9 | #include 10 | 11 | YAKSA_API_PUBLIC int yaksa_type_get_size(yaksa_type_t type, uintptr_t * size) 12 | { 13 | yaksi_type_s *yaksi_type; 14 | int rc = YAKSA_SUCCESS; 15 | 16 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 17 | 18 | rc = yaksi_type_get(type, &yaksi_type); 19 | YAKSU_ERR_CHECK(rc, fn_fail); 20 | 21 | *size = yaksi_type->size; 22 | 23 | fn_exit: 24 | return rc; 25 | fn_fail: 26 | goto fn_exit; 27 | } 28 | 29 | YAKSA_API_PUBLIC int yaksa_type_get_extent(yaksa_type_t type, intptr_t * lb, intptr_t * extent) 30 | { 31 | yaksi_type_s *yaksi_type; 32 | int rc = YAKSA_SUCCESS; 33 | 34 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 35 | 36 | rc = yaksi_type_get(type, &yaksi_type); 37 | YAKSU_ERR_CHECK(rc, fn_fail); 38 | 39 | *lb = yaksi_type->lb; 40 | *extent = yaksi_type->extent; 41 | 42 | fn_exit: 43 | return 
rc; 44 | fn_fail: 45 | goto fn_exit; 46 | } 47 | 48 | YAKSA_API_PUBLIC int yaksa_type_get_true_extent(yaksa_type_t type, intptr_t * lb, intptr_t * extent) 49 | { 50 | yaksi_type_s *yaksi_type; 51 | int rc = YAKSA_SUCCESS; 52 | 53 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 54 | 55 | rc = yaksi_type_get(type, &yaksi_type); 56 | YAKSU_ERR_CHECK(rc, fn_fail); 57 | 58 | *lb = yaksi_type->true_lb; 59 | *extent = yaksi_type->true_ub - yaksi_type->true_lb; 60 | 61 | fn_exit: 62 | return rc; 63 | fn_fail: 64 | goto fn_exit; 65 | } 66 | -------------------------------------------------------------------------------- /src/frontend/flatten/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/flatten 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/frontend/flatten/yaksa_flatten_size.c \ 10 | src/frontend/flatten/yaksa_flatten.c \ 11 | src/frontend/flatten/yaksa_unflatten.c 12 | -------------------------------------------------------------------------------- /src/frontend/flatten/yaksa_flatten.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | 11 | static int flatten(yaksi_type_s * type, void *flattened_type) 12 | { 13 | int rc = YAKSA_SUCCESS; 14 | char *flatbuf = (char *) flattened_type; 15 | 16 | /* copy this type */ 17 | memcpy(flatbuf, type, sizeof(yaksi_type_s)); 18 | flatbuf += sizeof(yaksi_type_s); 19 | 20 | switch (type->kind) { 21 | case YAKSI_TYPE_KIND__BUILTIN: 22 | break; 23 | 24 | case YAKSI_TYPE_KIND__CONTIG: 25 | rc = flatten(type->u.contig.child, flatbuf); 26 | YAKSU_ERR_CHECK(rc, fn_fail); 27 | break; 28 | 29 | case YAKSI_TYPE_KIND__DUP: 30 | 
rc = flatten(type->u.dup.child, flatbuf); 31 | YAKSU_ERR_CHECK(rc, fn_fail); 32 | break; 33 | 34 | case YAKSI_TYPE_KIND__RESIZED: 35 | rc = flatten(type->u.resized.child, flatbuf); 36 | YAKSU_ERR_CHECK(rc, fn_fail); 37 | break; 38 | 39 | case YAKSI_TYPE_KIND__HVECTOR: 40 | rc = flatten(type->u.hvector.child, flatbuf); 41 | YAKSU_ERR_CHECK(rc, fn_fail); 42 | break; 43 | 44 | case YAKSI_TYPE_KIND__BLKHINDX: 45 | memcpy(flatbuf, type->u.blkhindx.array_of_displs, 46 | type->u.blkhindx.count * sizeof(intptr_t)); 47 | flatbuf += type->u.blkhindx.count * sizeof(intptr_t); 48 | 49 | rc = flatten(type->u.blkhindx.child, flatbuf); 50 | YAKSU_ERR_CHECK(rc, fn_fail); 51 | break; 52 | 53 | case YAKSI_TYPE_KIND__HINDEXED: 54 | memcpy(flatbuf, type->u.hindexed.array_of_blocklengths, 55 | type->u.hindexed.count * sizeof(intptr_t)); 56 | flatbuf += type->u.hindexed.count * sizeof(intptr_t); 57 | 58 | memcpy(flatbuf, type->u.hindexed.array_of_displs, 59 | type->u.hindexed.count * sizeof(intptr_t)); 60 | flatbuf += type->u.hindexed.count * sizeof(intptr_t); 61 | 62 | rc = flatten(type->u.hindexed.child, flatbuf); 63 | YAKSU_ERR_CHECK(rc, fn_fail); 64 | break; 65 | 66 | case YAKSI_TYPE_KIND__STRUCT: 67 | memcpy(flatbuf, type->u.str.array_of_blocklengths, 68 | type->u.str.count * sizeof(intptr_t)); 69 | flatbuf += type->u.str.count * sizeof(intptr_t); 70 | 71 | memcpy(flatbuf, type->u.str.array_of_displs, type->u.str.count * sizeof(intptr_t)); 72 | flatbuf += type->u.str.count * sizeof(intptr_t); 73 | 74 | for (intptr_t i = 0; i < type->u.str.count; i++) { 75 | rc = flatten(type->u.str.array_of_types[i], flatbuf); 76 | YAKSU_ERR_CHECK(rc, fn_fail); 77 | 78 | uintptr_t tmp; 79 | rc = yaksi_flatten_size(type->u.str.array_of_types[i], &tmp); 80 | YAKSU_ERR_CHECK(rc, fn_fail); 81 | 82 | flatbuf += tmp; 83 | } 84 | break; 85 | 86 | case YAKSI_TYPE_KIND__SUBARRAY: 87 | rc = flatten(type->u.subarray.primary, flatbuf); 88 | YAKSU_ERR_CHECK(rc, fn_fail); 89 | break; 90 | 91 | default: 92 | 
assert(0); 93 | } 94 | 95 | fn_exit: 96 | return rc; 97 | fn_fail: 98 | goto fn_exit; 99 | } 100 | 101 | YAKSA_API_PUBLIC int yaksa_flatten(yaksa_type_t type, void *flattened_type) 102 | { 103 | int rc = YAKSA_SUCCESS; 104 | yaksi_type_s *yaksi_type; 105 | 106 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 107 | 108 | rc = yaksi_type_get(type, &yaksi_type); 109 | YAKSU_ERR_CHECK(rc, fn_fail); 110 | 111 | rc = flatten(yaksi_type, flattened_type); 112 | YAKSU_ERR_CHECK(rc, fn_fail); 113 | 114 | fn_exit: 115 | return rc; 116 | fn_fail: 117 | goto fn_exit; 118 | } 119 | -------------------------------------------------------------------------------- /src/frontend/flatten/yaksa_flatten_size.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | int yaksi_flatten_size(yaksi_type_s * type, uintptr_t * flattened_type_size) 11 | { 12 | int rc = YAKSA_SUCCESS; 13 | uintptr_t tmp; 14 | 15 | *flattened_type_size = sizeof(yaksi_type_s); 16 | 17 | switch (type->kind) { 18 | case YAKSI_TYPE_KIND__BUILTIN: 19 | break; 20 | 21 | case YAKSI_TYPE_KIND__CONTIG: 22 | rc = yaksi_flatten_size(type->u.contig.child, &tmp); 23 | YAKSU_ERR_CHECK(rc, fn_fail); 24 | *flattened_type_size += tmp; 25 | break; 26 | 27 | case YAKSI_TYPE_KIND__DUP: 28 | rc = yaksi_flatten_size(type->u.dup.child, &tmp); 29 | YAKSU_ERR_CHECK(rc, fn_fail); 30 | *flattened_type_size += tmp; 31 | break; 32 | 33 | case YAKSI_TYPE_KIND__RESIZED: 34 | rc = yaksi_flatten_size(type->u.resized.child, &tmp); 35 | YAKSU_ERR_CHECK(rc, fn_fail); 36 | *flattened_type_size += tmp; 37 | break; 38 | 39 | case YAKSI_TYPE_KIND__HVECTOR: 40 | rc = yaksi_flatten_size(type->u.hvector.child, &tmp); 41 | YAKSU_ERR_CHECK(rc, fn_fail); 42 | *flattened_type_size += tmp; 43 | break; 44 | 45 | case YAKSI_TYPE_KIND__BLKHINDX: 46 | /* add 
space for array_of_displs */ 47 | *flattened_type_size += type->u.blkhindx.count * sizeof(intptr_t); 48 | 49 | rc = yaksi_flatten_size(type->u.blkhindx.child, &tmp); 50 | YAKSU_ERR_CHECK(rc, fn_fail); 51 | *flattened_type_size += tmp; 52 | break; 53 | 54 | case YAKSI_TYPE_KIND__HINDEXED: 55 | /* add space for array_of_blocklengths */ 56 | *flattened_type_size += type->u.hindexed.count * sizeof(intptr_t); 57 | /* add space for array_of_displs */ 58 | *flattened_type_size += type->u.hindexed.count * sizeof(intptr_t); 59 | 60 | rc = yaksi_flatten_size(type->u.hindexed.child, &tmp); 61 | YAKSU_ERR_CHECK(rc, fn_fail); 62 | *flattened_type_size += tmp; 63 | break; 64 | 65 | case YAKSI_TYPE_KIND__STRUCT: 66 | /* add space for array_of_blocklengths */ 67 | *flattened_type_size += type->u.str.count * sizeof(intptr_t); 68 | /* add space for array_of_displs */ 69 | *flattened_type_size += type->u.str.count * sizeof(intptr_t); 70 | 71 | for (int i = 0; i < type->u.str.count; i++) { 72 | rc = yaksi_flatten_size(type->u.str.array_of_types[i], &tmp); 73 | YAKSU_ERR_CHECK(rc, fn_fail); 74 | *flattened_type_size += tmp; 75 | } 76 | break; 77 | 78 | case YAKSI_TYPE_KIND__SUBARRAY: 79 | rc = yaksi_flatten_size(type->u.subarray.primary, &tmp); 80 | YAKSU_ERR_CHECK(rc, fn_fail); 81 | *flattened_type_size += tmp; 82 | break; 83 | 84 | default: 85 | assert(0); 86 | } 87 | 88 | fn_exit: 89 | return rc; 90 | fn_fail: 91 | goto fn_exit; 92 | } 93 | 94 | YAKSA_API_PUBLIC int yaksa_flatten_size(yaksa_type_t type, uintptr_t * flattened_type_size) 95 | { 96 | int rc = YAKSA_SUCCESS; 97 | yaksi_type_s *yaksi_type; 98 | 99 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 100 | 101 | rc = yaksi_type_get(type, &yaksi_type); 102 | YAKSU_ERR_CHECK(rc, fn_fail); 103 | 104 | rc = yaksi_flatten_size(yaksi_type, flattened_type_size); 105 | YAKSU_ERR_CHECK(rc, fn_fail); 106 | 107 | fn_exit: 108 | return rc; 109 | fn_fail: 110 | goto fn_exit; 111 | } 112 | 
-------------------------------------------------------------------------------- /src/frontend/include/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/include -I$(top_builddir)/src/frontend/include 7 | 8 | if EMBEDDED_BUILD 9 | noinst_HEADERS += \ 10 | src/frontend/include/yaksa.h 11 | else 12 | include_HEADERS += \ 13 | src/frontend/include/yaksa.h 14 | endif !EMBEDDED_BUILD 15 | 16 | noinst_HEADERS += \ 17 | src/frontend/include/yaksi.h 18 | -------------------------------------------------------------------------------- /src/frontend/info/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/info 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/frontend/info/yaksa_info.c 10 | -------------------------------------------------------------------------------- /src/frontend/info/yaksa_info.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | #include 11 | 12 | YAKSA_API_PUBLIC int yaksa_info_create(yaksa_info_t * info) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | yaksi_info_s *yaksi_info; 16 | 17 | yaksi_info = (yaksi_info_s *) malloc(sizeof(yaksi_info_s)); 18 | 19 | yaksu_atomic_store(&yaksi_info->refcount, 1); 20 | 21 | rc = yaksur_info_create_hook(yaksi_info); 22 | YAKSU_ERR_CHECK(rc, fn_fail); 23 | 24 | *info = yaksi_info; 25 | 26 | fn_exit: 27 | return rc; 28 | fn_fail: 29 | goto fn_exit; 30 | } 31 | 32 | YAKSA_API_PUBLIC int yaksa_info_free(yaksa_info_t 
info) 33 | { 34 | int rc = YAKSA_SUCCESS; 35 | yaksi_info_s *yaksi_info = (yaksi_info_s *) info; 36 | 37 | if (yaksu_atomic_decr(&yaksi_info->refcount) > 1) 38 | goto fn_exit; 39 | 40 | rc = yaksur_info_free_hook(yaksi_info); 41 | YAKSU_ERR_CHECK(rc, fn_fail); 42 | 43 | free(yaksi_info); 44 | 45 | fn_exit: 46 | return rc; 47 | fn_fail: 48 | goto fn_exit; 49 | } 50 | 51 | YAKSA_API_PUBLIC int yaksa_info_keyval_append(yaksa_info_t info, const char *key, const void *val, 52 | unsigned int vallen) 53 | { 54 | int rc = YAKSA_SUCCESS; 55 | yaksi_info_s *yaksi_info = (yaksi_info_s *) info; 56 | 57 | rc = yaksur_info_keyval_append(yaksi_info, key, val, vallen); 58 | YAKSU_ERR_CHECK(rc, fn_fail); 59 | 60 | fn_exit: 61 | return rc; 62 | fn_fail: 63 | goto fn_exit; 64 | } 65 | -------------------------------------------------------------------------------- /src/frontend/init/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/init 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/frontend/init/yaksa_init.c 10 | -------------------------------------------------------------------------------- /src/frontend/iov/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/iov 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/frontend/iov/yaksa_iov_len.c \ 10 | src/frontend/iov/yaksa_iov_len_max.c \ 11 | src/frontend/iov/yaksa_iov.c 12 | -------------------------------------------------------------------------------- /src/frontend/iov/yaksa_iov_len.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT 
in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | #include 11 | 12 | int yaksi_iov_len(uintptr_t count, yaksi_type_s * type, uintptr_t * iov_len) 13 | { 14 | if (type->is_contig) { 15 | *iov_len = 1; 16 | } else { 17 | *iov_len = count * type->num_contig; 18 | } 19 | 20 | return YAKSA_SUCCESS; 21 | } 22 | 23 | YAKSA_API_PUBLIC int yaksa_iov_len(uintptr_t count, yaksa_type_t type, uintptr_t * iov_len) 24 | { 25 | yaksi_type_s *yaksi_type; 26 | int rc = YAKSA_SUCCESS; 27 | 28 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 29 | 30 | rc = yaksi_type_get(type, &yaksi_type); 31 | YAKSU_ERR_CHECK(rc, fn_fail); 32 | 33 | rc = yaksi_iov_len(count, yaksi_type, iov_len); 34 | YAKSU_ERR_CHECK(rc, fn_fail); 35 | 36 | fn_exit: 37 | return rc; 38 | fn_fail: 39 | goto fn_exit; 40 | } 41 | -------------------------------------------------------------------------------- /src/frontend/pup/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/pup 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/frontend/pup/yaksa_ipack.c \ 10 | src/frontend/pup/yaksa_pack.c \ 11 | src/frontend/pup/yaksa_iunpack.c \ 12 | src/frontend/pup/yaksa_unpack.c \ 13 | src/frontend/pup/yaksa_pack_stream.c \ 14 | src/frontend/pup/yaksa_unpack_stream.c \ 15 | src/frontend/pup/yaksa_request.c \ 16 | src/frontend/pup/yaksi_ipack.c \ 17 | src/frontend/pup/yaksi_ipack_element.c \ 18 | src/frontend/pup/yaksi_ipack_backend.c \ 19 | src/frontend/pup/yaksi_iunpack.c \ 20 | src/frontend/pup/yaksi_iunpack_element.c \ 21 | src/frontend/pup/yaksi_iunpack_backend.c \ 22 | src/frontend/pup/yaksi_request.c 23 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksa_ipack.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | YAKSA_API_PUBLIC int yaksa_ipack(const void *inbuf, uintptr_t incount, yaksa_type_t type, 11 | uintptr_t inoffset, void *outbuf, uintptr_t max_pack_bytes, 12 | uintptr_t * actual_pack_bytes, yaksa_info_t info, yaksa_op_t op, 13 | yaksa_request_t * request) 14 | { 15 | int rc = YAKSA_SUCCESS; 16 | 17 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 18 | 19 | if (incount == 0) { 20 | *actual_pack_bytes = 0; 21 | *request = YAKSA_REQUEST__NULL; 22 | goto fn_exit; 23 | } 24 | 25 | yaksi_type_s *yaksi_type; 26 | rc = yaksi_type_get(type, &yaksi_type); 27 | YAKSU_ERR_CHECK(rc, fn_fail); 28 | 29 | if (yaksi_type->size == 0) { 30 | *actual_pack_bytes = 0; 31 | *request = YAKSA_REQUEST__NULL; 32 | goto fn_exit; 33 | } 34 | 35 | yaksi_request_s *yaksi_request; 36 | rc = yaksi_request_create(&yaksi_request); 37 | YAKSU_ERR_CHECK(rc, fn_fail); 38 | 39 | yaksi_info_s *yaksi_info; 40 | yaksi_info = (yaksi_info_s *) info; 41 | rc = yaksi_ipack(inbuf, incount, yaksi_type, inoffset, outbuf, max_pack_bytes, 42 | actual_pack_bytes, yaksi_info, op, yaksi_request); 43 | YAKSU_ERR_CHECK(rc, fn_fail); 44 | 45 | if (yaksu_atomic_load(&yaksi_request->cc)) { 46 | *request = yaksi_request->id; 47 | } else { 48 | rc = yaksi_request_free(yaksi_request); 49 | YAKSU_ERR_CHECK(rc, fn_fail); 50 | 51 | *request = YAKSA_REQUEST__NULL; 52 | } 53 | 54 | fn_exit: 55 | return rc; 56 | fn_fail: 57 | goto fn_exit; 58 | } 59 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksa_iunpack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 
"yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | #include 11 | 12 | YAKSA_API_PUBLIC int yaksa_iunpack(const void *inbuf, uintptr_t insize, void *outbuf, 13 | uintptr_t outcount, yaksa_type_t type, uintptr_t outoffset, 14 | uintptr_t * actual_unpack_bytes, yaksa_info_t info, 15 | yaksa_op_t op, yaksa_request_t * request) 16 | { 17 | int rc = YAKSA_SUCCESS; 18 | 19 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 20 | 21 | if (outcount == 0) { 22 | *actual_unpack_bytes = 0; 23 | *request = YAKSA_REQUEST__NULL; 24 | goto fn_exit; 25 | } 26 | 27 | yaksi_type_s *yaksi_type; 28 | rc = yaksi_type_get(type, &yaksi_type); 29 | YAKSU_ERR_CHECK(rc, fn_fail); 30 | 31 | if (yaksi_type->size == 0) { 32 | *actual_unpack_bytes = 0; 33 | *request = YAKSA_REQUEST__NULL; 34 | goto fn_exit; 35 | } 36 | 37 | yaksi_request_s *yaksi_request; 38 | yaksi_request = NULL; 39 | rc = yaksi_request_create(&yaksi_request); 40 | YAKSU_ERR_CHECK(rc, fn_fail); 41 | 42 | yaksi_info_s *yaksi_info; 43 | yaksi_info = (yaksi_info_s *) info; 44 | rc = yaksi_iunpack(inbuf, insize, outbuf, outcount, yaksi_type, outoffset, actual_unpack_bytes, 45 | yaksi_info, op, yaksi_request); 46 | YAKSU_ERR_CHECK(rc, fn_fail); 47 | 48 | if (yaksu_atomic_load(&yaksi_request->cc)) { 49 | *request = yaksi_request->id; 50 | } else { 51 | rc = yaksi_request_free(yaksi_request); 52 | YAKSU_ERR_CHECK(rc, fn_fail); 53 | 54 | *request = YAKSA_REQUEST__NULL; 55 | } 56 | 57 | fn_exit: 58 | return rc; 59 | fn_fail: 60 | goto fn_exit; 61 | } 62 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksa_pack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | YAKSA_API_PUBLIC int yaksa_pack(const void *inbuf, uintptr_t incount, yaksa_type_t type, 11 | 
uintptr_t inoffset, void *outbuf, uintptr_t max_pack_bytes, 12 | uintptr_t * actual_pack_bytes, yaksa_info_t info, yaksa_op_t op) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | 16 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 17 | 18 | if (incount == 0) { 19 | *actual_pack_bytes = 0; 20 | goto fn_exit; 21 | } 22 | 23 | yaksi_type_s *yaksi_type; 24 | rc = yaksi_type_get(type, &yaksi_type); 25 | YAKSU_ERR_CHECK(rc, fn_fail); 26 | 27 | if (yaksi_type->size == 0) { 28 | *actual_pack_bytes = 0; 29 | goto fn_exit; 30 | } 31 | 32 | yaksi_request_s *yaksi_request; 33 | yaksi_request = NULL; 34 | rc = yaksi_request_create(&yaksi_request); 35 | YAKSU_ERR_CHECK(rc, fn_fail); 36 | yaksi_request_set_blocking(yaksi_request); 37 | 38 | yaksi_info_s *yaksi_info; 39 | yaksi_info = (yaksi_info_s *) info; 40 | rc = yaksi_ipack(inbuf, incount, yaksi_type, inoffset, outbuf, max_pack_bytes, 41 | actual_pack_bytes, yaksi_info, op, yaksi_request); 42 | YAKSU_ERR_CHECK(rc, fn_fail); 43 | 44 | if (yaksu_atomic_load(&yaksi_request->cc)) { 45 | rc = yaksur_request_wait(yaksi_request); 46 | YAKSU_ERR_CHECK(rc, fn_fail); 47 | } 48 | 49 | rc = yaksi_request_free(yaksi_request); 50 | YAKSU_ERR_CHECK(rc, fn_fail); 51 | 52 | fn_exit: 53 | return rc; 54 | fn_fail: 55 | goto fn_exit; 56 | } 57 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksa_pack_stream.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | YAKSA_API_PUBLIC int yaksa_pack_stream(const void *inbuf, uintptr_t incount, yaksa_type_t type, 11 | uintptr_t inoffset, void *outbuf, uintptr_t max_pack_bytes, 12 | uintptr_t * actual_pack_bytes, yaksa_info_t info, 13 | yaksa_op_t op, void *stream) 14 | { 15 | int rc = YAKSA_SUCCESS; 16 | 17 | 
assert(yaksu_atomic_load(&yaksi_is_initialized)); 18 | 19 | if (incount == 0) { 20 | *actual_pack_bytes = 0; 21 | goto fn_exit; 22 | } 23 | 24 | yaksi_type_s *yaksi_type; 25 | rc = yaksi_type_get(type, &yaksi_type); 26 | YAKSU_ERR_CHECK(rc, fn_fail); 27 | 28 | if (yaksi_type->size == 0) { 29 | *actual_pack_bytes = 0; 30 | goto fn_exit; 31 | } 32 | 33 | yaksi_request_s *yaksi_request; 34 | rc = yaksi_request_create(&yaksi_request); 35 | YAKSU_ERR_CHECK(rc, fn_fail); 36 | yaksi_request_set_stream(yaksi_request, stream); 37 | 38 | yaksi_info_s *yaksi_info; 39 | yaksi_info = (yaksi_info_s *) info; 40 | 41 | rc = yaksi_ipack(inbuf, incount, yaksi_type, inoffset, outbuf, max_pack_bytes, 42 | actual_pack_bytes, yaksi_info, op, yaksi_request); 43 | YAKSU_ERR_CHECK(rc, fn_fail); 44 | 45 | rc = yaksi_request_free(yaksi_request); 46 | YAKSU_ERR_CHECK(rc, fn_fail); 47 | 48 | fn_exit: 49 | return rc; 50 | fn_fail: 51 | goto fn_exit; 52 | } 53 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksa_request.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | YAKSA_API_PUBLIC int yaksa_request_test(yaksa_request_t request, int *completed) 11 | { 12 | int rc = YAKSA_SUCCESS; 13 | 14 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 15 | 16 | if (request == YAKSA_REQUEST__NULL) { 17 | *completed = 1; 18 | goto fn_exit; 19 | } 20 | 21 | yaksi_request_s *yaksi_request; 22 | rc = yaksi_request_get(request, &yaksi_request); 23 | YAKSU_ERR_CHECK(rc, fn_fail); 24 | 25 | if (yaksu_atomic_load(&yaksi_request->cc)) { 26 | rc = yaksur_request_test(yaksi_request); 27 | YAKSU_ERR_CHECK(rc, fn_fail); 28 | } 29 | 30 | *completed = !yaksu_atomic_load(&yaksi_request->cc); 31 | 32 | if (*completed) { 33 | rc = 
yaksi_request_free(yaksi_request); 34 | YAKSU_ERR_CHECK(rc, fn_fail); 35 | } 36 | 37 | fn_exit: 38 | return rc; 39 | fn_fail: 40 | goto fn_exit; 41 | } 42 | 43 | YAKSA_API_PUBLIC int yaksa_request_wait(yaksa_request_t request) 44 | { 45 | int rc = YAKSA_SUCCESS; 46 | 47 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 48 | 49 | if (request == YAKSA_REQUEST__NULL) { 50 | goto fn_exit; 51 | } 52 | 53 | yaksi_request_s *yaksi_request; 54 | rc = yaksi_request_get(request, &yaksi_request); 55 | YAKSU_ERR_CHECK(rc, fn_fail); 56 | 57 | if (yaksu_atomic_load(&yaksi_request->cc)) { 58 | rc = yaksur_request_wait(yaksi_request); 59 | YAKSU_ERR_CHECK(rc, fn_fail); 60 | } 61 | 62 | assert(!yaksu_atomic_load(&yaksi_request->cc)); 63 | rc = yaksi_request_free(yaksi_request); 64 | YAKSU_ERR_CHECK(rc, fn_fail); 65 | 66 | fn_exit: 67 | return rc; 68 | fn_fail: 69 | goto fn_exit; 70 | } 71 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksa_unpack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | #include 11 | 12 | YAKSA_API_PUBLIC int yaksa_unpack(const void *inbuf, uintptr_t insize, void *outbuf, 13 | uintptr_t outcount, yaksa_type_t type, uintptr_t outoffset, 14 | uintptr_t * actual_unpack_bytes, yaksa_info_t info, yaksa_op_t op) 15 | { 16 | int rc = YAKSA_SUCCESS; 17 | 18 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 19 | 20 | if (outcount == 0) { 21 | *actual_unpack_bytes = 0; 22 | goto fn_exit; 23 | } 24 | 25 | yaksi_type_s *yaksi_type; 26 | rc = yaksi_type_get(type, &yaksi_type); 27 | YAKSU_ERR_CHECK(rc, fn_fail); 28 | 29 | if (yaksi_type->size == 0) { 30 | *actual_unpack_bytes = 0; 31 | goto fn_exit; 32 | } 33 | 34 | yaksi_request_s *yaksi_request; 35 | yaksi_request = NULL; 36 | rc = 
yaksi_request_create(&yaksi_request); 37 | YAKSU_ERR_CHECK(rc, fn_fail); 38 | yaksi_request_set_blocking(yaksi_request); 39 | 40 | yaksi_info_s *yaksi_info; 41 | yaksi_info = (yaksi_info_s *) info; 42 | rc = yaksi_iunpack(inbuf, insize, outbuf, outcount, yaksi_type, outoffset, actual_unpack_bytes, 43 | yaksi_info, op, yaksi_request); 44 | YAKSU_ERR_CHECK(rc, fn_fail); 45 | 46 | if (yaksu_atomic_load(&yaksi_request->cc)) { 47 | rc = yaksur_request_wait(yaksi_request); 48 | YAKSU_ERR_CHECK(rc, fn_fail); 49 | } 50 | 51 | rc = yaksi_request_free(yaksi_request); 52 | YAKSU_ERR_CHECK(rc, fn_fail); 53 | 54 | fn_exit: 55 | return rc; 56 | fn_fail: 57 | goto fn_exit; 58 | } 59 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksa_unpack_stream.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | YAKSA_API_PUBLIC int yaksa_unpack_stream(const void *inbuf, uintptr_t insize, void *outbuf, 11 | uintptr_t outcount, yaksa_type_t type, uintptr_t outoffset, 12 | uintptr_t * actual_unpack_bytes, yaksa_info_t info, 13 | yaksa_op_t op, void *stream) 14 | { 15 | int rc = YAKSA_SUCCESS; 16 | 17 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 18 | 19 | if (outcount == 0) { 20 | *actual_unpack_bytes = 0; 21 | goto fn_exit; 22 | } 23 | 24 | yaksi_type_s *yaksi_type; 25 | rc = yaksi_type_get(type, &yaksi_type); 26 | YAKSU_ERR_CHECK(rc, fn_fail); 27 | 28 | if (yaksi_type->size == 0) { 29 | *actual_unpack_bytes = 0; 30 | goto fn_exit; 31 | } 32 | 33 | yaksi_request_s *yaksi_request; 34 | rc = yaksi_request_create(&yaksi_request); 35 | YAKSU_ERR_CHECK(rc, fn_fail); 36 | yaksi_request_set_stream(yaksi_request, stream); 37 | 38 | yaksi_info_s *yaksi_info; 39 | yaksi_info = (yaksi_info_s *) info; 40 | 41 | rc = yaksi_iunpack(inbuf, 
/*
 * yaksi_ipack - pack up to `max_pack_bytes` bytes out of `incount` elements
 * of `type` stored at `inbuf`, starting `inoffset` bytes into the packed
 * representation, and write them to `outbuf`.
 *
 *   actual_pack_bytes  out: reset to 0 on entry, then increased by the byte
 *                      count of every step that runs
 *   info, op, request  forwarded unchanged to yaksi_ipack_element and
 *                      yaksi_ipack_backend
 *
 * Returns YAKSA_SUCCESS, or the first error code returned by a callee.
 *
 * NOTE(review): `incount * type->size - inoffset` is unsigned arithmetic; it
 * presumably relies on the caller guaranteeing inoffset <= incount *
 * type->size -- confirm against callers.
 */
int yaksi_ipack(const void *inbuf, uintptr_t incount, yaksi_type_s * type, uintptr_t inoffset,
                void *outbuf, uintptr_t max_pack_bytes, uintptr_t * actual_pack_bytes,
                yaksi_info_s * info, yaksa_op_t op, yaksi_request_s * request)
{
    int rc = YAKSA_SUCCESS;

    *actual_pack_bytes = 0;

    /* We follow these steps:
     *
     * 1. Skip the first few elements till we reach an element that
     *    can contribute some data to our packing.
     *
     * 2. Partial pack the next element.  Once packed, if either the
     *    pack buffer is full or there's data left over in this
     *    element, return.
     *
     * 3. Perform a full pack of the next few elements.
     *
     * 4. Partial pack the next element.  Once packed, if either the
     *    pack buffer is full or there's data left over in this
     *    element, return.
     *
     * In the common case, we expect to execute only step 3.
     */

    /* sbuf walks the typed input in units of type->extent; dbuf walks the
     * packed output in units of bytes actually written */
    const char *sbuf = (const char *) inbuf;
    char *dbuf = (char *) outbuf;
    uintptr_t remoffset = inoffset;
    /* bytes still to pack: bounded by the output space and by what is left
     * of the input after the offset */
    uintptr_t rem_pack_bytes = YAKSU_MIN(max_pack_bytes, incount * type->size - inoffset);
    uintptr_t tmp_pack_bytes;

    /* step 1: skip the first few elements */
    if (remoffset) {
        uintptr_t skipelems = remoffset / type->size;

        /* what remains is the offset inside the first element we touch */
        remoffset %= type->size;
        sbuf += skipelems * type->extent;
    }


    /* step 2: partial pack the next element */
    if (remoffset) {
        assert(type->size > remoffset);

        rc = yaksi_ipack_element(sbuf, type, remoffset, dbuf, rem_pack_bytes, &tmp_pack_bytes,
                                 info, op, request);
        YAKSU_ERR_CHECK(rc, fn_fail);

        *actual_pack_bytes += tmp_pack_bytes;
        rem_pack_bytes -= tmp_pack_bytes;

        if (rem_pack_bytes == 0) {
            /* if we are out of pack buffer space, return */
            goto fn_exit;
        } else if (tmp_pack_bytes < type->size - remoffset) {
            /* if we could not pack all of the data in the input type
             * for some reason, return */
            goto fn_exit;
        }

        /* the partial element is finished; continue on an element boundary */
        remoffset = 0;
        sbuf += type->extent;
        dbuf += tmp_pack_bytes;
    }


    /* step 3: perform a full pack of the next few elements */
    uintptr_t numelems;
    numelems = rem_pack_bytes / type->size;
    if (numelems) {
        rc = yaksi_ipack_backend(sbuf, dbuf, numelems, type, info, op, request);
        YAKSU_ERR_CHECK(rc, fn_fail);

        /* the backend call is accounted as packing numelems whole elements */
        *actual_pack_bytes += numelems * type->size;
        rem_pack_bytes -= numelems * type->size;

        sbuf += numelems * type->extent;
        dbuf += numelems * type->size;
    }


    /* step 4: partial pack the next element */
    if (rem_pack_bytes) {
        /* remoffset is 0 here whenever step 2 fell through to this point */
        rc = yaksi_ipack_element(sbuf, type, remoffset, dbuf, rem_pack_bytes, &tmp_pack_bytes,
                                 info, op, request);
        YAKSU_ERR_CHECK(rc, fn_fail);

        *actual_pack_bytes += tmp_pack_bytes;
    }

  fn_exit:
    return rc;
  fn_fail:
    goto fn_exit;
}
40 | */ 41 | 42 | const char *sbuf; 43 | sbuf = (const char *) inbuf; 44 | char *dbuf; 45 | dbuf = (char *) outbuf; 46 | uintptr_t remoffset; 47 | remoffset = outoffset; 48 | uintptr_t rem_unpack_bytes; 49 | rem_unpack_bytes = YAKSU_MIN(insize, outcount * type->size - outoffset); 50 | 51 | /* step 1: skip the first few elements */ 52 | if (remoffset) { 53 | uintptr_t skipelems = remoffset / type->size; 54 | 55 | remoffset %= type->size; 56 | dbuf += skipelems * type->extent; 57 | } 58 | 59 | 60 | /* step 2: partial unpack the next element */ 61 | if (remoffset) { 62 | assert(type->size > remoffset); 63 | 64 | uintptr_t tmp_unpack_bytes = YAKSU_MIN(rem_unpack_bytes, type->size - remoffset); 65 | uintptr_t tmp_actual_unpack_bytes; 66 | 67 | rc = yaksi_iunpack_element(sbuf, tmp_unpack_bytes, dbuf, type, remoffset, 68 | &tmp_actual_unpack_bytes, info, op, request); 69 | YAKSU_ERR_CHECK(rc, fn_fail); 70 | 71 | rem_unpack_bytes -= tmp_actual_unpack_bytes; 72 | *actual_unpack_bytes += tmp_actual_unpack_bytes; 73 | 74 | if (rem_unpack_bytes == 0 || tmp_actual_unpack_bytes < type->size - remoffset) { 75 | /* if we are out of unpack buffer space or if we could not 76 | * unpack fully, return */ 77 | goto fn_exit; 78 | } 79 | 80 | remoffset = 0; 81 | sbuf += tmp_unpack_bytes; 82 | dbuf += type->extent; 83 | } 84 | 85 | 86 | /* step 3: perform a full unpack of the next few elements */ 87 | uintptr_t numelems; 88 | numelems = rem_unpack_bytes / type->size; 89 | if (numelems) { 90 | rc = yaksi_iunpack_backend(sbuf, dbuf, numelems, type, info, op, request); 91 | YAKSU_ERR_CHECK(rc, fn_fail); 92 | 93 | rem_unpack_bytes -= numelems * type->size; 94 | *actual_unpack_bytes += numelems * type->size; 95 | 96 | sbuf += numelems * type->size; 97 | dbuf += numelems * type->extent; 98 | } 99 | 100 | 101 | /* step 4: partial unpack the next element */ 102 | if (rem_unpack_bytes) { 103 | uintptr_t tmp_actual_unpack_bytes; 104 | 105 | rc = yaksi_iunpack_element(sbuf, rem_unpack_bytes, dbuf, 
type, remoffset, 106 | &tmp_actual_unpack_bytes, info, op, request); 107 | YAKSU_ERR_CHECK(rc, fn_fail); 108 | 109 | *actual_unpack_bytes += tmp_actual_unpack_bytes; 110 | } 111 | 112 | fn_exit: 113 | return rc; 114 | fn_fail: 115 | goto fn_exit; 116 | } 117 | -------------------------------------------------------------------------------- /src/frontend/pup/yaksi_request.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | int yaksi_request_create(yaksi_request_s ** request) 11 | { 12 | int rc = YAKSA_SUCCESS; 13 | yaksi_request_s *req; 14 | 15 | req = (yaksi_request_s *) malloc(sizeof(yaksi_request_s)); 16 | YAKSU_ERR_CHKANDJUMP(!req, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 17 | 18 | rc = yaksu_handle_pool_elem_alloc(yaksi_global.request_handle_pool, &req->id, req); 19 | YAKSU_ERR_CHECK(rc, fn_fail); 20 | 21 | assert(req->id < ((yaksa_request_t) 1 << YAKSI_REQUEST_OBJECT_ID_BITS)); 22 | 23 | yaksu_atomic_store(&req->cc, 0); 24 | req->kind = YAKSI_REQUEST_KIND__NONBLOCKING; 25 | req->always_query_ptr_attr = false; 26 | 27 | rc = yaksur_request_create_hook(req); 28 | YAKSU_ERR_CHECK(rc, fn_fail); 29 | 30 | *request = req; 31 | 32 | fn_exit: 33 | return rc; 34 | fn_fail: 35 | goto fn_exit; 36 | } 37 | 38 | int yaksi_request_free(yaksi_request_s * request) 39 | { 40 | int rc = YAKSA_SUCCESS; 41 | 42 | rc = yaksur_request_free_hook(request); 43 | YAKSU_ERR_CHECK(rc, fn_fail); 44 | 45 | rc = yaksu_handle_pool_elem_free(yaksi_global.request_handle_pool, request->id); 46 | YAKSU_ERR_CHECK(rc, fn_fail); 47 | 48 | free(request); 49 | 50 | fn_exit: 51 | return rc; 52 | fn_fail: 53 | goto fn_exit; 54 | } 55 | 56 | int yaksi_request_get(yaksa_request_t request, struct yaksi_request_s **yaksi_request) 57 | { 58 | int rc = YAKSA_SUCCESS; 59 | yaksu_handle_t id = 
YAKSI_REQUEST_GET_OBJECT_ID(request); 60 | 61 | rc = yaksu_handle_pool_elem_get(yaksi_global.request_handle_pool, id, 62 | (const void **) yaksi_request); 63 | YAKSU_ERR_CHECK(rc, fn_fail); 64 | 65 | fn_exit: 66 | return rc; 67 | fn_fail: 68 | goto fn_exit; 69 | } 70 | 71 | void yaksi_request_set_blocking(yaksi_request_s * request) 72 | { 73 | request->kind = YAKSI_REQUEST_KIND__BLOCKING; 74 | } 75 | 76 | void yaksi_request_set_stream(yaksi_request_s * request, void *stream) 77 | { 78 | /* We assume the stream pointer points to cudaStream_t or hipStream_t, i.e. 79 | * the stream type dictated by the corresponding gpu driver id */ 80 | request->kind = YAKSI_REQUEST_KIND__GPU_STREAM; 81 | request->stream = stream; 82 | } 83 | -------------------------------------------------------------------------------- /src/frontend/types/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/frontend/types 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/frontend/types/yaksa_vector.c \ 10 | src/frontend/types/yaksa_contig.c \ 11 | src/frontend/types/yaksa_dup.c \ 12 | src/frontend/types/yaksa_resized.c \ 13 | src/frontend/types/yaksa_blkindx.c \ 14 | src/frontend/types/yaksa_indexed.c \ 15 | src/frontend/types/yaksa_subarray.c \ 16 | src/frontend/types/yaksa_struct.c \ 17 | src/frontend/types/yaksa_free.c \ 18 | src/frontend/types/yaksi_type.c 19 | -------------------------------------------------------------------------------- /src/frontend/types/yaksa_contig.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | 11 | int yaksi_type_create_contig(intptr_t count, yaksi_type_s * intype, 
yaksi_type_s ** newtype) 12 | { 13 | int rc = YAKSA_SUCCESS; 14 | 15 | /* shortcut for dup types */ 16 | if (count == 1) { 17 | rc = yaksi_type_create_dup(intype, newtype); 18 | YAKSU_ERR_CHECK(rc, fn_fail); 19 | goto fn_exit; 20 | } 21 | 22 | yaksi_type_s *outtype; 23 | outtype = (yaksi_type_s *) malloc(sizeof(yaksi_type_s)); 24 | YAKSU_ERR_CHKANDJUMP(!outtype, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 25 | yaksu_atomic_store(&outtype->refcount, 1); 26 | 27 | yaksu_atomic_incr(&intype->refcount); 28 | 29 | outtype->kind = YAKSI_TYPE_KIND__CONTIG; 30 | outtype->tree_depth = intype->tree_depth + 1; 31 | outtype->size = intype->size * count; 32 | outtype->alignment = intype->alignment; 33 | 34 | if (intype->extent > 0) { 35 | outtype->lb = intype->lb; 36 | outtype->ub = intype->ub + (count - 1) * intype->extent; 37 | } else { 38 | outtype->lb = intype->lb + (count - 1) * intype->extent; 39 | outtype->ub = intype->ub; 40 | } 41 | 42 | outtype->true_lb = outtype->lb + intype->true_lb - intype->lb; 43 | outtype->true_ub = outtype->ub - intype->ub + intype->true_ub; 44 | outtype->extent = outtype->ub - outtype->lb; 45 | 46 | /* detect if the outtype is contiguous */ 47 | outtype->is_contig = intype->is_contig; 48 | 49 | if (outtype->is_contig) { 50 | outtype->num_contig = 1; 51 | } else { 52 | outtype->num_contig = count * intype->num_contig; 53 | } 54 | 55 | outtype->u.contig.count = count; 56 | outtype->u.contig.child = intype; 57 | 58 | rc = yaksur_type_create_hook(outtype); 59 | YAKSU_ERR_CHECK(rc, fn_fail); 60 | *newtype = outtype; 61 | 62 | fn_exit: 63 | return rc; 64 | fn_fail: 65 | goto fn_exit; 66 | } 67 | 68 | YAKSA_API_PUBLIC int yaksa_type_create_contig(intptr_t count, yaksa_type_t oldtype, 69 | yaksa_info_t info, yaksa_type_t * newtype) 70 | { 71 | int rc = YAKSA_SUCCESS; 72 | 73 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 74 | 75 | yaksi_type_s *intype; 76 | rc = yaksi_type_get(oldtype, &intype); 77 | YAKSU_ERR_CHECK(rc, fn_fail); 78 | 79 | if (count * 
intype->size == 0) { 80 | *newtype = YAKSA_TYPE__NULL; 81 | goto fn_exit; 82 | } 83 | 84 | yaksi_type_s *outtype; 85 | rc = yaksi_type_create_contig(count, intype, &outtype); 86 | YAKSU_ERR_CHECK(rc, fn_fail); 87 | 88 | rc = yaksi_type_handle_alloc(outtype, newtype); 89 | YAKSU_ERR_CHECK(rc, fn_fail); 90 | 91 | fn_exit: 92 | return rc; 93 | fn_fail: 94 | goto fn_exit; 95 | } 96 | -------------------------------------------------------------------------------- /src/frontend/types/yaksa_dup.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | 11 | int yaksi_type_create_dup(yaksi_type_s * intype, yaksi_type_s ** newtype) 12 | { 13 | int rc = YAKSA_SUCCESS; 14 | 15 | yaksu_atomic_incr(&intype->refcount); 16 | *newtype = intype; 17 | 18 | return rc; 19 | } 20 | 21 | YAKSA_API_PUBLIC int yaksa_type_create_dup(yaksa_type_t oldtype, yaksa_info_t info, 22 | yaksa_type_t * newtype) 23 | { 24 | int rc = YAKSA_SUCCESS; 25 | 26 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 27 | 28 | yaksi_type_s *intype; 29 | rc = yaksi_type_get(oldtype, &intype); 30 | YAKSU_ERR_CHECK(rc, fn_fail); 31 | 32 | yaksi_type_s *outtype; 33 | rc = yaksi_type_create_dup(intype, &outtype); 34 | YAKSU_ERR_CHECK(rc, fn_fail); 35 | 36 | rc = yaksi_type_handle_alloc(outtype, newtype); 37 | YAKSU_ERR_CHECK(rc, fn_fail); 38 | 39 | fn_exit: 40 | return rc; 41 | fn_fail: 42 | goto fn_exit; 43 | } 44 | -------------------------------------------------------------------------------- /src/frontend/types/yaksa_free.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksa.h" 7 | #include "yaksu.h" 8 | #include "yaksi.h" 9 | #include 10 | 
#include 11 | 12 | int yaksi_type_free(yaksi_type_s * type) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | 16 | int ret = yaksu_atomic_decr(&type->refcount); 17 | assert(ret >= 1); 18 | 19 | if (ret > 1) { 20 | goto fn_exit; 21 | } 22 | 23 | rc = yaksur_type_free_hook(type); 24 | YAKSU_ERR_CHECK(rc, fn_fail); 25 | 26 | /* free the child types */ 27 | switch (type->kind) { 28 | case YAKSI_TYPE_KIND__CONTIG: 29 | rc = yaksi_type_free(type->u.contig.child); 30 | YAKSU_ERR_CHECK(rc, fn_fail); 31 | break; 32 | 33 | case YAKSI_TYPE_KIND__DUP: 34 | rc = yaksi_type_free(type->u.dup.child); 35 | YAKSU_ERR_CHECK(rc, fn_fail); 36 | break; 37 | 38 | case YAKSI_TYPE_KIND__RESIZED: 39 | rc = yaksi_type_free(type->u.resized.child); 40 | YAKSU_ERR_CHECK(rc, fn_fail); 41 | break; 42 | 43 | case YAKSI_TYPE_KIND__HVECTOR: 44 | rc = yaksi_type_free(type->u.hvector.child); 45 | YAKSU_ERR_CHECK(rc, fn_fail); 46 | break; 47 | 48 | case YAKSI_TYPE_KIND__BLKHINDX: 49 | rc = yaksi_type_free(type->u.blkhindx.child); 50 | YAKSU_ERR_CHECK(rc, fn_fail); 51 | free(type->u.blkhindx.array_of_displs); 52 | break; 53 | 54 | case YAKSI_TYPE_KIND__HINDEXED: 55 | rc = yaksi_type_free(type->u.hindexed.child); 56 | YAKSU_ERR_CHECK(rc, fn_fail); 57 | free(type->u.hindexed.array_of_blocklengths); 58 | free(type->u.hindexed.array_of_displs); 59 | break; 60 | 61 | case YAKSI_TYPE_KIND__STRUCT: 62 | for (int i = 0; i < type->u.str.count; i++) { 63 | rc = yaksi_type_free(type->u.str.array_of_types[i]); 64 | YAKSU_ERR_CHECK(rc, fn_fail); 65 | } 66 | free(type->u.str.array_of_types); 67 | free(type->u.str.array_of_blocklengths); 68 | free(type->u.str.array_of_displs); 69 | break; 70 | 71 | case YAKSI_TYPE_KIND__SUBARRAY: 72 | rc = yaksi_type_free(type->u.subarray.primary); 73 | YAKSU_ERR_CHECK(rc, fn_fail); 74 | break; 75 | 76 | default: 77 | break; 78 | } 79 | 80 | free(type); 81 | 82 | fn_exit: 83 | return rc; 84 | fn_fail: 85 | goto fn_exit; 86 | } 87 | 88 | YAKSA_API_PUBLIC int yaksa_type_free(yaksa_type_t type) 
89 | { 90 | yaksi_type_s *yaksi_type; 91 | int rc = YAKSA_SUCCESS; 92 | 93 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 94 | 95 | if (type == YAKSA_TYPE__NULL) 96 | goto fn_exit; 97 | 98 | yaksu_handle_t id; 99 | id = (yaksu_handle_t) type; 100 | rc = yaksi_type_handle_dealloc(id, &yaksi_type); 101 | YAKSU_ERR_CHECK(rc, fn_fail); 102 | 103 | rc = yaksi_type_free(yaksi_type); 104 | YAKSU_ERR_CHECK(rc, fn_fail); 105 | 106 | fn_exit: 107 | return rc; 108 | fn_fail: 109 | goto fn_exit; 110 | } 111 | -------------------------------------------------------------------------------- /src/frontend/types/yaksa_resized.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | #include 10 | 11 | int yaksi_type_create_resized(yaksi_type_s * intype, intptr_t lb, intptr_t extent, 12 | yaksi_type_s ** newtype) 13 | { 14 | int rc = YAKSA_SUCCESS; 15 | 16 | if (lb == intype->lb && extent == intype->extent) { 17 | rc = yaksi_type_create_dup(intype, newtype); 18 | YAKSU_ERR_CHECK(rc, fn_fail); 19 | goto fn_exit; 20 | } 21 | 22 | yaksi_type_s *outtype; 23 | outtype = (yaksi_type_s *) malloc(sizeof(yaksi_type_s)); 24 | YAKSU_ERR_CHKANDJUMP(!outtype, rc, YAKSA_ERR__OUT_OF_MEM, fn_fail); 25 | yaksu_atomic_store(&outtype->refcount, 1); 26 | 27 | yaksu_atomic_incr(&intype->refcount); 28 | 29 | outtype->kind = YAKSI_TYPE_KIND__RESIZED; 30 | outtype->tree_depth = intype->tree_depth + 1; 31 | outtype->size = intype->size; 32 | outtype->alignment = intype->alignment; 33 | 34 | outtype->lb = lb; 35 | outtype->ub = lb + extent; 36 | outtype->true_lb = intype->true_lb; 37 | outtype->true_ub = intype->true_ub; 38 | outtype->extent = outtype->ub - outtype->lb; 39 | 40 | /* detect if the outtype is contiguous */ 41 | if (intype->is_contig && ((outtype->ub - outtype->lb) == outtype->size)) { 42 | 
outtype->is_contig = true; 43 | } else { 44 | outtype->is_contig = false; 45 | } 46 | 47 | outtype->num_contig = intype->num_contig; 48 | 49 | outtype->u.resized.child = intype; 50 | 51 | rc = yaksur_type_create_hook(outtype); 52 | YAKSU_ERR_CHECK(rc, fn_fail); 53 | *newtype = outtype; 54 | 55 | fn_exit: 56 | return rc; 57 | fn_fail: 58 | goto fn_exit; 59 | } 60 | 61 | YAKSA_API_PUBLIC int yaksa_type_create_resized(yaksa_type_t oldtype, intptr_t lb, intptr_t extent, 62 | yaksa_info_t info, yaksa_type_t * newtype) 63 | { 64 | int rc = YAKSA_SUCCESS; 65 | 66 | assert(yaksu_atomic_load(&yaksi_is_initialized)); 67 | 68 | yaksi_type_s *intype; 69 | rc = yaksi_type_get(oldtype, &intype); 70 | YAKSU_ERR_CHECK(rc, fn_fail); 71 | 72 | yaksi_type_s *outtype; 73 | rc = yaksi_type_create_resized(intype, lb, extent, &outtype); 74 | YAKSU_ERR_CHECK(rc, fn_fail); 75 | 76 | rc = yaksi_type_handle_alloc(outtype, newtype); 77 | YAKSU_ERR_CHECK(rc, fn_fail); 78 | 79 | fn_exit: 80 | return rc; 81 | fn_fail: 82 | goto fn_exit; 83 | } 84 | -------------------------------------------------------------------------------- /src/frontend/types/yaksi_type.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksi.h" 7 | #include "yaksu.h" 8 | #include 9 | 10 | int yaksi_type_handle_alloc(yaksi_type_s * type, yaksa_type_t * handle) 11 | { 12 | int rc = YAKSA_SUCCESS; 13 | yaksu_handle_t id; 14 | 15 | rc = yaksu_handle_pool_elem_alloc(yaksi_global.type_handle_pool, &id, type); 16 | YAKSU_ERR_CHECK(rc, fn_fail); 17 | 18 | *handle = 0; 19 | YAKSI_TYPE_SET_OBJECT_ID(*handle, id); 20 | 21 | fn_exit: 22 | return rc; 23 | fn_fail: 24 | goto fn_exit; 25 | } 26 | 27 | int yaksi_type_handle_dealloc(yaksa_type_t handle, yaksi_type_s ** type) 28 | { 29 | int rc = YAKSA_SUCCESS; 30 | yaksu_handle_t id = YAKSI_TYPE_GET_OBJECT_ID(handle); 31 | 32 | rc = 
yaksu_handle_pool_elem_get(yaksi_global.type_handle_pool, id, (const void **) type); 33 | YAKSU_ERR_CHECK(rc, fn_fail); 34 | 35 | rc = yaksu_handle_pool_elem_free(yaksi_global.type_handle_pool, id); 36 | YAKSU_ERR_CHECK(rc, fn_fail); 37 | 38 | fn_exit: 39 | return rc; 40 | fn_fail: 41 | goto fn_exit; 42 | } 43 | 44 | int yaksi_type_get(yaksa_type_t handle, yaksi_type_s ** type) 45 | { 46 | int rc = YAKSA_SUCCESS; 47 | yaksu_handle_t id = YAKSI_TYPE_GET_OBJECT_ID(handle); 48 | 49 | rc = yaksu_handle_pool_elem_get(yaksi_global.type_handle_pool, id, (const void **) type); 50 | YAKSU_ERR_CHECK(rc, fn_fail); 51 | 52 | fn_exit: 53 | return rc; 54 | fn_fail: 55 | goto fn_exit; 56 | } 57 | -------------------------------------------------------------------------------- /src/util/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | AM_CPPFLAGS += -I$(top_srcdir)/src/util 7 | 8 | libyaksa_la_SOURCES += \ 9 | src/util/yaksu_buffer_pool.c \ 10 | src/util/yaksu_handle_pool.c 11 | 12 | noinst_HEADERS += \ 13 | src/util/yaksu.h \ 14 | src/util/yaksu_base.h \ 15 | src/util/yaksu_atomics.h \ 16 | src/util/yaksu_buffer_pool.h \ 17 | src/util/yaksu_handle_pool.h 18 | 19 | if !HAVE_C11_ATOMICS 20 | libyaksa_la_SOURCES += \ 21 | src/util/yaksu_atomics.c 22 | endif !HAVE_C11_ATOMICS 23 | -------------------------------------------------------------------------------- /src/util/yaksu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef YAKSU_H_INCLUDED 7 | #define YAKSU_H_INCLUDED 8 | 9 | #include "yaksu_base.h" 10 | #include "yaksu_atomics.h" 11 | #include "yaksu_buffer_pool.h" 12 | #include "yaksu_handle_pool.h" 13 | 14 | #endif /* YAKSU_H_INCLUDED */ 15 | 
/* C11 <stdatomic.h> implementation of the yaksu atomic integer wrappers. */
#define YAKSU_ATOMIC_VAR_INIT ATOMIC_VAR_INIT
typedef atomic_int yaksu_atomic_int;

/* Atomically add one to *val; returns the value held before the increment. */
static inline int yaksu_atomic_incr(yaksu_atomic_int * val)
{
    /* sequentially consistent, which is what the plain atomic_fetch_add does */
    return atomic_fetch_add_explicit(val, 1, memory_order_seq_cst);
}

/* Atomically subtract one from *val; returns the value held before the decrement. */
static inline int yaksu_atomic_decr(yaksu_atomic_int * val)
{
    /* sequentially consistent, which is what the plain atomic_fetch_sub does */
    return atomic_fetch_sub_explicit(val, 1, memory_order_seq_cst);
}

/* Read *val with acquire ordering. */
static inline int yaksu_atomic_load(yaksu_atomic_int * val)
{
    int current = atomic_load_explicit(val, memory_order_acquire);

    return current;
}

/* Write x to *val with release ordering. */
static inline void yaksu_atomic_store(yaksu_atomic_int * val, int x)
{
    atomic_store_explicit(val, x, memory_order_release);
}
/*
 * Small arithmetic helpers.  All of them are plain macros, so an argument
 * may be evaluated more than once: do not pass expressions with side
 * effects (e.g. i++).
 */
#define YAKSU_MAX(x, y)  ((x) > (y) ? (x) : (y))
#define YAKSU_MIN(x, y)  ((x) < (y) ? (x) : (y))
/* integer division of x by y, rounded up when there is a remainder */
#define YAKSU_CEIL(x, y) (((x) / (y)) + !!((x) % (y)))

/*
 * If `check` is true, store `errcode` in `rc` and jump to `label`.
 */
#define YAKSU_ERR_CHKANDJUMP(check, rc, errcode, label) \
    do {                                                \
        if ((check)) {                                  \
            (rc) = (errcode);                           \
            goto label;                                 \
        }                                               \
    } while (0)

/*
 * Jump to `label` when `rc` is anything other than YAKSA_SUCCESS.
 *
 * `rc` is parenthesised so that an expression argument is compared as a
 * whole; without the parentheses `flags & 2` would expand to
 * `flags & (2 != YAKSA_SUCCESS)`.
 */
#define YAKSU_ERR_CHECK(rc, label)                      \
    do {                                                \
        if ((rc) != YAKSA_SUCCESS)                      \
            goto label;                                 \
    } while (0)
pool); 13 | int yaksu_handle_pool_free(yaksu_handle_pool_s pool); 14 | int yaksu_handle_pool_elem_alloc(yaksu_handle_pool_s pool, yaksu_handle_t * handle, 15 | void *data); 16 | int yaksu_handle_pool_elem_free(yaksu_handle_pool_s pool, yaksu_handle_t handle); 17 | int yaksu_handle_pool_elem_get(yaksu_handle_pool_s pool, yaksu_handle_t handle, const void **data); 18 | 19 | #endif /* YAKSU_HANDLE_POOL_H_INCLUDED */ 20 | -------------------------------------------------------------------------------- /test/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | EXTRA_DIST += $(top_srcdir)/test/runtests.py 7 | 8 | LDADD = test/dtpools/libdtpools.la libyaksa.la 9 | test_cppflags = -I$(build_dir)/src/frontend/include -I$(srcdir)/test/dtpools/src 10 | 11 | include $(top_srcdir)/test/dtpools/Makefile.mk 12 | 13 | include $(top_srcdir)/test/simple/Makefile.mk 14 | include $(top_srcdir)/test/pack/Makefile.mk 15 | include $(top_srcdir)/test/iov/Makefile.mk 16 | include $(top_srcdir)/test/flatten/Makefile.mk 17 | 18 | CLEANFILES = $(EXTRA_PROGRAMS) 19 | -------------------------------------------------------------------------------- /test/dtpools/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | EXTRA_LTLIBRARIES = test/dtpools/libdtpools.la 7 | 8 | test_dtpools_libdtpools_la_CPPFLAGS = $(test_cppflags) 9 | 10 | test_dtpools_libdtpools_la_SOURCES = \ 11 | test/dtpools/src/dtpools.c \ 12 | test/dtpools/src/dtpools_custom.c \ 13 | test/dtpools/src/dtpools_attr.c \ 14 | test/dtpools/src/dtpools_desc.c \ 15 | test/dtpools/src/dtpools_init_verify.c \ 16 | test/dtpools/src/dtpools_misc.c 17 | 18 | noinst_HEADERS += \ 19 | test/dtpools/src/dtpools.h \ 20 | 
test/dtpools/src/dtpools_internal.h 21 | -------------------------------------------------------------------------------- /test/dtpools/README: -------------------------------------------------------------------------------- 1 | DTPools Release 2.0 2 | 3 | DTPools is a datatype library used to test yaksa routines with 4 | different datatype combinations. DTPools' interface is used to create pools 5 | of datatypes, each having a specified signature (i.e., native type + count). 6 | Every pool supports different datatype layouts (defined internally by the 7 | library). For a list of the available layouts, go to section: "4. Supported 8 | Derived Datatype layouts". 9 | 10 | This README is organized as follows: 11 | 12 | 1. DTPools API 13 | 2. TODOs 14 | 3. Environment Variables 15 | 16 | 17 | ---------------------------------------------------------------------------- 18 | 19 | 1. DTPools API 20 | ============== 21 | 22 | The following DTPools interfaces are used for datatype testing: 23 | 24 | * int DTP_pool_create(const char *base_type_str, uintptr_t base_type_count, int seed, DTP_pool_s *dtp) 25 | Create a new basic pool with the given datatype signature. 26 | - base_type_str: base type to use (e.g., YAKSA_TYPE__INT or YAKSA_TYPE__INT:4,YAKSA_TYPE__FLOAT:2) 27 | - base_type_count: number of base type elements in the pool signature 28 | - seed: seed for randomly generating objects 29 | - dtp: datatype pool object 30 | 31 | * int DTP_pool_free(DTP_pool_s dtp) 32 | Free a previously created datatype pool. 33 | - dtp: datatype pool object 34 | 35 | * int DTP_obj_create(DTP_pool_s dtp, DTP_obj_s *obj, uintptr_t maxbufsize) 36 | Create a datatype object inside the specified pool. 37 | - dtp: datatype pool object 38 | - obj: Created object 39 | - maxbufsize: Maximum buffer size that an object can use 40 | 41 | * int DTP_obj_free(DTP_obj_s obj) 42 | Free a previously created datatype object inside the specified pool. 
43 | - obj: object to be freed 44 | 45 | * int DTP_obj_buf_alloc(DTP_obj_s obj) 46 | - obj: object for which the buffer needs to be allocated 47 | 48 | * int DTP_obj_buf_free(DTP_obj_s obj) 49 | - obj: object for which the buffer needs to be freed 50 | 51 | * int DTP_obj_buf_init(DTP_obj_s obj, void *buf, int val_start, int val_stride, uintptr_t val_count) 52 | Initialize the buffer elements using start, stride and count. 53 | - obj: DTP object; buf: buffer to be initialized 54 | - val_start: start of initialization value for buffer 55 | - val_stride: increment for next element in buffer 56 | - val_count: total number of elements to be initialized in buffer 57 | 58 | * int DTP_obj_buf_check(DTP_obj_s obj, void *buf, int val_start, int val_stride, uintptr_t val_count) 59 | Checks whether the contents of the buffer match the values expected from start, stride and count. 60 | - obj: object to be checked; buf: buffer to be checked 61 | - val_start: start of checking value for buffer 62 | - val_stride: increment for next checked element in buffer 63 | - val_count: total number of elements to be checked in buffer 64 | 65 | 66 | ---------------------------------------------------------------------------- 67 | 68 | 2. TODOs 69 | ======== 70 | 71 | 1. Allow a tree for structures, not just a linear list 72 | 73 | ---------------------------------------------------------------------------- 74 | 75 | 3. Environment Variables 76 | ======================== 77 | 78 | 1. DTP_MAX_BUFSIZE: Sets the maximum buffer size that can be allocated 79 | for an object. DTPools will search for an object with a smaller 80 | buffer size for a few iterations, and give up if it cannot find such 81 | an object. 82 | 83 | 2. DTP_MAX_TREE_DEPTH: Sets the maximum datatype tree depth that an 84 | object can have. 
85 | -------------------------------------------------------------------------------- /test/dtpools/src/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | -------------------------------------------------------------------------------- /test/dtpools/src/dtpools.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef DTPOOLS_H_INCLUDED 7 | #define DTPOOLS_H_INCLUDED 8 | 9 | /* errors */ 10 | #define DTP_SUCCESS (0) 11 | #define DTP_ERR_ARG (-1) 12 | #define DTP_ERR_OUT_OF_RESOURCES (-2) 13 | #define DTP_ERR_MPI (-3) 14 | #define DTP_ERR_OTHER (-4) 15 | 16 | typedef struct { 17 | yaksa_type_t DTP_datatype; 18 | uintptr_t DTP_type_count; 19 | 20 | uintptr_t DTP_bufsize; 21 | uintptr_t DTP_buf_offset; 22 | 23 | void *priv; 24 | } DTP_obj_s; 25 | 26 | typedef struct { 27 | yaksa_type_t DTP_base_type; 28 | 29 | void *priv; 30 | } DTP_pool_s; 31 | 32 | /* DTP manipulation functions */ 33 | int DTP_pool_create(const char *basic_type_str, uintptr_t basic_type_count, int seed, 34 | DTP_pool_s * dtp); 35 | int DTP_pool_free(DTP_pool_s dtp); 36 | 37 | int DTP_obj_create(DTP_pool_s dtp, DTP_obj_s * obj, uintptr_t maxbufsize); 38 | int DTP_obj_create_custom(DTP_pool_s dtp, DTP_obj_s * obj, const char *desc); 39 | int DTP_obj_free(DTP_obj_s obj); 40 | int DTP_obj_get_description(DTP_obj_s obj, char **desc); 41 | 42 | int DTP_obj_buf_init(DTP_obj_s obj, void *buf, int val_start, int val_stride, uintptr_t val_count); 43 | int DTP_obj_buf_check(DTP_obj_s obj, void *buf, int val_start, int val_stride, uintptr_t val_count); 44 | 45 | #endif /* DTPOOLS_H_INCLUDED */ 46 | -------------------------------------------------------------------------------- /test/flatten/Makefile.mk: 
-------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | flatten_testlists = $(top_srcdir)/test/flatten/testlist.gen \ 7 | $(top_srcdir)/test/flatten/testlist.threads.gen 8 | 9 | testlists += $(flatten_testlists) 10 | EXTRA_DIST += $(flatten_testlists) 11 | 12 | EXTRA_PROGRAMS += \ 13 | test/flatten/flatten 14 | 15 | test_flatten_flatten_CPPFLAGS = $(test_cppflags) 16 | 17 | test-flatten: 18 | @$(top_srcdir)/test/runtests.py --summary=$(top_builddir)/test/flatten/summary.junit.xml \ 19 | $(flatten_testlists) 20 | -------------------------------------------------------------------------------- /test/iov/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | iov_testlists = $(top_srcdir)/test/iov/testlist.gen \ 7 | $(top_srcdir)/test/iov/testlist.threads.gen 8 | 9 | testlists += $(iov_testlists) 10 | EXTRA_DIST += $(iov_testlists) 11 | 12 | EXTRA_PROGRAMS += \ 13 | test/iov/iov 14 | 15 | test_iov_iov_CPPFLAGS = $(test_cppflags) 16 | 17 | test-iov: 18 | @$(top_srcdir)/test/runtests.py --summary=$(top_builddir)/test/iov/summary.junit.xml \ 19 | $(iov_testlists) 20 | -------------------------------------------------------------------------------- /test/pack/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | pack_testlists = $(top_srcdir)/test/pack/testlist.gen \ 7 | $(top_srcdir)/test/pack/testlist.threads.gen \ 8 | $(top_srcdir)/test/pack/testlist.blocking.gen 9 | 10 | if BUILD_CUDA_BACKEND 11 | pack_testlists += $(top_srcdir)/test/pack/testlist.stream.gen 12 | endif 13 | 14 | EXTRA_DIST += 
$(top_srcdir)/test/pack/testlist.gen \ 15 | $(top_srcdir)/test/pack/testlist.threads.gen \ 16 | $(top_srcdir)/test/pack/testlist.blocking.gen \ 17 | $(top_srcdir)/test/pack/testlist.stream.gen 18 | 19 | EXTRA_PROGRAMS += \ 20 | test/pack/pack 21 | 22 | test_pack_pack_CPPFLAGS = $(test_cppflags) 23 | 24 | common_files = test/pack/pack-common.c \ 25 | test/pack/pack-cuda.c \ 26 | test/pack/pack-ze.c \ 27 | test/pack/pack-hip.c 28 | 29 | test_pack_pack_SOURCES = test/pack/pack.c ${common_files} 30 | 31 | testlists += $(pack_testlists) 32 | -------------------------------------------------------------------------------- /test/pack/pack-common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #ifndef PACK_COMMON_H 7 | #define PACK_COMMON_H 8 | 9 | #include 10 | #include 11 | #include 12 | #include "yaksa_config.h" 13 | #include "yaksa.h" 14 | #include "dtpools.h" 15 | 16 | /* *INDENT-OFF* */ 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | /* *INDENT-ON* */ 21 | 22 | typedef enum { 23 | MEM_TYPE__UNREGISTERED_HOST = 0, 24 | MEM_TYPE__REGISTERED_HOST, 25 | MEM_TYPE__MANAGED, 26 | MEM_TYPE__DEVICE, 27 | MEM_TYPE__NUM_MEMTYPES, 28 | } mem_type_e; 29 | 30 | extern const char *memtype_str[MEM_TYPE__NUM_MEMTYPES]; 31 | 32 | int pack_get_ndevices(void); 33 | void pack_init_devices(int num_threads); 34 | void pack_finalize_devices(void); 35 | void pack_alloc_mem(int device_id, size_t size, mem_type_e type, void **hostbuf, void **devicebuf); 36 | void pack_free_mem(mem_type_e type, void *hostbuf, void *devicebuf); 37 | void pack_get_ptr_attr(const void *inbuf, void *outbuf, yaksa_info_t * info, int iter); 38 | void pack_copy_content(int tid, const void *sbuf, void *dbuf, size_t size, mem_type_e type); 39 | void *pack_create_stream(void); 40 | void pack_destroy_stream(void *stream); 41 | void pack_stream_synchronize(void 
*stream); 42 | 43 | #ifdef HAVE_CUDA 44 | int pack_cuda_get_ndevices(void); 45 | void pack_cuda_init_devices(int num_threads); 46 | void pack_cuda_finalize_devices(void); 47 | void pack_cuda_alloc_mem(int device_id, size_t size, mem_type_e type, void **hostbuf, 48 | void **devicebuf); 49 | void pack_cuda_free_mem(mem_type_e type, void *hostbuf, void *devicebuf); 50 | void pack_cuda_get_ptr_attr(const void *inbuf, void *outbuf, yaksa_info_t * info, int iter); 51 | void pack_cuda_copy_content(int tid, const void *sbuf, void *dbuf, size_t size, mem_type_e type); 52 | void *pack_cuda_create_stream(void); 53 | void pack_cuda_destroy_stream(void *stream); 54 | void pack_cuda_stream_synchronize(void *stream); 55 | #endif 56 | 57 | #ifdef HAVE_ZE 58 | int pack_ze_get_ndevices(void); 59 | void pack_ze_init_devices(int num_threads); 60 | void pack_ze_finalize_devices(void); 61 | void pack_ze_alloc_mem(int device_id, size_t size, mem_type_e type, void **hostbuf, 62 | void **devicebuf); 63 | void pack_ze_free_mem(mem_type_e type, void *hostbuf, void *devicebuf); 64 | void pack_ze_get_ptr_attr(const void *inbuf, void *outbuf, yaksa_info_t * info, int iter); 65 | void pack_ze_copy_content(int tid, const void *sbuf, void *dbuf, size_t size, mem_type_e type); 66 | void *pack_ze_create_stream(void); 67 | void pack_ze_destroy_stream(void *stream); 68 | void pack_ze_stream_synchronize(void *stream); 69 | #endif 70 | 71 | #ifdef HAVE_HIP 72 | int pack_hip_get_ndevices(void); 73 | void pack_hip_init_devices(int num_threads); 74 | void pack_hip_finalize_devices(void); 75 | void pack_hip_alloc_mem(int device_id, size_t size, mem_type_e type, void **hostbuf, 76 | void **devicebuf); 77 | void pack_hip_free_mem(mem_type_e type, void *hostbuf, void *devicebuf); 78 | void pack_hip_get_ptr_attr(const void *inbuf, void *outbuf, yaksa_info_t * info, int iter); 79 | void pack_hip_copy_content(int tid, const void *sbuf, void *dbuf, size_t size, mem_type_e type); 80 | void 
*pack_hip_create_stream(void); 81 | void pack_hip_destroy_stream(void *stream); 82 | void pack_hip_stream_synchronize(void *stream); 83 | #endif 84 | 85 | /* *INDENT-OFF* */ 86 | #ifdef __cplusplus 87 | } 88 | #endif 89 | /* *INDENT-ON* */ 90 | 91 | #endif /* PACK_COMMON_H */ 92 | -------------------------------------------------------------------------------- /test/pack/pack-cuda.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "yaksa_config.h" 12 | #include "yaksa.h" 13 | #include "dtpools.h" 14 | #include "pack-common.h" 15 | 16 | #ifdef HAVE_CUDA 17 | 18 | #include 19 | 20 | int pack_cuda_get_ndevices(void) 21 | { 22 | int ndevices; 23 | cudaGetDeviceCount(&ndevices); 24 | assert(ndevices != -1); 25 | 26 | return ndevices; 27 | } 28 | 29 | void pack_cuda_init_devices(int num_threads) 30 | { 31 | } 32 | 33 | void pack_cuda_finalize_devices() 34 | { 35 | } 36 | 37 | void pack_cuda_alloc_mem(int device_id, size_t size, mem_type_e type, void **hostbuf, 38 | void **devicebuf) 39 | { 40 | if (type == MEM_TYPE__REGISTERED_HOST) { 41 | cudaMallocHost(devicebuf, size); 42 | if (hostbuf) 43 | *hostbuf = *devicebuf; 44 | } else if (type == MEM_TYPE__MANAGED) { 45 | cudaMallocManaged(devicebuf, size, cudaMemAttachGlobal); 46 | if (hostbuf) 47 | *hostbuf = *devicebuf; 48 | } else if (type == MEM_TYPE__DEVICE) { 49 | cudaSetDevice(device_id); 50 | cudaMalloc(devicebuf, size); 51 | if (hostbuf) 52 | cudaMallocHost(hostbuf, size); 53 | } else { 54 | fprintf(stderr, "ERROR: unsupported memory type\n"); 55 | exit(1); 56 | } 57 | } 58 | 59 | void pack_cuda_free_mem(mem_type_e type, void *hostbuf, void *devicebuf) 60 | { 61 | if (type == MEM_TYPE__REGISTERED_HOST) { 62 | cudaFreeHost(devicebuf); 63 | } else if (type == MEM_TYPE__MANAGED) { 64 | 
cudaFree(devicebuf); 65 | } else if (type == MEM_TYPE__DEVICE) { 66 | cudaFree(devicebuf); 67 | if (hostbuf) { 68 | cudaFreeHost(hostbuf); 69 | } 70 | } 71 | } 72 | 73 | void pack_cuda_get_ptr_attr(const void *inbuf, void *outbuf, yaksa_info_t * info, int iter) 74 | { 75 | if (iter % 2 == 0) { 76 | int rc; 77 | 78 | rc = yaksa_info_create(info); 79 | assert(rc == YAKSA_SUCCESS); 80 | 81 | rc = yaksa_info_keyval_append(*info, "yaksa_gpu_driver", "cuda", strlen("cuda")); 82 | assert(rc == YAKSA_SUCCESS); 83 | 84 | struct cudaPointerAttributes attr; 85 | 86 | cudaPointerGetAttributes(&attr, inbuf); 87 | rc = yaksa_info_keyval_append(*info, "yaksa_cuda_inbuf_ptr_attr", &attr, sizeof(attr)); 88 | assert(rc == YAKSA_SUCCESS); 89 | 90 | cudaPointerGetAttributes(&attr, outbuf); 91 | rc = yaksa_info_keyval_append(*info, "yaksa_cuda_outbuf_ptr_attr", &attr, sizeof(attr)); 92 | assert(rc == YAKSA_SUCCESS); 93 | } else 94 | *info = NULL; 95 | } 96 | 97 | void pack_cuda_copy_content(int tid, const void *sbuf, void *dbuf, size_t size, mem_type_e type) 98 | { 99 | if (type == MEM_TYPE__DEVICE) { 100 | cudaMemcpy(dbuf, sbuf, size, cudaMemcpyDefault); 101 | } 102 | } 103 | 104 | void *pack_cuda_create_stream(void) 105 | { 106 | static cudaStream_t stream; 107 | /* create stream on the 1st device */ 108 | cudaSetDevice(0); 109 | cudaStreamCreate(&stream); 110 | return &stream; 111 | } 112 | 113 | void pack_cuda_destroy_stream(void *stream_p) 114 | { 115 | cudaStream_t stream = *(cudaStream_t *) stream_p; 116 | cudaStreamDestroy(stream); 117 | } 118 | 119 | void pack_cuda_stream_synchronize(void *stream_p) 120 | { 121 | cudaStream_t stream = *(cudaStream_t *) stream_p; 122 | cudaStreamSynchronize(stream); 123 | } 124 | 125 | #endif /* HAVE_CUDA */ 126 | -------------------------------------------------------------------------------- /test/pack/pack-hip.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National 
/*
 * Return the number of HIP devices reported by hipGetDeviceCount().
 *
 * `ndevices` starts at the -1 sentinel that the assertion tests for, so a
 * failed query that leaves the variable untouched trips the assertion rather
 * than returning an indeterminate value.  With assertions compiled out the
 * function returns -1 in that case.
 */
int pack_hip_get_ndevices(void)
{
    int ndevices = -1;

    /* the result is judged through the sentinel below, as in the original check */
    (void) hipGetDeviceCount(&ndevices);
    assert(ndevices != -1);

    return ndevices;
}
YAKSA_SUCCESS); 83 | 84 | struct hipPointerAttribute_t attr; 85 | 86 | hipError_t cerr = hipPointerGetAttributes(&attr, inbuf); 87 | if (cerr == hipSuccess) { 88 | rc = yaksa_info_keyval_append(*info, "yaksa_hip_inbuf_ptr_attr", &attr, sizeof(attr)); 89 | assert(rc == YAKSA_SUCCESS); 90 | } 91 | 92 | cerr = hipPointerGetAttributes(&attr, outbuf); 93 | if (cerr == hipSuccess) { 94 | rc = yaksa_info_keyval_append(*info, "yaksa_hip_outbuf_ptr_attr", &attr, sizeof(attr)); 95 | assert(rc == YAKSA_SUCCESS); 96 | } 97 | } else 98 | *info = NULL; 99 | } 100 | 101 | void pack_hip_copy_content(int tid, const void *sbuf, void *dbuf, size_t size, mem_type_e type) 102 | { 103 | int rc; 104 | if (type == MEM_TYPE__DEVICE) { 105 | rc = hipMemcpy(dbuf, sbuf, size, hipMemcpyDefault); 106 | //printf("rc: %d\n", rc); 107 | assert(rc == hipSuccess); 108 | } 109 | } 110 | 111 | void *pack_hip_create_stream(void) 112 | { 113 | static hipStream_t stream; 114 | /* create stream on the 1st device */ 115 | hipSetDevice(0); 116 | hipStreamCreate(&stream); 117 | return &stream; 118 | } 119 | 120 | void pack_hip_destroy_stream(void *stream_p) 121 | { 122 | hipStream_t stream = *(hipStream_t *) stream_p; 123 | hipStreamDestroy(stream); 124 | } 125 | 126 | void pack_hip_stream_synchronize(void *stream_p) 127 | { 128 | hipStream_t stream = *(hipStream_t *) stream_p; 129 | hipStreamSynchronize(stream); 130 | } 131 | 132 | #endif /* HAVE_HIP */ 133 | -------------------------------------------------------------------------------- /test/simple/Makefile.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright (C) by Argonne National Laboratory 3 | ## See COPYRIGHT in top-level directory 4 | ## 5 | 6 | testlists += $(top_srcdir)/test/simple/testlist.gen 7 | EXTRA_DIST += $(top_srcdir)/test/simple/testlist.gen 8 | 9 | EXTRA_PROGRAMS += \ 10 | test/simple/simple_test \ 11 | test/simple/lbub \ 12 | test/simple/test_contig \ 13 | 
## Run the "simple" test list and write a JUnit summary into the build tree.
## The test list is referenced through $(top_srcdir), the same path under
## which it is registered in `testlists` and EXTRA_DIST, so the target also
## works in out-of-tree (VPATH) builds.
test-simple:
	@$(top_srcdir)/test/runtests.py --summary=$(top_builddir)/test/simple/summary.junit.xml \
		$(top_srcdir)/test/simple/testlist.gen
} 55 | val += 71; 56 | } 57 | 58 | uintptr_t actual_unpack_bytes; 59 | rc = yaksa_iunpack(outbuf, actual, inbuf, 1, vector_vector, 0, &actual_unpack_bytes, 60 | NULL, YAKSA_OP__REPLACE, &request); 61 | assert(rc == YAKSA_SUCCESS); 62 | 63 | rc = yaksa_request_wait(request); 64 | assert(rc == YAKSA_SUCCESS); 65 | 66 | idx = 0; 67 | for (int i = 0; i < DIMSIZE; i++) { 68 | for (int j = 0; j < DIMSIZE; j++) { 69 | if (inbuf[idx] != idx) 70 | fprintf(stderr, "inbuf[%d] = %d instead of %d\n", idx, inbuf[idx], idx); 71 | idx++; 72 | } 73 | } 74 | 75 | yaksa_type_free(vector_vector); 76 | yaksa_type_free(vector); 77 | 78 | yaksa_finalize(); 79 | 80 | return 0; 81 | } 82 | -------------------------------------------------------------------------------- /test/simple/test_contig.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include "yaksa.h" 7 | #include 8 | #include 9 | #include 10 | 11 | int main(int argc, char **argv) 12 | { 13 | int errs = 0; 14 | yaksa_type_t type; 15 | uintptr_t iov_len; 16 | 17 | yaksa_init(NULL); 18 | 19 | { 20 | yaksa_type_create_vector(5, 2, 2, YAKSA_TYPE__INT, NULL, &type); 21 | yaksa_iov_len(1, type, &iov_len); 22 | if (iov_len != 1) { 23 | printf("Test vector with consecutive blocks, got iov_len = %ld, expect 1\n", iov_len); 24 | errs++; 25 | } 26 | yaksa_type_free(type); 27 | } 28 | 29 | { 30 | intptr_t blklens[] = { 2, 2, 1, 3, 3 }; 31 | intptr_t displs[] = { 1, 3, 5, 6, 9 }; 32 | yaksa_type_create_indexed(5, blklens, displs, YAKSA_TYPE__DOUBLE, NULL, &type); 33 | yaksa_iov_len(1, type, &iov_len); 34 | if (iov_len != 1) { 35 | printf("Test indexed with consecutive blocks, got iov_len = %ld, expect 1\n", iov_len); 36 | errs++; 37 | } 38 | yaksa_type_free(type); 39 | } 40 | 41 | { 42 | intptr_t blklens[] = { 2, 2, 1, 3, 3 }; 43 | intptr_t displs[] = { 1, 2, 3, 5, 9 }; 44 | 
yaksa_type_create_indexed(5, blklens, displs, YAKSA_TYPE__DOUBLE, NULL, &type); 45 | yaksa_iov_len(1, type, &iov_len); 46 | if (iov_len == 1) { 47 | printf("Test indexed with non-consecutive blocks, got iov_len = %ld, expect 5\n", 48 | iov_len); 49 | errs++; 50 | } 51 | yaksa_type_free(type); 52 | } 53 | 54 | { 55 | intptr_t displs[] = { 2, 4, 6, 8, 10 }; 56 | yaksa_type_create_indexed_block(5, 2, displs, YAKSA_TYPE__DOUBLE, NULL, &type); 57 | yaksa_iov_len(1, type, &iov_len); 58 | if (iov_len != 1) { 59 | printf("Test indexed_block with consecutive blocks, got iov_len = %ld, expect 1\n", 60 | iov_len); 61 | errs++; 62 | } 63 | yaksa_type_free(type); 64 | } 65 | 66 | { 67 | intptr_t displs[] = { 2, 3, 4, 7, 10 }; 68 | yaksa_type_create_indexed_block(5, 2, displs, YAKSA_TYPE__DOUBLE, NULL, &type); 69 | yaksa_iov_len(1, type, &iov_len); 70 | if (iov_len == 1) { 71 | printf("Test indexed_block with non-consecutive blocks, got iov_len = %ld, expect 5\n", 72 | iov_len); 73 | errs++; 74 | } 75 | yaksa_type_free(type); 76 | } 77 | 78 | { 79 | yaksa_type_t types[] = 80 | { YAKSA_TYPE__INT32_T, YAKSA_TYPE__INT32_T, YAKSA_TYPE__INT8_T, YAKSA_TYPE__INT8_T, 81 | YAKSA_TYPE__INT16_T 82 | }; 83 | intptr_t blklens[] = { 1, 1, 4, 4, 2 }; 84 | intptr_t displs[] = { 4, 8, 12, 16, 20 }; 85 | yaksa_type_create_struct(5, blklens, displs, types, NULL, &type); 86 | yaksa_iov_len(1, type, &iov_len); 87 | if (iov_len != 1) { 88 | printf("Test struct with consecutive blocks, got iov_len = %ld, expect 1\n", iov_len); 89 | errs++; 90 | } 91 | yaksa_type_free(type); 92 | } 93 | 94 | { 95 | yaksa_type_t types[] = 96 | { YAKSA_TYPE__INT32_T, YAKSA_TYPE__INT32_T, YAKSA_TYPE__INT8_T, YAKSA_TYPE__INT8_T, 97 | YAKSA_TYPE__INT16_T 98 | }; 99 | intptr_t blklens[] = { 1, 1, 4, 4, 2 }; 100 | intptr_t displs[] = { 4, 12, 13, 14, 20 }; 101 | yaksa_type_create_struct(5, blklens, displs, types, NULL, &type); 102 | yaksa_iov_len(1, type, &iov_len); 103 | if (iov_len == 1) { 104 | printf("Test struct with 
non-consecutive blocks, got iov_len = %ld, expect 5\n", 105 | iov_len); 106 | errs++; 107 | } 108 | yaksa_type_free(type); 109 | } 110 | 111 | yaksa_finalize(); 112 | 113 | return errs; 114 | } 115 | -------------------------------------------------------------------------------- /test/simple/threaded_test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) by Argonne National Laboratory 3 | * See COPYRIGHT in top-level directory 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "yaksa.h" 11 | 12 | #define DIMSIZE (80) 13 | 14 | int **inbuf_, **outbuf_; 15 | 16 | void *thread_fn(void *arg); 17 | void *thread_fn(void *arg) 18 | { 19 | int rc = YAKSA_SUCCESS; 20 | yaksa_type_t vector, vector_vector; 21 | uintptr_t actual; 22 | uintptr_t tid = (uintptr_t) arg; 23 | int *inbuf = inbuf_[tid]; 24 | int *outbuf = outbuf_[tid]; 25 | 26 | yaksa_init(NULL); 27 | 28 | rc = yaksa_type_create_vector(3, 2, 3, YAKSA_TYPE__INT, NULL, &vector); 29 | assert(rc == YAKSA_SUCCESS); 30 | 31 | rc = yaksa_type_create_vector(5, 1, 10, vector, NULL, &vector_vector); 32 | assert(rc == YAKSA_SUCCESS); 33 | 34 | for (int i = 0; i < DIMSIZE * DIMSIZE; i++) { 35 | inbuf[i] = i; 36 | outbuf[i] = -1; 37 | } 38 | 39 | yaksa_request_t request; 40 | rc = yaksa_ipack(inbuf, 1, vector_vector, 0, outbuf, DIMSIZE * DIMSIZE * sizeof(int), &actual, 41 | NULL, YAKSA_OP__REPLACE, &request); 42 | assert(rc == YAKSA_SUCCESS); 43 | assert(actual == 5 * 3 * 2 * sizeof(int)); 44 | 45 | rc = yaksa_request_wait(request); 46 | assert(rc == YAKSA_SUCCESS); 47 | 48 | int idx = 0; 49 | int val = 0; 50 | for (int i = 0; i < 5; i++) { 51 | for (int j = 0; j < 3; j++) { 52 | for (int k = 0; k < 2; k++) { 53 | inbuf[val] = -1; 54 | if (outbuf[idx] != val) 55 | fprintf(stderr, "outbuf[%d] = %d instead of %d\n", idx, outbuf[idx], val); 56 | idx++; 57 | val++; 58 | } 59 | val++; 60 | } 61 | val += 71; 62 | } 63 | 64 | uintptr_t 
actual_unpack_bytes; 65 | rc = yaksa_iunpack(outbuf, actual, inbuf, 1, vector_vector, 0, &actual_unpack_bytes, 66 | NULL, YAKSA_OP__REPLACE, &request); 67 | assert(rc == YAKSA_SUCCESS); 68 | 69 | rc = yaksa_request_wait(request); 70 | assert(rc == YAKSA_SUCCESS); 71 | 72 | idx = 0; 73 | for (int i = 0; i < DIMSIZE; i++) { 74 | for (int j = 0; j < DIMSIZE; j++) { 75 | if (inbuf[idx] != idx) 76 | fprintf(stderr, "inbuf[%d] = %d instead of %d\n", idx, inbuf[idx], idx); 77 | idx++; 78 | } 79 | } 80 | 81 | yaksa_type_free(vector_vector); 82 | yaksa_type_free(vector); 83 | 84 | yaksa_finalize(); 85 | 86 | return NULL; 87 | } 88 | 89 | #define MAX_THREADS (16) 90 | #define ITERS 10000 91 | 92 | int main() 93 | { 94 | pthread_t thread[MAX_THREADS]; 95 | 96 | inbuf_ = (int **) malloc(MAX_THREADS * sizeof(int *)); 97 | outbuf_ = (int **) malloc(MAX_THREADS * sizeof(int *)); 98 | for (int i = 0; i < MAX_THREADS; i++) { 99 | inbuf_[i] = (int *) malloc(DIMSIZE * DIMSIZE * sizeof(int)); 100 | outbuf_[i] = (int *) malloc(DIMSIZE * DIMSIZE * sizeof(int)); 101 | } 102 | 103 | for (int j = 0; j < ITERS; j++) { 104 | for (uintptr_t i = 0; i < MAX_THREADS; i++) 105 | pthread_create(&thread[i], NULL, thread_fn, (void *) i); 106 | 107 | for (int i = 0; i < MAX_THREADS; i++) 108 | pthread_join(thread[i], NULL); 109 | } 110 | 111 | for (int i = 0; i < MAX_THREADS; i++) { 112 | free(inbuf_[i]); 113 | free(outbuf_[i]); 114 | } 115 | free(inbuf_); 116 | free(outbuf_); 117 | 118 | return 0; 119 | } 120 | -------------------------------------------------------------------------------- /test/testlist: -------------------------------------------------------------------------------- 1 | simple 2 | pack 3 | iov 4 | flatten 5 | --------------------------------------------------------------------------------