├── .github └── workflows │ └── dockerimage.yml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── Bale-StGirons-Final.pdf ├── README.md └── uconvey.pdf ├── example_matrices ├── directed_flat_100.mm ├── directed_geometric_100.mm ├── er_100_0.2.dst ├── er_100_0.2.mm ├── kron_1:3x4x5.mm ├── sparse100.dst ├── sparse100.mm ├── toposort_input.mm ├── undirected_flat_100.mm └── undirected_geometric_100.mm ├── images ├── GeometricGraph.png ├── GeometricGraph2.png └── toposort.png └── src ├── bale_classic ├── DEMO.md ├── Doxyfile ├── INSTALL.md ├── README.md ├── acinclude.m4 ├── apps │ ├── .gitignore │ ├── Makefile.am │ ├── README.md │ ├── configure.ac │ ├── conftest.py │ ├── histo_src │ │ ├── README.md │ │ ├── alternates │ │ │ ├── histo_alternates.h │ │ │ ├── histo_exstack2_cyclic.upc │ │ │ ├── histo_exstack2_function.upc │ │ │ ├── histo_exstack2_goto.upc │ │ │ └── histo_exstack_function.upc │ │ ├── histo.h │ │ ├── histo.upc │ │ ├── histo_agp.upc │ │ ├── histo_conveyor.upc │ │ ├── histo_exstack.upc │ │ └── histo_exstack2.upc │ ├── ig_src │ │ ├── README.md │ │ ├── alternates │ │ │ ├── ig_alternates.h │ │ │ ├── ig_exstack2_cyclic.upc │ │ │ ├── ig_exstack2_goto.upc │ │ │ ├── ig_exstack_function.upc │ │ │ └── ig_exstack_pkg.upc │ │ ├── ig.h │ │ ├── ig.upc │ │ ├── ig_agp.upc │ │ ├── ig_conveyor.upc │ │ ├── ig_exstack.upc │ │ └── ig_exstack2.upc │ ├── permute_matrix_src │ │ ├── README.md │ │ ├── alternates │ │ │ └── permute_matrix_alternates.h │ │ └── permute_matrix.upc │ ├── randperm_src │ │ ├── README.md │ │ ├── alternates │ │ │ └── randperm_agp_opt.upc │ │ ├── randperm.upc │ │ └── randperm_alternates.h │ ├── sparse_matrix_io_src │ │ ├── README.md │ │ └── sparse_matrix_io.upc │ ├── sssp_src │ │ ├── README.md │ │ ├── alternates │ │ │ ├── sssp_alternates.h │ │ │ └── sssp_bellman_agp.upc │ │ ├── sssp.h │ │ ├── sssp.upc │ │ ├── sssp_bellman_conveyor.upc │ │ ├── sssp_bellman_exstack.upc │ │ ├── sssp_bellman_exstack2.upc │ │ ├── sssp_delta_common.h │ │ ├── sssp_delta_common.upc │ 
│ ├── sssp_delta_conveyor.upc │ │ ├── sssp_delta_exstack.upc │ │ └── sssp_delta_exstack2.upc │ ├── tests │ │ └── test_all.py │ ├── topo_src │ │ ├── README.md │ │ ├── alternates │ │ │ ├── toposort_agp_oo.upc │ │ │ ├── toposort_alternates.h │ │ │ ├── toposort_cooler.upc │ │ │ └── toposort_exstack_orig.upc │ │ ├── toposort.h │ │ ├── toposort.upc │ │ ├── toposort_agp.upc │ │ ├── toposort_conveyor.upc │ │ ├── toposort_exstack.upc │ │ └── toposort_exstack2.upc │ ├── transpose_matrix_src │ │ ├── README.md │ │ ├── alternates │ │ │ └── transpose_matrix_alternates.h │ │ └── transpose_matrix.upc │ ├── triangle_src │ │ ├── README.md │ │ ├── alternates │ │ │ ├── triangle_agp_iter.upc │ │ │ ├── triangle_agp_oo.upc │ │ │ ├── triangle_agp_opt1.upc │ │ │ └── triangle_agp_opt2.upc │ │ ├── triangle.h │ │ ├── triangle.upc │ │ ├── triangle_agp.upc │ │ ├── triangle_conveyor.upc │ │ ├── triangle_exstack.upc │ │ └── triangle_exstack2.upc │ └── union_find │ │ └── README.md ├── bootstrap.sh ├── convey │ ├── .gitignore │ ├── Doxyfile.in │ ├── INSTALL │ ├── LICENSE │ ├── MAINTAIN │ ├── Makefile.am │ ├── README.md │ ├── a2a_align.c │ ├── a2a_apps.c │ ├── a2a_basic.c │ ├── accel.c │ ├── alltoallv.c │ ├── alltoallv.h │ ├── bench_simple.awk │ ├── bench_tensor.awk │ ├── biconvey.c │ ├── biconvey.h │ ├── biconvey_impl.h │ ├── bisimple.c │ ├── bitensor.c │ ├── bolite.h │ ├── cases.pl │ ├── circle.c │ ├── codata.c │ ├── common.c │ ├── common.h │ ├── configure.ac │ ├── convey.c │ ├── convey.h │ ├── convey.pc.in │ ├── convey_alc8r.h │ ├── convey_codec.h │ ├── convey_impl.h │ ├── elastic.c │ ├── examples │ │ ├── bigather.c │ │ ├── example.h │ │ ├── gather.c │ │ ├── histo.c │ │ └── meld.c │ ├── getput.c │ ├── launchinfo │ ├── m4 │ │ ├── acx_pthread.m4 │ │ ├── ax_check_compiler_flags.m4 │ │ └── doxygen.am │ ├── mpiport.c │ ├── mpp2mpi.c │ ├── mpp2mpi.h │ ├── mpp2nil.c │ ├── mpp2nil.h │ ├── mpp2shmem.c │ ├── mpp2shmem.h │ ├── mpp2upc.c │ ├── mpp2upc.h │ ├── narrative.h │ ├── packer.c │ ├── pivot.h │ ├── 
porter.c │ ├── porter.h │ ├── porter_impl.h │ ├── private.h │ ├── putport.c │ ├── router.h │ ├── shmemptr.c │ ├── simple.c │ ├── simple.h │ ├── sorter.c │ ├── sorter.h │ ├── squeeze.c │ ├── tensor.c │ ├── tensor.h │ ├── test_compress │ ├── test_elastic │ ├── test_examples │ ├── test_shmem_ptr │ ├── test_simple │ ├── test_tensor │ ├── test_twohop │ ├── trivial.c │ ├── tune.pl │ ├── tune_tensor │ ├── twohop.c │ └── wringer.c ├── docker │ ├── cupc │ │ └── Dockerfile │ ├── gupc │ │ ├── Dockerfile │ │ ├── Dockerfile_bale │ │ └── Dockerfile_github_action │ ├── oshmem │ │ ├── Dockerfile │ │ ├── Dockerfile_bale │ │ └── Dockerfile_github_action │ └── sos │ │ ├── Dockerfile │ │ └── README ├── exstack │ ├── Makefile.am │ ├── README.md │ ├── configure.ac │ ├── exstack.h │ ├── exstack.pc.in │ ├── exstack.upc │ ├── exstack2.upc │ └── exstack2_shmem.c ├── libgetput │ ├── Makefile.am │ ├── README.md │ ├── configure.ac │ ├── knuth_rng_double_2019.h │ ├── libgetput.h │ ├── libgetput.pc.in │ └── libgetput.upc ├── mainpage.h ├── make_bale ├── plot_results.ipynb ├── run_apps.py ├── spmat │ ├── Makefile.am │ ├── README.md │ ├── configure.ac │ ├── geometric.upc │ ├── spmat.h │ ├── spmat.pc.in │ ├── spmat_agp.upc │ ├── spmat_conveyor.upc │ ├── spmat_enums.h │ ├── spmat_exstack.upc │ ├── spmat_exstack2.upc │ ├── spmat_io.upc │ └── spmat_utils.upc └── std_options │ ├── Makefile.am │ ├── README.md │ ├── configure.ac │ ├── std_options.h │ ├── std_options.pc.in │ └── std_options.upc ├── other_parallel ├── Chapel │ ├── Makefile │ ├── README.md │ ├── histo.chpl │ ├── ig.chpl │ ├── spmat.chpl │ ├── topo.chpl │ └── triangle.chpl └── Rust │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ ├── README_cray.md │ ├── convey │ ├── .gitignore │ ├── Cargo.toml │ ├── LICENSE │ ├── README.md │ ├── README_cray.md │ ├── examples │ │ ├── collect_convey.rs │ │ ├── histo_convey.rs │ │ └── ig_convey.rs │ ├── pshmem-experimental │ │ ├── Cargo.toml │ │ └── src │ │ │ ├── collect.rs │ │ │ ├── error.rs │ │ │ ├── 
lib.rs │ │ │ ├── main.rs │ │ │ └── object.rs │ ├── scripts │ │ └── fixmodules.sh │ ├── shmem-sys │ │ ├── Cargo.toml │ │ ├── build.rs │ │ └── src │ │ │ ├── lib.rs │ │ │ └── wrapper.h │ ├── shmem │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── examples │ │ │ └── histo_shmem.rs │ │ └── src │ │ │ ├── atomic.rs │ │ │ ├── collect.rs │ │ │ ├── error.rs │ │ │ ├── lib.rs │ │ │ ├── object.rs │ │ │ └── shmem.rs │ └── src │ │ ├── SUMMARY.md │ │ ├── chapter_1.md │ │ ├── collect.rs │ │ ├── lib.rs │ │ ├── session.rs │ │ ├── shmem_buffers.rs │ │ └── testing_support.rs │ ├── delta_stepping │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ └── src │ │ ├── lib.rs │ │ └── main.rs │ ├── scripts │ └── fixmodules.sh │ ├── spmat │ ├── .gitignore │ ├── Cargo.toml │ ├── scripts │ │ ├── permute_convey.sh │ │ └── randperm_convey.sh │ └── src │ │ ├── lib.rs │ │ └── perm.rs │ ├── src │ ├── bin │ │ ├── collect_convey.rs │ │ ├── histo_convey.rs │ │ ├── ig_convey.rs │ │ ├── permute_convey.rs │ │ └── randperm_convey.rs │ └── lib.rs │ ├── toposort │ ├── Cargo.toml │ ├── scripts │ │ └── toposort.sh │ └── src │ │ ├── lib.rs │ │ └── main.rs │ └── triangle │ ├── Cargo.toml │ ├── scripts │ └── toposort.sh │ └── src │ ├── lib.rs │ └── main.rs └── other_serial ├── C ├── Doxyfile ├── Makefile ├── README.md ├── conftest.py ├── default_app_opts.h ├── demo_spmat.c ├── histo.c ├── histo.md ├── ig.c ├── ig.md ├── knuth_rng_double_2019.h ├── mainpage.h ├── opts_demo.c ├── opts_demo.md ├── permute_matrix.c ├── permute_matrix.md ├── randperm.c ├── randperm.md ├── runall.sh ├── spmat_utils.c ├── spmat_utils.h ├── spmat_utils.md ├── sssp.c ├── sssp.md ├── sssp_bellmanford.c ├── sssp_delta.c ├── sssp_dijsktra.c ├── std_options.c ├── std_options.h ├── std_options.md ├── tests │ └── test_all.py ├── toposort.c ├── toposort.md ├── transpose_matrix.c ├── transpose_matrix.md ├── triangle.c ├── triangle.md ├── unionfind.c └── unionfind.md └── Rust ├── .gitignore ├── README.md ├── delta_stepping ├── .gitignore ├── Cargo.toml 
├── README.md └── src │ ├── lib.rs │ └── main.rs ├── sparsemat ├── Cargo.toml └── src │ ├── err.rs │ └── lib.rs └── toposort ├── .gitignore ├── Cargo.toml └── src ├── lib.rs └── main.rs /.github/workflows/dockerimage.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ master , github_actions ] 6 | pull_request: 7 | branches: [ master , github_actions ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | 18 | - name: Build the oshmem Docker image 19 | run: docker build . --file src/bale_classic/docker/oshmem/Dockerfile_github_action --tag bale_test_oshmem:gh_action 20 | - name: Run the oshmem Docker image 21 | run: docker run bale_test_oshmem:gh_action su bale_user - -c "oshrun -n 1 build_oshmem/bin/histo -M 1 -c 1" 22 | #- name: Run pwd and ls 23 | # run: | 24 | # docker run bale_test_oshmem:gh_action su bale_user - -c "pwd && ls" 25 | - name: Run pytest in oshmem Docker container 26 | run: | 27 | docker run bale_test_oshmem:gh_action pytest -s -p no:cacheprovider -P=/opt/bale_private/src/bale_classic/build_oshmem/bin/ --node_range=1,5,2 -M 3 apps/ 28 | - name: Build the gupc Docker image 29 | run: docker build . 
--file src/bale_classic/docker/gupc/Dockerfile_github_action --tag bale_test_gupc:gh_action 30 | - name: Run the gupc Docker image 31 | run: docker run bale_test_gupc:gh_action su bale_user - -c "build_gupc/bin/histo -n 1" 32 | #- name: Run pwd and ls in gupc container 33 | # run: | 34 | # docker run bale_test_gupc:gh_action su bale_user - -c "pwd && ls" 35 | - name: Run pytest in gupc Docker container 36 | run: | 37 | docker run bale_test_gupc:gh_action pytest -s -p no:cacheprovider -P=/opt/bale_private/src/bale_classic/build_gupc/bin/ --node_range=1,5,2 apps/ 38 | 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Emacs and Vim backup files 2 | *~ 3 | \#* 4 | *.swp 5 | 6 | # Ignore files created by autoconf 7 | aclocal.m4 8 | autom4te.cache/ 9 | compile 10 | config.guess 11 | config.h.in 12 | config.sub 13 | configure 14 | depcomp 15 | install-sh 16 | ltmain.sh 17 | Makefile.in 18 | missing 19 | test-driver 20 | *.o 21 | 22 | # Ignore build directories 23 | build_*/ 24 | target/ 25 | html/ 26 | latex/ 27 | t 28 | 29 | #ignore apple DS_Store 30 | .DS_Store 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, Institute for Defense Analyses 2 | 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | This material may be reproduced by or for the U.S. Government 4 | pursuant to the copyright license under the clauses at DFARS 5 | 252.227-7013 and 252.227-7014. 6 | 7 | All rights reserved. 
8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | * Neither the name of the copyright holder nor the 17 | names of its contributors may be used to endorse or promote products 18 | derived from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 | COPYRIGHT HOLDER NOR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /docs/Bale-StGirons-Final.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdevinney/bale/1b8f673b56645b2bd74a8af6213462bbc9d559fe/docs/Bale-StGirons-Final.pdf -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Documents 2 | 3 | ### Papers 4 | 5 | **[Conveyors for Streaming Many-To-Many Communication](uconvey.pdf)**, June 10, 2019. 6 | 7 | ### Presentations 8 | 9 | **[Bale: Kernels for Irregular Parallel Computation](Bale-StGirons-Final.pdf)**, Sparse Days at Saint-Girons, June 20, 2022. 10 | -------------------------------------------------------------------------------- /docs/uconvey.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdevinney/bale/1b8f673b56645b2bd74a8af6213462bbc9d559fe/docs/uconvey.pdf -------------------------------------------------------------------------------- /example_matrices/er_100_0.2.dst: -------------------------------------------------------------------------------- 1 | 100 2 | 0.000000 3 | 0.268099 4 | 0.181249 5 | 0.255127 6 | 0.130626 7 | 0.336029 8 | 0.276329 9 | 0.228703 10 | 0.285848 11 | 0.275056 12 | 0.443108 13 | 0.184198 14 | 0.276963 15 | 0.299967 16 | 0.227390 17 | 0.130055 18 | 0.260885 19 | 0.200435 20 | 0.185848 21 | 0.080398 22 | 0.130255 23 | 0.266758 24 | 0.232002 25 | 0.185039 26 | 0.301550 27 | 0.093411 28 | 0.130980 29 | 0.177957 30 | 0.250243 31 | 0.093147 32 | 0.224685 33 | 0.099704 34 | 0.001514 35 | 0.200047 36 | 0.114772 37 | 0.128577 38 | 0.229244 39 | 0.195214 40 | 0.209459 41 | 0.209431 42 | 0.164612 43 | 0.255898 44 | 0.277336 45 | 0.179608 46 | 0.275763 47 | 0.200306 48 | 0.241034 49 | 0.218451 50 | 0.301268 51 | 0.071648 52 | 0.158035 53 | 
0.053124 54 | 0.248678 55 | 0.144716 56 | 0.237002 57 | 0.289990 58 | 0.240416 59 | 0.195120 60 | 0.257752 61 | 0.060288 62 | 0.461603 63 | 0.297427 64 | 0.011080 65 | 0.021710 66 | 0.123703 67 | 0.327204 68 | 0.263629 69 | 0.329429 70 | 0.187475 71 | 0.184620 72 | 0.110123 73 | 0.115877 74 | 0.114023 75 | 0.303102 76 | 0.144944 77 | 0.136412 78 | 0.335973 79 | 0.227595 80 | 0.251884 81 | 0.219640 82 | 0.185322 83 | 0.031596 84 | 0.239731 85 | 0.042960 86 | 0.328212 87 | 0.202409 88 | 0.138588 89 | 0.228436 90 | 0.232442 91 | 0.215454 92 | 0.219915 93 | 0.223908 94 | 0.325255 95 | 0.280732 96 | 0.315132 97 | 0.095879 98 | 0.216118 99 | 0.278495 100 | 0.308837 101 | 0.234500 102 | -------------------------------------------------------------------------------- /example_matrices/sparse100.dst: -------------------------------------------------------------------------------- 1 | 100 2 | inf 3 | 2.1900485436661703 4 | 0 5 | 1.3934548065468872 6 | inf 7 | 2.679213484487907 8 | inf 9 | 4.6463676971575865 10 | 2.1104799800754765 11 | 0.9136767111823816 12 | 3.267114981426355 13 | 1.8352854996831787 14 | 3.7643070510349936 15 | 1.4026017491715679 16 | 2.969798910286923 17 | 3.712366246705792 18 | 3.0084858712265845 19 | 2.2511308900464675 20 | 3.7701038831503157 21 | 1.3225777288786031 22 | 2.1102810878899634 23 | 4.894235309495469 24 | 3.002109240974235 25 | inf 26 | inf 27 | 1.712725533420224 28 | 2.3786927366788264 29 | 3.749476117887194 30 | inf 31 | 1.527439563382286 32 | 4.097170503636419 33 | 2.1521431492326237 34 | 3.0685217629768813 35 | 1.9906421400089718 36 | inf 37 | 3.548251710640006 38 | 2.9876828937245685 39 | inf 40 | 0.7104256352432791 41 | 2.346536247466888 42 | 2.9432568991364434 43 | 1.9297246558069676 44 | 3.1657968937662875 45 | 5.17804896000146 46 | 3.7796139167363436 47 | 0.650474101247914 48 | 2.367320228901709 49 | 1.2245492455800706 50 | 2.6410356783460296 51 | 2.3887528262554047 52 | 3.13830402790025 53 | 3.563505639479474 54 | 2.333699183164204 
55 | inf 56 | 2.5572414466383937 57 | inf 58 | inf 59 | 3.1041958472106446 60 | 5.097467206367916 61 | 3.0536223118897334 62 | inf 63 | 1.4854636984711727 64 | 4.3453840027857495 65 | 3.0005182003842723 66 | inf 67 | 3.645536412334543 68 | inf 69 | 3.9035612473032146 70 | 3.9795308433083547 71 | 4.411255996919547 72 | 0.7529031694278702 73 | inf 74 | 3.1237610052031877 75 | 2.2956150137590097 76 | 3.2106304406675474 77 | inf 78 | 1.782104169430907 79 | 3.3852735016656306 80 | 1.737703505822142 81 | 1.8572475112530613 82 | 1.404024052467732 83 | 4.1692437271924385 84 | 2.3913912391007592 85 | 2.631372598965275 86 | 1.759763785548489 87 | 5.39962938329463 88 | 3.0491581882572216 89 | inf 90 | inf 91 | 3.401862159405784 92 | 3.4156587458397576 93 | 2.0530290698326885 94 | 1.9149344108029192 95 | 2.076833583530588 96 | 4.894529426366405 97 | 2.8382638308549466 98 | 1.596849977511277 99 | 1.0061447209958945 100 | inf 101 | 1.20674942183095 102 | -------------------------------------------------------------------------------- /images/GeometricGraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdevinney/bale/1b8f673b56645b2bd74a8af6213462bbc9d559fe/images/GeometricGraph.png -------------------------------------------------------------------------------- /images/GeometricGraph2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdevinney/bale/1b8f673b56645b2bd74a8af6213462bbc9d559fe/images/GeometricGraph2.png -------------------------------------------------------------------------------- /images/toposort.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdevinney/bale/1b8f673b56645b2bd74a8af6213462bbc9d559fe/images/toposort.png -------------------------------------------------------------------------------- /src/bale_classic/acinclude.m4: 
-------------------------------------------------------------------------------- 1 | 2 | # =========================================================================== 3 | # https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html 4 | # =========================================================================== 5 | # 6 | # SYNOPSIS 7 | # 8 | # AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) 9 | # 10 | # DESCRIPTION 11 | # 12 | # Check whether the given FLAG works with the current language's compiler 13 | # or gives an error. (Warnings, however, are ignored) 14 | # 15 | # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on 16 | # success/failure. 17 | # 18 | # If EXTRA-FLAGS is defined, it is added to the current language's default 19 | # flags (e.g. CFLAGS) when the check is done. The check is thus made with 20 | # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to 21 | # force the compiler to issue an error when a bad flag is given. 22 | # 23 | # INPUT gives an alternative input source to AC_COMPILE_IFELSE. 24 | # 25 | # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this 26 | # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. 27 | # 28 | # LICENSE 29 | # 30 | # Copyright (c) 2008 Guido U. Draheim 31 | # Copyright (c) 2011 Maarten Bosmans 32 | # 33 | # Copying and distribution of this file, with or without modification, are 34 | # permitted in any medium without royalty provided the copyright notice 35 | # and this notice are preserved. This file is offered as-is, without any 36 | # warranty. 
37 | 38 | #serial 6 39 | AC_DEFUN([AX_CHECK_COMPILE_FLAG], 40 | [AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF 41 | AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl 42 | AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ 43 | ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS 44 | _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" 45 | AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], 46 | [AS_VAR_SET(CACHEVAR,[yes])], 47 | [AS_VAR_SET(CACHEVAR,[no])]) 48 | _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) 49 | AS_VAR_IF(CACHEVAR,yes, 50 | [m4_default([$2], :)], 51 | [m4_default([$3], :)]) 52 | AS_VAR_POPDEF([CACHEVAR])dnl 53 | ])dnl AX_CHECK_COMPILE_FLAGS 54 | -------------------------------------------------------------------------------- /src/bale_classic/apps/.gitignore: -------------------------------------------------------------------------------- 1 | write_triples_src/ 2 | /Makefile.am.write_triples_src 3 | -------------------------------------------------------------------------------- /src/bale_classic/apps/README.md: -------------------------------------------------------------------------------- 1 | # apps 2 | 3 | The applications in bale are meant to showcase: 4 | 5 | - the challenges in writing interesting, distributed, parallel applications that are efficient and high performing at scale 6 | - the challenges of getting these codes to use aggregated communications. Including: 7 | - getting the code right the first time 8 | - rapidly experimenting at scale during algorithm development 9 | - reading and understanding the code 10 | - implementing algorithms with different tolerances for latency 11 | - our quest for the From the Book (FTB) implementation of each app 12 | 13 | ### Implementations 14 | 15 | Each of the applications in bale are implemented in multiple ways to showcase the pros and cons of each. 
In general the models used are: AGP (the standard PGAS model that uses Atomics, Gets, and Puts), [exstack](../exstack/README.md), [exstack2](../exstack/README.md), and [convey](../convey/README.md). In some applications we have included other variants in the "alternates" directory. 16 | 17 | ### List of apps 18 | 19 | - [histo](histo_src/README.md) -- histogram 20 | - [indexgather](ig_src/README.md) -- index_gather 21 | - [transpose_matrix](transpose_matrix_src/README.md) -- matrix transpose 22 | - [randperm](randperm_src/README.md) -- random array of integers 23 | - [permute_matrix](permute_matrix_src/README.md) -- permute a matrix 24 | - [triangle counting](triangle_src/README.md) -- count the triangles in a graph 25 | - [toposort](topo_src/README.md) -- topologically sort a matrix 26 | - [sparse_matrix_io](sparse_matrix_io_src/README.md) -- distributed sparse matrix I/O 27 | - [sssp](sssp_src/README.md) -- single source shortest path problem 28 | 29 | -------------------------------------------------------------------------------- /src/bale_classic/apps/conftest.py: -------------------------------------------------------------------------------- 1 | def pytest_addoption(parser): 2 | parser.addoption("-P", "--path", action="store", default="./", 3 | help="Specify the path to bale binaries") 4 | parser.addoption("-L", "--launcher_cmd", action="store", default="", 5 | help="Specify the job launcher on your system (i.e. srun, oshrun, etc)") 6 | parser.addoption("--launcher_opts", action="store", default="", 7 | help="Options to give to the launcher other than -n.") 8 | parser.addoption("--node_range", action="store", default="", 9 | help="A range given with ,, for the number of PEs to run on.") 10 | parser.addoption("-M", "--implementation_mask", action="store", default="31", 11 | help="A bit mask of implementations to run. 
AGP = 1, " 12 | "exstack=2, exstack2=4, conveyors=8,alternates=16") 13 | 14 | def pytest_generate_tests(metafunc): 15 | option_value = metafunc.config.option.path 16 | if 'path' in metafunc.fixturenames and option_value is not None: 17 | metafunc.parametrize("path",[option_value]) 18 | 19 | option_value = metafunc.config.option.launcher_cmd 20 | if 'launcher_cmd' in metafunc.fixturenames and option_value is not None: 21 | metafunc.parametrize("launcher_cmd",[option_value]) 22 | 23 | option_value = metafunc.config.option.launcher_opts 24 | if 'launcher_opts' in metafunc.fixturenames and option_value is not None: 25 | metafunc.parametrize("launcher_opts",[option_value]) 26 | 27 | option_value = metafunc.config.option.node_range 28 | if 'node_range' in metafunc.fixturenames and option_value is not None: 29 | metafunc.parametrize("node_range",[option_value]) 30 | 31 | option_value = metafunc.config.option.implementation_mask 32 | if 'implementation_mask' in metafunc.fixturenames and option_value is not None: 33 | metafunc.parametrize("implementation_mask",[option_value]) 34 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/README.md: -------------------------------------------------------------------------------- 1 | # histogram (histo) 2 | 3 | ## Definition 4 | 5 | In the histogram app, each PE generates a list of uniform random indices into a distributed table and then, for each index, increments the table's value at that index. 6 | 7 | In SHMEM, this looks like 8 | 9 | ```c 10 | for(i = 0; i < N; i++) 11 | shmem_atomic_add(&table[index[i]/THREADS], 1, index[i] % THREADS); 12 | ``` 13 | 14 | where table is a distributed array with M total elements and `index` is a local array of global indices into the table. 
15 | 16 | ## Discussion 17 | 18 | #### Parallel Considerations 19 | 20 | histogram represents an example of the communication pattern where PEs are asynchronously sending lots of small and easy-to-perform updates to other PEs. The histogram pattern is rather easy to write in plain UPC and SHMEM and even in our exstack/conveyor aggregation libraries. We should make the distinction between an app like histogram, where the updates are simple and can be done easily using atomics, or even just puts, versus an app like Single Source Shortest Path ([SSSP](../sssp_src/README.md)) where the update is complicated and would not be simple (or in some cases possible) to achieve with atomics and puts. 21 | 22 | Clearly, it does not matter what order the updates are done in the 23 | histogram application; in fact, there are no dependencies at all. All 24 | that matters is that we complete all the updates. This makes it an 25 | obvious target for aggregation. The histogram application when written 26 | with conveyors (see snippet below) looks a little more complicated than the SHMEM implementation. Note that we have packed the local offset and PE number into the pckindx array to reduce compute. 27 | 28 | ```c 29 | while( convey_advance(conveyor, (i==T)) ){ 30 | for( ; i < T; i++){ 31 | col = pckindx[i] >> 16; 32 | pe = pckindx[i] & 0xffff; 33 | if( !convey_push(conveyor, &col, pe) ) 34 | break; 35 | } 36 | 37 | while(convey_pull(conveyor, &col, NULL) == convey_OK ) 38 | lcounts[col]++; 39 | } 40 | ``` 41 | 42 | #### Why is it in bale? 43 | 44 | Histogram is the simplest application in bale, yet it is worthy of our attention because it represents a pattern of communication and action that is frequently used in parallel applications. The performance of this simple loop is key to the performance of many of the other apps in bale. 45 | 46 | #### From the Book? 47 | 48 | The shmem loop looks pretty good to us. 
Though, if aggregation is happening under the covers, the reader has no way of knowing that. 49 | 50 | See apps/histo_src/ for all the implementations. 51 | 52 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/alternates/histo_alternates.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file histo_alternates.h 17 | * \brief header file for the alternate models for histo 18 | */ 19 | 20 | double histo_exstack2_goto(int64_t *pckindx, int64_t T, int64_t *lcounts, int64_t buf_cnt); 21 | double histo_exstack2_cyclic(int64_t *pckindx, int64_t T, int64_t *lcounts, int64_t buf_cnt); 22 | double histo_exstack_function(int64_t *pckindx, int64_t T, int64_t *lcounts, int64_t buf_cnt); 23 | double histo_exstack2_function(int64_t *pckindx, int64_t T, int64_t *lcounts, int64_t buf_cnt); 24 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/alternates/histo_exstack2_cyclic.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. 
For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file histo_exstack2_cyclic.upc 17 | * \brief The exstack2 implementation of histogram looping over single pushes and pops. 18 | */ 19 | #include "histo.h" 20 | 21 | /*! 22 | * \brief This routine implements the exstack2 variant of histogram where one 23 | * pushes and pops, pushes and pops, .... instead of 24 | * pushing until you can't, then popping until you can't. 25 | * \param *pckindx array of packed indices for the distributed version of the global array of counts. 26 | * \param l_num_ups the length of the pcindx array 27 | * \param *lcounts localized pointer to the count array. 28 | * \param buf_cnt the size of the exstack buffers in packages 29 | * \return average run time 30 | */ 31 | double histo_exstack2_cyclic(int64_t *pckindx, int64_t l_num_ups, int64_t *lcounts, int64_t buf_cnt) { 32 | int ret; 33 | double tm; 34 | int64_t i; 35 | int64_t pe, col, idx; 36 | minavgmaxD_t stat[1]; 37 | exstack2_t * ex2 = exstack2_init(buf_cnt, sizeof(int64_t)); 38 | if( ex2 == NULL ) return(-1.0); 39 | 40 | lgp_barrier(); 41 | tm = wall_seconds(); 42 | for( i=0; exstack2_proceed( ex2, (i==l_num_ups) ); ) { 43 | if( i < l_num_ups ) { 44 | col = pckindx[i] >> 16; 45 | pe = pckindx[i] & 0xffff; 46 | if( exstack2_push(ex2, &col, pe) ) 47 | i++; 48 | } 49 | if( exstack2_pop(ex2, &idx, NULL) ) 50 | lcounts[idx]++; 51 | } 52 | lgp_barrier(); 53 | tm = wall_seconds() - tm; 54 | 55 | lgp_min_avg_max_d( stat, tm, THREADS ); 56 | 57 | lgp_barrier(); 58 | exstack2_clear(ex2); 59 | free(ex2); 60 | return( stat->avg ); 61 | } 62 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/alternates/histo_exstack2_goto.upc: -------------------------------------------------------------------------------- 1 | 
/******************************************************************
//
//
//  Copyright(C) 2020, Institute for Defense Analyses
//  4850 Mark Center Drive, Alexandria, VA; 703-845-2500
//
//
//  All rights reserved.
//
//  This file is a part of Bale.  For license information see the
//  LICENSE file in the top level directory of the distribution.
//
//
 *****************************************************************/

/*! \file histo_exstack2_goto.upc
 * \brief The exstack2 implementation of histogram with goto instead of loops.
 */
#include "histo.h"

/*!
 * \brief This routine implements the goto variant of histogram using exstack2.
 *
 * Each packed index is split into a local offset (the bits above bit 15) and a
 * destination PE (the low 16 bits). Offsets are pushed to their PE until a push
 * fails, at which point control jumps to the pop phase; after popping, control
 * resumes through a computed goto at whichever phase was interrupted.
 *
 * \param *pckindx array of packed indices for the distributed version of the global array of counts.
 * \param l_num_ups the length of the pckindx array
 * \param *lcounts localized pointer to the count array.
 * \param buf_cnt the size of the exstack buffers in packages
 * \return average run time, or -1.0 if the exstack2 could not be initialized
 *
 */
double histo_exstack2_goto(int64_t *pckindx, int64_t l_num_ups, int64_t *lcounts, int64_t buf_cnt) {
  double tm;
  int64_t pe, col, idx;
  int64_t i;
  minavgmaxD_t stat[1];
  exstack2_t * ex2 = exstack2_init(buf_cnt, sizeof(int64_t));
  if( ex2 == NULL ) return(-1.0);

  /* Resume point after a pop phase: the pending push while updates remain,
     the pop phase itself once every update has been pushed. */
  void *loop_ptr = &&histo_push;
  lgp_barrier();
  tm = wall_seconds();
  for(i=0; i<l_num_ups; i++){
    col = pckindx[i] >> 16;
    pe  = pckindx[i] & 0xffff;
  histo_push:
    /* A failed push leaves col/pe intact, so re-entering at histo_push
       retries the same update. */
    if( !exstack2_push(ex2, &col, pe) )
      goto histo_pop;
  }
  loop_ptr = &&histo_pop;
histo_pop:
  while(exstack2_pop(ex2, &idx, NULL))
    lcounts[idx]++;

  /* (i==l_num_ups) tells exstack2 that this PE has finished pushing. */
  if( exstack2_proceed( ex2, (i==l_num_ups) ) )
    goto *loop_ptr;

  lgp_barrier();
  tm = wall_seconds() - tm;

  lgp_min_avg_max_d( stat, tm, THREADS );

  lgp_barrier();
  exstack2_clear(ex2);
  free(ex2);
  return( stat->avg );
}
| 68 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/histo.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file histo.h 17 | * \brief header file for the histogram app. 18 | */ 19 | #ifndef HISTO_H 20 | #define HISTO_H 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | /*! 27 | \brief A structure to carry all the histogram arrays, counts to different implementations, 28 | and aids in error checking 29 | */ 30 | typedef struct histo_t { 31 | SHARED int64_t * counts; /*!< the shared array that holds the histogram counts */ 32 | int64_t * lcounts; /*!< the local pointer to the per thread parts of counts */ 33 | int64_t num_counts; /*!< the global size of the counts array */ 34 | int64_t lnum_counts; /*!< the local size of the counts array */ 35 | int64_t * index; /*!< the local index array */ 36 | int64_t * pckindx; /*!< the packed index with the divmod calculation already done */ 37 | int64_t l_num_ups; /*!< the local number of update to do */ 38 | } histo_t; 39 | 40 | double histo_agp(histo_t * data); /*!< The AGP implementation */ 41 | double histo_exstack(histo_t * data, int64_t buf_cnt); /*!< The EXSTACK implementation */ 42 | double histo_exstack2(histo_t * data, int64_t buf_cnt); /*!< The EXSTACK2 implementation */ 43 | double histo_conveyor(histo_t * data); /*!< The CONVEYOR implementation */ 44 | 45 | #include "alternates/histo_alternates.h" 46 | 47 | #endif 
48 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/histo_agp.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | /*! \file histo_agp.upc 16 | * \brief The intuitive implementation of histogram that uses global atomics. 17 | */ 18 | #include "histo.h" 19 | 20 | /*! 21 | * \brief This routine implements straight forward, single word atomic updates to implement histogram. 22 | * \param data the histo_t struct that carries all the parameters for the implementations 23 | * \return average run time 24 | */ 25 | double histo_agp(histo_t * data){ 26 | double tm; 27 | int64_t i; 28 | minavgmaxD_t stat[1]; 29 | 30 | lgp_barrier(); 31 | tm = wall_seconds(); 32 | 33 | for(i = 0; i < data->l_num_ups; i++) { 34 | #if __cray__ || _CRAYC 35 | #pragma pgas defer_sync 36 | #endif 37 | //_amo_aadd(&counts[index[i]], 1); 38 | //counts[index[i]] += 1; 39 | assert(data->index[i] < data->num_counts); 40 | lgp_atomic_add(data->counts, data->index[i], 1L); 41 | } 42 | 43 | lgp_barrier(); 44 | tm = wall_seconds() - tm; 45 | lgp_min_avg_max_d( stat, tm, THREADS); 46 | 47 | return( stat->avg ); 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/histo_conveyor.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // 
Copyright(C) 2019-2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file histo_conveyor.upc 17 | * \brief A conveyor implementation of histogram. 18 | */ 19 | #include "histo.h" 20 | 21 | /*! 22 | * \brief This routine implements histogram using conveyors 23 | * \param data the histo_t struct that carries all the parameters for the implementations 24 | * \return average run time 25 | */ 26 | double histo_conveyor(histo_t * data){ 27 | int ret; 28 | int64_t i; 29 | double tm; 30 | int64_t pe, col; 31 | int64_t pop_col; 32 | 33 | minavgmaxD_t stat[1]; 34 | 35 | int status = EXIT_FAILURE; 36 | convey_t* conveyor = convey_new(SIZE_MAX, 0, NULL, convey_opt_SCATTER); 37 | if(!conveyor){printf("ERROR: histo_conveyor: convey_new failed!\n"); return(-1.0);} 38 | 39 | ret = convey_begin(conveyor, sizeof(int64_t), 0); 40 | if(ret < 0){printf("ERROR: histo_conveyor: begin failed!\n"); return(-1.0);} 41 | 42 | lgp_barrier(); 43 | tm = wall_seconds(); 44 | i = 0UL; 45 | while(convey_advance(conveyor, i == data->l_num_ups)) { 46 | for(; i< data->l_num_ups; i++){ 47 | col = data->pckindx[i] >> 20; 48 | pe = data->pckindx[i] & 0xfffff; 49 | assert(pe < THREADS); 50 | if( !convey_push(conveyor, &col, pe)) 51 | break; 52 | } 53 | while( convey_pull(conveyor, &pop_col, NULL) == convey_OK){ 54 | assert(pop_col < data->lnum_counts); 55 | data->lcounts[pop_col] += 1; 56 | } 57 | } 58 | 59 | lgp_barrier(); 60 | tm = wall_seconds() - tm; 61 | 62 | lgp_min_avg_max_d( stat, tm, THREADS ); 63 | 64 | lgp_barrier(); 65 | convey_free(conveyor); 66 | return( stat->avg ); 67 | } 68 | -------------------------------------------------------------------------------- 
/src/bale_classic/apps/histo_src/histo_exstack.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | /*! \file histo_exstack.upc 16 | * \brief The exstack implementation of histogram. 17 | */ 18 | #include "histo.h" 19 | 20 | /*! 21 | * \brief This routine implements the exstack classic variant of histogram. 22 | * \param data the histo_t struct that carries all the parameters for the implementations 23 | * \param buf_cnt the number of packages in the exstack buffers 24 | * \return average run time 25 | * 26 | */ 27 | //double histo_exstack(int64_t *pckindx, int64_t T, int64_t *lcounts, int64_t buf_cnt) { 28 | double histo_exstack(histo_t * data, int64_t buf_cnt){ 29 | int64_t i; 30 | double tm; 31 | int64_t pe, col, *colp; 32 | minavgmaxD_t stat[1]; 33 | exstack_t * ex = exstack_init(buf_cnt, sizeof(int64_t)); 34 | if( ex == NULL) return(-1.0); 35 | 36 | lgp_barrier(); 37 | tm = wall_seconds(); 38 | i = 0UL; 39 | 40 | while( exstack_proceed(ex, (i==data->l_num_ups)) ){ 41 | int64_t popped = 0; 42 | for( ; i < data->l_num_ups; i++){ 43 | col = data->pckindx[i] >> 20; 44 | pe = data->pckindx[i] & 0xfffff; 45 | assert(pe < THREADS); 46 | if( !exstack_push(ex, &col, pe) ) 47 | break; 48 | } 49 | 50 | exstack_exchange(ex); 51 | 52 | while((colp = exstack_pull(ex, NULL))){ 53 | popped++; 54 | assert(*colp < data->lnum_counts); 55 | data->lcounts[*colp]++; 56 | } 57 | } 58 | 59 | lgp_barrier(); 60 | tm = wall_seconds() - tm; 61 | 62 | lgp_min_avg_max_d( stat, tm, 
THREADS ); 63 | 64 | exstack_clear(ex); 65 | free(ex); 66 | return( stat->avg ); 67 | } 68 | 69 | -------------------------------------------------------------------------------- /src/bale_classic/apps/histo_src/histo_exstack2.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | /*! \file histo_exstack2.upc 16 | * \brief The exstack2 implementation of histogram. 17 | */ 18 | #include "histo.h" 19 | 20 | /*! 21 | * \brief This routine implements the exstack2 variant of histogram. 22 | * \param data the histo_t struct that carries all the parameters for the implementations 23 | * \param buf_cnt the number of packages in the exstack2 buffers 24 | * \return average run time 25 | * 26 | */ 27 | //double histo_exstack2(int64_t *pckindx, int64_t T, int64_t *lcounts, int64_t buf_cnt) { 28 | double histo_exstack2(histo_t * data, int64_t buf_cnt) { 29 | int ret; 30 | double tm; 31 | int64_t pe, col, idx, *idxp; 32 | minavgmaxD_t stat[1]; 33 | exstack2_t * ex2 = exstack2_init(buf_cnt, sizeof(int64_t)); 34 | if( ex2 == NULL ) return(-1.0); 35 | 36 | lgp_barrier(); 37 | tm = wall_seconds(); 38 | int64_t i = 0; 39 | while(exstack2_proceed( ex2, i==data->l_num_ups )) { 40 | for( ; i < data->l_num_ups; i++){ 41 | col = data->pckindx[i] >> 20L; 42 | pe = data->pckindx[i] & 0xfffff; 43 | assert(pe < THREADS); 44 | if( !exstack2_push(ex2, &col, pe) ) 45 | break; 46 | } 47 | 48 | while((idxp = exstack2_pull(ex2, NULL))){ 49 | assert(*idxp < data->lnum_counts); 50 | 
data->lcounts[*idxp]++; 51 | } 52 | } 53 | 54 | lgp_barrier(); 55 | tm = wall_seconds() - tm; 56 | lgp_min_avg_max_d( stat, tm, THREADS ); 57 | 58 | lgp_barrier(); 59 | exstack2_clear(ex2); 60 | free(ex2); 61 | return( stat->avg ); 62 | } 63 | -------------------------------------------------------------------------------- /src/bale_classic/apps/ig_src/README.md: -------------------------------------------------------------------------------- 1 | # indexgather 2 | 3 | ## Description 4 | 5 | The indexgather app involves the asynchronous reading of random elements from a distributed array. 6 | 7 | In SHMEM, indexgather looks like this: 8 | 9 | ```c 10 | for(i = 0; i < N; i++) 11 | shmem_get(&target[i], &table[index[i]], sizeof(long), index[i] % NPES); 12 | ``` 13 | 14 | where table is a distributed array with M elements, index is a local array of indices mod M, 15 | and target is a local array where we record the results of the remote reads. Similar to [histogram](../histo_src/README.md), the intent of indexgather is that the PEs read asynchronously and without dependencies (allowing for aggregation). 16 | 17 | ## Discussion 18 | 19 | Indexgather is another rather simple application. Just like histogram, it is important because it represents a pattern of communication that is frequently used in parallel applications. In terms of difficulty to aggregate though, it is a step up from histogram. 20 | 21 | Below is indexgather written with exstack. Note that we need three distinct phases. 22 | - In phase 1, we send out requests for the remote read. 23 | - In phase 2, we process requests (pop them off in-buffers, 24 | look up the requested index in the table, and push them back onto out-buffers). 25 | -In phase 3, we receive our processed requests and record their values in our tgt array. 
26 | 27 | ```c 28 | while( exstack_proceed(ex, (i==l_num_req)) ) { 29 | i0 = i; 30 | while(i < l_num_req) { 31 | l_indx = pckindx[i] >> 16; 32 | pe = pckindx[i] & 0xffff; 33 | if(!exstack_push(ex, &l_indx, pe)) 34 | break; 35 | i++; 36 | } 37 | 38 | exstack_exchange(ex); 39 | 40 | while(exstack_pop(ex, &idx , &fromth)) { 41 | idx = ltable[idx]; 42 | exstack_push(ex, &idx, fromth); 43 | } 44 | 45 | lgp_barrier(); 46 | 47 | exstack_exchange(ex); 48 | 49 | for(j=i0; j> 16; 58 | pe = pckindx[i] & 0xffff; 59 | if(!exstack_push(ex, &pkg, pe)) 60 | break; 61 | i++; 62 | } 63 | 64 | exstack_exchange(ex); 65 | 66 | while(exstack_pop(ex, &pkg , &fromth)) { 67 | pkg.idx = ltable[pkg.idx]; 68 | exstack_push(ex, &pkg, fromth); // don't need check for room 69 | } 70 | lgp_barrier(); 71 | exstack_exchange(ex); 72 | 73 | while(exstack_pop(ex, &pkg, NULL)){ 74 | tgt[pkg.i] = pkg.idx; 75 | } 76 | lgp_barrier(); 77 | } 78 | 79 | tm = wall_seconds() - tm; 80 | lgp_barrier(); 81 | lgp_min_avg_max_d( stat, tm, THREADS ); 82 | 83 | exstack_clear(ex); free(ex); 84 | return( stat->avg ); 85 | } 86 | 87 | -------------------------------------------------------------------------------- /src/bale_classic/apps/ig_src/ig.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file ig.h 17 | * \brief Demo program that computes an indexed gather of elements from a 18 | * shared array into local arrays. 
19 | * The size of the source array should be large enough that the elements 20 | * need to be spread across the whole machine 21 | */ 22 | 23 | #ifndef IG_H 24 | #define IG_H 25 | #include 26 | #include 27 | 28 | double ig_agp(int64_t *tgt, int64_t *index, int64_t l_num_req, SHARED int64_t *table); 29 | double ig_exstack(int64_t *tgt, int64_t *pckindx, int64_t l_num_req, int64_t *ltable, int64_t buf_cnt); 30 | double ig_exstack2(int64_t *tgt, int64_t *pckindx, int64_t l_num_req, int64_t *ltable, int64_t buf_cnt); 31 | double ig_conveyor(int64_t *tgt, int64_t *pckindx, int64_t l_num_req, int64_t *ltable); 32 | 33 | #include "alternates/ig_alternates.h" 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/bale_classic/apps/ig_src/ig_agp.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file ig_agp.upc 17 | * \brief The intuitive implementation of indexgather that uses single word gets to shared addresses. 18 | */ 19 | #include "ig.h" 20 | 21 | /*! 22 | * \brief This routine implements the single word get version indexgather 23 | * \param *tgt array of target locations for the gathered values 24 | * \param *index array of indices into the global array of counts 25 | * \param l_num_req the length of the index array 26 | * \param *table shared pointer to the shared table array. 
27 | * \return average run time 28 | * 29 | */ 30 | double ig_agp(int64_t *tgt, int64_t *index, int64_t l_num_req, SHARED int64_t *table) { 31 | int64_t i; 32 | double tm; 33 | minavgmaxD_t stat[1]; 34 | 35 | lgp_barrier(); 36 | tm = wall_seconds(); 37 | 38 | for(i = 0; i < l_num_req; i++){ 39 | #if __cray__ || _CRAYC 40 | #pragma pgas defer_sync 41 | #endif 42 | tgt[i] = lgp_get_int64(table, index[i]); 43 | } 44 | 45 | lgp_barrier(); 46 | tm = wall_seconds() - tm; 47 | 48 | lgp_min_avg_max_d( stat, tm, THREADS ); 49 | 50 | return( stat->avg ); 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/bale_classic/apps/ig_src/ig_conveyor.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2019-2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file ig_conveyor.upc 17 | * \brief A conveyor implementation of indexgather. 18 | */ 19 | #include "ig.h" 20 | 21 | /*! 22 | * \brief This routine implements the conveyor variant of indexgather. 23 | * \param *tgt array of target locations for the gathered values 24 | * \param *pckindx array of packed indices for the distributed version of the global array of counts. 25 | * \param l_num_req the length of the pcindx array 26 | * \param *ltable localized pointer to the count array. 
27 | * \return average run time 28 | * 29 | */ 30 | double ig_conveyor(int64_t *tgt, int64_t *pckindx, int64_t l_num_req, int64_t *ltable) { 31 | double tm; 32 | int64_t pe, fromth, fromth2; 33 | int64_t i = 0, from; 34 | minavgmaxD_t stat[1]; 35 | bool more; 36 | 37 | typedef struct pkg_t { 38 | int64_t idx; 39 | int64_t val; 40 | } pkg_t; 41 | pkg_t pkg; 42 | pkg_t *ptr = calloc(1, sizeof(pkg_t)); 43 | 44 | convey_t* requests = convey_new(SIZE_MAX, 0, NULL, convey_opt_SCATTER); 45 | assert( requests != NULL ); 46 | convey_t* replies = convey_new(SIZE_MAX, 0, NULL, 0); 47 | assert( replies != NULL ); 48 | 49 | convey_begin(requests, sizeof(pkg_t), 0); 50 | convey_begin(replies, sizeof(pkg_t), 0); 51 | lgp_barrier(); 52 | 53 | tm = wall_seconds(); 54 | 55 | i = 0; 56 | while (more = convey_advance(requests, (i == l_num_req)), 57 | more | convey_advance(replies, !more)) { 58 | 59 | for (; i < l_num_req; i++) { 60 | pkg.idx = i; 61 | pkg.val = pckindx[i] >> 16; 62 | pe = pckindx[i] & 0xffff; 63 | if (! convey_push(requests, &pkg, pe)) 64 | break; 65 | } 66 | 67 | while (convey_pull(requests, ptr, &from) == convey_OK) { 68 | pkg.idx = ptr->idx; 69 | pkg.val = ltable[ptr->val]; 70 | if (! 
convey_push(replies, &pkg, from)) { 71 | convey_unpull(requests); 72 | break; 73 | } 74 | } 75 | 76 | while (convey_pull(replies, ptr, NULL) == convey_OK) 77 | tgt[ptr->idx] = ptr->val; 78 | } 79 | 80 | tm = wall_seconds() - tm; 81 | free(ptr); 82 | lgp_barrier(); 83 | 84 | lgp_min_avg_max_d( stat, tm, THREADS ); 85 | convey_free(requests); 86 | convey_free(replies); 87 | return( stat->avg ); 88 | } 89 | -------------------------------------------------------------------------------- /src/bale_classic/apps/ig_src/ig_exstack.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file ig_exstack.upc 17 | * \brief The classic exstack implementation of indexgather. 18 | */ 19 | #include "ig.h" 20 | 21 | /*! 22 | * \brief This routine implements the exstack classic variant of indexgather. 23 | * \param *tgt array of target locations for the gathered values 24 | * \param *pckindx array of packed indices for the distributed version of the global array of counts. 25 | * \param l_num_req the length of the pcindx array 26 | * \param *ltable localized pointer to the count array. 
27 | * \param buf_cnt the exstack buffer size in packets 28 | * \return average run time 29 | * 30 | */ 31 | double ig_exstack(int64_t *tgt, int64_t *pckindx, int64_t l_num_req, int64_t *ltable, int64_t buf_cnt) { 32 | double tm; 33 | int imdone; 34 | int64_t ret; 35 | int64_t room; 36 | int64_t l_indx, idx, i, i0, j; 37 | int64_t pe, fromth; 38 | minavgmaxD_t stat[1]; 39 | 40 | exstack_t * ex = exstack_init(buf_cnt, sizeof(int64_t)); 41 | if( ex == NULL ) return(-1.0); 42 | 43 | lgp_barrier(); 44 | tm = wall_seconds(); 45 | i=0; 46 | while( exstack_proceed(ex, (i==l_num_req)) ) { 47 | i0 = i; 48 | while(i < l_num_req) { 49 | l_indx = pckindx[i] >> 16; 50 | pe = pckindx[i] & 0xffff; 51 | if(!exstack_push(ex, &l_indx, pe)) 52 | break; 53 | i++; 54 | } 55 | 56 | exstack_exchange(ex); 57 | 58 | while(exstack_pop(ex, &idx , &fromth)) { 59 | idx = ltable[idx]; 60 | exstack_push(ex, &idx, fromth); // don't need check for room 61 | } 62 | lgp_barrier(); 63 | exstack_exchange(ex); 64 | 65 | for(j=i0; javg ); 79 | } 80 | 81 | -------------------------------------------------------------------------------- /src/bale_classic/apps/permute_matrix_src/README.md: -------------------------------------------------------------------------------- 1 | # Permute Matrix 2 | 3 | ## Definition 4 | 5 | This application takes a distributed sparse matrix (see [spmat](../../spmat/README.md) library), a permutation for the rows and a permutation for the columns of the matrix. The goal is to apply the permutations and return the permuted matrix. (A note on the permutations: If row_perm is a permutation of the rows, we interpret rowperm[i] = j to mean that row i in the original matrix should go to row j in the permuted matrix.). The implementations of permute_matrix are contained in the spmat library. 
6 | 7 | ## Discussion 8 | 9 | #### Parallel Implementation Considerations 10 | 11 | Given our interpretation of the permutation arrays, processors know where all of their local rows are destined in the permuted matrix. Also, as a consequence of the locality properties of our distributed sparse matrix data structure, the nonzeros in a row all go to the same PE (permuting the rows and columns of a matrix does not change the number of nonzeros in each row). 12 | 13 | This application is done in two phases. 14 | 15 | ##### Phase 1 16 | 17 | In phase 1, we shuffle the row data so that the nonzeros for each row land in the correct place in the permuted matrix data structure. This phase looks a little like the histogram pattern: for each local row, PEs look up the destination of the row and send/write its data to the correct PE. 18 | 19 | There are a couple of ways of going about sending the nonzeros for a row in phase 1. In the AGP code, we can send it in bulk if we know where it is destined on the remote side (which we can get by remotely reading the offset array). In an aggregated code, we could send (row, col) pairs, which would make the code a little easier, but sends extra data, or we could send a small header (row, row_count) and then the row_count nonzeros. 20 | 21 | ##### Phase 2 22 | 23 | In the second phase, we relabel the column indices on every nonzero in the permuted sparse matrix. This phase looks exactly like indexgather. PEs look at each column index they have locally and request/read its permuted index in the distributed colperm array. 24 | 25 | #### Why is it in bale? 26 | 27 | permute_matrix is an interesting application because it presents an example where the payloads for the sends (see phase 1) are not necessarily a fixed size. 28 | 29 | #### From the Book? 30 | 31 | The AGP version of permute_matrix (see spmat/spmat_agp.upc) is fairly concise, but still suffers from some pretty low-level address arithmetic (that could be improved upon). 
The aggregated codes are pretty horrible. -------------------------------------------------------------------------------- /src/bale_classic/apps/permute_matrix_src/alternates/permute_matrix_alternates.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file permute_matrix_alternates.h 17 | * \brief header file for the alternate models for permute_matrix 18 | */ 19 | -------------------------------------------------------------------------------- /src/bale_classic/apps/randperm_src/randperm_alternates.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! 
\file randperm_alternates.h 17 | * \brief header file for the alternate models for randperm 18 | */ 19 | SHARED int64_t * rand_permp_agp_opt(int64_t N, int seed); /*!< optimized alternate implementation */ 20 | -------------------------------------------------------------------------------- /src/bale_classic/apps/sssp_src/alternates/sssp_alternates.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file sssp_alternates.h 17 | * \brief header file for the alternate models for sssp 18 | */ 19 | 20 | -------------------------------------------------------------------------------- /src/bale_classic/apps/sssp_src/sssp.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file sssp.h 17 | * \brief Implementation of Single Source Shortest Path algorithms. 
18 | */ 19 | 20 | #ifndef sssp_INCLUDED 21 | #define sssp_INCLUDED 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | double sssp_bellman_exstack(d_array_t *tent, sparsemat_t * mat, int64_t buf_cnt, int64_t v0); /* implementation of Bellman-Ford using exstack */ 30 | double sssp_bellman_exstack2(d_array_t *tent, sparsemat_t * mat, int64_t buf_cnt, int64_t v0); /* implementation of Bellman-Ford using exstack2 */ 31 | double sssp_bellman_convey(d_array_t *tent, sparsemat_t * mat, int64_t v0); /* implementation of Bellman-Ford using conveyors AGP model */ 32 | double sssp_delta_exstack(d_array_t *tent, sparsemat_t * mat, int64_t buf_cnt, int64_t v0, double opt_delta); /* implementation of Delta-Stepping using exstack */ 33 | double sssp_delta_exstack2(d_array_t *tent, sparsemat_t * mat, int64_t buf_cnt, int64_t v0, double opt_delta); /* implementation of Delta-Stepping using exstack2 */ 34 | double sssp_delta_convey(d_array_t *tent, sparsemat_t * mat, int64_t v0, double opt_delta); /* implementation of Delta-Stepping using conveyors AGP model */ 35 | 36 | void dump_tent(char *str, d_array_t *tent); /*!< debugging routine to dump the tentative weights in a d_array */ 37 | 38 | /*! 39 | \brief the package struct for exstack, exstack2 and conveyors 40 | 41 | Note: We don't build the tree of paths from the vertices back along the shortest path to vertex 0. 42 | If that were required, we would have to send the tail of the edge be relaxed. 43 | This would not change any patterns, only increase the bandwidth demand. 
44 | */ 45 | typedef struct sssp_pkg_t { 46 | int64_t lj; /*!< the local "head" of the edge */ 47 | double tw; /*!< new tentative weight */ 48 | } sssp_pkg_t ; 49 | 50 | // alternates go here 51 | double sssp_bellman_agp(d_array_t *tent, sparsemat_t * mat, int64_t v0); /*!< implementation of Bellman-Ford using the AGP model */ 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/bale_classic/apps/topo_src/alternates/toposort_alternates.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! 
\file toposort_alternates.h 17 | * \brief header file for the alternate models for toposort 18 | */ 19 | 20 | double toposort_matrix_oo(SHARED int64_t *tri_rperm, SHARED int64_t *tri_cperm, sparsemat_t *mat, sparsemat_t *tmat); 21 | double toposort_matrix_cooler(SHARED int64_t *tri_rperm, SHARED int64_t *tri_cperm, sparsemat_t *mat, sparsemat_t *tmat); 22 | double toposort_matrix_exstack_orig(SHARED int64_t *rperm, SHARED int64_t *cperm, sparsemat_t *mat, sparsemat_t *tmat); 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/bale_classic/apps/topo_src/toposort.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! 
\file toposort.h 17 | * \brief Demo application that does a toposort on a permuted upper triangular matrix 18 | */ 19 | #ifndef TOPOSORT_H 20 | #define TOPOSORT_H 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | double toposort_matrix_agp(SHARED int64_t *tri_rperm, SHARED int64_t *tri_cperm, sparsemat_t *mat, sparsemat_t *tmat); 28 | double toposort_matrix_exstack(SHARED int64_t *rperm, SHARED int64_t *cperm, sparsemat_t *mat, sparsemat_t *tmat, int64_t buf_cnt); 29 | double toposort_matrix_exstack2(SHARED int64_t *rperm, SHARED int64_t *cperm, sparsemat_t *mat, sparsemat_t *tmat, int64_t buf_cnt); 30 | double toposort_matrix_convey(SHARED int64_t *rperm, SHARED int64_t *cperm, sparsemat_t *mat, sparsemat_t *tmat); 31 | 32 | #include "alternates/toposort_alternates.h" 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/bale_classic/apps/transpose_matrix_src/README.md: -------------------------------------------------------------------------------- 1 | # transpose_matrix 2 | 3 | ## Definition 4 | 5 | In this application we transpose a distributed sparse matrix. See the [spmat](../../spmat/README.md) library for this data structure. The implementations of transpose_matrix are contained in the spmat library also. 6 | 7 | ## Discussion 8 | 9 | #### Parallel Implementation Considerations 10 | 11 | The algorithm we use to transpose a matrix has two phases. Both phases of the transpose function are well-suited for aggregation. 12 | 13 | ##### Phase 1 14 | 15 | PEs use a histogram pattern to calculate the number of nonzeros in each column of the matrix. This allows us to allocate the exact space needed to store the transpose matrix. This phase is not strictly necessary if memory constraints are not a concern. We include it (and other similar measures in other bale code) in an effort to demonstrate efficiency when it comes to resource utilization. 
16 | 17 | ##### Phase 2 18 | 19 | We again use the histogram pattern, but this time we send the nonzeros of the transpose matrix to the correct PE. For example, if *A[i,j]* lives on PE *k* in the original matrix, PE *k* sends *(i,j)* to PE *m* (the PE that owns row *j* of *AT*) to create *AT[j,i]* (where *AT* is the transpose of *A*). 20 | 21 | #### Why is it in bale? 22 | 23 | An interesting difference between the AGP and aggregated versions comes in the second phase. In the AGP version, PEs are placing nonzeros in AT directly into the distributed sparse matrix data structure via remote writes. To do that in parallel, PEs must be able to atomically reserve a spot in the "nonzero" array for their writes. Since we don't know in what order these nonzeros will arrive, the PEs are contending for these writes. In the aggregated versions, PEs are being sent nonzeros (via an aggregation library) and process them in serial. So there is no need for atomic operations. This phenomenon occurs in other bale apps (histogram for example) and is fairly common when going from AGP style parallel code to aggregated code. 24 | 25 | #### From the Book? 26 | 27 | Similar to permute_matrix, the AGP version of transpose_matrix (see spmat/spmat_agp.upc) is fairly concise, but not beautiful. The aggregated versions are OK, but could be improved vastly (we think) with a more modern language. 28 | 29 | -------------------------------------------------------------------------------- /src/bale_classic/apps/transpose_matrix_src/alternates/transpose_matrix_alternates.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution.
12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file transpose_matrix_alternates.h 17 | * \brief header file for the alternate models for transpose_matrix 18 | */ 19 | -------------------------------------------------------------------------------- /src/bale_classic/apps/triangle_src/README.md: -------------------------------------------------------------------------------- 1 | # Triangle counting 2 | 3 | ## Definition 4 | 5 | This uses a matrix algebra approach to counting triangles in a graph. 6 | See the book, "Graph Algorithms in the Language of Linear Algebra", 7 | edited by Gilbert and Kepner for more details on our approach to this problem. 8 | 9 | ## Discussion 10 | 11 | We have implemented two algorithms to count triangles in a graph. The 12 | first computes (L & L * U) and the second computes (L & U * L), where 13 | '&' means element-wise AND and '*' is ordinary matrix 14 | multiplication. 15 | 16 | #### Why is it in bale? 17 | 18 | Our implementation of triangle counting has several interesting properties. For one thing, the algorithm allows an owner of a row to "push" its nonzeros to other rows that require that data and have them perform the computation, or to "pull" (or get with PGAS reads) the data itself and do the computation locally. This freedom is not found in other bale apps. The "push" model maps onto aggregation quite nicely while the "pull" model is much more natural using the AGP model. Further, the two different models, "push" and "pull", can have drastic consequences on communication volume depending on the algorithm used (either (L & L * U) or (L & U * L)). Finally, as others have shown in publications, this application is interesting because there are ways to reduce communication by permuting the rows and columns of the input matrix if one wanted to speed up the calculation. 19 | 20 | #### From the Book?
21 | 22 | The pull version of the AGP code is quite simple, but suffers from the same inscrutability that our other AGP matrix codes do. The push versions of the aggregated codes are also pretty simple, but could be improved with more modern language tools (like iterators for the sparse matrix data structure). -------------------------------------------------------------------------------- /src/bale_classic/apps/triangle_src/alternates/triangle_agp_oo.upc: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | /*! \file triangle_agp_oo.upc 16 | * \brief The intuitive implementation of triangle counting 17 | * that uses generic global references 18 | */ 19 | 20 | #include "triangle.h" 21 | 22 | /*! 
23 | * \brief This routine implements another AGP variant of triangle counting 24 | * \param *count a place to return the counts from this thread 25 | * \param *sr a place to return the number of shared references 26 | * \param *mat the input sparse matrix 27 | * NB: This must be the tidy lower triangular matrix from the adjacency matrix 28 | * \return average run time 29 | */ 30 | double triangle_agp_oo(int64_t *count, int64_t *sr, sparsemat_t * mat) { 31 | int64_t cnt=0; 32 | int64_t numpulled=0; 33 | int64_t l_i, L_i, L_k, L_j; 34 | nxnz_t *nxnz_I = init_nxnz(mat); 35 | nxnz_t *nxnz_J = init_nxnz(mat); 36 | nxnz_t *nxnz_K = init_nxnz(mat); 37 | 38 | double t1 = wall_seconds(); 39 | for(l_i = 0; l_i < mat->lnumrows; l_i++){ 40 | L_i = l_i*THREADS + MYTHREAD; 41 | for(first_l_nxnz(nxnz_I, l_i); has_l_nxnz(nxnz_I, l_i); incr_l_nxnz(nxnz_I, l_i) ){ 42 | L_j = nxnz_I->col; 43 | 44 | numpulled += 2; 45 | for(first_S_nxnz(nxnz_J, L_j); has_S_nxnz(nxnz_J, L_j); incr_S_nxnz(nxnz_J, L_j) ){ 46 | L_k = nxnz_J->col; 47 | numpulled++; 48 | 49 | for(first_l_nxnz(nxnz_K, l_i); has_l_nxnz(nxnz_K, l_i); incr_l_nxnz(nxnz_K, l_i) ){ 50 | if( L_k == nxnz_K->col ){ 51 | cnt++; 52 | break; 53 | } 54 | if( L_k < nxnz_K->col ){ // the rest are all bigger too, cause mat is tidy 55 | break; 56 | } 57 | } 58 | } 59 | } 60 | } 61 | 62 | lgp_barrier(); 63 | minavgmaxD_t stat[1]; 64 | t1 = wall_seconds() - t1; 65 | lgp_min_avg_max_d( stat, t1, THREADS ); 66 | 67 | *sr = numpulled; 68 | *count = cnt; 69 | free(nxnz_I); 70 | free(nxnz_J); 71 | free(nxnz_K); 72 | return(stat->avg); 73 | } 74 | -------------------------------------------------------------------------------- /src/bale_classic/apps/triangle_src/triangle.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 
703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | 16 | /*! \file triangle.h 17 | * \brief Demo application that counts the number of triangles 18 | * in a graph. The graph is stored as a lower triangular sparse matrix. 19 | */ 20 | #ifndef TRIANGLE_H 21 | #define TRIANGLE_H 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | double triangle_agp(int64_t *count, int64_t *sr, sparsemat_t * L, sparsemat_t * U, int64_t alg); 30 | double triangle_exstack_push(int64_t *count, int64_t *sr, sparsemat_t *L, sparsemat_t * U, int64_t alg, int64_t buf_cnt); 31 | //double triangle_exstack_pull(int64_t *count, int64_t *sr, sparsemat_t *L, int64_t alg, int64_t buf_cnt); 32 | double triangle_exstack2_push(int64_t *count, int64_t *sr, sparsemat_t * L, sparsemat_t * U, int64_t alg, int64_t buf_cnt); 33 | double triangle_convey_push(int64_t *count, int64_t *sr, sparsemat_t * L, sparsemat_t * U, int64_t alg); 34 | //double triangle_convey_pull(int64_t *count, int64_t *sr, sparsemat_t *mat); 35 | 36 | // alternates go here 37 | double triangle_agp_opt1(int64_t *count, int64_t *sr, sparsemat_t * L, sparsemat_t * U, int64_t alg); 38 | double triangle_agp_opt2(int64_t *count, int64_t *sr, sparsemat_t * L, sparsemat_t * U, int64_t alg); 39 | double triangle_agp_oo(int64_t *count, int64_t *sr, sparsemat_t * L); 40 | double triangle_agp_iter(int64_t *count, int64_t *sr, sparsemat_t * L, sparsemat_t * U, int64_t alg); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/bale_classic/apps/union_find/README.md: -------------------------------------------------------------------------------- 1 | ## unionfind 2 | 3 | ### Definition 4 | 5 | This is a directory for a future 
application called unionfind. Here 6 | is a description of that application. It takes in a very sparse graph 7 | (represented as a symmetric adjacency matrix) and finds the components 8 | of the graph. What counts as "finding the components" is up for 9 | discussion, but it must be "easy" to query any vertex in the graph and 10 | find out which component it lives in. After this initial computation, 11 | waves of new edges are added to the graph and the components must be 12 | updated. Whether these edges are added synchronously or asynchronously 13 | is also up for discussion. We imagine it would be easier to consider 14 | them coming in synchronous waves -- a batch of edges arrives, and the 15 | components are updated before the next batch arrives. 16 | 17 | There is a serial version of union-find in the "cousins" directory 18 | `other_serial/C`. 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/bale_classic/bootstrap.sh: -------------------------------------------------------------------------------- 1 | cd libgetput 2 | autoreconf -fi 3 | cd ../exstack 4 | autoreconf -fi 5 | cd ../convey 6 | autoreconf -fi 7 | cd ../spmat 8 | autoreconf -fi 9 | cd ../std_options 10 | autoreconf -fi 11 | cd ../apps 12 | autoreconf -fi 13 | cd .. -------------------------------------------------------------------------------- /src/bale_classic/convey/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Emacs backup files 2 | *~ 3 | # Ignore this autoconf cache, which isn't distributed 4 | autom4te.cache/ 5 | # Ignore some other files created by autoconf.
If the build process 6 | # starts to use $host_cpu, then we will need to distribute 7 | # config.guess and config.sub 8 | m4/libtool.m4 9 | m4/lt* 10 | aclocal.m4 11 | compile 12 | config.h.in 13 | config.guess 14 | config.sub 15 | configure 16 | depcomp 17 | install-sh 18 | ltmain.sh 19 | Makefile.in 20 | missing 21 | test-driver 22 | -------------------------------------------------------------------------------- /src/bale_classic/convey/LICENSE: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // This material may be reproduced by or for the U.S. Government 4 | // pursuant to the copyright license under the clauses at DFARS 5 | // 252.227-7013 and 252.227-7014. 6 | // 7 | // All rights reserved. 8 | // 9 | // Redistribution and use in source and binary forms, with or without 10 | // modification, are permitted provided that the following conditions are met: 11 | // * Redistributions of source code must retain the above copyright 12 | // notice, this list of conditions and the following disclaimer. 13 | // * Redistributions in binary form must reproduce the above copyright 14 | // notice, this list of conditions and the following disclaimer in the 15 | // documentation and/or other materials provided with the distribution. 16 | // * Neither the name of the copyright holder nor the 17 | // names of its contributors may be used to endorse or promote products 18 | // derived from this software without specific prior written permission. 19 | // 20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | // COPYRIGHT HOLDER NOR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | // INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | // STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | // OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /src/bale_classic/convey/alltoallv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 
8 | 9 | 10 | #ifndef CONVEY_ALLTOALLV_H 11 | #define CONVEY_ALLTOALLV_H 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include "common.h" 18 | #include "convey.h" 19 | #include "bolite.h" 20 | 21 | typedef struct checksum { 22 | uint64_t sent; 23 | uint64_t rcvd; 24 | } checksum_t; 25 | 26 | void conveyor_bug(convey_t* c, const char* call, int error); 27 | 28 | checksum_t basictest(convey_t* conveyor, brand_t* prng, double load, 29 | size_t size, int entropy, convey_t* tally, 30 | bool elastic, double reject, bool p2p_sums); 31 | 32 | checksum_t aligntest(convey_t* conveyor, brand_t* prng, double load, 33 | size_t size, bool elastic); 34 | 35 | uint64_t* global_table_init(size_t echo_size, size_t entries, brand_t* prng); 36 | void global_table_free(uint64_t* table); 37 | 38 | checksum_t indexgather(convey_t* request, convey_t* reply, size_t reply_size, 39 | brand_t* prng, double load, size_t entries, 40 | const uint64_t source[entries]); 41 | 42 | checksum_t histogram(convey_t* conveyor, size_t size, brand_t* prng, 43 | double load, size_t entries, uint64_t target[entries]); 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/bale_classic/convey/bench_simple.awk: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, Institute for Defense Analyses 2 | # 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | # 4 | # All rights reserved. 5 | # 6 | # This file is part of the conveyor package. For license information, 7 | # see the LICENSE file in the top level directory of the distribution. 8 | 9 | BEGIN { 10 | if (buf == 0) { 11 | print "set approximate buffer bytes with -vbuf=..." 12 | exit 13 | } 14 | if (ppn <= 0 || ppn % 2 == 1) { 15 | print "set procs/node to an even number with -v ppn=..." 16 | exit 17 | } 18 | if (max <= 0) { 19 | print "set maximum nodes with -v max=..." 
20 | exit 21 | } 22 | 23 | bytes[0] = 8; bytes[1] = 16; bytes[2] = 32; bytes[3] = 128 24 | print "#!/bin/sh -e" 25 | print "export MPP_INIT_QUIET=1" 26 | nodes = 2; power = 2 27 | for (i = 0; nodes <= max; i++) { 28 | print "$NLAUNCH", ppn * nodes, "./alltoall? -- < 1000 * 1000 * 1000) { 36 | cap = cap / 2 37 | } 38 | for (k = 0; k < 2; k++) { 39 | option = (k == 0) ? "" : "-x " 40 | printf("-c%d -n3 %ssimple %d %d\n", cap, option, load, size) 41 | } 42 | } 43 | print "EOF" 44 | if (i % 2 == 1) { 45 | power *= 2 46 | nodes = power 47 | } else { 48 | if (power >= 32) nodes = 45 * power / 32 49 | else if (power >= 8) nodes = 11 * power / 8 50 | else nodes = 3 * power / 2 51 | } 52 | } 53 | print "exit 0" 54 | } 55 | -------------------------------------------------------------------------------- /src/bale_classic/convey/bench_tensor.awk: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, Institute for Defense Analyses 2 | # 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | # 4 | # All rights reserved. 5 | # 6 | # This file is part of the conveyor package. For license information, 7 | # see the LICENSE file in the top level directory of the distribution. 8 | 9 | BEGIN { 10 | if (ppn <= 0 || ppn % 2 == 1) { 11 | print "set procs/node to an even number with -v ppn=..." 12 | exit 13 | } 14 | if (max <= 0) { 15 | print "set maximum nodes with -v max=..." 16 | exit 17 | } 18 | 19 | bytes[0] = 8; bytes[1] = 16; bytes[2] = 32; bytes[3] = 128 20 | print "#!/bin/sh -e" 21 | print "export MPP_INIT_QUIET=1" 22 | nodes = 2; power = 2 23 | for (i = 0; nodes <= max; i++) { 24 | print "$NLAUNCH", ppn * nodes, "./alltoall? 
-- < ppn) { 37 | printf("-b2 -n3 -t%d tensor %d %d\n", ppn / 2, load, size); 38 | } 39 | if (nodes >= 1024) { 40 | printf("-b2 -n3 -t%d tensor %d %d\n", ppn, load, size); 41 | } 42 | } 43 | print "EOF" 44 | if (i % 2 == 1) { 45 | power *= 2 46 | nodes = power 47 | } else { 48 | if (power >= 32) nodes = 45 * power / 32 49 | else if (power >= 8) nodes = 11 * power / 8 50 | else nodes = 3 * power / 2 51 | } 52 | } 53 | print "exit 0" 54 | } 55 | -------------------------------------------------------------------------------- /src/bale_classic/convey/biconvey_impl.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | #ifndef BICONVEY_IMPL_H 11 | #define BICONVEY_IMPL_H 12 | 13 | #include "biconvey.h" 14 | #include "convey_impl.h" 15 | 16 | typedef struct biconveyor_methods { 17 | int (*push)(biconvey_t* self, const void* query, int64_t to); 18 | int (*pull)(biconvey_t* self, void* reply); 19 | int (*advance)(biconvey_t* self, bool done); 20 | int (*begin)(biconvey_t* self, size_t query_bytes, size_t reply_bytes); 21 | int (*reset)(biconvey_t* self); 22 | int (*free)(biconvey_t* self); 23 | } biconvey_methods_t; 24 | 25 | struct biconveyor { 26 | const biconvey_methods_t* _class_; 27 | size_t query_bytes; 28 | size_t reply_bytes; 29 | void (*answer)(const void*, void*, void*); 30 | void* context; 31 | uint64_t suppress; 32 | }; 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/bale_classic/convey/common.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 
Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | // SUMMARY: Include configuration settings and enable actimer. 11 | 12 | #ifndef CONVEY_COMMON_H 13 | #define CONVEY_COMMON_H 14 | 15 | #if HAVE_CONFIG_H 16 | #include "config.h" 17 | #endif 18 | #if HAVE_STDATOMIC_H 19 | # include 20 | #endif 21 | 22 | #define MATRIX_REMOTE_HOP 1 23 | #if (MPP_USE_UPC || HAVE_SHMEM_PTR) && HAVE_STDATOMIC_H && \ 24 | HAVE__ATOMIC_UINT64_T && (ATOMIC_LLONG_LOCK_FREE >= 2) 25 | # define CONVEY_INTRANODE 1 26 | #endif 27 | 28 | #if MPP_USE_UPC 29 | # include "mpp2upc.h" 30 | #else 31 | # if HAVE_MPP_UTIL 32 | # include "mpp_utilV4.h" 33 | # elif MPP_RAW_MPI 34 | # include "mpp2mpi.h" 35 | # define MPP_USE_MPI 1 36 | # elif MPP_RAW_SHMEM 37 | # include "mpp2shmem.h" 38 | # define MPP_USE_SHMEM 1 39 | # else 40 | # include "mpp2nil.h" 41 | # define MPP_NO_MIMD 1 42 | # endif 43 | # define PARALLEL(TYPE,FIELD) TYPE FIELD 44 | # define PARALLEL_NULLIFY(OBJECT,FIELD) \ 45 | (OBJECT)->FIELD = NULL 46 | # define PARALLEL_ALLOC(OBJECT,FIELD,ALLOC,SIZE,TYPE) \ 47 | (OBJECT)->FIELD = (TYPE*) (ALLOC)->grab((ALLOC)->alc8r, \ 48 | (SIZE) * sizeof(TYPE), __FILE__, __LINE__) 49 | # define PARALLEL_DEALLOC(OBJECT,FIELD,ALLOC) \ 50 | (ALLOC)->free((ALLOC)->alc8r, (void*) (OBJECT)->FIELD) 51 | #endif 52 | 53 | #if ENABLE_PROFILING 54 | // this is redundant because mpp_utilV4.h includes it: 55 | # include "mpp_utilV4_profile.h" 56 | # define CONVEY_PROF_DECL(x) mpp_profile_t x 57 | # define CONVEY_PROF_START mpp_profile_start 58 | # define CONVEY_PROF_STOP mpp_profile_stop 59 | # define CONVEY_SEND_0 PROF_OP_USER(0) 60 | # define CONVEY_SEND_1 PROF_OP_USER_P2P 61 | # define CONVEY_SEND_2 PROF_OP_USER(16) 62 | #else 63 | # define CONVEY_PROF_DECL(x) 64 | # define CONVEY_PROF_START(sample) 65 | 
# define CONVEY_PROF_STOP(sample,opcode,other_pe,data) 66 | # define CONVEY_SEND_0 (0) 67 | # define CONVEY_SEND_1 (1) 68 | # define CONVEY_SEND_2 (2) 69 | #endif 70 | 71 | #if HAVE_MPP_UTIL 72 | # define ACTIMER_MODULE_NAME conveyors 73 | # define ACTIMER_SHARED 74 | # include "actimer.h" 75 | // Use detail level 3 for now, to tidy up mpp_util reporting 76 | # define ACT_START(timer) actimer_start(timer, 3) 77 | # define ACT_STOP(timer) actimer_stop(timer, 3, 1) 78 | #else 79 | # define ACT_START(timer) 80 | # define ACT_STOP(timer) 81 | #endif 82 | 83 | #if MPP_USE_MPI 84 | # if HAVE_MPP_UTIL 85 | # define mpp_comm_mpi(C) ((C).internal->mpi_comm) 86 | # else 87 | # define mpp_comm_mpi(C) C 88 | # endif 89 | #endif 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /src/bale_classic/convey/convey.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | 6 | Name: convey 7 | Description: Mechanisms for all-to-all communication 8 | Version: @PACKAGE_VERSION@ 9 | Requires: @REQUIREMENTS@ 10 | Libs: -L${libdir} -lconvey 11 | Cflags: -I${includedir} 12 | -------------------------------------------------------------------------------- /src/bale_classic/convey/convey_alc8r.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | /** \file convey_alc8r.h 11 | * The auxiliary API for symmetric memory allocation. 
12 | */ 13 | 14 | 15 | #ifndef CONVEY_ALC8R_H 16 | #define CONVEY_ALC8R_H 17 | 18 | #include 19 | #include 20 | 21 | /** Memory allocation objects that can be passed to conveyor constructors. 22 | * 23 | * Many conveyor constructors can be told how to allocate and deallocate 24 | * symmetric memory by passing them a pointer to a structure of this type. 25 | * The structure does not have to stay in scope; the constructor copies it. 26 | * If the pointer is \c NULL, the conveyor uses standard memory management 27 | * functions instead. 28 | */ 29 | typedef struct convey_alc8r { 30 | /// Passed as the first argument of the following functions. 31 | void* alc8r; 32 | /// The function for allocating memory: the \a tag and \a value will be the file name and line number of the call. 33 | void* (*grab)(void* alc8r, size_t size, const char* tag, uint64_t value); 34 | /// The function for releasing previously allocated memory. 35 | void (*free)(void* alc8r, void* ptr); 36 | } convey_alc8r_t; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/bale_classic/convey/examples/example.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 
8 | 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "convey.h" 18 | #if HAVE_CONFIG_H 19 | # include "config.h" 20 | #endif 21 | #include "bolite.h" 22 | 23 | #if MPP_USE_UPC 24 | # define PROCS THREADS 25 | # define MY_PROC MYTHREAD 26 | # define example_start() 27 | # define example_end() 28 | #elif HAVE_MPP_UTIL 29 | # include "mpp_utilV4.h" 30 | # define example_start() argc = mpp_util_init(argc, argv, NULL) 31 | # define example_end() mpp_util_fin() 32 | #elif MPP_RAW_MPI 33 | # include 34 | extern long xmpi_n_procs, xmpi_my_proc; 35 | extern int xmpi_init(int argc, char* argv[]); 36 | # define PROCS xmpi_n_procs 37 | # define MY_PROC xmpi_my_proc 38 | # define example_start() xmpi_init(argc,argv) 39 | # define example_end() MPI_Finalize() 40 | #elif MPP_RAW_SHMEM 41 | # include 42 | # define PROCS shmem_n_pes() 43 | # define MY_PROC shmem_my_pe() 44 | # define example_start() shmem_init() 45 | # define example_end() shmem_finalize() 46 | #else 47 | # define PROCS (1L) 48 | # define MY_PROC (0L) 49 | # define example_start() 50 | # define example_end() 51 | #endif 52 | -------------------------------------------------------------------------------- /src/bale_classic/convey/examples/histo.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 
8 | 9 | 10 | #include "example.h" 11 | 12 | int 13 | main(int argc, char* argv[]) 14 | { 15 | example_start(); 16 | 17 | // Parse command line and environment 18 | long bins = 10000; 19 | long load = 100000L; 20 | uint64_t seed = 1; 21 | if (argc > 1) 22 | bins = strtol(argv[1], NULL, 0); 23 | if (argc > 2) 24 | load = strtol(argv[2], NULL, 0); 25 | if (argc > 3) 26 | seed = strtoull(argv[3], NULL, 0); 27 | if (MY_PROC == 0) 28 | printf("command: %s %ld %ld %" PRIu64 "\n" 29 | "(parameters are: bins, load, seed)\n", 30 | argv[0], bins, load, seed); 31 | 32 | // Initialize local data 33 | brand_t _prng; 34 | brand_init(&_prng, (seed << 32) + MY_PROC); 35 | long* counts = calloc(bins, sizeof(long)); 36 | long area = PROCS * bins; 37 | 38 | // Build, use, and release the conveyor 39 | int status = EXIT_FAILURE; 40 | convey_t* conveyor = convey_new(SIZE_MAX, 0, NULL, 41 | convey_opt_SCATTER | convey_opt_ALERT); 42 | if (conveyor && counts) { 43 | convey_begin(conveyor, sizeof(long), alignof(long)); 44 | 45 | /*** START OF CONVEYOR LOOP ***/ 46 | long n = 0; 47 | long index = brand(&_prng) % area; 48 | while (convey_advance(conveyor, n == load)) { 49 | for (; n < load; n++) { 50 | long payload = index / PROCS; 51 | long pe = index % PROCS; 52 | if (! 
convey_push(conveyor, &payload, pe)) 53 | break; 54 | index = brand(&_prng) % area; 55 | } 56 | 57 | long* local; 58 | while ((local = convey_apull(conveyor, NULL)) != NULL) 59 | counts[*local] += 1; 60 | } 61 | /*** END OF CONVEYOR LOOP ***/ 62 | 63 | convey_reset(conveyor); 64 | status = EXIT_SUCCESS; 65 | 66 | // Produce a modest amount of output without further communication 67 | long peak = 0, where = 0;; 68 | for (long i = 0; i < bins; i++) 69 | if (counts[i] > peak) { 70 | peak = counts[i]; 71 | where = i; 72 | } 73 | double lambda = load * 1.0 / bins; 74 | double tail = 0.0; 75 | for (long i = 0; i < 10; i++) 76 | tail += exp(-lambda + (peak + i) * log(lambda) - lgamma(peak + i + 1)); 77 | if (tail * area < 4.0) { 78 | printf("RESULT: %ld[%ld] = %ld\n", (long)(MY_PROC), where, peak); 79 | fflush(stdout); 80 | } 81 | } 82 | convey_free(conveyor); 83 | free(counts); 84 | 85 | example_end(); 86 | exit(status); 87 | } 88 | -------------------------------------------------------------------------------- /src/bale_classic/convey/launchinfo: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cat < and Matteo Frigo. 
15 | AC_DEFUN([AX_CHECK_COMPILER_FLAGS], 16 | [AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX 17 | AC_MSG_CHECKING([whether _AC_LANG compiler accepts $1]) 18 | dnl Some hackery here since AC_CACHE_VAL can't handle a non-literal varname: 19 | AS_LITERAL_IF([$1], 20 | [AC_CACHE_VAL(AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1), [ 21 | ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS 22 | _AC_LANG_PREFIX[]FLAGS="$1" 23 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], 24 | AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, 25 | AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) 26 | _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS])], 27 | [ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS 28 | _AC_LANG_PREFIX[]FLAGS="$1" 29 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], 30 | eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, 31 | eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) 32 | _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS]) 33 | eval ax_check_compiler_flags=$AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1) 34 | AC_MSG_RESULT($ax_check_compiler_flags) 35 | if test "x$ax_check_compiler_flags" = xyes; then 36 | m4_default([$2], :) 37 | else 38 | m4_default([$3], :) 39 | fi 40 | ])dnl AX_CHECK_COMPILER_FLAGS 41 | -------------------------------------------------------------------------------- /src/bale_classic/convey/m4/doxygen.am: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, Institute for Defense Analyses 2 | # 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | # 4 | # All rights reserved. 5 | # 6 | # This file is part of the conveyor package. For license information, 7 | # see the LICENSE file in the top level directory of the distribution. 
8 | 9 | 10 | clean-local: clean-local-doxygen 11 | .PHONY: clean-local-doxygen 12 | clean-local-doxygen: 13 | rm -rf html latex 14 | 15 | if HAVE_DOXYGEN 16 | 17 | doxypkg = $(PACKAGE_TARNAME)-doxy-$(PACKAGE_VERSION).tar.gz 18 | doc_DATA = $(doxypkg) 19 | 20 | $(doxypkg): doxygen.stamp 21 | tar chof - html | gzip -9 -c >$@ 22 | 23 | doxygen.stamp: Doxyfile narrative.h convey.h convey_alc8r.h convey_codec.h biconvey.h 24 | doxygen $< 25 | echo Timestamp > $@ 26 | 27 | install-data-hook: 28 | cd $(DESTDIR)$(docdir) && tar xf $(doxypkg) && rm -f $(doxypkg) 29 | 30 | uninstall-hook: 31 | cd $(DESTDIR)$(docdir) && rm -rf html 32 | 33 | CLEANFILES += doxywarn.txt doxygen.stamp $(doxypkg) 34 | 35 | endif 36 | -------------------------------------------------------------------------------- /src/bale_classic/convey/mpp2mpi.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 
8 | 9 | 10 | #include 11 | #include 12 | #include 13 | #include "mpp2mpi.h" 14 | 15 | long xmpi_my_proc = 0; 16 | long xmpi_n_procs = 0; 17 | 18 | int 19 | xmpi_init(int argc, char* argv[]) 20 | { 21 | MPI_Init(&argc, &argv); 22 | 23 | int size, rank; 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); 25 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 26 | xmpi_my_proc = rank; 27 | xmpi_n_procs = size; 28 | MPI_Barrier(MPI_COMM_WORLD); 29 | 30 | return argc; 31 | } 32 | 33 | void 34 | xmpi_exit(void) 35 | { 36 | MPI_Finalize(); 37 | exit(0); 38 | } 39 | 40 | void* 41 | xmpi_alloc_align(size_t align, size_t size) 42 | { 43 | void* ptr = NULL; 44 | posix_memalign(&ptr, align, size); 45 | return ptr; 46 | } 47 | 48 | #define REDUCER(Name,Op) \ 49 | long \ 50 | xmpi_ ## Name ## _long(long myval) \ 51 | { \ 52 | long result; \ 53 | MPI_Allreduce(&myval, &result, 1, MPI_LONG, Op, MPI_COMM_WORLD); \ 54 | return result; \ 55 | } 56 | 57 | REDUCER(accum, MPI_SUM) 58 | REDUCER(and, MPI_BAND) 59 | REDUCER(or, MPI_BOR) 60 | REDUCER(max, MPI_MAX) 61 | REDUCER(min, MPI_MIN) 62 | #undef REDUCER 63 | 64 | void 65 | xmpi_mfprint(FILE* stream, int prefix, const char* func, const char* format, ...) 66 | { 67 | va_list args; 68 | va_start(args, format); 69 | if (prefix) 70 | fprintf(stream, "PE %d: %s> ", (int) xmpi_my_proc, func); 71 | vfprintf(stream, format, args); 72 | va_end(args); 73 | fflush(stream); 74 | } 75 | 76 | void 77 | xmpi_broadcast64(void* data, size_t count, int root) 78 | { 79 | MPI_Bcast(data, count, MPI_UINT64_T, root, MPI_COMM_WORLD); 80 | } 81 | -------------------------------------------------------------------------------- /src/bale_classic/convey/mpp2mpi.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. 
For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | // SUMMARY: Translate a limited set of mpp_utilV4 functions directly 11 | // into MPI. Requires that config.h has already been included (if 12 | // HAVE_CONFIG_H). 13 | 14 | #ifndef CONVEY_MPP2MPI_H 15 | #define CONVEY_MPP2MPI_H 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #define mpp_accum_long(L) xmpi_accum_long(L) 22 | #define mpp_and_long(L) xmpi_and_long(L) 23 | #define mpp_or_long(L) xmpi_or_long(L) 24 | #define mpp_max_long(L) xmpi_max_long(L) 25 | #define mpp_min_long(L) xmpi_min_long(L) 26 | 27 | #define mpp_alloc(S) malloc(S) 28 | #define mpp_free(P) free(P) 29 | #if _ISOC11_SOURCE 30 | # define mpp_alloc_align(S,A,B) aligned_alloc(A,S) 31 | #else 32 | # define mpp_alloc_align(S,A,B) xmpi_alloc_align(A,S) 33 | #endif 34 | 35 | #define mpp_exit(N) MPI_Abort(MPI_COMM_WORLD,N) 36 | 37 | #define mpp_broadcast64(P,N,R) xmpi_broadcast64(P,N,R) 38 | #define mpp_comm_is_equal(X,Y) (1) 39 | #define mpp_comm_is_world(X) (1) 40 | #define mpp_barrier(X) MPI_Barrier(MPI_COMM_WORLD) 41 | #define mpp_rel_to_abs_proc(C,I) (I) 42 | #define mpp_util_end() xmpi_exit() 43 | #define mpp_util_init(ARGC, ARGV, X) xmpi_init(ARGC,ARGV) 44 | #define mprint(WHO, LEVEL, ...) mfprint(stdout, WHO, 1, __VA_ARGS__) 45 | #define mprint_np(WHO, LEVEL, ...) mfprint(stdout, WHO, 0, __VA_ARGS__) 46 | #define mfprint(FILE, WHO, PREFIX, ...) 
\ 47 | do { if ((WHO) == MY_PROC) xmpi_mfprint(FILE, PREFIX, __func__, __VA_ARGS__); } while (0) 48 | 49 | typedef void mpp_alltoall_t; 50 | extern long xmpi_my_proc; 51 | extern long xmpi_n_procs; 52 | #define MY_PROC xmpi_my_proc 53 | #define PROCS xmpi_n_procs 54 | #define mpp_comm_t MPI_Comm 55 | #define MPP_COMM_CURR MPI_COMM_WORLD 56 | 57 | // Function prototypes 58 | int xmpi_init(int argc, char* argv[]); 59 | void xmpi_exit(void); 60 | void* xmpi_alloc_align(size_t align, size_t size); 61 | long xmpi_accum_long(long myval); 62 | long xmpi_and_long(long myval); 63 | long xmpi_or_long(long myval); 64 | long xmpi_max_long(long myval); 65 | long xmpi_min_long(long myval); 66 | void xmpi_mfprint(FILE* stream, int prefix, const char* func, const char* format, ...); 67 | void xmpi_broadcast64(void* data, size_t count, int root); 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/bale_classic/convey/mpp2nil.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | #include 11 | #include 12 | #include 13 | #include "mpp2nil.h" 14 | 15 | 16 | void* 17 | xnil_alloc_align(size_t align, size_t size) 18 | { 19 | void* ptr = NULL; 20 | posix_memalign(&ptr, align, size); 21 | return ptr; 22 | } 23 | 24 | void 25 | xnil_mfprint(FILE* stream, int prefix, const char* func, const char* format, ...) 
26 | { 27 | va_list args; 28 | va_start(args, format); 29 | if (prefix) 30 | fprintf(stream, "%s> ", func); 31 | vfprintf(stream, format, args); 32 | va_end(args); 33 | fflush(stream); 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/bale_classic/convey/mpp2nil.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | // SUMMARY: Translate a limited set of mpp_utilV4 functions into trivial 11 | // operations for the single-process case. Requires that config.h has 12 | // been included (if HAVE_CONFIG_H). 13 | 14 | #ifndef CONVEY_MPP2NIL_H 15 | #define CONVEY_MPP2NIL_H 16 | 17 | #include 18 | #include 19 | 20 | #define mpp_accum_long(L) (L) 21 | #define mpp_and_long(L) (L) 22 | #define mpp_or_long(L) (L) 23 | #define mpp_max_long(L) (L) 24 | #define mpp_min_long(L) (L) 25 | 26 | #define mpp_alloc(S) malloc(S) 27 | #define mpp_free(P) free(P) 28 | 29 | #if _ISOC11_SOURCE 30 | # define mpp_alloc_align(S,A,B) aligned_alloc(A,S) 31 | #else 32 | # define mpp_alloc_align(S,A,B) xnil_alloc_align(A,S) 33 | #endif 34 | 35 | #define mpp_exit(N) exit(N) 36 | 37 | #define mpp_broadcast64(P,N,R) 38 | #define mpp_comm_is_equal(X,Y) (1) 39 | #define mpp_comm_is_world(X) (1) 40 | #define mpp_barrier(X) 41 | #define mpp_rel_to_abs_proc(C,I) (I) 42 | #define mpp_util_end() (exit(0)) 43 | #define mpp_util_init(ARGC, ARGV, X) (ARGC) 44 | #define mprint(WHO, LEVEL, ...) mfprint(stdout, WHO, 1, __VA_ARGS__) 45 | #define mprint_np(WHO, LEVEL, ...) mfprint(stdout, WHO, 0, __VA_ARGS__) 46 | #define mfprint(FILE, WHO, PREFIX, ...) 
\ 47 | xnil_mfprint(FILE, PREFIX, __func__, __VA_ARGS__) 48 | 49 | #define MY_PROC (0L) 50 | #define PROCS (1L) 51 | 52 | typedef void mpp_alltoall_t; 53 | typedef int mpp_comm_t; 54 | #define MPP_COMM_CURR (0) 55 | 56 | // Function prototypes 57 | 58 | void* xnil_alloc_align(size_t align, size_t size); 59 | void xnil_mfprint(FILE* stream, int prefix, const char* func, const char* format, ...); 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /src/bale_classic/convey/private.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | #ifndef CONVEY_PRIVATE_H 11 | #define CONVEY_PRIVATE_H 12 | 13 | #include 14 | #include 15 | #include "common.h" 16 | #include "convey.h" 17 | #include "bolite.h" 18 | 19 | #ifndef MPP_USE_SHMEM 20 | # undef HAVE_SHMEMX_ALLTOALLV 21 | # undef HAVE_SHMEMX_PUTMEM_SIGNAL 22 | # undef HAVE_SHMEMX_TEAM_ALLTOALLV 23 | #else 24 | # if (HAVE_SHMEMX_ALLTOALLV || HAVE_SHMEMX_TEAM_ALLTOALLV || HAVE_SHMEMX_PUTMEM_SIGNAL) 25 | # include 26 | # endif 27 | # if !HAVE_SHMEM_FREE 28 | // Fall back to old-style names 29 | # define shmem_free shfree 30 | # define shmem_malloc shmalloc 31 | # define shmem_align shmemalign 32 | # endif 33 | #endif 34 | 35 | enum convey_imp_timers { 36 | simple_a2a = 0, 37 | matrix_pivot, 38 | tensor_early, 39 | tensor_late, 40 | }; 41 | 42 | 43 | /*** Memory Allocation ***/ 44 | 45 | extern const convey_alc8r_t convey_imp_alloc, convey_imp_alloc_align; 46 | 47 | // Compute the required alignment from the item size and the alignment 48 | // request (coming from convey_begin, and validated). 
If it exceeds the 49 | // alignment automatically guaranteed by the item size and the header size, 50 | // then allocate an aligned buffer of item_size bytes and store its address 51 | // in *handle. On entry, *handle must be NULL. The return value is false 52 | // iff allocation failed. 53 | bool 54 | convey_prep_aligned(void** handle, size_t item_size, size_t header_size, 55 | size_t align_wanted); 56 | 57 | 58 | /*** Processes Per Node ***/ 59 | 60 | size_t 61 | convey_procs_per_node(void); 62 | 63 | 64 | /*** Error Handling ***/ 65 | 66 | #define CONVEY_REJECT(QUIET,MSG) \ 67 | do { if (!(QUIET)) mprint(0, 0, "%s\n", (MSG)); return NULL; } while(0) 68 | 69 | int 70 | convey_imp_panic(convey_t* self, const char* where, int error); 71 | 72 | 73 | /*** Statistics ***/ 74 | 75 | enum convey_imp_statistic { 76 | convey_imp_N_STATS = 2 * convey_CUMULATIVE 77 | }; 78 | 79 | // Retrieve a statistic if it exists 80 | int64_t 81 | convey_imp_statistic(int64_t stats[], int which); 82 | 83 | // Add transient stats into persistent stats, then reset them 84 | void 85 | convey_imp_update_stats(int64_t stats[]); 86 | 87 | 88 | /*** Fallback Conveyor ***/ 89 | 90 | convey_t* 91 | convey_new_trivial(size_t monster_size, const convey_alc8r_t* alloc, 92 | uint64_t options); 93 | 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /src/bale_classic/convey/router.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | // The next three functions convert the PE number into a tag and the 11 | // destination rank for pushing into porters[0]. 
A matrix routing tag is 12 | // 16 bits (local). A tensor routing tag is either 24 bits (remote) + 13 | // 8 bits (local) or 8 bits + 8 bits. 14 | 15 | static ROUTER_HINT route_t 16 | vector_route(tensor_t* vector, int64_t pe) 17 | { 18 | return (route_t) { .tag = 0, .next = pe }; 19 | } 20 | 21 | static ROUTER_HINT route_t 22 | matrix_route(tensor_t* matrix, int64_t pe) 23 | { 24 | // dest is (x',y'), we are (x,y); hop to (x',y), tag is (y') 25 | uint32_t dest = pe; 26 | uint32_t upper = _divbymul32(dest, matrix->div_local); 27 | uint32_t lower = dest - matrix->n_local * upper; 28 | #if MATRIX_REMOTE_HOP == 0 29 | return (route_t) { .tag = lower, .next = upper }; 30 | #else 31 | return (route_t) { .tag = upper, .next = lower }; 32 | #endif 33 | } 34 | 35 | static ROUTER_HINT route_t 36 | tensor_route(tensor_t* tensor, int64_t pe) 37 | { 38 | // dest is (x',y',z'), we are (x,y,z) 39 | // hop to (x,y,y'), tag is (x',z') [24 bits, 8 bits] 40 | uint32_t dest = pe; 41 | uint32_t upper = _divbymul32(dest, tensor->div_square); 42 | uint32_t middle = _divbymul32(dest, tensor->div_local); 43 | uint32_t lower = dest - tensor->n_local * middle; 44 | middle -= tensor->n_local * upper; 45 | uint32_t tag = (upper << 8) | lower; 46 | return (route_t) { .tag = tag, .next = middle }; 47 | } 48 | -------------------------------------------------------------------------------- /src/bale_classic/convey/shmemptr.c: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 
12 | // 13 | // 14 | *****************************************************************/ 15 | #include 16 | #include 17 | #if HAVE_CONFIG_H 18 | #include "config.h" 19 | #endif 20 | #include "mpp2shmem.h" 21 | 22 | int 23 | main(void) 24 | { 25 | shmem_init(); 26 | int* data = mpp_alloc(sizeof(int)); 27 | shmem_barrier_all(); 28 | 29 | if (shmem_my_pe() == 0) { 30 | void* friend = shmem_ptr(data, 1); 31 | *data = (friend == NULL); 32 | if (friend == NULL) 33 | fputs("ERROR: shmem_ptr() is not working. Either fix your environment to\n" 34 | "make shmem_ptr() work, or configure --without-shmem-ptr.\n", stderr); 35 | } 36 | 37 | shmem_barrier_all(); 38 | int status = shmem_int_g(data, 0); 39 | shmem_barrier_all(); 40 | shmem_finalize(); 41 | return status; 42 | } 43 | -------------------------------------------------------------------------------- /src/bale_classic/convey/simple.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | #ifndef CONVEY_SIMPLE_H 11 | #define CONVEY_SIMPLE_H 12 | 13 | #include "convey_impl.h" 14 | #include "sorter.h" 15 | 16 | #ifdef MPP_USE_MPI 17 | typedef int a2a_off_t; 18 | #else 19 | typedef size_t a2a_off_t; 20 | #endif 21 | 22 | typedef struct simple { 23 | convey_t convey; 24 | sorter_t* sorter; // for distributing items into buffers 25 | bool nonempty; // is there anything in any buffer? 26 | bool overflow; // has any buffer filled up? 27 | bool quiet; // are communications finished? 28 | bool own_a2a; // did we create the a2a? 29 | bool dynamic; // deallocate buffers when dormant? 30 | bool scatter; // do we want a sorter? 
31 | bool flip; // which sync array should we use next? 32 | int64_t pull_from; // draw from which recv buffer? 33 | size_t capacity; 34 | size_t buffer_bytes; 35 | size_t buffer_limit; // item_size * capacity 36 | area_t* send; // current state of send buffers 37 | area_t* recv; // current state of recv buffers 38 | PARALLEL(size_t*, send_sizes); 39 | PARALLEL(size_t*, recv_sizes); 40 | PARALLEL(char*, send_buffers); 41 | PARALLEL(char*, recv_buffers); 42 | // Symmetric (in SHMEM case) but not shared (in UPC case): 43 | a2a_off_t* offsets; // offsets (displacements) for alltoallv 44 | // Miscellaneous fields 45 | convey_alc8r_t alloc; 46 | const mpp_alltoall_t* a2a; 47 | int* perm; // for xshmem_alltoallv or upcx_alltoallv 48 | long* sync[2]; // for shmemx_alltoallv 49 | mpp_comm_t comm; // for error checking and MPI_Alltoallv 50 | int64_t stats[convey_imp_N_STATS]; 51 | } simple_t; 52 | 53 | 54 | /*** Functions that bisimple conveyors need to call ***/ 55 | 56 | int simple_alltoallv(simple_t* simple); 57 | 58 | void simple_reset_send_buffers(simple_t* simple); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /src/bale_classic/convey/sorter.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020, Institute for Defense Analyses 2 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | // 4 | // All rights reserved. 5 | // 6 | // This file is part of the conveyor package. For license information, 7 | // see the LICENSE file in the top level directory of the distribution. 8 | 9 | 10 | #ifndef CONVEY_SORTER_H 11 | #define CONVEY_SORTER_H 12 | 13 | #include 14 | #include 15 | #include "convey_alc8r.h" 16 | 17 | 18 | // A sorter is an object that can efficiently distribute a stream of 19 | // fixed-sized items into an array of buffers. ... 
20 | 21 | typedef struct sorter sorter_t; 22 | 23 | typedef struct area { 24 | char* next; // place to write or read next item 25 | char* limit; // upper limit of the area 26 | } area_t; 27 | 28 | 29 | sorter_t* sorter_new(int n, area_t areas[n], size_t item_bytes, size_t capacity, 30 | const convey_alc8r_t* alloc, bool dynamic); 31 | 32 | // Returns false if something goes wrong (a memory allocation error). 33 | bool sorter_setup(sorter_t* self); 34 | 35 | // Returns false if one of the outgoing buffers is full. The item is 36 | // always successfully pushed, provided that the caller responds to a 37 | // return value of 'false' by emptying the full buffer(s). 38 | bool sorter_push(sorter_t* self, const void* item, int dest); 39 | 40 | // Returns 0 if there is nothing to flush, -1 if an outgoing buffer 41 | // filled up, and 1 otherwise. 42 | int sorter_flush(sorter_t* self); 43 | 44 | void sorter_reset(sorter_t* self); 45 | void sorter_free(sorter_t* self); 46 | 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /src/bale_classic/convey/test_compress: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | # Copyright (c) 2020, Institute for Defense Analyses 4 | # 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 5 | # 6 | # All rights reserved. 7 | # 8 | # This file is part of the conveyor package. For license information, 9 | # see the LICENSE file in the top level directory of the distribution. 10 | 11 | export MPP_INIT_QUIET=1 12 | $LAUNCH ./alltoall? 
-- <; 14 | close(CONFIG); 15 | my $enabled = grep /ENABLE_NONBLOCKING 1/, @config; 16 | my $old_enabled = $enabled; 17 | 18 | my @lines = <>; 19 | my @blines = grep /^1/, @lines; 20 | my @nblines = grep /^0/, @lines; 21 | die "incomplete data for tuning\n" if $#blines != $#nblines; 22 | die "insufficient data for tuning\n" if $#blines < 10; 23 | 24 | my @bdata = map { (split)[2] } @blines; 25 | my @nbdata = map { (split)[2] } @nblines; 26 | 27 | if (! $enabled) { 28 | for (my $i = 0; $i < @bdata; $i++) { 29 | $bdata[$i] = ($bdata[$i] + $nbdata[$i]) / 2; 30 | } 31 | } 32 | my ($peak, $value) = &findPeak(\@bdata); 33 | if ($enabled) { 34 | my ($npeak, $nvalue) = &findPeak(\@nbdata); 35 | if ($nvalue >= 0.95 * $value) { 36 | $peak = $npeak; 37 | $value = $nvalue; 38 | @blines = @nblines; 39 | } 40 | else { 41 | $enabled = 0; 42 | } 43 | } 44 | 45 | my $bufsiz = (split ' ',$blines[$peak])[1]; 46 | my $old_bufsiz = (join '', grep /CONVEY_BUFFER_SIZE [0-9]+\s*$/, @config); 47 | $old_bufsiz =~ s/^.*CONVEY_BUFFER_SIZE ([0-9]+)\s*$/$1/; # keep only the number; \s*$ mirrors the grep above 48 | 49 | printf "Estimated bandwidth is %.1f MB/sec/PE\n", ($value); 50 | print "Setting ENABLE_NONBLOCKING to $enabled (was $old_enabled)\n"; 51 | print "Setting CONVEY_BUFFER_SIZE to $bufsiz (was $old_bufsiz)\n"; 52 | print "Rerun 'make' to build these values into the library.\n"; 53 | 54 | my $config = join '', @config; 55 | $config =~ s/ENABLE_NONBLOCKING [0-1]/ENABLE_NONBLOCKING $enabled/gm; 56 | $config =~ s/CONVEY_BUFFER_SIZE [0-9]+[ \t]*$/CONVEY_BUFFER_SIZE $bufsiz/gm; # [ \t]* (not \s*) so the match cannot swallow the newline 57 | open(CONFIG, ">config.h") || die "cannot open config.h for writing\n"; 58 | print CONFIG $config; 59 | close(CONFIG); 60 | 61 | exit; 62 | 63 | # Return (index, value) of the largest 3-point moving average over the array. 64 | sub findPeak { 65 | my ($array) = @_; 66 | my $peak = 0; 67 | my $value = 0; 68 | for (my $i = 1; $i < @$array - 1; $i++) { 69 | my $avg = ($$array[$i-1] + $$array[$i] + $$array[$i+1]) / 3; 70 | if ($avg > $value) { 71 | $value = $avg; 72 | $peak = $i; 73 | } 74 | } 75 | return ($peak, $value); 76 | } 77 | 
-------------------------------------------------------------------------------- /src/bale_classic/docker/cupc/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM ubuntu:18.04 3 | 4 | MAINTAINER bale 5 | 6 | # Build-arg: NPROCS for parallel make 7 | ARG NPROCS=2 8 | 9 | ENV PREFIX=/usr/local 10 | RUN mkdir -p "${PREFIX}" 11 | RUN mkdir -p "${PREFIX}/src" 12 | 13 | 14 | # This will prevent questions from being asked during the install 15 | ENV DEBIAN_FRONTEND noninteractive 16 | 17 | # Get certs so HTTPS requests (e.g., git clone) works 18 | # do we need to do this? 19 | RUN apt-get -y update \ 20 | && apt-get install -y --no-install-recommends \ 21 | ca-certificates \ 22 | && apt-get clean 23 | 24 | # Install various useful/prereq packages 25 | RUN apt-get -y update \ 26 | && apt-get install -y --no-install-recommends \ 27 | autoconf \ 28 | automake \ 29 | cmake \ 30 | g++ \ 31 | gcc \ 32 | gfortran \ 33 | git \ 34 | libnuma-dev \ 35 | libtool \ 36 | m4 \ 37 | make \ 38 | openssh-client \ 39 | patch \ 40 | pkg-config \ 41 | python \ 42 | wget \ 43 | zlib1g-dev \ 44 | && apt-get clean 45 | 46 | #ENV CUPC_URL=https://www.gccupc.org/gupc-5201-1/30-gupc-5201-x8664-ubuntu-1204/file 47 | # Where to get url for wget? Try git 48 | # Clone over https://: GitHub no longer serves the unauthenticated git:// protocol. 49 | ENV LLVM_GIT=https://github.com/Intrepid/llvm-upc.git 50 | ENV CUPC_GIT=https://github.com/Intrepid/clang-upc.git 51 | #ENV CUPC_ARCHIVE=${PREFIX}/src/cupc.tar.gz 52 | ENV CUPC_SRC_DIR=${PREFIX}/src 53 | ENV CUPC_BLD_DIR=${PREFIX}/src/cupc/tools/bld 54 | ENV CUPC_INSTALL_DIR=${PREFIX} 55 | RUN mkdir -p ${CUPC_INSTALL_DIR} 56 | RUN mkdir -p ${CUPC_SRC_DIR} 57 | #RUN wget --quiet ${CUPC_URL} --output-document=${CUPC_ARCHIVE} 58 | WORKDIR ${CUPC_SRC_DIR} 59 | RUN git clone ${LLVM_GIT} cupc >/dev/null 2>&1 60 | 61 | WORKDIR ${CUPC_SRC_DIR}/cupc/tools 62 | RUN git clone ${CUPC_GIT} clang >/dev/null 2>&1 63 | RUN mkdir -p bld 64 | WORKDIR bld 65 | RUN cmake -DLLVM_TARGETS_TO_BUILD="X86" ../.. 
66 | #WORKDIR ${CUPC_BLD_DIR} 67 | #RUN ./configure --prefix=${CUPC_INSTALL_DIR} --enable-languages=c,c++ 68 | RUN make -j ${NPROCS} 69 | #RUN make VERBOSE=1 70 | 71 | RUN make install 72 | RUN rm -rf ${CUPC_ARCHIVE} 73 | 74 | ENV PATH=${PREFIX}/bin:$PATH 75 | RUN adduser --disabled-password --gecos "" bale_user 76 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /src/bale_classic/docker/gupc/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM ubuntu:16.04 3 | 4 | MAINTAINER bale 5 | 6 | # Build-arg: NPROCS for parallel make 7 | ARG NPROCS=8 8 | 9 | ENV PREFIX=/usr/local 10 | RUN mkdir -p "${PREFIX}" 11 | RUN mkdir -p "${PREFIX}/src" 12 | 13 | # This will prevent questions from being asked during the install 14 | ENV DEBIAN_FRONTEND noninteractive 15 | 16 | # Get certs so HTTPS requests (e.g., git clone) works 17 | # do we need to do this? 18 | RUN apt-get -y update \ 19 | && apt-get install -y --no-install-recommends \ 20 | ca-certificates \ 21 | && apt-get clean 22 | 23 | # Install various useful/prereq packages 24 | RUN apt-get -y update \ 25 | && apt-get install -y --no-install-recommends \ 26 | autoconf \ 27 | automake \ 28 | flex \ 29 | gcc \ 30 | g++ \ 31 | gfortran \ 32 | gcc-multilib \ 33 | git \ 34 | libgmp3-dev \ 35 | libmpc-dev \ 36 | libmpfr-dev \ 37 | libnuma-dev \ 38 | libtool \ 39 | m4 \ 40 | make \ 41 | openssh-client \ 42 | patch \ 43 | pkg-config \ 44 | wget \ 45 | zlib1g-dev \ 46 | && apt-get clean 47 | #ENV GUPC_URL=https://www.gccupc.org/gupc-5201-1/32-gupc-5-2-0-1-source-release/file 48 | ENV GUPC_URL=https://github.com/Intrepid/GUPC/releases/download/upc-5.2.0.1/upc-5.2.0.1.src.tar.bz2 49 | ENV GUPC_ARCHIVE=${PREFIX}/src/gupc.tar.bz2 50 | ENV GUPC_SRC_DIR=${PREFIX}/src/gupc/ 51 | ENV GUPC_BUILD_DIR=${PREFIX}/build/gupc/ 52 | ENV GUPC_INSTALL_DIR=${PREFIX} 53 | RUN mkdir -p ${GUPC_SRC_DIR} 54 | RUN mkdir -p ${GUPC_BUILD_DIR} 55 | RUN 
mkdir -p ${GUPC_INSTALL_DIR} 56 | RUN wget --quiet ${GUPC_URL} --output-document=${GUPC_ARCHIVE} 57 | RUN tar -jxf ${GUPC_ARCHIVE} -C ${GUPC_SRC_DIR} --strip-components=1 58 | WORKDIR ${GUPC_BUILD_DIR} 59 | RUN ${GUPC_SRC_DIR}/configure --prefix=${GUPC_INSTALL_DIR} --enable-languages=c \ 60 | --with-mpfr-include=/usr/include --with-mpfr-lib=/usr/lib/x86_64-linux-gnu \ 61 | --with-gmp-include=/usr/include --with-gmp-lib=/usr/lib/x86_64-linux-gnu \ 62 | --with-mpc-include=/usr/include --with-mpc-lib=/usr/lib/x86_64-linux-gnu\ 63 | --disable-multilib --disable-bootstrap --with-upc-runtime='SMP' 64 | RUN make -j ${NPROCS} 65 | RUN make install 66 | RUN rm -rf ${GUPC_ARCHIVE} 67 | 68 | ENV PATH=${PREFIX}/bin:$PATH 69 | RUN adduser --disabled-password --gecos "" bale_user 70 | RUN echo "hello" 71 | 72 | RUN apt-get -y update \ 73 | && apt-get install -y --no-install-recommends \ 74 | vim less curl python3 python3-setuptools python3-pip 75 | 76 | RUN ln -s /usr/bin/python3 /usr/bin/python && \ 77 | ln -s /usr/bin/pip3 /usr/bin/pip 78 | 79 | RUN pip install pytest 80 | 81 | CMD ["/bin/bash"] 82 | -------------------------------------------------------------------------------- /src/bale_classic/docker/gupc/Dockerfile_bale: -------------------------------------------------------------------------------- 1 | FROM gupc_with_pytest:latest 2 | 3 | # copy bale from host into container (the host should be sitting at the top level of bale_private) 4 | COPY . 
/opt/bale_private 5 | 6 | WORKDIR /opt/bale_private/src/bale_classic 7 | 8 | RUN ./bootstrap.sh 9 | ENV UPC gupc 10 | ENV PLATFORM gupc 11 | RUN ./make_bale -u 12 | 13 | RUN echo "---- done with install ----" 14 | 15 | WORKDIR /opt/bale_private/src/bale_classic/apps 16 | 17 | RUN pytest -s -P=/opt/bale_private/src/bale_classic/build_gupc/bin --node_range=1,3,1 18 | 19 | -------------------------------------------------------------------------------- /src/bale_classic/docker/gupc/Dockerfile_github_action: -------------------------------------------------------------------------------- 1 | # get a gupc image in public Dockerhub 2 | FROM npmolino/bale_public_gupc_with_pytest 3 | 4 | COPY . /opt/bale_private 5 | 6 | WORKDIR /opt/bale_private/src/bale_classic 7 | 8 | RUN ./bootstrap.sh 9 | ENV UPC gupc 10 | ENV PLATFORM gupc 11 | RUN ./make_bale -u 12 | 13 | RUN echo "---- done with install ----" 14 | 15 | # run as non-root user 16 | # bale_user exists from Dockerfile in the bale_public_gupc image at this point 17 | #RUN adduser --disabled-password --gecos "" bale_user 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/bale_classic/docker/oshmem/Dockerfile_bale: -------------------------------------------------------------------------------- 1 | FROM oshmem4.0.3_with_pytest:latest 2 | 3 | COPY . 
/opt/bale_private 4 | 5 | WORKDIR /opt/bale_private/src/bale_classic 6 | 7 | RUN ./bootstrap.sh 8 | ENV CC oshcc 9 | ENV PLATFORM oshmem 10 | RUN ./make_bale -s 11 | 12 | RUN echo "---- done with install ----" 13 | 14 | # run as non-root user 15 | RUN adduser --disabled-password --gecos "" bale_user 16 | RUN export NUM_CORES=$(grep cores /proc/cpuinfo | awk '{print $4}' | head -n 1) && \ 17 | echo NUM_CORES is $NUM_CORES && \ 18 | su bale_user - -c "oshrun -n $NUM_CORES build_${PLATFORM}/bin/histo -M 1 -c 1" 19 | 20 | # here we actually run it as root 21 | RUN echo "--- run as root ---" 22 | ENV OMPI_ALLOW_RUN_AS_ROOT 1 23 | ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM 1 24 | RUN NUM_CORES=$(grep cores /proc/cpuinfo | awk '{print $4}' | head -n 1) && \ 25 | echo NUM_CORES is $NUM_CORES && \ 26 | oshrun -n 2 build_${PLATFORM}/bin/histo -M 1 -c 2 27 | # NOTE(review): NUM_CORES is computed and echoed above, but oshrun is hard-coded to 2 PEs - confirm intent 28 | 29 | RUN pytest -s -p no:cacheprovider --node_range=1,5,2 apps/ 30 | 31 | -------------------------------------------------------------------------------- /src/bale_classic/docker/oshmem/Dockerfile_github_action: -------------------------------------------------------------------------------- 1 | # get an oshmem image from public Docker Hub 2 | FROM npmolino/bale_public_oshmem4.0.3_with_pytest:latest 3 | 4 | COPY . 
/opt/bale_private 5 | 6 | WORKDIR /opt/bale_private/src/bale_classic 7 | 8 | RUN ./bootstrap.sh 9 | ENV CC oshcc 10 | ENV PLATFORM oshmem 11 | RUN ./make_bale -s 12 | 13 | RUN echo "---- done with install ----" 14 | 15 | # run as non-root user 16 | RUN adduser --disabled-password --gecos "" bale_user 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/bale_classic/docker/sos/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | FROM ubuntu:18.04 3 | 4 | MAINTAINER bale 5 | 6 | # Build-arg: NPROCS for parallel make 7 | ARG NPROCS=8 8 | 9 | ENV PREFIX=/usr/local 10 | RUN mkdir -p "${PREFIX}" 11 | RUN mkdir -p "${PREFIX}/src" 12 | 13 | # This will prevent questions from being asked during the install 14 | ENV DEBIAN_FRONTEND noninteractive 15 | 16 | # Get certs so HTTPS requests (e.g., git clone) works 17 | # do we need to do this? 18 | RUN apt-get -y update \ 19 | && apt-get install -y --no-install-recommends \ 20 | ca-certificates \ 21 | && apt-get clean 22 | 23 | # Install various useful/prereq packages 24 | RUN apt-get -y update \ 25 | && apt-get install -y --no-install-recommends \ 26 | autoconf \ 27 | automake \ 28 | flex \ 29 | gcc \ 30 | g++ \ 31 | gfortran \ 32 | gcc-multilib \ 33 | git \ 34 | libnuma-dev \ 35 | libtool \ 36 | m4 \ 37 | make \ 38 | openssh-client \ 39 | patch \ 40 | pkg-config \ 41 | wget 42 | 43 | ENV OFI_GIT=https://github.com/ofiwg/libfabric 44 | #RUN mkdir -p ${PREFIX}/OFI 45 | RUN git clone ${OFI_GIT} ${PREFIX}/OFI >/dev/null 2>&1 46 | WORKDIR ${PREFIX}/OFI 47 | RUN ./autogen.sh 48 | RUN ./configure 49 | RUN make -j${NPROCS} 50 | RUN make install 51 | 52 | WORKDIR ${PREFIX} 53 | ENV HYDRA_URL=http://www.mpich.org/static/downloads/3.3.2/hydra-3.3.2.tar.gz 54 | ENV HYDRA_ARCHIVE=${PREFIX}/hydra-3.3.2.tar.gz 55 | ENV HYDRA_VERSION=hydra-3.3.2 56 | RUN wget --quiet ${HYDRA_URL} 57 | RUN tar -zxf ${HYDRA_ARCHIVE} 58 | WORKDIR ${PREFIX}/hydra-3.3.2 
59 | #RUN ./autogen.sh 60 | RUN ./configure 61 | RUN make -j${NPROCS} 62 | RUN make install 63 | RUN rm -f ${HYDRA_ARCHIVE} 64 | 65 | WORKDIR ${PREFIX} 66 | ENV SOS_GIT=https://github.com/Sandia-OpenSHMEM/SOS 67 | RUN git clone ${SOS_GIT} ${PREFIX}/SOS >/dev/null 2>&1 68 | WORKDIR ${PREFIX}/SOS 69 | RUN ./autogen.sh 70 | RUN ./configure --with-ofi=${PREFIX} --disable-fortran --enable-pmi-simple --disable-error-checking --enable-memcpy 71 | RUN make -j${NPROCS} 72 | RUN make install 73 | # Use ENV, not "RUN export": an export only lasts for that one RUN layer, ENV persists into later layers and the container. 74 | WORKDIR ${PREFIX} 75 | ENV PATH=${PREFIX}/bin:$PATH 76 | ENV LD_LIBRARY_PATH=${PREFIX}/lib 77 | RUN adduser --disabled-password --gecos "" bale_user 78 | CMD ["/bin/bash"] 79 | -------------------------------------------------------------------------------- /src/bale_classic/docker/sos/README: -------------------------------------------------------------------------------- 1 | docker build -t sos sos 2 | 3 | docker run --rm -it -v /Users/conn/Bale/bale_private:/opt/bale_private sos sh -c "cd /opt/bale_private && ./bootstrap.sh && export CC=oshcc && export PLATFORM=sos && export LD_LIBRARY_PATH=/usr/local/lib && ./install.sh -s && bash" 4 | 5 | 6 | Modify path /Users/conn/Bale/bale_private:/opt/bale_private as needed. 7 | 8 | -------------------------------------------------------------------------------- /src/bale_classic/exstack/Makefile.am: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 
12 | # 13 | # 14 | # *****************************************************************/ 15 | 16 | common_sources = exstack.upc 17 | pkconfigdir=@libdir@/pkgconfig 18 | 19 | include_HEADERS=exstack.h 20 | lib_LIBRARIES = libexstack.a 21 | 22 | pkconfig_DATA = @PACKAGE_NAME@.pc 23 | 24 | libexstack_a_CPPFLAGS= 25 | 26 | # handle UPC library 27 | if BUILD_UPC 28 | libexstack_a_SOURCES = $(common_sources:.c=__upc.upc) 29 | libexstack_a_SOURCES += exstack2.upc 30 | endif 31 | 32 | if BUILD_SHMEM 33 | libexstack_a_SOURCES = $(common_sources:.upc=__c.c) 34 | libexstack_a_SOURCES += exstack2_shmem.c 35 | libexstack_a_CFLAGS = -DUSE_SHMEM 36 | endif 37 | 38 | libexstack_a_CPPFLAGS += @CPPFLAGS@ @libdeps_CFLAGS@ 39 | 40 | # Automatic linking as needed to create C copies of UPC files. # 41 | %__c.c: %.upc 42 | @echo "INFO: Linking C copy of UPC file: $@" 43 | @rm -f $@ 44 | @test -d $(dir $@) || mkdir -p $(dir $@) 45 | @if test "$(top_srcdir)" = "$(top_builddir)"; then ln -s $(notdir $<) $@ || cp -f $< $@; else ln -s $< $@ || cp -f $< $@; fi 46 | 47 | 48 | # Automatic linking as needed to create UPC copies of C files. # 49 | %__upc.upc: %.c 50 | @echo "INFO: Linking UPC copy of C file: $@" 51 | @rm -f $@ 52 | @test -d $(dir $@) || mkdir -p $(dir $@) 53 | @if test "$(top_srcdir)" = "$(top_builddir)"; then ln -s $(notdir $<) $@ || cp -f $< $@; else ln -s $< $@ || cp -f $< $@; fi 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/bale_classic/exstack/README.md: -------------------------------------------------------------------------------- 1 | # exstack 2 | 3 | This library contains both exstack and exstack2. 4 | 5 | **exstack** 6 | 7 | exstack was originally written in 2011 and was our first attempt at an aggregation library. Exstack is synchronous and resembles Bulk Synchronous Programming. The main functions in its API (push and pop) 8 | remain in its descendants (exstack2 and [conveyors](../convey/README.md)). 
In a typical exstack loop there are 9 | three phases. 10 | 11 | 1. First, each PE pushes items onto its local out-buffers. 12 | 13 | 2. Once a PE sees that one of its out-buffers has become 14 | full, that PE goes to the "exchange" phase where it waits for all 15 | other PEs to join it. Once all PEs are in the exchange phase, all 16 | out-buffers are sent to their destination where they land in 17 | in-buffers. 18 | 19 | 3. PEs then enter into the pop phase where they pop items off 20 | of their in-buffers and do whatever computation is required. 21 | 22 | See [histogram](../apps/histo_src/README.md) for a simple example. While exstack is naive compared to conveyors, it still achieves very good performance on most of the bale apps. 23 | 24 | **exstack2** 25 | 26 | exstack2 was our attempt to improve both the performance and ease-of-use of exstack. In fact, bale grew out of our attempt to write exstack2. exstack2's main difference from exstack is the fact that exstack2 is asynchronous. PEs do not proceed in lock-step and there is no "exchange phase". The buffer sends happen automatically and individually as soon as a buffer is full. 27 | 28 | Both exstack and exstack2 require logic for what we call the "endgame". This refers to the fact that PEs must all continue to participate in any exstack or exstack2 loop until all PEs are "done". This is because, even if a PE is done pushing items to other PEs, it needs to stay ready to process other PEs' pushes to it. The endgame of aggregation libraries (especially asynchronous versions) is an interesting topic in itself. 
29 | 30 | -------------------------------------------------------------------------------- /src/bale_classic/exstack/configure.ac: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | 16 | AC_PREREQ([2.60]) #Require Minimum Autotools version 2.60 17 | 18 | AC_INIT([exstack], [1.0.0], [bale@super.org]) 19 | AM_INIT_AUTOMAKE([1.10 foreign no-define tar-ustar]) 20 | ##LT_INIT ## enable libtool 21 | 22 | AC_CONFIG_HEADERS([config.h]) 23 | AC_CONFIG_MACRO_DIRS([../]) 24 | AC_ARG_WITH([upc], 25 | AS_HELP_STRING([--with-upc], [build a UPC version]), 26 | [], [with_upc=yes]) 27 | AC_ARG_WITH([shmem], 28 | AS_HELP_STRING([--with-shmem], [build a SHMEM version]), 29 | [], [with_shmem=no]) 30 | 31 | 32 | AC_PROG_RANLIB 33 | AC_PROG_CC 34 | AC_PROG_CC_C99 35 | 36 | mpp_use_shmem=no 37 | if test "x$with_shmem" = xyes; then 38 | with_upc=no 39 | mpp_use_shmem=yes 40 | fi 41 | 42 | if test "x$with_upc" = xyes; then 43 | AM_PROG_UPC 44 | else 45 | # must define this ugly conditional to keep automake happy 46 | AM_CONDITIONAL(am__fastdepUPC, [test]) 47 | fi 48 | 49 | AX_CHECK_COMPILE_FLAG([-std=c11], [ 50 | AS_VAR_APPEND([CFLAGS]," -std=c11") 51 | ], 52 | [ 53 | AX_CHECK_COMPILE_FLAG([-hstd=c11],[ AS_VAR_APPEND([CFLAGS]," -hstd=c11")], 54 | [echo "C compiler cannot compile C11 code" 55 | exit -1]) 56 | ]) 57 | 58 | PKG_CHECK_MODULES([libdeps], 59 | [ 60 | libgetput 61 | ] 62 | ) 63 | 64 | AM_CONDITIONAL(BUILD_UPC, [test "x$with_upc" = xyes]) 65 | AM_CONDITIONAL(BUILD_SHMEM, [test 
"x$mpp_use_shmem" = xyes]) 66 | 67 | AS_IF([test "x$mpp_use_shmem" = xyes], 68 | [AC_SEARCH_LIBS([shmemx_alltoallv], [], 69 | [AC_DEFINE([HAVE_SHMEMX_ALLTOALLV], [1], [Define if shmem_alltoallv exists.])]) 70 | AC_SEARCH_LIBS([shmemx_team_alltoallv], [], 71 | [AC_DEFINE([HAVE_SHMEMX_TEAM_ALLTOALLV], [1], [Define if shmem_team_alltoallv exists.])]) 72 | AC_SEARCH_LIBS([shmemx_putmem_signal], [], 73 | [AC_DEFINE([HAVE_SHMEMX_PUTMEM_SIGNAL], [1], [Define if shmem_putmem_signal exists.])]) 74 | AC_SEARCH_LIBS([shmem_free], [], 75 | [AC_DEFINE([HAVE_SHMEM_FREE], [1], [Define if shmem_free exists.])]) 76 | ]) 77 | 78 | AC_CONFIG_FILES([ 79 | exstack.pc:exstack.pc.in 80 | Makefile 81 | ]) 82 | 83 | AC_OUTPUT 84 | 85 | -------------------------------------------------------------------------------- /src/bale_classic/exstack/exstack.pc.in: -------------------------------------------------------------------------------- 1 | Name: @PACKAGE_NAME@ 2 | Version: @PACKAGE_VERSION@ 3 | Description: exstack library 4 | Requires: 5 | 6 | prefix=@prefix@ 7 | libdir=${prefix}/lib 8 | includedir=${prefix}/include 9 | 10 | Libs: -L${libdir} -l@PACKAGE_NAME@ 11 | Cflags: -I${includedir} 12 | 13 | -------------------------------------------------------------------------------- /src/bale_classic/libgetput/Makefile.am: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 
12 | # 13 | # 14 | # *****************************************************************/ 15 | 16 | common_sources = libgetput.upc 17 | pkconfigdir=@libdir@/pkgconfig 18 | noinst_HEADERS=knuth_rng_double_2019.h 19 | include_HEADERS=libgetput.h 20 | #lib_LIBRARIES = 21 | lib_LIBRARIES = liblibgetput.a 22 | 23 | pkconfig_DATA = @PACKAGE_NAME@.pc 24 | 25 | if BUILD_UPC 26 | liblibgetput_a_SOURCES = $(common_sources:.c=__upc.upc) 27 | endif 28 | 29 | if BUILD_SHMEM 30 | #liblibgetput_a_LINK = $(LINK) 31 | liblibgetput_a_SOURCES = $(common_sources:.upc=__c.c) 32 | liblibgetput_a_CFLAGS = -DUSE_SHMEM -D_XOPEN_SOURCE=700 33 | endif 34 | 35 | # Automatic linking as needed to create C copies of UPC files. # 36 | %__c.c: %.upc 37 | @echo "INFO: Linking C copy of UPC file: $@" 38 | @rm -f $@ 39 | @test -d $(dir $@) || mkdir -p $(dir $@) 40 | @if test "$(top_srcdir)" = "$(top_builddir)"; then ln -s $(notdir $<) $@ || cp -f $< $@; else ln -s $< $@ || cp -f $< $@; fi 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/bale_classic/libgetput/README.md: -------------------------------------------------------------------------------- 1 | # libgetput (A basic parallel library) 2 | 3 | This library is meant to provide a very simple parallel programming layer. Namely it provides basic remote gets and puts, atomics, and a few collective functions (like barriers and reductions). Our history programming with UPC programming with this small set of operations led to a simple parallel programming model that could achieve good performance on machines of the past (Cray T3E for instance), but that suffered from poor performance on today's machines. For years, this made us nostalgic for computers that allowed for this style of programming without the performance hit. However, as our experience with bale has deepened, we now believe that parallel programmers need more than this. 
4 | 5 | One feature of libgetput is that it acts as a wrapper for some of the simple functionality that is common between UPC and SHMEM. It allows most of bale to be easily compiled against UPC or SHMEM. 6 | 7 | As mentioned above, and as its name implies, libgetput provides basic remote get and put functions: 8 | lgp_get_int64, lgp_put_int64, lgp_getmem, and lgp_putmem (which are 9 | similar to SHMEM functions for single word gets and puts or more 10 | general gets and puts of memory). One key distinction is that the 11 | indexing in libgetput is UPC-style indexing. That is, we consider the 12 | distributed array to be indexed in round-robin fashion: the first 13 | element has affinity to PE 0, the next element has affinity to PE 1, 14 | and so on. 15 | 16 | libgetput also supplies a variety of atomic functions, both fetching and non-fetching. Finally, libgetput provides some fundamental collectives: value-based reductions and barriers. -------------------------------------------------------------------------------- /src/bale_classic/libgetput/configure.ac: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 
12 | # 13 | # 14 | # *****************************************************************/ 15 | 16 | AC_PREREQ([2.60]) #Require Minimum Autoconf version 2.60 17 | 18 | AC_INIT([libgetput], [1.0.0], [bale@super.org]) 19 | 20 | AM_INIT_AUTOMAKE([1.10 foreign no-define tar-ustar]) 21 | ##LT_INIT ## enable libtool 22 | 23 | AC_CONFIG_HEADERS([config.h]) 24 | AC_CONFIG_MACRO_DIRS([../]) 25 | 26 | AC_ARG_WITH([upc], 27 | AS_HELP_STRING([--with-upc], [build a UPC version]), 28 | [], [with_upc=yes]) 29 | AC_ARG_WITH([shmem], 30 | AS_HELP_STRING([--with-shmem], [build a SHMEM version]), 31 | [], [with_shmem=no]) 32 | 33 | 34 | AC_PROG_RANLIB 35 | AC_PROG_CC 36 | AC_PROG_CC_C99 37 | 38 | mpp_use_shmem=no 39 | if test "x$with_shmem" = xyes; then 40 | with_upc=no 41 | mpp_use_shmem=yes 42 | fi 43 | 44 | if test "x$with_upc" = xyes; then 45 | AM_PROG_UPC 46 | #AC_CHECK_HEADERS([upc_atomic.h]) 47 | else 48 | # must define this ugly conditional to keep automake happy 49 | AM_CONDITIONAL(am__fastdepUPC, [test]) 50 | fi 51 | 52 | 53 | AX_CHECK_COMPILE_FLAG([-std=c11], [ 54 | AS_VAR_APPEND([CFLAGS]," -std=c11") 55 | ], 56 | [ 57 | AX_CHECK_COMPILE_FLAG([-hstd=c11],[ AS_VAR_APPEND([CFLAGS]," -hstd=c11")], 58 | [echo "C compiler cannot compile C11 code" 59 | exit -1]) 60 | ]) 61 | 62 | AS_IF([test "x$mpp_use_shmem" = xyes], 63 | [AC_SEARCH_LIBS([shmemx_alltoallv], [], 64 | [AC_DEFINE([HAVE_SHMEMX_ALLTOALLV], [1], [Define if shmem_alltoallv exists.])]) 65 | AC_SEARCH_LIBS([shmemx_team_alltoallv], [], 66 | [AC_DEFINE([HAVE_SHMEMX_TEAM_ALLTOALLV], [1], [Define if shmem_team_alltoallv exists.])]) 67 | AC_SEARCH_LIBS([shmemx_putmem_signal], [], 68 | [AC_DEFINE([HAVE_SHMEMX_PUTMEM_SIGNAL], [1], [Define if shmem_putmem_signal exists.])]) 69 | AC_SEARCH_LIBS([shmem_free], [], 70 | [AC_DEFINE([HAVE_SHMEM_FREE], [1], [Define if shmem_free exists.])]) 71 | ]) 72 | 73 | AM_CONDITIONAL(BUILD_UPC, [test "x$with_upc" = xyes]) 74 | AM_CONDITIONAL(BUILD_SHMEM, [test "x$mpp_use_shmem" = xyes]) 75 | 76 | 
AC_CONFIG_FILES([ 77 | libgetput.pc:libgetput.pc.in 78 | Makefile 79 | ]) 80 | 81 | AC_OUTPUT 82 | 83 | -------------------------------------------------------------------------------- /src/bale_classic/libgetput/libgetput.pc.in: -------------------------------------------------------------------------------- 1 | Name: @PACKAGE_NAME@ 2 | Version: @PACKAGE_VERSION@ 3 | Description: bale utils library 4 | Requires: 5 | 6 | prefix=@prefix@ 7 | libdir=${prefix}/lib 8 | includedir=${prefix}/include 9 | 10 | Libs: -L${libdir} -l@PACKAGE_NAME@ 11 | Cflags: -I${includedir} 12 | 13 | -------------------------------------------------------------------------------- /src/bale_classic/spmat/Makefile.am: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # All rights reserved. 8 | # 9 | # This file is a part of Bale. For license information see the 10 | # LICENSE file in the top level directory of the distribution. 
11 | # 12 | # *****************************************************************/ 13 | 14 | common_sources = spmat_agp.upc 15 | common_sources += spmat_exstack.upc 16 | common_sources += spmat_exstack2.upc 17 | common_sources += spmat_conveyor.upc 18 | common_sources += spmat_io.upc 19 | common_sources += spmat_utils.upc 20 | common_sources += geometric.upc 21 | pkconfigdir=@libdir@/pkgconfig 22 | 23 | include_HEADERS=spmat.h spmat_enums.h 24 | 25 | lib_LIBRARIES = libspmat.a 26 | 27 | pkconfig_DATA = @PACKAGE_NAME@.pc 28 | libspmat_a_CPPFLAGS = -g -O2 29 | #libspmat_a_CPPFLAGS = -g -O0 30 | 31 | if BUILD_UPC 32 | libspmat_a_SOURCES = $(common_sources:.c=__upc.upc) 33 | endif 34 | 35 | if BUILD_SHMEM 36 | #libspmat_a_LINK = $(LINK) 37 | libspmat_a_SOURCES = $(common_sources:.upc=__c.c) 38 | libspmat_a_CPPFLAGS += -DUSE_SHMEM 39 | endif 40 | 41 | libspmat_a_CPPFLAGS += @CPPFLAGS@ @libdeps_CFLAGS@ 42 | 43 | # Automatic linking as needed to create C copies of UPC files. # 44 | %__c.c: %.upc 45 | @echo "INFO: Linking C copy of UPC file: $@" 46 | @rm -f $@ 47 | @test -d $(dir $@) || mkdir -p $(dir $@) 48 | @if test "$(top_srcdir)" = "$(top_builddir)"; then ln -s $(notdir $<) $@ || cp -f $< $@; else ln -s $< $@ || cp -f $< $@; fi 49 | 50 | 51 | # Automatic linking as needed to create UPC copies of C files. 
# 52 | %__upc.upc: %.c 53 | @echo "INFO: Linking UPC copy of C file: $@" 54 | @rm -f $@ 55 | @test -d $(dir $@) || mkdir -p $(dir $@) 56 | @if test "$(top_srcdir)" = "$(top_builddir)"; then ln -s $(notdir $<) $@ || cp -f $< $@; else ln -s $< $@ || cp -f $< $@; fi 57 | 58 | 59 | -------------------------------------------------------------------------------- /src/bale_classic/spmat/configure.ac: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2019-2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # All rights reserved. 8 | # 9 | # This file is a part of Bale. For license information see the 10 | # LICENSE file in the top level directory of the distribution. 11 | # 12 | # *****************************************************************/ 13 | 14 | AC_PREREQ([2.60]) #Require Minimum Autotools version 2.60 15 | 16 | AC_INIT([spmat], [2.1.0], [bale@super.org]) 17 | AM_INIT_AUTOMAKE([1.10 foreign no-define tar-ustar]) 18 | ##LT_INIT ## enable libtool 19 | 20 | AC_CONFIG_HEADERS([config.h]) 21 | AC_CONFIG_MACRO_DIRS([../]) 22 | 23 | AC_ARG_WITH([upc], 24 | AS_HELP_STRING([--with-upc], [build a UPC version]), 25 | [], [with_upc=yes]) 26 | AC_ARG_WITH([shmem], 27 | AS_HELP_STRING([--with-shmem], [build a SHMEM version]), 28 | [], [with_shmem=no]) 29 | 30 | AC_PROG_RANLIB 31 | AC_PROG_CC 32 | AC_PROG_CC_C99 33 | 34 | mpp_use_shmem=no 35 | if test "x$with_shmem" = xyes; then 36 | with_upc=no 37 | mpp_use_shmem=yes 38 | fi 39 | 40 | if test "x$with_upc" = xyes; then 41 | AM_PROG_UPC 42 | else 43 | # must define this ugly conditional to keep automake happy 44 | AM_CONDITIONAL(am__fastdepUPC, [test]) 45 | fi 46 | 47 | AS_IF([test "x$mpp_use_shmem" = xyes], 48 | [AC_SEARCH_LIBS([shmemx_alltoallv], [], 49 | [AC_DEFINE([HAVE_SHMEMX_ALLTOALLV], [1], [Define if shmem_alltoallv exists.])]) 50 | 
AC_SEARCH_LIBS([shmemx_team_alltoallv], [], 51 | [AC_DEFINE([HAVE_SHMEMX_TEAM_ALLTOALLV], [1], [Define if shmem_team_alltoallv exists.])]) 52 | AC_SEARCH_LIBS([shmemx_putmem_signal], [], 53 | [AC_DEFINE([HAVE_SHMEMX_PUTMEM_SIGNAL], [1], [Define if shmem_putmem_signal exists.])]) 54 | AC_SEARCH_LIBS([shmem_free], [], 55 | [AC_DEFINE([HAVE_SHMEM_FREE], [1], [Define if shmem_free exists.])]) 56 | ]) 57 | 58 | AM_CONDITIONAL(BUILD_UPC, [test "x$with_upc" = xyes]) 59 | AM_CONDITIONAL(BUILD_SHMEM, [test "x$mpp_use_shmem" = xyes]) 60 | 61 | PKG_CHECK_MODULES([libdeps], 62 | [ 63 | exstack 64 | convey 65 | libgetput 66 | ] 67 | ) 68 | 69 | AC_CONFIG_FILES([ 70 | spmat.pc:spmat.pc.in 71 | Makefile 72 | ]) 73 | 74 | AC_OUTPUT 75 | 76 | -------------------------------------------------------------------------------- /src/bale_classic/spmat/spmat.pc.in: -------------------------------------------------------------------------------- 1 | Name: @PACKAGE_NAME@ 2 | Version: @PACKAGE_VERSION@ 3 | Description: bale sparse matrix library 4 | Requires: 5 | 6 | prefix=@prefix@ 7 | libdir=${prefix}/lib 8 | includedir=${prefix}/include 9 | 10 | Libs: -L${libdir} -l@PACKAGE_NAME@ 11 | Cflags: -I${includedir} 12 | -------------------------------------------------------------------------------- /src/bale_classic/spmat/spmat_enums.h: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 
12 | // 13 | // 14 | *****************************************************************/ 15 | #ifndef SPMAT_ENUMS_H 16 | #define SPMAT_ENUMS_H 17 | 18 | typedef enum graph_model {FLAT, GEOMETRIC, KRONECKER} graph_model; 19 | typedef enum edge_type {DIRECTED, UNDIRECTED, DIRECTED_WEIGHTED, UNDIRECTED_WEIGHTED} edge_type; 20 | typedef enum self_loops {NOLOOPS, LOOPS} self_loops; 21 | #endif 22 | -------------------------------------------------------------------------------- /src/bale_classic/std_options/Makefile.am: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | 16 | pkconfigdir=@libdir@/pkgconfig 17 | 18 | include_HEADERS=std_options.h 19 | lib_LIBRARIES=libstd_options.a 20 | 21 | pkconfig_DATA = @PACKAGE_NAME@.pc 22 | libstd_options_a_CPPFLAGS=@libdeps_CFLAGS@ 23 | 24 | common_sources=std_options.upc 25 | 26 | if BUILD_UPC 27 | libstd_options_a_SOURCES = $(common_sources:.c=__upc.upc) 28 | endif 29 | 30 | if BUILD_SHMEM 31 | #libstd_options_a_LINK = $(LINK) 32 | libstd_options_a_SOURCES = $(common_sources:.upc=__c.c) 33 | libstd_options_a_CFLAGS = -DUSE_SHMEM 34 | endif 35 | 36 | # Automatic linking as needed to create C copies of UPC files. 
# 37 | %__c.c: %.upc 38 | @echo "INFO: Linking C copy of UPC file: $@" 39 | @rm -f $@ 40 | @test -d $(dir $@) || mkdir -p $(dir $@) 41 | @if test "$(top_srcdir)" = "$(top_builddir)"; then ln -s $(notdir $<) $@ || cp -f $< $@; else ln -s $< $@ || cp -f $< $@; fi 42 | 43 | # Automatic linking as needed to create UPC copies of C files. # 44 | %__upc.upc: %.c 45 | @echo "INFO: Linking UPC copy of C file: $@" 46 | @rm -f $@ 47 | @test -d $(dir $@) || mkdir -p $(dir $@) 48 | @if test "$(top_srcdir)" = "$(top_builddir)"; then ln -s $(notdir $<) $@ || cp -f $< $@; else ln -s $< $@ || cp -f $< $@; fi 49 | 50 | -------------------------------------------------------------------------------- /src/bale_classic/std_options/README.md: -------------------------------------------------------------------------------- 1 | # std_options 2 | 3 | This library handles option parsing for the bale 4 | [apps](../apps/README.md). This library is built on top of the arg_parse library in glibc. There are two main classes of options: 5 | standard options, and standard graph options. Standard options are 6 | included in all bale apps and give the user control of things like, 7 | buffer count (for aggregation libraries), RNG seeds, and an implementation 8 | mask (this controls which "implementations" are run for each app). The 9 | standard graph options allow the user to control the input graph for 10 | apps that require a matrix or graph. Run any bale app with --help to 11 | see more information. 12 | 13 | -------------------------------------------------------------------------------- /src/bale_classic/std_options/configure.ac: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. 
For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | 16 | AC_PREREQ([2.60]) #Require Minimum Autoconf version 2.60 17 | 18 | AC_INIT([std_options], [1.0.0], [bale@super.org]) 19 | 20 | AM_INIT_AUTOMAKE([1.10 foreign no-define tar-ustar]) 21 | 22 | AC_CONFIG_MACRO_DIRS([../]) 23 | 24 | AC_PROG_RANLIB 25 | AC_PROG_CC 26 | AC_PROG_CC_C99 27 | 28 | mpp_use_shmem=no 29 | if test "x$with_shmem" = xyes; then 30 | with_upc=no 31 | mpp_use_shmem=yes 32 | fi 33 | 34 | if test "x$with_upc" = xyes; then 35 | AM_PROG_UPC 36 | else 37 | # must define this ugly conditional to keep automake happy 38 | AM_CONDITIONAL(am__fastdepUPC, [test]) 39 | fi 40 | 41 | AS_IF([test "x$mpp_use_shmem" = xyes], 42 | [AC_SEARCH_LIBS([shmemx_alltoallv], [], 43 | [AC_DEFINE([HAVE_SHMEMX_ALLTOALLV], [1], [Define if shmem_alltoallv exists.])]) 44 | AC_SEARCH_LIBS([shmemx_team_alltoallv], [], 45 | [AC_DEFINE([HAVE_SHMEMX_TEAM_ALLTOALLV], [1], [Define if shmem_team_alltoallv exists.])]) 46 | AC_SEARCH_LIBS([shmemx_putmem_signal], [], 47 | [AC_DEFINE([HAVE_SHMEMX_PUTMEM_SIGNAL], [1], [Define if shmem_putmem_signal exists.])]) 48 | AC_SEARCH_LIBS([shmem_free], [], 49 | [AC_DEFINE([HAVE_SHMEM_FREE], [1], [Define if shmem_free exists.])]) 50 | ]) 51 | 52 | AM_CONDITIONAL(BUILD_UPC, [test "x$with_upc" = xyes]) 53 | AM_CONDITIONAL(BUILD_SHMEM, [test "x$mpp_use_shmem" = xyes]) 54 | 55 | PKG_CHECK_MODULES([libdeps], 56 | [ 57 | libgetput 58 | spmat 59 | ] 60 | ) 61 | 62 | 63 | AC_CONFIG_FILES([ 64 | std_options.pc:std_options.pc.in 65 | Makefile 66 | ]) 67 | 68 | AC_OUTPUT 69 | 70 | -------------------------------------------------------------------------------- /src/bale_classic/std_options/std_options.pc.in: -------------------------------------------------------------------------------- 1 | Name: @PACKAGE_NAME@ 2 | Version: @PACKAGE_VERSION@ 3 | Description: bale 
standard options library 4 | Requires: 5 | 6 | prefix=@prefix@ 7 | libdir=${prefix}/lib 8 | includedir=${prefix}/include 9 | 10 | Libs: -L${libdir} -l@PACKAGE_NAME@ 11 | Cflags: -I${includedir} 12 | 13 | -------------------------------------------------------------------------------- /src/other_parallel/Chapel/Makefile: -------------------------------------------------------------------------------- 1 | all: histo ig topo spmat triangle 2 | 3 | histo: histo.chpl 4 | chpl -o histo histo.chpl 5 | 6 | ig: ig.chpl 7 | chpl -o ig ig.chpl 8 | 9 | topo: topo.chpl 10 | chpl -o topo topo.chpl 11 | 12 | spmat: spmat.chpl 13 | chpl -o spmat spmat.chpl 14 | 15 | triangle: triangle.chpl 16 | chpl -o triangle triangle.chpl 17 | 18 | clean: 19 | rm -f histo ig topo spmat triangle \ 20 | histo_real ig_real topo_real spmat_real triangle_real 21 | -------------------------------------------------------------------------------- /src/other_parallel/Chapel/README.md: -------------------------------------------------------------------------------- 1 | # bale (Chapel) 2 | 3 | So far we have 5 different Chapel files in bale_chapel. None of these codes is using software aggregation libraries (like exstack or conveyors) because we haven't written them yet in Chapel. Hopefully that will be a future project! Our Chapel implementations mostly follow the AGP model as an experiment to see how clean they look in Chapel. 4 | 5 | * histo.chpl : This file implements histogram in a few different chapel styles. 6 | * ig.chpl: This file implements index_gather in a few different chapel styles 7 | * spmat.chpl: This file implements most of the sparse matrix functionality of the spmat library in bale_classic. We take advantage of Chapel iterators to make operations on sparse matrices much cleaner and more intuitive looking than the equivalent C code. 
8 | * topo.chpl: 2 different implementations of toposort in Chapel 9 | * triangle.chpl: an implementation of triangle counting in Chapel -------------------------------------------------------------------------------- /src/other_parallel/Chapel/histo.chpl: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | use CyclicDist, BlockDist; 16 | use Random; 17 | use Assert; 18 | use Time; 19 | config const N=10; // number of updates 20 | config const M=10; // size of table 21 | 22 | // allocate main table and array of random ints 23 | const Mspace = {0..M-1}; 24 | const D = Mspace dmapped Cyclic(startIdx=Mspace.low); 25 | var A: [D] atomic int; 26 | const Nspace = {0..(N*numLocales - 1)}; 27 | const D2 = Nspace dmapped Block(Nspace); 28 | var rindex: [D2] int; 29 | var t: Timer; 30 | t.start(); 31 | 32 | /* set up loop: populate the rindex array with random numbers mod M */ 33 | fillRandom(rindex, 208); // the 208 is a seed 34 | forall r in rindex{ 35 | r = mod(r, M); 36 | } 37 | 38 | t.stop(); 39 | writeln("Set up time: ", t.elapsed()); 40 | t.clear(); 41 | t.start(); 42 | 43 | /* In this code, we present 3 ways to write histogram in Chapel */ 44 | 45 | /* first, using a simple iterator over the array */ 46 | forall r in rindex{ 47 | A[r].add(1); //atomic add 48 | } 49 | 50 | t.stop(); 51 | writeln("Loop 1: ", t.elapsed()); 52 | t.clear(); 53 | t.start(); 54 | 55 | /* We can write the main loop in a more node-centric way though. 
*/ 56 | coforall loc in Locales do on loc do{ 57 | forall i in rindex.localSubdomain(){ 58 | A[rindex[i]].add(1); 59 | } 60 | } 61 | 62 | t.stop(); 63 | writeln("Loop 2: ", t.elapsed()); 64 | t.clear(); 65 | t.start(); 66 | 67 | /* Most economical of all, we can also write the main loop in this “vector” way: */ 68 | A[rindex].add(-2); 69 | 70 | t.stop(); 71 | writeln("Loop 3: ", t.elapsed()); 72 | t.start(); 73 | 74 | /* make sure all the updates happened correctly */ 75 | forall r in rindex{ 76 | assert(A[r].read() == 0); 77 | } -------------------------------------------------------------------------------- /src/other_parallel/Chapel/ig.chpl: -------------------------------------------------------------------------------- 1 | /****************************************************************** 2 | // 3 | // 4 | // Copyright(C) 2020, Institute for Defense Analyses 5 | // 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | // 7 | // 8 | // All rights reserved. 9 | // 10 | // This file is a part of Bale. For license information see the 11 | // LICENSE file in the top level directory of the distribution. 12 | // 13 | // 14 | *****************************************************************/ 15 | use CyclicDist, BlockDist; 16 | use Random; 17 | use Assert; 18 | use Time; 19 | config const M = 20; //table size 20 | config const N = 10; //num reads 21 | 22 | // allocate main table (T), dest array (tgt) and array of random ints (rindex). 23 | const Nspace = {0..N*numLocales-1}; 24 | const Mspace = {0..M-1}; 25 | 26 | const D = Mspace dmapped Cyclic(startIdx=Mspace.low); 27 | var T: [D] int; 28 | 29 | const D2 = Nspace dmapped Block(Nspace); 30 | var rindex: [D2] int; 31 | var tgt: [D2] int; 32 | var t: Timer; 33 | t.start(); 34 | 35 | /* set up loop: 36 | populate the rindex array with random numbers mod M 37 | populate T so that T[i] = i. 
38 | */ 39 | fillRandom(rindex, 208); //208 is just the seed 40 | rindex = mod(rindex, M); 41 | 42 | T = {0..#M}; // identity permutation 43 | 44 | t.stop(); 45 | writeln("Set up time: ", t.elapsed()); 46 | t.clear(); 47 | t.start(); 48 | 49 | /* Main loop 1 */ 50 | forall i in rindex.domain{ 51 | tgt[i] = T[rindex[i]]; 52 | } 53 | 54 | t.stop(); 55 | writeln("Loop 1: ", t.elapsed()); 56 | t.clear(); 57 | t.start(); 58 | 59 | /* Main loop 2 */ 60 | /* We could switch the main loop to this… */ 61 | coforall loc in Locales do on loc do{ 62 | var inds = D2.localSubdomain(); 63 | forall i in inds{ 64 | tgt[i] += T[rindex[i]]; 65 | } 66 | } 67 | 68 | t.stop(); 69 | writeln("Loop 2: ", t.elapsed()); 70 | t.clear(); 71 | t.start(); 72 | 73 | //We can check for success easily: 74 | [r in rindex] r = 2*r; 75 | if(!tgt.equals(rindex)){ 76 | writeln("Error!"); 77 | } 78 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/.gitignore: -------------------------------------------------------------------------------- 1 | #Rust specific .gitignore 2 | 3 | Cargo.lock 4 | target/ 5 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 
12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "bale" 17 | description = "Bale in Rust" 18 | version = "0.1.0" 19 | authors = ["Bill Carlson "] 20 | edition = "2018" 21 | 22 | [dependencies] 23 | clap = "2.0" 24 | rand = "0.7" 25 | spmat = { path = "spmat" } 26 | convey_hpc = { path = "../../../../convey_private" } 27 | 28 | [workspace] 29 | members = [ 30 | "delta_stepping", 31 | "spmat", 32 | "toposort", 33 | "triangle", 34 | ] 35 | 36 | [profile.release] 37 | debug = true 38 | 39 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/README.md: -------------------------------------------------------------------------------- 1 | # Bale Applications written in Rust 2 | 3 | This is the implementation of the Bale applications, using the Rust 4 | programming language. 5 | 6 | ## Cray Installation 7 | 8 | If you are attempting to build on a Cray system, this is currently 9 | tricky. See the [Cray Instructions](README_cray.md) and ignore 10 | the rest of these instructions. 11 | 12 | ## Non-Cray Installation 13 | 14 | It depends on the rust conveyor library [available 15 | here](https://github.com/wwc559/convey_private). It is 16 | strongly advised that you do a *standalone build and test* of 17 | `convey_private` *first*, following the instructions in that library. 18 | This can be a *bit tricky* as you probably need to specify where the 19 | underlying parallel communication library is installed on 20 | your system, via the `SHMEM_PATH` environment variable. 21 | 22 | This build is setup to run correctly if you place `convey_private` 23 | in the same directory where you placed bale. Modify Cargo.toml, 24 | delta_stepping/Cargo.toml, spmat/Cargo.toml, triangle/Cargo.toml, 25 | and toposort/Cargo.toml if you have placed it in a different location. 
26 | 27 | To build this, say `cargo build --release --workspace` and then you can 28 | run any of the bale apps with: 29 | 30 | ``` 31 | oshrun -n 4 target/release/delta_stepping 32 | oshrun -n 4 target/release/histo_convey 33 | oshrun -n 4 target/release/ig_convey 34 | oshrun -n 4 target/release/permute_convey 35 | oshrun -n 4 target/release/randperm_convey 36 | oshrun -n 4 target/release/toposort 37 | oshrun -n 4 target/release/triangle 38 | ``` 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/README_cray.md: -------------------------------------------------------------------------------- 1 | # Cray Build Instructions 2 | 3 | We depend on the rust conveyor library [available 4 | here](https://github.com/wwc559/convey_private). Install this 5 | first but DO NOT BUILD. 6 | 7 | The Cray build is complicated, at least on the NERSC Cori system. 8 | The crux of the problem is that some Rust crates will not build with the 9 | llvm module loaded while others will not build without it loaded. 10 | This is unfortunate and we will look for solutions that work 11 | better going forward. 12 | 13 | 1. Build with default modules loaded, shmem-sys will fail 14 | ``` 15 | cargo build --release 16 | ``` 17 | 2. source the fixmodules.sh script 18 | ``` 19 | source scripts/fixmodules.sh 20 | ``` 21 | 3. rebuild, it should work this time. 
22 | ``` 23 | cargo build --release --workspace 24 | cargo build --release --examples 25 | ``` 26 | To run the applications: 27 | 28 | ``` 29 | srun -n 4 target/release/delta_stepping 30 | srun -n 4 target/release/histo_convey 31 | srun -n 4 target/release/ig_convey 32 | srun -n 4 target/release/permute_convey 33 | srun -n 4 target/release/randperm_convey 34 | srun -n 4 target/release/toposort 35 | srun -n 4 target/release/triangle 36 | ``` 37 | 38 | If you are building convey_private, the instructions are similar, so 39 | just build there (on a fresh login, without having loaded 40 | fixmodules.sh) 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | .vscode/launch.json 13 | 14 | # Emacs backup files 15 | \#*\# 16 | .\#* 17 | *~ 18 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "convey_hpc" 3 | description = "Conveyor implementation in Rust for HPC" 4 | version = "0.1.0" 5 | authors = ["Bill Carlson "] 6 | edition = "2018" 7 | 8 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 9 | 10 | [dependencies] 11 | lazy_static = "1.4.0" 12 | shmem = { path="shmem" } 13 | bincode = "1.2" 14 | serde = { version = "1.0", features = ["derive"] } 15 | 16 | [workspace] 17 | members = [ 18 | "shmem", 19 | "shmem-sys", 20 | 
"pshmem-experimental", 21 | ] 22 | 23 | [profile.release] 24 | debug = true 25 | 26 | [dev-dependencies] 27 | rand="0.7" 28 | clap="2" 29 | 30 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020, Institute for Defense Analyses 2 | 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 3 | This material may be reproduced by or for the U.S. Government 4 | pursuant to the copyright license under the clauses at DFARS 5 | 252.227-7013 and 252.227-7014. 6 | 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | * Neither the name of the copyright holder nor the 17 | names of its contributors may be used to endorse or promote products 18 | derived from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 | FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 24 | COPYRIGHT HOLDER NOR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 25 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 31 | OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/README.md: -------------------------------------------------------------------------------- 1 | # convey_private 2 | Conveyor Implementation for Rust 3 | 4 | This project is to explore implementing conveyors in the Rust 5 | programming language. This is inspired by many projects in the PGAS 6 | research community including UPC, OpenShmem, Bale, and Conveyors. 7 | 8 | **Note: This repository is currently private, please do not redistribute.** 9 | Open an 'issue' to get access for others. 10 | 11 | **Note: Expect rapid changes in code organization and interfaces, this is highly experimental.** 12 | 13 | ## Cray Build Instructions 14 | 15 | To build this package on Cray, see [Cray 16 | Instructions](README_cray.md). Ignore what follows here as it is 17 | currently complicated. 18 | 19 | ## Non-Cray Build Instructions 20 | 21 | To build this package you will need an implementation of openshmem, 22 | version 1.4. If not, you will need to set the environment 23 | variable SHMEM_PATH to the path where openshmem was installed. The 24 | default is /usr/local. 
Also you will need a version of llvm in your 25 | path, specifically the build needs to be able to run 'llvm-config --prefix' 26 | 27 | To build this just say `cargo build --release --examples` and then you can run it with: 28 | 29 | ``` 30 | oshrun -n 4 target/release/examples/histo_convey 31 | ``` 32 | 33 | There are sub-crates for adapting to the implementation technologies this is 34 | built on top of. 35 | 36 | `shmem` and `shmem-sys` implement an adaptation to the OpenShmem (1.4) 37 | system. `shmem-sys` is the wrapper for a 1.4-conformant OpenShmem 38 | library. `shmem` provides a subset of these features to `pshmem`. 39 | See the [Shmem README.md](shmem/README.md) for more details, including 40 | some examples of using this directly. 41 | 42 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/README_cray.md: -------------------------------------------------------------------------------- 1 | # Installation instructions for Cray 2 | 3 | The Cray build is complicated, at least on the NERSC Cori system. The 4 | crux of the problem is that some Rust crates will not build with the 5 | llvm module loaded while others will not build without it loaded. 6 | This is unfortunate and we will look for solutions that work better 7 | going forward. 8 | 9 | 1. Build with default modules loaded, shmem-sys will fail 10 | ``` 11 | cargo build --release 12 | ``` 13 | 2. source the fixmodules.sh script 14 | ``` 15 | source scripts/fixmodules.sh 16 | ``` 17 | 3. rebuild, it should work this time. 
18 | ``` 19 | cargo build --release --workspace 20 | cargo build --release --examples 21 | ``` 22 | To run the application: 23 | 24 | ``` 25 | srun -n 4 target/release/examples/histo_convey 26 | ``` 27 | 28 | If you are building bale_private/src/other_parallel/Rust, the 29 | instructions are similar, so just build there (on a fresh 30 | login, without having loaded fix-modules.sh) 31 | 32 | 33 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/examples/collect_convey.rs: -------------------------------------------------------------------------------- 1 | //! Example program to do collectives in conveyors 2 | use clap::{App, Arg}; 3 | use convey_hpc::collect::CollectValues; 4 | use convey_hpc::Convey; 5 | use std::time::Instant; 6 | 7 | fn main() { 8 | let matches = App::new("collect") 9 | .version("0.1.0") 10 | .about("test of collectives") 11 | .arg( 12 | Arg::with_name("collectives") 13 | .short("c") 14 | .long("collectives") 15 | .takes_value(true) 16 | .help("the number of collective calls per rank"), 17 | ) 18 | .arg( 19 | Arg::with_name("verbose") 20 | .short("-v") 21 | .long("verbose") 22 | .takes_value(false) 23 | .help("increase the amount of verbosity"), 24 | ) 25 | .get_matches(); 26 | 27 | let collectives: usize = matches 28 | .value_of("collectives") 29 | .unwrap_or("100") 30 | .parse() 31 | .expect("bad collectives arg"); 32 | let verbose: u64 = matches.occurrences_of("verbose"); 33 | 34 | let convey = Convey::new().expect("convey initializtion failed"); 35 | 36 | do_collect_convey(&convey, collectives, verbose); 37 | do_collect_shmem(&convey, collectives, verbose); 38 | } 39 | 40 | fn do_collect_convey(convey: &Convey, collectives: usize, verbose: u64) { 41 | let value: usize = 42; 42 | 43 | let now = Instant::now(); 44 | 45 | let mut total_result = 0; 46 | 47 | for _i in 0..collectives { 48 | total_result += value.reduce_sum(); 49 | } 50 | let d = now.elapsed(); 51 | if verbose > 0 || 
convey.my_rank() == 0 { 52 | println!( 53 | "convey pe{}/{}, {} reductions, {} reductions, {} msec", 54 | convey.my_rank(), 55 | convey.num_ranks(), 56 | collectives, 57 | total_result, 58 | d.as_millis(), 59 | ); 60 | } 61 | } 62 | 63 | fn do_collect_shmem(convey: &Convey, collectives: usize, verbose: u64) { 64 | let _value: usize = 42; 65 | 66 | let now = Instant::now(); 67 | 68 | let total_result = 0; 69 | 70 | for _i in 0..collectives { 71 | //total_result += convey.shmem.sum_to_all(value); 72 | } 73 | let d = now.elapsed(); 74 | if verbose > 0 || convey.my_rank() == 0 { 75 | println!( 76 | "shmem pe{}/{}, {} reductions, {} reductions, {} msec", 77 | convey.my_rank(), 78 | convey.num_ranks(), 79 | collectives, 80 | total_result, 81 | d.as_millis(), 82 | ); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/pshmem-experimental/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pshmem" 3 | description = "Partitioned Shared Memory (pShmem)" 4 | version = "0.1.0" 5 | authors = ["Bill Carlson "] 6 | edition = "2018" 7 | 8 | [dependencies] 9 | shmem = { path="../shmem" } 10 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/pshmem-experimental/src/collect.rs: -------------------------------------------------------------------------------- 1 | //! 
Module to handle collectives, currently only "value based" 2 | use crate::Pshmem; 3 | use crate::PSHMEM_WORK; 4 | use shmem::collect::Collect; 5 | 6 | /// A trait which implement 7 | pub trait ValueCollect { 8 | /// our type 9 | type T; 10 | /// reduce and produce maximum value 11 | fn reduce_max(&self, value: RHS) -> Self::T; 12 | /// reduce and produce sum 13 | fn reduce_add(&self, value: RHS) -> Self::T; 14 | } 15 | 16 | impl ValueCollect for Pshmem { 17 | type T = i64; 18 | fn reduce_max(&self, value: Self::T) -> Self::T { 19 | let source = vec![value]; 20 | PSHMEM_WORK.with(|psw| { 21 | psw.borrow() 22 | .work_i64 23 | .shmem_object 24 | .max_to_all(0, &source) 25 | .expect("fixme"); 26 | psw.borrow().work_i64.local_part()[0] 27 | }) 28 | } 29 | fn reduce_add(&self, value: Self::T) -> Self::T { 30 | let source = vec![value]; 31 | PSHMEM_WORK.with(|psw| { 32 | psw.borrow() 33 | .work_i64 34 | .shmem_object 35 | .sum_to_all(0, &source) 36 | .expect("fixme"); 37 | psw.borrow().work_i64.local_part()[0] 38 | }) 39 | } 40 | } 41 | 42 | impl ValueCollect for Pshmem { 43 | type T = usize; 44 | fn reduce_max(&self, value: Self::T) -> Self::T { 45 | self.reduce_max(value as i64) as usize 46 | } 47 | fn reduce_add(&self, value: Self::T) -> Self::T { 48 | self.reduce_add(value as i64) as usize 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/pshmem-experimental/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Module to handle errors that occur in shmem 2 | //! 
Derived from https://blog.burntsushi.net/rust-error-handling/ 3 | 4 | /// Enum to keep all the error types we and our dependencies use 5 | #[derive(Debug)] 6 | pub enum Error { 7 | /// An error that occured in a called function by shmem 8 | Io(std::io::Error), 9 | /// An error that occured in a called function in shmem 10 | Shmem(shmem::error::Error), 11 | /// Some bound on an object::Object or object::GlobalObject was exceeded 12 | BoundsExceeded, 13 | /// We don't support arbitrary blocking (yet?) 14 | UnsupportedBlocking, 15 | } 16 | 17 | impl std::fmt::Display for Error { 18 | /// Format one of our errors for display to user 19 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 20 | match *self { 21 | Error::Io(ref err) => err.fmt(f), 22 | Error::Shmem(ref err) => err.fmt(f), 23 | Error::BoundsExceeded => write!(f, "Shmem bounds exceeded on remote operation"), 24 | Error::UnsupportedBlocking => write!(f, "Unsupported blocking factor"), 25 | } 26 | } 27 | } 28 | 29 | impl From for Error { 30 | /// Pull the io error into our space 31 | fn from(err: std::io::Error) -> Error { 32 | Error::Io(err) 33 | } 34 | } 35 | 36 | impl From for Error { 37 | /// Pull the shmem error into our space 38 | fn from(err: shmem::error::Error) -> Error { 39 | Error::Shmem(err) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/pshmem-experimental/src/main.rs: -------------------------------------------------------------------------------- 1 | use pshmem::object::Fetchers; 2 | use pshmem::Pshmem; 3 | 4 | fn main() { 5 | let pshmem = Pshmem::new(); 6 | let me = pshmem.rank(); 7 | let size = pshmem.size(); 8 | 9 | // A shared object with 1 usize element per rank 10 | let x = pshmem.new_object::(1).expect("allocation error"); 11 | // the local slice of this data 12 | let lp = x.local_part(); 13 | 14 | lp[0] = pshmem.rank() + 42; 15 | 16 | pshmem.barrier(); 17 | println!( 18 | "Hello Pshmem 
World from rank {} of {}, a nice number is {}", 19 | me, 20 | pshmem.size(), 21 | x.fetch((me + 1) % size).expect("fetch failed") 22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/scripts/fixmodules.sh: -------------------------------------------------------------------------------- 1 | module unload PrgEnv-intel 2 | module load PrgEnv-gnu 3 | module swap gcc/7.3.0 4 | module load cuda/10.1.168 5 | module load llvm 6 | module load cray-shmem 7 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shmem-sys" 3 | version = "0.1.0" 4 | authors = ["Bill Carlson "] 5 | edition = "2018" 6 | build = "build.rs" 7 | 8 | [dependencies] 9 | 10 | [build-dependencies] 11 | bindgen = "0.53" 12 | 13 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Some allows to prevent warnings 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![allow(non_upper_case_globals)] 5 | // 6 | // Copyright (c) 2020, Institute for Defense Analyses 7 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 8 | // 9 | // All rights reserved. 10 | // 11 | // This file is part of Convey, a conveyor library for rust. For 12 | // licence information see the LICENSE file in the top level dirctory 13 | // of the distribution. 
14 | 15 | include!(concat!(env!("OUT_DIR"), "/bindings.rs")); 16 | 17 | // bother: need a different type in various versions of shmem.h 18 | #[cfg(cray)] 19 | pub type nreduce_t = size_t; 20 | 21 | #[cfg(not(cray))] 22 | pub type nreduce_t = i32; 23 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem-sys/src/wrapper.h: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2020, Institute for Defense Analyses 3 | // 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 4 | // 5 | // All rights reserved. 6 | // 7 | // This file is part of Convey, a conveyor library for rust. For 8 | // licence information see the LICENSE file in the top level dirctory 9 | // of the distribution. 10 | 11 | // This define is needed so that Cray's shmem.h can be parsed by bindgen 12 | // The symbol referenced is a #define defined after use, which seems to work 13 | // in cc but not bindgen 14 | #define _SHMEM_MAX_RADIX 64 15 | #include 16 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "shmem" 3 | version = "0.1.0" 4 | authors = ["Bill Carlson "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | lazy_static = "1.4.0" 11 | shmem-sys = { path="../shmem-sys" } 12 | 13 | [dev-dependencies] 14 | rand="0.7" 15 | clap="2" -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem/README.md: -------------------------------------------------------------------------------- 1 | # Shmem: an adaptation of a subset of OpenShmem 1.4 to Rust. 
2 | 3 | See examples/histo_shmem.rs for an example of usage. 4 | 5 | Use `cargo build --examples` in this directory to build it. It can be run with: 6 | 7 | ``` 8 | oshrun -n 4 ../target/debug/examples/histo_shmem 9 | ``` 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem/examples/histo_shmem.rs: -------------------------------------------------------------------------------- 1 | use clap::{App, Arg}; 2 | use rand::distributions::{Distribution, Uniform}; 3 | use shmem::atomic::{Atomic, GlobalAtomic}; 4 | use shmem::Shmem; 5 | 6 | fn main() { 7 | let matches = App::new("histo") 8 | .version("0.1.0") 9 | .about("test of histogram") 10 | .arg( 11 | Arg::with_name("buckets") 12 | .short("b") 13 | .long("buckets") 14 | .takes_value(true) 15 | .help("then number of buckets"), 16 | ) 17 | .arg( 18 | Arg::with_name("updates") 19 | .short("u") 20 | .long("updates") 21 | .takes_value(true) 22 | .help("the number of updates"), 23 | ) 24 | .get_matches(); 25 | 26 | let buckets: usize = matches 27 | .value_of("buckets") 28 | .unwrap_or("4000") 29 | .parse() 30 | .expect("bad buckets arg"); 31 | let updates: u64 = matches 32 | .value_of("updates") 33 | .unwrap_or("40000") 34 | .parse() 35 | .expect("bad updates arg"); 36 | 37 | let shmem = Shmem::new().expect("shmem initializtion failed"); 38 | do_histo_shmem(&shmem, buckets, updates); 39 | do_histo_shmem_global(&shmem, buckets, updates); 40 | } 41 | 42 | fn do_histo_shmem(shmem: &Shmem, buckets: usize, updates: u64) { 43 | let me = shmem.my_pe(); 44 | let num = shmem.n_pes(); 45 | //println!("Hello, world from PE {} of {}!", me, num); 46 | 47 | let mut rng = rand::thread_rng(); 48 | let die = Uniform::from(0..buckets as usize); 49 | 50 | let histo = shmem 51 | .new_object::(buckets / num) 52 | .expect("allocation error"); 53 | 54 | for _i in 0..updates { 55 | let index = die.sample(&mut rng); 56 | let pe = index % num; 57 | let offset = index / num; 58 | 
histo.atomic_inc(offset, pe).expect("failed atomic inc"); 59 | } 60 | shmem.barrier(); 61 | println!("pe{}, first bucket {}", me, histo.local_part()[0]); 62 | } 63 | 64 | fn do_histo_shmem_global(shmem: &Shmem, buckets: usize, updates: u64) { 65 | let me = shmem.my_pe(); 66 | //let num = shmem.n_pes(); 67 | //println!("Hello, world from PE {} of {}!", me, num); 68 | 69 | let mut rng = rand::thread_rng(); 70 | let die = Uniform::from(0..buckets as usize); 71 | 72 | let histo = shmem 73 | .new_global_object::(buckets, 0) 74 | .expect("allocation error"); 75 | 76 | for _i in 0..updates { 77 | let index = die.sample(&mut rng); 78 | histo.atomic_inc(index).expect("failed atomic inc"); 79 | } 80 | shmem.barrier(); 81 | println!("pe{}, first bucket {}", me, histo.local_part()[0]); 82 | } 83 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Module to handle errors that occur in shmem 2 | //! Derived from https://blog.burntsushi.net/rust-error-handling/ 3 | 4 | /// Enum to keep all the error types we and our depenencies use 5 | #[derive(Debug)] 6 | pub enum Error { 7 | /// An error that occured in a called function 8 | Io(std::io::Error), 9 | /// We cannot re-establish a shmem_init() after shmem_finalize() was called 10 | /// This should only occur if a user of our package calls new() multiple times 11 | NewAfterDrop, 12 | /// Some bound on an object::Object or object::GlobalObject was exceeded 13 | BoundsExceeded, 14 | /// Tried to reference an invalid PE 15 | InvalidPE, 16 | /// We don't support arbitrary blocking (yet?) 
17 | UnsupportedBlocking, 18 | } 19 | 20 | impl std::fmt::Display for Error { 21 | /// Format one of our errors for display to user 22 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 23 | match *self { 24 | Error::Io(ref err) => err.fmt(f), 25 | Error::NewAfterDrop => write!(f, "Shmem::new() after last Shem::drop()"), 26 | Error::BoundsExceeded => write!(f, "Shmem bounds exceeded on remote operation"), 27 | Error::InvalidPE => write!(f, "PE number exceeds num_pes()"), 28 | Error::UnsupportedBlocking => write!(f, "Unsupported blocking factor"), 29 | } 30 | } 31 | } 32 | 33 | impl From for Error { 34 | /// Pull the io error into our space 35 | fn from(err: std::io::Error) -> Error { 36 | Error::Io(err) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/shmem/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn( 2 | missing_docs, 3 | future_incompatible, 4 | missing_debug_implementations, 5 | rust_2018_idioms 6 | )] 7 | //! This package implements the rust interface to shmem-sys, which in turn 8 | //! is a wrapper for OpenShmem implmentations, currently version 1.4 9 | /// 10 | /// Copyright (c) 2020, Institute for Defense Analyses 11 | /// 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 12 | /// 13 | /// All rights reserved. 14 | /// 15 | /// This file is part of Convey, a conveyor library for rust. For 16 | /// licence information see the LICENSE file in the top level dirctory 17 | /// of the distribution. 18 | 19 | /// Generic result type for this library 20 | pub type Result = std::result::Result; 21 | 22 | /// Our instance struct, currently no per-instance state 23 | #[derive(Debug)] 24 | pub struct Shmem {} 25 | 26 | pub mod atomic; 27 | pub mod collect; 28 | pub mod error; 29 | pub mod object; 30 | pub mod shmem; 31 | 32 | // Tests are in shmem.rs. 
Unfortunately cannot have tests in other modules. 33 | // (See test notes in shmem.rs) Todo: this could be done 34 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | - [Chapter 1](./chapter_1.md) 4 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/src/chapter_1.md: -------------------------------------------------------------------------------- 1 | # Chapter 1 2 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/convey/src/testing_support.rs: -------------------------------------------------------------------------------- 1 | use crate::Convey; 2 | use lazy_static::lazy_static; 3 | use std::sync::{Mutex, MutexGuard}; 4 | 5 | lazy_static! { 6 | static ref LOCK: Mutex = Mutex::new(0); 7 | } 8 | 9 | /// A struct to hold our open mutex and open convey instance 10 | #[derive(Debug)] 11 | pub struct TestingMutex<'a> { 12 | /// Tests can use this convey instance 13 | pub convey: Convey, 14 | // Test should *not* use this, so keep it private 15 | _data: MutexGuard<'a, i32>, 16 | } 17 | 18 | impl<'a> TestingMutex<'a> { 19 | /// Create a new TestingMutex instance 20 | pub fn new() -> TestingMutex<'a> { 21 | // it is important to get the convey structure first due to 22 | // requirement that we always have one shmem instance open. 
23 | // There could actually be a race condition here, so maybe we 24 | // need to have a slight delay or synchronization somewhere 25 | let convey = Convey::new().unwrap(); 26 | let data = LOCK.lock().unwrap(); 27 | TestingMutex { 28 | convey: convey, 29 | _data: data, 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/delta_stepping/.gitignore: -------------------------------------------------------------------------------- 1 | /*.dst 2 | /*.mm 3 | /*.out 4 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/delta_stepping/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "delta_stepping" 17 | version = "0.1.0" 18 | authors = ["John R. 
Gilbert "] 19 | edition = "2018" 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | chrono="0.4.15" 25 | clap="2" 26 | itertools="0" 27 | rand = "0.7" 28 | regex="1" 29 | serde = { version = "1.0", features = ["derive"] } 30 | 31 | [dependencies.spmat] 32 | path = "../spmat" 33 | 34 | [dependencies.convey_hpc] 35 | path = "../../../../../convey_private" 36 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/scripts/fixmodules.sh: -------------------------------------------------------------------------------- 1 | module unload PrgEnv-intel 2 | module load PrgEnv-gnu 3 | module swap gcc/7.3.0 4 | module load cuda/10.1.168 5 | module load llvm 6 | module load cray-shmem 7 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/spmat/.gitignore: -------------------------------------------------------------------------------- 1 | *.mm 2 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/spmat/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 
12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "spmat" 17 | version = "0.1.0" 18 | authors = ["Bill Carlson "] 19 | edition = "2018" 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | convey_hpc = { path="../../../../../convey_private" } 25 | rand = "0.7" 26 | regex="1" 27 | serde = "1.0" 28 | 29 | [dev-dependencies] 30 | clap="2" 31 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/spmat/scripts/permute_convey.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --qos=debug 3 | #SBATCH --time=1 4 | #SBATCH --nodes=2 5 | #SBATCH --tasks-per-node=16 6 | #SBATCH --constraint=haswell 7 | 8 | srun $HOME/Rust/pshmem_private/target/release/examples/permute_convey 9 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/spmat/scripts/randperm_convey.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --qos=debug 3 | #SBATCH --time=1 4 | #SBATCH --nodes=2 5 | #SBATCH --tasks-per-node=32 6 | #SBATCH --constraint=haswell 7 | 8 | srun $HOME/Rust/pshmem_private/target/release/examples/randperm_convey 9 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn( 2 | missing_docs, 3 | future_incompatible, 4 | missing_debug_implementations, 5 | rust_2018_idioms 6 | )] 7 | 8 | //! Main Rust Bale Library 9 | /// 10 | /// Copyright (c) 2020, Institute for Defense Analyses 11 | /// 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 12 | /// 13 | /// All rights reserved. 14 | /// 15 | /// This file is part of Bale. 
For license information see the 16 | /// LICENSE file in the top level dirctory of the distribution. 17 | 18 | // This is an empty library, simple programs are in the bin 19 | // sub-directory of this directory. 20 | 21 | // We may standardize argument parsing in this library in the future, 22 | // maybe some other things. 23 | #[derive(Debug)] 24 | pub struct BaleArgs; 25 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/toposort/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "toposort" 17 | version = "0.1.0" 18 | authors = ["wwc"] 19 | edition = "2018" 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | clap = "2" 25 | serde = { version = "1.0", features = ["derive"] } 26 | 27 | [dependencies.spmat] 28 | path = "../spmat" 29 | 30 | [dependencies.convey_hpc] 31 | path = "../../../../../convey_private" 32 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/toposort/scripts/toposort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --qos=debug 3 | #SBATCH --time=1 4 | #SBATCH --nodes=8 5 | #SBATCH --tasks-per-node=32 6 | #SBATCH --constraint=haswell 7 | 8 | srun $HOME/Rust/pshmem_private/target/release/toposort -n 20000 9 | 
-------------------------------------------------------------------------------- /src/other_parallel/Rust/triangle/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "triangle" 17 | version = "0.1.0" 18 | authors = ["wwc"] 19 | edition = "2018" 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | clap = "2" 25 | serde = { version = "1.0", features = ["derive"] } 26 | 27 | [dependencies.spmat] 28 | path = "../spmat" 29 | 30 | [dependencies.convey_hpc] 31 | path = "../../../../../convey_private" 32 | -------------------------------------------------------------------------------- /src/other_parallel/Rust/triangle/scripts/toposort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --qos=debug 3 | #SBATCH --time=1 4 | #SBATCH --nodes=8 5 | #SBATCH --tasks-per-node=32 6 | #SBATCH --constraint=haswell 7 | 8 | srun $HOME/Rust/pshmem_private/target/release/toposort -n 20000 9 | -------------------------------------------------------------------------------- /src/other_serial/C/conftest.py: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights 
reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | def pytest_addoption(parser): 16 | parser.addoption("-P", "--path", action="store", default="./") 17 | parser.addoption("-M", "--implementation_mask", action="store", default="31") 18 | 19 | def pytest_generate_tests(metafunc): 20 | option_value = metafunc.config.option.path 21 | if 'path' in metafunc.fixturenames and option_value is not None: 22 | metafunc.parametrize("path",[option_value]) 23 | option_value = metafunc.config.option.implementation_mask 24 | if 'implementation_mask' in metafunc.fixturenames and option_value is not None: 25 | metafunc.parametrize("implementation_mask",[option_value]) 26 | -------------------------------------------------------------------------------- /src/other_serial/C/default_app_opts.h: -------------------------------------------------------------------------------- 1 | /*******************************************************************/ 2 | /* Copyright (c) 2020, Institute for Defense Analyses */ 3 | /* 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 */ 4 | /* */ 5 | /* All rights reserved. */ 6 | /* */ 7 | /* This file is part of Bale. For license information see the */ 8 | /* LICENSE file in the top level dirctory of the distribution. 
*/ 9 | /*******************************************************************/ 10 | 11 | // Simple list of default sizes for each app 12 | 13 | #define HISTO_TABLE_SIZE (1L<<20) 14 | #define HISTO_NUM_UPDATES 100000 15 | 16 | #define IG_TABLE_SIZE (1L<<20) 17 | #define IG_NUM_UPDATES 100000 18 | 19 | #define PERMUTE_NUM_ROWS 10000 20 | 21 | #define RANDPERM_SIZE 1000000 22 | 23 | #define SSSP_NUM_ROWS 10000 24 | 25 | #define TRANSPOSE_NUM_ROWS 10000 26 | 27 | #define TRIANGLE_NUM_ROWS 50000 28 | 29 | #define UNIONFIND_NUM_ROWS 10000 30 | -------------------------------------------------------------------------------- /src/other_serial/C/demo_spmat.c: -------------------------------------------------------------------------------- 1 | /*******************************************************************/ 2 | /* Copyright (c) 2020, Institute for Defense Analyses */ 3 | /* 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 */ 4 | /* */ 5 | /* All rights reserved. */ 6 | /* */ 7 | /* This file is part of Bale. For license information see the */ 8 | /* LICENSE file in the top level dirctory of the distribution. */ 9 | /*******************************************************************/ 10 | 11 | /*! \file demo_spmat.c 12 | * \brief Program that demonstrates (checks) some of the rountines in 13 | * spmat_util.c 14 | */ 15 | 16 | #include "spmat_utils.h" 17 | 18 | int main(int argc, char * argv[]) 19 | { 20 | int i; 21 | 22 | int64_t *p = rand_perm(10, 0); 23 | printf("Identity perm = "); 24 | for(i=0; i<10; i++) 25 | printf(" %"PRId64,p[i]); 26 | printf("\n"); 27 | free(p); 28 | 29 | int64_t *q = rand_perm(12, 1); 30 | printf("Random perm = "); 31 | for(i=0; i<12; i++) 32 | printf(" %"PRId64,q[i]); 33 | printf("\n"); 34 | 35 | printf("Is q a perm? %s\n", (is_perm(q,12))?"yes":"no"); 36 | q[0] = 12; 37 | printf("Is q a perm? 
%s\n", (is_perm(q,12))?"yes":"no"); 38 | free(q); 39 | 40 | 41 | 42 | sparsemat_t *graph; 43 | graph = read_matrix_mm("../../../example_matrices/undirected_flat_100.mm"); 44 | spmat_stats(graph); 45 | 46 | dump_matrix(graph, 4, "dump_4.out"); 47 | dump_matrix(graph, 0, "dump_0.out"); 48 | 49 | write_matrix_mm(graph, "tidy_demo.mm.out"); 50 | clear_matrix(graph); 51 | 52 | #if 0 53 | printf("Generate a Kronecker Product of Stars\n"); 54 | kron_args_t * kron_args = kron_args_init("2: 2 2"); 55 | printf("-- input %s\n", kron_args->str); 56 | printf("-- mode %"PRId32"\n", kron_args->mode); 57 | printf("-- num_stars %"PRId32"\n", kron_args->num_stars); 58 | for(i=0; i<kron_args->num_stars; i++) 59 | printf("%"PRId32" ", kron_args->star_size[i]); 60 | printf("\n-- numrows %"PRId64"\n", kron_args->numrows); 61 | printf("Known number of triangles = %"PRId64"\n", calc_num_tri_kron_graph(kron_args)); 62 | 63 | sparsemat_t *Kron = kronecker_product_graph(kron_args); 64 | spmat_stats(Kron); 65 | dump_matrix(Kron, 0, "kron.out"); 66 | 67 | clear_matrix(Kron); 68 | free(kron_args); 69 | #endif 70 | return(0); 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/other_serial/C/histo.md: -------------------------------------------------------------------------------- 1 | ## histo 2 | ### Definition 3 | We form the histogram of a large number of `int64_t`'s into a large table. 4 | The loop is as simple as: 5 | ``` 6 | foreach idx in index[ ] 7 | counts[idx]++ 8 | ``` 9 | On a serial thread this app shows the difference between random stores 10 | and streaming stores. On a large parallel machine this is the simplest 11 | case of managing the latency and bandwidth of the interconnection network 12 | and the race condition of multiple threads updating the same entry. 13 | 14 | ### Algorithms 15 | We start by filling the index array with random numbers between 0 and the `table size`.
16 | 17 | We have the generic algorithm (as simple as the loop above). 18 | 19 | We also have a buffered version where we sort the indices 20 | into buffers, based on their high bits. When a buffer gets full 21 | we perform all the updates from that buffer's contents all at once. 22 | Studying this version with different numbers of buffers and buffer sizes 23 | might reveal properties of the memory hierarchy, like page sizes or TLBs. 24 | 25 | A third version first sorts the indices before running the loop. 26 | This ought to be closer to the streams benchmark's performance 27 | as it is streaming with holes in it. 28 | 29 | ### Discussion 30 | Comparing these random access patterns to the streams benchmark for 31 | a particular node could be interesting. 32 | 33 | In serial, we don't have the problem of atomic updates in `histo`. 34 | The parallel version in bale_classic has to use some form of atomicity 35 | to handle multiple threads concurrently updating a particular entry in `counts[ ]`. 36 | 37 | Running this simple version of `histo` on a node, with node level threads, 38 | might reveal something about atomic updates to memory, 39 | without the performance being dominated by the interconnection network. 40 | 41 | ### References 42 | https://www.cs.virginia.edu/stream 43 | -------------------------------------------------------------------------------- /src/other_serial/C/ig.md: -------------------------------------------------------------------------------- 1 | ## ig 2 | ### Definition 3 | We do an index_gather of a large number of entries from a large table. 4 | The loop is simply: 5 | ``` 6 | for i in 0,...n-1 7 | target[i] = source[ index[i] ] 8 | ``` 9 | This is the complement of [histo](histo.md). 10 | ### Algorithms 11 | We have the generic implementation and a buffered implementation. 12 | 13 | In the buffered version, we collect the index[i] values into 14 | buffers based on their high bits.
When a buffer is full we 15 | do all the gathers for the indices in that buffer before continuing. 16 | 17 | ### Discussion 18 | This is surprisingly complicated in the parallel case. 19 | 20 | This exercises a streaming load of `index`, then random loads from the `source` table 21 | and a streaming store to `target`. 22 | As with the histogram example, playing with the number and sizes of 23 | buffers might reveal properties of a single thread memory hierarchy. 24 | 25 | ### References 26 | -------------------------------------------------------------------------------- /src/other_serial/C/mainpage.h: -------------------------------------------------------------------------------- 1 | /*******************************************************************/ 2 | /* Copyright (c) 2020, Institute for Defense Analyses */ 3 | /* 4850 Mark Center Drive, Alexandria, VA 22311-1882; 703-845-2500 */ 4 | /* */ 5 | /* All rights reserved. */ 6 | /* */ 7 | /* This file is part of Bale. For license information see the */ 8 | /* LICENSE file in the top level directory of the distribution. */ 9 | /*******************************************************************/ 10 | 11 | -------------------------------------------------------------------------------- /src/other_serial/C/opts_demo.md: -------------------------------------------------------------------------------- 1 | ## opt_demo 2 | 3 | ### Definition 4 | Consistency among the command line options for the 5 | bale apps was hard to maintain. We are now using 6 | argp to solve this problem. This file contains four 7 | complete programs (or the boilerplate for programs) 8 | that are protected by #ifdef. You have to un-comment 9 | the appropriate #define and remake `opt_demo` to run 10 | each of them. The intent is to show that you can 11 | modify default values for given options and add new 12 | options without getting into the details of argp.
13 | 14 | ### References 15 | -------------------------------------------------------------------------------- /src/other_serial/C/permute_matrix.md: -------------------------------------------------------------------------------- 1 | ## permute_matrix 2 | 3 | ### Definition 4 | We apply given row and column permutations to a sparse matrix. 5 | 6 | ### Algorithm 7 | We produce a new sparse matrix data structure by copying the nonzeros and value entries 8 | of the original matrix to their new positions in the permuted matrix. 9 | To permute the rows, we compute the new offsets, based on the new order for the 10 | original rows. 11 | As we are copying the `nonzero[ ]` and `value[ ]` entries from the original matrix 12 | to their new position in the permuted matrix, 13 | we replace the nonzeros (the column indices) with the new column indices 14 | given by the column permutation. 15 | 16 | ### Discussion 17 | This app is really just a wrapper that calls the routine in the sparse matrix library. 18 | 19 | This is in C_bale to shadow the app in bale_classic. 20 | It is here because we need it in the library. 21 | It is an app in bale_classic because the communication pattern is interesting. 22 | 23 | ### References 24 | 25 | -------------------------------------------------------------------------------- /src/other_serial/C/randperm.md: -------------------------------------------------------------------------------- 1 | ## randperm 2 | 3 | ### Definition 4 | We fill an array of `int64_t`'s with a flat random permutation. 5 | 6 | ### Algorithms 7 | #### the Fisher-Yates algorithm 8 | ``` 9 | fill the array, rand[ ], with indices 0 thru n-1. 10 | for l=n-1, n-2, ... ,1 11 | swap rand[l] with a random entry in 0,1,...,l 12 | ``` 13 | By definition this picks "n balls from an urn without replacement". 14 | This is a standard serial algorithm that is, in fact, inherently serial. 15 | You have to process the entries from right to left one at a time.
16 | 17 | #### the "dart throwing" algorithm 18 | We pick a dart board (an array) that is bigger than the desired permutation, 19 | say twice as big, and fill the entries with -1. 20 | Then we randomly throw darts (numbers from 0 to `len-1`) 21 | at the dart board, re-throwing any dart that hits an entry that is already occupied (!= -1). 22 | Then we squeeze out the holes. 23 | 24 | We picked the dartboard to be twice the size of the array 25 | so that even the last dart has a 50/50 chance of hitting an open entry. 26 | 27 | #### the sorting algorithm 28 | We form an array of (index, key) pairs. Then we randomly fill the keys 29 | and sort the array on the keys. Then we read the permutation from the indices. 30 | 31 | NB. Repeated keys are bad, but tolerated. 32 | They would be ok if ties were broken randomly or if doubles were real numbers. 33 | 34 | ### Discussion 35 | The Fisher-Yates algorithm must do one thing at a time, so it doesn't parallelize. 36 | 37 | The dart throwing algorithm is here because it shadows the algorithm 38 | in bale_classic. It is in bale_classic because it is fun. 39 | And because the AGP version is essentially the same as this serial version. 40 | 41 | The sorting version seems like a reasonable parallel algorithm, but it 42 | is not in bale_classic because it's not that interesting or fun. 43 | 44 | ### References 45 | https://en.wikipedia.org/wiki/Fisher-Yates_shuffle 46 | -------------------------------------------------------------------------------- /src/other_serial/C/runall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # 4 | # Copyright(C) 2018-2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # All rights reserved. 8 | # 9 | # This file is a part of Bale. For license information see the 10 | # LICENSE file in the top level directory of the distribution.
11 | # 12 | 13 | # this script runs all of the bale apps (C versions) with trivial parameters 14 | # NOTE: nothing below checks exit codes, so it does not stop if an application exits abnormally. 15 | # 16 | cores_per_node="" 17 | quick_run=0 18 | 19 | while getopts ":c:qM:" opt; do 20 | case $opt in 21 | q ) quick_run=1;; 22 | \? ) echo 'usage: runall [-q]' 23 | exit 1 24 | esac 25 | done 26 | 27 | # Let make see that they are all up to date 28 | make 29 | # 30 | 31 | if [ $quick_run == 1 ]; then 32 | # this makes the tests run a little quicker, if you want to run a longer 33 | # set of tests, use a second argument (can be anything!) 34 | options+=" -n 1000" 35 | fi 36 | 37 | echo; echo XXXXXXXXXXXX demo_spmat XXXXXXXXXXXXXXX 38 | ./demo_spmat 39 | 40 | 41 | for app in histo ig topo randperm permute_matrix transpose_matrix triangles unionfind sssp 42 | do 43 | # just run the command with its default arguments 44 | echo; echo XXXXXXXXXXXX $app XXXXXXXXXXXXXXX 45 | echo; 46 | ./$app 47 | echo; 48 | done 49 | -------------------------------------------------------------------------------- /src/other_serial/C/spmat_utils.md: -------------------------------------------------------------------------------- 1 | ## spmat_utils 2 | 3 | ### Definition 4 | The definitions and a few sparse matrix routines we use in these apps. 5 | 6 | ### Discussion 7 | Compared to the libraries in bale_classic this is a trivial library. 8 | This is not a general library; we only include routines needed to support the 9 | apps in this directory. We simply took the `spmat_utils` and `libgetput` 10 | libraries of bale_classic and deleted the shared memory and threaded 11 | communication code.
12 | 13 | ### References 14 | -------------------------------------------------------------------------------- /src/other_serial/C/sssp.md: -------------------------------------------------------------------------------- 1 | ## sssp Single Source Shortest Path 2 | 3 | ### Definition 4 | We are given the adjacency matrix for a graph with non-negative edge weights *c(v,w)* 5 | and a given source vertex, *v_0*. 6 | We wish to find a lightest weighted path from *v_0* to each of the other vertices, 7 | where the weight of a path is the sum of the weights of the edges in the path. 8 | Note, if the graph is undirected we work with the full (symmetric) adjacency matrix. 9 | 10 | ### Algorithms 11 | We consider three algorithms: Dijkstra's, Delta-Stepping, and Bellman-Ford. 12 | Dijkstra's algorithm is not in bale_classic because it is a serial algorithm. 13 | 14 | Delta-Stepping and Bellman-Ford are here as shadows of the parallel versions. 15 | The algorithms here are surprisingly similar to those in bale_classic. 16 | These may be slightly easier to read because we don't have the communication layer. 17 | 18 | ### Discussion 19 | The priority queue version of Dijkstra's algorithm is a favorite example 20 | of the use of data structures in irregular algorithms. 21 | We discuss this issue in the bale_classic app and in the serial unionfind app. 22 | 23 | ### References 24 | "Delta-stepping: a parallelizable shortest path algorithm" by U. Meyer and P. Sanders. 25 | -------------------------------------------------------------------------------- /src/other_serial/C/std_options.md: -------------------------------------------------------------------------------- 1 | ## std_options 2 | 3 | ### Definition 4 | The use of the argp library (standard on most versions of Unix) 5 | to provide unified command line options and parsing of the options.
6 | 7 | ### Discussion 8 | If you want to modify the command line options, play with the 9 | demo program opts_demo.c. 10 | 11 | -------------------------------------------------------------------------------- /src/other_serial/C/toposort.md: -------------------------------------------------------------------------------- 1 | ## toposort 2 | ### Definition 3 | We are given a matrix that is a random row and column permutation 4 | of an upper triangular matrix (with ones on the diagonal). 5 | Such a matrix has been called a morally upper triangular matrix. 6 | This algorithm finds a row and column permutation that, when applied, 7 | returns it to an upper triangular form. 8 | 9 | ### Algorithms 10 | We generate the morally upper triangular matrix by 11 | randomly permuting the rows and columns of a triangular matrix. 12 | In the figure below, we have marked the nonzeros in the matrix 13 | with letters to help follow the permutations. 14 | 15 | To find row and column permutations that would return the matrix 16 | to upper triangular form, we recursively find pivot positions. 17 | A pivot is a nonzero (really a (row,col) pair) that is the single 18 | nonzero in a row. If we permute this row and column to the last 19 | row and column of our new matrix and delete the row and column 20 | from the original matrix, we can recursively construct the new 21 | matrix from the bottom right corner to the top left corner. 22 | 23 | 24 | 25 | 26 | A more detailed description of the algorithm is given in 27 | the bale_classic toposort documentation. 28 | 29 | #### enqueuing pivots 30 | In this version when the pivots are found they are placed in a queue. 31 | The algorithm runs until the queue is empty. 32 | 33 | #### loop to find pivots 34 | In the loop version we simply continue to loop over the rows 35 | until we have found all the pivots. This simplifies the 36 | flow of the algorithm but does redundant checking of 37 | rows which have already been processed.
38 | 39 | ### Discussion 40 | In the serial case, the use of a queue seems like an obvious win. 41 | In the parallel case, the queue has to be managed with remote operations. 42 | Whether or not it is a win in this case is an interesting discussion. 43 | 44 | ### References 45 | -------------------------------------------------------------------------------- /src/other_serial/C/transpose_matrix.md: -------------------------------------------------------------------------------- 1 | ## transpose_matrix 2 | 3 | ### Definition 4 | Compute the transpose of a given sparse matrix. 5 | 6 | ### Algorithm 7 | This produces a `sparsemat_t` data structure to hold the transpose of the given matrix. 8 | We start by computing column counts. These become row counts in the transpose. 9 | With these we can allocate the memory and set the row offsets for the transpose. 10 | Then we go through the `nonzero[ ]` and `value[ ]` arrays one row at a time. 11 | We write the entries in the given row to the location in the transpose matrix 12 | given by the nonzero (column number) of the original matrix. 13 | 14 | ### Discussion 15 | This app is a timer wrapper for the routine in the sparse matrix library. 16 | 17 | This is in C_bale to shadow the app in bale_classic. 18 | It is an app in bale_classic because the communication pattern is interesting. 19 | 20 | ### References 21 | -------------------------------------------------------------------------------- /src/other_serial/C/triangle.md: -------------------------------------------------------------------------------- 1 | ## triangle 2 | ### Definition 3 | Find the number of triangles in a given simple unweighted graph. 4 | 5 | A triangle is a set of three vertices {u,w,v} where edges {u,w}, {w,v} and {u,v} are in the graph. 6 | 7 | ### Algorithm 8 | This uses a matrix algebra approach to counting triangles in a graph.
9 | Given L, the (strictly) lower triangular matrix that holds the undirected graph, 10 | we compute \sum_ij{L .& (L * L)} and \sum_ij{L .& (L * U)}, 11 | where U is the upper triangular matrix from the full adjacency matrix. 12 | Recall that U = L transpose. 13 | 14 | ### Discussion 15 | This is here to shadow the algorithms in bale_classic. 16 | 17 | The amount of work done in these formulations depends on the matrix. 18 | This is interesting and well studied even in the serial case, but not here. 19 | 20 | In the parallel case, it is even more interesting because it depends 21 | on one's ability to push or pull information remotely as well as the 22 | row densities in the matrix. 23 | 24 | ### References 25 | See the book, "Graph Algorithms in the Language of Linear Algebra", 26 | edited by Kepner and Gilbert, for more details on our approach to this problem. 27 | 28 | -------------------------------------------------------------------------------- /src/other_serial/C/unionfind.md: -------------------------------------------------------------------------------- 1 | ## unionfind 2 | 3 | ### Definition 4 | We use the unionfind data structure to implement the union of disjoint sets 5 | approach to finding the connected components in a simple graph. 6 | 7 | ### Algorithms 8 | This algorithm relies on the notion of disjoint subsets. Given a collection 9 | of disjoint subsets that covers a space, the union of any of the members of 10 | the collection will result in another collection of disjoint subsets that 11 | covers the space. The algorithm starts with each vertex in its own subset. 12 | Then it looks at each edge in the graph and forms the union of the subsets 13 | that contain the incident vertices. The resulting subsets are the connected 14 | components. 15 | 16 | The key to the algorithm is a data structure that forms a tree for each subset. 17 | The union of the subsets is formed by connecting the root of one tree to the 18 | other tree.
19 | 20 | We have implemented two versions (there are a number of versions): 21 | the first joins the trees by connecting 22 | the root of the tree corresponding to one vertex of the edge to the node 23 | corresponding to the other vertex of the connecting edge. 24 | This is referred to as the "bad algorithm" because it is not 25 | nearly as efficient as the second version. The most efficient 26 | version connects the roots of the two trees according to the rank of the 27 | trees, where the rank is the length of the longest branch in the tree. 28 | 29 | ### Discussion 30 | This algorithm is in the C cousin of bale because we 31 | don't have a parallel version. 32 | 33 | Like the use of the priority queue in Dijkstra's algorithm, this algorithm is 34 | a favorite example of how important data structures are. In these algorithms, 35 | the data structure is more than a place to store the state. Manipulating 36 | the data structure *is* the computation. 37 | 38 | In the most efficient version of the algorithm we are manipulating structures 39 | that hold pointers that encode the tree and the rank of the tree. To do this 40 | in an AGP model, the whole operation must be done atomically. Doing this in a 41 | lock-free way is currently beyond our capability. 42 | 43 | Unlike Dijkstra's or the Fisher-Yates algorithm, the use of this forest 44 | of trees is not necessarily serial. There is plenty of opportunity for 45 | asynchronous parallelism, but keeping the data structure consistent while 46 | making parallel changes to it seems overwhelming. 47 | 48 | There are other algorithms 49 | to find the connected components in a graph. We present this algorithm 50 | because we are interested in a discussion about parallel data structures.
51 | 52 | ### References 53 | -------------------------------------------------------------------------------- /src/other_serial/Rust/.gitignore: -------------------------------------------------------------------------------- 1 | # no data files here 2 | *.out 3 | *.dump 4 | *.mm 5 | -------------------------------------------------------------------------------- /src/other_serial/Rust/README.md: -------------------------------------------------------------------------------- 1 | This is a partially complete project to write serial versions of Bale applications in the Rust Programming Language 2 | 3 | -------------------------------------------------------------------------------- /src/other_serial/Rust/delta_stepping/.gitignore: -------------------------------------------------------------------------------- 1 | /*.mm 2 | /*.wts 3 | /*.out 4 | eg* 5 | -------------------------------------------------------------------------------- /src/other_serial/Rust/delta_stepping/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "delta_stepping" 17 | version = "0.1.0" 18 | authors = ["John R. 
Gilbert "] 19 | edition = "2018" 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | clap="2" 25 | chrono="0.4.15" 26 | itertools="0" 27 | regex="1" 28 | 29 | [dependencies.sparsemat] 30 | path = "../sparsemat" 31 | 32 | -------------------------------------------------------------------------------- /src/other_serial/Rust/delta_stepping/README.md: -------------------------------------------------------------------------------- 1 | # Delta-stepping for single-source shortest path (SSSP) 2 | 3 | 4 | This application finds shortest path lengths from a single source in 5 | a directed graph, using the Meyer/Sanders [delta-stepping algorithm]( 6 | https://www.sciencedirect.com/science/article/pii/S0196677403000762). 7 | 8 | It is a serial rust program. 9 | 10 | ### Test Matrices 11 | 12 | The following are provided in the top Bale repository example_matrices directory: 13 | 14 | Matrices: 15 | 16 | - er_nn_prob.mm reasonably dense ER graphs 17 | - sparsennn.mm sparser ER graphs 18 | 19 | Distances: 20 | 21 | - er_nn_prob.dst distances with source_vtx 0 22 | - sparsennn.dst distances with source_vtx 2 23 | -------------------------------------------------------------------------------- /src/other_serial/Rust/sparsemat/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 
12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "sparsemat" 17 | version = "0.1.0" 18 | authors = ["wwc"] 19 | edition = "2018" 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | rand="0.7" 25 | regex="1" 26 | 27 | -------------------------------------------------------------------------------- /src/other_serial/Rust/toposort/.gitignore: -------------------------------------------------------------------------------- 1 | *.mm 2 | -------------------------------------------------------------------------------- /src/other_serial/Rust/toposort/Cargo.toml: -------------------------------------------------------------------------------- 1 | #/****************************************************************** 2 | # 3 | # 4 | # Copyright(C) 2020, Institute for Defense Analyses 5 | # 4850 Mark Center Drive, Alexandria, VA; 703-845-2500 6 | # 7 | # 8 | # All rights reserved. 9 | # 10 | # This file is a part of Bale. For license information see the 11 | # LICENSE file in the top level directory of the distribution. 12 | # 13 | # 14 | # *****************************************************************/ 15 | [package] 16 | name = "toposort" 17 | version = "0.1.0" 18 | authors = ["wwc"] 19 | edition = "2018" 20 | 21 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 22 | 23 | [dependencies] 24 | clap="2" 25 | 26 | [dependencies.sparsemat] 27 | path = "../sparsemat" 28 | 29 | --------------------------------------------------------------------------------