├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Data.md ├── INSTALL.md ├── Jenkinsfile ├── LICENSE ├── README.md ├── RELEASE.md ├── cmake_modules ├── GenPkgConfig.cmake └── PrintOpts.cmake ├── compute ├── ddiag.c ├── dgemm.c ├── dgenmat.c ├── dgenrhs.c ├── dgytlr.c ├── dhagcm.c ├── dhagdm.c ├── dpotrf.c ├── dtrsm.c ├── duncompress.c ├── hicma_ztile.c ├── pdgemm.c ├── pdgenmat.c ├── pdgenrhs.c ├── pdgytlr.c ├── pdhagcm.c ├── pdhagdm.c ├── pdpotrf.c ├── pdtrsm.c ├── pzgetrf.c ├── pzgytlr.c ├── pzlacpy.c ├── pzlaset.c ├── pzplrnt.c ├── zdiag.c ├── zgetrf.c ├── zgytlr.c ├── zlacpy.c ├── zlaset.c ├── zplrnt.c └── zuncompress.c ├── control ├── hicma_compute_z.h ├── hicma_config.h ├── hicma_init.c └── hicma_tile.c ├── docs ├── CMakeLists.txt ├── HiCMA-handout-SC17.png ├── Installation.md ├── config.in └── index.md ├── exec_env_info_ADSC20 ├── 128-core-amd-epyc-output.txt ├── 32-core-crayxc40-output.txt ├── 40-core-cascadelake-output.txt ├── 48-core-crayxc-output.txt ├── 64-core-thunder-arm-output.txt └── collect_environment.sh ├── exp ├── cases │ ├── 10M.txt │ ├── 1M-8M.txt │ ├── 1M-acc.txt │ ├── 1M-few-case.txt │ ├── 1M.txt │ ├── 500K-1M.txt │ ├── 50K2M.txt │ ├── 50K500K.txt │ ├── ae-10M.txt │ ├── allsize-allnb-1.txt │ ├── biggersize-allnb-1.txt │ ├── biggersize-allnb-2.txt │ ├── cfd_virus1024.sh │ ├── cfd_virus1_batch.sh │ ├── cfd_virus4.sh │ ├── cfd_virus4_batch.sh │ ├── cfd_virus_10km.sh │ ├── cfd_virus_basis.sh │ ├── cfd_virusdist.sh │ ├── cham.txt │ ├── cham32.txt │ ├── edsin.sh │ ├── edsin2.sh │ ├── fxt-100K-16nodes.txt │ ├── m10.txt │ ├── m2.txt │ ├── matern.sh │ ├── mattern-50K500K.txt │ ├── mpi32.txt │ ├── rnd-50K500K.txt │ ├── rnd.sh │ ├── shmem-1.txt │ ├── shmem-2.txt │ ├── sqexp-trsm.sh │ ├── sqexp-trsm2.sh │ ├── sqexp.sh │ ├── sqexp2.sh │ ├── statistics-cham.sh │ ├── statistics.sh │ ├── statistics2.sh │ ├── t1.txt │ └── t2.txt ├── cham-isambard-simple.sh ├── ci │ ├── compile_shihab.sh │ └── test01.sh ├── cout │ └── .f ├── ctrial.sh ├── distmem.sh ├── 
distmemamd.sh ├── distmemcs.sh ├── distruns.sh ├── distrunsamd.sh ├── distrunscs.sh ├── echameleon │ └── potrf_run.sh ├── incs │ ├── Makefile.cdl2.inc │ └── Makefile.uwork.inc ├── inner-product-gemm.sh ├── jobids │ ├── .f │ ├── 2017-10-18-hicma-10M-1.txt │ ├── 2017-10-18-hicma-10M-2.txt │ ├── 2017-10-19-hicma-10M-1.txt │ ├── 2017-10-19-hicma-50K500K-1.txt │ ├── 2017-10-20-hicma-10M-missingNB-1.txt │ ├── 2017-10-21-hicma-acc-1.txt │ ├── 2017-10-21-hicma-acc-nocomp-1.txt │ ├── 2017-10-21-hicma-compare-1.txt │ ├── 2017-10-22-hicma-acc-mem-1.txt │ ├── 2017-11-02-hicma-more-points-1.txt │ ├── 2017-11-02-hicma-more-points-BIG-1.txt │ ├── 2018-01-22-hicma-edsin-acc-1.txt │ ├── 2018-01-23-hicma-edsin-acc-custommaxrk-1.txt │ ├── 2018-01-27-hicma-ae-test-1.txt │ ├── 2018-02-01-hicma-syn-1.txt │ ├── 2018-02-05-hicma-fxt-ss-1.txt │ ├── 2018-02-10-cham-256-1.txt │ ├── 2018-02-10-hicma-edsin-16-1.txt │ ├── 2018-02-10-hicma-geostat-16-1.txt │ ├── 2018-02-10-hicma-rnd-16-1.txt │ ├── 2018-02-12-hicma-edsin-1.txt │ ├── 2018-02-12-hicma-edsin-scale-1.txt │ ├── 2018-02-12-hicma-matern-1.txt │ ├── 2018-02-12-hicma-sqexp-1.txt │ ├── 2018-02-16-hicma-edsin-scale-1.txt │ ├── 2018-02-16-hicma-edsin-scale-2.txt │ ├── 2018-02-17-hicma-only-prob-1.txt │ ├── 2018-02-17-hicma-trsm-1.txt │ ├── 2018-02-17-hicma-trsm-2.txt │ ├── 2018-02-17-hicma-trsm-3.txt │ ├── 2018-02-17-hicma-trsm-4.txt │ ├── 2018-02-17-hicma-trsm-5.txt │ ├── 2018-02-18-hicma-trsm-1.txt │ ├── 2018-04-17-hicma-threads-updatecham-1.txt │ ├── 2019-09-01-hicma-st2dsqexp-isambard-1.txt │ ├── 2019-09-05-hicma-st2dsqexp-isambard-threads-1.txt │ ├── 2019-09-05-hicma-stat-isambard-1.txt │ ├── 2019-09-10-hicma-stat-isambard-1.txt │ ├── all-hicma-10M.txt │ ├── ji-2018-02-12-turbo_off.txt │ ├── ji-2018-02-12-turbo_on.txt │ └── ji-2018-02-12-turbo_on_small_block_sizes.txt ├── out │ ├── cpu.txt │ ├── flamingo-2d-3d-2019-09-08-1-RAW.txt │ ├── flamingo-reorder-2019-07-29-2.txt │ ├── flamingo-reorder-2019-07-30-1.txt │ ├── 
flamingo-reorder-2019-08-01-1.txt │ ├── gpu.txt │ ├── ibexrome-2021-05-02-sqexp-exp-2d-1.txt │ ├── jasmine-2d-3d-2019-09-08-1-RAW.txt │ ├── mkl.txt │ ├── shihab-2d-3d-2019-09-08-1-RAW.txt │ ├── st-2d-exp.txt │ ├── st-2d-sqexp.txt │ ├── vulture-2021-03-17-st-2d-sqexp-1.txt │ ├── vulture-2021-03-18-mkl-1.txt │ ├── vulture-2021-03-18-st-2d-exp-1.txt │ ├── vulture-2021-05-22-196560-1.txt │ ├── vulture-2v100-2021-03-16-sqexp-exp-2d-2gpus-1.txt │ ├── vulture-gpu-2021-03-07-1.txt │ └── vulture-v100-2021-03-15-sqexp-exp-2d-1.txt ├── plots │ ├── cham-hicma.ipynb │ ├── gemm.py │ └── potrf.py ├── ranks │ └── .f ├── shmem-mpi-test.sh ├── shmem.sh ├── shruns.sh ├── starpulog │ └── .f ├── tr.sh └── valgrind.supp ├── experiment_ADSC20 ├── S1.sh ├── S1slurm.sh ├── S2.sh ├── S2slurm.sh ├── singlevirus.sh └── singlevirusslurm.sh ├── hicma.pc.in ├── hicma_ext ├── control │ ├── common.h │ ├── hicma_async.c │ ├── hicma_async.h │ ├── hicma_auxiliary.c │ ├── hicma_auxiliary.h │ ├── hicma_context.c │ ├── hicma_context.h │ ├── hicma_descriptor.c │ ├── hicma_descriptor.h │ └── hicma_global.h ├── coreblas │ └── compute │ │ └── hicma_global.c └── runtime │ └── starpu │ └── control │ ├── hicma_runtime_async.c │ ├── hicma_runtime_context.c │ ├── hicma_runtime_control.c │ ├── hicma_runtime_descriptor.c │ ├── hicma_runtime_options.c │ ├── hicma_runtime_profiling.c │ └── hicma_runtime_workspace.c ├── include ├── coreblas │ ├── hicma_cblas.h │ ├── hicma_coreblas.h │ ├── hicma_lapacke.h │ ├── hicma_lapacke_config.h │ └── hicma_lapacke_mangling.h ├── hicma.h ├── hicma_common.h ├── hicma_config.h ├── hicma_constants.h ├── hicma_init.h ├── hicma_kernels.h ├── hicma_runtime.h ├── hicma_runtime_z.h ├── hicma_struct.h ├── hicma_types.h ├── hicma_z.h └── runtime │ └── starpu │ ├── hicma_runtime_codelet_profile.h │ ├── hicma_runtime_codelets.h │ ├── hicma_runtime_profiling.h │ ├── hicma_runtime_workspace.h │ └── hicma_starpu.h ├── misc ├── compute │ └── zproblem.c ├── descutil.c ├── dstat.c ├── include │ 
├── auxcompute_z.h │ └── auxdescutil.h └── zstat.c ├── python └── hodlr │ ├── TestTree.py │ ├── Tree.py │ └── hodlr-v1.py ├── runtime └── starpu │ └── codelets │ ├── codelet_dgemm.c │ ├── codelet_dgemm_bdcd.c │ ├── codelet_dgenmat.c │ ├── codelet_dgenrhs.c │ ├── codelet_dgytlr.c │ ├── codelet_dgytlr_diag.c │ ├── codelet_dhagcm.c │ ├── codelet_dhagdm.c │ ├── codelet_dpotrf.c │ ├── codelet_dsyrk.c │ ├── codelet_dtrsm.c │ ├── codelet_duncompress.c │ ├── codelet_hcore_dgemm.c │ ├── codelet_hcore_dtrsm.c │ ├── codelet_hcore_zgemm.c │ ├── codelet_hcore_ztrsm.c │ ├── codelet_zgemm_cd.c │ ├── codelet_zgetrf.c │ ├── codelet_zgytlr.c │ ├── codelet_zgytlr_diag.c │ ├── codelet_zlacpy.c │ ├── codelet_zlaset.c │ ├── codelet_zplrnt.c │ ├── codelet_ztrsmu.c │ └── codelet_zuncompress.c ├── scripts ├── allocate-interactive-node-isambard.sh ├── build-amd-rome.sh ├── build-distmpi.sh ├── build-isambard-netlib.sh ├── build-isambard.sh ├── build-macos-nompi-mkl.sh ├── build-nompi.sh ├── build-shared.sh ├── build-starsh-cpp-intel-parsec.sh ├── build-xc40.sh ├── build.sh ├── intel.modules ├── modules-amd-rome.sh ├── modules-cs.sh ├── modules-ecrc-mpi.sh ├── modules-ecrc-ub18-mpi.sh ├── modules-ecrc-ub18.sh ├── modules-ecrc.sh ├── modules-isambard-allinea.sh ├── modules-shaheen-gcc-mkl-starpu.sh ├── modules-xc40.sh ├── power8.modules ├── test-trsm.sh └── test.sh ├── testing ├── CMakeLists.txt ├── electrodynamics.c ├── testing_dauxiliary.c ├── testing_dauxiliary.h ├── testing_dposv.c └── testing_dtrsmd.c └── timing ├── CMakeLists.txt ├── time_dgemm_tile.c ├── time_dpotrf_tile.c ├── time_dpotrf_tile_batch.c ├── time_zgetrf_tile.c ├── timing.c ├── timing.h ├── timing_auxiliary.c ├── timing_auxiliary.h ├── timing_dauxiliary.c ├── timing_dauxiliary.h ├── timing_zauxiliary.c └── timing_zauxiliary.h /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | ._.DS_Store 3 | exp/trace 4 | exp/err 5 | exp/cerr 6 | exp/out 7 | exp/ranks 8 | main 9 
| tags 10 | build* 11 | 12 | # Object files 13 | *.o 14 | *.ko 15 | *.obj 16 | *.elf 17 | 18 | # Precompiled Headers 19 | *.gch 20 | *.pch 21 | 22 | # Libraries 23 | *.lib 24 | *.a 25 | *.la 26 | *.lo 27 | 28 | # Shared objects (inc. Windows DLLs) 29 | *.dll 30 | *.so 31 | *.so.* 32 | *.dylib 33 | 34 | # Executables 35 | *.exe 36 | *.out 37 | *.app 38 | *.i*86 39 | *.x86_64 40 | *.hex 41 | 42 | # Debug files 43 | *.dSYM/ 44 | *.su 45 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "cmake_modules/ecrc"] 2 | path = cmake_modules/ecrc 3 | url = https://github.com/ecrc/ecrc_cmake 4 | [submodule "stars-h"] 5 | path = stars-h 6 | url = https://github.com/ecrc/stars-h.git 7 | [submodule "hcore"] 8 | path = hcore 9 | url = https://github.com/ecrc/hcore.git 10 | -------------------------------------------------------------------------------- /Data.md: -------------------------------------------------------------------------------- 1 | # Dataset 2 | 3 | ## Mesh Deformation Application 4 | 5 | Dataset is available in KAUST repository: https://repository.kaust.edu.sa/handle/10754/664938. 6 | Add mesh file name to `--mesh_file=` parameter. 7 | 8 | ## Acoustic Scattering Application 9 | 10 | Dataset is available in KAUST repository: https://repository.kaust.edu.sa/handle/10754/664400. 11 | Add mesh file name to `--mesh_file=` parameter and the file containing interpolation points information to `--nipp=` parameter. 12 | 13 | 14 | For more information on the dataset please refer to the readme files in the data repositories. 15 | 16 | ## Testing Mesh Deformation and Acoustic Scattering Applications 17 | 18 | This [cmake file](timing/CMakeLists.txt) contains sample commands to run the mesh deformation and acoustic scattering applications. 
19 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation on ECRC servers 2 | 3 | 1. Go to home directory 4 | 5 | 2. Get HiCMA from git repository 6 | 7 | git clone git@github.com:ecrc/hicma 8 | 9 | 3. Run the following command: (This command will also setup PKG_CONFIG_PATH for STARS-H) 10 | 11 | . hicma/scripts/build.sh 12 | 13 | 4. Run the following command for seeing which experimental cases will be executed: 14 | 15 | cd ./hicma 16 | ./exp/distruns.sh exp/cases/statistics.sh dry 17 | 18 | 5. Remove the keyword "dry" to really run the cases. 19 | 20 | # Installation 21 | 22 | Installation requires `CMake` of version 3.2.3 at least. To build HiCMA, 23 | follow these instructions: 24 | 25 | 1. Get HiCMA from git repository 26 | 27 | git clone git@github.com:ecrc/hicma 28 | 29 | 30 | 2. Go into hicma folder 31 | 32 | cd hicma 33 | 34 | 3. Get submodules using git as follows. The submodules Chameleon, HCORE and STARS-H should be compiled and the `PKG_CONFIG_PATH` should be set. 35 | 36 | git submodule update --init --recursive 37 | 38 | 4. Create build directory and go there 39 | 40 | mkdir build && cd build 41 | 42 | 5. Use CMake to get all the dependencies 43 | 44 | cmake .. -DCMAKE_INSTALL_PREFIX=/path/to/install/ -DHICMA_USE_MPI=ON 45 | 46 | 6. Build HiCMA 47 | 48 | make -j 49 | 50 | 7. Build local documentation (optional) 51 | 52 | make docs 53 | 54 | 8. Install HiCMA 55 | 56 | make install 57 | 58 | 9. Add line 59 | 60 | export PKG_CONFIG_PATH=/path/to/install:$PKG_CONFIG_PATH 61 | 62 | to your .bashrc file to use HiCMA as a library. 63 | 64 | Now you can use `pkg-config` executable to collect compiler and linker flags for HiCMA or run the binaries under `/path/to/install/timing/`. 65 | 66 | There are `scripts/build.sh` and `scripts/build-nompi.sh` scripts in the repository to build the whole software stack. 
67 | 68 | # CTests 69 | 70 | You can run CTests in the build folder via the following command: 71 | 72 | ctest 73 | 74 | A specific test can be run in verbose mode as follows: 75 | 76 | ctest -R time_zgemm_tile-mpi-edsin -V 77 | 78 | # CMake Configuration 79 | 80 | The following definitions affect how HiCMA library works: 81 | 82 | **HICMA_ALWAYS_FIX_RANK**: Disables rank descriptors. The rank of the input matrix will be uniform 83 | across all tiles. This option can be used to measure the impact of additionally 84 | communicating rank descriptor during computations. If rank descriptor is not communicated, 85 | the total number of messages becomes less. 86 | 87 | **HICMA_USE_MPI=[ON,OFF]**: Enables/Disables MPI in HiCMA. 88 | 89 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2022, King Abdullah University of Science and Technology 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright notice, 9 | this list of conditions and the following disclaimer in the documentation 10 | and/or other materials provided with the distribution. 11 | * Neither the name of the copyright holder nor the names of its 12 | contributors may be used to endorse or promote products derived from 13 | this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HiCMA: Hierarchical Computations on Manycore Architectures 2 | =========================================================== 3 | The **Hierarchical Computations on Manycore Architectures (HiCMA)** library aims to redesign existing 4 | dense linear algebra libraries to exploit the data sparsity of the matrix operator. Data sparse 5 | matrices arise in many scientific problems (e.g., in statistics-based weather forecasting, seismic 6 | imaging, and materials science applications) and are characterized by low-rank off-diagonal tile 7 | structure. Numerical low-rank approximations have demonstrated attractive theoretical bounds, both in 8 | memory footprint and arithmetic complexity. The core idea of HiCMA is to develop fast linear algebra 9 | computations operating on the underlying tile low-rank data format, while satisfying a specified 10 | numerical accuracy and leveraging performance from massively parallel hardware architectures. 
11 | 12 | 13 | Features of HiCMA 1.0.0 14 | ----------------------------- 15 | * Matrix-Matrix Multiplication 16 | * Cholesky Factorization/Solve 17 | * Double Precision 18 | * Task-based Programming Models 19 | * Shared and Distributed-Memory Environments 20 | * Support for StarPU Dynamic Runtime Systems 21 | * Testing Suite and Examples 22 | * Support for 3D unstructured mesh deformation of a population of the novel coronaviruses (i.e., SARS-CoV-2) 23 | * LU factorization (hicma_zgetrf) on double complex matrices stored in tile low-rank (TLR) format. [Link to branch.](https://github.com/ecrc/hicma/tree/zgetrf) 24 | 25 | Current Research 26 | ---------------- 27 | * Matrix Inversion 28 | * Schur Complements 29 | * Preconditioners 30 | * Hardware Accelerators 31 | * Support for Multiple Precisions 32 | * Autotuning: Tile Size, Fixed Accuracy and Fixed Ranks 33 | * Support for OpenMP, PaRSEC and Kokkos 34 | * Support for HODLR, H, HSS and H2 35 | 36 | 37 | External Dependencies 38 | --------------------- 39 | HiCMA depends on the following libraries: 40 | * Chameleon 41 | * HCORE 42 | * STARS-H 43 | * hwloc 44 | * StarPU 45 | * MPI 46 | 47 | Installation 48 | ------------ 49 | 50 | Please see INSTALL.md for information about installing and testing. 51 | 52 | Dataset 53 | ------------ 54 | 55 | Please see Data.md for information about the dataset. 56 | 57 | 58 | References 59 | ----------- 60 | 1. K. Akbudak, H. Ltaief, A. Mikhalev, and D. E. Keyes, *Tile Low Rank Cholesky Factorization for 61 | Climate/Weather Modeling Applications on Manycore Architectures*, **International Supercomputing 62 | Conference (ISC17)**, June 18-22, 2017, Frankfurt, Germany. 63 | 64 | 2. K. Akbudak, H. Ltaief, A. Mikhalev, A. Charara, and D. E. Keyes, *Exploiting Data Sparsity for Large-Scale Matrix Computations*, **Euro-Par 2018**, August 27-31, 2018, Turin, Italy. 65 | 66 | 3. Q. Cao, Y. Pei, T. Herault, K. Akbudak, A. Mikhalev, G. Bosilca, H. Ltaief, D. E. Keyes, and J. 
Dongarra, *Performance Analysis of Tile Low-Rank Cholesky Factorization Using PaRSEC Instrumentation Tools*, **2019 IEEE/ACM International Workshop on Programming and Performance Visualization Tools (ProTools)**, Denver, CO, USA, 2019, pp. 25-32. 67 | 68 | 4. Q. Cao, Y. Pei, K. Akbudak, A. Mikhalev, G. Bosilca, H. Ltaief, D. E. Keyes, and J. Dongarra, *Extreme-Scale Task-Based Cholesky Factorization Toward Climate and Weather Prediction Applications*, **The Platform for Advanced Scientific Computing (PASC 2020)**. 69 | 70 | 5. N. Al-Harthi, R. Alomairy, K. Akbudak, R. Chen, H. Ltaief, H. Bagci, and D. E. Keyes, *Solving Acoustic Boundary Integral Equations Using High Performance Tile Low-Rank LU Factorization*, **International Supercomputing Conference (ISC 2020)**. [GCS Award Winning Paper at ISC2020](https://www.gauss-centre.eu/news/newsflashes/article/gcs-award-2020-1/) 71 | 72 | 73 | 74 | 75 | ![Handout](docs/HiCMA-handout-SC17.png) 76 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release 1.0.0 2 | 3 | ## Features 4 | 5 | * Removed Chameleon dependency completely. 6 | 7 | # Release 0.1.4 8 | 9 | ## Features 10 | 11 | * LU factorization (hicma_zgetrf) on double complex matrices stored in tile low-rank (TLR) format. [Link to branch.](https://github.com/ecrc/hicma/tree/zgetrf) 12 | 13 | # Release 0.1.3 14 | 15 | ## Features 16 | * Support for a new application that simulates the 3D unstructured mesh deformation of a population of the novel coronaviruses (i.e., SARS-CoV-2). This relies on the virus geometry extracted from the Protein Data Bank (PDB) codenamed PDBID 6VXX available at (https://www.rcsb.org/structure/6VXX). The corresponding basis function and data geometry are available at STARS-H (https://github.com/ecrc/stars-h). 
17 | 18 | # Release 0.1.2 19 | 20 | ## Features 21 | * New matrix kernel (3D exponential) for statistics application from STARS-H (https://github.com/ecrc/stars-h) is supported. 22 | * Sequential operations on low-rank tiles are moved into a standalone library called HCORE (https://github.com/ecrc/hcore). 23 | * Number of flops is reported. See the driver routines under the timing folder. 24 | * Multiplication order of tiles in each inner product performed by HiCMA_GEMM can be changed by adding the --reorderinnerproducts flag while calling timing/time_zgemm_tile. This flag enables sorting tiles according to the increasing sum of ranks of the tile pairs that will be multiplied. 25 | * More tests in timing/CMakeLists.txt 26 | 27 | ## Bug Fixes and Other Changes 28 | * Set minimum number of singular vectors in HCORE_GEMM to 1 instead of 2 29 | 30 | # Release 0.1.1 31 | 32 | ## Features 33 | * Triangular solve (left) for low rank A and B/X 34 | * Triangular solve (left, lower) for low rank A and full rank B/X 35 | * Testing routine for POSV and TRSM 36 | * HCORE routine for C=C+AB where A is low rank and B and C are full rank 37 | * Separate routines for dense and low rank matrix generation 38 | 39 | # Release 0.1.0 40 | 41 | ## Features 42 | * Matrix-Matrix Multiplication 43 | * Cholesky Factorization 44 | * Double Precision 45 | * Task-based Programming Models 46 | * Shared and Distributed-Memory Environments 47 | * Support for StarPU Dynamic Runtime Systems 48 | * Testing Suite and Examples 49 | 50 | -------------------------------------------------------------------------------- /cmake_modules/PrintOpts.cmake: -------------------------------------------------------------------------------- 1 | ### 2 | # 3 | # @copyright (c) 2009-2014 The University of Tennessee and The University 4 | # of Tennessee Research Foundation. 5 | # All rights reserved. 6 | # @copyright (c) 2012-2016 Inria. All rights reserved. 
# @copyright (c) 2012-2014, 2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
# @copyright (c) 2017, King Abdullah University of Science and Technology. All rights reserved.
#
###
#
# @file PrintOpts.cmake
#
# @project MORSE
# MORSE is a software package provided by:
# Inria Bordeaux - Sud-Ouest,
# Univ. of Tennessee,
# King Abdullah University of Science and Technology
# Univ. of California Berkeley,
# Univ. of Colorado Denver.
#
# @author Florent Pruvost
# @author Eduardo Gonzalez
# @date 2019-11-25
#
###
# Purpose: accumulate a human-readable summary of the HiCMA configuration in
# `dep_message`, print it, write it to config.log in the current binary
# directory, and register that file for installation under share/hicma.
#
# `dep_message` is built as a CMake list (one element per quoted fragment);
# each later set() re-expands the previous value and appends more fragments.

# Header and C compiler identification.
set(dep_message "\nConfiguration of HiCMA:\n"
  " Compiler: C .........: ${CMAKE_C_COMPILER} (${CMAKE_C_COMPILER_ID})\n"
  # " Compiler: Fortran ...: ${CMAKE_Fortran_COMPILER} (${CMAKE_Fortran_COMPILER_ID})\n"
  )
# MPI compiler details are reported only when MPI support is enabled.
if(HICMA_USE_MPI)
  set(dep_message "${dep_message}"
    " Compiler: MPI .......: ${MPI_C_COMPILER}\n"
    " compiler flags ......: ${MPI_C_COMPILE_FLAGS}\n")
endif()
# Linker, build type/flags, runtimes, kernel libraries, dependencies found,
# tracing/simulation switches and the list of optional binaries.
set(dep_message "${dep_message}"
  " Linker: .............: ${CMAKE_LINKER}\n"
  "\n"
  " Build type ..........: ${CMAKE_BUILD_TYPE}\n"
  " Build shared ........: ${BUILD_SHARED_LIBS}\n"
  " CFlags ..............: ${CMAKE_C_FLAGS}\n"
  " LDFlags (shared lib).: ${CMAKE_SHARED_LINKER_FLAGS}\n"
  " LDFlags (static lib).: ${CMAKE_STATIC_LINKER_FLAGS}\n"
  " LDFlags (executable).: ${CMAKE_EXE_LINKER_FLAGS}\n"
  "\n"
  " Implementation paradigm\n"
  " CUDA ................: ${HICMA_USE_CUDA}\n"
  " MPI .................: ${HICMA_USE_MPI}\n"
  "\n"
  " Runtime specific\n"
  " QUARK ...............: ${HICMA_SCHED_QUARK} ${QUARK_DIR_FOUND}\n"
  " StarPU ..............: ${HICMA_SCHED_STARPU} ${STARPU_DIR_FOUND}\n"
  "\n"
  " Kernels specific\n"
  " BLAS ................: ${BLAS_VENDOR_FOUND} [${BLAS_LIBRARIES}]\n"
  " LAPACK...............: ${LAPACK_VENDOR_FOUND} [${LAPACK_LIBRARIES}]\n"
  "\n"
  " Chameleon ...........: ${CHAMELEON_DIR_FOUND}\n"
  " STARS-H .............: ${STARSH_DIR_FOUND}\n"
  " HCORE . .............: ${HCORE_DIR_FOUND}\n"
  "\n"
  " Trace ...............: ${HICMA_ENABLE_TRACING}\n"
  " Simulation mode .....: ${HICMA_SIMULATION}\n"
  "\n"
  " Binaries to build\n"
  " documentation ........: ${HICMA_ENABLE_DOCS}\n"
  " example ..............: ${HICMA_ENABLE_EXAMPLE}\n"
  " testing ..............: ${HICMA_ENABLE_TESTING}\n"
  " timing ...............: ${HICMA_ENABLE_TIMING}\n"
  "\n"
  " HICMA dependencies :\n")
# One summary line per entry of HICMA_DEP.
foreach (_dep ${HICMA_DEP})
  set(dep_message "${dep_message}"
    " ${_dep}\n")
endforeach ()
# Turn the definitions list into a single space-separated string for display.
string(REGEX REPLACE ";" " " HICMA_DEFINITIONS_LIST "${HICMA_DEFINITIONS_LIST}")
set(dep_message "${dep_message}"
  "\n"
  " Definitions: ${HICMA_DEFINITIONS_LIST}\n")
set(dep_message "${dep_message}"
  "\n"
  " INSTALL_PREFIX ......: ${CMAKE_INSTALL_PREFIX}\n\n")

# The log file gets a copy in which the list separators (";") are replaced by
# spaces; the console output passes the list unquoted to message().
string(REPLACE ";" " " dep_message_wsc "${dep_message}")
message(${dep_message})
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/config.log "${dep_message_wsc}")
message(STATUS "Configuration is done - A summary of the current configuration"
  "\n has been written in ${CMAKE_CURRENT_BINARY_DIR}/config.log")
# installation
# ------------
INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.log DESTINATION share/hicma)
10 | * 11 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 12 | * 13 | * @version 1.0.0 14 | * @author Kadir Akbudak 15 | * @date 2019-11-14 16 | **/ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | extern int store_only_diagonal_tiles; 25 | 26 | /* 27 | * Uncompresses lower triangular part. Computes D=U*V^T. Ranks of U and Vs stored in Ark 28 | */ 29 | int HICMA_ddiag_vec2mat( 30 | HICMA_desc_t *vec, HICMA_desc_t *mat) { 31 | 32 | HICMA_context_t *hicma; 33 | HICMA_sequence_t *sequence = NULL; 34 | HICMA_request_t request = HICMA_REQUEST_INITIALIZER; 35 | int status; 36 | hicma = hicma_context_self(); 37 | if (hicma == NULL) { 38 | hicma_fatal_error("HiCMA_diag_vec2mat", "HiCMA not initialized"); 39 | return HICMA_ERR_NOT_INITIALIZED; 40 | } 41 | hicma_sequence_create(hicma, &sequence); 42 | 43 | 44 | /*HICMA_context_t *hicma;*/ 45 | HICMA_option_t options; 46 | /*hicma = hicma_context_self();*/ 47 | if (sequence->status != HICMA_SUCCESS) 48 | return HICMA_ERR_NOT_INITIALIZED; 49 | HICMA_RUNTIME_options_init(&options, hicma, sequence, &request); 50 | assert(vec->mb == mat->mb); 51 | assert(vec->nb == mat->nb); 52 | assert(vec->mb == vec->nb); 53 | int i; 54 | for (i = 0; i < vec->mt; i++) { 55 | int vecicol; 56 | if (store_only_diagonal_tiles == 1) { 57 | vecicol = 0; 58 | } else { 59 | vecicol = i; 60 | } 61 | //@KADIR FIXME handle leftovers 62 | int ldv = BLKLDD(vec, i); 63 | int ldm = BLKLDD(mat, i); 64 | int tempii = i == vec->mt - 1 ? 
vec->m - i * vec->mb : vec->mb; 65 | //printf("i=%d ldv=%d ldm=%d vec->mb=%d mat->mb=%d tempii=%d\n", i, ldv, ldm, vec->mb, mat->mb, tempii); 66 | HICMA_TASK_dlacpy( //FIXME convert to z 67 | &options, 68 | HicmaUpperLower, 69 | tempii, tempii, vec->mb, 70 | vec, i, vecicol, ldv, 71 | mat, i, i, ldm); 72 | } 73 | HICMA_RUNTIME_sequence_wait(hicma, sequence); 74 | HICMA_RUNTIME_options_finalize(&options, hicma); 75 | //HICMA_TASK_dataflush_all(); removed in newer chameleon 76 | 77 | //RUNTIME_desc_getoncpu( &AD ); accuracy checking works without this line on shared memory and with 4 mpi ranks on shared memory 78 | //HICMA_RUNTIME_options_finalize(&options, hicma); 79 | 80 | 81 | HICMA_Desc_Flush(vec, sequence); 82 | HICMA_Desc_Flush(mat, sequence); 83 | 84 | hicma_sequence_wait(hicma, sequence); 85 | /*RUNTIME_desc_getoncpu(vec);*/ 86 | /*RUNTIME_desc_getoncpu(mat);*/ 87 | 88 | status = sequence->status; 89 | hicma_sequence_destroy(hicma, sequence); 90 | return status; 91 | } 92 | -------------------------------------------------------------------------------- /compute/pdgenmat.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | 6 | /** 7 | * 8 | * @file pdgenmat.c 9 | * 10 | * HiCMA auxiliary routines 11 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 12 | * 13 | * @version 1.0.0 14 | * @author Rabab Alomairy 15 | * @author Kadir Akbudak 16 | * @date 2018-11-08 17 | * 18 | **/ 19 | 20 | /** 21 | * @copyright (c) 2009-2014 The University of Tennessee and The University 22 | * of Tennessee Research Foundation. 23 | * All rights reserved. 24 | * @copyright (c) 2012-2016 Inria. All rights reserved. 25 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 
26 | **/ 27 | 28 | /** 29 | * 30 | * @file pdgenmat.c 31 | * 32 | * MORSE auxiliary routines 33 | * MORSE is a software package provided by Univ. of Tennessee, 34 | * Univ. of California Berkeley and Univ. of Colorado Denver 35 | * 36 | * @version 2.5.0 37 | * @comment This file has been automatically generated 38 | * from Plasma 2.5.0 for MORSE 1.0.0 39 | * @author Mathieu Faverge 40 | * @author Emmanuel Agullo 41 | * @author Cedric Castagnede 42 | * @date 2010-10-21 43 | * 44 | **/ 45 | 46 | #include 47 | #include 48 | #include 49 | extern int store_only_diagonal_tiles; 50 | 51 | /***************************************************************************//** 52 | * Generate a application matrix using STARS-H. 53 | **/ 54 | void hicma_pdgenmat( 55 | HICMA_desc_t *A, 56 | HICMA_sequence_t *sequence, HICMA_request_t *request ) 57 | { 58 | HICMA_context_t *hicma; 59 | HICMA_option_t options; 60 | 61 | int i, j; 62 | int tempmm, tempnn; 63 | 64 | hicma = hicma_context_self(); 65 | if (sequence->status != HICMA_SUCCESS) 66 | return; 67 | HICMA_RUNTIME_options_init(&options, hicma, sequence, request); 68 | 69 | for (i = 0; i < A->mt; i++) { 70 | int lda = BLKLDD(A, i); 71 | tempmm = i == A->mt-1 ? A->m-i*A->mb : A->mb; 72 | for (j = 0; j < A->nt; j++) { 73 | tempnn = j == A->nt-1 ? A->n-j*A->nb : A->nb; 74 | HICMA_TASK_dgenmat(&options, A, lda, i, j, tempmm, tempnn); 75 | } 76 | } 77 | HICMA_RUNTIME_options_finalize(&options, hicma); 78 | } 79 | -------------------------------------------------------------------------------- /compute/pdgenrhs.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | 6 | /** 7 | * @copyright (c) 2009-2014 The University of Tennessee and The University 8 | * of Tennessee Research Foundation. 9 | * All rights reserved. 10 | * @copyright (c) 2012-2016 Inria. All rights reserved. 
11 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 12 | */ 13 | 14 | /** 15 | * 16 | * @file pdgenrhs.c 17 | * 18 | * HiCMA computational routines 19 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 20 | * 21 | * @author Rabab Alomairy 22 | * @date 2020-02-15 23 | * 24 | **/ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | /** 32 | * hicma_pdgenrhs_virus - Generate a random matrix by tiles. 33 | * Operates on matrices stored by tiles. 34 | * All matrices are passed through descriptors. 35 | * All dimensions are taken from the descriptors. 36 | * 37 | * HICMA_dgenrhs_Tile - Generate RHS matrix by tiles. 38 | * 39 | ******************************************************************************* 40 | * 41 | * @param[out] A 42 | * Store RHS values in descriptor A 43 | * 44 | * @param[in] sequence 45 | * Identifies the sequence of function calls that this call belongs to 46 | * (for completion checks and exception handling purposes). 47 | * 48 | * @param[out] request 49 | * Identifies this function call (for exception handling purposes). 50 | * 51 | 52 | ******************************************************************************/ 53 | 54 | void hicma_pdgenrhs( 55 | HICMA_desc_t *A, 56 | HICMA_sequence_t *sequence, HICMA_request_t *request ) 57 | { 58 | HICMA_context_t *hicma; 59 | HICMA_option_t options; 60 | 61 | int m, n; 62 | int tempmm, tempnn; 63 | int index; 64 | hicma = hicma_context_self(); 65 | if (sequence->status != HICMA_SUCCESS) 66 | return; 67 | HICMA_RUNTIME_options_init(&options, hicma, sequence, request); 68 | 69 | for (m = 0; m < A->mt; m++) { 70 | tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; 71 | tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; 72 | int ldam = BLKLDD(A, m); 73 | HICMA_TASK_dgenrhs( 74 | &options, 75 | tempmm, 76 | tempnn, 77 | A, m, 0, 78 | ldam, 79 | A->m, m*A->mb, m*A->nb); 80 | } 81 | HICMA_RUNTIME_options_finalize(&options, hicma); 82 | } 83 | -------------------------------------------------------------------------------- /compute/pdhagcm.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | 6 | /** 7 | * @file pdhagcm.c 8 | * 9 | * HiCMA auxiliary routines 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 1.0.0 13 | * @author Kadir Akbudak 14 | * @date 2018-11-08 15 | **/ 16 | 17 | /** 18 | * @copyright (c) 2009-2014 The University of Tennessee and The University 19 | * of Tennessee Research Foundation. 20 | * All rights reserved. 21 | * @copyright (c) 2012-2016 Inria. All rights reserved. 22 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 23 | */ 24 | 25 | /** 26 | * 27 | * file pdhagcm.c 28 | * 29 | * MORSE auxiliary routines 30 | * MORSE is a software package provided by Univ. of Tennessee, 31 | * Univ. of California Berkeley and Univ. of Colorado Denver 32 | * 33 | * @version 2.5.0 34 | * @comment This file has been automatically generated 35 | * from Plasma 2.5.0 for MORSE 1.0.0 36 | * @author Mathieu Faverge 37 | * @author Emmanuel Agullo 38 | * @author Cedric Castagnede 39 | * @date 2010-11-15 40 | * 41 | **/ 42 | 43 | #include 44 | #include 45 | #include "hicma_runtime_d.h" 46 | extern int store_only_diagonal_tiles; 47 | 48 | /** 49 | * Generate a compressed matrix. 50 | * HicmaLower and HicmaUpper do not include diagnal tiles. 
51 | **/ 52 | void hicma_pdhagcm( 53 | HICMA_enum uplo, 54 | HICMA_desc_t *AUV, 55 | HICMA_desc_t *Ark, 56 | int numrows_matrix, 57 | int numcols_matrix, 58 | int numrows_block, 59 | int numcols_block, 60 | int maxrank, double tol, 61 | HICMA_sequence_t *sequence, HICMA_request_t *request ) 62 | { 63 | HICMA_desc_t *A = AUV; // FIXME 64 | HICMA_context_t *hicma; 65 | HICMA_option_t options; 66 | 67 | int m, n; 68 | int tempmm, tempnn; 69 | 70 | hicma = hicma_context_self(); 71 | if (sequence->status != HICMA_SUCCESS) 72 | return; 73 | HICMA_RUNTIME_options_init(&options, hicma, sequence, request); 74 | 75 | for (m = 0; m < A->mt; m++) { 76 | tempmm = m == A->mt-1 ? numrows_matrix-m*numrows_block : numrows_block; 77 | int ldamUV = BLKLDD(AUV, m); 78 | 79 | //for (n = 0; n < A->mt; n++) { 80 | // tempnn = n == A->mt-1 ? A->m-n*A->mb : A->mb; 81 | for (n = 0; n < A->nt; n++) { //I hope this change does not break anything 82 | tempnn = n == A->nt-1 ? numcols_matrix-n*numcols_block : numcols_block; 83 | 84 | // if(m<n) 85 | // continue; 86 | if (uplo == HicmaLower && m <= n) /* HicmaLower: keep only tiles strictly below the diagonal */ 87 | continue; 88 | else 89 | if (uplo == HicmaUpper && m >= n) /* HicmaUpper: keep only tiles strictly above the diagonal */ 90 | continue; 91 | //printf("Tile (%d,%d) ldamUV=%d A->m=%d A->n=%d A->mb=%d A->nb=%d tempmm=%d tempnn=%d (%dx%d) (%dx%d) (%dx%d)\n", m, n, ldamUV, A->m, A->n, A->mb, A->nb, tempmm, tempnn, numrows_matrix, numcols_matrix, numrows_block, numcols_block, A->mt, A->nt); 92 | 93 | HICMA_TASK_dhagcm( 94 | &options, 95 | tempmm, tempnn, 96 | AUV, 97 | Ark, 98 | m, n, 99 | ldamUV, ldamUV, 100 | maxrank, tol, A->mt 101 | ); 102 | } 103 | } 104 | HICMA_RUNTIME_options_finalize(&options, hicma); 105 | //HICMA_TASK_dataflush_all(); removed in newer chameleon 106 | } 107 | -------------------------------------------------------------------------------- /compute/pzplrnt.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved.
4 | **/ 5 | 6 | /** 7 | * @file pzplrnt.c 8 | * 9 | * HiCMA auxiliary routines 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 1.0.0 13 | * @author Kadir Akbudak 14 | * @date 2018-11-08 15 | **/ 16 | 17 | /** 18 | * @copyright (c) 2009-2014 The University of Tennessee and The University 19 | * of Tennessee Research Foundation. 20 | * All rights reserved. 21 | * @copyright (c) 2012-2016 Inria. All rights reserved. 22 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 23 | */ 24 | 25 | /** 26 | * 27 | * file pzplrnt.c 28 | * 29 | * MORSE auxiliary routines 30 | * MORSE is a software package provided by Univ. of Tennessee, 31 | * Univ. of California Berkeley and Univ. of Colorado Denver 32 | * 33 | * @version 2.5.0 34 | * @comment This file has been automatically generated 35 | * from Plasma 2.5.0 for MORSE 1.0.0 36 | * @author Jakub Kurzak 37 | * @author Hatem Ltaief 38 | * @author Mathieu Faverge 39 | * @author Emmanuel Agullo 40 | * @author Cedric Castagnede 41 | * @date 2010-11-15 42 | * @precisions normal z -> s d c 43 | * 44 | **/ 45 | 46 | #include 47 | #include 48 | #include 49 | 50 | #define A(m, n) A, m, n 51 | 52 | /** 53 | * hicma_pzplrnt - Generate a random matrix by tiles. 54 | */ 55 | void hicma_pzplrnt(HICMA_desc_t *A, unsigned long long int seed, 56 | HICMA_sequence_t *sequence, HICMA_request_t *request) { 57 | HICMA_context_t *hicma; 58 | HICMA_option_t options; 59 | 60 | int m, n; 61 | int ldam; 62 | int tempmm, tempnn; 63 | 64 | hicma = hicma_context_self(); 65 | if (sequence->status != HICMA_SUCCESS) 66 | return; 67 | HICMA_RUNTIME_options_init(&options, hicma, sequence, request); 68 | 69 | for (m = 0; m < A->mt; m++) { 70 | tempmm = m == A->mt - 1 ? A->m - m * A->mb : A->mb; 71 | ldam = BLKLDD(A, m); 72 | 73 | for (n = 0; n < A->nt; n++) { 74 | tempnn = n == A->nt - 1 ?
A->n - n * A->nb : A->nb; 75 | 76 | HICMA_TASK_zplrnt( 77 | &options, 78 | tempmm, tempnn, A(m, n), ldam, 79 | A->m, m * A->mb, n * A->nb, seed); 80 | } 81 | } 82 | HICMA_RUNTIME_options_finalize(&options, hicma); 83 | } 84 | -------------------------------------------------------------------------------- /compute/zdiag.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | 6 | /** 7 | * @file zdiag.c 8 | * 9 | * HiCMA auxiliary routines 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 1.0.0 13 | * @author Kadir Akbudak 14 | * @date 2018-11-08 15 | **/ 16 | 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | extern int store_only_diagonal_tiles; 24 | /* 25 | * Copies tile (i, vecicol) of vec into the diagonal tile (i, i) of mat for every i in [0, vec->mt); vecicol is 0 when store_only_diagonal_tiles == 1 and i otherwise. Returns HICMA_ERR_NOT_INITIALIZED when no context or sequence is available, otherwise the final status of the internal sequence. 26 | */ 27 | 28 | int HICMA_zdiag_vec2mat( 29 | HICMA_desc_t *vec, HICMA_desc_t *mat) 30 | { 31 | 32 | HICMA_context_t *hicma; 33 | HICMA_sequence_t *sequence = NULL; 34 | HICMA_request_t request = HICMA_REQUEST_INITIALIZER; 35 | int status; 36 | hicma = hicma_context_self(); 37 | if (hicma == NULL) { 38 | hicma_fatal_error("HiCMA_diag_vec2mat", "HiCMA not initialized"); 39 | return HICMA_ERR_NOT_INITIALIZED; 40 | } 41 | hicma_sequence_create(hicma, &sequence); 42 | if (sequence == NULL) /* creation failed: do not dereference a NULL sequence below */ 43 | return HICMA_ERR_NOT_INITIALIZED; 44 | /*HICMA_context_t *hicma;*/ 45 | HICMA_option_t options; 46 | /*hicma = hicma_context_self();*/ 47 | if (sequence->status != HICMA_SUCCESS) { hicma_sequence_destroy(hicma, sequence); /* release the sequence created above instead of leaking it */ 48 | return HICMA_ERR_NOT_INITIALIZED; } 49 | HICMA_RUNTIME_options_init(&options, hicma, sequence, &request); 50 | assert(vec->mb == mat->mb); 51 | if(vec->nb != mat->nb) { 52 | printf("nb is not equal for vec:%d mat:%d\n", vec->nb, mat->nb); 53 | } 54 | assert(vec->nb == mat->nb); 55 | assert(vec->mb == vec->nb); 56 | HICMA_Complex64_t zzero =
(HICMA_Complex64_t) 0.0; 57 | HICMA_Complex64_t zone = (HICMA_Complex64_t) 1.0; 58 | int i; 59 | for (i = 0; i < vec->mt; i++) { 60 | int vecicol; 61 | if(store_only_diagonal_tiles == 1){ 62 | vecicol = 0; 63 | } else { 64 | vecicol = i; 65 | } 66 | //@KADIR FIXME handle leftovers 67 | int ldv = BLKLDD(vec, i); 68 | int ldm = BLKLDD(mat, i); 69 | int tempii = i == vec->mt-1 ? vec->m-i*vec->mb : vec->mb; 70 | //printf("i=%d ldv=%d ldm=%d vec->mb=%d mat->mb=%d tempii=%d\n", i, ldv, ldm, vec->mb, mat->mb, tempii); 71 | HICMA_TASK_zlacpy( //FIXME convert to z 72 | &options, 73 | HicmaUpperLower, 74 | tempii, tempii, vec->mb, 75 | vec, i, vecicol, ldv, 76 | mat, i, i, ldm ); 77 | } 78 | HICMA_RUNTIME_sequence_wait( hicma, sequence ); 79 | HICMA_RUNTIME_options_finalize( &options, hicma ); 80 | //HICMA_TASK_dataflush_all(); removed in newer chameleon 81 | 82 | //RUNTIME_desc_getoncpu( &AD ); accuracy checking works without this line on shared memory and with 4 mpi ranks on shared memory 83 | //HICMA_RUNTIME_options_finalize(&options, hicma); 84 | 85 | 86 | HICMA_Desc_Flush( vec, sequence ); 87 | HICMA_Desc_Flush( mat, sequence ); 88 | 89 | hicma_sequence_wait(hicma, sequence); 90 | /*RUNTIME_desc_getoncpu(vec);*/ 91 | /*RUNTIME_desc_getoncpu(mat);*/ 92 | 93 | status = sequence->status; 94 | hicma_sequence_destroy(hicma, sequence); 95 | return status; 96 | } 97 | -------------------------------------------------------------------------------- /control/hicma_config.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved.
4 | **/ 5 | /** 6 | * @file hicma_config.h 7 | * 8 | * HiCMA configurations 9 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 10 | * 11 | * @version 0.1.0 12 | * @author Ali Charara 13 | * @date 2017-11-16 14 | **/ 15 | #ifndef _HICMA_CONFIG_H_ 16 | #define _HICMA_CONFIG_H_ 17 | 18 | // #define HCORE_GEMM_USE_ORGQR 19 | // #define HCORE_GEMM_USE_KBLAS_ACA 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /control/hicma_tile.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | 6 | /** 7 | * 8 | * @file tile.c 9 | * 10 | * @copyright 2009-2014 The University of Tennessee and The University of 11 | * Tennessee Research Foundation. All rights reserved. 12 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 13 | * Univ. Bordeaux. All rights reserved. 14 | * 15 | *** 16 | * 17 | * @brief Chameleon layout conversion wrappers 18 | * 19 | * @version 1.0.0 20 | * @author Jakub Kurzak 21 | * @author Cedric Castagnede 22 | * @date 2010-11-15 23 | * 24 | *** 25 | * 26 | * @defgroup Tile 27 | * @brief Group routines exposed to users for matrices conversion LAPACK-Tile 28 | * 29 | */ 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | /** 38 | * 39 | * @ingroup Tile 40 | * 41 | * HICMA_Lapack_to_Tile - Conversion from LAPACK layout to tile layout. 42 | * 43 | ****************************************************************************** 44 | * 45 | * @param[in] Af77 46 | * LAPACK matrix. 47 | * 48 | * @param[in] LDA 49 | * The leading dimension of the matrix Af77. 50 | * 51 | * @param[out] A 52 | * Descriptor of the HICMA matrix in tile layout. 
53 | * 54 | ****************************************************************************** 55 | * 56 | * @return 57 | * \retval HICMA_SUCCESS successful exit 58 | * 59 | */ 60 | int HICMA_Lapack_to_Tile(void *Af77, int LDA, HICMA_desc_t *A) 61 | { 62 | switch( A->dtyp ) { /* dispatch on the element type recorded in the descriptor */ 63 | case HicmaComplexDouble: 64 | return HICMA_zLapack_to_Tile( (HICMA_Complex64_t *)Af77, LDA, A ); 65 | break; 66 | case HicmaComplexFloat: 67 | return HICMA_cLapack_to_Tile( (HICMA_Complex32_t *)Af77, LDA, A ); 68 | break; 69 | case HicmaRealFloat: 70 | return HICMA_sLapack_to_Tile( (float *)Af77, LDA, A ); 71 | break; 72 | case HicmaRealDouble: 73 | default: /* any other dtyp value is also routed to the double-precision routine */ 74 | return HICMA_dLapack_to_Tile( (double *)Af77, LDA, A ); 75 | } 76 | return HICMA_ERR_ILLEGAL_VALUE; /* not reached: every switch path above returns */ 77 | } 78 | 79 | /** 80 | * 81 | * @ingroup Tile 82 | * 83 | * HICMA_Tile_to_Lapack - Conversion from tile layout to LAPACK layout. 84 | * 85 | ****************************************************************************** 86 | * 87 | * @param[in] A 88 | * Descriptor of the HICMA matrix in tile layout. 89 | * 90 | * @param[out] Af77 91 | * LAPACK matrix (only needed on proc 0). 92 | * 93 | * @param[in] LDA 94 | * The leading dimension of the matrix Af77.
95 | * 96 | ****************************************************************************** 97 | * 98 | * @return 99 | * \retval HICMA_SUCCESS successful exit 100 | * 101 | */ 102 | int HICMA_Tile_to_Lapack(HICMA_desc_t *A, void *Af77, int LDA) 103 | { 104 | switch( A->dtyp ) { /* dispatch on the element type recorded in the descriptor */ 105 | case HicmaComplexDouble: 106 | return HICMA_zTile_to_Lapack( A, (HICMA_Complex64_t *)Af77, LDA ); 107 | break; 108 | case HicmaComplexFloat: 109 | return HICMA_cTile_to_Lapack( A, (HICMA_Complex32_t *)Af77, LDA ); 110 | break; 111 | case HicmaRealFloat: 112 | return HICMA_sTile_to_Lapack( A, (float *)Af77, LDA ); 113 | break; 114 | case HicmaRealDouble: 115 | default: /* any other dtyp value is also routed to the double-precision routine */ 116 | return HICMA_dTile_to_Lapack( A, (double *)Af77, LDA ); 117 | } 118 | return HICMA_ERR_ILLEGAL_VALUE; /* not reached: every switch path above returns */ 119 | } 120 | -------------------------------------------------------------------------------- /docs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.in" 2 | "${CMAKE_CURRENT_BINARY_DIR}/config") 3 | add_custom_target(docs COMMAND 4 | "${DOXYGEN_EXECUTABLE}" "${CMAKE_CURRENT_BINARY_DIR}/config") 5 | -------------------------------------------------------------------------------- /docs/HiCMA-handout-SC17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecrc/hicma/fa8596b5d3aa8e5b7d5c06cd8db3cecc32f70d17/docs/HiCMA-handout-SC17.png -------------------------------------------------------------------------------- /docs/Installation.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # What is HiCMA?
2 | 3 | The Hierarchical Computations on Manycore Architectures (HiCMA) library aims to redesign existing dense linear algebra libraries to exploit the data sparsity of the matrix operator. Data-sparse matrices arise in many scientific problems (e.g., in statistics-based weather forecasting, seismic imaging, and materials science applications) and are characterized by a low-rank off-diagonal tile structure. Numerical low-rank approximations have demonstrated attractive theoretical bounds, 4 | both in memory footprint and arithmetic complexity. The core idea of HiCMA is to develop fast linear algebra computations operating on the underlying tile low-rank data format, while satisfying a specified numerical accuracy and leveraging performance from massively parallel hardware architectures. 5 | 6 | # Features of HiCMA 0.1.1 7 | 8 | * Matrix-Matrix Multiplication 9 | * Cholesky Factorization (Solve will be available soon) 10 | * Double Precision 11 | * Task-based Programming Models 12 | * Shared and Distributed-Memory Environments 13 | * Support for the StarPU Dynamic Runtime System 14 | * Testing Suite and Examples 15 | 16 | # Installation 17 | 18 | Installation requires `CMake` version 3.2.3 or later. To build HiCMA, 19 | follow these instructions: 20 | 21 | 1. Get HiCMA from the git repository 22 | 23 | git clone git@github.com:ecrc/hicma 24 | 25 | 26 | 2. Go into the hicma folder 27 | 28 | cd hicma 29 | 30 | 3. Get the submodules using git as follows. The submodules Chameleon and STARS-H should be compiled and the `PKG_CONFIG_PATH` should be set. 31 | 32 | git submodule update --init --recursive 33 | 34 | 4. Create a build directory and go there 35 | 36 | mkdir build && cd build 37 | 38 | 5. Use CMake to get all the dependencies 39 | 40 | cmake .. -DCMAKE_INSTALL_PREFIX=/path/to/install/ -DHICMA_USE_MPI=ON 41 | 42 | 6. Build HiCMA 43 | 44 | make -j 45 | 46 | 7. Build local documentation (optional) 47 | 48 | make docs 49 | 50 | 8. Install HiCMA 51 | 52 | make install 53 | 54 | 9.
Add the line 55 | 56 | export PKG_CONFIG_PATH=/path/to/install:$PKG_CONFIG_PATH 57 | 58 | to your .bashrc file to use HiCMA as a library. 59 | 60 | Now you can use the `pkg-config` executable to collect compiler and linker flags for HiCMA or run the binaries under `/path/to/install/timing/`. 61 | 62 | There are `scripts/build.sh` and `scripts/build-nompi.sh` scripts in the repository to build the whole software stack. 63 | 64 | # Quick Start 65 | 66 | ## Cholesky Factorization 67 | timing/time_zpotrf_tile.c performs the following operations: 68 | 1. Creates a matrix in tile low-rank (TLR) format. 69 | 2. Performs Cholesky factorization on the TLR matrix. 70 | 3. Checks the solution against LAPACK_dpotrf() if checking is enabled. 71 | 72 | ## General Matrix Multiply 73 | timing/time_zgemm_tile.c performs the following operations: 74 | 1. Creates three matrices in tile low-rank (TLR) format. 75 | 2. Performs matrix multiplication. 76 | 3. Checks the solution against cblas_gemm() if checking is enabled. 77 | -------------------------------------------------------------------------------- /exec_env_info_ADSC20/collect_environment.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | # Linux data-gathering commands; adjust as necessary for your platform. 4 | # 5 | # Be sure to remove any information from the output that would violate 6 | # SC's double-blind review policies.
7 | 8 | env | sed "s/$USER/USER/g" 9 | set -x 10 | lsb_release -a 11 | uname -a 12 | lscpu || cat /proc/cpuinfo 13 | cat /proc/meminfo 14 | inxi -F -c0 15 | lsblk -a 16 | lsscsi -s 17 | module list 18 | nvidia-smi 19 | (lshw -short -quiet -sanitize || lspci) | cat 20 | -------------------------------------------------------------------------------- /exp/cases/10M.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=10800000; nb[1]=3375 2 | nrows[2]=10800000; nb[2]=4500 3 | nrows[3]=10800000; nb[3]=6750 4 | nrows[4]=8100000; nb[4]=3375 5 | nrows[5]=8100000; nb[5]=4500 6 | nrows[6]=8100000; nb[6]=6750 7 | nrows[7]=5400000; nb[7]=3375 8 | nrows[8]=5400000; nb[8]=4500 9 | nrows[9]=5400000; nb[9]=6750 10 | 11 | #nrows[1]=5400000; nb[1]=2250 12 | #nrows[2]=5400000; nb[2]=2700 13 | #nrows[3]=5400000; nb[3]=3375 14 | 15 | nrows[1]=5400000; nb[1]=2250 16 | nrows[2]=5400000; nb[2]=2700 17 | nrows[3]=5400000; nb[3]=3375 18 | nrows[4]=8100000; nb[4]=2250 19 | nrows[5]=8100000; nb[5]=2700 20 | nrows[6]=8100000; nb[6]=3375 21 | nrows[7]=8100000; nb[7]=4500 22 | 23 | nrows[1]=10800000; nb[1]=3375 24 | nrows[2]=10800000; nb[2]=4500 25 | nrows[3]=10800000; nb[3]=6750 26 | 27 | nrows[1]=13500000; nb[1]=3375 28 | nrows[2]=13500000; nb[2]=4500 29 | nrows[3]=13500000; nb[3]=6750 30 | nrows[4]=16200000; nb[4]=3375 31 | nrows[5]=16200000; nb[5]=4500 32 | nrows[6]=16200000; nb[6]=6750 33 | 34 | nrows[1]=13500000; nb[1]=3375 35 | nrows[2]=13500000; nb[2]=4500 36 | nrows[3]=13500000; nb[3]=6750 37 | nrows[4]=16200000; nb[4]=3375 38 | nrows[5]=16200000; nb[5]=4500 39 | nrows[6]=16200000; nb[6]=6750 40 | nrows[7]=18900000; nb[7]=3375 41 | nrows[8]=18900000; nb[8]=4500 42 | nrows[9]=18900000; nb[9]=6750 43 | nrows[10]=21600000; nb[10]=3375 44 | nrows[11]=21600000; nb[11]=4500 45 | nrows[12]=21600000; nb[12]=6750 46 | 47 | nrows[1]=5400000; nb[1]=2250 48 | nrows[2]=5400000; nb[2]=2700 49 | nrows[3]=5400000; nb[3]=3375 50 | 
-------------------------------------------------------------------------------- /exp/cases/1M-8M.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=1080000; nb[1]=2700; acc[1]=8; maxrank[1]=50; decay[1]=0.41; compmaxrank[1]=100 2 | nrows[2]=1080000; nb[2]=3000; acc[2]=8; maxrank[2]=50; 3 | nrows[3]=1080000; nb[3]=3375; acc[3]=8; maxrank[3]=50; 4 | nrows[4]=1080000; nb[4]=4500; acc[4]=8; maxrank[4]=50; 5 | _appdata="--rnd"; 6 | 7 | note="Hicma runs - Synthetic" 8 | allcaseids[16]="1 2 3 4" 9 | allcaseids[16]="1" 10 | timelimit="01:00:00" 11 | step=1 12 | nprocs="16" 13 | -------------------------------------------------------------------------------- /exp/cases/1M-acc.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=1080000; nb[1]=2700; acc[1]=1; maxrank[1]=40; 2 | nrows[2]=1080000; nb[2]=2700; acc[2]=2; maxrank[2]=48; 3 | nrows[3]=1080000; nb[3]=2700; acc[3]=3; maxrank[3]=56; 4 | nrows[4]=1080000; nb[4]=2700; acc[4]=4; maxrank[4]=67; #66 5 | nrows[5]=1080000; nb[5]=2700; acc[5]=5; maxrank[5]=76; 6 | nrows[6]=1080000; nb[6]=2700; acc[6]=6; maxrank[6]=86; #85 7 | nrows[7]=1080000; nb[7]=2700; acc[7]=7; maxrank[7]=96; 8 | nrows[8]=1080000; nb[8]=2700; acc[8]=8; maxrank[8]=106; 9 | nrows[9]=1080000; nb[9]=2700; acc[9]=9; maxrank[9]=117; #116 10 | nrows[10]=1080000; nb[10]=2700; acc[10]=10; maxrank[10]=127; 11 | nrows[11]=1080000; nb[11]=2700; acc[11]=11; maxrank[11]=141; 12 | nrows[12]=1080000; nb[12]=2700; acc[12]=12; maxrank[12]=152; 13 | nrows[13]=1080000; nb[13]=2700; acc[13]=13; maxrank[13]=166; #165 14 | 15 | nrows[1]=1080000; nb[1]=2700; acc[1]=7; maxrank[1]=25; 16 | nrows[2]=1080000; nb[2]=2700; acc[2]=8; maxrank[2]=29; 17 | nrows[3]=1080000; nb[3]=2700; acc[3]=9; maxrank[3]=33; 18 | nrows[4]=1080000; nb[4]=2700; acc[4]=10; maxrank[4]=37; 19 | nrows[5]=1080000; nb[5]=2700; acc[5]=11; maxrank[5]=41; 20 | nrows[6]=1080000; nb[6]=2700; acc[6]=12; maxrank[6]=46; 21 | 
nrows[7]=1080000; nb[7]=2700; acc[7]=13; maxrank[7]=51; 22 | 23 | note="Hicma - accuracy - SS - custom maxrank" 24 | _appdata="--ss" 25 | allcaseids[64]="`seq 1 7`" 26 | allcaseids[64]="3 4 5 6 7" 27 | 28 | #note="Hicma - accuracy - EDSIN - 1e-4missing" 29 | #_appdata="--edsin"; _wavek=200 30 | #allcaseids[64]="4 6 9 13" 31 | 32 | _compmaxrank=300 33 | timelimit="06:00:00" 34 | step=10 35 | nprocs="64" 36 | 37 | 38 | -------------------------------------------------------------------------------- /exp/cases/1M-few-case.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=65536; nb[1]=1024 2 | nrows[2]=56644; nb[2]=1156 3 | nrows[3]=63504; nb[3]=1296 4 | nrows[4]=51984; nb[4]=1444 5 | nrows[5]=57600; nb[5]=1600 6 | nrows[6]=63504; nb[6]=1764 7 | nrows[7]=69696; nb[7]=1936 8 | nrows[8]=147456; nb[8]=1024 9 | nrows[9]=166464; nb[9]=1156 10 | nrows[10]=156816; nb[10]=1296 11 | nrows[11]=144400; nb[11]=1444 12 | nrows[12]=160000; nb[12]=1600 13 | nrows[13]=142884; nb[13]=1764 14 | nrows[14]=156816; nb[14]=1936 15 | nrows[15]=262144; nb[15]=1024 16 | nrows[16]=260100; nb[16]=1156 17 | nrows[17]=254016; nb[17]=1296 18 | nrows[18]=244036; nb[18]=1444 19 | nrows[19]=230400; nb[19]=1600 20 | nrows[20]=254016; nb[20]=1764 21 | nrows[21]=234256; nb[21]=1936 22 | nrows[22]=369664; nb[22]=1024 23 | nrows[23]=334084; nb[23]=1156 24 | nrows[24]=331776; nb[24]=1296 25 | nrows[25]=369664; nb[25]=1444 26 | nrows[26]=360000; nb[26]=1600 27 | nrows[27]=345744; nb[27]=1764 28 | nrows[28]=327184; nb[28]=1936 29 | nrows[29]=451584; nb[29]=1024 30 | nrows[30]=462400; nb[30]=1156 31 | nrows[31]=467856; nb[31]=1296 32 | nrows[32]=467856; nb[32]=1444 33 | nrows[33]=462400; nb[33]=1600 34 | nrows[34]=451584; nb[34]=1764 35 | nrows[35]=435600; nb[35]=1936 36 | nrows[36]=541696; nb[36]=1024 37 | nrows[37]=559504; nb[37]=1156 38 | nrows[38]=518400; nb[38]=1296 39 | nrows[39]=521284; nb[39]=1444 40 | nrows[40]=518400; nb[40]=1600 41 | nrows[41]=509796; 
nb[41]=1764 42 | nrows[42]=559504; nb[42]=1936 43 | nrows[43]=640000; nb[43]=1024 44 | nrows[44]=665856; nb[44]=1156 45 | nrows[45]=627264; nb[45]=1296 46 | nrows[46]=636804; nb[46]=1444 47 | nrows[47]=640000; nb[47]=1600 48 | nrows[48]=636804; nb[48]=1764 49 | nrows[49]=627264; nb[49]=1936 50 | nrows[50]=746496; nb[50]=1024 51 | nrows[51]=722500; nb[51]=1156 52 | nrows[52]=746496; nb[52]=1296 53 | nrows[53]=763876; nb[53]=1444 54 | nrows[54]=705600; nb[54]=1600 55 | nrows[55]=705600; nb[55]=1764 56 | nrows[56]=698896; nb[56]=1936 57 | nrows[57]=861184; nb[57]=1024 58 | nrows[58]=842724; nb[58]=1156 59 | nrows[59]=810000; nb[59]=1296 60 | nrows[60]=831744; nb[60]=1444 61 | nrows[61]=846400; nb[61]=1600 62 | nrows[62]=853776; nb[62]=1764 63 | nrows[63]=853776; nb[63]=1936 64 | nrows[64]=921600; nb[64]=1024 65 | nrows[65]=906304; nb[65]=1156 66 | nrows[66]=944784; nb[66]=1296 67 | nrows[67]=902500; nb[67]=1444 68 | nrows[68]=921600; nb[68]=1600 69 | nrows[69]=933156; nb[69]=1764 70 | nrows[70]=937024; nb[70]=1936 71 | nrows[71]=1048576; nb[71]=1024 72 | nrows[72]=1040400; nb[72]=1156 73 | nrows[73]=1016064; nb[73]=1296 74 | nrows[74]=1052676; nb[74]=1444 75 | nrows[75]=1000000; nb[75]=1600 76 | nrows[76]=1016064; nb[76]=1764 77 | nrows[77]=1024144; nb[77]=1936 78 | -------------------------------------------------------------------------------- /exp/cases/500K-1M.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=540000; nb[1]=1125 2 | nrows[2]=540000; nb[2]=1350 3 | nrows[3]=540000; nb[3]=1500 4 | nrows[4]=540000; nb[4]=2250 5 | nrows[5]=540000; nb[5]=2700 6 | nrows[6]=540000; nb[6]=3375 7 | nrows[7]=675000; nb[7]=1125 8 | nrows[8]=675000; nb[8]=1350 9 | nrows[9]=675000; nb[9]=1500 10 | nrows[10]=675000; nb[10]=2250 11 | nrows[11]=675000; nb[11]=2700 12 | nrows[12]=675000; nb[12]=3375 13 | nrows[13]=810000; nb[13]=1125 14 | nrows[14]=810000; nb[14]=1350 15 | nrows[15]=810000; nb[15]=1500 16 | nrows[16]=810000; 
nb[16]=2250 17 | nrows[17]=810000; nb[17]=2700 18 | nrows[18]=810000; nb[18]=3375 19 | nrows[19]=945000; nb[19]=1125 20 | nrows[20]=945000; nb[20]=1350 21 | nrows[21]=945000; nb[21]=1500 22 | nrows[22]=945000; nb[22]=2250 23 | nrows[23]=945000; nb[23]=2700 24 | nrows[24]=945000; nb[24]=3375 25 | nrows[25]=1080000; nb[25]=1125 26 | nrows[26]=1080000; nb[26]=1350 27 | nrows[27]=1080000; nb[27]=1500 28 | nrows[28]=1080000; nb[28]=2250 29 | nrows[29]=1080000; nb[29]=2700 30 | nrows[30]=1080000; nb[30]=3375 31 | 32 | -------------------------------------------------------------------------------- /exp/cases/ae-10M.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=1080000; nb[1]=2700; acc[1]=8; maxrank[1]=50; 2 | nrows[2]=1080000; nb[2]=3000; acc[2]=8; maxrank[2]=50; 3 | nrows[3]=1080000; nb[3]=3375; acc[3]=8; maxrank[3]=50; 4 | nrows[4]=1080000; nb[4]=4500; acc[4]=8; maxrank[4]=50; 5 | nrows[5]=2295000; nb[5]=2700; acc[5]=8; maxrank[5]=50; 6 | nrows[6]=2295000; nb[6]=3000; acc[6]=8; maxrank[6]=50; 7 | nrows[7]=2295000; nb[7]=3375; acc[7]=8; maxrank[7]=50; 8 | nrows[8]=2295000; nb[8]=4500; acc[8]=8; maxrank[8]=50; 9 | nrows[9]=3510000; nb[9]=2700; acc[9]=8; maxrank[9]=50; 10 | nrows[10]=3510000; nb[10]=3000; acc[10]=8; maxrank[10]=50; 11 | nrows[11]=3510000; nb[11]=3375; acc[11]=8; maxrank[11]=50; 12 | nrows[12]=3510000; nb[12]=4500; acc[12]=8; maxrank[12]=50; 13 | nrows[13]=4725000; nb[13]=2700; acc[13]=8; maxrank[13]=50; 14 | nrows[14]=4725000; nb[14]=3000; acc[14]=8; maxrank[14]=50; 15 | nrows[15]=4725000; nb[15]=3375; acc[15]=8; maxrank[15]=50; 16 | nrows[16]=4725000; nb[16]=4500; acc[16]=8; maxrank[16]=50; 17 | nrows[17]=5940000; nb[17]=2700; acc[17]=8; maxrank[17]=50; 18 | nrows[18]=5940000; nb[18]=3000; acc[18]=8; maxrank[18]=50; 19 | nrows[19]=5940000; nb[19]=3375; acc[19]=8; maxrank[19]=50; 20 | nrows[20]=5940000; nb[20]=4500; acc[20]=8; maxrank[20]=50; 21 | nrows[21]=8100000; nb[21]=2700; acc[21]=8; 
maxrank[21]=50; 22 | nrows[22]=8100000; nb[22]=3000; acc[22]=8; maxrank[22]=50; 23 | nrows[23]=8100000; nb[23]=3375; acc[23]=8; maxrank[23]=50; 24 | nrows[24]=8100000; nb[24]=4500; acc[24]=8; maxrank[24]=50; 25 | nrows[25]=10800000; nb[25]=2700; acc[25]=8; maxrank[25]=50; 26 | nrows[26]=10800000; nb[26]=3000; acc[26]=8; maxrank[26]=50; 27 | nrows[27]=10800000; nb[27]=3375; acc[27]=8; maxrank[27]=50; 28 | nrows[28]=10800000; nb[28]=4500; acc[28]=8; maxrank[28]=50; 29 | 30 | _appdata="--ss" 31 | 32 | ##number of experimental instances inside one slurm job 33 | step=1 34 | note="Hicma runs - Skylake cluster" 35 | allcaseids[16]="1 7" 36 | allcaseids[32]="1 5 12" 37 | allcaseids[64]="1 5 11 15" 38 | allcaseids[128]="1 5 11 15 20" 39 | allcaseids[256]="19 24" 40 | -------------------------------------------------------------------------------- /exp/cases/biggersize-allnb-1.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=230400; nb[1]=256 2 | nrows[3]=230400; nb[3]=400 3 | nrows[4]=234256; nb[4]=484 4 | nrows[5]=230400; nb[5]=576 5 | nrows[7]=226576; nb[7]=784 6 | nrows[8]=230400; nb[8]=900 7 | nrows[9]=230400; nb[9]=1024 8 | nrows[10]=226576; nb[10]=1156 9 | nrows[12]=207936; nb[12]=1444 10 | nrows[13]=230400; nb[13]=1600 11 | nrows[14]=236196; nb[14]=324 12 | nrows[15]=244036; nb[15]=676 13 | nrows[16]=244036; nb[16]=1444 14 | nrows[17]=246016; nb[17]=256 15 | nrows[18]=250000; nb[18]=400 16 | nrows[19]=254016; nb[19]=324 17 | nrows[20]=254016; nb[20]=576 18 | nrows[21]=254016; nb[21]=784 19 | nrows[22]=254016; nb[22]=1296 20 | nrows[23]=256036; nb[23]=484 21 | nrows[24]=262144; nb[24]=256 22 | nrows[25]=260100; nb[25]=900 23 | nrows[26]=262144; nb[26]=1024 24 | nrows[27]=260100; nb[27]=1156 25 | nrows[28]=272484; nb[28]=324 26 | nrows[29]=270400; nb[29]=400 27 | nrows[30]=270400; nb[30]=676 28 | nrows[31]=270400; nb[31]=1600 29 | nrows[32]=278784; nb[32]=256 30 | nrows[33]=278784; nb[33]=484 31 | nrows[34]=278784; 
nb[34]=576 32 | nrows[35]=283024; nb[35]=784 33 | nrows[36]=283024; nb[36]=1444 34 | nrows[37]=291600; nb[37]=324 35 | nrows[38]=291600; nb[38]=400 36 | nrows[39]=291600; nb[39]=900 37 | nrows[40]=291600; nb[40]=1296 38 | nrows[41]=295936; nb[41]=256 39 | nrows[42]=298116; nb[42]=676 40 | nrows[43]=295936; nb[43]=1024 41 | nrows[44]=295936; nb[44]=1156 42 | nrows[45]=302500; nb[45]=484 43 | nrows[46]=304704; nb[46]=576 44 | nrows[47]=313600; nb[47]=256 45 | nrows[48]=311364; nb[48]=324 46 | nrows[49]=313600; nb[49]=400 47 | nrows[50]=313600; nb[50]=784 48 | nrows[51]=313600; nb[51]=1600 49 | nrows[52]=324900; nb[52]=900 50 | nrows[53]=324900; nb[53]=1444 51 | nrows[54]=327184; nb[54]=484 52 | nrows[55]=327184; nb[55]=676 53 | nrows[56]=331776; nb[56]=256 54 | nrows[57]=331776; nb[57]=324 55 | nrows[58]=331776; nb[58]=576 56 | nrows[59]=331776; nb[59]=1024 57 | nrows[60]=334084; nb[60]=1156 58 | nrows[61]=331776; nb[61]=1296 59 | -------------------------------------------------------------------------------- /exp/cases/cfd_virus1024.sh: -------------------------------------------------------------------------------- 1 | #compmaxrank is IPARAM_HICMA_STARSH_MAXRANK, is used 2 | #in Starsh matrix generation as a limit 3 | #to allocated temporary buffer for hcore_gemm 4 | #in checking the width of concatenated matrices in QR of HCORE_GEMM 5 | 6 | #maxrank is used to determine the number of columns corresponding to rank of a tile. 7 | #it is used in this formula: number_of_tiles * maxrank. 8 | #this formula gives the total number of columns of the matrix storing low rank tiles 9 | #this value is passed as --nb to program. 10 | 11 | #nb is the number of ROWS in a tile. 
12 | 13 | 14 | 15 | 16 | 17 | _ci=48; nrows[$_ci]=4002310; nb[$_ci]=5120; acc[$_ci]="1e-6 1e-8"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=34; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/4002310.txt"; rbf_kernel[$_ci]="9"; reg[$_ci]=1.1; 18 | 19 | _ci=49; nrows[$_ci]=4237740; nb[$_ci]=5120; acc[$_ci]="1e-6 1e-8"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=36; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/4002310.txt"; rbf_kernel[$_ci]="9"; reg[$_ci]=1.1; 20 | 21 | _ci=50; nrows[$_ci]=4237740; nb[$_ci]=7244; acc[$_ci]="1e-6 1e-8"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=36; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/4237740.txt"; rbf_kernel[$_ci]="9"; reg[$_ci]=1.1; 22 | 23 | _ci=51; nrows[$_ci]=4473170; nb[$_ci]=5120; acc[$_ci]="1e-6 1e-8"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=38; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/4473170.txt"; rbf_kernel[$_ci]="9"; reg[$_ci]=1.1; 24 | 25 | _ci=52; nrows[$_ci]=4473170; nb[$_ci]=7244; acc[$_ci]="1e-6 1e-8"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=38; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/4473170.txt"; rbf_kernel[$_ci]="9"; reg[$_ci]=1.1; 26 | 27 | 28 | nprocs="1024" 29 | allcaseids[1024]="48 49 50 51 52" 30 | 31 | 32 | prog="hic" 33 | step=1 34 | timelimit="04:00:00" 35 | note="Hicma beta=0.01 $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 36 | 37 | 38 | -------------------------------------------------------------------------------- /exp/cases/cfd_virus1_batch.sh: -------------------------------------------------------------------------------- 1 | #compmaxrank is IPARAM_HICMA_STARSH_MAXRANK, is used 2 | #in Starsh matrix generation as a limit 3 | #to allocated 
temporary buffer for hcore_gemm 4 | #in checking the width of concatenated matrices in QR of HCORE_GEMM 5 | 6 | #maxrank is used to determine the number of columns corresponding to rank of a tile. 7 | #it is used in this formula: number_of_tiles * maxrank. 8 | #this formula gives the total number of columns of the matrix storing low rank tiles 9 | #this value is passed as --nb to program. 10 | 11 | #nb is the number of ROWS in a tile. 12 | 13 | 14 | _ci=40; nrows[$_ci]=20740; nb[$_ci]=1037; acc[$_ci]="1e-6"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=8; numsubobj[$_ci]=2; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/S2data_20k/"; rbf_kernel[$_ci]="9"; denst[$_ci]=-1; 15 | 16 | _ci=41; nrows[$_ci]=207400 nb[$_ci]=3050; acc[$_ci]="1e-6"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=60; numsubobj[$_ci]=240; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/S2data_200k/"; rbf_kernel[$_ci]="9"; denst[$_ci]=-1; 17 | 18 | 19 | nprocs="1" 20 | allcaseids[1]="40" 21 | 22 | 23 | prog="hic" 24 | step=1 25 | timelimit="04:00:00" 26 | note="Hicma beta=0.01 $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 27 | 28 | 29 | -------------------------------------------------------------------------------- /exp/cases/cfd_virus4.sh: -------------------------------------------------------------------------------- 1 | #compmaxrank is IPARAM_HICMA_STARSH_MAXRANK, is used 2 | #in Starsh matrix generation as a limit 3 | #to allocated temporary buffer for hcore_gemm 4 | #in checking the width of concatenated matrices in QR of HCORE_GEMM 5 | 6 | #maxrank is used to determine the number of columns corresponding to rank of a tile. 7 | #it is used in this formula: number_of_tiles * maxrank. 8 | #this formula gives the total number of columns of the matrix storing low rank tiles 9 | #this value is passed as --nb to program. 
10 | 11 | #nb is the number of ROWS in a tile. 12 | 13 | 14 | 15 | _ci=40; nrows[$_ci]=103700; nb[$_ci]=2074; acc[$_ci]="1e-5 1e-6 1e-7"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=10; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/S1data/SortVirus103700.txt"; rbf_kernel[$_ci]="9"; 16 | _ci=41; nrows[$_ci]=207400 nb[$_ci]=3050; acc[$_ci]="1e-5 1e-6 1e-7"; maxrank[$_ci]=100; compmaxrank[$_ci]=200; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=20; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/S1data/SortVirus207400.txt"; rbf_kernel[$_ci]="9"; 17 | 18 | 19 | nprocs="4" 20 | allcaseids[4]="40 41" 21 | 22 | 23 | prog="hic" 24 | step=1 25 | timelimit="04:00:00" 26 | note="Hicma beta=0.01 $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 27 | 28 | 29 | -------------------------------------------------------------------------------- /exp/cases/cfd_virus4_batch.sh: -------------------------------------------------------------------------------- 1 | #compmaxrank is IPARAM_HICMA_STARSH_MAXRANK, is used 2 | #in Starsh matrix generation as a limit 3 | #to allocated temporary buffer for hcore_gemm 4 | #in checking the width of concatenated matrices in QR of HCORE_GEMM 5 | 6 | #maxrank is used to determine the number of columns corresponding to rank of a tile. 7 | #it is used in this formula: number_of_tiles * maxrank. 8 | #this formula gives the total number of columns of the matrix storing low rank tiles 9 | #this value is passed as --nb to program. 10 | 11 | #nb is the number of ROWS in a tile. 
12 | 13 | 14 | 15 | _ci=40; nrows[$_ci]=207400 nb[$_ci]=3050; acc[$_ci]="1e-6"; maxrank[$_ci]=100; compmaxrank[$_ci]=200; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; numobj[$_ci]=240; numsubobj[$_ci]=20; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/S2data_200k/"; rbf_kernel[$_ci]="9"; 16 | 17 | 18 | nprocs="4" 19 | allcaseids[4]="40" 20 | 21 | 22 | prog="hic" 23 | step=1 24 | timelimit="04:00:00" 25 | note="Hicma beta=0.01 $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 26 | 27 | 28 | -------------------------------------------------------------------------------- /exp/cases/cfd_virus_10km.sh: -------------------------------------------------------------------------------- 1 | #compmaxrank is IPARAM_HICMA_STARSH_MAXRANK, is used 2 | #in Starsh matrix generation as a limit 3 | #to allocated temporary buffer for hcore_gemm 4 | #in checking the width of concatenated matrices in QR of HCORE_GEMM 5 | 6 | #maxrank is used to determine the number of columns corresponding to rank of a tile. 7 | #it is used in this formula: number_of_tiles * maxrank. 8 | #this formula gives the total number of columns of the matrix storing low rank tiles 9 | #this value is passed as --nb to program. 10 | 11 | #nb is the number of ROWS in a tile. 
12 | 13 | _ci=0; nrows[$_ci]=103700; nb[$_ci]=2074; acc[$_ci]="1e-5"; maxrank[$_ci]=50; compmaxrank[$_ci]=100; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/data/SortVirus103700.txt"; rbf_kernel[$_ci]="9"; numobj[$_ci]=10; 14 | 15 | _ci=1; nrows[$_ci]=103700; nb[$_ci]=2074; acc[$_ci]="1e-6"; maxrank[$_ci]=60; compmaxrank[$_ci]=120; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/data/SortVirus103700.txt"; rbf_kernel[$_ci]="9"; numobj[$_ci]=10; 16 | 17 | _ci=2; nrows[$_ci]=103700; nb[$_ci]=2074; acc[$_ci]="1e-7"; maxrank[$_ci]=100; compmaxrank[$_ci]=200; appdata[$_ci]="--m-3D-rbf"; rad[$_ci]=-1; order[$_ci]=2; mesh_file[$_ci]="stars-h/SARS-CoV-2-meshes/data/SortVirus103700.txt"; rbf_kernel[$_ci]="9"; numobj[$_ci]=10; 18 | 19 | 20 | 21 | norocs="1 2 4 8 16" 22 | nprocs="1" 23 | 24 | allcaseids[1]="`seq 0 2`" 25 | prog="hic" 26 | 27 | allcaseids[2]="`seq 1 4` `seq 9 12`" 28 | 29 | step=1 30 | timelimit="00:30:00" 31 | #_compmaxrank=150 #for 54000 maxrank=100 32 | note="Hicma beta=0.01 $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 33 | 34 | #prog="hic" 35 | #prog="cham" 36 | 37 | -------------------------------------------------------------------------------- /exp/cases/cham.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=1080000; nb[1]=2700; acc[1]=1; maxrank[1]=40; 2 | nrows[2]=1080000; nb[2]=2700; acc[2]=2; maxrank[2]=48; 3 | nrows[3]=1080000; nb[3]=2700; acc[3]=3; maxrank[3]=56; 4 | nrows[4]=1080000; nb[4]=2700; acc[4]=4; maxrank[4]=67; #66 5 | nrows[5]=1080000; nb[5]=2700; acc[5]=5; maxrank[5]=76; 6 | nrows[6]=1080000; nb[6]=2700; acc[6]=6; maxrank[6]=86; #85 7 | nrows[7]=1080000; nb[7]=2700; acc[7]=7; maxrank[7]=96; 8 | nrows[8]=1080000; nb[8]=2700; acc[8]=8; maxrank[8]=106; 9 | nrows[9]=1080000; nb[9]=2700; acc[9]=9; maxrank[9]=117; #116 10 | nrows[10]=1080000; nb[10]=2700; acc[10]=10; 
maxrank[10]=127; 11 | nrows[11]=1080000; nb[11]=2700; acc[11]=11; maxrank[11]=141; 12 | nrows[12]=1080000; nb[12]=2700; acc[12]=12; maxrank[12]=152; 13 | nrows[13]=1080000; nb[13]=2700; acc[13]=13; maxrank[13]=166; #165 14 | 15 | nrows[1]=108000; nb[1]=1125; acc[1]=8; maxrank[1]=41; 16 | nrows[1]=54000; nb[1]=1125; acc[1]=8; maxrank[1]=52; 17 | 18 | #nrows[1]=108000; nb[1]=320; acc[1]=8; maxrank[1]=41; #cham 19 | #nrows[1]=54000; nb[1]=320; acc[1]=8; maxrank[1]=41; #cham 20 | 21 | note="Chameleon potrf runs" 22 | allcaseids[256]="`seq 1 63`" 23 | timelimit="03:00:00" 24 | step=1 25 | nprocs="256" 26 | 27 | nrows[1]=54000; nb[1]=280 28 | nrows[2]=54000; nb[2]=300 29 | nrows[3]=54000; nb[3]=320 30 | nrows[4]=81000; nb[4]=280 31 | nrows[5]=81000; nb[5]=300 32 | nrows[6]=81000; nb[6]=320 33 | nrows[7]=108000; nb[7]=280 34 | nrows[8]=108000; nb[8]=300 35 | nrows[9]=108000; nb[9]=320 36 | nrows[10]=135000; nb[10]=280 37 | nrows[11]=135000; nb[11]=300 38 | nrows[12]=135000; nb[12]=320 39 | nrows[13]=162000; nb[13]=280 40 | nrows[14]=162000; nb[14]=300 41 | nrows[15]=162000; nb[15]=320 42 | nrows[16]=189000; nb[16]=280 43 | nrows[17]=189000; nb[17]=300 44 | nrows[18]=189000; nb[18]=320 45 | nrows[19]=216000; nb[19]=280 46 | nrows[20]=216000; nb[20]=300 47 | nrows[21]=216000; nb[21]=320 48 | nrows[22]=243000; nb[22]=280 49 | nrows[23]=243000; nb[23]=300 50 | nrows[24]=243000; nb[24]=320 51 | nrows[25]=270000; nb[25]=280 52 | nrows[26]=270000; nb[26]=300 53 | nrows[27]=270000; nb[27]=320 54 | nrows[28]=297000; nb[28]=280 55 | nrows[29]=297000; nb[29]=300 56 | nrows[30]=297000; nb[30]=320 57 | nrows[31]=324000; nb[31]=280 58 | nrows[32]=324000; nb[32]=300 59 | nrows[33]=324000; nb[33]=320 60 | nrows[34]=351000; nb[34]=280 61 | nrows[35]=351000; nb[35]=300 62 | nrows[36]=351000; nb[36]=320 63 | nrows[37]=378000; nb[37]=280 64 | nrows[38]=378000; nb[38]=300 65 | nrows[39]=378000; nb[39]=320 66 | nrows[40]=405000; nb[40]=280 67 | nrows[41]=405000; nb[41]=300 68 | 
nrows[42]=405000; nb[42]=320 69 | nrows[43]=432000; nb[43]=280 70 | nrows[44]=432000; nb[44]=300 71 | nrows[45]=432000; nb[45]=320 72 | nrows[46]=459000; nb[46]=280 73 | nrows[47]=459000; nb[47]=300 74 | nrows[48]=459000; nb[48]=320 75 | nrows[49]=486000; nb[49]=280 76 | nrows[50]=486000; nb[50]=300 77 | nrows[51]=486000; nb[51]=320 78 | nrows[52]=513000; nb[52]=280 79 | nrows[53]=513000; nb[53]=300 80 | nrows[54]=513000; nb[54]=320 81 | nrows[55]=540000; nb[55]=280 82 | nrows[56]=540000; nb[56]=300 83 | nrows[57]=540000; nb[57]=320 84 | nrows[58]=567000; nb[58]=280 85 | nrows[59]=567000; nb[59]=300 86 | nrows[60]=567000; nb[60]=320 87 | nrows[61]=594000; nb[61]=280 88 | nrows[62]=594000; nb[62]=300 89 | nrows[63]=594000; nb[63]=320 90 | -------------------------------------------------------------------------------- /exp/cases/cham32.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=160000; nb[1]=200 2 | nrows[2]=160000; nb[2]=300 3 | nrows[3]=160000; nb[3]=400 4 | nrows[4]=160000; nb[4]=500 5 | -------------------------------------------------------------------------------- /exp/cases/edsin2.sh: -------------------------------------------------------------------------------- 1 | sizes="big" 2 | sizes="small" 3 | if [ "$sizes" == "small" ];then 4 | nrows[1]=54000; nb[1]=2700; acc[1]=8; maxrank[1]=106; 5 | nrows[2]=81000; nb[2]=2700; acc[2]=8; maxrank[2]=106; 6 | nrows[3]=108000; nb[3]=2700; acc[3]=8; maxrank[3]=106; 7 | nrows[4]=135000; nb[4]=2700; acc[4]=8; maxrank[4]=106; 8 | nrows[5]=162000; nb[5]=2700; acc[5]=8; maxrank[5]=106; 9 | nrows[6]=189000; nb[6]=2700; acc[6]=8; maxrank[6]=106; 10 | nrows[7]=216000; nb[7]=2700; acc[7]=8; maxrank[7]=106; 11 | nrows[8]=243000; nb[8]=2700; acc[8]=8; maxrank[8]=106; 12 | nrows[9]=270000; nb[9]=2700; acc[9]=8; maxrank[9]=106; 13 | nrows[10]=297000; nb[10]=2700; acc[10]=8; maxrank[10]=106; 14 | nrows[11]=324000; nb[11]=2700; acc[11]=8; maxrank[11]=106; 15 | nrows[12]=351000; 
nb[12]=2700; acc[12]=8; maxrank[12]=106; 16 | nrows[13]=378000; nb[13]=2700; acc[13]=8; maxrank[13]=106; 17 | nrows[14]=405000; nb[14]=2700; acc[14]=8; maxrank[14]=106; 18 | nrows[15]=432000; nb[15]=2700; acc[15]=8; maxrank[15]=106; 19 | nrows[16]=459000; nb[16]=2700; acc[16]=8; maxrank[16]=106; 20 | nrows[17]=486000; nb[17]=2700; acc[17]=8; maxrank[17]=106; 21 | nrows[18]=513000; nb[18]=2700; acc[18]=8; maxrank[18]=106; 22 | nrows[19]=540000; nb[19]=2700; acc[19]=8; maxrank[19]=106; 23 | nrows[20]=567000; nb[20]=2700; acc[20]=8; maxrank[20]=106; 24 | nrows[21]=594000; nb[21]=2700; acc[21]=8; maxrank[21]=106; 25 | allcaseids[16]="`seq 1 21`" 26 | allcaseids[32]="`seq 1 21`" 27 | allcaseids[64]="`seq 1 21`" 28 | allcaseids[128]="`seq 1 21`" 29 | allcaseids[256]="`seq 1 21`" 30 | nprocs="16 32 64 128 256" 31 | else 32 | nrows[1]=1080000; nb[1]=2700; acc[1]=8; maxrank[1]=100; 33 | nrows[2]=1080000; nb[2]=3000; acc[2]=8; maxrank[2]=100; 34 | nrows[3]=1080000; nb[3]=3375; acc[3]=8; maxrank[3]=100; 35 | nrows[4]=1080000; nb[4]=4500; acc[4]=8; maxrank[4]=100; 36 | nrows[5]=2295000; nb[5]=2700; acc[5]=8; maxrank[5]=100; 37 | nrows[6]=2295000; nb[6]=3000; acc[6]=8; maxrank[6]=100; 38 | nrows[7]=2295000; nb[7]=3375; acc[7]=8; maxrank[7]=100; 39 | nrows[8]=2295000; nb[8]=4500; acc[8]=8; maxrank[8]=100; 40 | nrows[9]=3510000; nb[9]=2700; acc[9]=8; maxrank[9]=100; 41 | nrows[10]=3510000; nb[10]=3000; acc[10]=8; maxrank[10]=100; 42 | nrows[11]=3510000; nb[11]=3375; acc[11]=8; maxrank[11]=100; 43 | nrows[12]=3510000; nb[12]=4500; acc[12]=8; maxrank[12]=100; 44 | nrows[13]=4725000; nb[13]=2700; acc[13]=8; maxrank[13]=100; 45 | nrows[14]=4725000; nb[14]=3000; acc[14]=8; maxrank[14]=100; 46 | nrows[15]=4725000; nb[15]=3375; acc[15]=8; maxrank[15]=100; 47 | nrows[16]=4725000; nb[16]=4500; acc[16]=8; maxrank[16]=100; 48 | nrows[17]=5940000; nb[17]=2700; acc[17]=8; maxrank[17]=100; 49 | nrows[18]=5940000; nb[18]=3000; acc[18]=8; maxrank[18]=100; 50 | nrows[19]=5940000; nb[19]=3375; 
acc[19]=8; maxrank[19]=100; 51 | nrows[20]=5940000; nb[20]=4500; acc[20]=8; maxrank[20]=100; 52 | nrows[21]=8100000; nb[21]=2700; acc[21]=8; maxrank[21]=100; 53 | nrows[22]=8100000; nb[22]=3000; acc[22]=8; maxrank[22]=100; 54 | nrows[23]=8100000; nb[23]=3375; acc[23]=8; maxrank[23]=100; 55 | nrows[24]=8100000; nb[24]=4500; acc[24]=8; maxrank[24]=100; 56 | nrows[25]=10800000; nb[25]=2700; acc[25]=8; maxrank[25]=100; 57 | nrows[26]=10800000; nb[26]=3000; acc[26]=8; maxrank[26]=100; 58 | nrows[27]=10800000; nb[27]=3375; acc[27]=8; maxrank[27]=100; 59 | nrows[28]=10800000; nb[28]=4500; acc[28]=8; maxrank[28]=100; 60 | allcaseids[16]="`seq 1 8`" 61 | allcaseids[32]="`seq 1 12`" 62 | allcaseids[64]="`seq 1 15`" 63 | allcaseids[128]="`seq 1 20`" 64 | allcaseids[256]="`seq 17 24`" 65 | allcaseids[512]="`seq 21 28`" 66 | nprocs="16 32 64 128 256 512" 67 | fi 68 | 69 | 70 | 71 | step=1 72 | _appdata="--edsin"; timelimit="00:25:00" 73 | _appdata="--edsin"; _wavek=100; _compmaxrank=250; 74 | note="Hicma only dense matrix $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 75 | 76 | 77 | -------------------------------------------------------------------------------- /exp/cases/fxt-100K-16nodes.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=1080000; nb[1]=2700; acc[1]=1; maxrank[1]=40; 2 | nrows[2]=1080000; nb[2]=2700; acc[2]=2; maxrank[2]=48; 3 | nrows[3]=1080000; nb[3]=2700; acc[3]=3; maxrank[3]=56; 4 | nrows[4]=1080000; nb[4]=2700; acc[4]=4; maxrank[4]=67; #66 5 | nrows[5]=1080000; nb[5]=2700; acc[5]=5; maxrank[5]=76; 6 | nrows[6]=1080000; nb[6]=2700; acc[6]=6; maxrank[6]=86; #85 7 | nrows[7]=1080000; nb[7]=2700; acc[7]=7; maxrank[7]=96; 8 | nrows[8]=1080000; nb[8]=2700; acc[8]=8; maxrank[8]=106; 9 | nrows[9]=1080000; nb[9]=2700; acc[9]=9; maxrank[9]=117; #116 10 | nrows[10]=1080000; nb[10]=2700; acc[10]=10; maxrank[10]=127; 11 | nrows[11]=1080000; nb[11]=2700; acc[11]=11; maxrank[11]=141; 12 | nrows[12]=1080000; 
nb[12]=2700; acc[12]=12; maxrank[12]=152; 13 | nrows[13]=1080000; nb[13]=2700; acc[13]=13; maxrank[13]=166; #165 14 | 15 | nrows[1]=108000; nb[1]=1125; acc[1]=8; maxrank[1]=41; 16 | nrows[1]=54000; nb[1]=1125; acc[1]=8; maxrank[1]=52; 17 | 18 | #nrows[1]=108000; nb[1]=320; acc[1]=8; maxrank[1]=41; #cham 19 | #nrows[1]=54000; nb[1]=320; acc[1]=8; maxrank[1]=41; #cham 20 | 21 | _appdata="--ss" 22 | 23 | #_appdata="--edsin"; _wavek=200 24 | 25 | note="Hicma runs - SS - FXT" 26 | allcaseids[4]="1" 27 | allcaseids[16]="1" 28 | timelimit="01:00:00" 29 | step=10 30 | nprocs="16" 31 | nprocs="4" 32 | trace="trace" 33 | 34 | 35 | -------------------------------------------------------------------------------- /exp/cases/matern.sh: -------------------------------------------------------------------------------- 1 | nrows[1]=56644; nb[1]=1156; acc[1]=8; maxrank[1]=200; 2 | nrows[2]=73984; nb[2]=1156; acc[2]=8; maxrank[2]=200; 3 | nrows[3]=93636; nb[3]=1156; acc[3]=8; maxrank[3]=200; 4 | nrows[4]=93636; nb[4]=2601; acc[4]=8; maxrank[4]=200; 5 | nrows[5]=115600; nb[5]=1156; acc[5]=8; maxrank[5]=200; 6 | nrows[6]=139876; nb[6]=1156; acc[6]=8; maxrank[6]=200; 7 | nrows[7]=166464; nb[7]=1156; acc[7]=8; maxrank[7]=200; 8 | nrows[8]=166464; nb[8]=2601; acc[8]=8; maxrank[8]=200; 9 | nrows[9]=195364; nb[9]=1156; acc[9]=8; maxrank[9]=200; 10 | nrows[10]=226576; nb[10]=1156; acc[10]=8; maxrank[10]=200; 11 | nrows[11]=260100; nb[11]=900; acc[11]=8; maxrank[11]=200; 12 | nrows[12]=260100; nb[12]=1156; acc[12]=8; maxrank[12]=200; 13 | nrows[13]=260100; nb[13]=2601; acc[13]=8; maxrank[13]=200; 14 | nrows[14]=295936; nb[14]=1024; acc[14]=8; maxrank[14]=200; 15 | nrows[15]=295936; nb[15]=1156; acc[15]=8; maxrank[15]=200; 16 | nrows[16]=334084; nb[16]=1156; acc[16]=8; maxrank[16]=200; 17 | nrows[17]=374544; nb[17]=1156; acc[17]=8; maxrank[17]=200; 18 | nrows[18]=374544; nb[18]=1296; acc[18]=8; maxrank[18]=200; 19 | nrows[19]=374544; nb[19]=2601; acc[19]=8; maxrank[19]=200; 20 | 
nrows[20]=417316; nb[20]=1156; acc[20]=8; maxrank[20]=200; 21 | nrows[21]=417316; nb[21]=1444; acc[21]=8; maxrank[21]=200; 22 | nrows[22]=462400; nb[22]=1156; acc[22]=8; maxrank[22]=200; 23 | nrows[23]=462400; nb[23]=1600; acc[23]=8; maxrank[23]=200; 24 | nrows[24]=509796; nb[24]=1156; acc[24]=8; maxrank[24]=200; 25 | nrows[25]=509796; nb[25]=1764; acc[25]=8; maxrank[25]=200; 26 | nrows[26]=509796; nb[26]=2601; acc[26]=8; maxrank[26]=200; 27 | 28 | note="Hicma matern" 29 | allcaseids[16]="`seq 1 26`" 30 | allcaseids[16]="1" 31 | timelimit="02:00:00" 32 | step=1 33 | nprocs="16" 34 | _appdata="--geostat" 35 | _compmaxrank=200 36 | note="Hicma $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 37 | -------------------------------------------------------------------------------- /exp/cases/shmem-1.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=25600; nb[1]=1024 2 | nrows[2]=28900; nb[2]=1156 3 | nrows[3]=20736; nb[3]=1296 4 | nrows[4]=23104; nb[4]=1444 5 | nrows[5]=25600; nb[5]=1600 6 | nrows[6]=28224; nb[6]=1764 7 | nrows[7]=17424; nb[7]=1936 8 | nrows[8]=36864; nb[8]=1024 9 | nrows[9]=32400; nb[9]=1296 10 | nrows[10]=36100; nb[10]=1444 11 | nrows[11]=40000; nb[11]=1600 12 | nrows[12]=30976; nb[12]=1936 13 | nrows[13]=41616; nb[13]=1156 14 | nrows[14]=46656; nb[14]=1296 15 | nrows[15]=44100; nb[15]=1764 16 | nrows[16]=48400; nb[16]=1936 17 | nrows[17]=50176; nb[17]=1024 18 | nrows[18]=56644; nb[18]=1156 19 | nrows[19]=51984; nb[19]=1444 20 | nrows[20]=57600; nb[20]=1600 21 | nrows[21]=65536; nb[21]=1024 22 | nrows[22]=63504; nb[22]=1296 23 | nrows[23]=63504; nb[23]=1764 24 | nrows[24]=69696; nb[24]=1936 25 | nrows[25]=73984; nb[25]=1156 26 | nrows[26]=70756; nb[26]=1444 27 | nrows[27]=78400; nb[27]=1600 28 | nrows[28]=82944; nb[28]=1024 29 | nrows[29]=82944; nb[29]=1296 30 | nrows[30]=86436; nb[30]=1764 31 | nrows[31]=93636; nb[31]=1156 32 | nrows[32]=92416; nb[32]=1444 33 | nrows[33]=94864; nb[33]=1936 34 
| nrows[34]=102400; nb[34]=1024 35 | nrows[35]=104976; nb[35]=1296 36 | nrows[36]=102400; nb[36]=1600 37 | nrows[37]=115600; nb[37]=1156 38 | nrows[38]=116964; nb[38]=1444 39 | nrows[39]=112896; nb[39]=1764 40 | nrows[40]=123904; nb[40]=1024 41 | nrows[41]=129600; nb[41]=1296 42 | nrows[42]=129600; nb[42]=1600 43 | nrows[43]=123904; nb[43]=1936 44 | nrows[44]=139876; nb[44]=1156 45 | nrows[45]=147456; nb[45]=1024 46 | nrows[46]=144400; nb[46]=1444 47 | nrows[47]=142884; nb[47]=1764 48 | nrows[48]=156816; nb[48]=1296 49 | nrows[49]=160000; nb[49]=1600 50 | nrows[50]=156816; nb[50]=1936 51 | nrows[51]=166464; nb[51]=1156 52 | nrows[52]=173056; nb[52]=1024 53 | nrows[53]=174724; nb[53]=1444 54 | nrows[54]=176400; nb[54]=1764 55 | nrows[55]=186624; nb[55]=1296 56 | nrows[56]=195364; nb[56]=1156 57 | nrows[57]=193600; nb[57]=1600 58 | nrows[58]=193600; nb[58]=1936 59 | nrows[59]=200704; nb[59]=1024 60 | nrows[60]=207936; nb[60]=1444 61 | nrows[61]=219024; nb[61]=1296 62 | nrows[62]=213444; nb[62]=1764 63 | nrows[63]=226576; nb[63]=1156 64 | nrows[64]=230400; nb[64]=1024 65 | nrows[65]=230400; nb[65]=1600 66 | nrows[66]=234256; nb[66]=1936 67 | -------------------------------------------------------------------------------- /exp/cases/shmem-2.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=27000; nb[1]=1125 2 | nrows[2]=27000; nb[2]=1350 3 | nrows[3]=27000; nb[3]=1500 4 | nrows[4]=27000; nb[4]=2250 5 | nrows[5]=40500; nb[5]=1125 6 | nrows[6]=40500; nb[6]=1350 7 | nrows[7]=40500; nb[7]=1500 8 | nrows[8]=40500; nb[8]=2250 9 | nrows[9]=54000; nb[9]=1125 10 | nrows[10]=54000; nb[10]=1350 11 | nrows[11]=54000; nb[11]=1500 12 | nrows[12]=54000; nb[12]=2250 13 | nrows[13]=67500; nb[13]=1125 14 | nrows[14]=67500; nb[14]=1350 15 | nrows[15]=67500; nb[15]=1500 16 | nrows[16]=67500; nb[16]=2250 17 | nrows[17]=81000; nb[17]=1125 18 | nrows[18]=81000; nb[18]=1350 19 | nrows[19]=81000; nb[19]=1500 20 | nrows[20]=81000; nb[20]=2250 21 
| nrows[21]=94500; nb[21]=1125 22 | nrows[22]=94500; nb[22]=1350 23 | nrows[23]=94500; nb[23]=1500 24 | nrows[24]=94500; nb[24]=2250 25 | nrows[25]=108000; nb[25]=1125 26 | nrows[26]=108000; nb[26]=1350 27 | nrows[27]=108000; nb[27]=1500 28 | nrows[28]=108000; nb[28]=2250 29 | nrows[29]=121500; nb[29]=1125 30 | nrows[30]=121500; nb[30]=1350 31 | nrows[31]=121500; nb[31]=1500 32 | nrows[32]=121500; nb[32]=2250 33 | nrows[33]=135000; nb[33]=1125 34 | nrows[34]=135000; nb[34]=1350 35 | nrows[35]=135000; nb[35]=1500 36 | nrows[36]=135000; nb[36]=2250 37 | nrows[37]=148500; nb[37]=1125 38 | nrows[38]=148500; nb[38]=1350 39 | nrows[39]=148500; nb[39]=1500 40 | nrows[40]=148500; nb[40]=2250 41 | nrows[41]=162000; nb[41]=1125 42 | nrows[42]=162000; nb[42]=1350 43 | nrows[43]=162000; nb[43]=1500 44 | nrows[44]=162000; nb[44]=2250 45 | nrows[45]=175500; nb[45]=1125 46 | nrows[46]=175500; nb[46]=1350 47 | nrows[47]=175500; nb[47]=1500 48 | nrows[48]=175500; nb[48]=2250 49 | nrows[49]=189000; nb[49]=1125 50 | nrows[50]=189000; nb[50]=1350 51 | nrows[51]=189000; nb[51]=1500 52 | nrows[52]=189000; nb[52]=2250 53 | nrows[53]=202500; nb[53]=1125 54 | nrows[54]=202500; nb[54]=1350 55 | nrows[55]=202500; nb[55]=1500 56 | nrows[56]=202500; nb[56]=2250 57 | nrows[57]=216000; nb[57]=1125 58 | nrows[58]=216000; nb[58]=1350 59 | nrows[59]=216000; nb[59]=1500 60 | nrows[60]=216000; nb[60]=2250 61 | nrows[61]=229500; nb[61]=1125 62 | nrows[62]=229500; nb[62]=1350 63 | nrows[63]=229500; nb[63]=1500 64 | nrows[64]=229500; nb[64]=2250 65 | nrows[65]=243000; nb[65]=1125 66 | nrows[66]=243000; nb[66]=1350 67 | nrows[67]=243000; nb[67]=1500 68 | nrows[68]=243000; nb[68]=2250 69 | nrows[69]=256500; nb[69]=1125 70 | nrows[70]=256500; nb[70]=1350 71 | nrows[71]=256500; nb[71]=1500 72 | nrows[72]=256500; nb[72]=2250 73 | nrows[73]=270000; nb[73]=1125 74 | nrows[74]=270000; nb[74]=1350 75 | nrows[75]=270000; nb[75]=1500 76 | nrows[76]=270000; nb[76]=2250 77 | nrows[77]=283500; nb[77]=1125 78 | 
nrows[78]=283500; nb[78]=1350 79 | nrows[79]=283500; nb[79]=1500 80 | nrows[80]=283500; nb[80]=2250 81 | nrows[81]=297000; nb[81]=1125 82 | nrows[82]=297000; nb[82]=1350 83 | nrows[83]=297000; nb[83]=1500 84 | nrows[84]=297000; nb[84]=2250 85 | nrows[84]=16384; nb[84]=1024 86 | nrows[84]=2500; nb[84]=250 87 | -------------------------------------------------------------------------------- /exp/cases/sqexp-trsm.sh: -------------------------------------------------------------------------------- 1 | timelimit="03:00:00" 2 | sizes="big" 3 | sizes="small" 4 | if [ "$sizes" == "small" ];then 5 | #nrows[1]=1600; nb[1]=1350; acc[1]=6; maxrank[1]=200; nrhs[1]=400 6 | nrows[1]=27000; nb[1]=2700; acc[1]=6; maxrank[1]=1350; nrhs[1]=2700; 7 | nrows[2]=54000; nb[2]=2700; acc[2]=6; maxrank[2]=1350; nrhs[2]=2700; 8 | nrows[3]=81000; nb[3]=2700; acc[3]=6; maxrank[3]=1350; nrhs[3]=2700; 9 | nrows[4]=108000; nb[4]=2700; acc[4]=6; maxrank[4]=1350; nrhs[4]=2700; 10 | nrows[5]=135000; nb[5]=2700; acc[5]=6; maxrank[5]=1350; nrhs[5]=2700; 11 | nrows[6]=162000; nb[6]=2700; acc[6]=6; maxrank[6]=1350; nrhs[6]=2700; 12 | nrows[7]=189000; nb[7]=2700; acc[7]=6; maxrank[7]=1350; nrhs[7]=2700; 13 | nrows[8]=216000; nb[8]=2700; acc[8]=6; maxrank[8]=1350; nrhs[8]=2700; 14 | nrows[9]=243000; nb[9]=2700; acc[9]=6; maxrank[9]=1350; nrhs[9]=2700; 15 | nrows[10]=270000; nb[10]=2700; acc[10]=6; maxrank[10]=1350; nrhs[10]=2700; 16 | nrows[11]=297000; nb[11]=2700; acc[11]=6; maxrank[11]=1350; nrhs[11]=2700; 17 | nrows[12]=324000; nb[12]=2700; acc[12]=6; maxrank[12]=1350; nrhs[12]=2700; 18 | nrows[13]=351000; nb[13]=2700; acc[13]=6; maxrank[13]=1350; nrhs[13]=2700; 19 | nrows[14]=378000; nb[14]=2700; acc[14]=6; maxrank[14]=1350; nrhs[14]=2700; 20 | nrows[15]=405000; nb[15]=2700; acc[15]=6; maxrank[15]=1350; nrhs[15]=2700; 21 | nrows[16]=432000; nb[16]=2700; acc[16]=6; maxrank[16]=1350; nrhs[16]=2700; 22 | nrows[17]=459000; nb[17]=2700; acc[17]=6; maxrank[17]=1350; nrhs[17]=2700; 23 | nrows[18]=486000; 
nb[18]=2700; acc[18]=6; maxrank[18]=1350; nrhs[18]=2700; 24 | nrows[19]=513000; nb[19]=2700; acc[19]=6; maxrank[19]=1350; nrhs[19]=2700; 25 | nrows[20]=540000; nb[20]=2700; acc[20]=6; maxrank[20]=1350; nrhs[20]=2700; 26 | nrows[21]=567000; nb[21]=2700; acc[21]=6; maxrank[21]=1350; nrhs[21]=2700; 27 | nrows[22]=594000; nb[22]=2700; acc[22]=6; maxrank[22]=1350; nrhs[22]=2700; 28 | allcaseids[16]="`seq 1 22`" 29 | allcaseids[1]="1" 30 | nprocs="1" 31 | #allcaseids[1]="1";nprocs="1";que=debug 32 | #allcaseids[2]="1";nprocs="2";que=debug; timelimit="00:30:00" #TODO 33 | #allcaseids[2]="1";nprocs="2"; 34 | else 35 | : 36 | fi 37 | 38 | 39 | op=posv 40 | step=1 41 | _appdata="--ss"; 42 | _compmaxrank=1350 43 | note="Hicma trsm $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 44 | 45 | 46 | -------------------------------------------------------------------------------- /exp/cases/statistics-cham.sh: -------------------------------------------------------------------------------- 1 | 2 | . 
$PWD/exp/cases/statistics.sh 3 | prog="cham" 4 | allcaseids[1]="`seq 101 5 121`" 5 | -------------------------------------------------------------------------------- /exp/cases/statistics2.sh: -------------------------------------------------------------------------------- 1 | nbs=(1120 1260 1440 1680 2520 1120 1260 1440 1680 2240 2520 2880 1080 1120 1260 1440 1680 1890 2160 2520 1040 1120 1560 1680 1820 2080 2730 1040 1080 1170 1440 1560 2080 2160 2340 1040 1120 1560 1680 1820 2080 2240 2730 1080 1170 1260 1560 1820 1890 2340 2520 2730 1040 1080 1170 1440 1560 2080 2160 2340 2880 1040 1120 1170 1260 1440 1560 1680 1820 2080 2340 2520 2730 1040 1080 1170 1260 1560 1680 1820 1890 2160 2340 2520 2730 ) 2 | nts=(9 8 7 6 4 18 16 14 12 9 8 7 28 27 24 21 18 16 14 12 42 39 28 26 24 21 16 54 52 48 39 36 27 26 24 84 78 56 52 48 42 39 32 91 84 78 63 54 52 42 39 36 108 104 96 78 72 54 52 48 39 126 117 112 104 91 84 78 72 63 56 52 48 189 182 168 156 126 117 108 104 91 84 78 72 ) 3 | lennbs=${#nbs[@]} 4 | ncases=$((lennbs)) 5 | echo "Number of nbs:$lennbs Number of cases:$ncases" 6 | for i in "${!nbs[@]}"; do 7 | __nb=${nbs[i]} 8 | __nt=${nts[i]} 9 | __m=$((__nb*__nt)) 10 | __halfnb=$((__nb/2)) 11 | __maxrank=$((__nb/2)) 12 | #__maxrank=1000 13 | if [[ $__halfnb -lt $__maxrank ]]; then 14 | __maxrank=$__halfnb; 15 | fi 16 | __compmaxrank=$((__nb/3*2)) 17 | _ci=$((i+1)); nrows[$_ci]=$__m; nb[$_ci]=$__nb; acc[$_ci]="1e-8"; maxrank[$_ci]=$__maxrank; compmaxrank[$_ci]=$__compmaxrank; appdata[$_ci]="--st-3D-sqexp"; rbf_kernel[$_ci]="NA"; denst[$_ci]="NA"; rad[$_ci]="NA";mesh_file[$_ci]="NA";numobj[$_ci]="NA";order[$_ci]="NA" 18 | done 19 | 20 | nprocs="1" 21 | allcaseids[1]="`seq 1 $ncases`" 22 | prog="hic" 23 | #prog="mkl" 24 | step=1 25 | timelimit="02:00:00" 26 | note="Hicma $_appdata - $sizes - $_wavek - $timelimit - $_compmaxrank " 27 | 28 | -------------------------------------------------------------------------------- /exp/cases/t1.txt: 
-------------------------------------------------------------------------------- 1 | 2 | nrows[1]=57600; nb[1]=100; 3 | nrows[2]=57600; nb[2]=144; 4 | nrows[3]=56644; nb[3]=196; 5 | nrows[4]=57600; nb[4]=256; 6 | nrows[5]=63504; nb[5]=324; 7 | nrows[6]=57600; nb[6]=400; 8 | nrows[7]=58564; nb[7]=484; 9 | nrows[8]=57600; nb[8]=576; 10 | nrows[9]=54756; nb[9]=676; 11 | nrows[10]=63504; nb[10]=784; 12 | nrows[11]=57600; nb[11]=900; 13 | -------------------------------------------------------------------------------- /exp/cases/t2.txt: -------------------------------------------------------------------------------- 1 | nrows[1]=360000; nb[1]=1600 2 | nrows[2]=409600; nb[2]=1600 3 | nrows[3]=462400; nb[3]=1600 4 | nrows[4]=518400; nb[4]=1600 5 | nrows[5]=577600; nb[5]=1600 6 | nrows[6]=640000; nb[6]=1600 7 | nrows[7]=705600; nb[7]=1600 8 | nrows[8]=774400; nb[8]=1600 9 | nrows[9]=846400; nb[9]=1600 10 | nrows[10]=921600; nb[10]=1600 11 | nrows[11]=1000000; nb[11]=1600 12 | nrows[12]=1081600; nb[12]=1600 13 | nrows[13]=1166400; nb[13]=1600 14 | nrows[14]=1254400; nb[14]=1600 15 | nrows[15]=1345600; nb[15]=1600 16 | nrows[16]=1440000; nb[16]=1600 17 | nrows[17]=1537600; nb[17]=1600 18 | nrows[18]=1638400; nb[18]=1600 19 | nrows[19]=1742400; nb[19]=1600 20 | nrows[20]=1849600; nb[20]=1600 21 | nrows[21]=1960000; nb[21]=1600 22 | -------------------------------------------------------------------------------- /exp/cham-isambard-simple.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #PBS -q arm 3 | #PBS -l select=1 4 | #PBS -l walltime=00:05:00 5 | 6 | RUNCMD="aprun -n 1 -d 64 -j 1 " 7 | #RUNCMD="aprun -n 1 -d 64 -j 1 perf stat " 8 | 9 | dim=20000; 10 | KMP_AFFINITY=disabled \ 11 | $RUNCMD \ 12 | $HOME/hicma-dev/chameleon/build/timing/time_dpotrf_tile \ 13 | --threads=64 --M=$dim --N=$dim --K=$dim \ 14 | --P=1 -b 300 15 | 16 | 17 | #chameleon dgemm achieves ~861gflop/s 18 | 19 | 
#$HOME/hicma-dev/chameleon/build/timing/time_dgemm_tile \ 20 | -------------------------------------------------------------------------------- /exp/ci/compile_shihab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | module load libs-extra 3 | module load intel/16 4 | module load gcc/5.3.0 5 | module load cmake/3.7.2 6 | module load hwloc/1.11.6-gcc-5.3.0 7 | module load mpi-openmpi/2.1.0-gcc-5.3.0 8 | module load starpu/1.2.1-gcc-5.3.0-openmpi 9 | 10 | set -x 11 | # Check if we are already in hicma repo dir or not. 12 | if git -C $PWD remote -v | grep -q 'https://github.com/ecrc/hicma' 13 | then 14 | # we are, lets go to the top dir (where .git is); stop at / so the loop cannot spin forever 15 | until test -d $PWD/.git || test "$PWD" = "/" ; 16 | do 17 | cd .. 18 | done; 19 | else 20 | #we are not, we need to clone the repo 21 | git clone https://github.com/ecrc/hicma.git 22 | cd hicma 23 | fi 24 | 25 | # Update submodules 26 | HICMADEVDIR=$PWD 27 | git submodule update --init --recursive 28 | 29 | # STARS-H 30 | cd stars-h 31 | mkdir -p build/installdir 32 | cd build 33 | cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/installdir 34 | make -j 35 | make install 36 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 37 | 38 | # CHAMELEON (its MPI switch is CHAMELEON_USE_MPI, not HICMA_USE_MPI) 39 | cd $HICMADEVDIR 40 | cd chameleon 41 | mkdir -p build/installdir 42 | cd build 43 | cmake .. -DCMAKE_BUILD_TYPE=Debug -DCHAMELEON_USE_MPI=ON -DCMAKE_INSTALL_PREFIX=$PWD/installdir 44 | make -j 45 | make install 46 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 47 | 48 | # HICMA 49 | cd $HICMADEVDIR 50 | mkdir -p build/installdir 51 | cd build 52 | cmake .. 
-DCMAKE_INSTALL_PREFIX=$PWD/installdir -DHICMA_USE_MPI=ON 53 | make -j 54 | make install 55 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 56 | 57 | # TEST 58 | cd $HICMADEVDIR 59 | ./exp/ci/test01.sh 2 - 4 4 60 | -------------------------------------------------------------------------------- /exp/ci/test01.sh: -------------------------------------------------------------------------------- 1 | PRE="" 2 | factor=1 3 | debug=0 4 | mpi=0 5 | if [ $# -ne 4 ]; then 6 | echo "Usage: factor[1,2,4...] debug[d,-] mpi[m,-] nthreads" 7 | exit -1 8 | fi 9 | factor=$1 10 | debug=$2 11 | mpi=$3 12 | nthreads=$4 13 | nmpi=8 14 | if [ "$mpi" != "-" ]; then 15 | nmpi=$mpi 16 | if [ $debug == "d" ]; then 17 | CMD="mpirun -n $nmpi xterm -hold -e gdb -ex run --args " 18 | else 19 | #CMD="mpirun -n $nmpi -tag-output " 20 | #CMD="mpirun -n $nmpi -quiet " 21 | CMD="mpirun -n $nmpi " 22 | fi 23 | else 24 | if [ $debug == "d" ]; then 25 | CMD="gdb -ex run --args" 26 | fi 27 | fi 28 | echo $CMD 29 | 30 | export STARPU_SILENT=1 31 | nb=2;m=4 32 | nb=3;m=6 33 | #nb=3;m=9 34 | nb=4;m=4 35 | nb=4;m=8 36 | nb=9;m=36 37 | acc=3;nb=16;m=64 38 | acc=4;nb=128;m=1024 39 | #acc=7;nb=128;m=1024 40 | #acc=6;nb=128;m=4096 41 | #nb=7;m=21 #works for rndusr potrf 42 | nta="1 4 8" 43 | nta="27 28 29 30 31 32" #shihab has 36 physical cores 44 | #irange="2 2";nta=$nthreads;_b=1156; acc=8; check="" 45 | irange="3 3"; nta=$nthreads;_b=400; acc=8; check="--check" 46 | #irange="4 4";nta=$nthreads;_b=1156; acc=8; check="" 47 | #irange="3 3"; nta="2"; _b=324; acc=8; check="--check" #ALEKS'S SUGGESTION 48 | #irange="3 3"; nta="2"; _b=256; acc=6; check="--check" #FAST 49 | #nta="1" 50 | for nt in $nta;do 51 | n=$((m/factor)) 52 | maxrank=$((nb/factor)) 53 | #echo BASH-MAXRANK: $maxrank 54 | #echo BASH-DEBUG: $debug 55 | #_b=1600 56 | #_b=324 57 | nb=$_b; 58 | for _i in `seq $irange`;do 59 | _is=$((_i*_i)) 60 | m=$((_is*_b)); 61 | n=$((_is*_b/factor)); 62 | maxrank=$((_b/factor)) 63 | 
run="./build/main \ 64 | --m=$m \ 65 | --n_range=$n:$n \ 66 | --k=$m \ 67 | --mb=$nb \ 68 | --nb=$maxrank \ 69 | --nowarmup \ 70 | --threads=$nt \ 71 | --rk=0 \ 72 | --acc=$acc \ 73 | $check \ 74 | --ss \ 75 | --starshdecay=2 \ 76 | --starshmaxrank=$maxrank" 77 | runcham="/home/akbudak/hicma/chameleon/build/timing/time_dpotrf_tile \ 78 | --m=$m \ 79 | --n_range=$n:$n \ 80 | --nowarmup \ 81 | --threads=$nt \ 82 | " 83 | $CMD $run 84 | done 85 | done 86 | exit 87 | --check \ 88 | --printmat \ 89 | --printindex \ 90 | 91 | #--starshmaxrank=$nb 92 | 93 | #--rndusr \ 94 | #--rnd \ 95 | #--ss \ 96 | 97 | #not used in code 98 | #--maxrank=$((nb/2)) \ 99 | -------------------------------------------------------------------------------- /exp/cout/.f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecrc/hicma/fa8596b5d3aa8e5b7d5c06cd8db3cecc32f70d17/exp/cout/.f -------------------------------------------------------------------------------- /exp/ctrial.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=cham 3 | #SBATCH --output=/project/k1205/akbudak/hicma/exp/cout/%j.o 4 | #SBATCH --error=/project/k1205/akbudak/hicma/exp/cerr/%j.e 5 | #SBATCH --partition=workq 6 | #SBATCH --nodes=4 7 | #SBATCH --ntasks=4 8 | #SBATCH --ntasks-per-node=1 9 | #SBATCH --cpus-per-task=32 10 | #SBATCH --time 12:00:00 11 | #SBATCH --mail-type=END,FAIL 12 | #SBATCH --mail-user=akbudak 13 | 14 | 15 | #3teraflops 16 | export STARPU_SCHED=eager 17 | #export STARPU_SCHED=prio 18 | #export STARPU_MIN_PRIO=-1 19 | #export STARPU_MAX_PRIO=100 20 | 21 | #These do not affect perf for dpotrf M=36000 mb=1000 22 | export STARPU_CALIBRATE=0 23 | export STARPU_LIMIT_CPU_MEM=60000 24 | export STARPU_LIMIT_MAX_SUBMITTED_TASKS=20000 25 | export STARPU_LIMIT_MIN_SUBMITTED_TASKS=18000 26 | 27 | export 
LD_LIBRARY_PATH=/opt/intel/composer_xe_2015.2.164/mkl/lib/intel64/:/project/k1205/akbudak/codes/chameleon/install/lib/:/project/k1205/akbudak/codes/hwloc-1.11.7/install/lib/:/project/k1205/akbudak/codes/starpu-1.2.1/install/lib/:$LD_LIBRARY_PATH 28 | echo STARPU_SCHED $STARPU_SCHED 29 | echo STARPU_CALIBRATE $STARPU_CALIBRATE 30 | echo STARPU_LIMIT_CPU_MEM $STARPU_LIMIT_CPU_MEM 31 | echo STARPU_LIMIT_MAX_SUBMITTED_TASKS $STARPU_LIMIT_MAX_SUBMITTED_TASKS 32 | echo STARPU_LIMIT_MIN_SUBMITTED_TASKS $STARPU_LIMIT_MIN_SUBMITTED_TASKS 33 | kernel=time_dgemm_tile 34 | kernel=time_dpotrf_tile 35 | #try with these 36 | nb=460 37 | _nb=320 38 | _nb=320 39 | #_nb=1156 40 | _b=1156 41 | for _i in `seq 12 20`; do 42 | for _nb in 320 460 1156;do 43 | _is=$((_i*_i)) 44 | _m=$((_is*_b)) 45 | echo "# $i $_m $nb" 46 | echo -n "#" date 47 | STARPU_SILENT=1 numactl --interleave=all srun --hint=nomultithread /project/k1205/akbudak/hicma/chameleon/build/timing/$kernel --n_range=$_m:$_m:$_m --m=$_m --k=$_m --nb=$_nb --p=2 --threads=31 48 | echo -n "#" date 49 | done 50 | done 51 | -------------------------------------------------------------------------------- /exp/echameleon/potrf_run.sh: -------------------------------------------------------------------------------- 1 | ROOT=`pwd` 2 | irange="3 3";nt=2;_bs="289 578 1156"; 3 | for _i in `seq $irange`;do 4 | for _b in $_bs;do 5 | _is=$((_i*_i)) 6 | m=$((_is*_b)); 7 | n=$m 8 | echo $m $_b 9 | mpirun -np 4 $ROOT/chameleon/build/timing/time_dpotrf_tile 10 | 11 | exit 12 | --m=$m \ 13 | --n_range=$n:$n \ 14 | --k=$m \ 15 | --nb=$_b \ 16 | --threads=$nt \ 17 | 18 | done; 19 | done 20 | exit 21 | --nowarmup \ 22 | -------------------------------------------------------------------------------- /exp/incs/Makefile.cdl2.inc: -------------------------------------------------------------------------------- 1 | mpilib=-lmpich 2 | -------------------------------------------------------------------------------- /exp/incs/Makefile.uwork.inc: 
-------------------------------------------------------------------------------- 1 | mpilib=-lmpi 2 | -------------------------------------------------------------------------------- /exp/inner-product-gemm.sh: -------------------------------------------------------------------------------- 1 | #for _m_ in 12000 24000 36000 48000; do 2 | for _m_ in 24000; do 3 | for _mb_ in 600 1200; do 4 | _maxrank_=1200 5 | for _acc_ in 3; do 6 | for reorderinnerproducts in 0 1; do 7 | cmd="./timing/time_zgemm_tile --m=$_m_ --n_range=$_m_:$_m_ --k=$_m_ --mb=$_mb_ --nb=$_mb_ --nowarmup --threads=55 --starshmaxrank=$_maxrank_ --st-3D-exp --acc=$_acc_ --reorderinnerproducts=$reorderinnerproducts" 8 | echo $cmd 9 | eval $cmd 10 | done 11 | done 12 | done 13 | done 14 | -------------------------------------------------------------------------------- /exp/jobids/.f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecrc/hicma/fa8596b5d3aa8e5b7d5c06cd8db3cecc32f70d17/exp/jobids/.f -------------------------------------------------------------------------------- /exp/jobids/2017-10-18-hicma-10M-1.txt: -------------------------------------------------------------------------------- 1 | #Job for max/min sub:20480 15360 on 512 nodes 2 | Submitted batch job 4310337 3 | Submitted batch job 4310338 4 | Submitted batch job 4310339 5 | Submitted batch job 4310340 6 | Submitted batch job 4310341 7 | Submitted batch job 4310342 8 | Submitted batch job 4310343 9 | Submitted batch job 4310344 10 | Submitted batch job 4310345 11 | #Job for max/min sub:40960 30720 on 1024 nodes 12 | Submitted batch job 4310346 13 | Submitted batch job 4310347 14 | Submitted batch job 4310348 15 | Submitted batch job 4310349 16 | Submitted batch job 4310350 17 | Submitted batch job 4310351 18 | Submitted batch job 4310352 19 | Submitted batch job 4310353 20 | Submitted batch job 4310354 21 | 
-------------------------------------------------------------------------------- /exp/jobids/2017-10-18-hicma-10M-2.txt: -------------------------------------------------------------------------------- 1 | #Job for max/min sub:20480 15360 on 512 nodes 2 | 4310683 3 | 4310684 4 | 4310685 5 | 4310686 6 | 4310687 7 | 4310688 8 | 4310689 9 | 4310690 10 | 4310691 11 | #Job for max/min sub:40960 30720 on 1024 nodes 12 | 4310692 13 | 4310693 14 | 4310694 15 | 4310695 16 | 4310696 17 | 4310697 18 | 4310698 19 | 4310699 20 | 4310700 21 | -------------------------------------------------------------------------------- /exp/jobids/2017-10-19-hicma-10M-1.txt: -------------------------------------------------------------------------------- 1 | #Job for max/min sub:10240 7680 on 256 nodes 2 | 4314153 3 | 4314154 4 | 4314155 5 | 4314156 6 | 4314157 7 | 4314158 8 | 4314159 9 | 4314160 10 | 4314161 11 | #Job for max/min sub:20480 15360 on 512 nodes 12 | 4314162 13 | 4314163 14 | 4314164 15 | 4314165 16 | 4314166 17 | 4314167 18 | 4314168 19 | 4314169 20 | 4314170 21 | #Job for max/min sub:40960 30720 on 1024 nodes 22 | 4314171 23 | 4314172 24 | 4314173 25 | 4314174 26 | 4314175 27 | 4314176 28 | 4314177 29 | 4314178 30 | 4314179 31 | -------------------------------------------------------------------------------- /exp/jobids/2017-10-19-hicma-50K500K-1.txt: -------------------------------------------------------------------------------- 1 | #Job for max/min sub:640 480 on 16 nodes 2 | 4313698 3 | #Job for max/min sub:1280 960 on 32 nodes 4 | 4313699 5 | #Job for max/min sub:640 480 on 16 nodes 6 | 4314024 7 | #Job for max/min sub:1280 960 on 32 nodes 8 | 4314025 9 | -------------------------------------------------------------------------------- /exp/jobids/2017-10-20-hicma-10M-missingNB-1.txt: -------------------------------------------------------------------------------- 1 | #Job for max/min sub:40960 30720 on 1024 nodes 2 | 4315061 3 | 4315062 4 | #Job for max/min sub:20480 
15360 on 512 nodes 5 | 4315067 6 | 4315068 7 | #Job for max/min sub:10240 7680 on 256 nodes 8 | 4315072 9 | 4315073 10 | 4315074 11 | #Job for max/min sub:640 480 on 16 nodes 12 | 4315162 13 | 4315163 14 | 4315164 15 | 4315165 16 | 4315166 17 | 4315167 18 | 4315168 19 | 4315169 20 | 4315170 21 | 4315171 22 | 4315172 23 | 4315173 24 | 4315174 25 | 4315175 26 | 4315176 27 | 4315177 28 | 4315178 29 | 4315179 30 | 4315180 31 | 4315181 32 | 4315182 33 | 4315183 34 | 4315184 35 | 4315185 36 | 4315186 37 | 4315187 38 | 4315188 39 | 4315189 40 | #Job for max/min sub:1280 960 on 32 nodes 41 | 4315190 42 | 4315191 43 | 4315192 44 | 4315193 45 | 4315194 46 | 4315195 47 | 4315196 48 | 4315197 49 | 4315198 50 | 4315199 51 | 4315200 52 | 4315201 53 | 4315202 54 | 4315203 55 | 4315204 56 | 4315205 57 | 4315206 58 | 4315207 59 | 4315208 60 | 4315209 61 | 4315210 62 | 4315211 63 | 4315212 64 | 4315213 65 | 4315214 66 | 4315215 67 | 4315216 68 | 4315217 69 | #Job for max/min sub:2560 1920 on 64 nodes 70 | 4315218 71 | 4315219 72 | 4315220 73 | 4315221 74 | 4315222 75 | 4315223 76 | 4315224 77 | 4315225 78 | 4315226 79 | 4315227 80 | 4315228 81 | 4315229 82 | 4315230 83 | 4315231 84 | 4315232 85 | 4315233 86 | 4315234 87 | 4315235 88 | 4315236 89 | 4315237 90 | 4315238 91 | 4315239 92 | 4315240 93 | 4315241 94 | 4315242 95 | 4315243 96 | 4315244 97 | 4315245 98 | #Job for max/min sub:5120 3840 on 128 nodes 99 | 4315246 100 | 4315247 101 | 4315248 102 | 4315249 103 | 4315250 104 | 4315251 105 | 4315252 106 | 4315253 107 | 4315254 108 | 4315255 109 | 4315256 110 | 4315257 111 | 4315258 112 | 4315259 113 | 4315260 114 | 4315261 115 | 4315262 116 | 4315263 117 | 4315264 118 | 4315265 119 | 4315266 120 | 4315267 121 | 4315268 122 | 4315269 123 | 4315270 124 | 4315271 125 | 4315272 126 | 4315273 127 | -------------------------------------------------------------------------------- /exp/jobids/2017-10-21-hicma-acc-1.txt: 
-------------------------------------------------------------------------------- 1 | #comp Sat Oct 21 16:26:27 AST 2017 on 64 nodes 2 | 4316549 3 | 4316550 4 | 4316551 5 | 4316552 6 | 4316553 7 | 4316554 8 | 4316555 9 | #Sat Oct 21 20:27:35 AST 2017 on 64 nodes 10 | 4317272 11 | 4317273 12 | 4317274 13 | #nocomp Sat Oct 21 20:27:53 AST 2017 on 64 nodes 14 | 4316741 15 | 4316742 16 | 4316743 17 | 4316744 18 | 4316745 19 | 4316746 20 | 4316747 21 | 4317275 22 | 4317276 23 | 4317277 24 | -------------------------------------------------------------------------------- /exp/jobids/2017-10-21-hicma-acc-nocomp-1.txt: -------------------------------------------------------------------------------- 1 | #Sat Oct 21 20:27:53 AST 2017 on 64 nodes 2 | 4316741 3 | 4316742 4 | 4316743 5 | 4316744 6 | 4316745 7 | 4316746 8 | 4316747 9 | 4317275 10 | 4317276 11 | 4317277 12 | -------------------------------------------------------------------------------- /exp/jobids/2017-11-02-hicma-more-points-1.txt: -------------------------------------------------------------------------------- 1 | #Thu Nov 2 04:43:28 AST 2017 on 16 nodes. Hicma runs to compare vs scalapack 2 | 4377668 3 | 4377669 4 | 4377670 5 | 4377671 6 | 4377672 7 | 4377673 8 | 4377674 9 | 4377675 10 | -------------------------------------------------------------------------------- /exp/jobids/2017-11-02-hicma-more-points-BIG-1.txt: -------------------------------------------------------------------------------- 1 | #Thu Nov 2 04:50:04 AST 2017 on 256 nodes. Hicma BIG runs 2 | 4377676 3 | 4377677 4 | 4377678 5 | 4377679 6 | #Thu Nov 2 04:50:04 AST 2017 on 512 nodes. Hicma BIG runs 7 | 4377680 8 | 4377681 9 | 4377682 10 | 4377683 11 | #Thu Nov 2 04:53:28 AST 2017 on 128 nodes. 
Hicma BIG runs 12 | 4377684 13 | 4377685 14 | 4377686 15 | -------------------------------------------------------------------------------- /exp/jobids/2018-01-22-hicma-edsin-acc-1.txt: -------------------------------------------------------------------------------- 1 | #Mon Jan 22 14:00:07 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (-5) 2 | 4665289 3 | 4665290 4 | 4665291 5 | 4665292 6 | 4665293 7 | #Mon Jan 22 14:12:45 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-5-1) 8 | 4665453 9 | 4665454 10 | 4665455 11 | 4665456 12 | 4665457 13 | #Mon Jan 22 15:01:00 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-3-1) 14 | #1 15 | 4665759 16 | #2 17 | 4665760 18 | #3 outlier 19 | #4665761 20 | #3-r2 21 | 4665845 22 | #Mon Jan 22 14:47:31 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-2-1) 23 | #4 24 | 4665685 25 | #5 26 | 4665686 27 | #Mon Jan 22 14:31:58 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-11-1) 28 | #6 29 | 4665566 30 | #7 31 | 4665567 32 | #8 33 | 4665568 34 | #8-r2 35 | 4665846 36 | #9 37 | 4665569 38 | #9-r2 39 | 4665847 40 | #10 41 | 4665570 42 | #11 43 | 4665571 44 | #12 45 | 4665572 46 | #13 47 | 4665573 48 | #14 not working 49 | 4665574 50 | #15 not working 51 | 4665575 52 | #16 not working 53 | 4665576 54 | #Mon Jan 22 21:07:02 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 55 | 4667255 56 | 4667256 57 | 4667257 58 | 4667258 59 | 4667259 60 | 4667260 61 | 4667261 62 | 4667262 63 | 4667263 64 | 4667264 65 | 4667265 66 | 4667266 67 | 4667267 68 | #Mon Jan 22 21:07:05 AST 2018 on 64 nodes. 
Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 69 | 4667269 70 | 4667270 71 | 4667271 72 | 4667272 73 | 4667273 74 | 4667274 75 | 4667275 76 | 4667276 77 | 4667277 78 | 4667278 79 | 4667279 80 | 4667280 81 | 4667281 82 | #Mon Jan 22 21:07:10 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 83 | 4667282 84 | 4667283 85 | 4667284 86 | 4667285 87 | 4667286 88 | 4667287 89 | 4667288 90 | 4667289 91 | 4667290 92 | 4667291 93 | 4667292 94 | 4667293 95 | 4667294 96 | #Mon Jan 22 21:07:12 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 97 | 4667295 98 | 4667296 99 | 4667297 100 | 4667298 101 | 4667299 102 | 4667300 103 | 4667301 104 | 4667302 105 | 4667303 106 | 4667304 107 | 4667305 108 | 4667306 109 | 4667307 110 | #Mon Jan 22 22:22:40 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 111 | 4667559 112 | 4667560 113 | 4667561 114 | 4667562 115 | 4667563 116 | 4667564 117 | 4667565 118 | 4667566 119 | 4667567 120 | 4667568 121 | 4667569 122 | 4667570 123 | 4667571 124 | #Mon Jan 22 22:22:41 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 125 | 4667572 126 | 4667573 127 | 4667574 128 | 4667575 129 | 4667576 130 | 4667577 131 | 4667578 132 | 4667579 133 | 4667580 134 | 4667581 135 | 4667582 136 | 4667583 137 | 4667584 138 | #Mon Jan 22 22:22:42 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 139 | 4667585 140 | 4667586 141 | 4667587 142 | 4667588 143 | 4667589 144 | 4667590 145 | 4667591 146 | 4667592 147 | 4667593 148 | 4667594 149 | 4667595 150 | 4667596 151 | 4667597 152 | #Mon Jan 22 22:22:43 AST 2018 on 64 nodes. 
Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 153 | 4667598 154 | 4667599 155 | 4667600 156 | 4667601 157 | 4667602 158 | 4667603 159 | 4667604 160 | 4667605 161 | 4667606 162 | 4667607 163 | 4667608 164 | 4667609 165 | 4667610 166 | #Mon Jan 22 22:22:46 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-1) 167 | 4667611 168 | 4667612 169 | 4667613 170 | 4667614 171 | 4667615 172 | 4667616 173 | 4667617 174 | 4667618 175 | 4667619 176 | 4667620 177 | 4667621 178 | 4667622 179 | 4667623 180 | -------------------------------------------------------------------------------- /exp/jobids/2018-01-23-hicma-edsin-acc-custommaxrk-1.txt: -------------------------------------------------------------------------------- 1 | #Wed Jan 24 13:30:09 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-20) 2 | 4680589 3 | #Wed Jan 24 21:16:00 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-20) 4 | 4684023 5 | #Wed Jan 24 21:36:16 AST 2018 on 64 nodes. Hicma runs - different accuracy - exp/cases/1M-acc.txt (1-13-20) 6 | 4684306 7 | #Number of nodes: 64 ============================= 8 | #Thu Feb 1 10:18:17 AST 2018 on 64 nodes. Hicma runs - EDSIN - 1e-4missing - exp/cases/1M-acc.txt "4" 1 (0-0-10) 9 | #case ids: 4 10 | 4776037 11 | #Number of nodes: 64 ============================= 12 | #Thu Feb 1 10:50:44 AST 2018 on 64 nodes. Hicma runs - EDSIN - 1e-4missing - exp/cases/1M-acc.txt "4 6 9 13" 4 (0-3-10) 13 | #case ids: 4 6 9 13 14 | 4776318 15 | #Number of nodes: 64 ============================= 16 | #Thu Feb 15 12:40:00 AST 2018 on 64 nodes. Hicma - accuracy - SS - custom maxrank - hic - exp/cases/1M-acc.txt "1 2 3 4 5 6 7" 7 (0-6-10) 17 | #case ids: 1 2 3 4 5 6 7 18 | 5025046 19 | #Number of nodes: 64 ============================= 20 | #Thu Feb 15 15:16:22 AST 2018 on 64 nodes. 
Hicma - accuracy - SS - custom maxrank - hic - exp/cases/1M-acc.txt "3 4 5 6 7" 5 (0-4-10) 21 | #case ids: 3 4 5 6 7 22 | 5026176 23 | -------------------------------------------------------------------------------- /exp/jobids/2018-01-27-hicma-ae-test-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Sat Jan 27 22:37:32 AST 2018 on 16 nodes. Hicma runs - Skylake cluster - exp/cases/ae-10M.txt "1 7" 2 (0-1-1) 3 | #case ids: 1 4 | 4720894 5 | #case ids: 7 6 | 4720895 7 | #Number of nodes: 16 ============================= 8 | #Sat Jan 27 23:14:20 AST 2018 on 16 nodes. Hicma runs - Skylake cluster - exp/cases/ae-10M.txt "1 7" 2 (0-1-1) 9 | #case ids: 1 10 | 4721179 11 | #case ids: 7 12 | 4721180 13 | #Number of nodes: 16 ============================= 14 | #Sat Jan 27 23:27:58 AST 2018 on 16 nodes. Hicma runs - Skylake cluster - exp/cases/ae-10M.txt "1 7" 2 (0-1-1) 15 | #case ids: 1 16 | 4721254 17 | #case ids: 7 18 | 4721255 19 | #Number of nodes: 16 ============================= 20 | #Sat Jan 27 23:36:59 AST 2018 on 16 nodes. Hicma runs - Skylake cluster - exp/cases/ae-10M.txt "1 7" 2 (0-1-1) 21 | #case ids: 1 22 | 4721318 23 | #case ids: 7 24 | 4721319 25 | #Number of nodes: 16 ============================= 26 | #Sat Jan 27 23:53:11 AST 2018 on 16 nodes. Hicma runs - Skylake cluster - exp/cases/ae-10M.txt "1 7" 2 (0-1-1) 27 | #case ids: 1 28 | 4721433 29 | #case ids: 7 30 | 4721434 31 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-01-hicma-syn-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Thu Feb 1 14:41:30 AST 2018 on 16 nodes. 
Hicma runs - Synthetic - exp/cases/1M-8M.txt "1" 1 (0-0-1) 3 | #case ids: 1 4 | 4778408 5 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-05-hicma-fxt-ss-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Tue Feb 6 09:20:15 AST 2018 on 16 nodes. Hicma runs - EDSIN - FXT - ./exp/cases/fxt-100K-16nodes.txt "1" 1 (0-0-10) 3 | #case ids: 1 4 | 4873079 5 | #Number of nodes: 16 ============================= 6 | #Wed Feb 7 12:16:32 AST 2018 on 16 nodes. Hicma runs - EDSIN - FXT - cham - ./exp/cases/fxt-100K-16nodes.txt "1" 1 (0-0-10) 7 | #case ids: 1 8 | 4909095 9 | #Number of nodes: 4 ============================= 10 | #Wed Feb 7 13:47:45 AST 2018 on 4 nodes. Hicma runs - EDSIN - FXT - cham - ./exp/cases/fxt-100K-16nodes.txt "1" 1 (0-0-10) 11 | #case ids: 1 12 | 4914175 13 | #Number of nodes: 4 ============================= 14 | #Wed Feb 7 14:49:09 AST 2018 on 4 nodes. Hicma runs - SS - FXT - hic - ./exp/cases/fxt-100K-16nodes.txt "1" 1 (0-0-10) 15 | #case ids: 1 16 | 4915619 17 | #Number of nodes: 4 ============================= 18 | #Wed Feb 7 15:37:18 AST 2018 on 4 nodes. Hicma runs - SS - FXT - hic - ./exp/cases/fxt-100K-16nodes.txt "1" 1 (0-0-10) 19 | #case ids: 1 20 | 4916494 21 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-10-cham-256-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 256 ============================= 2 | #Sat Feb 10 21:15:39 AST 2018 on 256 nodes. 
Chameleon potrf runs - cham - exp/cases/cham.txt "1..63" 63 (0-62-1) 3 | #case ids: 1 4 | 4970383 5 | #case ids: 2 6 | 4970384 7 | #case ids: 3 8 | 4970385 9 | #case ids: 4 10 | 4970386 11 | #case ids: 5 12 | 4970387 13 | #case ids: 6 14 | 4970388 15 | #case ids: 7 16 | 4970389 17 | #case ids: 8 18 | 4970390 19 | #case ids: 9 20 | 4970391 21 | #case ids: 10 22 | 4970392 23 | #case ids: 11 24 | 4970393 25 | #case ids: 12 26 | 4970394 27 | #case ids: 13 28 | 4970395 29 | #case ids: 14 30 | 4970396 31 | #case ids: 15 32 | 4970397 33 | #case ids: 16 34 | 4970398 35 | #case ids: 17 36 | 4970399 37 | #case ids: 18 38 | 4970400 39 | #case ids: 19 40 | 4970401 41 | #case ids: 20 42 | 4970402 43 | #case ids: 21 44 | 4970403 45 | #case ids: 22 46 | 4970404 47 | #case ids: 23 48 | 4970405 49 | #case ids: 24 50 | 4970406 51 | #case ids: 25 52 | 4970407 53 | #case ids: 26 54 | 4970408 55 | #case ids: 27 56 | 4970409 57 | #case ids: 28 58 | 4970410 59 | #case ids: 29 60 | 4970411 61 | #case ids: 30 62 | 4970412 63 | #case ids: 31 64 | 4970413 65 | #case ids: 32 66 | 4970414 67 | #case ids: 33 68 | 4970415 69 | #case ids: 34 70 | 4970416 71 | #case ids: 35 72 | 4970417 73 | #case ids: 36 74 | 4970418 75 | #case ids: 37 76 | 4970419 77 | #case ids: 38 78 | 4970420 79 | #case ids: 39 80 | 4970421 81 | #case ids: 40 82 | 4970422 83 | #case ids: 41 84 | 4970423 85 | #case ids: 42 86 | 4970424 87 | #case ids: 43 88 | 4970425 89 | #case ids: 44 90 | 4970426 91 | #case ids: 45 92 | 4970427 93 | #case ids: 46 94 | 4970428 95 | #case ids: 47 96 | 4970429 97 | #case ids: 48 98 | 4970430 99 | #case ids: 49 100 | 4970431 101 | #case ids: 50 102 | 4970432 103 | #case ids: 51 104 | 4970433 105 | #case ids: 52 106 | 4970434 107 | #case ids: 53 108 | 4970435 109 | #case ids: 54 110 | 4970436 111 | #case ids: 55 112 | 4970437 113 | #case ids: 56 114 | 4970438 115 | #case ids: 57 116 | 4970439 117 | #case ids: 58 118 | 4970440 119 | #case ids: 59 120 | 4970441 121 | #case ids: 60 122 | 4970442 123 
| #case ids: 61 124 | 4970443 125 | #case ids: 62 126 | 4970444 127 | #case ids: 63 128 | 4970445 129 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-10-hicma-geostat-16-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Sat Feb 10 23:16:17 AST 2018 on 16 nodes. Hicma mattern - hic - exp/cases/mattern-50K500K.txt "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68" 68 (0-67-1) 3 | #case ids: 1 4 | 4971680 5 | #case ids: 2 6 | 4971681 7 | #case ids: 3 8 | 4971682 9 | #case ids: 4 10 | 4971683 11 | #case ids: 5 12 | 4971684 13 | #case ids: 6 14 | 4971685 15 | #case ids: 7 16 | 4971686 17 | #case ids: 8 18 | 4971687 19 | #case ids: 9 20 | 4971688 21 | #case ids: 10 22 | 4971689 23 | #case ids: 11 24 | 4971690 25 | #case ids: 12 26 | 4971691 27 | #case ids: 13 28 | 4971692 29 | #case ids: 14 30 | 4971693 31 | #case ids: 15 32 | 4971694 33 | #case ids: 16 34 | 4971695 35 | #case ids: 17 36 | 4971696 37 | #case ids: 18 38 | 4971697 39 | #case ids: 19 40 | 4971698 41 | #case ids: 20 42 | 4971699 43 | #case ids: 21 44 | 4971700 45 | #case ids: 22 46 | 4971701 47 | #case ids: 23 48 | 4971702 49 | #case ids: 24 50 | 4971703 51 | #case ids: 25 52 | 4971704 53 | #case ids: 26 54 | 4971705 55 | #case ids: 27 56 | 4971706 57 | #case ids: 28 58 | 4971707 59 | #case ids: 29 60 | 4971708 61 | #case ids: 30 62 | 4971709 63 | #case ids: 31 64 | 4971710 65 | #case ids: 32 66 | 4971711 67 | #case ids: 33 68 | 4971712 69 | #case ids: 34 70 | 4971713 71 | #case ids: 35 72 | 4971714 73 | #case ids: 36 74 | 4971715 75 | #case ids: 37 76 | 4971716 77 | #case ids: 38 78 | 4971717 79 | #case ids: 39 80 | 4971718 81 | #case ids: 40 82 | 4971719 83 | #case ids: 41 84 | 4971720 85 | #case ids: 42 86 | 
4971721 87 | #case ids: 43 88 | 4971722 89 | #case ids: 44 90 | 4971723 91 | #case ids: 45 92 | 4971724 93 | #case ids: 46 94 | 4971725 95 | #case ids: 47 96 | 4971726 97 | #case ids: 48 98 | 4971727 99 | #case ids: 49 100 | 4971728 101 | #case ids: 50 102 | 4971729 103 | #case ids: 51 104 | 4971730 105 | #case ids: 52 106 | 4971731 107 | #case ids: 53 108 | 4971732 109 | #case ids: 54 110 | 4971733 111 | #case ids: 55 112 | 4971734 113 | #case ids: 56 114 | 4971735 115 | #case ids: 57 116 | 4971736 117 | #case ids: 58 118 | 4971737 119 | #case ids: 59 120 | 4971738 121 | #case ids: 60 122 | 4971739 123 | #case ids: 61 124 | 4971740 125 | #case ids: 62 126 | 4971741 127 | #case ids: 63 128 | 4971742 129 | #case ids: 64 130 | 4971743 131 | #case ids: 65 132 | 4971744 133 | #case ids: 66 134 | 4971745 135 | #case ids: 67 136 | 4971746 137 | #case ids: 68 138 | 4971747 139 | #Number of nodes: 16 ============================= 140 | #Sun Feb 11 22:26:53 AST 2018 on 16 nodes. Hicma mattern - hic - exp/cases/mattern-50K500K.txt "1" 1 (0-0-1) 141 | #case ids: 1 142 | 4985016 143 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-10-hicma-rnd-16-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Sat Feb 10 22:53:40 AST 2018 on 16 nodes. Hicma edsin - hic - exp/cases/50K500K.txt "1" 1 (0-0-1) 3 | #case ids: 1 4 | 4971380 5 | # 6 | 4971803 7 | 4971804 8 | 4971805 9 | 4971806 10 | 4971807 11 | 4971808 12 | 4971809 13 | 4971810 14 | 4971811 15 | 4971812 16 | 4971813 17 | 4971814 18 | 4971815 19 | 4971816 20 | 4971817 21 | 4971818 22 | 4971819 23 | 4971820 24 | 4971821 25 | 4971822 26 | 4971823 27 | #Number of nodes: 16 ============================= 28 | #Mon Feb 12 00:39:24 AST 2018 on 16 nodes. 
Hicma rnd - hic - exp/cases/rnd.sh "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84" 84 (0-83-1) 29 | #case ids: 1 30 | 4986003 31 | #case ids: 2 32 | 4986004 33 | #case ids: 3 34 | 4986005 35 | #case ids: 4 36 | 4986006 37 | #case ids: 5 38 | 4986007 39 | #case ids: 6 40 | 4986008 41 | #case ids: 7 42 | 4986009 43 | #case ids: 8 44 | 4986010 45 | #case ids: 9 46 | 4986011 47 | #case ids: 10 48 | 4986012 49 | #case ids: 11 50 | 4986013 51 | #case ids: 12 52 | 4986014 53 | #case ids: 13 54 | 4986015 55 | #case ids: 14 56 | 4986016 57 | #case ids: 15 58 | 4986017 59 | #case ids: 16 60 | 4986018 61 | #case ids: 17 62 | 4986019 63 | #case ids: 18 64 | 4986020 65 | #case ids: 19 66 | 4986021 67 | #case ids: 20 68 | 4986022 69 | #case ids: 21 70 | 4986023 71 | #case ids: 22 72 | 4986024 73 | #case ids: 23 74 | 4986025 75 | #case ids: 24 76 | 4986026 77 | #case ids: 25 78 | 4986027 79 | #case ids: 26 80 | 4986028 81 | #case ids: 27 82 | 4986029 83 | #case ids: 28 84 | 4986030 85 | #case ids: 29 86 | 4986031 87 | #case ids: 30 88 | 4986032 89 | #case ids: 31 90 | 4986033 91 | #case ids: 32 92 | 4986034 93 | #case ids: 33 94 | 4986035 95 | #case ids: 34 96 | 4986036 97 | #case ids: 35 98 | 4986037 99 | #case ids: 36 100 | 4986038 101 | #case ids: 37 102 | 4986039 103 | #case ids: 38 104 | 4986040 105 | #case ids: 39 106 | 4986041 107 | #case ids: 40 108 | 4986042 109 | #case ids: 41 110 | 4986043 111 | #case ids: 42 112 | 4986044 113 | #case ids: 43 114 | 4986045 115 | #case ids: 44 116 | 4986046 117 | #case ids: 45 118 | 4986047 119 | #case ids: 46 120 | 4986048 121 | #case ids: 47 122 | 4986049 123 | #case ids: 48 124 | 4986050 125 | #case ids: 49 126 | 4986051 127 | #case ids: 50 128 | 4986052 129 | #case ids: 51 130 | 4986053 131 | #case ids: 52 132 | 4986054 133 | #case ids: 53 
134 | 4986055 135 | #case ids: 54 136 | 4986056 137 | #case ids: 55 138 | 4986057 139 | #case ids: 56 140 | 4986058 141 | #case ids: 57 142 | 4986059 143 | #case ids: 58 144 | 4986060 145 | #case ids: 59 146 | 4986061 147 | #case ids: 60 148 | 4986062 149 | #case ids: 61 150 | 4986063 151 | #case ids: 62 152 | 4986064 153 | #case ids: 63 154 | 4986065 155 | #case ids: 64 156 | 4986066 157 | #case ids: 65 158 | 4986067 159 | #case ids: 66 160 | 4986068 161 | #case ids: 67 162 | 4986069 163 | #case ids: 68 164 | 4986070 165 | #case ids: 69 166 | 4986071 167 | #case ids: 70 168 | 4986072 169 | #case ids: 71 170 | 4986073 171 | #case ids: 72 172 | 4986074 173 | #case ids: 73 174 | 4986075 175 | #case ids: 74 176 | 4986076 177 | #case ids: 75 178 | 4986077 179 | #case ids: 76 180 | 4986078 181 | #case ids: 77 182 | 4986079 183 | #case ids: 78 184 | 4986080 185 | #case ids: 79 186 | 4986081 187 | #case ids: 80 188 | 4986082 189 | #case ids: 81 190 | 4986083 191 | #case ids: 82 192 | 4986084 193 | #case ids: 83 194 | 4986085 195 | #case ids: 84 196 | 4986086 197 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-17-hicma-trsm-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Sat Feb 17 22:56:43 AST 2018 on 16 nodes. 
Hicma trsm --ss - small - - 03:00:00 - 400 - hic - exp/cases/sqexp-trsm.sh "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22" 22 (0-21-1) 3 | #case ids: 1 4 | 5034934 5 | #case ids: 2 6 | 5034935 7 | #case ids: 3 8 | 5034936 9 | #case ids: 4 10 | 5034937 11 | #case ids: 5 12 | 5034938 13 | #case ids: 6 14 | 5034939 15 | #case ids: 7 16 | 5034940 17 | #case ids: 8 18 | 5034941 19 | #case ids: 9 20 | 5034942 21 | #case ids: 10 22 | 5034943 23 | #case ids: 11 24 | 5034944 25 | #case ids: 12 26 | 5034945 27 | #case ids: 13 28 | 5034946 29 | #case ids: 14 30 | 5034947 31 | #case ids: 15 32 | 5034948 33 | #case ids: 16 34 | 5034949 35 | #case ids: 17 36 | 5034950 37 | #case ids: 18 38 | 5034951 39 | #case ids: 19 40 | 5034952 41 | #case ids: 20 42 | 5034953 43 | #case ids: 21 44 | 5034954 45 | #case ids: 22 46 | 5034955 47 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-17-hicma-trsm-2.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Sat Feb 17 23:29:52 AST 2018 on 16 nodes. 
Hicma trsm --ss - small - - 03:00:00 - 1350 - hic - exp/cases/sqexp-trsm.sh "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22" 22 (0-21-1) 3 | #case ids: 1 4 | 5034956 5 | #case ids: 2 6 | 5034957 7 | #case ids: 3 8 | 5034958 9 | #case ids: 4 10 | 5034959 11 | #case ids: 5 12 | 5034960 13 | #case ids: 6 14 | 5034961 15 | #case ids: 7 16 | 5034962 17 | #case ids: 8 18 | 5034963 19 | #case ids: 9 20 | 5034964 21 | #case ids: 10 22 | 5034965 23 | #case ids: 11 24 | 5034966 25 | #case ids: 12 26 | 5034967 27 | #case ids: 13 28 | 5034968 29 | #case ids: 14 30 | 5034969 31 | #case ids: 15 32 | 5034970 33 | #case ids: 16 34 | 5034971 35 | #case ids: 17 36 | 5034972 37 | #case ids: 18 38 | 5034973 39 | #case ids: 19 40 | 5034974 41 | #case ids: 20 42 | 5034975 43 | #case ids: 21 44 | 5034976 45 | #case ids: 22 46 | 5034977 47 | -------------------------------------------------------------------------------- /exp/jobids/2018-02-17-hicma-trsm-3.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Sat Feb 17 23:38:48 AST 2018 on 16 nodes. 
Hicma trsm --ss - small - - 03:00:00 - 1350 - hic - exp/cases/sqexp-trsm.sh "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22" 22 (0-21-1) 3 | #case ids: 1 4 | 5034978 5 | #case ids: 2 6 | 5034979 7 | #case ids: 3 8 | 5034980 9 | #case ids: 4 10 | 5034981 11 | #case ids: 5 12 | 5034982 13 | #case ids: 6 14 | 5034983 15 | #case ids: 7 16 | 5034984 17 | #case ids: 8 18 | 5034985 19 | #case ids: 9 20 | 5034986 21 | #case ids: 10 22 | 5034987 23 | #case ids: 11 24 | 5034988 25 | #case ids: 12 26 | 5034989 27 | #case ids: 13 28 | 5034990 29 | #case ids: 14 30 | 5034991 31 | #case ids: 15 32 | 5034992 33 | #case ids: 16 34 | 5034993 35 | #case ids: 17 36 | 5034994 37 | #case ids: 18 38 | 5034995 39 | #case ids: 19 40 | 5034996 41 | #case ids: 20 42 | 5034997 43 | #case ids: 21 44 | 5034998 45 | #case ids: 22 46 | 5034999 47 | -------------------------------------------------------------------------------- /exp/jobids/2019-09-01-hicma-st2dsqexp-isambard-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 16 ============================= 2 | #Sun Sep 1 12:24:42 UTC 2019 on 16 nodes. 
Hicma beta=0.01 --ss - big - - 10:00:00 - 150 - hic - exp/cases/sqexp.sh "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28" 28 (0-27-1) 3 | #case ids: 1 4 | 23984.xci00 5 | #case ids: 2 6 | 23985.xci00 7 | #case ids: 3 8 | 23986.xci00 9 | #case ids: 4 10 | 23987.xci00 11 | #case ids: 5 12 | 23988.xci00 13 | #case ids: 6 14 | 23989.xci00 15 | #case ids: 7 16 | 23990.xci00 17 | #case ids: 8 18 | 23991.xci00 19 | ##case ids: 9 20 | #23992.xci00 21 | ##case ids: 10 22 | #23993.xci00 23 | ##case ids: 11 24 | #23994.xci00 25 | ##case ids: 12 26 | #23995.xci00 27 | ##case ids: 13 28 | #23996.xci00 29 | ##case ids: 14 30 | #23997.xci00 31 | ##case ids: 15 32 | #23998.xci00 33 | ##case ids: 16 34 | #23999.xci00 35 | ##case ids: 17 36 | #24000.xci00 37 | ##case ids: 18 38 | #24001.xci00 39 | ##case ids: 19 40 | #24002.xci00 41 | ##case ids: 20 42 | #24003.xci00 43 | ##case ids: 21 44 | #24004.xci00 45 | ##case ids: 22 46 | #24005.xci00 47 | ##case ids: 23 48 | #24006.xci00 49 | ##case ids: 24 50 | #24007.xci00 51 | ##case ids: 25 52 | #24008.xci00 53 | ##case ids: 26 54 | #24009.xci00 55 | ##case ids: 27 56 | #24010.xci00 57 | ##case ids: 28 58 | #24011.xci00 59 | -------------------------------------------------------------------------------- /exp/jobids/2019-09-05-hicma-st2dsqexp-isambard-threads-1.txt: -------------------------------------------------------------------------------- 1 | #Number of nodes: 1 ============================= 2 | #Thu Sep 5 19:04:35 UTC 2019 on 1 nodes. 
Hicma beta=0.01 --ss - small - - 10:00:00 - 150 - hic - exp/cases/sqexp.sh "1" 1 (0-0-1) 3 | #case ids: 1 4 | 25425.xci00 5 | 25426.xci00 6 | 25427.xci00 7 | 25428.xci00 8 | -------------------------------------------------------------------------------- /exp/jobids/ji-2018-02-12-turbo_off.txt: -------------------------------------------------------------------------------- 1 | 128_1.simout.583525.sdb 2 | 128_2.simout.583526.sdb 3 | 128_3.simout.583527.sdb 4 | 128_4.simout.583528.sdb 5 | 128_5.simout.583529.sdb 6 | 16_1.simout.583537.sdb 7 | 16_2.simout.583538.sdb 8 | 256_1.simout.583523.sdb 9 | 256_2.simout.583524.sdb 10 | 32_1.simout.583534.sdb 11 | 32_2.simout.583535.sdb 12 | 32_3.simout.583536.sdb 13 | 64_1.simout.583530.sdb 14 | 64_2.simout.583531.sdb 15 | 64_3.simout.583532.sdb 16 | 64_4.simout.583533.sdb 17 | -------------------------------------------------------------------------------- /exp/jobids/ji-2018-02-12-turbo_on.txt: -------------------------------------------------------------------------------- 1 | 128_1.simout.583509.sdb 2 | 128_2.simout.583510.sdb 3 | 128_3.simout.583511.sdb 4 | 128_4.simout.583512.sdb 5 | 128_5.simout.583513.sdb 6 | 16_1.simout.583521.sdb 7 | 16_2.simout.583522.sdb 8 | 256_1.simout.583507.sdb 9 | 256_2.simout.583508.sdb 10 | 32_1.simout.583518.sdb 11 | 32_2.simout.583519.sdb 12 | 32_3.simout.583520.sdb 13 | 64_1.simout.583514.sdb 14 | 64_2.simout.583515.sdb 15 | 64_3.simout.583516.sdb 16 | 64_4.simout.583517.sdb 17 | -------------------------------------------------------------------------------- /exp/jobids/ji-2018-02-12-turbo_on_small_block_sizes.txt: -------------------------------------------------------------------------------- 1 | 128_1.simout.583593.sdb 2 | 128_10.simout.583602.sdb 3 | 128_11.simout.583603.sdb 4 | 128_12.simout.583604.sdb 5 | 128_13.simout.583605.sdb 6 | 128_14.simout.583606.sdb 7 | 128_15.simout.583607.sdb 8 | 128_2.simout.583594.sdb 9 | 128_3.simout.583595.sdb 10 | 128_4.simout.583596.sdb 11 | 
128_5.simout.583597.sdb 12 | 128_6.simout.583598.sdb 13 | 128_7.simout.583599.sdb 14 | 128_8.simout.583600.sdb 15 | 128_9.simout.583601.sdb 16 | 16_1.simout.583629.sdb 17 | 16_2.simout.583630.sdb 18 | 16_3.simout.583631.sdb 19 | 16_4.simout.583632.sdb 20 | 16_5.simout.583633.sdb 21 | 16_6.simout.583634.sdb 22 | 256_1.simout.583587.sdb 23 | 256_2.simout.583588.sdb 24 | 256_3.simout.583589.sdb 25 | 256_4.simout.583590.sdb 26 | 256_5.simout.583591.sdb 27 | 256_6.simout.583592.sdb 28 | 32_1.simout.583620.sdb 29 | 32_2.simout.583621.sdb 30 | 32_3.simout.583622.sdb 31 | 32_4.simout.583623.sdb 32 | 32_5.simout.583624.sdb 33 | 32_6.simout.583625.sdb 34 | 32_7.simout.583626.sdb 35 | 32_8.simout.583627.sdb 36 | 32_9.simout.583628.sdb 37 | 64_1.simout.583608.sdb 38 | 64_10.simout.583617.sdb 39 | 64_11.simout.583618.sdb 40 | 64_12.simout.583619.sdb 41 | 64_2.simout.583609.sdb 42 | 64_3.simout.583610.sdb 43 | 64_4.simout.583611.sdb 44 | 64_5.simout.583612.sdb 45 | 64_6.simout.583613.sdb 46 | 64_7.simout.583614.sdb 47 | 64_8.simout.583615.sdb 48 | 64_9.simout.583616.sdb 49 | -------------------------------------------------------------------------------- /exp/out/cpu.txt: -------------------------------------------------------------------------------- 1 | vulture-2021-03-18-st-2d-exp-1.txt 2 | vulture-2021-03-17-st-2d-exp-1.txt 3 | vulture-2021-03-17-st-2d-sqexp-1.txt 4 | vulture-2021-05-22-196560-1.txt 5 | -------------------------------------------------------------------------------- /exp/out/gpu.txt: -------------------------------------------------------------------------------- 1 | #vulture-gpu-2021-03-07-1.txt 2 | vulture-v100-2021-03-15-sqexp-exp-2d-1.txt 3 | #vulture-2v100-2021-03-16-sqexp-exp-2d-2gpus-1.txt 4 | #a100-2021-03-21-1.txt 5 | vulture-v100-2021-04-08-sqexp-exp-2d-gpu-resident-1.txt 6 | #ibexrome-2021-05-02-sqexp-exp-2d-1.txt 7 | ibexrome-2021-05-07-smallnb-sqexp-exp-2d-1-1.txt 8 | ibexrome-2021-05-08-smallnb-sqexp-exp-2d-1.txt 9 | 
-------------------------------------------------------------------------------- /exp/out/mkl.txt: -------------------------------------------------------------------------------- 1 | vulture-2021-03-18-mkl-1.txt 2 | -------------------------------------------------------------------------------- /exp/out/st-2d-exp.txt: -------------------------------------------------------------------------------- 1 | vulture-2021-03-18-st-2d-exp-1.txt vulture-2021-03-17-st-2d-exp-1.txt 2 | -------------------------------------------------------------------------------- /exp/out/st-2d-sqexp.txt: -------------------------------------------------------------------------------- 1 | vulture-2021-03-17-st-2d-sqexp-1.txt 2 | -------------------------------------------------------------------------------- /exp/ranks/.f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecrc/hicma/fa8596b5d3aa8e5b7d5c06cd8db3cecc32f70d17/exp/ranks/.f -------------------------------------------------------------------------------- /exp/shmem-mpi-test.sh: -------------------------------------------------------------------------------- 1 | sbatch --nodes=1 exp/trial.sh 1 1 30 2 | sbatch --nodes=1 exp/trial.sh 1 2 15 3 | sbatch --nodes=2 exp/trial.sh 2 2 15 4 | -------------------------------------------------------------------------------- /exp/starpulog/.f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecrc/hicma/fa8596b5d3aa8e5b7d5c06cd8db3cecc32f70d17/exp/starpulog/.f -------------------------------------------------------------------------------- /exp/valgrind.supp: -------------------------------------------------------------------------------- 1 | { 2 | kaCondJump 3 | Memcheck:Cond 4 | obj:/opt/ecrc/mkl/2018-initial/compilers_and_libraries_2018.0.128/linux/mkl/lib/intel64_lin/* 5 | fun:* 6 | } 7 | 
-------------------------------------------------------------------------------- /experiment_ADSC20/S1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Please set this to available cores in the system 3 | TH=39 4 | export STARPU_SCHED=prio 5 | export STARPU_SILENT=1 6 | 7 | #10 viruses of 10370 resolution using 1e-5, 1e-6, and 1e-7 8 | numactl --interleave=all ./build/timing/time_zpotrf_tile --m=103700 --n_range=2500:2500 --k=103700 --mb=2074 --nb=50 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-5 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=10 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus103700.txt --csolve --solve 9 | numactl --interleave=all ./build/timing/time_zpotrf_tile --m=103700 --n_range=2500:2500 --k=103700 --mb=2074 --nb=50 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-6 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=10 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus103700.txt --csolve --solve 10 | numactl --interleave=all ./build/timing/time_zpotrf_tile --m=103700 --n_range=2500:2500 --k=103700 --mb=2074 --nb=50 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-7 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=10 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus103700.txt --csolve --solve 11 | 12 | #20 viruses of 10370 resolution using 1e-5, 1e-6, and 1e-7 13 | 14 | numactl --interleave=all ./build/timing/time_zpotrf_tile --m=207400 --n_range=6800:6800 --k=207400 --mb=3050 --nb=100 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-5 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=200 --rbf_kernel=9 --rad=-1 --numobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus207400.txt --csolve --solve 15 | 16 | numactl --interleave=all ./build/timing/time_zpotrf_tile --m=207400 
--n_range=6800:6800 --k=207400 --mb=3050 --nb=100 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-6 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=200 --rbf_kernel=9 --rad=-1 --numobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus207400.txt --csolve --solve 17 | 18 | numactl --interleave=all ./build/timing/time_zpotrf_tile --m=207400 --n_range=6800:6800 --k=207400 --mb=3050 --nb=100 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-7 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=200 --rbf_kernel=9 --rad=-1 --numobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus207400.txt --csolve --solve 19 | 20 | -------------------------------------------------------------------------------- /experiment_ADSC20/S1slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=hicma 3 | #SBATCH --account=k1205 4 | #SBATCH --output=%j.out 5 | #SBATCH --error=%j.err 6 | #SBATCH --partition=workq 7 | #SBATCH --cpus-per-task=32 8 | #SBATCH --threads-per-core=1 9 | #SBATCH --hint=nomultithread 10 | #SBATCH --nodes=4 11 | #SBATCH --ntasks-per-node=1 12 | #SBATCH --ntasks=4 13 | #SBATCH --time=10:00:00 14 | 15 | 16 | #Please set this to available cores in the system 17 | TH=31 18 | P=2 19 | export STARPU_SCHED=prio 20 | export STARPU_SILENT=1 21 | 22 | # you can set max to number of mpi nodes * number of threads, and min to half the max 23 | export STARPU_LIMIT_MAX_SUBMITTED_TASKS=128 24 | export STARPU_LIMIT_MIN_SUBMITTED_TASKS=64 25 | 26 | 27 | #10 viruses of 10370 resolution using 1e-5, 1e-6, and 1e-7 28 | srun numactl --interleave=all ./build/timing/time_zpotrf_tile --m=103700 --n_range=2500:2500 --k=103700 --mb=2074 --nb=50 --nowarmup --threads=$TH --p=$P --rk=0 --acc=1e-5 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=10 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus103700.txt --csolve 
--solve 29 | srun numactl --interleave=all ./build/timing/time_zpotrf_tile --m=103700 --n_range=2500:2500 --k=103700 --mb=2074 --nb=50 --nowarmup --threads=$TH --p=$P --rk=0 --acc=1e-6 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=10 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus103700.txt --csolve --solve 30 | srun numactl --interleave=all ./build/timing/time_zpotrf_tile --m=103700 --n_range=2500:2500 --k=103700 --mb=2074 --nb=50 --nowarmup --threads=$TH --p=$P --rk=0 --acc=1e-7 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=10 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus103700.txt --csolve --solve 31 | 32 | #20 viruses of 10370 resolution using 1e-5, 1e-6, and 1e-7 33 | 34 | srun numactl --interleave=all ./build/timing/time_zpotrf_tile --m=207400 --n_range=6800:6800 --k=207400 --mb=3050 --nb=100 --nowarmup --threads=$TH --p=$P --rk=0 --acc=1e-5 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=200 --rbf_kernel=9 --rad=-1 --numobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus207400.txt --csolve --solve 35 | 36 | srun numactl --interleave=all ./build/timing/time_zpotrf_tile --m=207400 --n_range=6800:6800 --k=207400 --mb=3050 --nb=100 --nowarmup --threads=$TH --p=$P --rk=0 --acc=1e-6 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=200 --rbf_kernel=9 --rad=-1 --numobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus207400.txt --csolve --solve 37 | 38 | srun numactl --interleave=all ./build/timing/time_zpotrf_tile --m=207400 --n_range=6800:6800 --k=207400 --mb=3050 --nb=100 --nowarmup --threads=$TH --p=$P --rk=0 --acc=1e-7 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=200 --rbf_kernel=9 --rad=-1 --numobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S1data/SortVirus207400.txt --csolve --solve 39 | 40 | 
-------------------------------------------------------------------------------- /experiment_ADSC20/S2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Please set this to available cores in the system 3 | TH=39 4 | export STARPU_SCHED=prio 5 | export STARPU_SILENT=1 6 | 7 | #8 viruses of 10370 resolution in which every 2 viruses interact together in one batch using 1e-6 8 | numactl --interleave=all ./build/timing/time_zpotrf_tile_batch --m=20740 --n_range=1000:1000 --k=20740 --mb=1037 --nb=50 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-6 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=8 --numsubobj=2 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S2data_20k/ --csolve --solve 9 | 10 | #240 viruses of 10370 resolution in which every 20 viruses interact together in one batch using 1e-6 11 | 12 | numactl --interleave=all ./build/timing/time_zpotrf_tile_batch --m=207400 --n_range=3400:3400 --k=207400 --mb=3050 --nb=50 --nowarmup --threads=$TH --p=1 --rk=0 --acc=1e-6 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=240 --numsubobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S2data_200k/ 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /experiment_ADSC20/S2slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=hicma 3 | #SBATCH --account=k1205 4 | #SBATCH --output=%j.out 5 | #SBATCH --error=%j.err 6 | #SBATCH --partition=workq 7 | #SBATCH --cpus-per-task=32 8 | #SBATCH --threads-per-core=1 9 | #SBATCH --hint=nomultithread 10 | #SBATCH --nodes=4 11 | #SBATCH --ntasks-per-node=1 12 | #SBATCH --ntasks=4 13 | #SBATCH --time=10:00:00 14 | 15 | #Please set this to available cores in the system 16 | TH=31 17 | P=2 18 | export STARPU_SCHED=prio 19 | export STARPU_SILENT=1 20 | 21 | 22 | #240
viruses of 10370 resolution in which every 20 viruses interact together in one batch using 1e-6 23 | 24 | srun numactl --interleave=all ./build/timing/time_zpotrf_tile_batch --m=207400 --n_range=3400:3400 --k=207400 --mb=3050 --nb=50 --nowarmup --threads=$TH --p=$P --rk=0 --acc=1e-6 --m-3D-rbf --starshwavek=0 --starshdecay=0 --starshmaxrank=100 --rbf_kernel=9 --rad=-1 --numobj=240 --numsubobj=20 --order=2 --mesh_file=stars-h/SARS-CoV-2-meshes/S2data_200k/ 25 | 26 | -------------------------------------------------------------------------------- /hicma.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | libdir=${exec_prefix}/lib 4 | includedir=${exec_prefix}/include 5 | 6 | Name: HICMA 7 | Description: HiCMA: Hierarchical Computations on Manycore Architectures. 8 | Version: @HICMA_VERSION@ 9 | URL: http://github.com/ecrc/hicma 10 | Cflags: -I${includedir} 11 | Libs: -L${libdir} @HICMA_PKGCONFIG_LIBS@ 12 | Libs.private: @HICMA_PKGCONFIG_LIBS_PRIVATE@ 13 | Requires: @HICMA_PKGCONFIG_REQUIRED@ 14 | Requires.private: @HICMA_PKGCONFIG_REQUIRED_PRIVATE@ 15 | 16 | -------------------------------------------------------------------------------- /hicma_ext/control/common.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file common.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2015 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved.
13 | * 14 | *** 15 | * 16 | * @brief Chameleon common header file 17 | * 18 | * @version 1.0.0 19 | * @author Mathieu Faverge 20 | * @author Cedric Castagnede 21 | * @date 2012-09-15 22 | * 23 | */ 24 | /** 25 | * HICMA facilities of interest to both HICMA core developer 26 | * and also of interest to HICMA community contributor. 27 | */ 28 | #ifndef _HICMA_CHAM_COMMON_H_ 29 | #define _HICMA_CHAM_COMMON_H_ 30 | 31 | 32 | #if defined( _WIN32 ) || defined( _WIN64 ) 33 | #include 34 | #else 35 | #include 36 | #endif 37 | 38 | /** 39 | * Implementation headers 40 | */ 41 | #if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION) 42 | #include 43 | #include 44 | #include 45 | #if defined(CHAMELEON_USE_CUBLAS_V2) 46 | #include 47 | #include 48 | #else 49 | #include 50 | #endif 51 | #endif 52 | 53 | #if defined(CHAMELEON_USE_OPENCL) && !defined(CHAMELEON_SIMULATION) 54 | #include 55 | #endif 56 | 57 | #if defined(HICMA_USE_MPI) 58 | #include 59 | #endif 60 | 61 | /** 62 | * Line to avoid conflict with other linear algebra libraries, because, we 63 | * don't know why but lapacke provide a wrong interface of lapack in fortran 64 | */ 65 | #ifndef LAPACK_NAME 66 | #define LAPACK_NAME(a, b) lapackef77_##a 67 | #endif 68 | 69 | /** 70 | * Chameleon header files 71 | */ 72 | 73 | #include 74 | #include "hicma_global.h" 75 | #include "hicma_auxiliary.h" 76 | #include "hicma_context.h" 77 | #include "hicma_descriptor.h" 78 | #include "hicma_async.h" 79 | 80 | /** 81 | * Global shortcuts 82 | */ 83 | #define HICMA_RANK hicma_rank(hicma) 84 | #define HICMA_SIZE hicma->world_size 85 | #define HICMA_GRPSIZE hicma->group_size 86 | #define HICMA_NB hicma->nb 87 | #define HICMA_IB hicma->ib 88 | #define HICMA_NBNBSIZE hicma->nbnbsize 89 | #define HICMA_IBNBSIZE hicma->ibnbsize 90 | #define HICMA_SCHEDULING hicma->scheduling 91 | #define HICMA_RHBLK hicma->rhblock 92 | #define HICMA_TRANSLATION hicma->translation 93 | #define HICMA_PARALLEL hicma->parallel_enabled 94 | #define 
HICMA_PROFILING hicma->profiling_enabled 95 | #if defined(HICMA_USE_MPI) 96 | #define HICMA_MPI_RANK hicma->my_mpi_rank 97 | #define HICMA_MPI_SIZE hicma->mpi_comm_size 98 | #endif 99 | 100 | /** 101 | * IPT internal define 102 | */ 103 | #define HicmaIPT_NoDep 0 104 | #define HicmaIPT_Panel 1 105 | #define HicmaIPT_All 2 106 | 107 | /** 108 | * Global array of LAPACK constants 109 | */ 110 | extern char *hicma_lapack_constants[]; 111 | #define hicma_lapack_const(hicma_const) hicma_lapack_constants[hicma_const][0] 112 | 113 | #ifdef __cplusplus 114 | extern "C" { 115 | #endif 116 | 117 | 118 | #include 119 | #include 120 | #include 121 | #include 122 | 123 | /* 124 | void hicma_pdlag2s(HICMA_context_t *hicma); 125 | void hicma_pzlag2c(HICMA_context_t *hicma); 126 | void hicma_pslag2d(HICMA_context_t *hicma); 127 | void hicma_pclag2z(HICMA_context_t *hicma); 128 | */ 129 | 130 | #ifdef __cplusplus 131 | } 132 | #endif 133 | 134 | #endif 135 | -------------------------------------------------------------------------------- /hicma_ext/control/hicma_async.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file async.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 
13 | * 14 | *** 15 | * 16 | * @brief Chameleon asynchronous management header 17 | * 18 | * @version 1.0.0 19 | * @author Jakub Kurzak 20 | * @author Cedric Castagnede 21 | * @date 2010-11-15 22 | * 23 | */ 24 | #ifndef _HICMA_CHAM_ASYNC_H_ 25 | #define _HICMA_CHAM_ASYNC_H_ 26 | 27 | #include 28 | 29 | #ifdef __cplusplus 30 | extern "C" { 31 | #endif 32 | 33 | /** 34 | * Internal routines 35 | */ 36 | int hicma_request_fail (HICMA_sequence_t *sequence, HICMA_request_t *request, int error); 37 | int hicma_sequence_create (HICMA_context_t *HICMA, HICMA_sequence_t **sequence); 38 | int hicma_sequence_destroy (HICMA_context_t *HICMA, HICMA_sequence_t *sequence); 39 | int hicma_sequence_wait (HICMA_context_t *HICMA, HICMA_sequence_t *sequence); 40 | 41 | #ifdef __cplusplus 42 | } 43 | #endif 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /hicma_ext/control/hicma_auxiliary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file auxiliary.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 
13 | * 14 | *** 15 | * 16 | * @brief Chameleon auxiliary header 17 | * 18 | * @version 1.0.0 19 | * @author Jakub Kurzak 20 | * @author Piotr Luszczek 21 | * @author Emmanuel Agullo 22 | * @author Cedric Castagnede 23 | * @date 2010-11-15 24 | * 25 | */ 26 | #ifndef _HICMA_CHAM_AUXILIARY_H_ 27 | #define _HICMA_CHAM_AUXILIARY_H_ 28 | 29 | #include 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | /** 36 | * Internal routines 37 | */ 38 | void hicma_warning (const char *func_name, const char* msg_text); 39 | void hicma_error (const char *func_name, const char* msg_text); 40 | void hicma_fatal_error (const char *func_name, const char* msg_text); 41 | int hicma_rank (HICMA_context_t *hicma); 42 | int hicma_tune (HICMA_enum func, int M, int N, int NRHS); 43 | 44 | /** 45 | * API routines 46 | */ 47 | int HICMA_Version (int *ver_major, int *ver_minor, int *ver_micro); 48 | int HICMA_Element_Size (int type); 49 | int HICMA_My_Mpi_Rank (void); 50 | 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /hicma_ext/control/hicma_context.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file context.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 
13 | * 14 | *** 15 | * 16 | * @brief Chameleon context header 17 | * 18 | * @version 1.0.0 19 | * @author Jakub Kurzak 20 | * @author Cedric Augonnet 21 | * @author Mathieu Faverge 22 | * @author Cedric Castagnede 23 | * @date 2012-09-15 24 | * 25 | */ 26 | #ifndef _HICMA_CHAM_CONTEXT_H_ 27 | #define _HICMA_CHAM_CONTEXT_H_ 28 | 29 | #include 30 | 31 | /** 32 | * Routines to handle threads context 33 | */ 34 | #ifdef __cplusplus 35 | extern "C" { 36 | #endif 37 | 38 | HICMA_context_t* hicma_context_create (); 39 | HICMA_context_t* hicma_context_self (); 40 | int hicma_context_destroy (); 41 | 42 | #ifdef __cplusplus 43 | } 44 | #endif 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /hicma_ext/control/hicma_global.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file global.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 13 | * 14 | *** 15 | * 16 | * @brief Chameleon global variables header 17 | * 18 | * @version 1.0.0 19 | * @author Jakub Kurzak 20 | * @author Piotr Luszczek 21 | * @author Cedric Castagnede 22 | * @date 2010-11-15 23 | * 24 | */ 25 | /** 26 | * HICMA internals of interest to HICMA core developers, but not necessarily 27 | * of interest to HICMA community contributors. 
28 | */ 29 | #ifndef _HICMA_CHAM_GLOBAL_H_ 30 | #define _HICMA_CHAM_GLOBAL_H_ 31 | 32 | #if defined( _WIN32 ) || defined( _WIN64 ) 33 | #include "control/hicmawinthread.h" 34 | #else 35 | #include 36 | #endif 37 | 38 | /** 39 | * Numerical operations 40 | */ 41 | #define HICMA_FUNC_SGEMM 19 42 | #define HICMA_FUNC_DGEMM 20 43 | #define HICMA_FUNC_CGEMM 21 44 | #define HICMA_FUNC_ZGEMM 22 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /hicma_ext/runtime/starpu/control/hicma_runtime_async.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file runtime_async.c 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 
13 | * 14 | *** 15 | * 16 | * @brief Chameleon StarPU asynchronous routines 17 | * 18 | * @version 1.0.0 19 | * @author Mathieu Faverge 20 | * @author Cedric Castagnede 21 | * @date 2010-11-15 22 | * 23 | */ 24 | #include 25 | #include "runtime/starpu/hicma_starpu.h" 26 | 27 | /** 28 | * Create a sequence. No-op in this backend: both arguments are ignored and HICMA_SUCCESS is returned. 29 | */ 30 | int HICMA_RUNTIME_sequence_create( HICMA_context_t *hicma, 31 | HICMA_sequence_t *sequence ) 32 | { 33 | (void)hicma; 34 | (void)sequence; 35 | return HICMA_SUCCESS; 36 | } 37 | 38 | /** 39 | * Destroy a sequence. No-op in this backend: both arguments are ignored and HICMA_SUCCESS is returned. 40 | */ 41 | int HICMA_RUNTIME_sequence_destroy( HICMA_context_t *hicma, 42 | HICMA_sequence_t *sequence ) 43 | { 44 | (void)hicma; 45 | (void)sequence; 46 | return HICMA_SUCCESS; 47 | } 48 | 49 | /** 50 | * Wait for the completion of a sequence. The sequence argument is unused: after an optional HICMA_RUNTIME_progress() call (when hicma->progress_enabled is set) this blocks in starpu_task_wait_for_all() and, when HICMA_USE_MPI is defined, in starpu_mpi_barrier(MPI_COMM_WORLD). Always returns HICMA_SUCCESS. 51 | */ 52 | int HICMA_RUNTIME_sequence_wait( HICMA_context_t *hicma, 53 | HICMA_sequence_t *sequence ) 54 | { 55 | (void)hicma; 56 | (void)sequence; 57 | 58 | if (hicma->progress_enabled) { 59 | HICMA_RUNTIME_progress(hicma); 60 | } 61 | 62 | starpu_task_wait_for_all(); 63 | #if defined(HICMA_USE_MPI) 64 | starpu_mpi_barrier(MPI_COMM_WORLD); 65 | #endif 66 | return HICMA_SUCCESS; 67 | } 68 | 69 | /** 70 | * Terminate a sequence: store request and status in the sequence and copy status into the request. Both sequence and request are dereferenced unconditionally, so neither may be NULL. 71 | */ 72 | void HICMA_RUNTIME_sequence_flush( HICMA_context_t *hicma, 73 | HICMA_sequence_t *sequence, 74 | HICMA_request_t *request, 75 | int status ) 76 | { 77 | (void)hicma; 78 | sequence->request = request; 79 | sequence->status = status; 80 | request->status = status; 81 | return; 82 | } 83 | -------------------------------------------------------------------------------- /hicma_ext/runtime/starpu/control/hicma_runtime_context.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved.
4 | */ 5 | /** 6 | * 7 | * @file hicma_runtime_context.c 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 13 | * 14 | *** 15 | * 16 | * @brief Chameleon StarPU context routines 17 | * 18 | * @version 1.0.0 19 | * @author Cedric Augonnet 20 | * @author Mathieu Faverge 21 | * @author Cedric Castagnede 22 | * @date 2010-11-15 23 | * 24 | */ 25 | #include 26 | #include "runtime/starpu/hicma_starpu.h" 27 | 28 | #if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 3)) 29 | /* Defined by StarPU as external function */ 30 | #else 31 | #if ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 2)) 32 | int _starpu_is_initialized(void); 33 | #define starpu_is_initialized() _starpu_is_initialized() 34 | #else 35 | #define starpu_is_initialized() 0 36 | #endif 37 | #endif 38 | 39 | /** 40 | * Create new context: mark the scheduler as HICMA_RUNTIME_SCHED_STARPU and, when StarPU is not initialized yet, allocate a starpu_conf_t into hicma->schedopt and fill it with starpu_conf_init(); otherwise schedopt is set to NULL. NOTE(review): the malloc() result is passed on without a NULL check. 41 | */ 42 | void HICMA_RUNTIME_context_create( HICMA_context_t *hicma ) 43 | { 44 | starpu_conf_t *conf; 45 | 46 | hicma->scheduler = HICMA_RUNTIME_SCHED_STARPU; 47 | 48 | if (!
starpu_is_initialized() ) { 49 | hicma->schedopt = (void*) malloc (sizeof(starpu_conf_t)); 50 | conf = hicma->schedopt; 51 | 52 | starpu_conf_init( conf ); 53 | } 54 | else { 55 | hicma->schedopt = NULL; 56 | } 57 | 58 | return; 59 | } 60 | 61 | /** 62 | * Clean the context: free the starpu_conf_t stored in hicma->schedopt, if any. The pointer is not reset afterwards. 63 | */ 64 | void HICMA_RUNTIME_context_destroy( HICMA_context_t *hicma ) 65 | { 66 | /* schedopt is NULL when StarPU was already initialized by an external library; only free what context_create allocated */ 67 | if (hicma->schedopt) { 68 | free(hicma->schedopt); 69 | } 70 | return; 71 | } 72 | 73 | /** 74 | * Enable a runtime feature: HICMA_PROFILING_MODE sets the StarPU profiling status to STARPU_PROFILING_ENABLE, HICMA_BOUND calls starpu_bound_start(0, 0); any other lever is ignored. 75 | */ 76 | void HICMA_RUNTIME_enable( HICMA_enum lever ) 77 | { 78 | switch (lever) 79 | { 80 | case HICMA_PROFILING_MODE: 81 | starpu_profiling_status_set(STARPU_PROFILING_ENABLE); 82 | break; 83 | case HICMA_BOUND: 84 | starpu_bound_start(0, 0); 85 | break; 86 | default: 87 | return; 88 | } 89 | return; 90 | } 91 | 92 | /** 93 | * Disable a runtime feature: HICMA_PROFILING_MODE sets the StarPU profiling status to STARPU_PROFILING_DISABLE, HICMA_BOUND calls starpu_bound_stop(); any other lever is ignored. 94 | */ 95 | void HICMA_RUNTIME_disable( HICMA_enum lever ) 96 | { 97 | switch (lever) 98 | { 99 | case HICMA_PROFILING_MODE: 100 | starpu_profiling_status_set(STARPU_PROFILING_DISABLE); 101 | break; 102 | case HICMA_BOUND: 103 | starpu_bound_stop(); 104 | break; 105 | default: 106 | return; 107 | } 108 | return; 109 | } 110 | -------------------------------------------------------------------------------- /hicma_ext/runtime/starpu/control/hicma_runtime_options.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file runtime_options.c 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved.
13 | * 14 | *** 15 | * 16 | * @brief Chameleon StarPU options routines 17 | * 18 | * @version 1.0.0 19 | * @author Cedric Augonnet 20 | * @author Mathieu Faverge 21 | * @author Cedric Castagnede 22 | * @date 2010-11-15 23 | * 24 | */ 25 | #include 26 | #include 27 | #include "runtime/starpu/hicma_starpu.h" 28 | /* Fill *option with the given sequence and request plus defaults: minimum priority, zero workspace sizes and NULL workspace pointers. NOTE(review): HICMA_PROFILING, HICMA_PARALLEL and HICMA_NB are presumably the control/common.h shortcuts that expand to fields of the local `hicma` pointer, which would be the only use of that parameter here - confirm. */ 29 | void HICMA_RUNTIME_options_init( HICMA_option_t *option, HICMA_context_t *hicma, 30 | HICMA_sequence_t *sequence, HICMA_request_t *request ) 31 | { 32 | option->sequence = sequence; 33 | option->request = request; 34 | option->profiling = HICMA_PROFILING == HICMA_TRUE; 35 | option->parallel = HICMA_PARALLEL == HICMA_TRUE; 36 | option->priority = HICMA_PRIORITY_MIN; 37 | option->nb = HICMA_NB; 38 | option->ws_wsize = 0; 39 | option->ws_hsize = 0; 40 | option->ws_worker = NULL; 41 | option->ws_host = NULL; 42 | return; 43 | } 44 | /* Finalize options. No-op in this backend: both arguments are ignored. */ 45 | void HICMA_RUNTIME_options_finalize( HICMA_option_t *option, HICMA_context_t *hicma ) 46 | { 47 | (void)option; 48 | (void)hicma; 49 | return; 50 | } 51 | /* Set up the optional workspaces. worker_size > 0: record it in ws_wsize and register a StarPU vector of worker_size chars (home node -1, NULL pointer) whose handle is stored in ws_worker. host_size > 0: record it in ws_hsize and allocate ws_host through HICMA_RUNTIME_starpu_ws_alloc. Returns the result of the host allocation, or 0 when no host workspace is requested. */ 52 | int HICMA_RUNTIME_options_ws_alloc( HICMA_option_t *options, size_t worker_size, size_t host_size ) 53 | { 54 | int ret = 0; 55 | if ( worker_size > 0 ) { 56 | options->ws_wsize = worker_size; 57 | starpu_vector_data_register((starpu_data_handle_t*)(&(options->ws_worker)), 58 | -1, (uintptr_t)NULL, 59 | worker_size, sizeof(char)); 60 | } 61 | if ( host_size > 0 ) { 62 | options->ws_hsize = host_size; 63 | ret = HICMA_RUNTIME_starpu_ws_alloc((HICMA_starpu_ws_t**)&(options->ws_host), 64 | host_size, HICMA_CUDA, HICMA_HOST_MEM); 65 | } 66 | return ret; 67 | } 68 | /* Release the workspaces and reset both pointers to NULL. The worker handle goes through starpu_data_unregister_submit(); the host workspace is freed only after starpu_task_wait_for_all() has returned. Returns the result of HICMA_RUNTIME_starpu_ws_free, or 0 when there was no host workspace. */ 69 | int HICMA_RUNTIME_options_ws_free( HICMA_option_t *options ) 70 | { 71 | int ret = 0; 72 | if ( options->ws_worker != NULL ) { 73 | starpu_data_unregister_submit((starpu_data_handle_t)(options->ws_worker)); 74 | options->ws_worker = NULL; 75 | } 76 | if ( options->ws_host != NULL ) { 77 | starpu_task_wait_for_all(); 78 | ret = HICMA_RUNTIME_starpu_ws_free( (HICMA_starpu_ws_t*)(options->ws_host) ); 79 |
options->ws_host = NULL; 80 | } 81 | return ret; 82 | } 83 | -------------------------------------------------------------------------------- /include/coreblas/hicma_coreblas.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file hicma_coreblas.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 13 | * 14 | *** 15 | * 16 | * @brief Chameleon CPU kernels main header 17 | * 18 | * @version 1.0.0 19 | * @author Jakub Kurzak 20 | * @author Hatem Ltaief 21 | * @date 2010-11-15 22 | * 23 | */ 24 | #ifndef _CORE_BLAS_H_ 25 | #define _CORE_BLAS_H_ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | /** 33 | * CBLAS requires scalar arguments to be passed 34 | * by address rather than by value 35 | */ 36 | #ifndef CBLAS_SADDR 37 | #define CBLAS_SADDR( _val_ ) &(_val_) 38 | #endif 39 | #include "cblas.h" 40 | 41 | /** 42 | * HICMA types and constants 43 | */ 44 | #include 45 | #include 46 | #include 47 | #include "hicma_constants.h" 48 | 49 | 50 | /** 51 | * Coreblas Error 52 | */ 53 | #define coreblas_error(k, str) do { \ 54 | fprintf(stderr, "%s: Parameter %d / %s\n", __func__, k, str) ; \ 55 | assert(0); \ 56 | } while(0) 57 | 58 | /** 59 | * CBlas enum 60 | */ 61 | #define CBLAS_TRANSPOSE enum CBLAS_TRANSPOSE 62 | #define CBLAS_UPLO enum CBLAS_UPLO 63 | #define CBLAS_DIAG enum CBLAS_DIAG 64 | #define CBLAS_SIDE enum CBLAS_SIDE 65 | 66 | /** 67 | * LAPACK Constants 68 | */ 69 | BEGIN_C_DECLS 70 | 71 | extern char *hicma_lapack_constants[]; 72 | #define hicma_lapack_const(hicma_const) hicma_lapack_constants[hicma_const][0] 73 | 74 | void
HICMA_set_coreblas_gemm3m_enabled( int v ); 75 | int HICMA_get_coreblas_gemm3m_enabled( void ); 76 | 77 | END_C_DECLS 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /include/coreblas/hicma_lapacke_mangling.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file lapacke_mangling.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 13 | * 14 | *** 15 | * 16 | * @brief Chameleon lapacke mangling header 17 | * 18 | * @version 1.0.0 19 | * 20 | */ 21 | #ifndef LAPACK_HEADER_INCLUDED 22 | #define LAPACK_HEADER_INCLUDED 23 | 24 | /* Mangling for Fortran global symbols without underscores. */ 25 | #define LAPACK_GLOBAL(name,NAME) name##_ 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /include/hicma_common.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | 6 | #ifndef __HICMA_COMMON__ 7 | #define __HICMA_COMMON__ 8 | /** 9 | * @file hicma_common.h 10 | * 11 | * This header file is used inside the library. 
12 | */ 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define PROGRESS(str) \ 22 | if(print_progress){ \ 23 | int myrank = HICMA_My_Mpi_Rank();\ 24 | time(&timer); \ 25 | tm_info = localtime(&timer); \ 26 | strftime(datebuf, 26, "%Y-%m-%d %H:%M:%S",tm_info); \ 27 | fprintf(stderr, "%d:%s\t%d\t%s\t%s\n", myrank, datebuf, __LINE__, __func__, str);\ 28 | fflush(stderr);\ 29 | } 30 | //#undef PROGRESS 31 | //#define PROGRESS(str) 32 | extern struct hicma_context hicma_context; 33 | 34 | void printdescrk(HICMA_desc_t *descZ, int64_t rank); 35 | void printdesc(HICMA_desc_t *descZ); 36 | void _printdescs(HICMA_desc_t *descD,HICMA_desc_t *descU, HICMA_desc_t *descV, HICMA_desc_t *descRk); 37 | void _printdescrk(HICMA_desc_t *descZ, int64_t rank); 38 | 39 | void check_same(HICMA_desc_t *descL, HICMA_desc_t *descR, char diag, char uplo); 40 | void dget_stat(HICMA_enum uplo, double *Ark, size_t m, size_t n, size_t ld, HICMA_stat_t *stat); 41 | void dprint_stat(HICMA_stat_t stat); 42 | void zget_stat(HICMA_enum uplo, double *Ark, size_t m, size_t n, size_t ld, HICMA_stat_t *stat); 43 | void zprint_stat(HICMA_stat_t stat); 44 | int HICMA_Lapack_to_Tile(void *Af77, int LDA, HICMA_desc_t *A); 45 | int HICMA_Tile_to_Lapack(HICMA_desc_t *A, void *Af77, int LDA); 46 | #endif 47 | -------------------------------------------------------------------------------- /include/hicma_config.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file chameleon_config.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 
13 | * 14 | *** 15 | * 16 | * @brief Chameleon configuration file 17 | * 18 | * @version 1.0.0 19 | * @author Florent Pruvost 20 | * @date 2017-01-06 21 | * 22 | */ 23 | #ifndef CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED 24 | #define CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED 25 | 26 | #define HICMA_CHAM_VERSION_MAJOR 1 27 | #define HICMA_CHAM_VERSION_MINOR 0 28 | #define HICMA_CHAM_VERSION_MICRO 0 29 | 30 | /* Scheduling engine */ 31 | /* #undef CHAMELEON_SCHED_QUARK */ 32 | /* #undef CHAMELEON_SCHED_PARSEC */ 33 | #define CHAMELEON_SCHED_STARPU 34 | 35 | /* Communication engine */ 36 | /* #undef HICMA_USE_MPI */ 37 | /* #undef CHAMELEON_USE_MIGRATE */ 38 | #if !defined(HICMA_USE_MPI) && defined(CHAMELEON_USE_MIGRATE) 39 | #undef CHAMELEON_USE_MIGRATE 40 | #endif 41 | 42 | /* GPU Support */ 43 | /* #undef CHAMELEON_USE_CUDA */ 44 | /* #undef CHAMELEON_USE_CUBLAS */ 45 | /* #undef CHAMELEON_USE_CUBLAS_V2 */ 46 | 47 | /* Simulation */ 48 | /* #undef CHAMELEON_SIMULATION */ 49 | 50 | /* Tracing support */ 51 | /* #undef CHAMELEON_ENABLE_TRACING */ 52 | 53 | /* getopt */ 54 | #define CHAMELEON_HAVE_GETOPT_H 55 | #define CHAMELEON_HAVE_GETOPT_LONG 56 | 57 | #ifdef BEGIN_C_DECLS 58 | #undef BEGIN_C_DECLS 59 | #endif 60 | 61 | #ifdef END_C_DECLS 62 | #undef END_C_DECLS 63 | #endif 64 | 65 | #if defined(c_plusplus) || defined(__cplusplus) 66 | # define BEGIN_C_DECLS extern "C" { 67 | # define END_C_DECLS } 68 | #else 69 | # define BEGIN_C_DECLS /* empty */ 70 | # define END_C_DECLS /* empty */ 71 | #endif 72 | 73 | #endif /* CHAMELEON_CONFIG_H_HAS_BEEN_INCLUDED */ 74 | -------------------------------------------------------------------------------- /include/hicma_init.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 
4 | */ 5 | 6 | #include "hicma_struct.h" 7 | static struct hicma_context hicma_context = { 8 | 0, '\0', 0, 0 , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 9 | }; 10 | const static struct hicma_context hicma_context_default = { 11 | 0, '\0', 0, 0 , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 12 | }; 13 | #include "flop_util_structs.h" 14 | flop_counter counters[FLOP_NUMTHREADS]; 15 | -------------------------------------------------------------------------------- /include/hicma_kernels.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file hicma_kernels.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 13 | * 14 | *** 15 | * 16 | * @brief Chameleon elementary kernels enum 17 | * 18 | * @version 1.0.0 19 | * @author Mathieu Faverge 20 | * @author Cedric Augonnet 21 | * @date 2011-06-01 22 | * 23 | */ 24 | #ifndef _HICMA_CHAM_KERNELS_H_ 25 | #define _HICMA_CHAM_KERNELS_H_ 26 | 27 | /** 28 | * Used to apply operations on specific kernels 29 | */ 30 | typedef enum hicma_kernel_e { 31 | 32 | HICMA_GEMM, 33 | 34 | } HICMA_kernel_t; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /include/hicma_types.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file hicma_types.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 
11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 13 | * 14 | *** 15 | * 16 | * @brief Chameleon basic datatypes header 17 | * 18 | * @version 1.0.0 19 | * @author Cedric Augonnet 20 | * @author Mathieu Faverge 21 | * @author Cedric Castagnede 22 | * @date 2011-06-01 23 | * 24 | */ 25 | #ifndef _HICMA_CHAM_TYPES_H_ 26 | #define _HICMA_CHAM_TYPES_H_ 27 | 28 | #include "hicma_config.h" 29 | 30 | /** 31 | * System requirements 32 | */ 33 | #include 34 | #if defined( _WIN32 ) 35 | /* This must be included before INPUT is defined below, otherwise we 36 | have a name clash/problem */ 37 | #include 38 | #include 39 | #else /* _WIN32 */ 40 | #include 41 | #endif /* _WIN32 */ 42 | 43 | 44 | /** 45 | * HICMA types 46 | */ 47 | typedef int HICMA_enum; 48 | typedef int HICMA_bool; 49 | typedef long HICMA_index; 50 | typedef long HICMA_size; 51 | 52 | 53 | /** 54 | * HICMA Complex numbers 55 | */ 56 | #define HICMA_HAS_COMPLEX_H 1 57 | 58 | #if defined(_WIN32) 59 | # include 60 | # if defined(__INTEL_COMPILER) 61 | /* Fix name conflict within the cabs prototype (_Complex) that */ 62 | /* conflicts with a C99 keyword. */ 63 | #define _Complex __ConflictingComplex 64 | #include 65 | #undef _Complex 66 | #undef complex 67 | # elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) 68 | #undef HICMA_COMPLEX_CPP 69 | #define HICMA_COMPLEX_CPP 70 | # else 71 | #error "Supported compilers on WIN32 are MSVC and Intel Compiler." 72 | # endif /* __INTEL_COMPILER */ 73 | 74 | # define isnan _isnan 75 | # define isinf !_finite 76 | #endif /* _WIN32 */ 77 | 78 | /* Sun doesn't ship the complex.h header. Sun Studio doesn't have it and older GCC compilers don't have it either. 
*/ 79 | #if defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(sun) || defined(__sun) 80 | #undef HICMA_HAS_COMPLEX_H 81 | #endif /* __SUNPRO_C */ 82 | 83 | #ifndef __cplusplus 84 | #undef HICMA_COMPLEX_CPP 85 | #endif 86 | 87 | #if defined(HICMA_COMPLEX_CPP) 88 | #ifndef LAPACK_COMPLEX_CPP 89 | # define LAPACK_COMPLEX_CPP 90 | # warning "HiCMA_COMPLEX_CPP was defined, but not LAPACK_COMPLEX_CPP. Maybe you want to set both." 91 | #endif 92 | #include // needed for std::complex declaration 93 | #define HICMA_Complex32_t std::complex 94 | #define HICMA_Complex64_t std::complex 95 | #else /* HICMA_COMPLEX_CPP */ 96 | /* not using cplusplus complex type: */ 97 | 98 | #if defined(__STDC_NO_COMPLEX__) 99 | # error "Compiler support for complex number is required." 100 | #endif 101 | 102 | #define HICMA_Complex32_t float _Complex 103 | #define HICMA_Complex64_t double _Complex 104 | 105 | #if HICMA_HAS_COMPLEX_H 106 | # include 107 | #endif 108 | #endif /* HICMA_COMPLEX_CPP */ 109 | 110 | /** 111 | * HICMA Deprecated attribute (GCC >= 3.1 only; expands to nothing on other compilers) 112 | */ 113 | #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 114 | #define HICMA_DEPRECATED __attribute__((__deprecated__)) 115 | #else 116 | #define HICMA_DEPRECATED 117 | #endif /* __GNUC__ */ 118 | 119 | BEGIN_C_DECLS 120 | 121 | /** 122 | * Global utilities: hicma_max / hicma_min return the larger / smaller of two ints (b is returned when a == b) 123 | */ 124 | static inline int hicma_max( int a, int b ) { 125 | if ( a > b ) return a; else return b; 126 | } 127 | 128 | static inline int hicma_min( int a, int b ) { 129 | if ( a < b ) return a; else return b; 130 | } 131 | 132 | END_C_DECLS 133 | 134 | #endif /* _HICMA_CHAM_TYPES_H_ */ 135 | -------------------------------------------------------------------------------- /include/runtime/starpu/hicma_runtime_profiling.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved.
4 | */ 5 | /** 6 | * 7 | * @file runtime_profiling.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 13 | * 14 | *** 15 | * 16 | * @brief Chameleon StarPU profiling and kernel locality header 17 | * 18 | * @version 1.0.0 19 | * @author Mathieu Faverge 20 | * @author Cedric Castagnede 21 | * @date 2011-06-01 22 | * 23 | */ 24 | #ifndef _PROFILING_H_ 25 | #define _PROFILING_H_ 26 | 27 | #ifdef CHAMELEON_ENABLE_PRUNING_STATS 28 | extern unsigned long RUNTIME_total_tasks; 29 | extern unsigned long RUNTIME_exec_tasks; 30 | extern unsigned long RUNTIME_comm_tasks; 31 | extern unsigned long RUNTIME_changed_tasks; 32 | #endif 33 | 34 | typedef struct measure_s { 35 | double sum; 36 | double sum2; 37 | long n; 38 | } measure_t; 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /include/runtime/starpu/hicma_runtime_workspace.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | /** 6 | * 7 | * @file runtime_workspace.h 8 | * 9 | * @copyright 2009-2014 The University of Tennessee and The University of 10 | * Tennessee Research Foundation. All rights reserved. 11 | * @copyright 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, 12 | * Univ. Bordeaux. All rights reserved. 
13 | * 14 | *** 15 | * 16 | * @brief Chameleon StarPU workspace header 17 | * 18 | * @version 1.0.0 19 | * @author Cedric Augonnet 20 | * @date 2011-06-01 21 | * 22 | */ 23 | #ifndef _HICMA_CHAM_STARPU_WORKSPACE_H_ 24 | #define _HICMA_CHAM_STARPU_WORKSPACE_H_ 25 | 26 | /* 27 | * Allocate workspace in host memory: CPU for any worker 28 | * or allocate workspace in worker's memory: main memory for cpu workers, 29 | * and embedded memory for CUDA devices. 30 | */ 31 | #define HICMA_HOST_MEM 0 32 | #define HICMA_WORKER_MEM 1 33 | 34 | struct hicma_starpu_ws_s { 35 | size_t size; /* presumably the size requested through HICMA_RUNTIME_starpu_ws_alloc - confirm unit (bytes vs. elements) */ 36 | int memory_location; /* HICMA_HOST_MEM or HICMA_WORKER_MEM, see the allocation comment below */ 37 | int which_workers; /* worker-type mask, e.g. HICMA_CUDA|HICMA_CPU, see the allocation comment below */ 38 | void *workspaces[STARPU_NMAXWORKERS]; /* one pointer slot per possible StarPU worker */ 39 | }; 40 | 41 | typedef struct hicma_starpu_ws_s HICMA_starpu_ws_t; 42 | 43 | /* 44 | * This function creates a workspace on each type of worker in "which_workers" 45 | * (e.g. HICMA_CUDA|HICMA_CPU for all CPU and GPU workers). The 46 | * memory_location argument indicates whether this should be a buffer in host 47 | * memory or in worker's memory (HICMA_HOST_MEM or HICMA_WORKER_MEM). This function 48 | * returns 0 upon successful completion. 49 | */ 50 | int HICMA_RUNTIME_starpu_ws_alloc ( HICMA_starpu_ws_t **workspace, size_t size, int which_workers, int memory_location); 51 | int HICMA_RUNTIME_starpu_ws_free ( HICMA_starpu_ws_t *workspace); 52 | void *HICMA_RUNTIME_starpu_ws_getlocal( HICMA_starpu_ws_t *workspace); 53 | 54 | #endif /* _HICMA_CHAM_STARPU_WORKSPACE_H_ */ 55 | -------------------------------------------------------------------------------- /misc/dstat.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | /** 6 | * @file dstat.c 7 | * 8 | * This file contains the function for getting statistics (avg, min, max) of numbers in a matrix.
9 | * 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 0.1.0 13 | * @author Kadir Akbudak 14 | * @date 2019-11-14 15 | **/ 16 | #include 17 | #include 18 | 19 | void dget_stat(HICMA_enum uplo, double *Ark, size_t m, size_t n, size_t ld, HICMA_stat_t *stat) 20 | { 21 | double final_avgrank; 22 | int final_maxrank = 0; 23 | int minrank = 10000; 24 | int final_totalrank = 0; 25 | double *MAT = Ark; 26 | int64_t i, j, imt, jnt, nelm = 0; 27 | int ntiles = 0; 28 | for(imt=0;imt jnt) 35 | continue; 36 | double *A = MAT+imt+jnt*ld; 37 | int rank = A[0]; 38 | if(rank > final_maxrank){ 39 | final_maxrank = rank; 40 | } 41 | if(rank < minrank){ 42 | minrank = rank; 43 | } 44 | final_totalrank += rank; 45 | ntiles++; 46 | 47 | if(0){ 48 | //if(jntmin = minrank; 55 | stat->max = final_maxrank; 56 | stat->avg = final_avgrank; 57 | } 58 | void dprint_stat(HICMA_stat_t stat) 59 | { 60 | printf("avg:%g min:%d max:%d\n", stat.avg, stat.min, stat.max); 61 | } 62 | -------------------------------------------------------------------------------- /misc/include/auxcompute_z.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | /** 6 | * @file auxcompute_z.h 7 | * 8 | * This file contains the declarations of computational auxiliary functions. 
9 | * 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 1.0.0 13 | * @author Kadir Akbudak 14 | * @date 2018-11-08 15 | **/ 16 | #ifndef __AUXCOMPUTE_Z__ 17 | #define __AUXCOMPUTE_Z__ 18 | 19 | #ifdef MKL 20 | #include 21 | //#pragma message("MKL is used") 22 | #else 23 | 24 | #include 25 | 26 | #ifdef LAPACKE_UTILS 27 | #include 28 | #endif 29 | 30 | #include 31 | //#pragma message("MKL is NOT used") 32 | #endif 33 | 34 | #include 35 | #include 36 | 37 | #ifndef min 38 | #define min(a, b) ((a) < (b) ? (a) : (b)) 39 | #endif 40 | 41 | 42 | #include "starsh.h" 43 | 44 | int HICMA_zuncompress( 45 | HICMA_enum uplo, HICMA_desc_t *AUV, HICMA_desc_t *AD, HICMA_desc_t *Ark); 46 | 47 | int HICMA_zuncompress_custom_size(HICMA_enum uplo, 48 | HICMA_desc_t *AUV, HICMA_desc_t *AD, HICMA_desc_t *Ark, 49 | int numrows_matrix, 50 | int numcolumns_matrix, 51 | int numrows_block, 52 | int numcolumns_block 53 | ); 54 | 55 | int HICMA_zdiag_vec2mat( 56 | HICMA_desc_t *vec, HICMA_desc_t *mat); 57 | 58 | void HICMA_znormest(int M, int N, double *A, double *e, double *work); 59 | 60 | void HICMA_zgenerate_problem( 61 | int probtype, //problem type defined in hicma_constants.h 62 | char sym, // symmetricity of problem: 'N' or 'S' 63 | double decay, // decay of singular values. Will be used in HICMA_STARSH_PROB_RND. Set 0 for now. 
64 | int _M, // number of rows/columns of matrix 65 | int _nb, // number of rows/columns of a single tile 66 | int _mt, // number of tiles in row dimension 67 | int _nt, // number of tiles in column dimension 68 | HICMA_problem_t *hicma_problem // pointer to hicma struct (starsh format will be used to pass coordinate info to number generation and compression phase) 69 | ); 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /misc/include/auxdescutil.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | /** 6 | * @file auxdescutil.h 7 | * 8 | * This file contains the declarations of auxiliary functions for printing HICMA descriptors.. 9 | * 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 1.0.0 13 | * @author Kadir Akbudak 14 | * @date 2018-11-08 15 | **/ 16 | #ifndef __AUXDESCUTIL__ 17 | #define __AUXDESCUTIL__ 18 | #include 19 | #include 20 | 21 | #define tld(d) (d->mb) 22 | #define tsa(d,i,j) (((j)*(d->mt)+(i))*(d->mb)*(d->nb)) 23 | 24 | void printmat(double * A, int64_t m, int64_t n, int64_t ld, int irs, int ics); 25 | void printmat_format(double * A, int64_t m, int64_t n, int64_t ld, int irs, int ics, int format); 26 | void printdescrk(HICMA_desc_t *descZ, int64_t rank); 27 | void printdesc(HICMA_desc_t *descZ); 28 | void _printmat(double * A, int64_t m, int64_t n, int64_t ld); 29 | void _printdescs(HICMA_desc_t *descD,HICMA_desc_t *descU, HICMA_desc_t *descV, HICMA_desc_t *descRk); 30 | void _printdescrk(HICMA_desc_t *descZ, int64_t rank); 31 | 32 | void check_same(HICMA_desc_t *descL, HICMA_desc_t *descR, char diag, char uplo); 33 | void check_same_array(double *L, double *R, int nelm, int line, char *file); 34 | 35 | void zget_stat(HICMA_enum uplo, double *Ark, 
size_t m, size_t n, size_t ld, HICMA_stat_t *stat); 36 | void zprint_stat(HICMA_stat_t stat); 37 | #endif 38 | -------------------------------------------------------------------------------- /misc/zstat.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | /** 6 | * @file zstat.c 7 | * 8 | * This file contains the function for getting statistics (avg, min, max) of numbers in a matrix. 9 | * 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 0.1.0 13 | * @author Kadir Akbudak 14 | * @date 2017-11-16 15 | **/ 16 | 17 | #include 18 | #include 19 | 20 | void zget_stat(HICMA_enum uplo, double *Ark, size_t m, size_t n, size_t ld, HICMA_stat_t *stat) 21 | { 22 | double final_avgrank; 23 | int final_maxrank = 0; 24 | int minrank = 10000; 25 | int final_totalrank = 0; 26 | double *MAT = Ark; 27 | int64_t i, j, imt, jnt, nelm = 0; 28 | int ntiles = 0; 29 | for(imt=0;imt jnt) 38 | continue; 39 | double *A = MAT+imt+jnt*ld; 40 | int rank = A[0]; 41 | if(rank > final_maxrank){ 42 | final_maxrank = rank; 43 | } 44 | if(rank < minrank){ 45 | minrank = rank; 46 | } 47 | final_totalrank += rank; 48 | ntiles++; 49 | 50 | if(0){ 51 | //if(jntmin = minrank; 58 | stat->max = final_maxrank; 59 | stat->avg = final_avgrank; 60 | } 61 | void zprint_stat(HICMA_stat_t stat) 62 | { 63 | printf("avg:%g min:%d max:%d\n", stat.avg, stat.min, stat.max); 64 | } 65 | 66 | -------------------------------------------------------------------------------- /python/hodlr/TestTree.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from Tree import Tree 3 | 4 | class TestTree(unittest.TestCase): 5 | superms=[0,1,2,3,4,5,6] 6 | lowersubms=[1,3,5,7,9,11,13] 7 | uppersubms=[2,4,6,8,10,12,14] 8 | def 
super_sub(self): 9 | for i, s in enumerate(self.superms): 10 | ls = Tree.lower_subm(s) 11 | self.assertEqual(ls, self.lowersubms[i]) 12 | us = Tree.upper_subm(s) 13 | self.assertEqual(us, self.uppersubms[i]) 14 | superml = Tree.super_matrix(ls) 15 | self.assertEqual(superml, s) 16 | supermu = Tree.super_matrix(us) 17 | self.assertEqual(supermu, s) 18 | numtotalrow = 16 # number of rows of matrix 19 | tss = [ 16, 8, 4, 2, 1] 20 | def tile_size(self): 21 | for l in range(0,5): 22 | self.assertEqual(Tree.max_tile_size(l, self.numtotalrow), self.tss[l]) 23 | depths = [0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3] 24 | def depth(self): 25 | for i, n in enumerate(range(len(self.depths))): 26 | self.assertEqual(Tree.depth(i), self.depths[i]) 27 | numrows = [16, 8, 8, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2] 28 | def number_of_rows(self): 29 | for i, n in enumerate(range(len(self.numrows))): 30 | self.assertEqual(Tree.numrow(i, self.numtotalrow), self.numrows[i]) 31 | tiles_at_levels = [[0],[1,2],[3,4,5,6],[7,8,9,10,11,12,13,14]] 32 | def tiles_at_depth(self): 33 | for level in range(0,4): 34 | #print(level, Tree.tiles_at_depth(level)) 35 | tiles = Tree.tiles_at_depth(level) 36 | self.assertEqual(tiles, self.tiles_at_levels[level]) 37 | tiles = Tree.tiles_at_depth_before(3, 11) 38 | self.assertEqual(tiles, [7,8,9,10]) 39 | tiles = Tree.tiles_at_depth_before(3, 14) 40 | self.assertEqual(tiles, [7,8,9,10,11,12,13]) 41 | tiles = Tree.tiles_at_depth_before(3, 15) 42 | self.assertEqual(tiles, [7,8,9,10,11,12,13,14]) 43 | tiles = Tree.tiles_at_depth_before(3, 16) 44 | self.assertEqual(tiles, [7,8,9,10,11,12,13,14]) 45 | def tile_storage_size(self): 46 | tree = Tree(16, 3) 47 | self.assertEqual(tree.stsize(10), 6) 48 | ststarts = [None, 0, 8, 16, 20, 24, 28, 32, 34, 36, 38, 40, 42, 44, 46] 49 | def tile_storage_start_end(self): 50 | tree = Tree(16, 1) 51 | for i in range(1, 15): 52 | self.assertEqual(tree.ststart(i), self.ststarts[i]) 53 | 
self.assertEqual(tree.ststart(i)+tree.stsize(i), tree.stend(i)) 54 | rowstarts = [None, 8, 0, 4, 0, 12, 8, 2, 0, 6, 4, 10, 8, 14, 12] 55 | def tile_row_start_end(self): 56 | tree = Tree(16, 1) 57 | for i in range(1, 15): 58 | self.assertEqual(tree.rowstart(i), self.rowstarts[i]) 59 | self.assertEqual(tree.rowstart(i)+Tree.numrow(i, 16), tree.rowend(i)) 60 | colstarts = [None, 0, 8, 0, 4, 8, 12, 0, 2, 4, 6, 8, 10, 12, 14] 61 | def tile_col_start_end(self): 62 | tree = Tree(16, 1) 63 | for i in range(1, 15): 64 | self.assertEqual(tree.colstart(i), self.colstarts[i]) 65 | self.assertEqual(tree.colstart(i)+Tree.numcol(i, 16), tree.colend(i)) 66 | if __name__ == '__main__': 67 | test = TestTree() 68 | test.super_sub() 69 | test.tile_size() 70 | test.depth() 71 | test.number_of_rows() 72 | test.tiles_at_depth() 73 | test.tile_storage_size() 74 | test.tile_storage_start_end() 75 | test.tile_row_start_end() 76 | test.tile_col_start_end() 77 | -------------------------------------------------------------------------------- /python/hodlr/Tree.py: -------------------------------------------------------------------------------- 1 | import math 2 | class Tree(object): 3 | # Tree class provides utilities for a full binary tree. 4 | # Warnings: 5 | # 1. 
Be aware that divisions like /2 should result in natural numbers 6 | # if tree is full, o.w., wrong result is produced 7 | # 8 | elmsize = 1 # size of double float etc 9 | def __init__(self, numtotalrows, stmax, elmsize = 1): 10 | self.numrows = numtotalrows 11 | self.numcols = self.numrows #ASSUMPTION: square tiles 12 | self.stmax = stmax 13 | self.elmsize = elmsize 14 | # static methods 15 | def lower_subm(i): 16 | return 2*i+1 17 | def upper_subm(i): 18 | return 2*i+2 19 | def super_matrix(i): 20 | fr = math.ceil(float(i)/2.0)-1 21 | ir = int(fr) 22 | assert(ir == fr) 23 | return ir 24 | def depth(i): 25 | return math.floor(math.log(i+1, 2)) 26 | def max_tile_size(level, numrows): #ts 27 | fr = float(numrows)/math.pow(2, level) 28 | ir = int(fr) 29 | assert(ir == fr) 30 | return ir 31 | def numrow(i, numrows): #ASSUMPTION: uniform tile size 32 | return Tree.max_tile_size(Tree.depth(i), numrows) 33 | def numcol(i, numcols): 34 | return Tree.numrow(i, numcols) #ASSUMPTION 35 | def tiles_at_depth(d): 36 | start = int(math.pow(2, d)-1) 37 | end = int(math.pow(2, d+1)-2) 38 | return list(range(start, end+1)) 39 | def tiles_at_depth_before(d, i): 40 | start = int(math.pow(2, d)-1) 41 | end = int(math.pow(2, d+1)-2) 42 | if i < end+1: 43 | end = i-1 44 | return list(range(start, end+1)) 45 | # class methods 46 | def stsize(self, i): 47 | return Tree.numrow(i, self.numrows) * self.stmax * self.elmsize 48 | def ststart(self, i): 49 | if i == 0: # tile 0 50 | raise ValueError("Tile 0 is not valid for ststart()") 51 | depth = Tree.depth(i) 52 | # skip previous levels 53 | start = (depth-1) * self.numrows * self.stmax * self.elmsize 54 | tiles = Tree.tiles_at_depth_before(depth, i) 55 | for t in tiles: 56 | start += self.stsize(t) 57 | return start 58 | def stend(self, i): 59 | if i == 0: # tile 0 60 | raise ValueError("Tile 0 is not valid for ststart()") 61 | return self.ststart(i) + self.stsize(i) 62 | def rowstart(self, i): 63 | if i%2 == 0: 64 | before = i - 1 65 | 
start = 0 66 | else: 67 | before = i 68 | start = Tree.numrow(i+1, self.numrows) 69 | depth = Tree.depth(i) 70 | tiles = Tree.tiles_at_depth_before(depth, before) 71 | for t in tiles: 72 | start += Tree.numrow(t, self.numrows) 73 | return start 74 | def rowend(self, i): 75 | return self.rowstart(i) + Tree.numrow(i, self.numrows) 76 | def colstart(self, i): 77 | depth = Tree.depth(i) 78 | tiles = Tree.tiles_at_depth_before(depth, i) 79 | start = 0 80 | for t in tiles: 81 | start += Tree.numcol(t, self.numcols) 82 | return start 83 | def colend(self, i): 84 | return self.colstart(i) + Tree.numcol(i, self.numcols) 85 | 86 | 87 | -------------------------------------------------------------------------------- /runtime/starpu/codelets/codelet_dgenmat.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | 6 | /** 7 | * @file codelet_dgenmat.c 8 | * 9 | * HiCMA codelets kernel 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 0.1.0 13 | * @author Rabab Alomairy 14 | * @date 2020-05-20 15 | * @precisions normal z -> c d s 16 | **/ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | extern int store_only_diagonal_tiles; 24 | 25 | DCODELETS_HEADER(genmat) 26 | 27 | void HICMA_TASK_dgenmat( const HICMA_option_t *options, 28 | HICMA_desc_t *A, int lda, int Am, int An, int m, int n) 29 | { 30 | struct starpu_codelet *codelet = &cl_dgenmat; 31 | void (*callback)(void*) = NULL; 32 | 33 | HICMA_BEGIN_ACCESS_DECLARATION; 34 | HICMA_ACCESS_W(A, Am, An); 35 | HICMA_END_ACCESS_DECLARATION; 36 | 37 | // printf("%s:%d: Am:%d An:%d lda:%d bigM:%d m0:%d n0:%d\n ", __FILE__, __LINE__, Am, An, lda, bigM, m0, n0); 38 | 39 | //printf("%s %d: Am:%d An:%d ADm:%d ADn:%d ptr:%p\n", __func__, __LINE__, Am, An, ADm, ADn, ptr); 40 | 
starpu_insert_task( 41 | starpu_mpi_codelet(codelet), 42 | STARPU_W, RTBLKADDR(A, double, Am, An), 43 | STARPU_VALUE, &Am, sizeof(int), 44 | STARPU_VALUE, &An, sizeof(int), 45 | STARPU_VALUE, &lda, sizeof(int), 46 | STARPU_VALUE, &m, sizeof(int), 47 | STARPU_VALUE, &n, sizeof(int), 48 | STARPU_CALLBACK, callback, 49 | #if defined(CHAMELEON_CODELETS_HAVE_NAME) 50 | STARPU_NAME, "zgenmat", /* NOTE(review): task name keeps the z prefix although this is the double-precision codelet - confirm intended */ 51 | #endif 52 | 0); 53 | } 54 | 55 | /* cl_dgenmat_cpu_func - CPU kernel: fills tile A (m x n, leading dimension lda) by calling the problem kernel stored in the STARSH_blrf format, using the row-cluster index ranges of tile row i and tile column j. */ 56 | 57 | #if !defined(CHAMELEON_SIMULATION) 58 | static void cl_dgenmat_cpu_func(void *descr[], void *cl_arg) 59 | { 60 | int i; 61 | int j; 62 | int m, n; 63 | double *A; 64 | int lda; 65 | 66 | A = (double *)STARPU_MATRIX_GET_PTR(descr[0]); 67 | /* unpack order mirrors the STARPU_VALUE order used at submission: Am, An, lda, m, n */ 68 | starpu_codelet_unpack_args(cl_arg, &i, &j, &lda, &m, &n); 69 | /* NOTE(review): shape, oversample, work and iwork below are declared but not used in this function */ 70 | int shape[2]; 71 | int oversample = 10; 72 | double *work; 73 | int *iwork; 74 | STARSH_blrf* blrf = HICMA_get_starsh_format(); 75 | STARSH_cluster *RC = blrf->row_cluster, *CC = RC; /* column cluster aliases the row cluster */ 76 | void *RD = RC->data, *CD = RD; /* column data aliases the row data */ 77 | 78 | blrf->problem->kernel(m, n, RC->pivot+RC->start[i], CC->pivot+CC->start[j], 79 | RD, CD, A, lda); 80 | } 81 | #endif /* !defined(CHAMELEON_SIMULATION) */ 82 | 83 | /* 84 | * Codelet definition 85 | */ 86 | CODELETS_CPU(dgenmat, 1, cl_dgenmat_cpu_func) 87 | -------------------------------------------------------------------------------- /runtime/starpu/codelets/codelet_dgenrhs.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved.
4 | **/ 5 | 6 | /** 7 | * @file codelet_dgenrhs.c 8 | * 9 | * HiCMA codelets kernel 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 0.1.0 13 | * @author Rabab Alomairy 14 | * @date 2020-05-20 15 | * @precisions normal z -> c d s 16 | **/ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | DCODELETS_HEADER(genrhs) 23 | /* HICMA_TASK_dgenrhs - Insert a StarPU task that runs the dgenrhs codelet with write access to tile (Am, An) of A. */ 24 | void HICMA_TASK_dgenrhs( const HICMA_option_t *options, 25 | int m, int n, 26 | const HICMA_desc_t *A, int Am, int An, 27 | int lda, 28 | int bigM, int m0, int n0 29 | ) 30 | { 31 | struct starpu_codelet *codelet = &cl_dgenrhs; 32 | 33 | void (*callback)(void*) = NULL; 34 | /* m, n, lda, bigM, m0 and n0 are forwarded by value; the CPU kernel unpacks them in this same order. */ 35 | 36 | HICMA_BEGIN_ACCESS_DECLARATION; 37 | HICMA_ACCESS_W(A, Am, An); 38 | HICMA_END_ACCESS_DECLARATION; 39 | 40 | 41 | starpu_insert_task( 42 | starpu_mpi_codelet(codelet), 43 | STARPU_VALUE, &m, sizeof(int), 44 | STARPU_VALUE, &n, sizeof(int), 45 | STARPU_W, RTBLKADDR(A, double, Am, An), 46 | STARPU_VALUE, &lda, sizeof(int), 47 | STARPU_VALUE, &bigM, sizeof(int), 48 | STARPU_VALUE, &m0, sizeof(int), 49 | STARPU_VALUE, &n0, sizeof(int), 50 | STARPU_PRIORITY, options->priority, 51 | STARPU_CALLBACK, callback, 52 | #if defined(CHAMELEON_CODELETS_HAVE_NAME) 53 | STARPU_NAME, "zgenrhs", 54 | #endif 55 | 0); 56 | } 57 | 58 | /* cl_dgenrhs_cpu_func - Generate the right-hand side for one tile with starsh_generate_3d_virus_rhs.
*/ 59 | 60 | #if !defined(CHAMELEON_SIMULATION) 61 | static void cl_dgenrhs_cpu_func(void *descr[], void *cl_arg) 62 | { 63 | int m; 64 | int n; 65 | double *A; /* output tile buffer */ 66 | int lda; 67 | int bigM; 68 | int m0; 69 | int n0; 70 | A = (double *)STARPU_MATRIX_GET_PTR(descr[0]); 71 | /* Unpack in the order HICMA_TASK_dgenrhs packed the values: m, n, lda, bigM, m0, n0. */ 72 | starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0); 73 | 74 | /* Only m and A are passed on to the generator; the other unpacked values are not used here. */ 75 | starsh_generate_3d_virus_rhs( m, A); 76 | } 77 | #endif /* !defined(CHAMELEON_SIMULATION) */ 78 | 79 | /* 80 | * Codelet definition 81 | */ 82 | CODELETS_CPU(dgenrhs, 1, cl_dgenrhs_cpu_func) 83 | -------------------------------------------------------------------------------- /scripts/allocate-interactive-node-isambard.sh: -------------------------------------------------------------------------------- 1 | qsub -I -q arm-dev 2 | -------------------------------------------------------------------------------- /scripts/build-nompi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -le 2 | 3 | 4 | 5 | # BASH verbose mode 6 | set -x 7 | currdir=$PWD 8 | 9 | echo "Current dir is $PWD. The files in the current dir are here:"; ls -al 10 | if [ -z $reponame ]; then reponame=hicma-dev; fi 11 | echo "Reponame is: $reponame" 12 | 13 | # Check if we are already in hicma repo dir or not. 14 | if git remote -v | grep -q "https://github.com/ecrc/$reponame" 15 | then 16 | # we are, lets go to the top dir (where .git is) 17 | until test -d $PWD/.git ; 18 | do 19 | cd .. 20 | done; 21 | else 22 | #we are not, we need to clone the repo 23 | git clone https://github.com/ecrc/$reponame.git 24 | cd $reponame 25 | fi 26 | module purge 27 | if [ "$HOSTNAME" == "thana" ]; then 28 | . ./scripts/power8.modules 29 | elif [ "$HOSTNAME" == "almaha.kaust.edu.sa" ]; then 30 | echo "Loading modules for ub18" 31 | . ./scripts/modules-ecrc-ub18.sh 32 | else 33 | echo "Loading modules" 34 | . 
./scripts/modules-ecrc.sh 35 | fi 36 | module list 37 | 38 | # Update submodules 39 | HICMADEVDIR=$PWD 40 | git submodule update --init --recursive 41 | 42 | 43 | # STARS-H 44 | cd $HICMADEVDIR 45 | cd stars-h 46 | rm -rf build 47 | mkdir -p build/installdir 48 | cd build 49 | cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/installdir -DMPI=OFF -DOPENMP=OFF -DSTARPU=OFF -DGSL=OFF 50 | make clean 51 | make -j 52 | make install 53 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 54 | 55 | # STARS-H-CORE 56 | #cd $HICMADEVDIR 57 | #cd stars-h-core 58 | #rm -rf build 59 | #mkdir -p build/installdir 60 | #cd build 61 | #cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/installdir 62 | #make -j install 63 | #export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 64 | 65 | # CHAMELEON 66 | cd $HICMADEVDIR 67 | cd chameleon 68 | rm -rf build 69 | mkdir -p build/installdir 70 | cd build 71 | cmake .. -DCMAKE_BUILD_TYPE=Debug -DHICMA_USE_MPI=OFF -DCMAKE_INSTALL_PREFIX=$PWD/installdir 72 | make clean 73 | make -j 74 | make install 75 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 76 | 77 | # HCORE 78 | cd $HICMADEVDIR 79 | cd hcore 80 | rm -rf build 81 | mkdir -p build/installdir 82 | cd build 83 | cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/installdir 84 | make clean 85 | make -j 86 | make install 87 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 88 | 89 | # HICMA 90 | cd $HICMADEVDIR 91 | rm -rf build 92 | mkdir -p build/installdir 93 | cd build 94 | cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$PWD/installdir -DHICMA_USE_MPI=OFF 95 | make clean 96 | make -j 97 | make install 98 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 99 | 100 | cd $currdir 101 | set +x 102 | -------------------------------------------------------------------------------- /scripts/build-starsh-cpp-intel-parsec.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -le 2 | set -x 3 | currdir=$PWD 4 | function module_load { 5 | module=$1 6 | if module list 2>&1 | grep $module; then 7 | echo "$module is loaded"; 8 | else 9 | module load $module 10 | fi 11 | } 12 | function download { 13 | reponame=$1 14 | # Check if we are already in repo dir or not. 15 | if git -C $PWD remote -v | grep -q "https://github.com/ecrc/$reponame" 16 | then 17 | echo "We are in $reponame" 18 | # we are, lets go to the top dir (where .git is) 19 | until test -d $PWD/.git ; 20 | do 21 | cd .. 22 | done; 23 | else 24 | echo "We are NOT in $reponame" 25 | if [ ! -d $reponame ]; then 26 | #we are not, we need to clone the repo 27 | git clone git@github.com:ecrc/$reponame.git 28 | if [ ! -d $reponame ]; then 29 | echo "Failed to clone $reponame" 30 | return 31 | fi 32 | fi 33 | cd $reponame 34 | fi 35 | } 36 | 37 | 38 | install_starsh_core=1 39 | install_starsh=1 40 | 41 | module_load cmake/3.11.1 42 | module_load intel/2018 43 | 44 | if [ $install_starsh_core -eq 1 ]; then 45 | reponame=stars-h-core-dev 46 | download $reponame 47 | git submodule update --init 48 | if [ -d build-intel ]; then 49 | rm -rf build-intel 50 | fi 51 | mkdir build-intel 52 | cd build-intel 53 | cmake .. 
-DCMAKE_INSTALL_PREFIX=$PWD/install 54 | make -j install 55 | export LD_LIBRARY_PATH=/opt/share/intel/2018/compilers_and_libraries/linux/lib/intel64/:$LD_LIBRARY_PATH 56 | #/home/akbudak/stars-h-core-dev/build-intel/tests/applications/particles 57 | if [ -d $PWD/install ]; then 58 | export PKG_CONFIG_PATH=$HOME/stars-h-core-dev/build-intel/install/lib/pkgconfig/:$PKG_CONFIG_PATH 59 | fi 60 | cd $currdir 61 | fi 62 | 63 | if [ $install_starsh -eq 1 ]; then 64 | module_load hwloc/1.11.8-intel-2018 65 | module_load plasma/2.8.0-intel-2018-mkl 66 | module_load parsec/master-intel-2018-mkl-intelmpi-plasma-2.8.0 67 | reponame=stars-h-dev 68 | download $reponame 69 | git submodule update --init 70 | git checkout muxas/cpp 71 | if [ -d build-intel ]; then 72 | rm -rf build-intel 73 | fi 74 | mkdir build-intel 75 | cd build-intel 76 | cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/install -DPARSEC_daguepp_BIN_DIR=/opt/ecrc/parsec/master-intel-2018-mkl-intelmpi-plasma-2.8.0/ub16/bin -DPARSEC_SRC_DIR=$HOME/parsec 77 | make -j install 78 | if [ -d $PWD/install ]; then 79 | export PKG_CONFIG_PATH=$HOME/stars-h-dev/build-intel/install/lib/pkgconfig/:$PKG_CONFIG_PATH 80 | fi 81 | export MKL_NUM_THREADS=1 #starsh is linked to parallel mkl 82 | cd $currdir 83 | fi 84 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -le 2 | 3 | 4 | 5 | # BASH verbose mode 6 | #set -x 7 | currdir=$PWD 8 | 9 | echo "Current dir is $PWD. The files in the current dir are here:"; ls -al 10 | if [ -z $reponame ]; then reponame=hicma-dev; fi 11 | echo "Reponame is: $reponame" 12 | 13 | # Check if we are already in hicma repo dir or not. 14 | if git remote -v | grep -q "https://github.com/ecrc/$reponame" 15 | then 16 | # we are, lets go to the top dir (where .git is) 17 | until test -d $PWD/.git ; 18 | do 19 | cd .. 
20 | done; 21 | else 22 | #we are not, we need to clone the repo 23 | git clone https://github.com/ecrc/$reponame.git 24 | cd $reponame 25 | fi 26 | module purge 27 | if [ "$HOSTNAME" == "thana" ]; then 28 | . ./scripts/power8.modules 29 | #elif [ "$HOSTNAME" == "almaha.kaust.edu.sa" ]; then 30 | # echo "Loading modules for ub18" 31 | # . ./scripts/modules-ecrc-ub18-mpi.sh 32 | else 33 | echo "Loading modules" 34 | . ./scripts/modules-ecrc-ub18-mpi.sh 35 | # . ./scripts/modules-ecrc.sh 36 | # . ./scripts/modules-ecrc-mpi.sh 37 | fi 38 | module list 39 | 40 | # Update submodules 41 | HICMADEVDIR=$PWD 42 | git submodule update --init --recursive 43 | 44 | ## enable/disable compilation of libraries 45 | starsh=1 46 | chameleon=1 47 | hcore=1 48 | hicma=1 49 | 50 | if [ $starsh -eq 1 ]; then 51 | # STARS-H 52 | cd $HICMADEVDIR 53 | cd stars-h 54 | rm -rf build 55 | mkdir -p build/installdir 56 | cd build 57 | cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/installdir -DMPI=OFF -DOPENMP=OFF -DSTARPU=OFF -DGSL=OFF 58 | make clean 59 | make -j 60 | make install 61 | fi 62 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 63 | 64 | # STARS-H-CORE 65 | #cd $HICMADEVDIR 66 | #cd stars-h-core 67 | #rm -rf build 68 | #mkdir -p build/installdir 69 | #cd build 70 | #cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/installdir 71 | #make -j install 72 | #export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 73 | 74 | if [ $chameleon -eq 1 ]; then 75 | # CHAMELEON 76 | cd $HICMADEVDIR 77 | cd chameleon 78 | rm -rf build 79 | mkdir -p build/installdir 80 | cd build 81 | cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DHICMA_USE_MPI=ON -DCHAMELEON_USE_CUDA=OFF -DCHAMELEON_ENABLE_CUDA=OFF -DCMAKE_INSTALL_PREFIX=$PWD/installdir 82 | make clean 83 | make -j 84 | make install 85 | fi 86 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 87 | 88 | 89 | if [ $hcore -eq 1 ]; then 90 | # HCORE 91 | cd $HICMADEVDIR 92 | cd hcore 93 | rm -rf build 94 | mkdir -p build/installdir 95 | cd build 96 | cmake .. -DCMAKE_INSTALL_PREFIX=$PWD/installdir 97 | make clean 98 | make -j 99 | make install 100 | fi 101 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 102 | 103 | if [ $hicma -eq 1 ]; then 104 | # HICMA 105 | cd $HICMADEVDIR 106 | rm -rf build 107 | mkdir -p build/installdir 108 | cd build 109 | cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$PWD/installdir -DHICMA_USE_MPI=ON 110 | make clean 111 | make -j 112 | make install 113 | fi 114 | export PKG_CONFIG_PATH=$PWD/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 115 | 116 | cd $currdir 117 | set +x 118 | -------------------------------------------------------------------------------- /scripts/intel.modules: -------------------------------------------------------------------------------- 1 | module load mkl/2018-initial 2 | module load gcc/5.5.0 3 | module load cmake/3.9.6 4 | module load hwloc/1.11.8-gcc-5.5.0 5 | module load openmpi/3.0.0-gcc-5.5.0 6 | #module load starpu/1.2.3-gcc-5.5.0-mkl-openmpi-3.0.0 7 | module load starpu/1.2.4-gcc-5.5.0-mkl-openmpi-3.0.0 8 | module load gsl/2.4-gcc-5.5.0 9 | 10 | -------------------------------------------------------------------------------- /scripts/modules-amd-rome.sh: -------------------------------------------------------------------------------- 1 | module load mkl 2 | module load gcc 3 | export MKL_DEBUG_CPU_TYPE=5 4 | -------------------------------------------------------------------------------- /scripts/modules-cs.sh: -------------------------------------------------------------------------------- 1 | module load 
gcc/6.4.0 2 | module load intel/2019 3 | #module load gsl/2.4/gnu-6.4.0 4 | module load cmake/3.13.4/gnu-6.4.0 5 | #export PKG_CONFIG_PATH=/ibex/scratch/omairyrm/sourcefiles-cs/hwloc-2.0.2/install/lib/pkgconfig:$PKG_CONFIG_PATH 6 | #export PKG_CONFIG_PATH=/ibex/scratch/omairyrm/sourcefiles-cs/starpu-1.2.6/install/lib/pkgconfig:$PKG_CONFIG_PATH 7 | #export LD_LIBRARY_PATH=/ibex/scratch/omairyrm/sourcefiles-cs/hwloc-2.0.2/install/lib:$LD_LIBRARY_PATH 8 | #export LD_LIBRARY_PATH=/ibex/scratch/omairyrm/sourcefiles-cs/starpu-1.2.6/install/lib:$LD_LIBRARY_PATH 9 | -------------------------------------------------------------------------------- /scripts/modules-ecrc-mpi.sh: -------------------------------------------------------------------------------- 1 | module load openmpi/4.1.0-gcc-10.2.0 2 | -------------------------------------------------------------------------------- /scripts/modules-ecrc-ub18-mpi.sh: -------------------------------------------------------------------------------- 1 | #module load mkl/2018-initial 2 | for m in ecrc-extras mkl/2020.0.166 gcc/10.2.0 cmake/3.19.2 hwloc/2.4.0-gcc-10.2.0 openmpi/4.1.0-gcc-10.2.0 starpu/1.3.7-gcc-10.2.0-mkl-openmpi-4.1.0 ; do 3 | module load $m 4 | done 5 | #starpu/1.3.7-gcc-10.2.0-mkl-openmpi-4.1.0 6 | 7 | -------------------------------------------------------------------------------- /scripts/modules-ecrc-ub18.sh: -------------------------------------------------------------------------------- 1 | #module load mkl/2018-initial 2 | for m in ecrc-extras mkl/2020.0.166 gcc/10.2.0 cmake/3.19.2 hwloc/2.4.0-nocuda-gcc-10.2.0 starpu/1.2.10-gcc-10.2.0-mkl-openmpi-4.1.0 ; do 3 | module load $m 4 | done 5 | #starpu/1.3.7-gcc-10.2.0-mkl-openmpi-4.1.0 6 | 7 | -------------------------------------------------------------------------------- /scripts/modules-ecrc.sh: -------------------------------------------------------------------------------- 1 | #module load mkl/2018-initial 2 | module load ecrc-extras 3 | module load 
mkl/2020.0.166 4 | module load gcc/10.2.0 5 | module load cmake/3.19.2 6 | module load hwloc/2.4.0-gcc-10.2.0 7 | #module load starpu/1.2.3-gcc-5.5.0-mkl-openmpi-3.0.0 8 | #module load starpu/1.2.4-gcc-5.5.0-mkl-openmpi-3.0.0 9 | module load starpu/1.3.7-gcc-10.2.0-mkl-openmpi-4.1.0 10 | #module load gsl/2.4-gcc-5.5.0 11 | 12 | 13 | -------------------------------------------------------------------------------- /scripts/modules-isambard-allinea.sh: -------------------------------------------------------------------------------- 1 | module swap PrgEnv-cray/6.0.5 PrgEnv-allinea 2 | module load cdt/19.08 3 | #export ARMPL_DIR=/opt/allinea/19.0.0/opt/arm/armpl-19.0.0_ThunderX2CN99_SUSE-12_arm-hpc-compiler_19.0_aarch64-linux 4 | #export LD_LIBRARY_PATH=/opt/allinea/19.0.0/opt/arm/armpl-19.0.0_ThunderX2CN99_SUSE-12_arm-hpc-compiler_19.0_aarch64-linux/lib:$LD_LIBRARY_PATH 5 | #export PKG_CONFIG_PATH=/lustre/home/ri-kakbudak/hicma-dev/stars-h/build/installdir/lib/pkgconfig:$PKG_CONFIG_PATH 6 | ARMPLLIBS="-larmpl" 7 | ARMPLROOT="/opt/allinea/19.2.0.0/opt/arm/armpl-19.2.0_ThunderX2CN99_SUSE-12_arm-hpc-compiler_19.2_aarch64-linux" 8 | export LD_LIBRARY_PATH="/opt/allinea/19.2.0.0/opt/arm/armpl-19.2.0_ThunderX2CN99_SUSE-12_arm-hpc-compiler_19.2_aarch64-linux/lib":$LD_LIBRARY_PATH 9 | export LD_LIBRARY_PATH=/home/ri-ralomairy/sourcefiles/lapack-3.9.0/build/installdir:$LD_LIBRARY_PATH 10 | export PKG_CONFIG_PATH=/home/ri-ralomairy/sourcefiles/lapack-3.9.0/build/installdir/pkgconfig:$PKG_CONFIG_PATH 11 | -------------------------------------------------------------------------------- /scripts/modules-shaheen-gcc-mkl-starpu.sh: -------------------------------------------------------------------------------- 1 | #conda deactivate 2 | module load cmake/3.13.4 3 | export LC_ALL=en_US.UTF-8 4 | module load intel/19.0.5.281 5 | module load cray-mpich/7.7.11 6 | module load python 7 | module switch PrgEnv-cray PrgEnv-gnu 8 | module unload cray-libsci 9 | module list -l 10 | export 
CRAYPE_LINK_TYPE=dynamic 11 | 12 | 13 | export PKG_CONFIG_PATH=/project/k1205/omairyrm/starpu-1.2.6/install/lib/pkgconfig:$PKG_CONFIG_PATH 14 | export LD_LIBRARY_PATH=/project/k1205/omairyrm/starpu-1.2.6/install/lib:$LD_LIBRARY_PATH 15 | #hwloc 16 | export HWLOC_SRC_DIR=/project/k1205/omairyrm/hwloc/hwloc-1.11.10 17 | export HWLOC_ROOT=${HWLOC_SRC_DIR}/install 18 | export PKG_CONFIG_PATH=${HWLOC_ROOT}/lib/pkgconfig:$PKG_CONFIG_PATH 19 | export LD_LIBRARY_PATH=${HWLOC_ROOT}/lib:$LD_LIBRARY_PATH 20 | 21 | 22 | -------------------------------------------------------------------------------- /scripts/modules-xc40.sh: -------------------------------------------------------------------------------- 1 | export LC_ALL=en_US.UTF-8 2 | export CRAYPE_LINK_TYPE=dynamic 3 | #module switch PrgEnv-cray/5.2.82 PrgEnv-gnu 4 | module switch PrgEnv-cray PrgEnv-gnu 5 | module load hwloc/1.11.9 6 | 7 | #module load starpu/1.2.3 8 | 9 | 10 | #module unload PrgEnv-cray/5.2.82 11 | #module unload PrgEnv-intel 12 | #module load PrgEnv-gnu 13 | module unload intel 14 | #module load intel/16.3.3.210 15 | module load intel 16 | #module load gsl 17 | #module load python #for compiling starsh 18 | 19 | -------------------------------------------------------------------------------- /scripts/power8.modules: -------------------------------------------------------------------------------- 1 | 2 | module load ecrc-extras 3 | module load gcc/5.5.0 4 | module load cmake/3.9.6 5 | module load openblas/0.2.20-gcc-5.5.0-singlethread 6 | module load hwloc/1.11.8-gcc-5.5.0 7 | module load openmpi/3.0.0-gcc-5.5.0 8 | #module load starpu/1.2.3-gcc-5.5.0-openblas-openmpi-3.0.0 9 | module load starpu/1.2.4-gcc-5.5.0-openblas-openmpi-3.0.0 10 | module load gsl/2.4-gcc-5.5.0 11 | -------------------------------------------------------------------------------- /scripts/test-trsm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -le 2 | module purge 3 | 4 | if [ 
"$HOSTNAME" == "thana" ]; then 5 | . ./scripts/power8.modules 6 | else 7 | . ./scripts/intel.modules 8 | fi 9 | # testing/CMakeLists.txt builds testing_dposv (TESTINGS_SRC has no testing_zposv). 10 | export HICMA_TESTING_VERBOSE=1 11 | ./build/testing/testing_dposv 1 0 posv 1000 1000 200 1000 250 1e-7 0 250 250 1 1 1 12 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -le 2 | module purge 3 | if [ "$HOSTNAME" == "thana" ]; then 4 | . ./scripts/power8.modules 5 | else 6 | . ./scripts/modules-ecrc.sh 7 | . ./scripts/modules-ecrc-mpi.sh 8 | fi 9 | 10 | module list 11 | 12 | set -x 13 | 14 | # TEST 15 | PRE="" 16 | factor=1 17 | debug=0 18 | mpi=0 19 | CMD="" # start from an empty launcher prefix so an inherited $CMD cannot leak into the run 20 | 21 | if [ $# -ne 5 ]; then 22 | echo "Usage: factor[1,2,4...] debug[d,-] mpi[m,-] nthreads problem" 23 | ii=0 24 | for i in $*; do 25 | echo "$ii: |$i|" 26 | ii=$((ii+1)) 27 | done 28 | exit -1 29 | fi 30 | 31 | factor=$1 32 | debug=$2 33 | mpi=$3 34 | nthreads=$4 35 | problem=$5 36 | # $mpi is either "-" (run without mpirun) or the process count handed to "mpirun -n". 37 | nmpi=8 38 | if [ "$mpi" != "-" ]; then 39 | nmpi=$mpi 40 | if [ "$debug" == "d" ]; then 41 | CMD="mpirun -n $nmpi xterm -hold -e gdb -ex run --args " 42 | else 43 | CMD="mpirun -n $nmpi " 44 | fi 45 | else 46 | if [ "$debug" == "d" ]; then 47 | CMD="gdb -ex run --args" 48 | fi 49 | fi 50 | echo $CMD 51 | 52 | export STARPU_SILENT=1 53 | irange="3 3"; nta=$nthreads;_b=400; acc=1e-8; check="--check" 54 | for nt in $nta;do 55 | n=$((m/factor)) 56 | maxrank=$((nb/factor)) 57 | #echo BASH-MAXRANK: $maxrank 58 | #echo BASH-DEBUG: $debug 59 | #_b=1600 60 | #_b=324 61 | nb=$_b; 62 | for _i in `seq $irange`;do 63 | _is=$((_i*_i)) 64 | m=$((_is*_b)); 65 | n=$((_is*_b/factor)); 66 | maxrank=$((_b/factor)) 67 | run="./build/timing/time_zpotrf_tile \ 68 | --m=$m \ 69 | --n_range=$n:$n \ 70 | --k=$m \ 71 | --mb=$nb \ 72 | --nb=$maxrank \ 73 | --nowarmup \ 74 | --threads=$nt \ 75 | --rk=0 \ 76 | --acc=$acc \ 77 | $check \ 78 | $problem \ 79 | --starshwavek=40 \ 80 | --starshdecay=2 \
81 | --starshmaxrank=$maxrank" 82 | echo "Executing: $CMD $run" 83 | $CMD $run 84 | done 85 | done 86 | -------------------------------------------------------------------------------- /testing/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | set( TESTINGS_SRC 3 | testing_dposv.c 4 | testing_dtrsmd.c 5 | ) 6 | 7 | 8 | link_directories(${STARSH_LIBRARY_DIRS}) 9 | link_directories(${STARPU_LIBRARY_DIRS}) 10 | link_directories(${STARPU_SHM_STATIC_LIBRARY_DIRS}) 11 | #link_directories(${CMAKE_SOURCE_DIR}/chameleon) 12 | 13 | #message(${CHAMELEON_LIBRARY_DIRS}) 14 | #link_directories(${HICMA_LIBRARY_DIRS}) 15 | set(TESTING_AUX_HDRS 16 | ../timing/timing.h 17 | ../timing/timing_auxiliary.h 18 | ../timing/timing_auxiliary.c 19 | testing_dauxiliary.c 20 | testing_dauxiliary.h 21 | ) 22 | 23 | set(libs_for_testings) 24 | list(APPEND libs_for_testings hicma) 25 | #list(APPEND libs_for_testings hicma chameleon) 26 | 27 | add_custom_target(testing_include ALL SOURCES ${TESTING_AUX_HDRS}) 28 | include_directories( ${CMAKE_SOURCE_DIR}/misc/include ) 29 | #include_directories( ${CMAKE_SOURCE_DIR}/chameleon/ ) 30 | include_directories(BEFORE ${CMAKE_SOURCE_DIR}/timing ) 31 | include_directories(${CMAKE_SOURCE_DIR}/include ) 32 | foreach(_timing ${TESTINGS_SRC}) 33 | get_filename_component(_name_exe ${_timing} NAME_WE) 34 | add_executable(${_name_exe} ${_timing} ${TESTING_AUX_HDRS}) 35 | add_dependencies(${_name_exe} testing_include) 36 | # set_property(TARGET ${_name_exe} PROPERTY LINKER_LANGUAGE Fortran) 37 | target_link_libraries(${_name_exe} ${libs_for_testings}) 38 | target_link_libraries(${_name_exe} 39 | hicma 40 | ${HICMA_DEP} 41 | ${STARSH_LIBRARIES_DEP} 42 | # ${CHAMELEON_LIBRARIES_DEP} 43 | ${STARPU_LIBRARIES_DEP} 44 | -lgfortran #THIS IS NOT GOOD 45 | ) 46 | install(TARGETS ${_name_exe} 47 | DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/hicma/testing) 48 | 49 | # Test 50 | if( _name_exe MATCHES "dposv") 51 | add_test( 
NAME ${_name_exe} 52 | COMMAND ${_name_exe} 1 0 posv 1000 1000 200 1000 250 1e-7 0 250 250 1 1 1) 53 | set_tests_properties( ${_name_exe} PROPERTIES ENVIRONMENT "HiCMA_TESTING_VERBOSE=1" ) 54 | set_tests_properties( ${_name_exe} PROPERTIES LABELS "testing" ) 55 | endif() 56 | 57 | endforeach() 58 | 59 | #set(examples_files 60 | #"electrodynamics.c" 61 | #) 62 | #foreach(example_src ${examples_files}) 63 | #get_filename_component(_name_exe ${example_src} NAME_WE) 64 | #add_executable(${_name_exe} ${example_src}) 65 | #target_link_libraries(${_name_exe} ${STARSHCORE_LIBRARIES_DEP} starsh_core stdc++ 66 | #${CBLAS_LIBRARIES} ${LAPACKE_LIBRARIES} 67 | #) 68 | 69 | #endforeach() 70 | 71 | -------------------------------------------------------------------------------- /testing/testing_dauxiliary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | */ 5 | 6 | /** 7 | * 8 | * @copyright (c) 2009-2014 The University of Tennessee and The University 9 | * of Tennessee Research Foundation. 10 | * All rights reserved. 11 | * @copyright (c) 2012-2016 Inria. All rights reserved. 12 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 13 | * 14 | **/ 15 | 16 | /** 17 | * 18 | * @file testing_zauxiliary.h 19 | * 20 | * HICMA testing routines 21 | * HICMA is a software package provided by Univ. of Tennessee, 22 | * Univ. of California Berkeley and Univ. 
of Colorado Denver 23 | * 24 | * @version 0.9.0 25 | * @author Mathieu Faverge 26 | * @author Cédric Castagnède 27 | * @date 2018-11-08 28 | * @precisions normal z -> c d s 29 | * 30 | **/ 31 | #ifndef TESTING_ZAUXILIARY_H 32 | #define TESTING_ZAUXILIARY_H 33 | 34 | //#include "testing.h" 35 | 36 | #define USAGE(name, args, details) \ 37 | printf(" Proper Usage is : ./ztesting ncores ngpus " name " " args " with\n" \ 38 | " - ncores : number of cores \n" \ 39 | " - ngpus : number of GPUs\n" \ 40 | " - name : name of function to test\n" \ 41 | details); 42 | 43 | #ifdef WIN32 44 | #include 45 | #define isnan _isnan 46 | #endif 47 | 48 | #ifndef max 49 | #define max(a, b) ((a) > (b) ? (a) : (b)) 50 | #endif 51 | #ifndef min 52 | #define min(a, b) ((a) < (b) ? (a) : (b)) 53 | #endif 54 | /* Tables shared by the testing programs; only declared here, defined in another translation unit. */ 55 | extern int IONE; 56 | extern int ISEED[4]; 57 | 58 | extern int format[6]; 59 | extern int trans[3]; 60 | extern int uplo[2]; 61 | extern int side[2]; 62 | extern int diag[2]; 63 | extern int itype[3]; 64 | extern int storev[2]; 65 | extern int norm[4]; 66 | 67 | extern char *formatstr[6]; 68 | extern char *transstr[3]; 69 | extern char *uplostr[2]; 70 | extern char *sidestr[2]; 71 | extern char *diagstr[2]; 72 | extern char *itypestr[3]; 73 | extern char *storevstr[2]; 74 | extern char *normstr[4]; 75 | 76 | extern int (*formatmap[6])(int, int, int, int, int, int); 77 | /* Mapping functions with the same six-int signature as the formatmap entries. */ 78 | int map_CM (int m, int n, int mb, int nb, int i, int j); 79 | int map_CCRB(int m, int n, int mb, int nb, int i, int j); 80 | int map_CRRB(int m, int n, int mb, int nb, int i, int j); 81 | int map_RCRB(int m, int n, int mb, int nb, int i, int j); 82 | int map_RRRB(int m, int n, int mb, int nb, int i, int j); 83 | int map_RM (int m, int n, int mb, int nb, int i, int j); 84 | 85 | /* Test drivers, each taking an argc/argv pair. */ 86 | int testing_dposv(int argc, char **argv); 87 | int testing_dtrsmd(int argc, char **argv); 88 | 89 | #endif /* TESTING_ZAUXILIARY_H */ 90 | --------------------------------------------------------------------------------
/timing/timing_auxiliary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | /** 6 | * @file timing_auxiliary.h 7 | * 8 | * HiCMA computational routines 9 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 10 | * 11 | * @version 1.0.0 12 | * @author Kadir Akbudak 13 | * @date 2018-11-08 14 | **/ 15 | 16 | #include "hicma_struct.h" 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | #define PROBLEM_TYPE_RND 1 22 | #define PROBLEM_TYPE_SS 2 23 | #define PROBLEM_TYPE_RNDUSR 3 24 | #define PROBLEM_TYPE_FILE 4 25 | #define PROBLEM_TYPE_GEOSTAT 5 26 | #define PROBLEM_TYPE_EDSIN 6 27 | #define PROBLEM_TYPE_GEOSTAT_POINT 7 28 | #define PROBLEM_TYPE_ST_3D_EXP 8 29 | #define PROBLEM_TYPE_ST_3D_SQEXP 9 30 | #define PROBLEM_TYPE_3D_RBF_VIRUS 12 31 | #define PROBLEM_TYPE_3D_RBF_CUBE 13 32 | #define PROBLEM_TYPE_AC_3D 14 33 | #define PROBLEM_TYPE_ST_2D_EXP 15 34 | #define PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS_BIVARIATE 108 35 | #define PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS_BIVARIATE_POINT 109 36 | #define PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS2_BIVARIATE 110 37 | #define PROBLEM_TYPE_GEOSTAT_PARSIMONIOUS2_BIVARIATE_POINT 111 38 | 39 | void generate_problem( 40 | int probtype, //problem type: one of the PROBLEM_TYPE_* constants defined above 41 | char sym, // symmetry of problem: 'N' or 'S' 42 | double decay, // decay of singular values. Will be used in PROBLEM_TYPE_RND. Set 0 for now.
43 | int _M, // number of rows/columns of matrix 44 | int _nb, // number of rows/columns of a single tile 45 | int _mt, // number of tiles in row dimension 46 | int _nt, // number of tiles in column dimension 47 | HICMA_problem_t *hicma_problem // pointer to hicma struct (starsh format will be used to pass coordinate info to number generation and compression phase) 48 | ); 49 | #ifdef __cplusplus 50 | } 51 | #endif 52 | -------------------------------------------------------------------------------- /timing/timing_dauxiliary.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | 6 | /** 7 | * This file contains auxiliary functions for routines in timing folder. 8 | * @version 0.1.0 9 | * @author Kadir Akbudak 10 | * @date 2017-11-16 11 | **/ 12 | 13 | /* 14 | * @copyright (c) 2009-2014 The University of Tennessee and The University 15 | * of Tennessee Research Foundation. 16 | * All rights reserved. 17 | * @copyright (c) 2012-2016 Inria. All rights reserved. 18 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
19 | */ 20 | 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | // #include "libhqr.h" 27 | #include 28 | #include 29 | #include "coreblas/hicma_lapacke.h" 30 | #include 31 | #include "timing_dauxiliary.h" 32 | 33 | 34 | #undef CBLAS_SADDR //FIXME 35 | #define CBLAS_SADDR(_val) (_val) //FIXME I should not include this definition 36 | /*-------------------------------------------------------------- 37 | * Check the gemm: compare the HiCMA result Chicma against a reference dgemm computed in place in Cref. Stores the infinity norms of the initial Cref, of Chicma and of the reference product in Cinitnorm, Chicmanorm and Clapacknorm, and returns the infinity norm of (reference - Chicma). Cref is overwritten. 38 | */ 39 | double hicma_d_check_gemm( 40 | HICMA_enum transA, HICMA_enum transB, int M, int N, int K, //FIXME use z cblas calls for precision generation 41 | double alpha, double *A, int LDA, 42 | double *B, int LDB, 43 | double beta, double *Chicma, 44 | double *Cref, int LDC, 45 | double *Cinitnorm, double *Chicmanorm, double *Clapacknorm ) 46 | { 47 | double beta_const = -1.0; 48 | double Rnorm; 49 | double *work = (double *)malloc(hicma_max(K,hicma_max(M, N))* sizeof(double)); 50 | /* NOTE(review): the malloc result is not checked before it is used as dlange workspace. */ 51 | *Cinitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work); 52 | *Chicmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Chicma, LDC, work); 53 | /* Reference product, computed in place: Cref <- alpha*op(A)*op(B) + beta*Cref. */ 54 | cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, //TODO 55 | CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); //TODO 56 | 57 | *Clapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work); 58 | /* Cref <- Cref - Chicma over LDC*N entries, i.e. Chicma is treated as having leading dimension LDC as well. */ 59 | cblas_daxpy(LDC * N, CBLAS_SADDR(beta_const), Chicma, 1, Cref, 1); 60 | 61 | Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work); 62 | 63 | free(work); 64 | 65 | return Rnorm; 66 | } 67 | 68 | 69 | -------------------------------------------------------------------------------- /timing/timing_dauxiliary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved.
4 | **/ 5 | /** 6 | * @file timing_dauxiliary.h 7 | * 8 | * This file contains the declarations of auxiliary functions used for timing experiments. 9 | * 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 0.1.0 13 | * @author Kadir Akbudak 14 | * @date 2017-11-16 15 | **/ 16 | 17 | /* 18 | * @copyright (c) 2009-2014 The University of Tennessee and The University 19 | * of Tennessee Research Foundation. 20 | * All rights reserved. 21 | * @copyright (c) 2012-2016 Inria. All rights reserved. 22 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 23 | */ 24 | 25 | #ifndef TIMING_DAUXILIARY_H 26 | #define TIMING_DAUXILIARY_H 27 | 28 | double hicma_d_check_gemm(HICMA_enum transA, HICMA_enum transB, int M, int N, int K, 29 | double alpha, double *A, int LDA, 30 | double *B, int LDB, 31 | double beta, double *Chicma, 32 | double *Cref, int LDC, 33 | double *Cinitnorm, double *Chicmanorm, double *Clapacknorm ); 34 | 35 | 36 | 37 | #endif /* TIMING_DAUXILIARY_H */ 38 | -------------------------------------------------------------------------------- /timing/timing_zauxiliary.c: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 4 | **/ 5 | 6 | /** 7 | * This file contains auxilary functions for routines in timing folder. 8 | * @version 0.1.0 9 | * @author Kadir Akbudak 10 | * @date 2017-11-16 11 | **/ 12 | 13 | /* 14 | * @copyright (c) 2009-2014 The University of Tennessee and The University 15 | * of Tennessee Research Foundation. 16 | * All rights reserved. 17 | * @copyright (c) 2012-2016 Inria. All rights reserved. 18 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 
19 | */ 20 | 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | // #include "libhqr.h" 27 | #include 28 | #include 29 | #include "coreblas/hicma_lapacke.h" 30 | #include 31 | #include "timing_zauxiliary.h" 32 | 33 | 34 | #undef CBLAS_SADDR //FIXME 35 | #define CBLAS_SADDR(_val) (_val) //FIXME I should not include this definition 36 | /*-------------------------------------------------------------- 37 | * Check the gemm 38 | */ 39 | double hicma_z_check_gemm( 40 | HICMA_enum transA, HICMA_enum transB, int M, int N, int K, //FIXME use z cblas calls for precision generation 41 | double alpha, double *A, int LDA, 42 | double *B, int LDB, 43 | double beta, double *Chicma, 44 | double *Cref, int LDC, 45 | double *Cinitnorm, double *Chicmanorm, double *Clapacknorm ) 46 | { 47 | double beta_const = -1.0; 48 | double Rnorm; 49 | double *work = (double *)malloc(hicma_max(K,hicma_max(M, N))* sizeof(double)); 50 | 51 | *Cinitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work); 52 | *Chicmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Chicma, LDC, work); 53 | 54 | cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, //TODO 55 | CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); //TODO 56 | 57 | *Clapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work); 58 | 59 | cblas_daxpy(LDC * N, CBLAS_SADDR(beta_const), Chicma, 1, Cref, 1); 60 | 61 | Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, 'I', M, N, Cref, LDC, work); 62 | 63 | free(work); 64 | 65 | return Rnorm; 66 | } 67 | 68 | 69 | -------------------------------------------------------------------------------- /timing/timing_zauxiliary.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @copyright (c) 2017-2022 King Abdullah University of Science and Technology (KAUST). 3 | * All rights reserved. 
4 | **/ 5 | /** 6 | * @file timing_zauxiliary.h 7 | * 8 | * This file contains the declarations of auxiliary functions used for timing experiments. 9 | * 10 | * HiCMA is a software package provided by King Abdullah University of Science and Technology (KAUST) 11 | * 12 | * @version 0.1.0 13 | * @author Kadir Akbudak 14 | * @date 2017-11-16 15 | **/ 16 | 17 | /* 18 | * @copyright (c) 2009-2014 The University of Tennessee and The University 19 | * of Tennessee Research Foundation. 20 | * All rights reserved. 21 | * @copyright (c) 2012-2016 Inria. All rights reserved. 22 | * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. 23 | */ 24 | 25 | #ifndef TIMING_ZAUXILIARY_H 26 | #define TIMING_ZAUXILIARY_H 27 | /* GEMM reference check: the implementation in timing_zauxiliary.c applies cblas_dgemm to Cref (overwriting it), subtracts Chicma, and returns the infinity norm ('I') of the difference. The infinity norms of the initial Cref, of Chicma and of the reference result are stored through Cinitnorm, Chicmanorm and Clapacknorm. NOTE: despite the z prefix, all arguments are real double precision and the d routines are called. */ 28 | double hicma_z_check_gemm(HICMA_enum transA, HICMA_enum transB, int M, int N, int K, 29 | double alpha, double *A, int LDA, 30 | double *B, int LDB, 31 | double beta, double *Chicma, 32 | double *Cref, int LDC, 33 | double *Cinitnorm, double *Chicmanorm, double *Clapacknorm ); 34 | 35 | 36 | 37 | #endif /* TIMING_ZAUXILIARY_H */ 38 | --------------------------------------------------------------------------------