├── .github └── workflows │ ├── build.sh │ ├── configure.sh │ ├── main.yml │ ├── setup_env.sh │ └── test.sh ├── .gitignore ├── CMakeLists.txt ├── ChangeLog ├── LICENSE ├── README.md ├── cmake ├── FindAccelerate.cmake ├── FindCBLAS.cmake ├── FindLAPACKE.cmake ├── FindLibSci.cmake ├── FindMKL.cmake ├── FindNetlibCblas.cmake ├── FindNetlibLapacke.cmake ├── FindOpenBLAS.cmake └── Generator.cmake ├── compute ├── clag2z.c ├── dlaebz2.c ├── dlaneg2.c ├── dstevx2.c ├── dzamax.c ├── pclag2z.c ├── pdzamax.c ├── pzdesc2ge.c ├── pzdesc2pb.c ├── pzdesc2tr.c ├── pzgb2desc.c ├── pzgbbrd_static.c ├── pzgbtrf.c ├── pzge2desc.c ├── pzge2gb.c ├── pzgeadd.c ├── pzgecpy_tile2lapack_band.c ├── pzgelqf.c ├── pzgelqf_tree.c ├── pzgemm.c ├── pzgeqrf.c ├── pzgeqrf_tree.c ├── pzgeswp.c ├── pzgetrf.c ├── pzgetri_aux.c ├── pzhbtrd_static.c ├── pzhe2hb.c ├── pzhecpy_tile2lapack_band.c ├── pzhemm.c ├── pzher2k.c ├── pzherk.c ├── pzhetrf_aasen.c ├── pzlacpy.c ├── pzlag2c.c ├── pzlangb.c ├── pzlange.c ├── pzlanhe.c ├── pzlansy.c ├── pzlantr.c ├── pzlarft_blgtrd.c ├── pzlascl.c ├── pzlaset.c ├── pzlauum.c ├── pzpb2desc.c ├── pzpbtrf.c ├── pzpotrf.c ├── pzsymm.c ├── pzsyr2k.c ├── pzsyrk.c ├── pztbsm.c ├── pztr2desc.c ├── pztradd.c ├── pztrmm.c ├── pztrsm.c ├── pztrtri.c ├── pzunglq.c ├── pzunglq_tree.c ├── pzungqr.c ├── pzungqr_tree.c ├── pzunmlq.c ├── pzunmlq_tree.c ├── pzunmqr.c ├── pzunmqr_blgtrd.c ├── pzunmqr_tree.c ├── zcgbsv.c ├── zcgesv.c ├── zcposv.c ├── zdesc2ge.c ├── zdesc2pb.c ├── zdesc2tr.c ├── zgb2desc.c ├── zgbmm.c ├── zgbset.c ├── zgbsv.c ├── zgbtrf.c ├── zgbtrs.c ├── zge2desc.c ├── zgeadd.c ├── zgeinv.c ├── zgelqf.c ├── zgelqs.c ├── zgels.c ├── zgemm.c ├── zgeqrf.c ├── zgeqrs.c ├── zgesdd.c ├── zgesv.c ├── zgeswp.c ├── zgetrf.c ├── zgetri.c ├── zgetri_aux.c ├── zgetrs.c ├── zheevd.c ├── zhemm.c ├── zher2k.c ├── zherk.c ├── zhesv.c ├── zhetrf.c ├── zhetrs.c ├── zlacpy.c ├── zlag2c.c ├── zlangb.c ├── zlange.c ├── zlanhe.c ├── zlansy.c ├── zlantr.c ├── zlascl.c ├── zlaset.c ├── zlauum.c 
├── zpb2desc.c ├── zpbsv.c ├── zpbtrf.c ├── zpbtrs.c ├── zpoinv.c ├── zposv.c ├── zpotrf.c ├── zpotri.c ├── zpotrs.c ├── zsymm.c ├── zsyr2k.c ├── zsyrk.c ├── ztr2desc.c ├── ztradd.c ├── ztrmm.c ├── ztrsm.c ├── ztrtri.c ├── zunglq.c ├── zungqr.c ├── zunmlq.c └── zunmqr.c ├── control ├── async.c ├── barrier.c ├── constants.c ├── context.c ├── descriptor.c ├── tree.c ├── tuning.c ├── version.c └── workspace.c ├── core_blas ├── core_clag2z.c ├── core_dcabs1.c ├── core_dzamax.c ├── core_zgbtype1cb.c ├── core_zgbtype2cb.c ├── core_zgbtype3cb.c ├── core_zgeadd.c ├── core_zgelqt.c ├── core_zgemm.c ├── core_zgeqrt.c ├── core_zgessq.c ├── core_zgeswp.c ├── core_zgetrf.c ├── core_zhbtrd_type1.c ├── core_zhbtrd_type2.c ├── core_zhbtrd_type3.c ├── core_zhegst.c ├── core_zhemm.c ├── core_zher2k.c ├── core_zherfb.c ├── core_zherk.c ├── core_zhessq.c ├── core_zheswp.c ├── core_zlacpy.c ├── core_zlacpy_band.c ├── core_zlag2c.c ├── core_zlange.c ├── core_zlanhe.c ├── core_zlansy.c ├── core_zlantr.c ├── core_zlarfb_gemm.c ├── core_zlarfy.c ├── core_zlascl.c ├── core_zlaset.c ├── core_zlauum.c ├── core_zpamm.c ├── core_zparfb.c ├── core_zpemv.c ├── core_zpotrf.c ├── core_zsymm.c ├── core_zsyr2k.c ├── core_zsyrk.c ├── core_zsyssq.c ├── core_ztradd.c ├── core_ztrmm.c ├── core_ztrsm.c ├── core_ztrssq.c ├── core_ztrtri.c ├── core_ztslqt.c ├── core_ztsmlq.c ├── core_ztsmlq_2sided.c ├── core_ztsmlq_conj_trans.c ├── core_ztsmqr.c ├── core_ztsmqr_2sided.c ├── core_ztsmqr_conj_trans.c ├── core_ztsqrt.c ├── core_zttlqt.c ├── core_zttmlq.c ├── core_zttmqr.c ├── core_zttqrt.c ├── core_zunmlq.c └── core_zunmqr.c ├── docs ├── doxygen │ ├── DoxygenLayout.xml │ ├── doxyfile.conf │ └── groups.dox └── style-guide.md ├── fortran_examples ├── test_zcgesv.f90 ├── test_zcposv.f90 ├── test_zgels.f90 ├── test_zgeqrf.f90 ├── test_zgeqrf_omp.f90 ├── test_zgetrs.f90 ├── test_zpotrf.f90 ├── test_zpotri.f90 └── test_zpotrs.f90 ├── include ├── bulge.h ├── core_lapack.h ├── core_lapack_z.h ├── plasma.h ├── 
plasma_async.h ├── plasma_barrier.h ├── plasma_config.hin ├── plasma_context.h ├── plasma_core_blas.h ├── plasma_core_blas_z.h ├── plasma_core_blas_zc.h ├── plasma_descriptor.h ├── plasma_error.h ├── plasma_internal.h ├── plasma_internal_z.h ├── plasma_internal_zc.h ├── plasma_tree.h ├── plasma_tuning.h ├── plasma_types.h ├── plasma_workspace.h ├── plasma_z.h ├── plasma_zc.h └── plasma_zlaebz2_work.h ├── lib └── pkgconfig │ └── plasma.pc.in ├── share ├── cmake │ └── plasma.cmakein └── pkgconfig │ └── plasma.pcin ├── test ├── flops.h ├── run_tests.py ├── test.c ├── test.h ├── test_clag2z.c ├── test_dstevx2.c ├── test_dzamax.c ├── test_z.h ├── test_zc.h ├── test_zcgbsv.c ├── test_zcgesv.c ├── test_zcposv.c ├── test_zgbmm.c ├── test_zgbsv.c ├── test_zgbtrf.c ├── test_zgeadd.c ├── test_zgeinv.c ├── test_zgelqf.c ├── test_zgelqs.c ├── test_zgels.c ├── test_zgemm.c ├── test_zgeqrf.c ├── test_zgeqrs.c ├── test_zgesdd.c ├── test_zgesv.c ├── test_zgeswp.c ├── test_zgetrf.c ├── test_zgetri.c ├── test_zgetri_aux.c ├── test_zgetrs.c ├── test_zhbtrd.c ├── test_zheevd.c ├── test_zhemm.c ├── test_zher2k.c ├── test_zherk.c ├── test_zhesv.c ├── test_zhetrf.c ├── test_zlacpy.c ├── test_zlag2c.c ├── test_zlangb.c ├── test_zlange.c ├── test_zlanhe.c ├── test_zlansy.c ├── test_zlantr.c ├── test_zlascl.c ├── test_zlaset.c ├── test_zlauum.c ├── test_zpbsv.c ├── test_zpbtrf.c ├── test_zpoinv.c ├── test_zposv.c ├── test_zpotrf.c ├── test_zpotri.c ├── test_zpotrs.c ├── test_zprint.c ├── test_zsymm.c ├── test_zsyr2k.c ├── test_zsyrk.c ├── test_ztradd.c ├── test_ztrmm.c ├── test_ztrsm.c ├── test_ztrtri.c ├── test_zunmlq.c └── test_zunmqr.c ├── tools ├── checklist.py ├── codegen.py ├── create_release_file.sh ├── doxygen_filter ├── doxygen_groups.sh ├── fortran_gen.py ├── lua-5.3.4.tar.gz ├── makespackrelease.py ├── old_build_system │ ├── Makefile │ ├── config │ │ ├── __init__.py │ │ ├── config.py │ │ └── environment.py │ ├── configure.py │ ├── make.inc.in │ ├── make.inc.mkl-gcc │ └── 
make.inc.power8-gcc ├── subs.py └── trace.c └── tuning └── default.lua /.github/workflows/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | maker=$1 4 | device=$2 5 | 6 | mydir=$(dirname $0) 7 | source ${mydir}/setup_env.sh 8 | 9 | print "======================================== Build" 10 | make -j8 || exit 10 11 | 12 | print "======================================== Install" 13 | make -j8 install || exit 11 14 | ls -R ${top}/install 15 | 16 | print "======================================== Verify build" 17 | ldd_result=$(ldd plasmatest) || exit 12 18 | echo "${ldd_result}" 19 | 20 | print "======================================== Finished build" 21 | exit 0 22 | -------------------------------------------------------------------------------- /.github/workflows/configure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | maker=$1 4 | device=$2 5 | 6 | # CMake build directory. cd is in setup_env.sh. 7 | rm -rf build 8 | mkdir -p build 9 | 10 | mydir=$(dirname $0) 11 | source ${mydir}/setup_env.sh 12 | 13 | print "======================================== Environment" 14 | # Show environment variables, excluding functions. 15 | (set -o posix; set) 16 | 17 | print "======================================== Modules" 18 | quiet module list -l 19 | 20 | print "======================================== Setup build" 21 | # Note: set all env variables in setup_env.sh, 22 | # else build.sh and test.sh won't see them. 23 | 24 | rm -rf ${top}/install 25 | 26 | cmake -DCMAKE_INSTALL_PREFIX=${top}/install .. 
|| exit 12 27 | 28 | print "======================================== Finished configure" 29 | exit 0 30 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: CI 4 | 5 | # Controls when the workflow will run 6 | on: 7 | # Triggers the workflow on push or pull request events but only for the main branch 8 | push: 9 | branches: [ main ] 10 | pull_request: 11 | branches: [ main ] 12 | 13 | # Allows you to run this workflow manually from the Actions tab 14 | workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | icl_plasma: 19 | timeout-minutes: 120 20 | strategy: 21 | matrix: 22 | maker: [cmake] 23 | device: [cpu] 24 | fail-fast: false 25 | runs-on: ${{ matrix.device }} 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Configure 29 | run: .github/workflows/configure.sh ${{matrix.maker}} ${{matrix.device}} 30 | - name: Build 31 | run: .github/workflows/build.sh ${{matrix.maker}} ${{matrix.device}} 32 | - name: Test 33 | run: .github/workflows/test.sh ${{matrix.maker}} ${{matrix.device}} 34 | -------------------------------------------------------------------------------- /.github/workflows/setup_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Sourced from configure.sh, build.sh, test.sh 4 | # to load modules and setup environment. 5 | 6 | #------------------------------------------------------------------------------- 7 | # Functions 8 | 9 | # Suppress echo (-x) output of commands executed with `quiet`. 10 | # Useful for sourcing files, loading modules, spack, etc. 11 | # set +x, set -x are not echo'd. 
12 | quiet() { 13 | { set +x; } 2> /dev/null; 14 | $@; 15 | set -x 16 | } 17 | 18 | # `print` is like `echo`, but suppresses output of the command itself. 19 | # https://superuser.com/a/1141026 20 | echo_and_restore() { 21 | builtin echo "$*" 22 | date 23 | case "${save_flags}" in 24 | (*x*) set -x 25 | esac 26 | } 27 | alias print='{ save_flags="$-"; set +x; } 2> /dev/null; echo_and_restore' 28 | 29 | 30 | #------------------------------------------------------------------------------- 31 | quiet source /etc/profile 32 | 33 | hostname && pwd 34 | export top=$(pwd) 35 | 36 | shopt -s expand_aliases 37 | 38 | quiet module load intel-oneapi-mkl 39 | print "MKLROOT=${MKLROOT}" 40 | 41 | quiet module load python 42 | quiet which python 43 | quiet which python3 44 | python --version 45 | python3 --version 46 | 47 | quiet module load pkgconf 48 | quiet which pkg-config 49 | 50 | #----------------------------------------------------------------- Compiler 51 | print "======================================== Load GNU compiler" 52 | quiet module load gcc@11.3 53 | 54 | print "---------------------------------------- Verify compiler" 55 | print "CXX = $CXX" 56 | print "CC = $CC" 57 | print "FC = $FC" 58 | ${CXX} --version 59 | ${CC} --version 60 | ${FC} --version 61 | 62 | #----------------------------------------------------------------- CMake 63 | print "======================================== Load cmake" 64 | quiet module load cmake 65 | quiet which cmake 66 | cmake --version 67 | cd build 68 | -------------------------------------------------------------------------------- /.github/workflows/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | maker=$1 4 | device=$2 5 | 6 | mydir=$(dirname $0) 7 | source ${mydir}/setup_env.sh 8 | 9 | # Instead of exiting on the first failed test (bash -e), 10 | # run all the tests and accumulate failures into $err. 
11 | err=0 12 | 13 | export OMP_NUM_THREADS=8 14 | 15 | print "======================================== Tests" 16 | cd test 17 | 18 | args="--quick" 19 | 20 | ./run_tests.py ${args} 21 | (( err += $? )) 22 | 23 | print "======================================== Finished test" 24 | exit ${err} 25 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | # ChangeLog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | ## [Unreleased] 6 | 7 | ## [25.5.27] - 2025-05-27 8 | ### Added 9 | - Add computing of eigenvectors for symmetric and Hermitian matrices with 2-stage algorithm 10 | - Add invocation of code generator during CMake configuration 11 | - Add support for BLAS implementation in BLIS 12 | - Add comprehensive testing Python script 13 | - Add repetition of individual tests for a single function 14 | - Add style guide for code contributors 15 | - Add CI scripts for testing pull requests and commits 16 | 17 | ### Fixed 18 | - Fix release tar-ball generator to use proper PkgConfig file name 19 | - Fix generation of Fortran wrappers from C constants 20 | - Fix time stamp type to avoid changes from the code generator 21 | - Fix source code formatting based on the style guide 22 | - Fix robustness of some of the testers 23 | - Fix ignoring of intermediate files for Git 24 | 25 | ## [24.8.7] - 2024-08-07 26 | ### Added 27 | - Add an attempt to generate missing precision files if Python is present during configuration 28 | - Add generation of pkg-config files during installation 29 | - Add generation of CMake exported configuration during installation 30 | - Add timeout to the basic testing script 31 | - Add SOVERSION to the installed libraries to track API changes 32 | - Add detection of CBLAS and LAPACKE headers and libraries based on CMake's FindBLAS() and FindLAPACK() 33 | - Add description of the CMake installation process 
with system-level details 34 | 35 | ### Fixed 36 | - Fix variable pointing to OpenBLAS installation 37 | - Fix name of Python executable when launching code generation 38 | - Fix download link for available releases in documentation 39 | - Fix compatibility of basic testing script with Python 3 versions 40 | - Fix error codes to be negative unlike numerical issues reported as positive integers 41 | - Fix generation of Fortran wrappers 42 | 43 | ## [23.8.2] - 2023-08-02 44 | 45 | ### Added 46 | - Add support for OpenBLAS and its exported CMake configuration 47 | - Add PLASMA prefix to remaining config macros 48 | 49 | ### Removed 50 | - Remove old build system files using Python script for configuration in Makefile 51 | 52 | ## [22.9.29] - 2022-09-29 53 | ### Added 54 | - Add xGBMM() for band matrix multiply 55 | - Add xGESDD() for symmetric/Hermitian divide-and-conquer eigensolver 56 | - Add xSTEVX() for tridiagonal matrix eigensolver based on bisection 57 | 58 | ### Fixed 59 | - Fix reporting of testers' program name 60 | - Fix build issues without Fortran interface 61 | 62 | ## [21.8.29] - 2021-08-29 63 | ### Added 64 | - Add transpose option to xGETRS() functions 65 | - Add transpose option to xGELS() functions 66 | - Add convenience scripts for C and Fortran examples 67 | - Add Python script for quickly launching tests 68 | 69 | ### Removed 70 | - Remove Fortran code and comments producing warnings 71 | 72 | ### Fixed 73 | - Fix wrong pointer in tuning 74 | - Fix division by zero for zero-matrix norm 75 | - Fix error propagation in sequences 76 | - Fix LU factorization for rectangular matrices 77 | 78 | ## [20.9.20] - 2020-09-20 79 | ### Added 80 | - New detection of Apple's Accelerate Framework as BLAS and LAPACK implementations 81 | - New GPU offload option detection of the MAGMA library 82 | - Version reporting in tester 83 | 84 | ### Removed 85 | - Remove POSIX threads code and now only OpenMP threading support 86 | 87 | ### Fixed 88 | - Fix support for 
wider range of CMake versions 89 | - Fix Fortran examples' syntax 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | -- Innovative Computing Laboratory 2 | -- University of Tennessee 3 | -- (C) Copyright 2008-2016 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | * Neither the name of the University of Tennessee, Knoxville nor the 15 | names of its contributors may be used to endorse or promote products 16 | derived from this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /cmake/FindAccelerate.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindAccelerate 3 | # --------------- 4 | # 5 | # Find Apple's Accelerate library. 6 | # This module sets the following variables: 7 | # 8 | # :: 9 | # 10 | # Accelerate_FOUND - set to TRUE if a Accelerate library was found 11 | # Accelerate_INCLUDE_DIRS - Location of Accelerate header files 12 | # Accelerate_LINKER_FLAGS - linker flags required to link Accelerate 13 | # Accelerate_LIBRARIES - libraries required to link Accelerate 14 | # 15 | # User settings 16 | # ------------- 17 | # 18 | # ACCELERATE_ROOT is a directory that contains a Accelerate installation. 19 | # ENV{ACCELERATE_ROOT} is an environment variable pointing to directory that contains a Accelerate installation. 20 | # 21 | 22 | if (NOT Accelerate_INCLUDE_DIRS) 23 | find_path(Accelerate_INCLUDE_DIRS Accelerate/Accelerate.h PATHS ${ACCELERATE_ROOT} ENV ACCELERATE_ROOT PATH_SUFFIXES include DOC "Path to Apple's Accelerate include directory") 24 | endif() 25 | 26 | if (NOT Accelerate_LIBRARIES) 27 | find_package(BLAS REQUIRED) 28 | 29 | # add BLAS libraries to CMake's link line (Accelerate is searched and found by CMake's FindBLAS.cmake module) 30 | set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${MATH_LIB}) 31 | 32 | include(CheckSymbolExists) 33 | check_symbol_exists(cblas_cgemm Accelerate/Accelerate.h Accelerate_WORKS) 34 | if (Accelerate_WORKS) 35 | set(Accelerate_LIBRARIES ${BLAS_LIBRARIES}) 36 | endif() 37 | endif() 38 | 39 | find_package_handle_standard_args(Accelerate REQUIRED_VARS Accelerate_INCLUDE_DIRS Accelerate_LIBRARIES) 40 | -------------------------------------------------------------------------------- /cmake/FindCBLAS.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindCBLAS 3 | # --------- 4 | 
# 5 | # Find CBLAS header files and libraries 6 | # This module sets the following variables: 7 | # 8 | # :: 9 | # 10 | # CBLAS_FOUND - set to true if a library implementing the BLAS C interface 11 | # is found 12 | 13 | # if the caller didn't select CBLAS then see if the environment variable has it 14 | if (NOT CBLAS_PROVIDER) 15 | if (NOT "x_$ENV{CBLAS_PROVIDER}_x" STREQUAL "x__x") 16 | set(CBLAS_PROVIDER $ENV{CBLAS_PROVIDER}) 17 | else () 18 | set(CBLAS_PROVIDER "any") 19 | endif() 20 | endif() 21 | # Each provider is probed in its own if() so that "any" falls through to the next candidate; a successful probe overwrites CBLAS_PROVIDER, which skips the remaining ones. 22 | if ("x_${CBLAS_PROVIDER}_x" STREQUAL "x_generic_x" OR "x_${CBLAS_PROVIDER}_x" STREQUAL "x_any_x") 23 | # the C BLAS root path is defined, attempt to find the header and libraries 24 | if (CBLAS_ROOT) 25 | find_path(CBLAS_INCLUDE_DIRS cblas.h PATHS ${CBLAS_ROOT} ENV CBLAS_ROOT PATH_SUFFIXES include DOC "Path to C BLAS include directory") 26 | if (NOT CBLAS_LIBRARIES) 27 | find_library(CBLAS_LIBRARIES cblas PATHS ${CBLAS_ROOT} ENV CBLAS_ROOT) 28 | endif() 29 | else() 30 | find_package( LibSci ) 31 | if (LibSci_FOUND) 32 | set(CBLAS_INCLUDE_DIRS ${LibSci_INCLUDE_DIRS}) 33 | set(CBLAS_LIBRARIES ${LibSci_LIBRARIES}) 34 | set(CBLAS_PROVIDER "libsci") 35 | find_package_handle_standard_args(CBLAS REQUIRED_VARS CBLAS_INCLUDE_DIRS CBLAS_LIBRARIES FAIL_MESSAGE "Can't find CBLAS") 36 | return() 37 | endif() 38 | endif() 39 | endif() 40 | if ("x_${CBLAS_PROVIDER}_x" STREQUAL "x_mkl_x" OR "x_${CBLAS_PROVIDER}_x" STREQUAL "x_any_x") 41 | find_package( MKL ) 42 | if (MKL_FOUND) 43 | set(CBLAS_INCLUDE_DIRS ${MKL_INCLUDE_DIRS}) 44 | set(CBLAS_LIBRARIES ${MKL_LIBRARIES}) 45 | set(CBLAS_PROVIDER "mkl") 46 | endif() 47 | endif() 48 | if ("x_${CBLAS_PROVIDER}_x" STREQUAL "x_openblas_x" OR "x_${CBLAS_PROVIDER}_x" STREQUAL "x_any_x") 49 | find_package( OpenBLAS ) 50 | if (OpenBLAS_FOUND) 51 | set(CBLAS_INCLUDE_DIRS ${OpenBLAS_INCLUDE_DIRS}) 52 | set(CBLAS_LIBRARIES ${OpenBLAS_LIBRARIES}) 53 | set(CBLAS_PROVIDER "openblas") 54 | endif() 55 | endif() 56 | if ("x_${CBLAS_PROVIDER}_x" STREQUAL "x_netlib_x" OR "x_${CBLAS_PROVIDER}_x" STREQUAL "x_any_x") 57 | 
find_package( NetlibCblas ) 58 | if (NetlibCblas_FOUND) 59 | set(CBLAS_INCLUDE_DIRS ${NetlibCblas_INCLUDE_DIRS}) 60 | set(CBLAS_LIBRARIES ${NetlibCblas_LIBRARIES}) 61 | set(CBLAS_PROVIDER "netlib") 62 | endif() 63 | endif() 64 | if ("x_${CBLAS_PROVIDER}_x" STREQUAL "x_accelerate_x" OR "x_${CBLAS_PROVIDER}_x" STREQUAL "x_any_x") 65 | find_package( Accelerate ) 66 | if (Accelerate_FOUND) 67 | set(CBLAS_INCLUDE_DIRS ${Accelerate_INCLUDE_DIRS}) 68 | set(CBLAS_LIBRARIES ${Accelerate_LIBRARIES}) 69 | set(CBLAS_PROVIDER "accelerate") 70 | endif() 71 | 72 | endif() 73 | 74 | find_package_handle_standard_args(CBLAS REQUIRED_VARS CBLAS_INCLUDE_DIRS CBLAS_LIBRARIES CBLAS_PROVIDER) 75 | -------------------------------------------------------------------------------- /cmake/FindLAPACKE.cmake: -------------------------------------------------------------------------------- 1 | 2 | #.rst: 3 | # FindLAPACKE 4 | # ----------- 5 | # 6 | # Find LAPACKE header files and libraries 7 | # This module sets the following variables: 8 | # 9 | # :: 10 | # 11 | # LAPACKE_FOUND - set to true if a library implementing the LAPACK C interface 12 | # is found 13 | 14 | # if the caller didn't select LAPACKE then see if the environment variable has it 15 | if (NOT LAPACKE_PROVIDER) 16 | if (NOT "x_$ENV{LAPACKE_PROVIDER}_x" STREQUAL "x__x") 17 | set(LAPACKE_PROVIDER $ENV{LAPACKE_PROVIDER}) 18 | else () 19 | set(LAPACKE_PROVIDER "any") 20 | endif() 21 | endif() 22 | 23 | if ("x_${LAPACKE_PROVIDER}_x" STREQUAL "x_generic_x" OR "x_${LAPACKE_PROVIDER}_x" STREQUAL "x_any_x") 24 | # the LAPACKE BLAS root path is defined, attempt to find the header and libraries 25 | if (LAPACKE_ROOT) 26 | find_path(LAPACKE_INCLUDE_DIRS lapacke.h PATHS ${LAPACKE_ROOT} ENV LAPACKE_ROOT PATH_SUFFIXES include DOC "Path to LapackE include directory") 27 | if (NOT LAPACKE_LIBRARIES) 28 | find_library(LAPACKE_LIBRARIES lapacke PATHS ${LAPACKE_ROOT} ENV LAPACKE_ROOT) 29 | endif() 30 | else() 31 | find_package( LibSci ) 32 | if 
(LibSci_FOUND) 33 | set(LAPACKE_INCLUDE_DIRS ${LibSci_INCLUDE_DIRS}) 34 | set(LAPACKE_LIBRARIES ${LibSci_LIBRARIES}) 35 | set(LAPACKE_PROVIDER "libsci") 36 | find_package_handle_standard_args(LAPACKE REQUIRED_VARS LAPACKE_INCLUDE_DIRS LAPACKE_LIBRARIES FAIL_MESSAGE "Can't find LAPACKE") 37 | return() 38 | endif() 39 | 40 | endif() 41 | endif() 42 | 43 | if ("x_${LAPACKE_PROVIDER}_x" STREQUAL "x_mkl_x" OR "x_${LAPACKE_PROVIDER}_x" STREQUAL "x_any_x") 44 | find_package( MKL ) 45 | if (MKL_FOUND) 46 | set(LAPACKE_INCLUDE_DIRS ${MKL_INCLUDE_DIRS}) 47 | set(LAPACKE_LIBRARIES ${MKL_LIBRARIES}) 48 | set(LAPACKE_PROVIDER "mkl") 49 | endif() 50 | endif() 51 | 52 | if ("x_${LAPACKE_PROVIDER}_x" STREQUAL "x_openblas_x" OR "x_${LAPACKE_PROVIDER}_x" STREQUAL "x_any_x") 53 | find_package( OpenBLAS ) 54 | if (OpenBLAS_FOUND) 55 | set(LAPACKE_INCLUDE_DIRS ${OpenBLAS_INCLUDE_DIRS}) 56 | set(LAPACKE_LIBRARIES ${OpenBLAS_LIBRARIES}) 57 | set(LAPACKE_PROVIDER "openblas") 58 | endif() 59 | endif() 60 | 61 | if ("x_${LAPACKE_PROVIDER}_x" STREQUAL "x_netlib_x" OR "x_${LAPACKE_PROVIDER}_x" STREQUAL "x_any_x") 62 | find_package( NetlibLapacke ) 63 | if(NetlibLapacke_FOUND) 64 | set(LAPACKE_INCLUDE_DIRS ${NetlibLapacke_INCLUDE_DIRS}) 65 | set(LAPACKE_LIBRARIES ${NetlibLapacke_LIBRARIES}) 66 | set(LAPACKE_PROVIDER "netlib") 67 | endif() 68 | endif() 69 | 70 | find_package_handle_standard_args(LAPACKE REQUIRED_VARS LAPACKE_INCLUDE_DIRS LAPACKE_LIBRARIES LAPACKE_PROVIDER) 71 | -------------------------------------------------------------------------------- /cmake/FindLibSci.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindLibSci 3 | # ------- 4 | # 5 | # Find header files for Cray LibSci installation. 
6 | # This module sets the following variables: 7 | # 8 | # :: 9 | # 10 | # LibSci_FOUND - set to true if LibSci implementation of CBLAS was found 11 | # LibSci_INCLUDE_DIRS - location of LibSci CBLAS header files 12 | # LibSci_LIBRARIES - libraries required to link for C interface to BLAS 13 | # 14 | # User settings 15 | # ------------- 16 | # 17 | # LibSci_ROOT a directory that contains LibSci CBLAS installation. 18 | # ENV{LibSci_ROOT} a directory that contains LibSci CBLAS installation. 19 | # 20 | 21 | if (NOT LibSci_INCLUDE_DIRS) 22 | find_path(LibSci_INCLUDE_DIRS cblas.h PATHS ${LibSci_ROOT} ENV LibSci_ROOT PATH_SUFFIXES include DOC "Path to LibSci CBLAS include directory") 23 | if (LibSci_INCLUDE_DIRS) 24 | set(CMAKE_REQUIRED_INCLUDES ${LibSci_INCLUDE_DIRS}) 25 | endif() 26 | endif() 27 | 28 | if (NOT LibSci_LIBRARIES) 29 | find_package(BLAS REQUIRED) 30 | if (NOT WIN32) 31 | set(MATH_LIB "-lm") 32 | endif () 33 | set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${MATH_LIB}) 34 | include(CheckSymbolExists) 35 | check_symbol_exists(cblas_cgemm cblas.h LibSci_WORKS) 36 | unset(CMAKE_REQUIRED_INCLUDES) 37 | unset(CMAKE_REQUIRED_LIBRARIES) 38 | 39 | if (LibSci_WORKS) 40 | set(LibSci_LIBRARIES ${BLAS_LIBRARIES}) 41 | endif() 42 | endif() 43 | 44 | if (LibSci_INCLUDE_DIRS AND LibSci_LIBRARIES) 45 | find_package_handle_standard_args(LibSci REQUIRED_VARS LibSci_INCLUDE_DIRS LibSci_LIBRARIES) 46 | 47 | elseif (LibSci_INCLUDE_DIRS) 48 | find_package_handle_standard_args(LibSci REQUIRED_VARS LibSci_INCLUDE_DIRS) 49 | 50 | elseif (LibSci_LIBRARIES) 51 | find_package_handle_standard_args(LibSci REQUIRED_VARS LibSci_LIBRARIES) 52 | 53 | else() # neither part was found: name the required variables so LibSci_FOUND becomes FALSE rather than aborting on a call without REQUIRED_VARS 54 | find_package_handle_standard_args(LibSci REQUIRED_VARS LibSci_INCLUDE_DIRS LibSci_LIBRARIES FAIL_MESSAGE "Can't find LibSci") 55 | endif() 56 | -------------------------------------------------------------------------------- /cmake/FindMKL.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindMKL 3 | # ------- 4 
| # 5 | # Find header files for Intel MKL installation. 6 | # This module sets the following variables: 7 | # 8 | # :: 9 | # 10 | # MKL_FOUND - set to true if a library implementing the BLAS C interface 11 | # is found 12 | # MKL_FOUND - set to true if MKL implementation of C BLAS was found 13 | # MKL_INCLUDE_DIRS - location of MKL header files 14 | # MKL_LIBRARIES - libraries required to link for MKL's C interface to BLAS 15 | # 16 | # User settings 17 | # ------------- 18 | # 19 | # MKLROOT a directory that contains MKL installation. 20 | # ENV{MKLROOT} a directory that contains MKL installation. 21 | # 22 | 23 | if (NOT MKL_INCLUDE_DIRS) 24 | find_path(MKL_INCLUDE_DIRS mkl.h PATHS ${MKLROOT} ENV MKLROOT PATH_SUFFIXES include DOC "Path to MKL include directory") 25 | endif() 26 | 27 | if (NOT MKL_LIBRARIES) 28 | #find_library(MKL_LIBRARIES mkl_core NAMES mkl_sequential PATHS MKLROOT ENV MKLROOT) 29 | find_package(BLAS REQUIRED) 30 | set(CMAKE_REQUIRED_INCLUDES ${MKL_INCLUDE_DIRS}) 31 | if (NOT WIN32) 32 | set(MATH_LIB "-lm") 33 | endif () 34 | set(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${MATH_LIB}) 35 | include(CheckSymbolExists) 36 | check_symbol_exists(cblas_cgemm mkl.h MKL_WORKS) 37 | unset(CMAKE_REQUIRED_INCLUDES) 38 | unset(CMAKE_REQUIRED_LIBRARIES) 39 | 40 | if (MKL_WORKS) 41 | set(MKL_LIBRARIES ${BLAS_LIBRARIES}) 42 | endif() 43 | endif() 44 | 45 | find_package_handle_standard_args(MKL REQUIRED_VARS MKL_INCLUDE_DIRS MKL_LIBRARIES) 46 | -------------------------------------------------------------------------------- /cmake/FindNetlibCblas.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindNetlibCblas 3 | # --------------- 4 | # 5 | # Find NetlibCblas library. 
6 | # This module sets the following variables: 7 | # 8 | # :: 9 | # 10 | # NetlibCblas_FOUND - set to TRUE if a NetlibCblas library was found 11 | # NetlibCblas_INCLUDE_DIRS - Location of NetlibCblas header files 12 | # NetlibCblas_LINKER_FLAGS - linker flags required to link NetlibCblas 13 | # NetlibCblas_LIBRARIES - libraries required to link NetlibCblas 14 | # 15 | # User settings 16 | # ------------- 17 | # 18 | # NETLIB_ROOT is a directory that contains a NetlibCblas installation. 19 | # ENV{NETLIB_ROOT} is an environment variable pointing to directory that contains a NetlibCblas installation. 20 | # 21 | 22 | if (NOT NetlibCblas_INCLUDE_DIRS) 23 | find_path(NetlibCblas_INCLUDE_DIRS cblas_mangling.h PATHS ${NETLIB_ROOT} ENV NETLIB_ROOT PATH_SUFFIXES include DOC "Path to Netlib C BLAS include directory") 24 | endif() 25 | 26 | if (NOT NetlibCblas_LIBRARIES) 27 | find_library(NetlibCblas_LIBRARIES cblas PATHS ${NETLIB_ROOT} ENV NETLIB_ROOT) 28 | endif() 29 | 30 | find_package_handle_standard_args(NetlibCblas REQUIRED_VARS NetlibCblas_INCLUDE_DIRS NetlibCblas_LIBRARIES) 31 | -------------------------------------------------------------------------------- /cmake/FindNetlibLapacke.cmake: -------------------------------------------------------------------------------- 1 | 2 | #.rst: 3 | # FindNetlibLapacke 4 | # ----------------- 5 | # 6 | # Find NetlibLapacke library. 7 | # This module sets the following variables: 8 | # 9 | # :: 10 | # 11 | # NetlibLapacke_FOUND - set to TRUE if a NetlibLapacke library was found 12 | # NetlibLapacke_INCLUDE_DIRS - Location of NetlibLapacke header files 13 | # NetlibLapacke_LINKER_FLAGS - linker flags required to link NetlibLapacke 14 | # NetlibLapacke_LIBRARIES - libraries required to link NetlibLapacke 15 | # 16 | # User settings 17 | # ------------- 18 | # 19 | # NETLIB_ROOT is a directory that contains a NetlibLapacke installation. 
20 | # ENV{NETLIB_ROOT} is an environment variable pointing to directory that contains a NetlibLapacke installation. 21 | # 22 | 23 | if (NOT NetlibLapacke_INCLUDE_DIRS) 24 | find_path(NetlibLapacke_INCLUDE_DIRS lapacke.h PATHS ${NETLIB_ROOT} ENV NETLIB_ROOT PATH_SUFFIXES include DOC "Path to Netlib LapackE include directory") 25 | endif() 26 | 27 | if (NOT NetlibLapacke_LIBRARIES) 28 | find_library(NetlibLapacke_LIBRARIES lapacke PATHS ${NETLIB_ROOT} ENV NETLIB_ROOT) 29 | endif() 30 | 31 | find_package_handle_standard_args(NetlibLapacke REQUIRED_VARS NetlibLapacke_INCLUDE_DIRS NetlibLapacke_LIBRARIES) 32 | -------------------------------------------------------------------------------- /cmake/FindOpenBLAS.cmake: -------------------------------------------------------------------------------- 1 | #.rst: 2 | # FindOpenBLAS 3 | # --------- 4 | # 5 | # Find OpenBLAS library. 6 | # This module sets the following variables: 7 | # 8 | # :: 9 | # 10 | # OpenBLAS_FOUND - set to TRUE if a OpenBLAS library was found 11 | # OpenBLAS_INCLUDE_DIRS - Location of OpenBLAS header files 12 | # OpenBLAS_LINKER_FLAGS - linker flags required to link OpenBLAS 13 | # OpenBLAS_LIBRARIES - libraries required to link OpenBLAS 14 | # 15 | # User settings 16 | # ------------- 17 | # 18 | # OpenBLAS_DIR is a directory that contains a OpenBLAS installation. 19 | # ENV{OPENBLAS_ROOT} is an environment variable pointing to directory that contains a OpenBLAS installation. 
#

#FIXME: lib/cmake/openblas/OpenBLASConfig.cmake

# find_package_handle_standard_args() lives in this standard module; include
# it here so the find-module works no matter what was loaded before it.
include(FindPackageHandleStandardArgs)

# Locate the header directory unless the caller already supplied one.
if (NOT OpenBLAS_INCLUDE_DIRS)
    find_path(OpenBLAS_INCLUDE_DIRS openblas_config.h
        PATHS ${OpenBLAS_DIR} ENV OPENBLAS_ROOT
        PATH_SUFFIXES include
        DOC "Path to OpenBLAS include directory")
endif()

# Locate the library unless the caller already supplied one.
# PATH_SUFFIXES makes <root>/lib and <root>/lib64 candidates, mirroring the
# "include" suffix used for the header search; the root directory itself is
# still searched as before.
if (NOT OpenBLAS_LIBRARIES)
    find_library(OpenBLAS_LIBRARIES openblas
        PATHS ${OpenBLAS_DIR} ENV OPENBLAS_ROOT
        PATH_SUFFIXES lib lib64
        DOC "Path to OpenBLAS library")
endif()

# Sets OpenBLAS_FOUND and reports the result in the usual format.
find_package_handle_standard_args(OpenBLAS REQUIRED_VARS OpenBLAS_INCLUDE_DIRS OpenBLAS_LIBRARIES)
--------------------------------------------------------------------------------
/cmake/Generator.cmake:
--------------------------------------------------------------------------------
#.rst:
# Generator
# ---------
#
# Calls Python `codegen.py` script to generate source code for different
# data types.

#-------------------------------------------------------------------------------
# Parses a list of template source files to find what files should be generated.
#
# @param[in,out] src
#     On input, variable that is a list of template files (source and
#     headers) for codegen to process. May have non-template source files;
#     codegen ignores them.
#     On output, the list of generated files is appended.
#
# Example:
#     set( src zgemm.c plasma_z.h )
#     generate_files( src )
#     # On output, src is zgemm.c plasma_z.h sgemm.c dgemm.c cgemm.c plasma_s.h
#     # plasma_d.h plasma_c.h
#     add_library( plasma ${src} )
#
# Implementation notes:
#   - `tools/codegen.py --depend <files>` is run from CMAKE_CURRENT_SOURCE_DIR
#     and is expected to print one "<outputs>: <input>" line per template,
#     with the outputs separated by single spaces.
#   - The file list and the parsed dependency lines are stored in the cache
#     (<src>_cache, <src>_depends) so codegen is only re-run when the list
#     of files changes.
#
function( generate_files src )
    message( DEBUG "----- generate_files -----" )
    message( DEBUG "src is ${src} = <${${src}}>" )
    message( DEBUG "cache is ${src}_cache = <${${src}_cache}>" )

    # Re-run the dependency scan only when the file list differs from the
    # one recorded by the previous configure run.
    if (NOT "${${src}}" STREQUAL "${${src}_cache}")
        message( STATUS "Running codegen to find files to generate for ${src}" )
        execute_process(
            COMMAND "${Python_EXECUTABLE}" "tools/codegen.py" "--depend" ${${src}}
            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
            RESULT_VARIABLE error
            OUTPUT_VARIABLE ${src}_depends )
        message( DEBUG "codegen error ${error}" )
        message( DEBUG "depends is ${src}_depends = <<<\n${${src}_depends}>>>" )

        if (error)
            # Report the exit status (or launch error string) so the failure
            # can be diagnosed from the configure log.
            message( FATAL_ERROR "codegen returned error ${error}; cannot generate source files." )
        else()
            # Cache src so we don't have to re-run codegen to get the
            # list of dependencies again if src doesn't change.
            set( ${src}_cache ${${src}} CACHE INTERNAL "" )

            # Split lines and cache it. Accept CRLF as well as LF so a stray
            # carriage return never ends up inside a file name.
            string( REGEX REPLACE "\r?\n" ";" ${src}_depends "${${src}_depends}" )
            set( ${src}_depends ${${src}_depends} CACHE INTERNAL "" )
            message( DEBUG "depends is ${src}_depends = <<<${${src}_depends}>>>" )
        endif()
    endif()

    message( STATUS "Adding codegen commands to generate files for ${src}" )
    foreach( depend ${${src}_depends} )
        message( DEBUG "depend = <${depend}>" )
        string( REGEX MATCH "^(.*): (.*)$" out "${depend}" )
        if ("${out}" STREQUAL "")
            # Not an "<outputs>: <input>" line. Without this guard the empty
            # match groups would reach add_custom_command() as an empty
            # OUTPUT list and an empty input file.
            message( WARNING "Ignoring unrecognized codegen dependency line: <${depend}>" )
            continue()
        endif()
        set( outputs ${CMAKE_MATCH_1} )
        set( input ${CMAKE_MATCH_2} )
        string( REGEX REPLACE " " ";" outputs "${outputs}" )
        # The custom command needs absolute output paths; the list returned
        # to the caller keeps the relative names.
        list( TRANSFORM outputs PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/"
              OUTPUT_VARIABLE src_outputs )
        message( DEBUG " input: <${input}>" )
        message( DEBUG " outputs: <${outputs}>" )
        message( DEBUG " src_outputs: <${src_outputs}>" )
        add_custom_command(
            OUTPUT ${src_outputs}
            COMMAND "${Python_EXECUTABLE}" "tools/codegen.py" "${input}"
            DEPENDS "${input}"
            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
            VERBATIM ${CODEGEN} )

        list( APPEND ${src} "${outputs}" )
        message( DEBUG " src: <${${src}}>" )
        message( DEBUG "" )
    endforeach()
    set( ${src} ${${src}} PARENT_SCOPE ) # propagate changes
    message( DEBUG "src is ${src} = <${${src}}>" )
endfunction()
--------------------------------------------------------------------------------
/compute/pclag2z.c:
--------------------------------------------------------------------------------
/**
 *
 * @file
 *
 *  PLASMA is a software package provided by:
 *  University of Tennessee, US,
 *  University of Manchester, UK.
8 | * 9 | * @precisions mixed zc -> ds 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_descriptor.h" 15 | #include "plasma_types.h" 16 | #include "plasma_internal.h" 17 | #include 18 | 19 | 20 | #define As(m, n) (plasma_complex32_t*)plasma_tile_addr(As, m, n) 21 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr( A, m, n) 22 | 23 | /***************************************************************************//** 24 | * Parallel tile conversion of matrix precision from single complex to 25 | * double complex. 26 | * @see plasma_omp_clag2z 27 | ******************************************************************************/ 28 | void plasma_pclag2z(plasma_desc_t As, plasma_desc_t A, 29 | plasma_sequence_t *sequence, plasma_request_t *request) 30 | { 31 | // Return if failed sequence. 32 | if (sequence->status != PlasmaSuccess) 33 | return; 34 | 35 | if (A.type == PlasmaGeneral && As.type == PlasmaGeneral) { 36 | for (int m = 0; m < As.mt; m++) { 37 | int am = plasma_tile_mview(As, m); 38 | int lda = plasma_tile_mmain(As, m); 39 | int ldb = plasma_tile_mmain(A, m); 40 | for (int n = 0; n < As.nt; n++) { 41 | int an = plasma_tile_nview(As, n); 42 | plasma_core_omp_clag2z( 43 | am, an, 44 | As(m, n), lda, 45 | A(m, n), ldb, 46 | sequence, request); 47 | } 48 | } 49 | } 50 | else if (A.type == PlasmaGeneralBand && 51 | As.type == PlasmaGeneralBand) { 52 | for (int n = 0; n < A.nt; n++ ) { 53 | int nvan = plasma_tile_nview(A, n); 54 | int m_start = (imax(0, n*A.nb-A.ku)) / A.nb; 55 | int m_end = (imin(A.m-1, (n+1)*A.nb+A.kl-1)) / A.nb; 56 | for (int m = m_start; m <= m_end; m++) { 57 | int ldam = plasma_tile_mmain_band(A, m, n); 58 | int mvam = plasma_tile_mview(A, m); 59 | plasma_core_omp_clag2z( 60 | mvam, nvan, 61 | As(m, n), ldam, 62 | A(m, n), ldam, 63 | sequence, request); 64 | } 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /compute/pdzamax.c: 
-------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 22 | 23 | /******************************************************************************/ 24 | void plasma_pdzamax(plasma_enum_t colrow, 25 | plasma_desc_t A, double *work, double *values, 26 | plasma_sequence_t *sequence, plasma_request_t *request) 27 | { 28 | // Return if failed sequence. 29 | if (sequence->status != PlasmaSuccess) 30 | return; 31 | 32 | switch (colrow) { 33 | //=================== 34 | // PlasmaColumnwise 35 | //=================== 36 | case PlasmaColumnwise: 37 | for (int m = 0; m < A.mt; m++) { 38 | int mvam = plasma_tile_mview(A, m); 39 | int ldam = plasma_tile_mmain(A, m); 40 | for (int n = 0; n < A.nt; n++) { 41 | int nvan = plasma_tile_nview(A, n); 42 | plasma_core_omp_dzamax(PlasmaColumnwise, 43 | mvam, nvan, 44 | A(m, n), ldam, 45 | &work[A.n*m+n*A.nb], 46 | sequence, request); 47 | } 48 | } 49 | #pragma omp taskwait 50 | plasma_core_omp_damax(PlasmaRowwise, 51 | A.n, A.mt, 52 | work, A.n, 53 | values, 54 | sequence, request); 55 | break; 56 | //================ 57 | // PlasmaRowwise 58 | //================ 59 | case PlasmaRowwise: 60 | for (int m = 0; m < A.mt; m++) { 61 | int mvam = plasma_tile_mview(A, m); 62 | int ldam = plasma_tile_mmain(A, m); 63 | for (int n = 0; n < A.nt; n++) { 64 | int nvan = plasma_tile_nview(A, n); 65 | plasma_core_omp_dzamax(PlasmaRowwise, 66 | mvam, nvan, 67 | A(m, n), ldam, 68 | &work[A.m*n+m*A.mb], 69 | sequence, request); 70 | } 71 
| } 72 | #pragma omp taskwait 73 | plasma_core_omp_damax(PlasmaRowwise, 74 | A.m, A.nt, 75 | work, A.m, 76 | values, 77 | sequence, request); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /compute/pzdesc2ge.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | /******************************************************************************/ 22 | void plasma_pzdesc2ge(plasma_desc_t A, 23 | plasma_complex64_t *pA, int lda, 24 | plasma_sequence_t *sequence, 25 | plasma_request_t *request) 26 | { 27 | // Return if failed sequence. 28 | if (sequence->status != PlasmaSuccess) 29 | return; 30 | 31 | plasma_complex64_t *f77; 32 | plasma_complex64_t *bdl; 33 | 34 | int x1, y1; 35 | int x2, y2; 36 | int n, m, ldt; 37 | 38 | for (m = 0; m < A.mt; m++) { 39 | ldt = plasma_tile_mmain(A, m); 40 | for (n = 0; n < A.nt; n++) { 41 | x1 = n == 0 ? A.j%A.nb : 0; 42 | y1 = m == 0 ? A.i%A.mb : 0; 43 | x2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; 44 | y2 = m == A.mt-1 ? 
(A.i+A.m-1)%A.mb+1 : A.mb; 45 | 46 | f77 = &pA[(size_t)A.nb*lda*n + (size_t)A.mb*m]; 47 | bdl = (plasma_complex64_t*)plasma_tile_addr(A, m, n); 48 | 49 | plasma_core_omp_zlacpy(PlasmaGeneral, PlasmaNoTrans, 50 | y2-y1, x2-x1, 51 | &(bdl[x1*A.nb+y1]), ldt, 52 | &(f77[x1*lda+y1]), lda, 53 | sequence, request); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /compute/pzdesc2pb.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define offset \ 22 | (A.uplo == PlasmaUpper ? A.ku : (A.uplo == PlasmaLower ? 0 : A.ku+A.kl)) 23 | #define bandA(m, n) (&(pA[lda*(A.nb*(n)) + offset+A.mb*((m)-(n))])) 24 | #define tileA(m, n) ((plasma_complex64_t*)plasma_tile_addr(A, (m), (n))) 25 | 26 | /******************************************************************************/ 27 | void plasma_pzdesc2pb(plasma_desc_t A, 28 | plasma_complex64_t *pA, int lda, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Return if failed sequence. 
33 | if (sequence->status != PlasmaSuccess) 34 | return; 35 | 36 | for (int n = 0; n < A.nt; n++) 37 | { 38 | int m_start, m_end; 39 | if (A.uplo == PlasmaGeneral) { 40 | m_start = (imax(0, n*A.nb-A.ku-A.kl)) / A.nb; 41 | m_end = (imin(A.m-1, (n+1)*A.nb+A.kl-1)) / A.nb; 42 | } 43 | else if (A.uplo == PlasmaUpper) { 44 | m_start = (imax(0, n*A.nb-A.ku-A.kl)) / A.nb; 45 | m_end = (imin(A.m-1, (n+1)*A.nb-1)) / A.nb; 46 | } 47 | else { 48 | m_start = (imax(0, n*A.nb)) / A.nb; 49 | m_end = (imin(A.m-1, (n+1)*A.nb+A.kl-1)) / A.nb; 50 | } 51 | for (int m = m_start; m <= m_end; m++) 52 | { 53 | int mb = imin(A.mb, A.m-m*A.mb); 54 | int nb = imin(A.nb, A.n-n*A.nb); 55 | plasma_core_omp_zlacpy_tile2lapack_band( 56 | A.uplo, m, n, 57 | mb, nb, A.mb, A.kl, A.ku, 58 | tileA(m, n), plasma_tile_mmain_band(A, m, n), 59 | bandA(m, n), lda-1); 60 | //tileA(m_start,n), nb*nb, INOUT | GATHERV); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /compute/pzdesc2tr.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | /******************************************************************************/ 22 | void plasma_pzdesc2tr(plasma_desc_t A, 23 | plasma_complex64_t *pA, int lda, 24 | plasma_sequence_t *sequence, 25 | plasma_request_t *request) 26 | { 27 | // Return if failed sequence. 28 | if (sequence->status != PlasmaSuccess) 29 | return; 30 | 31 | for (int m = 0; m < A.mt; m++) { 32 | int ldt = plasma_tile_mmain(A, m); 33 | int n_start = (A.type == PlasmaUpper ? 
m : 0); 34 | int n_end = (A.type == PlasmaUpper ? A.nt : m+1); 35 | for (int n = n_start; n < n_end; n++) { 36 | int x1 = n == 0 ? A.j%A.nb : 0; 37 | int y1 = m == 0 ? A.i%A.mb : 0; 38 | int x2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; 39 | int y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; 40 | 41 | plasma_complex64_t *f77 = &pA[(size_t)A.nb*lda*n + (size_t)A.mb*m]; 42 | plasma_complex64_t *bdl = 43 | (plasma_complex64_t*)plasma_tile_addr(A, m, n); 44 | 45 | plasma_core_omp_zlacpy(PlasmaGeneral, PlasmaNoTrans, 46 | y2-y1, x2-x1, 47 | &(bdl[x1*A.nb+y1]), ldt, 48 | &(f77[x1*lda+y1]), lda, 49 | sequence, request); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /compute/pzgb2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | /******************************************************************************/ 22 | void plasma_pzgb2desc(plasma_complex64_t *pA, int lda, 23 | plasma_desc_t A, 24 | plasma_sequence_t *sequence, 25 | plasma_request_t *request) 26 | { 27 | // Return if failed sequence. 28 | if (sequence->status != PlasmaSuccess) 29 | return; 30 | 31 | plasma_complex64_t *f77; 32 | plasma_complex64_t *bdl; 33 | 34 | int x1, y1; 35 | int x2, y2; 36 | int n, m, ldt; 37 | for (m = 0; m < A.mt; m++) { 38 | for (n = 0; n < A.nt; n++) { 39 | // don't want to copy tiles without elements because 40 | // (plasma_tile_addr) cannot handle it. 
41 | // Instead of "m-n >= A.kut" (as one might expect), we 42 | // Calculate kut without normal space for transformations. 43 | if(m-n >= A.klt || n-m >= 1+(A.ku+A.nb-1)/A.nb) 44 | { 45 | continue; 46 | } 47 | ldt = plasma_tile_mmain_band(A, m, n); // possibly too many calls. 48 | // plasma_tile_mmain_band need not know m,n 49 | x1 = n == 0 ? A.j%A.nb : 0; 50 | y1 = m == 0 ? A.i%A.mb : 0; 51 | x2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; 52 | y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; 53 | 54 | 55 | f77 = &pA[(size_t)A.nb*lda*n + (size_t)A.mb*m]; 56 | bdl = (plasma_complex64_t*)plasma_tile_addr(A, m, n); 57 | plasma_core_omp_zlacpy( 58 | PlasmaGeneralBand, PlasmaNoTrans, 59 | y2-y1, x2-x1, 60 | &(f77[x1*lda+y1]), lda, 61 | &(bdl[x1*A.nb+y1]), ldt, 62 | sequence, request); 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /compute/pzge2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | /******************************************************************************/ 22 | void plasma_pzge2desc(plasma_complex64_t *pA, int lda, 23 | plasma_desc_t A, 24 | plasma_sequence_t *sequence, 25 | plasma_request_t *request) 26 | { 27 | // Return if failed sequence. 
28 | if (sequence->status != PlasmaSuccess) 29 | return; 30 | 31 | plasma_complex64_t *f77; 32 | plasma_complex64_t *bdl; 33 | 34 | int x1, y1; 35 | int x2, y2; 36 | int n, m, ldt; 37 | 38 | for (m = 0; m < A.mt; m++) { 39 | ldt = plasma_tile_mmain(A, m); 40 | for (n = 0; n < A.nt; n++) { 41 | x1 = n == 0 ? A.j%A.nb : 0; 42 | y1 = m == 0 ? A.i%A.mb : 0; 43 | x2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; 44 | y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; 45 | 46 | f77 = &pA[(size_t)A.nb*lda*n + (size_t)A.mb*m]; 47 | bdl = (plasma_complex64_t*)plasma_tile_addr(A, m, n); 48 | 49 | plasma_core_omp_zlacpy(PlasmaGeneral, PlasmaNoTrans, 50 | y2-y1, x2-x1, 51 | &(f77[x1*lda+y1]), lda, 52 | &(bdl[x1*A.nb+y1]), ldt, 53 | sequence, request); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /compute/pzgeadd.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m,n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 22 | #define B(m,n) (plasma_complex64_t*)plasma_tile_addr(B, m, n) 23 | 24 | /***************************************************************************//** 25 | * Parallel tile matrix-matrix addition. 
26 | * @see plasma_omp_zgeadd 27 | ******************************************************************************/ 28 | void plasma_pzgeadd(plasma_enum_t transa, 29 | plasma_complex64_t alpha, plasma_desc_t A, 30 | plasma_complex64_t beta, plasma_desc_t B, 31 | plasma_sequence_t *sequence, plasma_request_t *request) 32 | { 33 | // Return if failed sequence. 34 | if (sequence->status != PlasmaSuccess) 35 | return; 36 | 37 | //=============== 38 | // PlasmaNoTrans 39 | //=============== 40 | if (transa == PlasmaNoTrans) { 41 | for (int m = 0; m < B.mt; m++) { 42 | int mvbm = plasma_tile_mview(B, m); 43 | int ldam = plasma_tile_mmain(A, m); 44 | int ldbm = plasma_tile_mmain(B, m); 45 | for (int n = 0; n < B.nt; n++) { 46 | int nvbn = plasma_tile_nview(B, n); 47 | plasma_core_omp_zgeadd( 48 | transa, mvbm, nvbn, 49 | alpha, A(m, n), ldam, 50 | beta, B(m, n), ldbm, 51 | sequence, request); 52 | } 53 | } 54 | } 55 | //==================== 56 | // Plasma[_Conj]Trans 57 | //==================== 58 | else { 59 | for (int m = 0; m < B.mt; m++) { 60 | int mvbm = plasma_tile_mview(B, m); 61 | int ldbm = plasma_tile_mmain(B, m); 62 | for (int n = 0; n < B.nt; n++) { 63 | int nvbn = plasma_tile_nview(B, n); 64 | int ldan = plasma_tile_mmain(A, n); 65 | plasma_core_omp_zgeadd( 66 | transa, mvbm, nvbn, 67 | alpha, A(n, m), ldan, 68 | beta, B(m, n), ldbm, 69 | sequence, request); 70 | } 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /compute/pzgecpy_tile2lapack_band.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include "plasma_core_blas.h" 20 | 21 | #define A(m, n) ((plasma_complex64_t*) plasma_tile_addr(A, m, n)) 22 | #define pA_band(m_, n_) &(pA_band[ (m_) + lda_band*((n_)*nb )]) 23 | /**********************************************************************//** 24 | * Parallel copy of a band matrix from full nxn tile storage to compact band 25 | * storage (lda_bandxn). 26 | * NOTE : this function transform the 27 | * Lower/Upper Tile band matrix to LOWER Band storage matrix. For 28 | * Lower it copy it directly. For Upper it conjtransposed during the 29 | * copy. 30 | * */ 31 | void plasma_pzgecpy_tile2lapack_band(plasma_enum_t uplo, plasma_desc_t A, 32 | plasma_complex64_t *pA_band, int lda_band, 33 | plasma_sequence_t *sequence, plasma_request_t *request) 34 | { 35 | // Return if failed sequence. 36 | if (sequence->status != PlasmaSuccess) 37 | return; 38 | 39 | /*============================================= 40 | * NOTE : 41 | * this function transforms the Lower/Upper Tile 42 | * band matrix to LOWER Band storage matrix. 43 | * For Lower it copy it directly. 44 | * For Upper it conjtransposed during the copy. 
45 | *=============================================*/ 46 | 47 | int nb = A.mb; 48 | int ldx = lda_band - 1; 49 | 50 | // copy Lower to Lower 51 | if ( uplo == PlasmaLower ) { 52 | for (int j = 0; j < imin(A.mt, A.nt); j++) { 53 | int mvaj = plasma_tile_mview(A, j); 54 | int nvaj = plasma_tile_nview(A, j); 55 | int ldaj = plasma_tile_mmain(A, j); 56 | 57 | plasma_core_omp_zlacpy(PlasmaLower, PlasmaNoTrans, 58 | mvaj, nvaj, 59 | A(j, j), ldaj, pA_band(0, j), ldx, 60 | sequence, request); 61 | 62 | if (j< imin(A.mt, A.nt)-1 ) { 63 | mvaj = plasma_tile_mview(A, j+1); 64 | ldaj = plasma_tile_mmain(A, j+1); 65 | plasma_core_omp_zlacpy(PlasmaUpper, PlasmaNoTrans, 66 | mvaj, nvaj, 67 | A(j+1, j), ldaj, pA_band(nb, j), ldx, 68 | sequence, request); 69 | } 70 | } 71 | } 72 | //Mawussi: This comment is misleading : I 73 | // think it is Upper to Upper 74 | // conjtranspose Upper when copying it to Lower 75 | else if ( uplo == PlasmaUpper ) { 76 | for (int j = 0; j < imin(A.mt, A.nt); j++) { 77 | int mvaj = plasma_tile_mview(A, j); 78 | int nvaj = plasma_tile_nview(A, j); 79 | int ldaj = plasma_tile_mmain(A, j); 80 | 81 | plasma_core_omp_zlacpy(PlasmaUpper, PlasmaNoTrans, 82 | mvaj, nvaj, 83 | A(j, j), ldaj, pA_band(nb, j), ldx, 84 | sequence, request); 85 | 86 | if (j s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 22 | #define T(m, n) (plasma_complex64_t*)plasma_tile_addr(T, m, n) 23 | 24 | /***************************************************************************//** 25 | * Parallel tile LQ factorization - dynamic scheduling 26 | * @see plasma_omp_zgelqf 27 | **/ 28 | void plasma_pzgelqf(plasma_desc_t A, plasma_desc_t T, 29 | plasma_workspace_t work, 30 | plasma_sequence_t *sequence, plasma_request_t *request) 
31 | { 32 | // Return if failed sequence. 33 | if (sequence->status != PlasmaSuccess) 34 | return; 35 | 36 | // Set inner blocking from the T tile row-dimension. 37 | int ib = T.mb; 38 | 39 | for (int k = 0; k < imin(A.mt, A.nt); k++) { 40 | int mvak = plasma_tile_mview(A, k); 41 | int nvak = plasma_tile_nview(A, k); 42 | int ldak = plasma_tile_mmain(A, k); 43 | plasma_core_omp_zgelqt( 44 | mvak, nvak, ib, 45 | A(k, k), ldak, 46 | T(k, k), T.mb, 47 | work, 48 | sequence, request); 49 | 50 | for (int m = k+1; m < A.mt; m++) { 51 | int mvam = plasma_tile_mview(A, m); 52 | int ldam = plasma_tile_mmain(A, m); 53 | plasma_core_omp_zunmlq( 54 | PlasmaRight, Plasma_ConjTrans, 55 | mvam, nvak, imin(mvak, nvak), ib, 56 | A(k, k), ldak, 57 | T(k, k), T.mb, 58 | A(m, k), ldam, 59 | work, 60 | sequence, request); 61 | } 62 | for (int n = k+1; n < A.nt; n++) { 63 | int nvan = plasma_tile_nview(A, n); 64 | plasma_core_omp_ztslqt( 65 | mvak, nvan, ib, 66 | A(k, k), ldak, 67 | A(k, n), ldak, 68 | T(k, n), T.mb, 69 | work, 70 | sequence, request); 71 | 72 | for (int m = k+1; m < A.mt; m++) { 73 | int mvam = plasma_tile_mview(A, m); 74 | int ldam = plasma_tile_mmain(A, m); 75 | plasma_core_omp_ztsmlq( 76 | PlasmaRight, Plasma_ConjTrans, 77 | mvam, A.nb, mvam, nvan, mvak, ib, 78 | A(m, k), ldam, 79 | A(m, n), ldam, 80 | A(k, n), ldak, 81 | T(k, n), T.mb, 82 | work, 83 | sequence, request); 84 | } 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /compute/pzgeqrf.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 22 | #define T(m, n) (plasma_complex64_t*)plasma_tile_addr(T, m, n) 23 | 24 | /***************************************************************************//** 25 | * Parallel tile QR factorization - dynamic scheduling 26 | * @see plasma_omp_zgeqrf 27 | **/ 28 | void plasma_pzgeqrf(plasma_desc_t A, plasma_desc_t T, 29 | plasma_workspace_t work, 30 | plasma_sequence_t *sequence, plasma_request_t *request) 31 | { 32 | // Return if failed sequence. 33 | if (sequence->status != PlasmaSuccess) 34 | return; 35 | 36 | // Set inner blocking from the T tile row-dimension. 37 | int ib = T.mb; 38 | 39 | for (int k = 0; k < imin(A.mt, A.nt); k++) { 40 | int mvak = plasma_tile_mview(A, k); 41 | int nvak = plasma_tile_nview(A, k); 42 | int ldak = plasma_tile_mmain(A, k); 43 | plasma_core_omp_zgeqrt( 44 | mvak, nvak, ib, 45 | A(k, k), ldak, 46 | T(k, k), T.mb, 47 | work, 48 | sequence, request); 49 | 50 | for (int n = k+1; n < A.nt; n++) { 51 | int nvan = plasma_tile_nview(A, n); 52 | plasma_core_omp_zunmqr( 53 | PlasmaLeft, Plasma_ConjTrans, 54 | mvak, nvan, imin(mvak, nvak), ib, 55 | A(k, k), ldak, 56 | T(k, k), T.mb, 57 | A(k, n), ldak, 58 | work, 59 | sequence, request); 60 | } 61 | for (int m = k+1; m < A.mt; m++) { 62 | int mvam = plasma_tile_mview(A, m); 63 | int ldam = plasma_tile_mmain(A, m); 64 | plasma_core_omp_ztsqrt( 65 | mvam, nvak, ib, 66 | A(k, k), ldak, 67 | A(m, k), ldam, 68 | T(m, k), T.mb, 69 | work, 70 | sequence, request); 71 | 72 | for (int n = k+1; n < A.nt; n++) { 73 | int nvan = plasma_tile_nview(A, n); 74 | plasma_core_omp_ztsmqr( 75 | PlasmaLeft, Plasma_ConjTrans, 76 | A.mb, nvan, mvam, nvan, nvak, ib, 77 | 
A(k, n), ldak, 78 | A(m, n), ldam, 79 | A(m, k), ldam, 80 | T(m, k), T.mb, 81 | work, 82 | sequence, request); 83 | } 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /compute/pzgetri_aux.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 22 | #define W(m) (plasma_complex64_t*)plasma_tile_addr(W, m, 0) 23 | 24 | /***************************************************************************//** 25 | * Parallel zgetri auxrialiry routine - dynamic scheduling 26 | **/ 27 | void plasma_pzgetri_aux(plasma_desc_t A, plasma_desc_t W, 28 | plasma_sequence_t *sequence, plasma_request_t *request) 29 | { 30 | if (sequence->status != PlasmaSuccess) 31 | return; 32 | 33 | for (int k = A.mt-1; k >= 0; k--) { 34 | int mvak = plasma_tile_mview(A, k); 35 | int nvak = plasma_tile_nview(A, k); 36 | 37 | int ldak = plasma_tile_mmain(A, k); 38 | int ldakn= plasma_tile_mmain(A, k); 39 | int ldwk = plasma_tile_mmain(W, k); 40 | 41 | // copy L(k, k) into W(k) 42 | plasma_core_omp_zlacpy( 43 | PlasmaLower, PlasmaNoTrans, 44 | mvak, nvak, 45 | A(k, k), ldak, W(k), ldwk, 46 | sequence, request ); 47 | // zero strictly-lower part of U(k, k) 48 | plasma_core_omp_zlaset( 49 | PlasmaLower, 50 | ldak, ldakn, 1, 0, 51 | nvak-1, nvak-1, 52 | 0.0, 0.0, A(k, k)); 53 | 54 | for (int m = k+1; m < A.mt; m++) { 55 | int mvam = plasma_tile_mview(A, m); 56 | int ldam = plasma_tile_mmain(A, m); 57 | int ldwm = 
plasma_tile_mmain(W, m); 58 | // copy L(m, k) to W(m) 59 | plasma_core_omp_zlacpy( 60 | PlasmaGeneral, PlasmaNoTrans, 61 | mvam, nvak, 62 | A(m, k), ldam, W(m), ldwm, 63 | sequence, request ); 64 | // zero U(m, k) 65 | plasma_core_omp_zlaset( 66 | PlasmaGeneral, 67 | ldam, ldakn, 0, 0, 68 | mvam, nvak, 69 | 0.0, 0.0, A(m, k)); 70 | } 71 | 72 | // update A(:, k) = A(:, k)-A(:, k+1:nt)*L(k+1:nt, k) 73 | for (int m = 0; m < A.mt; m++) { 74 | int mvam = plasma_tile_mview(A, m); 75 | int ldam = plasma_tile_mmain(A, m); 76 | for (int n = k+1; n < A.nt; n++) { 77 | int nvan = plasma_tile_nview(A, n); 78 | int ldwn = plasma_tile_mmain(W, n); 79 | plasma_core_omp_zgemm( 80 | PlasmaNoTrans, PlasmaNoTrans, 81 | mvam, nvak, nvan, 82 | -1.0, A(m, n), ldam, 83 | W( n ), ldwn, 84 | 1.0, A(m, k), ldam, 85 | sequence, request); 86 | } 87 | } 88 | 89 | // compute A(:, k) = A(:, k) L(k, k)^{-1} 90 | for (int m = 0; m < A.mt; m++) { 91 | int mvam = plasma_tile_mview(A, m); 92 | int ldam = plasma_tile_mmain(A, m); 93 | plasma_core_omp_ztrsm( 94 | PlasmaRight, PlasmaLower, 95 | PlasmaNoTrans, PlasmaUnit, 96 | mvam, nvak, 97 | 1.0, W( k ), ldwk, 98 | A( m, k ),ldam, 99 | sequence, request ); 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /compute/pzhecpy_tile2lapack_band.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include "plasma_core_blas.h" 20 | 21 | #define A(i_, j_) (plasma_complex64_t*) plasma_tile_addr(A, i_, j_) 22 | #define Aband(i_, j_) &(Aband[ (i_)*nb + lda_band*((j_)*nb) ]) 23 | 24 | /***************************************************************************//** 25 | * Parallel copy of a Hermitian band matrix, with bandwidth of nb (1 tile), 26 | * from full n-by-n tile storage to compact band storage (lda_band-by-n). 27 | * As this function is internal and the space is the 28 | * same for either Lower or Upper, it ALWAYS converts to lower band and 29 | * then the bulge chasing will always work with a lower band matrix. 30 | **/ 31 | void plasma_pzhecpy_tile2lapack_band( 32 | plasma_enum_t uplo, 33 | plasma_desc_t A, 34 | plasma_complex64_t *Aband, int lda_band, 35 | plasma_sequence_t *sequence, plasma_request_t *request) 36 | { 37 | // Return if failed sequence. 38 | if (sequence->status != PlasmaSuccess) 39 | return; 40 | 41 | int nb = A.mb; 42 | int ldx = lda_band - 1; 43 | int minmn = imin( A.mt, A.nt ); 44 | 45 | //============================================= 46 | // NOTE : 47 | // this function transform the Lower/Upper Tile 48 | // band matrix to LOWER Band storage matrix. 49 | // For Lower it copies it directly. 50 | // For Upper it is conjugate-transposed during the copy. 
51 | //============================================= 52 | if (uplo == PlasmaLower) { 53 | // copy Lower to Lower 54 | for (int j = 0; j < minmn; ++j) { 55 | int mvaj = plasma_tile_mview(A, j); 56 | int nvaj = plasma_tile_nview(A, j); 57 | int ldaj = plasma_tile_mmain(A, j); 58 | 59 | plasma_core_omp_zlacpy( 60 | PlasmaLower, PlasmaNoTrans, 61 | mvaj, nvaj, 62 | A(j, j), ldaj, Aband(0, j), ldx, 63 | sequence, request); 64 | 65 | if (j < minmn - 1) { 66 | mvaj = plasma_tile_mview(A, j+1); 67 | ldaj = plasma_tile_mmain(A, j+1); 68 | plasma_core_omp_zlacpy( 69 | PlasmaUpper, PlasmaNoTrans, 70 | mvaj, nvaj, 71 | A(j+1, j), ldaj, Aband(1, j), ldx, 72 | sequence, request); 73 | } 74 | } 75 | } 76 | else if (uplo == PlasmaUpper) { 77 | // conj-transpose Upper when copying it to Lower 78 | for (int j = 0; j < minmn; ++j) { 79 | int mvaj = plasma_tile_mview(A, j); 80 | int nvaj = plasma_tile_nview(A, j); 81 | int ldaj = plasma_tile_mmain(A, j); 82 | 83 | plasma_core_omp_zlacpy( 84 | PlasmaUpper, PlasmaConjTrans, 85 | mvaj, nvaj, 86 | A(j, j), ldaj, Aband(0, j), ldx, 87 | sequence, request); 88 | 89 | if (j < minmn - 1) { 90 | nvaj = plasma_tile_nview(A, j+1); 91 | plasma_core_omp_zlacpy( 92 | PlasmaLower, PlasmaConjTrans, 93 | mvaj, nvaj, 94 | A(j, j+1), ldaj, Aband(1, j), ldx, 95 | sequence, request); 96 | } 97 | } 98 | } 99 | } 100 | 101 | #undef Aband 102 | #undef A 103 | -------------------------------------------------------------------------------- /compute/pzlag2c.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions mixed zc -> ds 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_descriptor.h" 15 | #include "plasma_types.h" 16 | #include "plasma_internal_zc.h" 17 | #include 18 | 19 | /******************************************************************************/ 20 | static inline int imin(int a, int b) 21 | { 22 | if (a < b) 23 | return a; 24 | else 25 | return b; 26 | } 27 | 28 | /******************************************************************************/ 29 | static inline int imax(int a, int b) 30 | { 31 | if (a > b) 32 | return a; 33 | else 34 | return b; 35 | } 36 | 37 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr( A, m, n) 38 | #define As(m, n) (plasma_complex32_t*)plasma_tile_addr(As, m, n) 39 | 40 | /***************************************************************************//** 41 | * Parallel tile conversion of matrix precision from double complex to 42 | * single complex. 43 | * @see plasma_omp_zlag2c 44 | * 45 | * If A and As are general band matrix they must have the same specs. 46 | ******************************************************************************/ 47 | void plasma_pzlag2c(plasma_desc_t A, plasma_desc_t As, 48 | plasma_sequence_t *sequence, plasma_request_t *request) 49 | { 50 | // Return if failed sequence. 
51 | if (sequence->status != PlasmaSuccess) 52 | return; 53 | if (A.type == PlasmaGeneral && As.type == PlasmaGeneral) { 54 | for (int m = 0; m < A.mt; m++) { 55 | int am = plasma_tile_mview(A, m); 56 | int lda = plasma_tile_mmain(A, m); 57 | int ldb = plasma_tile_mmain(As, m); 58 | for (int n = 0; n < A.nt; n++) { 59 | int an = plasma_tile_nview(A, n); 60 | plasma_core_omp_zlag2c( 61 | am, an, 62 | A(m, n), lda, 63 | As(m, n), ldb, 64 | sequence, request); 65 | } 66 | } 67 | } 68 | else if (A.type == PlasmaGeneralBand && 69 | As.type == PlasmaGeneralBand) { 70 | for (int n = 0; n < A.nt; n++ ) { 71 | int nvan = plasma_tile_nview(A, n); 72 | int m_start = (imax(0, n*A.nb-A.ku)) / A.nb; 73 | int m_end = (imin(A.m-1, (n+1)*A.nb+A.kl-1)) / A.nb; 74 | for (int m = m_start; m <= m_end; m++) { 75 | int ldam = plasma_tile_mmain_band(A, m, n); 76 | int mvam = plasma_tile_mview(A, m); 77 | plasma_core_omp_zlag2c( 78 | mvam, nvan, 79 | A(m, n), ldam, 80 | As(m, n), ldam, 81 | sequence, request); 82 | } 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /compute/pzlascl.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_descriptor.h" 15 | #include "plasma_internal.h" 16 | #include "plasma_types.h" 17 | #include 18 | 19 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 20 | 21 | /******************************************************************************/ 22 | void plasma_pzlascl(plasma_enum_t uplo, 23 | double cfrom, double cto, 24 | plasma_desc_t A, 25 | plasma_sequence_t *sequence, plasma_request_t *request) 26 | { 27 | // Return if failed sequence. 
28 | if (sequence->status != PlasmaSuccess) 29 | return; 30 | 31 | switch (uplo) { 32 | //============== 33 | // PlasmaUpper 34 | //============== 35 | case PlasmaUpper: 36 | for (int m = 0; m < A.mt; m++) { 37 | int mvam = plasma_tile_mview(A, m); 38 | int ldam = plasma_tile_mmain(A, m); 39 | if (m < A.nt) { 40 | int nvam = plasma_tile_nview(A, m); 41 | plasma_core_omp_zlascl( 42 | PlasmaUpper, 43 | cfrom, cto, 44 | mvam, nvam, 45 | A(m, m), ldam, 46 | sequence, request); 47 | } 48 | for (int n = m+1; n < A.nt; n++) { 49 | int nvan = plasma_tile_nview(A, n); 50 | plasma_core_omp_zlascl( 51 | PlasmaGeneral, 52 | cfrom, cto, 53 | mvam, nvan, 54 | A(m, n), ldam, 55 | sequence, request); 56 | } 57 | } 58 | break; 59 | //============== 60 | // PlasmaLower 61 | //============== 62 | case PlasmaLower: 63 | for (int m = 0; m < A.mt; m++) { 64 | int mvam = plasma_tile_mview(A, m); 65 | int ldam = plasma_tile_mmain(A, m); 66 | if (m < A.nt) { 67 | int nvam = plasma_tile_nview(A, m); 68 | plasma_core_omp_zlascl( 69 | PlasmaLower, 70 | cfrom, cto, 71 | mvam, nvam, 72 | A(m, m), ldam, 73 | sequence, request); 74 | } 75 | for (int n = 0; n < imin(m, A.nt); n++) { 76 | int nvan = plasma_tile_nview(A, n); 77 | plasma_core_omp_zlascl( 78 | PlasmaGeneral, 79 | cfrom, cto, 80 | mvam, nvan, 81 | A(m, n), ldam, 82 | sequence, request); 83 | } 84 | } 85 | break; 86 | //================ 87 | // PlasmaGeneral 88 | //================ 89 | case PlasmaGeneral: 90 | default: 91 | for (int m = 0; m < A.mt; m++) { 92 | int mvam = plasma_tile_mview(A, m); 93 | int ldam = plasma_tile_mmain(A, m); 94 | for (int n = 0; n < A.nt; n++) { 95 | int nvan = plasma_tile_nview(A, n); 96 | plasma_core_omp_zlascl( 97 | PlasmaGeneral, 98 | cfrom, cto, 99 | mvam, nvan, 100 | A(m, n), ldam, 101 | sequence, request); 102 | } 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /compute/pzlaset.c: 
-------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m, n) ((plasma_complex64_t*)plasma_tile_addr(A, m, n)) 22 | 23 | /***************************************************************************//** 24 | * Initializes the matrix A to beta on the diagonal and alpha on the 25 | * offdiagonals. Applies alpha correctly for any shape of the submatrix 26 | * described by A, but applies beta correctly only for submatrices aligned 27 | * with the diagonal of the main matrix (A.i = A.j). 28 | **/ 29 | void plasma_pzlaset(plasma_enum_t uplo, 30 | plasma_complex64_t alpha, plasma_complex64_t beta, 31 | plasma_desc_t A, 32 | plasma_sequence_t *sequence, plasma_request_t *request) 33 | { 34 | // Return if failed sequence. 35 | if (sequence->status != PlasmaSuccess) 36 | return; 37 | 38 | int i, j; 39 | int m, n; 40 | 41 | int lm1 = A.gm/A.mb; 42 | int ln1 = A.gn/A.nb; 43 | 44 | for (i = 0; i < A.mt; i++) { 45 | if (i == 0 && i == A.mt-1) 46 | m = A.m; 47 | else if (i == 0) 48 | m = A.mb-A.i%A.mb; 49 | else if (i == A.mt-1) 50 | m = (A.i+A.m+A.mb-1)%A.mb+1; 51 | else 52 | m = A.mb; 53 | 54 | for (j = 0; j < A.nt; j++) { 55 | if (j == 0 && j == A.nt-1) 56 | n = A.n; 57 | else if (j == 0) 58 | n = A.nb-A.j%A.nb; 59 | else if (j == A.nt-1) 60 | n = (A.j+A.n+A.nb-1)%A.nb+1; 61 | else 62 | n = A.nb; 63 | 64 | if (uplo == PlasmaGeneral || 65 | (uplo == PlasmaLower && i >= j) || 66 | (uplo == PlasmaUpper && i <= j)) 67 | plasma_core_omp_zlaset(i == j ? uplo : PlasmaGeneral, 68 | A.i/A.mb+i == lm1 ? 
A.gm-lm1*A.mb : A.mb, 69 | A.j/A.nb+j == ln1 ? A.gn-ln1*A.nb : A.nb, 70 | i == 0 ? A.i%A.mb : 0, 71 | j == 0 ? A.j%A.nb : 0, 72 | m, 73 | n, 74 | alpha, 75 | i != j ? alpha : beta, 76 | A(i, j)); 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /compute/pzpb2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define offset \ 22 | (A.uplo == PlasmaUpper ? A.ku : (A.uplo == PlasmaLower ? 0 : A.ku+A.kl)) 23 | #define bandA(m, n) (&(pA[lda*(A.nb*(n)) + offset + A.mb*((m)-(n))])) 24 | #define tileA(m, n) ((plasma_complex64_t*)plasma_tile_addr(A, (m), (n))) 25 | 26 | /******************************************************************************/ 27 | void plasma_pzpb2desc(plasma_complex64_t *pA, int lda, 28 | plasma_desc_t A, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Return if failed sequence. 
33 | if (sequence->status != PlasmaSuccess) 34 | return; 35 | 36 | for (int n = 0; n < A.nt; n++) 37 | { 38 | int m_start, m_end; 39 | if (A.uplo == PlasmaGeneral) { 40 | m_start = (imax(0, n*A.nb-A.ku-A.kl)) / A.nb; 41 | m_end = (imin(A.m-1, (n+1)*A.nb+A.kl-1)) / A.nb; 42 | } 43 | else if (A.uplo == PlasmaUpper) { 44 | m_start = (imax(0, n*A.nb-A.ku)) / A.nb; 45 | m_end = (imin(A.m-1, (n+1)*A.nb-1)) / A.nb; 46 | } 47 | else { 48 | m_start = (imax(0, n*A.nb)) / A.nb; 49 | m_end = (imin(A.m-1, (n+1)*A.nb+A.kl-1)) / A.nb; 50 | } 51 | for (int m = m_start; m <= m_end; m++) 52 | { 53 | int mb = imin(A.mb, A.m-m*A.mb); 54 | int nb = imin(A.nb, A.n-n*A.nb); 55 | plasma_core_omp_zlacpy_lapack2tile_band( 56 | A.uplo, m, n, 57 | mb, nb, A.mb, A.kl, A.ku, 58 | bandA(m, n), lda-1, 59 | tileA(m, n), plasma_tile_mmain_band(A, m, n)); 60 | //tileA(i_start,n), nb*nb, INOUT | GATHERV); 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /compute/pztr2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | /******************************************************************************/ 22 | void plasma_pztr2desc(plasma_complex64_t *pA, int lda, 23 | plasma_desc_t A, 24 | plasma_sequence_t *sequence, 25 | plasma_request_t *request) 26 | { 27 | // Return if failed sequence. 28 | if (sequence->status != PlasmaSuccess) 29 | return; 30 | 31 | for (int m = 0; m < A.mt; m++) { 32 | int ldt = plasma_tile_mmain(A, m); 33 | int n_start = (A.type == PlasmaUpper ? 
m : 0); 34 | int n_end = (A.type == PlasmaUpper ? A.nt : m+1); 35 | for (int n = n_start; n < n_end; n++) { 36 | int x1 = n == 0 ? A.j%A.nb : 0; 37 | int y1 = m == 0 ? A.i%A.mb : 0; 38 | int x2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb; 39 | int y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb; 40 | 41 | plasma_complex64_t *f77 = &pA[(size_t)A.nb*lda*n + (size_t)A.mb*m]; 42 | plasma_complex64_t *bdl = (plasma_complex64_t*)plasma_tile_addr(A, m, n); 43 | 44 | plasma_core_omp_zlacpy(PlasmaGeneral, PlasmaNoTrans, 45 | y2-y1, x2-x1, 46 | &(f77[x1*lda+y1]), lda, 47 | &(bdl[x1*A.nb+y1]), ldt, 48 | sequence, request); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /compute/pzunglq.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 22 | #define T(m, n) (plasma_complex64_t*)plasma_tile_addr(T, m, n) 23 | #define Q(m, n) (plasma_complex64_t*)plasma_tile_addr(Q, m, n) 24 | 25 | /***************************************************************************//** 26 | * Parallel construction of Q using tile V (application to identity) 27 | **/ 28 | void plasma_pzunglq(plasma_desc_t A, plasma_desc_t T, plasma_desc_t Q, 29 | plasma_workspace_t work, 30 | plasma_sequence_t *sequence, plasma_request_t *request) 31 | { 32 | // Return if failed sequence. 33 | if (sequence->status != PlasmaSuccess) 34 | return; 35 | 36 | // Set inner blocking from the T tile row-dimension. 
37 | int ib = T.mb; 38 | 39 | for (int k = imin(A.mt, A.nt)-1; k >= 0; k--) { 40 | int mvak = plasma_tile_mview(A, k); 41 | int nvak = plasma_tile_nview(A, k); 42 | int nvqk = plasma_tile_nview(Q, k); 43 | int ldak = plasma_tile_mmain(A, k); 44 | for (int n = Q.nt-1; n > k; n--) { 45 | int nvqn = plasma_tile_nview(Q, n); 46 | for (int m = k; m < Q.mt; m++) { 47 | int mvqm = plasma_tile_mview(Q, m); 48 | int ldqm = plasma_tile_mmain(Q, m); 49 | plasma_core_omp_ztsmlq( 50 | PlasmaRight, PlasmaNoTrans, 51 | mvqm, Q.nb, mvqm, nvqn, mvak, ib, 52 | Q(m, k), ldqm, 53 | Q(m, n), ldqm, 54 | A(k, n), ldak, 55 | T(k, n), T.mb, 56 | work, 57 | sequence, request); 58 | } 59 | } 60 | for (int m = k; m < Q.mt; m++) { 61 | int mvqm = plasma_tile_mview(Q, m); 62 | int ldqm = plasma_tile_mmain(Q, m); 63 | plasma_core_omp_zunmlq( 64 | PlasmaRight, PlasmaNoTrans, 65 | mvqm, nvqk, imin(nvak, mvak), ib, 66 | A(k, k), ldak, 67 | T(k, k), T.mb, 68 | Q(m, k), ldqm, 69 | work, 70 | sequence, request); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /compute/pzungqr.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_context.h" 15 | #include "plasma_descriptor.h" 16 | #include "plasma_internal.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | #include 20 | 21 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 22 | #define T(m, n) (plasma_complex64_t*)plasma_tile_addr(T, m, n) 23 | #define Q(m, n) (plasma_complex64_t*)plasma_tile_addr(Q, m, n) 24 | 25 | /***************************************************************************//** 26 | * Parallel construction of Q using tile V (application to identity) 27 | **/ 28 | void plasma_pzungqr(plasma_desc_t A, plasma_desc_t T, plasma_desc_t Q, 29 | plasma_workspace_t work, 30 | plasma_sequence_t *sequence, plasma_request_t *request) 31 | { 32 | // Return if failed sequence. 33 | if (sequence->status != PlasmaSuccess) 34 | return; 35 | 36 | // Set inner blocking from the T tile row-dimension. 37 | int ib = T.mb; 38 | 39 | for (int k = imin(A.mt, A.nt)-1; k >= 0; k--) { 40 | int mvak = plasma_tile_mview(A, k); 41 | int nvak = plasma_tile_nview(A, k); 42 | int mvqk = plasma_tile_mview(Q, k); 43 | int ldak = plasma_tile_mmain(A, k); 44 | int ldqk = plasma_tile_mmain(Q, k); 45 | for (int m = Q.mt - 1; m > k; m--) { 46 | int mvqm = plasma_tile_mview(Q, m); 47 | int ldam = plasma_tile_mmain(A, m); 48 | int ldqm = plasma_tile_mmain(Q, m); 49 | for (int n = k; n < Q.nt; n++) { 50 | int nvqn = plasma_tile_nview(Q, n); 51 | plasma_core_omp_ztsmqr( 52 | PlasmaLeft, PlasmaNoTrans, 53 | Q.mb, nvqn, mvqm, nvqn, nvak, ib, 54 | Q(k, n), ldqk, 55 | Q(m, n), ldqm, 56 | A(m, k), ldam, 57 | T(m, k), T.mb, 58 | work, 59 | sequence, request); 60 | } 61 | } 62 | for (int n = k; n < Q.nt; n++) { 63 | int nvqn = plasma_tile_nview(Q, n); 64 | plasma_core_omp_zunmqr( 65 | PlasmaLeft, PlasmaNoTrans, 66 | mvqk, nvqn, imin(nvak, mvak), ib, 67 | A(k, k), ldak, 68 | T(k, k), T.mb, 69 | Q(k, n), ldqk, 70 | work, 71 | 
sequence, request); 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /compute/zdesc2ge.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma.h" 14 | #include "plasma_async.h" 15 | #include "plasma_context.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_internal.h" 18 | #include "plasma_types.h" 19 | #include "plasma_workspace.h" 20 | 21 | /***************************************************************************//** 22 | @ingroup plasma_ccrb2cm 23 | 24 | Convert tiled (CCRB) to column-major (CM) matrix layout. 25 | Out-of-place. 26 | */ 27 | void plasma_omp_zdesc2ge(plasma_desc_t A, 28 | plasma_complex64_t *pA, int lda, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Get PLASMA context. 33 | plasma_context_t *plasma = plasma_context_self(); 34 | if (plasma == NULL) { 35 | plasma_error("PLASMA not initialized"); 36 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 37 | return; 38 | } 39 | 40 | // Check input arguments. 
41 | if (plasma_desc_check(A) != PlasmaSuccess) { 42 | plasma_error("invalid A"); 43 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 44 | return; 45 | } 46 | if (pA == NULL) { 47 | plasma_error("NULL A"); 48 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 49 | return; 50 | } 51 | if (sequence == NULL) { 52 | plasma_error("NULL sequence"); 53 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 54 | return; 55 | } 56 | if (request == NULL) { 57 | plasma_error("NULL request"); 58 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 59 | return; 60 | } 61 | 62 | // quick return 63 | if (A.m == 0 || A.n == 0) 64 | return; 65 | 66 | // Call the parallel function. 67 | plasma_pzdesc2ge(A, pA, lda, sequence, request); 68 | } 69 | -------------------------------------------------------------------------------- /compute/zdesc2pb.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma.h" 14 | #include "plasma_async.h" 15 | #include "plasma_context.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_internal.h" 18 | #include "plasma_types.h" 19 | #include "plasma_workspace.h" 20 | 21 | /***************************************************************************//** 22 | @ingroup plasma_ccrb2cm 23 | 24 | Convert tiled (CCRB) to column-major (CM) layout for a band matrix. 25 | Out-of-place. 26 | */ 27 | void plasma_omp_zdesc2pb(plasma_desc_t A, 28 | plasma_complex64_t *pA, int lda, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Get PLASMA context. 
33 | plasma_context_t *plasma = plasma_context_self(); 34 | if (plasma == NULL) { 35 | plasma_error("PLASMA not initialized"); 36 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 37 | return; 38 | } 39 | 40 | // Check input arguments. 41 | if (plasma_desc_check(A) != PlasmaSuccess) { 42 | plasma_error("invalid A"); 43 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 44 | return; 45 | } 46 | if (pA == NULL) { 47 | plasma_error("NULL A"); 48 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 49 | return; 50 | } 51 | if (sequence == NULL) { 52 | plasma_error("NULL sequence"); 53 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 54 | return; 55 | } 56 | if (request == NULL) { 57 | plasma_error("NULL request"); 58 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 59 | return; 60 | } 61 | 62 | // quick return 63 | if (A.m == 0 || A.n == 0) 64 | return; 65 | 66 | // Call the parallel function. 67 | plasma_pzdesc2pb(A, pA, lda, sequence, request); 68 | } 69 | -------------------------------------------------------------------------------- /compute/zdesc2tr.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma.h" 14 | #include "plasma_async.h" 15 | #include "plasma_context.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_internal.h" 18 | #include "plasma_types.h" 19 | #include "plasma_workspace.h" 20 | 21 | /***************************************************************************//** 22 | @ingroup plasma_ccrb2cm 23 | 24 | Convert tiled (CCRB) to column-major (CM) matrix layout. 25 | Out-of-place. 
26 | */ 27 | void plasma_omp_zdesc2tr(plasma_desc_t A, 28 | plasma_complex64_t *pA, int lda, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Get PLASMA context. 33 | plasma_context_t *plasma = plasma_context_self(); 34 | if (plasma == NULL) { 35 | plasma_error("PLASMA not initialized"); 36 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 37 | return; 38 | } 39 | 40 | // Check input arguments. 41 | if (plasma_desc_check(A) != PlasmaSuccess) { 42 | plasma_error("invalid A"); 43 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 44 | return; 45 | } 46 | if (pA == NULL) { 47 | plasma_error("NULL A"); 48 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 49 | return; 50 | } 51 | if (sequence == NULL) { 52 | plasma_error("NULL sequence"); 53 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 54 | return; 55 | } 56 | if (request == NULL) { 57 | plasma_error("NULL request"); 58 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 59 | return; 60 | } 61 | 62 | // quick return 63 | if (A.m == 0 || A.n == 0) 64 | return; 65 | 66 | // Call the parallel function. 67 | plasma_pzdesc2tr(A, pA, lda, sequence, request); 68 | } 69 | -------------------------------------------------------------------------------- /compute/zgb2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma.h" 14 | #include "plasma_async.h" 15 | #include "plasma_context.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_internal.h" 18 | #include "plasma_types.h" 19 | #include "plasma_workspace.h" 20 | 21 | /***************************************************************************//** 22 | @ingroup plasma_cm2ccrb 23 | 24 | Convert column-major (CM) to tiled (CCRB) matrix layout. 25 | Out-of-place. 26 | */ 27 | void plasma_omp_zgb2desc(plasma_complex64_t *pA, int lda, 28 | plasma_desc_t A, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Get PLASMA context. 33 | plasma_context_t *plasma = plasma_context_self(); 34 | if (plasma == NULL) { 35 | plasma_error("PLASMA not initialized"); 36 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 37 | return; 38 | } 39 | 40 | // Check input arguments. 41 | if (pA == NULL) { 42 | plasma_error("NULL A"); 43 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 44 | return; 45 | } 46 | if (plasma_desc_check(A) != PlasmaSuccess) { 47 | plasma_error("invalid A"); 48 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 49 | return; 50 | } 51 | if (sequence == NULL) { 52 | plasma_error("NULL sequence"); 53 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 54 | return; 55 | } 56 | if (request == NULL) { 57 | plasma_error("NULL request"); 58 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 59 | return; 60 | } 61 | 62 | // quick return 63 | if (A.m == 0 || A.n == 0) 64 | return; 65 | // Call the parallel function. 
66 | if (A.type == PlasmaGeneral) { 67 | plasma_pzge2desc(pA, lda, A, sequence, request); 68 | } 69 | else{ 70 | plasma_pzgb2desc(pA, lda, A, sequence, request); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /compute/zge2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma.h" 14 | #include "plasma_async.h" 15 | #include "plasma_context.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_internal.h" 18 | #include "plasma_types.h" 19 | #include "plasma_workspace.h" 20 | 21 | /***************************************************************************//** 22 | @ingroup plasma_cm2ccrb 23 | 24 | Convert column-major (CM) to tiled (CCRB) matrix layout. 25 | Out-of-place. 26 | */ 27 | void plasma_omp_zge2desc(plasma_complex64_t *pA, int lda, 28 | plasma_desc_t A, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Get PLASMA context. 33 | plasma_context_t *plasma = plasma_context_self(); 34 | if (plasma == NULL) { 35 | plasma_error("PLASMA not initialized"); 36 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 37 | return; 38 | } 39 | 40 | // Check input arguments. 
41 | if (pA == NULL) { 42 | plasma_error("NULL A"); 43 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 44 | return; 45 | } 46 | if (plasma_desc_check(A) != PlasmaSuccess) { 47 | plasma_error("invalid A"); 48 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 49 | return; 50 | } 51 | if (sequence == NULL) { 52 | plasma_error("NULL sequence"); 53 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 54 | return; 55 | } 56 | if (request == NULL) { 57 | plasma_error("NULL request"); 58 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 59 | return; 60 | } 61 | 62 | // quick return 63 | if (A.m == 0 || A.n == 0) 64 | return; 65 | 66 | // Call the parallel function. 67 | plasma_pzge2desc(pA, lda, A, sequence, request); 68 | } 69 | -------------------------------------------------------------------------------- /compute/zpb2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma.h" 14 | #include "plasma_async.h" 15 | #include "plasma_context.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_internal.h" 18 | #include "plasma_types.h" 19 | #include "plasma_workspace.h" 20 | 21 | /***************************************************************************//** 22 | @ingroup plasma_cm2ccrb 23 | 24 | Convert column-major (CM) to tiled (CCRB) layout for a band matrix. 25 | Out-of-place. 26 | */ 27 | void plasma_omp_zpb2desc(plasma_complex64_t *pA, int lda, 28 | plasma_desc_t A, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Get PLASMA context. 
33 | plasma_context_t *plasma = plasma_context_self(); 34 | if (plasma == NULL) { 35 | plasma_error("PLASMA not initialized"); 36 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 37 | return; 38 | } 39 | 40 | // Check input arguments. 41 | if (pA == NULL) { 42 | plasma_error("NULL A"); 43 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 44 | return; 45 | } 46 | if (plasma_desc_check(A) != PlasmaSuccess) { 47 | plasma_error("invalid A"); 48 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 49 | return; 50 | } 51 | if (sequence == NULL) { 52 | plasma_error("NULL sequence"); 53 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 54 | return; 55 | } 56 | if (request == NULL) { 57 | plasma_error("NULL request"); 58 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 59 | return; 60 | } 61 | 62 | // quick return 63 | if (A.m == 0 || A.n == 0) 64 | return; 65 | 66 | // Call the parallel function. 67 | plasma_pzpb2desc(pA, lda, A, sequence, request); 68 | } 69 | -------------------------------------------------------------------------------- /compute/ztr2desc.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #include "plasma.h" 14 | #include "plasma_async.h" 15 | #include "plasma_context.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_internal.h" 18 | #include "plasma_types.h" 19 | #include "plasma_workspace.h" 20 | 21 | /***************************************************************************//** 22 | @ingroup plasma_cm2ccrb 23 | 24 | Convert column-major (CM) to tiled (CCRB) matrix layout. 25 | Out-of-place. 
26 | */ 27 | void plasma_omp_ztr2desc(plasma_complex64_t *pA, int lda, 28 | plasma_desc_t A, 29 | plasma_sequence_t *sequence, 30 | plasma_request_t *request) 31 | { 32 | // Get PLASMA context. 33 | plasma_context_t *plasma = plasma_context_self(); 34 | if (plasma == NULL) { 35 | plasma_error("PLASMA not initialized"); 36 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 37 | return; 38 | } 39 | 40 | // Check input arguments. 41 | if (pA == NULL) { 42 | plasma_error("NULL A"); 43 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 44 | return; 45 | } 46 | if (plasma_desc_check(A) != PlasmaSuccess) { 47 | plasma_error("invalid A"); 48 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 49 | return; 50 | } 51 | if (sequence == NULL) { 52 | plasma_error("NULL sequence"); 53 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 54 | return; 55 | } 56 | if (request == NULL) { 57 | plasma_error("NULL request"); 58 | plasma_request_fail(sequence, request, PlasmaErrorIllegalValue); 59 | return; 60 | } 61 | 62 | // quick return 63 | if (A.m == 0 || A.n == 0) 64 | return; 65 | 66 | // Call the parallel function. 67 | plasma_pztr2desc(pA, lda, A, sequence, request); 68 | } 69 | -------------------------------------------------------------------------------- /control/async.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | 11 | #include "plasma_async.h" 12 | #include "plasma_internal.h" 13 | 14 | #include 15 | 16 | /******************************************************************************/ 17 | int plasma_request_fail(plasma_sequence_t *sequence, 18 | plasma_request_t *request, 19 | int status) 20 | { 21 | sequence->request = request; 22 | sequence->status = status; 23 | request->status = status; 24 | return status; 25 | } 26 | 27 | /******************************************************************************/ 28 | int plasma_request_init(plasma_request_t *request) 29 | { 30 | request->status = PlasmaSuccess; 31 | return PlasmaSuccess; 32 | } 33 | 34 | /******************************************************************************/ 35 | int plasma_sequence_init(plasma_sequence_t *sequence) 36 | { 37 | sequence->status = PlasmaSuccess; 38 | sequence->request = NULL; 39 | return PlasmaSuccess; 40 | } 41 | -------------------------------------------------------------------------------- /control/barrier.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | 11 | #include "plasma_barrier.h" 12 | 13 | /******************************************************************************/ 14 | void plasma_barrier_init(plasma_barrier_t *barrier) 15 | { 16 | barrier->count = 0; 17 | barrier->passed = 0; 18 | } 19 | 20 | /******************************************************************************/ 21 | void plasma_barrier_wait(plasma_barrier_t *barrier, int size) 22 | { 23 | int passed_old = barrier->passed; 24 | 25 | __sync_fetch_and_add(&barrier->count, 1); 26 | if (__sync_bool_compare_and_swap(&barrier->count, size, 0)) 27 | barrier->passed++; 28 | else 29 | while (barrier->passed == passed_old); 30 | } 31 | -------------------------------------------------------------------------------- /control/version.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | **/ 10 | 11 | #include "plasma.h" 12 | 13 | /******************************************************************************/ 14 | void plasma_version(int *major, int *minor, int *patch) 15 | { 16 | if (major) *major = PLASMA_VERSION_MAJOR; 17 | if (minor) *minor = PLASMA_VERSION_MINOR; 18 | if (patch) *patch = PLASMA_VERSION_PATCH; 19 | } 20 | -------------------------------------------------------------------------------- /control/workspace.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | #include "plasma_workspace.h" 11 | #include "plasma_internal.h" 12 | 13 | #include 14 | 15 | /******************************************************************************/ 16 | int plasma_workspace_create(plasma_workspace_t *workspace, size_t lworkspace, 17 | plasma_enum_t dtyp) 18 | { 19 | // Allocate array of pointers. 20 | #pragma omp parallel 21 | #pragma omp master 22 | { 23 | workspace->nthread = omp_get_num_threads(); 24 | } 25 | workspace->lworkspace = lworkspace; 26 | workspace->dtyp = dtyp; 27 | if ((workspace->spaces = (void**)calloc(workspace->nthread, 28 | sizeof(void*))) == NULL) { 29 | free(workspace->spaces); 30 | plasma_error("malloc() failed"); 31 | return PlasmaErrorOutOfMemory; 32 | } 33 | 34 | // Each thread allocates its workspace. 35 | size_t size = (size_t)lworkspace * plasma_element_size(workspace->dtyp); 36 | int info = PlasmaSuccess; 37 | #pragma omp parallel 38 | { 39 | int tid = omp_get_thread_num(); 40 | if ((workspace->spaces[tid] = (void*)malloc(size)) == NULL) { 41 | info = PlasmaErrorOutOfMemory; 42 | } 43 | } 44 | if (info != PlasmaSuccess) { 45 | plasma_workspace_destroy(workspace); 46 | } 47 | return info; 48 | } 49 | 50 | /******************************************************************************/ 51 | int plasma_workspace_destroy(plasma_workspace_t *workspace) 52 | { 53 | if (workspace->spaces != NULL) { 54 | for (int i = 0; i < workspace->nthread; ++i) { 55 | free(workspace->spaces[i]); 56 | workspace->spaces[i] = NULL; 57 | } 58 | free(workspace->spaces); 59 | workspace->spaces = NULL; 60 | workspace->nthread = 0; 61 | workspace->lworkspace = 0; 62 | } 63 | return PlasmaSuccess; 64 | } 65 | -------------------------------------------------------------------------------- /core_blas/core_clag2z.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * 
University of Manchester, UK. 8 | * 9 | * @precisions mixed zc -> ds 10 | * 11 | **/ 12 | 13 | #include 14 | #include "core_lapack.h" 15 | #include "plasma_types.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_lag2 20 | * 21 | * Converts m-by-n matrix A from single complex to double complex precision. 22 | * 23 | ******************************************************************************* 24 | * 25 | * @param[in] m 26 | * The number of rows of the matrix As. 27 | * m >= 0. 28 | * 29 | * @param[in] n 30 | * The number of columns of the matrix As. 31 | * n >= 0. 32 | * 33 | * @param[in] As 34 | * The ldas-by-n matrix in single complex precision to convert. 35 | * 36 | * @param[in] ldas 37 | * The leading dimension of the matrix As. 38 | * ldas >= max(1,m). 39 | * 40 | * @param[out] A 41 | * On exit, the converted lda-by-n matrix in double complex precision. 42 | * 43 | * @param[in] lda 44 | * The leading dimension of the matrix A. 45 | * lda >= max(1,m). 
46 | * 47 | ******************************************************************************/ 48 | __attribute__((weak)) 49 | void plasma_core_clag2z(int m, int n, 50 | plasma_complex32_t *As, int ldas, 51 | plasma_complex64_t *A, int lda) 52 | { 53 | LAPACKE_clag2z_work(LAPACK_COL_MAJOR, m, n, As, ldas, A, lda); 54 | } 55 | 56 | /******************************************************************************/ 57 | void plasma_core_omp_clag2z(int m, int n, 58 | plasma_complex32_t *As, int ldas, 59 | plasma_complex64_t *A, int lda, 60 | plasma_sequence_t *sequence, plasma_request_t *request) 61 | { 62 | #pragma omp task depend(in:As[0:ldas*n]) \ 63 | depend(out:A[0:lda*n]) 64 | { 65 | if (sequence->status == PlasmaSuccess) 66 | plasma_core_clag2z(m, n, As, ldas, A, lda); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /core_blas/core_dcabs1.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c 10 | * 11 | **/ 12 | 13 | #include 14 | 15 | #include 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_cabs1 20 | * 21 | ******************************************************************************* 22 | * 23 | * @param[in] alpha 24 | * The scalar alpha. 25 | * 26 | ******************************************************************************* 27 | * 28 | * @retval Complex 1-norm absolute value: abs(real(alpha)) + abs(imag(alpha)). 
29 | * 30 | ******************************************************************************* 31 | * 32 | * @sa plasma_core_scabs1 33 | * 34 | ******************************************************************************/ 35 | double plasma_core_dcabs1(plasma_complex64_t alpha) 36 | { 37 | return fabs(creal(alpha)) + fabs(cimag(alpha)); 38 | } 39 | -------------------------------------------------------------------------------- /core_blas/core_dzamax.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | #include 18 | 19 | /******************************************************************************/ 20 | void plasma_core_omp_dzamax(int colrow, int m, int n, 21 | const plasma_complex64_t *A, int lda, 22 | double *values, 23 | plasma_sequence_t *sequence, plasma_request_t *request) 24 | { 25 | switch (colrow) { 26 | case PlasmaColumnwise: 27 | #pragma omp task depend(in:A[0:lda*n]) \ 28 | depend(out:values[0:n]) 29 | { 30 | if (sequence->status == PlasmaSuccess) { 31 | for (int j = 0; j < n; j++) { 32 | values[j] = plasma_core_dcabs1(A[lda*j]); 33 | for (int i = 1; i < m; i++) { 34 | double tmp = plasma_core_dcabs1(A[lda*j+i]); 35 | if (tmp > values[j]) 36 | values[j] = tmp; 37 | } 38 | } 39 | } 40 | } 41 | break; 42 | case PlasmaRowwise: 43 | #pragma omp task depend(in:A[0:lda*n]) \ 44 | depend(out:values[0:m]) 45 | { 46 | if (sequence->status == PlasmaSuccess) { 47 | for (int i = 0; i < m; i++) 48 | values[i] = plasma_core_dcabs1(A[i]); 49 | 50 | for (int j = 1; j < n; j++) { 51 | for (int i = 0; i < m; i++) { 52 | double tmp = plasma_core_dcabs1(A[lda*j+i]); 53 | if (tmp > values[i]) 54 | values[i] = tmp; 55 | } 56 | } 57 | } 58 | } 59 | 
break; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /core_blas/core_zgessq.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | 15 | #include 16 | #include "plasma_types.h" 17 | #include "core_lapack.h" 18 | 19 | /******************************************************************************/ 20 | __attribute__((weak)) 21 | void plasma_core_zgessq(int m, int n, 22 | const plasma_complex64_t *A, int lda, 23 | double *scale, double *sumsq) 24 | { 25 | int ione = 1; 26 | for (int j = 0; j < n; j++) { 27 | // TODO: Inline this operation. 28 | LAPACK_zlassq(&m, &A[j*lda], &ione, scale, sumsq); 29 | } 30 | } 31 | 32 | /******************************************************************************/ 33 | void plasma_core_omp_zgessq(int m, int n, 34 | const plasma_complex64_t *A, int lda, 35 | double *scale, double *sumsq, 36 | plasma_sequence_t *sequence, plasma_request_t *request) 37 | { 38 | #pragma omp task depend(in:A[0:lda*n]) \ 39 | depend(out:scale[0:n]) \ 40 | depend(out:sumsq[0:n]) 41 | { 42 | if (sequence->status == PlasmaSuccess) { 43 | *scale = 0.0; 44 | *sumsq = 1.0; 45 | plasma_core_zgessq(m, n, A, lda, scale, sumsq); 46 | } 47 | } 48 | } 49 | 50 | /******************************************************************************/ 51 | void plasma_core_omp_zgessq_aux(int n, 52 | const double *scale, const double *sumsq, 53 | double *value, 54 | plasma_sequence_t *sequence, plasma_request_t *request) 55 | { 56 | #pragma omp task depend(in:scale[0:n]) \ 57 | depend(in:sumsq[0:n]) \ 58 | depend(out:value[0:1]) 59 | { 60 | if (sequence->status == PlasmaSuccess) { 61 | double scl = 0.0; 62 | double sum = 1.0; 63 | for (int i = 0; i < n; i++) { 64 | if 
(scl < scale[i]) { 65 | sum = sumsq[i] + sum*((scl/scale[i])*(scl/scale[i])); 66 | scl = scale[i]; 67 | } 68 | else if (scl > 0.) { 69 | sum = sum + sumsq[i]*(scale[i]/scl)*(scale[i]/scl); 70 | } 71 | } 72 | *value = scl*sqrt(sum); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /core_blas/core_zgeswp.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_internal.h" 15 | #include "plasma_types.h" 16 | #include "core_lapack.h" 17 | 18 | #define A(m, n) (plasma_complex64_t*)plasma_tile_addr(A, m, n) 19 | 20 | /******************************************************************************/ 21 | __attribute__((weak)) 22 | void plasma_core_zgeswp(plasma_enum_t colrow, 23 | plasma_desc_t A, int k1, int k2, const int *ipiv, int incx) 24 | { 25 | //================ 26 | // PlasmaRowwise 27 | //================ 28 | if (colrow == PlasmaRowwise) { 29 | if (incx > 0) { 30 | for (int m = k1-1; m <= k2-1; m += incx) { 31 | if (ipiv[m]-1 != m) { 32 | int m1 = m; 33 | int m2 = ipiv[m]-1; 34 | 35 | int lda1 = plasma_tile_mmain(A, m1/A.mb); 36 | int lda2 = plasma_tile_mmain(A, m2/A.mb); 37 | 38 | cblas_zswap(A.n, 39 | A(m1/A.mb, 0) + m1%A.mb, lda1, 40 | A(m2/A.mb, 0) + m2%A.mb, lda2); 41 | } 42 | } 43 | } 44 | else { 45 | for (int m = k2-1; m >= k1-1; m += incx) { 46 | if (ipiv[m]-1 != m) { 47 | int m1 = m; 48 | int m2 = ipiv[m]-1; 49 | 50 | int lda1 = plasma_tile_mmain(A, m1/A.mb); 51 | int lda2 = plasma_tile_mmain(A, m2/A.mb); 52 | 53 | cblas_zswap(A.n, 54 | A(m1/A.mb, 0) + m1%A.mb, lda1, 55 | A(m2/A.mb, 0) + m2%A.mb, lda2); 56 | } 57 | } 58 | } 59 | } 60 | //=================== 61 | // PlasmaColumnwise 62 | //=================== 63 | else { 64 | 
if (incx > 0) { 65 | for (int n = k1-1; n <= k2-1; n += incx) { 66 | if (ipiv[n]-1 != n) { 67 | int n1 = n; 68 | int n2 = ipiv[n]-1; 69 | 70 | int lda0 = plasma_tile_mmain(A, 0); 71 | 72 | cblas_zswap(A.m, 73 | A(0, n1/A.nb) + (n1%A.nb)*lda0, 1, 74 | A(0, n2/A.nb) + (n2%A.nb)*lda0, 1); 75 | } 76 | } 77 | } 78 | else { 79 | for (int n = k2-1; n >= k1-1; n += incx) { 80 | if (ipiv[n]-1 != n) { 81 | int n1 = n; 82 | int n2 = ipiv[n]-1; 83 | 84 | int lda0 = plasma_tile_mmain(A, 0); 85 | 86 | cblas_zswap(A.m, 87 | A(0, n1/A.nb) + (n1%A.nb)*lda0, 1, 88 | A(0, n2/A.nb) + (n2%A.nb)*lda0, 1); 89 | } 90 | } 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /core_blas/core_zhegst.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_hegst 20 | * 21 | * Reduces a complex Hermitian-definite generalized eigenproblem to standard 22 | * form. 23 | * 24 | * If ITYPE = 1, the problem is A*x = lambda*B*x, 25 | * and A is overwritten by inv(U^H)*A*inv(U) or inv(L)*A*inv(L^H) 26 | * 27 | * If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or 28 | * B*A*x = lambda*x, and A is overwritten by U*A*U^H or L^H*A*L. 29 | * 30 | ******************************************************************************* 31 | * 32 | * @param[in] itype 33 | * = 1: compute inv(U^H)*A*inv(U) or inv(L)*A*inv(L^H); 34 | * = 2 or 3: compute U*A*U^H or L^H*A*L. 
35 | * 36 | * @param[in] uplo 37 | * If PlasmaUpper, upper triangle of A is stored and B is factored as 38 | * U^H*U; 39 | * If PlasmaLower, lower triangle of A is stored and B is factored as 40 | * L*L^H. 41 | * 42 | * @param[in] n 43 | * The order of the matrices A and B. N >= 0. 44 | * 45 | * @param[in,out] A 46 | * On entry, the Hermitian matrix A. If UPLO = 'U', the leading 47 | * N-by-N upper triangular part of A contains the upper 48 | * triangular part of the matrix A, and the strictly lower 49 | * triangular part of A is not referenced. If UPLO = 'L', the 50 | * leading N-by-N lower triangular part of A contains the lower 51 | * triangular part of the matrix A, and the strictly upper 52 | * triangular part of A is not referenced. 53 | * 54 | * On exit, if INFO = 0, the transformed matrix, stored in the 55 | * same format as A. 56 | * 57 | * @param[in] lda 58 | * The leading dimension of the array A. LDA >= max(1,N). 59 | * 60 | * @param[in,out] B 61 | * The triangular factor from the Cholesky factorization of B, 62 | * as returned by plasma_core_zpotrf. 63 | * 64 | * @param[in] ldb 65 | * The leading dimension of the array B. LDB >= max(1,N). 
66 | * 67 | ******************************************************************************/ 68 | __attribute__((weak)) 69 | int plasma_core_zhegst(int itype, plasma_enum_t uplo, 70 | int n, 71 | plasma_complex64_t *A, int lda, 72 | plasma_complex64_t *B, int ldb) 73 | { 74 | int info = LAPACKE_zhegst_work( 75 | LAPACK_COL_MAJOR, 76 | itype, 77 | lapack_const(uplo), 78 | n, A, lda, B, ldb ); 79 | return info; 80 | } 81 | 82 | /******************************************************************************/ 83 | void plasma_core_omp_zhegst(int itype, plasma_enum_t uplo, 84 | int n, 85 | plasma_complex64_t *A, int lda, 86 | plasma_complex64_t *B, int ldb, 87 | plasma_sequence_t *sequence, 88 | plasma_request_t *request) 89 | { 90 | #pragma omp task depend(inout:A[0:lda*n]) \ 91 | depend(in:B[0:ldb*n]) 92 | { 93 | if (sequence->status == PlasmaSuccess) 94 | plasma_core_zhegst(itype, uplo, 95 | n, 96 | A, lda, 97 | B, ldb); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /core_blas/core_zherk.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_herk 20 | * 21 | * Performs one of the Hermitian rank k operations 22 | * 23 | * \f[ C = \alpha A \times A^H + \beta C, \f] 24 | * or 25 | * \f[ C = \alpha A^H \times A + \beta C, \f] 26 | * 27 | * where alpha and beta are real scalars, C is an n-by-n Hermitian 28 | * matrix, and A is an n-by-k matrix in the first case and a k-by-n 29 | * matrix in the second case. 
30 | * 31 | ******************************************************************************* 32 | * 33 | * @param[in] uplo 34 | * - PlasmaUpper: Upper triangle of C is stored; 35 | * - PlasmaLower: Lower triangle of C is stored. 36 | * 37 | * @param[in] trans 38 | * - PlasmaNoTrans: \f[ C = \alpha A \times A^H + \beta C; \f] 39 | * - PlasmaConjTrans: \f[ C = \alpha A^H \times A + \beta C. \f] 40 | * 41 | * @param[in] n 42 | * The order of the matrix C. n >= 0. 43 | * 44 | * @param[in] k 45 | * If trans = PlasmaNoTrans, number of columns of the A matrix; 46 | * if trans = PlasmaConjTrans, number of rows of the A matrix. 47 | * 48 | * @param[in] alpha 49 | * The scalar alpha. 50 | * 51 | * @param[in] A 52 | * A is an lda-by-ka matrix. 53 | * If trans = PlasmaNoTrans, ka = k; 54 | * if trans = PlasmaConjTrans, ka = n. 55 | * 56 | * @param[in] lda 57 | * The leading dimension of the array A. 58 | * If trans = PlasmaNoTrans, lda >= max(1, n); 59 | * if trans = PlasmaConjTrans, lda >= max(1, k). 60 | * 61 | * @param[in] beta 62 | * The scalar beta. 63 | * 64 | * @param[in,out] C 65 | * C is an ldc-by-n matrix. 66 | * On exit, the uplo part of the matrix is overwritten 67 | * by the uplo part of the updated matrix. 68 | * 69 | * @param[in] ldc 70 | * The leading dimension of the array C. ldc >= max(1, n). 
71 | * 72 | ******************************************************************************/ 73 | __attribute__((weak)) 74 | void plasma_core_zherk(plasma_enum_t uplo, plasma_enum_t trans, 75 | int n, int k, 76 | double alpha, const plasma_complex64_t *A, int lda, 77 | double beta, plasma_complex64_t *C, int ldc) 78 | { 79 | cblas_zherk(CblasColMajor, 80 | (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, 81 | n, k, 82 | alpha, A, lda, 83 | beta, C, ldc); 84 | } 85 | 86 | /******************************************************************************/ 87 | void plasma_core_omp_zherk(plasma_enum_t uplo, plasma_enum_t trans, 88 | int n, int k, 89 | double alpha, const plasma_complex64_t *A, int lda, 90 | double beta, plasma_complex64_t *C, int ldc, 91 | plasma_sequence_t *sequence, plasma_request_t *request) 92 | { 93 | int ak; 94 | if (trans == PlasmaNoTrans) 95 | ak = k; 96 | else 97 | ak = n; 98 | 99 | #pragma omp task depend(in:A[0:lda*ak]) \ 100 | depend(inout:C[0:ldc*n]) 101 | { 102 | if (sequence->status == PlasmaSuccess) 103 | plasma_core_zherk(uplo, trans, 104 | n, k, 105 | alpha, A, lda, 106 | beta, C, ldc); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /core_blas/core_zhessq.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> c 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | #include 18 | 19 | /******************************************************************************/ 20 | __attribute__((weak)) 21 | void plasma_core_zhessq(plasma_enum_t uplo, 22 | int n, 23 | const plasma_complex64_t *A, int lda, 24 | double *scale, double *sumsq) 25 | { 26 | int ione = 1; 27 | if (uplo == PlasmaUpper) { 28 | for (int j = 1; j < n; j++) 29 | // TODO: Inline this operation. 30 | LAPACK_zlassq(&j, &A[lda*j], &ione, scale, sumsq); 31 | } 32 | else { // PlasmaLower 33 | for (int j = 0; j < n-1; j++) { 34 | int len = n-j-1; 35 | // TODO: Inline this operation. 36 | LAPACK_zlassq(&len, &A[lda*j+j+1], &ione, scale, sumsq); 37 | } 38 | } 39 | *sumsq *= 2.0; 40 | for (int i = 0; i < n; i++) { 41 | // diagonal is real, ignore imaginary part 42 | if (creal(A[lda*i+i]) != 0.0) { // != propagates nan 43 | double absa = fabs(creal(A[lda*i+i])); 44 | if (*scale < absa) { 45 | *sumsq = 1.0 + *sumsq*((*scale/absa)*(*scale/absa)); 46 | *scale = absa; 47 | } 48 | else { 49 | *sumsq = *sumsq + ((absa/(*scale))*(absa/(*scale))); 50 | } 51 | } 52 | } 53 | } 54 | 55 | /******************************************************************************/ 56 | void plasma_core_omp_zhessq(plasma_enum_t uplo, 57 | int n, 58 | const plasma_complex64_t *A, int lda, 59 | double *scale, double *sumsq, 60 | plasma_sequence_t *sequence, plasma_request_t *request) 61 | { 62 | #pragma omp task depend(in:A[0:lda*n]) \ 63 | depend(out:scale[0:n]) \ 64 | depend(out:sumsq[0:n]) 65 | { 66 | if (sequence->status == PlasmaSuccess) { 67 | *scale = 0.0; 68 | *sumsq = 1.0; 69 | plasma_core_zhessq(uplo, n, A, lda, scale, sumsq); 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /core_blas/core_zlag2c.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | 
* @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions mixed zc -> ds 10 | * 11 | **/ 12 | 13 | #include 14 | #include "core_lapack.h" 15 | #include "plasma_types.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_lag2 20 | * 21 | * Converts m-by-n matrix A from double complex to single complex precision. 22 | * 23 | ******************************************************************************* 24 | * 25 | * @param[in] m 26 | * The number of rows of the matrix A. 27 | * m >= 0. 28 | * 29 | * @param[in] n 30 | * The number of columns of the matrix A. 31 | * n >= 0. 32 | * 33 | * @param[in] A 34 | * The lda-by-n matrix in double complex precision to convert. 35 | * 36 | * @param[in] lda 37 | * The leading dimension of the matrix A. 38 | * lda >= max(1,m). 39 | * 40 | * @param[out] As 41 | * On exit, the converted ldas-by-n matrix in single complex precision. 42 | * 43 | * @param[in] ldas 44 | * The leading dimension of the matrix As. 45 | * ldas >= max(1,m). 
46 | * 47 | ******************************************************************************/ 48 | __attribute__((weak)) 49 | int plasma_core_zlag2c(int m, int n, 50 | plasma_complex64_t *A, int lda, 51 | plasma_complex32_t *As, int ldas) 52 | { 53 | int info; 54 | info = LAPACKE_zlag2c_work(LAPACK_COL_MAJOR, m, n, A, lda, As, ldas); 55 | return info; 56 | } 57 | 58 | /******************************************************************************/ 59 | void plasma_core_omp_zlag2c(int m, int n, 60 | plasma_complex64_t *A, int lda, 61 | plasma_complex32_t *As, int ldas, 62 | plasma_sequence_t *sequence, plasma_request_t *request) 63 | { 64 | #pragma omp task depend(in:A[0:lda*n]) \ 65 | depend(out:As[0:ldas*n]) 66 | { 67 | int info; 68 | if (sequence->status == PlasmaSuccess) { 69 | info = plasma_core_zlag2c(m, n, A, lda, As, ldas); 70 | if (info != 0) { 71 | #pragma omp critical (plasma_critical_sequence) 72 | { 73 | // Value will be 1, so it doesn't matter which tile sets status. 74 | plasma_request_fail(sequence, request, info); 75 | } 76 | } 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /core_blas/core_zlange.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | #include 18 | 19 | /***************************************************************************//** 20 | * 21 | * @ingroup core_lange 22 | * 23 | * Calculates max, one, infinity or Frobenius norm of a given matrix. 
24 | * 25 | ******************************************************************************* 26 | * 27 | * @param[in] norm 28 | * - PlasmaMaxNorm: Max norm 29 | * - PlasmaOneNorm: One norm 30 | * - PlasmaInfNorm: Infinity norm 31 | * - PlasmaFrobeniusNorm: Frobenius norm 32 | * 33 | * @param[in] m 34 | * The number of rows of the matrix A. m >= 0. When m = 0, 35 | * the returned value is set to zero. 36 | * 37 | * @param[in] n 38 | * The number of columns of the matrix A. n >= 0. When n = 0, 39 | * the returned value is set to zero. 40 | * 41 | * @param[in] A 42 | * The m-by-n matrix A. 43 | * 44 | * @param[in] lda 45 | * The leading dimension of the array A. lda >= max(1,m). 46 | * 47 | * @param[in] work 48 | * The auxiliary work array. 49 | * 50 | * @param[out] value 51 | * The specified norm of the given matrix A 52 | * 53 | ******************************************************************************/ 54 | __attribute__((weak)) 55 | void plasma_core_zlange(plasma_enum_t norm, int m, int n, 56 | const plasma_complex64_t *A, int lda, 57 | double *work, double *value) 58 | { 59 | *value = LAPACKE_zlange_work(LAPACK_COL_MAJOR, 60 | lapack_const(norm), 61 | m, n, A, lda, work); 62 | } 63 | 64 | /******************************************************************************/ 65 | void plasma_core_omp_zlange(plasma_enum_t norm, int m, int n, 66 | const plasma_complex64_t *A, int lda, 67 | double *work, double *value, 68 | plasma_sequence_t *sequence, plasma_request_t *request) 69 | { 70 | #pragma omp task depend(in:A[0:lda*n]) \ 71 | depend(out:value[0:1]) 72 | { 73 | if (sequence->status == PlasmaSuccess) 74 | plasma_core_zlange(norm, m, n, A, lda, work, value); 75 | } 76 | } 77 | 78 | /******************************************************************************/ 79 | void plasma_core_omp_zlange_aux(plasma_enum_t norm, int m, int n, 80 | const plasma_complex64_t *A, int lda, 81 | double *value, 82 | plasma_sequence_t *sequence, plasma_request_t *request) 83 | { 84 
| switch (norm) { 85 | case PlasmaOneNorm: 86 | #pragma omp task depend(in:A[0:lda*n]) \ 87 | depend(out:value[0:n]) 88 | { 89 | if (sequence->status == PlasmaSuccess) { 90 | for (int j = 0; j < n; j++) { 91 | value[j] = cabs(A[lda*j]); 92 | for (int i = 1; i < m; i++) { 93 | value[j] += cabs(A[lda*j+i]); 94 | } 95 | } 96 | } 97 | } 98 | break; 99 | case PlasmaInfNorm: 100 | #pragma omp task depend(in:A[0:lda*n]) \ 101 | depend(out:value[0:m]) 102 | { 103 | if (sequence->status == PlasmaSuccess) { 104 | for (int i = 0; i < m; i++) 105 | value[i] = 0.0; 106 | 107 | for (int j = 0; j < n; j++) { 108 | for (int i = 0; i < m; i++) { 109 | value[i] += cabs(A[lda*j+i]); 110 | } 111 | } 112 | } 113 | } 114 | break; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /core_blas/core_zlanhe.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> c 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | #include 18 | 19 | /******************************************************************************/ 20 | __attribute__((weak)) 21 | void plasma_core_zlanhe(plasma_enum_t norm, plasma_enum_t uplo, 22 | int n, 23 | const plasma_complex64_t *A, int lda, 24 | double *work, double *value) 25 | { 26 | *value = LAPACKE_zlanhe_work(LAPACK_COL_MAJOR, 27 | lapack_const(norm), 28 | lapack_const(uplo), 29 | n, A, lda, work); 30 | } 31 | 32 | /******************************************************************************/ 33 | void plasma_core_omp_zlanhe(plasma_enum_t norm, plasma_enum_t uplo, 34 | int n, 35 | const plasma_complex64_t *A, int lda, 36 | double *work, double *value, 37 | plasma_sequence_t *sequence, plasma_request_t *request) 38 | { 39 | #pragma omp task depend(in:A[0:lda*n]) \ 40 | depend(out:value[0:1]) 41 | { 42 | if (sequence->status == PlasmaSuccess) 43 | plasma_core_zlanhe(norm, uplo, n, A, lda, work, value); 44 | } 45 | } 46 | 47 | /******************************************************************************/ 48 | void plasma_core_omp_zlanhe_aux(plasma_enum_t norm, plasma_enum_t uplo, 49 | int n, 50 | const plasma_complex64_t *A, int lda, 51 | double *value, 52 | plasma_sequence_t *sequence, plasma_request_t *request) 53 | { 54 | switch (norm) { 55 | case PlasmaOneNorm: 56 | case PlasmaInfNorm: 57 | #pragma omp task depend(in:A[0:lda*n]) \ 58 | depend(out:value[0:n]) 59 | { 60 | if (sequence->status == PlasmaSuccess) { 61 | if (uplo == PlasmaUpper) { 62 | for (int i = 0; i < n; i++) 63 | value[i] = 0.0; 64 | 65 | for (int j = 0; j < n; j++) { 66 | for (int i = 0; i < j; i++) { 67 | value[i] += cabs(A[lda*j+i]); 68 | value[j] += cabs(A[lda*j+i]); 69 | } 70 | value[j] += fabs(creal(A[lda*j+j])); 71 | } 72 | } 73 | else { // PlasmaLower 74 | for (int i = 0; i < n; i++) 75 | value[i] = 0.0; 76 | 77 | for (int j = 0; 
j < n; j++) { 78 | value[j] += fabs(creal(A[lda*j+j])); 79 | for (int i = j+1; i < n; i++) { 80 | value[i] += cabs(A[lda*j+i]); 81 | value[j] += cabs(A[lda*j+i]); 82 | } 83 | } 84 | } 85 | } 86 | } 87 | break; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /core_blas/core_zlansy.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | #include 18 | 19 | /******************************************************************************/ 20 | __attribute__((weak)) 21 | void plasma_core_zlansy(plasma_enum_t norm, plasma_enum_t uplo, 22 | int n, 23 | const plasma_complex64_t *A, int lda, 24 | double *work, double *value) 25 | { 26 | *value = LAPACKE_zlansy_work(LAPACK_COL_MAJOR, 27 | lapack_const(norm), 28 | lapack_const(uplo), 29 | n, A, lda, work); 30 | } 31 | 32 | /******************************************************************************/ 33 | void plasma_core_omp_zlansy(plasma_enum_t norm, plasma_enum_t uplo, 34 | int n, 35 | const plasma_complex64_t *A, int lda, 36 | double *work, double *value, 37 | plasma_sequence_t *sequence, plasma_request_t *request) 38 | { 39 | #pragma omp task depend(in:A[0:lda*n]) \ 40 | depend(out:value[0:1]) 41 | { 42 | if (sequence->status == PlasmaSuccess) 43 | plasma_core_zlansy(norm, uplo, n, A, lda, work, value); 44 | } 45 | } 46 | 47 | /******************************************************************************/ 48 | void plasma_core_omp_zlansy_aux(plasma_enum_t norm, plasma_enum_t uplo, 49 | int n, 50 | const plasma_complex64_t *A, int lda, 51 | double *value, 52 | plasma_sequence_t *sequence, plasma_request_t *request) 53 | { 54 | switch (norm) { 55 | 
case PlasmaOneNorm: 56 | case PlasmaInfNorm: 57 | #pragma omp task depend(in:A[0:lda*n]) \ 58 | depend(out:value[0:n]) 59 | { 60 | if (sequence->status == PlasmaSuccess) { 61 | if (uplo == PlasmaUpper) { 62 | for (int i = 0; i < n; i++) 63 | value[i] = 0.0; 64 | 65 | for (int j = 0; j < n; j++) { 66 | for (int i = 0; i < j; i++) { 67 | value[i] += cabs(A[lda*j+i]); 68 | value[j] += cabs(A[lda*j+i]); 69 | } 70 | value[j] += cabs(A[lda*j+j]); 71 | } 72 | } 73 | else { // PlasmaLower 74 | for (int i = 0; i < n; i++) 75 | value[i] = 0.0; 76 | 77 | for (int j = 0; j < n; j++) { 78 | value[j] += cabs(A[lda*j+j]); 79 | for (int i = j+1; i < n; i++) { 80 | value[i] += cabs(A[lda*j+i]); 81 | value[j] += cabs(A[lda*j+i]); 82 | } 83 | } 84 | } 85 | } 86 | } 87 | break; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /core_blas/core_zlarfy.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include "plasma_core_blas.h" 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | #undef REAL 18 | #define COMPLEX 19 | 20 | /***************************************************************************//** 21 | * 22 | * @ingroup core_larfy 23 | * 24 | * Applies an elementary reflector, or Householder reflector, H, 25 | * to a n-by-n Hermitian matrix A, from both the left and the right: 26 | * \[ 27 | * A = H A H^H 28 | * \] 29 | * H is represented in the form 30 | * \[ 31 | * H = I - tau v v^H 32 | * \] 33 | * where tau is a scalar and v is a vector. 34 | * 35 | * If tau is zero, then H is taken to be the unit matrix. 
36 | * 37 | ******************************************************************************* 38 | * 39 | * @param[in] n 40 | * The number of rows and columns of the matrix C. n >= 0. 41 | * 42 | * @param[in,out] A 43 | * On entry, the n-by-n Hermitian matrix A in an lda-by-n array. 44 | * On exit, A is overwritten by H A H^H. 45 | * 46 | * @param[in] lda 47 | * The leading dimension of the array A. lda >= max( 1,n ). 48 | * 49 | * @param[in] v 50 | * The vector v that contains the Householder reflectors. 51 | * 52 | * @param[in] tau 53 | * The value tau. 54 | * 55 | * @param[out] work 56 | * Workspace of size n. 57 | * 58 | ******************************************************************************/ 59 | void plasma_core_zlarfy( 60 | int n, 61 | plasma_complex64_t *A, int lda, 62 | const plasma_complex64_t *v, 63 | const plasma_complex64_t *tau, 64 | plasma_complex64_t *work) 65 | { 66 | const plasma_complex64_t zero = 0.0; 67 | const plasma_complex64_t neg_one = -1.0; 68 | 69 | plasma_complex64_t dtmp; 70 | 71 | // Compute dtmp = x^H v 72 | // x = A v tau 73 | cblas_zhemv( CblasColMajor, CblasLower, 74 | n, CBLAS_SADDR( *tau ), A, lda, 75 | v, 1, CBLAS_SADDR( zero ), work, 1 ); 76 | 77 | // cblas_zdotc_sub( n, work, 1, v, 1, &dtmp ); 78 | dtmp = 0.; 79 | for (int j = 0; j < n; ++j) { 80 | dtmp += conj( work[j] ) * v[j]; 81 | } 82 | 83 | // Compute 1/2 x^H v tau = 1/2 dtmp tau 84 | dtmp = -dtmp * 0.5 * (*tau); 85 | 86 | // Compute w = x - 1/2 v x^H v t = x - dtmp v */ 87 | cblas_zaxpy( n, CBLAS_SADDR( dtmp ), 88 | v, 1, work, 1 ); 89 | 90 | // Performs the Hermitian rank 2 operation 91 | // A := alpha x y^H + alpha y x^H + A 92 | cblas_zher2( CblasColMajor, CblasLower, n, 93 | CBLAS_SADDR( neg_one ), work, 1, 94 | v, 1, 95 | A, lda ); 96 | } 97 | 98 | #undef COMPLEX 99 | -------------------------------------------------------------------------------- /core_blas/core_zlascl.c: -------------------------------------------------------------------------------- 1 | /** 
2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | /******************************************************************************/ 18 | __attribute__((weak)) 19 | void plasma_core_zlascl(plasma_enum_t uplo, 20 | double cfrom, double cto, 21 | int m, int n, 22 | plasma_complex64_t *A, int lda) 23 | { 24 | // LAPACKE_zlascl is not available in LAPACKE < 3.6.0 25 | int kl; 26 | int ku; 27 | int info; 28 | char type = lapack_const(uplo); 29 | LAPACK_zlascl(&type, 30 | &kl, &ku, 31 | &cfrom, &cto, 32 | &m, &n, 33 | A, &lda, &info); 34 | } 35 | 36 | /******************************************************************************/ 37 | void plasma_core_omp_zlascl(plasma_enum_t uplo, 38 | double cfrom, double cto, 39 | int m, int n, 40 | plasma_complex64_t *A, int lda, 41 | plasma_sequence_t *sequence, plasma_request_t *request) 42 | { 43 | #pragma omp task depend(inout:A[0:lda*n]) 44 | { 45 | if (sequence->status == PlasmaSuccess) 46 | plasma_core_zlascl(uplo, 47 | cfrom, cto, 48 | m, n, 49 | A, lda); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /core_blas/core_zlaset.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "plasma_internal.h" 16 | #include "core_lapack.h" 17 | 18 | // for memset function 19 | #include 20 | 21 | /***************************************************************************//** 22 | * 23 | * @ingroup core_laset 24 | * 25 | * Sets the elements of the matrix A on the diagonal 26 | * to beta and on the off-diagonals to alpha 27 | * 28 | ******************************************************************************* 29 | * 30 | * @param[in] uplo 31 | * Specifies which elements of the matrix are to be set 32 | * - PlasmaUpper: Upper part of A is set; 33 | * - PlasmaLower: Lower part of A is set; 34 | * - PlasmaUpperLower: ALL elements of A are set. 35 | * 36 | * @param[in] m 37 | * The number of rows of the matrix A. m >= 0. 38 | * 39 | * @param[in] n 40 | * The number of columns of the matrix A. n >= 0. 41 | * 42 | * @param[in] alpha 43 | * The constant to which the off-diagonal elements are to be set. 44 | * 45 | * @param[in] beta 46 | * The constant to which the diagonal elements are to be set. 47 | * 48 | * @param[in,out] A 49 | * On entry, the m-by-n tile A. 50 | * On exit, A has been set accordingly. 51 | * 52 | * @param[in] lda 53 | * The leading dimension of the array A. lda >= max(1,m). 54 | * 55 | ******************************************************************************/ 56 | __attribute__((weak)) 57 | void plasma_core_zlaset(plasma_enum_t uplo, int m, int n, 58 | plasma_complex64_t alpha, plasma_complex64_t beta, 59 | plasma_complex64_t *A, int lda) 60 | { 61 | if (alpha == 0.0 && beta == 0.0 && uplo == PlasmaGeneral && m == lda) { 62 | // Use memset to zero continuous memory. 63 | memset((void*)A, 0, (size_t)m*n*sizeof(plasma_complex64_t)); 64 | } 65 | else { 66 | // Use LAPACKE_zlaset_work to initialize the matrix. 
67 | LAPACKE_zlaset_work(LAPACK_COL_MAJOR, lapack_const(uplo), 68 | m, n, alpha, beta, A, lda); 69 | } 70 | } 71 | 72 | /******************************************************************************/ 73 | void plasma_core_omp_zlaset(plasma_enum_t uplo, 74 | int mb, int nb, 75 | int i, int j, 76 | int m, int n, 77 | plasma_complex64_t alpha, plasma_complex64_t beta, 78 | plasma_complex64_t *A) 79 | { 80 | #pragma omp task depend(out:A[0:mb*nb]) 81 | plasma_core_zlaset(uplo, m, n, 82 | alpha, beta, 83 | A+i+j*mb, mb); 84 | } 85 | -------------------------------------------------------------------------------- /core_blas/core_zlauum.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_lauum 20 | * 21 | * Computes the product U * U^H or L^H * L, where the triangular 22 | * factor U or L is stored in the upper or lower triangular part of 23 | * the array A. 24 | * 25 | * If uplo = 'U' or 'u' then the upper triangle of the result is stored, 26 | * overwriting the factor U in A. 27 | * If uplo = 'L' or 'l' then the lower triangle of the result is stored, 28 | * overwriting the factor L in A. 29 | 30 | * 31 | ******************************************************************************* 32 | * 33 | * @param[in] uplo 34 | * = PlasmaUpper: Upper triangle of A is stored; 35 | * = PlasmaLower: Lower triangle of A is stored. 36 | * 37 | * 38 | * @param[in] n 39 | * The order of the matrix A. n >= 0. 40 | * 41 | * @param[in,out] A 42 | * On entry, the triangular factor U or L. 
43 | * On exit, if uplo = 'U', the upper triangle of A is 44 | * overwritten with the upper triangle of the product U * U^H; 45 | * if uplo = 'L', the lower triangle of A is overwritten with 46 | * the lower triangle of the product L^H * L. 47 | 48 | * 49 | * @param[in] lda 50 | * The leading dimension of the array A. lda >= max(1,n). 51 | * 52 | * @param[out] info 53 | * - 0 on successful exit 54 | * - < 0 if -i, the i-th argument had an illegal value 55 | * 56 | ******************************************************************************/ 57 | __attribute__((weak)) 58 | int plasma_core_zlauum(plasma_enum_t uplo, 59 | int n, 60 | plasma_complex64_t *A, int lda) 61 | { 62 | return LAPACKE_zlauum_work(LAPACK_COL_MAJOR, 63 | lapack_const(uplo), n, A, lda); 64 | } 65 | 66 | /******************************************************************************/ 67 | void plasma_core_omp_zlauum(plasma_enum_t uplo, 68 | int n, 69 | plasma_complex64_t *A, int lda, 70 | plasma_sequence_t *sequence, plasma_request_t *request) 71 | { 72 | #pragma omp task depend(inout:A[0:lda*n]) 73 | { 74 | if (sequence->status == PlasmaSuccess) { 75 | int info = plasma_core_zlauum(uplo, n, A, lda); 76 | if (info != PlasmaSuccess) { 77 | plasma_coreblas_error("core_zlauum() failed"); 78 | plasma_request_fail(sequence, request, PlasmaErrorInternal); 79 | } 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /core_blas/core_zpotrf.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_potrf 20 | * 21 | * Performs the Cholesky factorization of a Hermitian positive definite 22 | * matrix A. The factorization has the form 23 | * 24 | * \f[ A = L \times L^H, \f] 25 | * or 26 | * \f[ A = U^H \times U, \f] 27 | * 28 | * where U is an upper triangular matrix and L is a lower triangular matrix. 29 | * 30 | ******************************************************************************* 31 | * 32 | * @param[in] uplo 33 | * - PlasmaUpper: Upper triangle of A is stored; 34 | * - PlasmaLower: Lower triangle of A is stored. 35 | * 36 | * @param[in] n 37 | * The order of the matrix A. n >= 0. 38 | * 39 | * @param[in,out] A 40 | * On entry, the Hermitian positive definite matrix A. 41 | * If uplo = PlasmaUpper, the leading N-by-N upper triangular part of A 42 | * contains the upper triangular part of the matrix A, and the strictly 43 | * lower triangular part of A is not referenced. 44 | * If uplo = PlasmaLower, the leading N-by-N lower triangular part of A 45 | * contains the lower triangular part of the matrix A, and the strictly 46 | * upper triangular part of A is not referenced. 47 | * On exit, if return value = 0, the factor U or L from the Cholesky 48 | * factorization A = U^H*U or A = L*L^H. 49 | * 50 | * @param[in] lda 51 | * The leading dimension of the array A. lda >= max(1,n). 
52 | * 53 | ******************************************************************************/ 54 | __attribute__((weak)) 55 | int plasma_core_zpotrf(plasma_enum_t uplo, 56 | int n, 57 | plasma_complex64_t *A, int lda) 58 | { 59 | return LAPACKE_zpotrf_work(LAPACK_COL_MAJOR, 60 | lapack_const(uplo), 61 | n, 62 | A, lda); 63 | } 64 | 65 | /******************************************************************************/ 66 | void plasma_core_omp_zpotrf(plasma_enum_t uplo, 67 | int n, 68 | plasma_complex64_t *A, int lda, 69 | int iinfo, 70 | plasma_sequence_t *sequence, plasma_request_t *request) 71 | { 72 | #pragma omp task depend(inout:A[0:lda*n]) 73 | { 74 | if (sequence->status == PlasmaSuccess) { 75 | int info = plasma_core_zpotrf(uplo, 76 | n, 77 | A, lda); 78 | if (info != 0) 79 | plasma_request_fail(sequence, request, iinfo+info); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /core_blas/core_zsyrk.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_syrk 20 | * 21 | * Performs one of the symmetric rank k operations 22 | * 23 | * \f[ C = \alpha A \times A^T + \beta C, \f] 24 | * or 25 | * \f[ C = \alpha A^T \times A + \beta C, \f] 26 | * 27 | * where alpha and beta are scalars, C is an n-by-n symmetric 28 | * matrix, and A is an n-by-k matrix in the first case and a k-by-n 29 | * matrix in the second case. 
30 | * 31 | ******************************************************************************* 32 | * 33 | * @param[in] uplo 34 | * - PlasmaUpper: Upper triangle of C is stored; 35 | * - PlasmaLower: Lower triangle of C is stored. 36 | * 37 | * @param[in] trans 38 | * - PlasmaNoTrans: \f[ C = \alpha A \times A^T + \beta C; \f] 39 | * - PlasmaTrans: \f[ C = \alpha A^T \times A + \beta C. \f] 40 | * 41 | * @param[in] n 42 | * The order of the matrix C. n >= 0. 43 | * 44 | * @param[in] k 45 | * If trans = PlasmaNoTrans, number of columns of the A matrix; 46 | * if trans = PlasmaTrans, number of rows of the A matrix. 47 | * 48 | * @param[in] alpha 49 | * The scalar alpha. 50 | * 51 | * @param[in] A 52 | * A is an lda-by-ka matrix. 53 | * If trans = PlasmaNoTrans, ka = k; 54 | * if trans = PlasmaTrans, ka = n. 55 | * 56 | * @param[in] lda 57 | * The leading dimension of the array A. 58 | * If trans = PlasmaNoTrans, lda >= max(1, n); 59 | * if trans = PlasmaTrans, lda >= max(1, k). 60 | * 61 | * @param[in] beta 62 | * The scalar beta. 63 | * 64 | * @param[in,out] C 65 | * C is an ldc-by-n matrix. 66 | * On exit, the uplo part of the matrix is overwritten 67 | * by the uplo part of the updated matrix. 68 | * 69 | * @param[in] ldc 70 | * The leading dimension of the array C. ldc >= max(1, n). 
71 | * 72 | ******************************************************************************/ 73 | __attribute__((weak)) 74 | void plasma_core_zsyrk(plasma_enum_t uplo, plasma_enum_t trans, 75 | int n, int k, 76 | plasma_complex64_t alpha, const plasma_complex64_t *A, int lda, 77 | plasma_complex64_t beta, plasma_complex64_t *C, int ldc) 78 | { 79 | cblas_zsyrk(CblasColMajor, 80 | (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, 81 | n, k, 82 | CBLAS_SADDR(alpha), A, lda, 83 | CBLAS_SADDR(beta), C, ldc); 84 | } 85 | 86 | /******************************************************************************/ 87 | void plasma_core_omp_zsyrk( 88 | plasma_enum_t uplo, plasma_enum_t trans, 89 | int n, int k, 90 | plasma_complex64_t alpha, const plasma_complex64_t *A, int lda, 91 | plasma_complex64_t beta, plasma_complex64_t *C, int ldc, 92 | plasma_sequence_t *sequence, plasma_request_t *request) 93 | { 94 | int ak; 95 | if (trans == PlasmaNoTrans) 96 | ak = k; 97 | else 98 | ak = n; 99 | 100 | #pragma omp task depend(in:A[0:lda*ak]) \ 101 | depend(inout:C[0:ldc*n]) 102 | { 103 | if (sequence->status == PlasmaSuccess) 104 | plasma_core_zsyrk(uplo, trans, 105 | n, k, 106 | alpha, A, lda, 107 | beta, C, ldc); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /core_blas/core_ztrssq.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "plasma_internal.h" 16 | #include "core_lapack.h" 17 | 18 | #include 19 | 20 | /******************************************************************************/ 21 | // This computation also shows up in plasma_core_zsyssq() and can be factored out. 
22 | // LAPACK does real and imag components separately in zlassq. 23 | static inline void ssq(plasma_complex64_t value, double *scale, double *sumsq) 24 | { 25 | double absa = cabs(value); 26 | if (absa != 0.0) { // != propagates nan 27 | if (*scale < absa) { 28 | *sumsq = 1.0 + *sumsq*((*scale/absa)*(*scale/absa)); 29 | *scale = absa; 30 | } 31 | else { 32 | *sumsq = *sumsq + ((absa/(*scale))*(absa/(*scale))); 33 | } 34 | } 35 | } 36 | 37 | /******************************************************************************/ 38 | __attribute__((weak)) 39 | void plasma_core_ztrssq(plasma_enum_t uplo, plasma_enum_t diag, 40 | int m, int n, 41 | const plasma_complex64_t *A, int lda, 42 | double *scale, double *sumsq) 43 | { 44 | if (uplo == PlasmaUpper) { 45 | if (diag == PlasmaNonUnit) { 46 | for (int j = 0; j < n; j++) { 47 | ssq(A[lda*j], scale, sumsq); 48 | for (int i = 1; i < imin(j+1, m); i++) { 49 | ssq(A[lda*j+i], scale, sumsq); 50 | } 51 | } 52 | } 53 | else { // PlasmaUnit 54 | int j; 55 | for (j = 0; j < imin(n, m); j++) { 56 | ssq(1.0, scale, sumsq); 57 | for (int i = 0; i < j; i++) { 58 | ssq(A[lda*j+i], scale, sumsq); 59 | } 60 | } 61 | for (; j < n; j++) { 62 | ssq(A[lda*j], scale, sumsq); 63 | for (int i = 1; i < m; i++) { 64 | ssq(A[lda*j+i], scale, sumsq); 65 | } 66 | } 67 | } 68 | } 69 | else { // PlasmaLower 70 | if (diag == PlasmaNonUnit) { 71 | for (int j = 0; j < imin(n, m); j++) { 72 | ssq(A[lda*j+j], scale, sumsq); 73 | for (int i = j+1; i < m; i++) { 74 | ssq(A[lda*j+i], scale, sumsq); 75 | } 76 | } 77 | } 78 | else { // PlasmaUnit 79 | for (int j = 0; j < imin(n, m); j++) { 80 | ssq(1.0, scale, sumsq); 81 | for (int i = j+1; i < m; i++) { 82 | ssq(A[lda*j+i], scale, sumsq); 83 | } 84 | } 85 | } 86 | } 87 | } 88 | 89 | /******************************************************************************/ 90 | void plasma_core_omp_ztrssq(plasma_enum_t uplo, plasma_enum_t diag, 91 | int m, int n, 92 | const plasma_complex64_t *A, int lda, 93 | double 
*scale, double *sumsq, 94 | plasma_sequence_t *sequence, plasma_request_t *request) 95 | { 96 | #pragma omp task depend(in:A[0:lda*n]) \ 97 | depend(out:scale[0:n]) \ 98 | depend(out:sumsq[0:n]) 99 | { 100 | if (sequence->status == PlasmaSuccess) { 101 | *scale = 0.0; 102 | *sumsq = 1.0; 103 | plasma_core_ztrssq(uplo, diag, m, n, A, lda, scale, sumsq); 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /core_blas/core_ztrtri.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> c d s 10 | * 11 | **/ 12 | 13 | #include 14 | #include "plasma_types.h" 15 | #include "core_lapack.h" 16 | 17 | /***************************************************************************//** 18 | * 19 | * @ingroup core_trtri 20 | * 21 | * Computes the inverse of an upper or lower 22 | * triangular matrix A. 23 | * 24 | ******************************************************************************* 25 | * 26 | * @param[in] uplo 27 | * = PlasmaUpper: Upper triangle of A is stored; 28 | * = PlasmaLower: Lower triangle of A is stored. 29 | * 30 | * @param[in] diag 31 | * = PlasmaNonUnit: A is non-unit triangular; 32 | * = PlasmaUnit: A is unit triangular. 33 | * 34 | * @param[in] n 35 | * The order of the matrix A. n >= 0. 36 | * 37 | * @param[in,out] A 38 | * On entry, the triangular matrix A. If uplo = 'U', the 39 | * leading n-by-n upper triangular part of the array A 40 | * contains the upper triangular matrix, and the strictly 41 | * lower triangular part of A is not referenced. If uplo = 42 | * 'L', the leading n-by-n lower triangular part of the array 43 | * A contains the lower triangular matrix, and the strictly 44 | * upper triangular part of A is not referenced. 
If diag = 45 | * 'U', the diagonal elements of A are also not referenced and 46 | * are assumed to be 1. On exit, the (triangular) inverse of 47 | * the original matrix. 48 | * 49 | * @param[in] lda 50 | * The leading dimension of the array A. lda >= max(1,n). 51 | * 52 | * @retval PlasmaSuccess on successful exit 53 | * @retval < 0 if -i, the i-th argument had an illegal value 54 | * @retval > 0 if i, A(i,i) is exactly zero. The triangular 55 | * matrix is singular and its inverse can not be computed. 56 | * 57 | ******************************************************************************/ 58 | __attribute__((weak)) 59 | int plasma_core_ztrtri(plasma_enum_t uplo, plasma_enum_t diag, 60 | int n, 61 | plasma_complex64_t *A, int lda) 62 | { 63 | return LAPACKE_ztrtri_work(LAPACK_COL_MAJOR, 64 | lapack_const(uplo), lapack_const(diag), 65 | n, A, lda); 66 | } 67 | 68 | /******************************************************************************/ 69 | void plasma_core_omp_ztrtri(plasma_enum_t uplo, plasma_enum_t diag, 70 | int n, 71 | plasma_complex64_t *A, int lda, 72 | int iinfo, 73 | plasma_sequence_t *sequence, plasma_request_t *request) 74 | { 75 | #pragma omp task depend(inout:A[0:lda*n]) 76 | { 77 | if (sequence->status == PlasmaSuccess) { 78 | int info = plasma_core_ztrtri(uplo, diag, 79 | n, A, lda); 80 | if (info != 0) 81 | plasma_request_fail(sequence, request, iinfo+info); 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /fortran_examples/test_zpotrf.f90: -------------------------------------------------------------------------------- 1 | !> 2 | !> @file 3 | !> 4 | !> PLASMA is a software package provided by: 5 | !> University of Tennessee, US, 6 | !> University of Manchester, UK. 
!>
!> @precisions normal z -> s d c
!>
!> @brief Tests PLASMA Cholesky factorization

program test_zpotrf

    use, intrinsic :: iso_fortran_env
    use :: iso_c_binding
    use :: omp_lib
    use :: plasma
    implicit none

    ! Precisions
    integer, parameter :: sp = c_float
    integer, parameter :: dp = c_double

    ! set working precision, this value is rewritten for different precisions
    integer, parameter :: wp = dp

    integer,     parameter   :: n = 2000
    complex(wp), parameter   :: zmone = -1.0
    complex(wp), allocatable :: A(:,:), Aref(:,:)
    real(wp),    allocatable :: work(:)
    integer                  :: seed(4) = [0, 0, 0, 1]
    real(wp)                 :: Anorm, error, tol
    character                :: uploLapack ='L'
    integer                  :: uploPlasma = PlasmaLower
    integer                  :: lda, infoPlasma, infoLapack, i
    ! Status of housekeeping calls (allocation, PLASMA init/finalize). Kept
    ! separate from infoPlasma so that the factorization status returned by
    ! plasma_zpotrf is still intact when it is compared with infoLapack.
    integer                  :: istat
    logical                  :: success = .false.


    ! External functions
    real(wp), external :: dlamch, zlanhe, zlange


    tol = 50.0 * dlamch('E')
    print *,  "tol:", tol

    lda = max(1,n)

    ! Allocate matrices A and Aref and the norm workspace.
    ! work is handed to zlanhe/zlange below, so it has to be allocated even
    ! though only the Frobenius norm is requested.
    allocate(A(lda,n), Aref(lda,n), work(max(1,n)), stat=istat)
    if (istat /= 0) then
        print *, "allocation failed, stat:", istat
        stop 1
    end if

    ! Generate random Hermitian positive definite matrix A
    call zlarnv(1, seed, lda*n, A)
    ! Note: "*" is the element-wise product, A(i,j)*conjg(A(j,i)); the result
    ! is Hermitian with a real diagonal, which is then shifted by n.
    A = A * conjg(transpose(A))
    do i = 1, n
        A(i,i) = A(i,i) + n
    end do

    Aref = A

    !==============================================
    ! Initialize PLASMA.
    !==============================================
    call plasma_init(istat)

    !==============================================
    ! Perform Cholesky factorization.
    !==============================================
    call plasma_zpotrf(uploPlasma, n, A, lda, infoPlasma)

    !==============================================
    ! Finalise PLASMA.
    !==============================================
    call plasma_finalize(istat)

    ! Check Cholesky decomposition is correct

    ! Factorize matrix A using Cholesky
    call zpotrf(uploLapack, n, Aref, lda, infoLapack)
    print *, "zpotrf:", infoLapack

    if (infoLapack == 0) then

        ! Calculate difference A := -1*Aref+A, A := A-Aref
        ! A = A-Aref
        call zaxpy(lda*n, zmone, Aref, 1, A, 1)

        ! Calculate norms |Aref|_F, |Aref-A|_F,
        Anorm = zlanhe('F', uploLapack, n, Aref, lda, work)
        print *, "|Aref|_F:", Anorm

        error = zlange('F', n, n, A, lda, work)
        print *, "|A-Aref|_F:", error

        ! Calculate error |A-Aref|_F / |Aref|_F
        error = error/Anorm

        if (error < tol) success = .true.

    else

        ! LAPACK failed: the test passes only if PLASMA reported the same
        ! factorization status.
        if (infoPlasma == infoLapack) then
            error = 0.0
            success =.true.
        else
            error = 0.0
            success =.false.
        end if

    end if

    print *, "|A-Aref|_F / |Aref|_F:", error
    print *, "success:              ", success

    if (success) then
        write(*,'(a)') "  The result is CORRECT."
    else
        write(*,'(a)') "  The result is WRONG!"
    end if
    write(*,'(a)') ""

    ! Deallocate norm workspace
    deallocate(work, stat=istat)

    ! Deallocate matrix Aref
    deallocate(Aref, stat=istat)

    ! Deallocate matrix A
    deallocate(A, stat=istat)

end program test_zpotrf

--------------------------------------------------------------------------------
/include/bulge.h:
--------------------------------------------------------------------------------
/**
 *
 * @file bulge.h
 *
 *  PLASMA auxiliary routines
 *  PLASMA is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 2.8.0
 * @author Azzam Haidar
 * @date 2010-11-15
 *
 **/

/***************************************************************************//**
 *  bulge chasing global definition for all L/U HE/HB/GE matrices.
17 | **/ 18 | #ifndef _PLASMA_BULGE_H_ 19 | #define _PLASMA_BULGE_H_ 20 | 21 | /***************************************************************************//** 22 | * internal common routines to all bulgechasing function 23 | **/ 24 | inline static void findVTpos(int N, int NB, int Vblksiz, int sweep, int st, int *Vpos, int *TAUpos, int *Tpos, int *myblkid); 25 | inline static void findVTsiz(int N, int NB, int Vblksiz, int *blkcnt, int *LDV); 26 | inline static int plasma_ceildiv(int a, int b); 27 | 28 | //////////////////////////////////////////////////////////////////////////////////////////////////// 29 | inline static int plasma_ceildiv(int a, int b) 30 | { 31 | double r = (double)a/(double)b; 32 | r = (r-(int)r)==0? (int)r:(int)r+1; 33 | return (int) r; 34 | } 35 | //////////////////////////////////////////////////////////////////////////////////////////////////// 36 | 37 | //////////////////////////////////////////////////////////////////////////////////////////////////// 38 | inline static void findVTpos(int N, int NB, int Vblksiz, int sweep, int st, int *Vpos, int *TAUpos, int *Tpos, int *myblkid) 39 | { 40 | int prevcolblknb, prevblkcnt, prevcolblkid; 41 | int curcolblknb, nbprevcolblk, mastersweep; 42 | int blkid, locj, LDV; 43 | 44 | prevcolblknb = 0; 45 | prevblkcnt = 0; 46 | curcolblknb = 0; 47 | 48 | nbprevcolblk = sweep/Vblksiz; 49 | for (prevcolblkid = 0; prevcolblkid < nbprevcolblk; prevcolblkid++) 50 | { 51 | mastersweep = prevcolblkid * Vblksiz; 52 | prevcolblknb = plasma_ceildiv((N-(mastersweep+2)),NB); 53 | prevblkcnt = prevblkcnt + prevcolblknb; 54 | } 55 | curcolblknb = plasma_ceildiv((st-sweep),NB); 56 | blkid = prevblkcnt + curcolblknb -1; 57 | locj = sweep%Vblksiz; 58 | LDV = NB + Vblksiz -1; 59 | 60 | *myblkid= blkid; 61 | *Vpos = blkid*Vblksiz*LDV + locj*LDV + locj; 62 | *TAUpos = blkid*Vblksiz + locj; 63 | *Tpos = blkid*Vblksiz*Vblksiz + locj*Vblksiz + locj; 64 | } 65 | 
//////////////////////////////////////////////////////////////////////////////////////////////////// 66 | 67 | //////////////////////////////////////////////////////////////////////////////////////////////////// 68 | inline static void findVTsiz(int N, int NB, int Vblksiz, int *blkcnt, int *LDV) 69 | { 70 | int colblk, nbcolblk; 71 | int curcolblknb, mastersweep; 72 | 73 | *blkcnt = 0; 74 | nbcolblk = plasma_ceildiv((N-1),Vblksiz); 75 | for (colblk = 0; colblk 20 | #include 21 | 22 | // MKL LAPACKE doesn't provide LAPACK_GLOBAL macro, so define it here. 23 | // MKL provides all 3 name manglings (foo, foo_, FOO); pick foo_. 24 | #ifndef LAPACK_GLOBAL 25 | #define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ 26 | #endif 27 | #elif defined(PLASMA_HAVE_ESSL) || defined(PLASMA_WITH_ESSL) 28 | // GCC + ESSL(BLAS) + LAPACKE/CBLAS from LAPACK 29 | #include 30 | #include 31 | 32 | #ifndef LAPACK_GLOBAL 33 | #define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ 34 | #endif 35 | #else 36 | #include 37 | #include 38 | 39 | // Original cblas.h does: enum CBLAS_ORDER {...}; 40 | // Intel mkl_cblas.h does: typedef enum {...} CBLAS_ORDER; 41 | // LAPACK cblas.h does: typedef enum {...} CBLAS_ORDER; 42 | // OpenBLAS cblas.h does: typedef enum CBLAS_ORDER {...} CBLAS_ORDER; 43 | // We use (CBLAS_ORDER), so add these typedefs for original cblas.h 44 | #if defined(PLASMA_CBLAS_ADD_TYPEDEF) 45 | typedef enum CBLAS_ORDER CBLAS_ORDER; 46 | typedef enum CBLAS_TRANSPOSE CBLAS_TRANSPOSE; 47 | typedef enum CBLAS_UPLO CBLAS_UPLO; 48 | typedef enum CBLAS_DIAG CBLAS_DIAG; 49 | typedef enum CBLAS_SIDE CBLAS_SIDE; 50 | #endif 51 | #endif 52 | 53 | #ifndef lapack_int 54 | #define lapack_int int 55 | #endif 56 | 57 | #include "core_lapack_s.h" 58 | #include "core_lapack_d.h" 59 | #include "core_lapack_c.h" 60 | #include "core_lapack_z.h" 61 | 62 | #endif // PLASMA_CORE_LAPACK_H 63 | -------------------------------------------------------------------------------- /include/core_lapack_z.h: 
-------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | 13 | #ifndef PLASMA_CORE_LAPACK_Z_H 14 | #define PLASMA_CORE_LAPACK_Z_H 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | // LAPACK_GLOBAL is Fortran name mangling macro from LAPACKE 21 | 22 | // LAPACKE_zlantr broken (returns 0) in LAPACKE < 3.6.1 23 | #ifndef LAPACK_zlantr 24 | #define LAPACK_zlantr LAPACK_GLOBAL(zlantr, ZLANTR) 25 | double LAPACK_zlantr(const char *norm, const char *uplo, const char *diag, 26 | const lapack_int *m, const lapack_int *n, 27 | const plasma_complex64_t *A, const lapack_int *lda, 28 | double *work); 29 | #endif 30 | 31 | // LAPACKE_zlascl not available in LAPACKE < 3.6.0 32 | #ifndef LAPACK_zlascl 33 | #define LAPACK_zlascl LAPACK_GLOBAL(zlascl, ZLASCL) 34 | void LAPACK_zlascl(const char *type, const lapack_int *kl, const lapack_int *ku, 35 | const double *cfrom, const double *cto, 36 | const lapack_int *m, const lapack_int *n, 37 | plasma_complex64_t *A, const lapack_int *lda, 38 | lapack_int *info); 39 | #endif 40 | 41 | // LAPACKE_zlassq not available yet 42 | #ifndef LAPACK_zlassq 43 | #define LAPACK_zlassq LAPACK_GLOBAL(zlassq, ZLASSQ) 44 | void LAPACK_zlassq(const lapack_int *n, const plasma_complex64_t *x, const lapack_int *incx, 45 | double *scale, double *sumsq); 46 | #endif 47 | 48 | // LAPACKE_zlangb not available yet 49 | #ifndef LAPACK_zlangb 50 | #define LAPACK_zlangb LAPACK_GLOBAL(zlangb, ZLANGB) 51 | double LAPACK_zlangb(const char *norm, 52 | const lapack_int *n, const lapack_int *kl, const lapack_int *ku, 53 | const plasma_complex64_t *A, const lapack_int *lda, 54 | double *work); 55 | 56 | #endif 57 | 58 | #ifdef __cplusplus 59 | } // extern "C" 60 | #endif 61 | 62 | #endif // PLASMA_CORE_LAPACK_Z_H 63 | 
-------------------------------------------------------------------------------- /include/plasma.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | **/ 10 | #ifndef PLASMA_H 11 | #define PLASMA_H 12 | 13 | #include "plasma_async.h" 14 | #include "plasma_descriptor.h" 15 | #include "plasma_context.h" 16 | #include "plasma_tuning.h" 17 | #include "plasma_workspace.h" 18 | 19 | #include "plasma_s.h" 20 | #include "plasma_d.h" 21 | #include "plasma_ds.h" 22 | #include "plasma_c.h" 23 | #include "plasma_z.h" 24 | #include "plasma_zc.h" 25 | 26 | #endif // PLASMA_H 27 | -------------------------------------------------------------------------------- /include/plasma_async.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | #ifndef PLASMA_ASYNC_H 11 | #define PLASMA_ASYNC_H 12 | 13 | #include "plasma_types.h" 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | /******************************************************************************/ 20 | typedef struct { 21 | plasma_enum_t status; ///< error code 22 | } plasma_request_t; 23 | 24 | typedef struct { 25 | plasma_enum_t status; ///< error code 26 | plasma_request_t *request; ///< failed request 27 | } plasma_sequence_t; 28 | 29 | /******************************************************************************/ 30 | int plasma_request_fail(plasma_sequence_t *sequence, 31 | plasma_request_t *request, 32 | int status); 33 | 34 | int plasma_request_init(plasma_request_t *request); 35 | 36 | int plasma_sequence_init(plasma_sequence_t *sequence); 37 | 38 | #ifdef __cplusplus 39 | } // extern "C" 40 | #endif 41 | 42 | #endif // PLASMA_ASYNC_H 43 | -------------------------------------------------------------------------------- /include/plasma_barrier.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | #ifndef PLASMA_BARRIER_H 11 | #define PLASMA_BARRIER_H 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | /******************************************************************************/ 18 | typedef struct { 19 | int count; 20 | volatile int passed; 21 | } plasma_barrier_t; 22 | 23 | /******************************************************************************/ 24 | void plasma_barrier_init(plasma_barrier_t *barrier); 25 | void plasma_barrier_wait(plasma_barrier_t *barrier, int size); 26 | 27 | #ifdef __cplusplus 28 | } // extern "C" 29 | #endif 30 | 31 | #endif // PLASMA_BARRIER_H 32 | -------------------------------------------------------------------------------- /include/plasma_config.hin: -------------------------------------------------------------------------------- 1 | /* PLASMA configuration options */ 2 | 3 | #cmakedefine PLASMA_CBLAS_ADD_TYPEDEF 4 | 5 | #cmakedefine PLASMA_HAVE_MKL 6 | 7 | #cmakedefine PLASMA_HAVE_ESSL 8 | 9 | #cmakedefine PLASMA_WITH_ESSL 10 | 11 | #cmakedefine PLASMA_WITH_MKL 12 | 13 | #cmakedefine PLASMA_WITH_NETLIB 14 | 15 | #cmakedefine PLASMA_USE_LUA 16 | -------------------------------------------------------------------------------- /include/plasma_context.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | #ifndef PLASMA_CONTEXT_H 11 | #define PLASMA_CONTEXT_H 12 | 13 | #include "plasma_types.h" 14 | #include "plasma_barrier.h" 15 | 16 | #include 17 | #if defined(PLASMA_USE_LUA) 18 | #include 19 | #include 20 | #include 21 | #else 22 | #define lua_State void 23 | #endif 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | /******************************************************************************/ 30 | typedef struct { 31 | lua_State *L; ///< Lua state 32 | int tuning; ///< PlasmaEnabled or PlasmaDisabled 33 | int nb; ///< PlasmaNb 34 | int ib; ///< PlasmaIb 35 | plasma_enum_t inplace_outplace; ///< PlasmaInplaceOutplace 36 | int max_threads; ///< the value of OMP_NUM_THREADS 37 | int max_panel_threads; ///< max threads for panel factorization 38 | plasma_barrier_t barrier; ///< thread barrier for multithreaded tasks 39 | plasma_enum_t householder_mode; ///< PlasmaHouseholderMode 40 | int ss_ld; ///< static scheduler progress table leading dimension 41 | volatile int ss_abort; ///< static scheduler abort flag 42 | volatile int *ss_progress; ///< static scheduler progress table 43 | } plasma_context_t; 44 | 45 | typedef struct { 46 | pthread_t thread_id; ///< thread id 47 | plasma_context_t *context; ///< pointer to associated context 48 | } plasma_context_map_t; 49 | 50 | /******************************************************************************/ 51 | int plasma_init(void); 52 | int plasma_finalize(void); 53 | int plasma_set(plasma_enum_t param, int value); 54 | int plasma_get(plasma_enum_t param, int *value); 55 | 56 | int plasma_context_attach(void); 57 | int plasma_context_detach(void); 58 | plasma_context_t *plasma_context_self(void); 59 | void plasma_context_init(plasma_context_t *context); 60 | void plasma_context_finalize(plasma_context_t *context); 61 | 62 | #ifdef __cplusplus 63 | } // extern "C" 64 | #endif 65 | 66 | #endif // PLASMA_CONTEXT_H 67 | -------------------------------------------------------------------------------- 
/include/plasma_core_blas_zc.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions mixed zc -> ds 10 | * 11 | **/ 12 | #ifndef PLASMA_CORE_BLAS_ZC_H 13 | #define PLASMA_CORE_BLAS_ZC_H 14 | 15 | #include "plasma_async.h" 16 | #include "plasma_types.h" 17 | #include "plasma_workspace.h" 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | /******************************************************************************/ 24 | int plasma_core_zlag2c( 25 | int m, int n, 26 | plasma_complex64_t *A, int lda, 27 | plasma_complex32_t *As, int ldas); 28 | 29 | void plasma_core_clag2z( 30 | int m, int n, 31 | plasma_complex32_t *As, int ldas, 32 | plasma_complex64_t *A, int lda); 33 | 34 | /******************************************************************************/ 35 | void plasma_core_omp_zlag2c( 36 | int m, int n, 37 | plasma_complex64_t *A, int lda, 38 | plasma_complex32_t *As, int ldas, 39 | plasma_sequence_t *sequence, plasma_request_t *request); 40 | 41 | void plasma_core_omp_clag2z( 42 | int m, int n, 43 | plasma_complex32_t *As, int ldas, 44 | plasma_complex64_t *A, int lda, 45 | plasma_sequence_t *sequence, plasma_request_t *request); 46 | 47 | #ifdef __cplusplus 48 | } // extern "C" 49 | #endif 50 | 51 | #endif // PLASMA_CORE_BLAS_ZC_H 52 | -------------------------------------------------------------------------------- /include/plasma_error.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | #ifndef PLASMA_ERROR_H 11 | #define PLASMA_ERROR_H 12 | 13 | #include 14 | #include 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | /******************************************************************************/ 21 | #define plasma_warning(msg) \ 22 | plasma_warning_func_line_file(__func__, __LINE__, __FILE__, msg) 23 | 24 | #define plasma_error(msg) \ 25 | plasma_error_func_line_file(__func__, __LINE__, __FILE__, msg) 26 | 27 | #define plasma_error_with_code(msg, code) \ 28 | plasma_error_func_line_file_code(__func__, __LINE__, __FILE__, msg, \ 29 | code) 30 | 31 | #define plasma_fatal_error(msg) \ 32 | plasma_fatal_error_func_line_file(__func__, __LINE__, __FILE__, msg) 33 | 34 | /******************************************************************************/ 35 | static inline void plasma_warning_func_line_file( 36 | char const *func, int line, const char *file, const char *msg) 37 | { 38 | fprintf(stderr, 39 | "PLASMA WARNING at %d of %s() in %s: %s\n", 40 | line, func, file, msg); 41 | } 42 | 43 | /******************************************************************************/ 44 | static inline void plasma_error_func_line_file( 45 | char const *func, int line, const char *file, const char *msg) 46 | { 47 | fprintf(stderr, 48 | "PLASMA ERROR at %d of %s() in %s: %s\n", 49 | line, func, file, msg); 50 | } 51 | 52 | /******************************************************************************/ 53 | static inline void plasma_error_func_line_file_code( 54 | char const *func, int line, const char *file, const char *msg, int code) 55 | { 56 | fprintf(stderr, 57 | "PLASMA ERROR at %d of %s() in %s: %s %d\n", 58 | line, func, file, msg, code); 59 | } 60 | 61 | /******************************************************************************/ 62 | static inline void plasma_fatal_error_func_line_file( 63 | char const *func, int line, const char *file, const char *msg) 64 | { 65 | fprintf(stderr, 66 | "PLASMA FATAL ERROR at %d of 
%s() in %s: %s\n", 67 | line, func, file, msg); 68 | exit(EXIT_FAILURE); 69 | } 70 | 71 | #ifdef __cplusplus 72 | } // extern "C" 73 | #endif 74 | 75 | #endif // PLASMA_ERROR_H 76 | -------------------------------------------------------------------------------- /include/plasma_internal.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | **/ 10 | #ifndef PLASMA_INTERNAL_H 11 | #define PLASMA_INTERNAL_H 12 | 13 | #if ((__GNUC__ == 6) && (__GNUC_MINOR__ < 1)) || (__GNUC__ < 6) 14 | #define priority(p) 15 | #endif 16 | 17 | #include 18 | #include 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | /******************************************************************************/ 25 | static inline int imin(int a, int b) 26 | { 27 | if (a < b) 28 | return a; 29 | else 30 | return b; 31 | } 32 | 33 | /******************************************************************************/ 34 | static inline int imax(int a, int b) 35 | { 36 | if (a > b) 37 | return a; 38 | else 39 | return b; 40 | } 41 | 42 | /// Silences the compiler warning about an unused variable; the argument is parenthesized so any expression can be passed. 43 | #define plasma_unused( var ) ((void) (var)) 44 | 45 | #ifdef __cplusplus 46 | } // extern "C" 47 | #endif 48 | 49 | #include "plasma_internal_s.h" 50 | #include "plasma_internal_d.h" 51 | #include "plasma_internal_ds.h" 52 | #include "plasma_internal_c.h" 53 | #include "plasma_internal_z.h" 54 | #include "plasma_internal_zc.h" 55 | 56 | #endif // PLASMA_INTERNAL_H 57 | -------------------------------------------------------------------------------- /include/plasma_internal_zc.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions mixed zc -> ds 10 | * 11 | **/ 12 | #ifndef PLASMA_INTERNAL_ZC_H 13 | #define PLASMA_INTERNAL_ZC_H 14 | 15 | #include "plasma_async.h" 16 | #include "plasma_descriptor.h" 17 | #include "plasma_types.h" 18 | #include "plasma_workspace.h" 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | /******************************************************************************/ 25 | void plasma_pzlag2c( 26 | plasma_desc_t A, plasma_desc_t As, 27 | plasma_sequence_t *sequence, plasma_request_t *request); 28 | 29 | void plasma_pclag2z( 30 | plasma_desc_t As, plasma_desc_t A, 31 | plasma_sequence_t *sequence, plasma_request_t *request); 32 | 33 | #ifdef __cplusplus 34 | } // extern "C" 35 | #endif 36 | 37 | #endif // PLASMA_INTERNAL_ZC_H 38 | -------------------------------------------------------------------------------- /include/plasma_tree.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 8 | **/ 9 | 10 | #ifndef PLASMA_TREE_H 11 | #define PLASMA_TREE_H 12 | 13 | enum { 14 | PlasmaGeKernel = 1, 15 | PlasmaTtKernel = 2, 16 | PlasmaTsKernel = 3 17 | }; 18 | 19 | enum { 20 | PlasmaTreeFlatTs = 1, 21 | PlasmaTreeFlatTt = 2, 22 | PlasmaTreeBinary = 3, 23 | PlasmaTreeAuto = 4, 24 | PlasmaTreeGreedy = 5, 25 | PlasmaTreeBlockGreedy = 6, 26 | }; 27 | 28 | /***************************************************************************//** 29 | * Routine for registering a kernel into the list of operations for tile 30 | * QR and LQ factorization. 
31 | * @see plasma_omp_zgeqrf 32 | **/ 33 | static inline int plasma_tree_insert_operation( 34 | int *operations, 35 | int loperations, 36 | int ind_op, 37 | plasma_enum_t kernel, 38 | int col, int row, int rowpiv) 39 | { 40 | assert(ind_op < loperations); 41 | 42 | operations[ind_op*4] = kernel; 43 | operations[ind_op*4+1] = col; 44 | operations[ind_op*4+2] = row; 45 | operations[ind_op*4+3] = rowpiv; 46 | 47 | ind_op++; 48 | 49 | return ind_op; 50 | } 51 | 52 | /***************************************************************************//** 53 | * Routine for getting a kernel from the list of operations for tile 54 | * QR and LQ factorization. 55 | * @see plasma_omp_zgeqrf 56 | **/ 57 | static inline void plasma_tree_get_operation( 58 | int *operations, 59 | int ind_op, 60 | plasma_enum_t *kernel, 61 | int *col, int *row, int *rowpiv) 62 | { 63 | *kernel = operations[ind_op*4]; 64 | *col = operations[ind_op*4+1]; 65 | *row = operations[ind_op*4+2]; 66 | *rowpiv = operations[ind_op*4+3]; 67 | } 68 | 69 | void plasma_tree_operations( 70 | int mt, int nt, 71 | int **operations, int *num_operations, 72 | plasma_sequence_t *sequence, 73 | plasma_request_t *request); 74 | 75 | #endif // PLASMA_TREE_H 76 | -------------------------------------------------------------------------------- /include/plasma_tuning.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | #ifndef PLASMA_TUNING_H 11 | #define PLASMA_TUNING_H 12 | 13 | #include "plasma_context.h" 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | /******************************************************************************/ 20 | void plasma_tuning_init(plasma_context_t *plasma); 21 | 22 | void plasma_tuning_finalize(plasma_context_t *plasma); 23 | 24 | void plasma_tune_gbmm( 25 | plasma_context_t *plasma, plasma_enum_t dtyp, 26 | int m, int n, int k, int kl, int ku); 27 | 28 | void plasma_tune_gbtrf( 29 | plasma_context_t *plasma, plasma_enum_t dtyp, 30 | int n, int bw); 31 | 32 | void plasma_tune_geadd( 33 | plasma_context_t *plasma, plasma_enum_t dtyp, 34 | int m, int n); 35 | 36 | void plasma_tune_geinv( 37 | plasma_context_t *plasma, plasma_enum_t dtyp, 38 | int m, int n); 39 | 40 | void plasma_tune_gelqf( 41 | plasma_context_t *plasma, plasma_enum_t dtyp, 42 | int m, int n); 43 | 44 | void plasma_tune_gemm( 45 | plasma_context_t *plasma, plasma_enum_t dtyp, 46 | int m, int n, int k); 47 | 48 | void plasma_tune_geqrf( 49 | plasma_context_t *plasma, plasma_enum_t dtyp, 50 | int m, int n); 51 | 52 | void plasma_tune_geswp( 53 | plasma_context_t *plasma, plasma_enum_t dtyp, 54 | int m, int n); 55 | 56 | void plasma_tune_getrf( 57 | plasma_context_t *plasma, plasma_enum_t dtyp, 58 | int m, int n); 59 | 60 | void plasma_tune_hetrf( 61 | plasma_context_t *plasma, plasma_enum_t dtyp, 62 | int n); 63 | 64 | void plasma_tune_lacpy( 65 | plasma_context_t *plasma, plasma_enum_t dtyp, 66 | int m, int n); 67 | 68 | void plasma_tune_lag2c( 69 | plasma_context_t *plasma, plasma_enum_t dtyp, 70 | int m, int n); 71 | 72 | void plasma_tune_lange( 73 | plasma_context_t *plasma, plasma_enum_t dtyp, 74 | int m, int n); 75 | 76 | void plasma_tune_lansy( 77 | plasma_context_t *plasma, plasma_enum_t dtyp, 78 | int n); 79 | 80 | void plasma_tune_lantr( 81 | plasma_context_t *plasma, plasma_enum_t dtyp, 82 | int m, int n); 83 | 84 | void 
plasma_tune_lascl( 85 | plasma_context_t *plasma, plasma_enum_t dtyp, 86 | int m, int n); 87 | 88 | void plasma_tune_laset( 89 | plasma_context_t *plasma, plasma_enum_t dtyp, 90 | int m, int n); 91 | 92 | void plasma_tune_lauum( 93 | plasma_context_t *plasma, plasma_enum_t dtyp, 94 | int n); 95 | 96 | void plasma_tune_pbtrf( 97 | plasma_context_t *plasma, plasma_enum_t dtyp, 98 | int n); 99 | 100 | void plasma_tune_poinv( 101 | plasma_context_t *plasma, plasma_enum_t dtyp, 102 | int n); 103 | 104 | void plasma_tune_potrf( 105 | plasma_context_t *plasma, plasma_enum_t dtyp, 106 | int n); 107 | 108 | void plasma_tune_symm( 109 | plasma_context_t *plasma, plasma_enum_t dtyp, 110 | int m, int n); 111 | 112 | void plasma_tune_syr2k( 113 | plasma_context_t *plasma, plasma_enum_t dtyp, 114 | int n, int k); 115 | 116 | void plasma_tune_syrk( 117 | plasma_context_t *plasma, plasma_enum_t dtyp, 118 | int n, int k); 119 | 120 | void plasma_tune_tradd( 121 | plasma_context_t *plasma, plasma_enum_t dtyp, 122 | int m, int n); 123 | 124 | void plasma_tune_trmm( 125 | plasma_context_t *plasma, plasma_enum_t dtyp, 126 | int m, int n); 127 | 128 | void plasma_tune_trsm( 129 | plasma_context_t *plasma, plasma_enum_t dtyp, 130 | int m, int n); 131 | 132 | void plasma_tune_trtri( 133 | plasma_context_t *plasma, plasma_enum_t dtyp, 134 | int n); 135 | 136 | #ifdef __cplusplus 137 | } // extern "C" 138 | #endif 139 | 140 | #endif // PLASMA_TUNING_H 141 | -------------------------------------------------------------------------------- /include/plasma_workspace.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | **/ 10 | #ifndef PLASMA_WORKSPACE_H 11 | #define PLASMA_WORKSPACE_H 12 | 13 | #include "plasma_types.h" 14 | 15 | #include 16 | #include 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | typedef struct { 23 | void **spaces; ///< array of nthread pointers to workspaces 24 | size_t lworkspace; ///< length in elements of workspace on each core 25 | int nthread; ///< number of threads 26 | plasma_enum_t dtyp; ///< precision of the workspace 27 | } plasma_workspace_t; 28 | 29 | /******************************************************************************/ 30 | int plasma_workspace_create(plasma_workspace_t *workspace, size_t lworkspace, 31 | plasma_enum_t dtyp); 32 | 33 | int plasma_workspace_destroy(plasma_workspace_t *workspace); 34 | 35 | #ifdef __cplusplus 36 | } // extern "C" 37 | #endif 38 | 39 | #endif // PLASMA_WORKSPACE_H 40 | -------------------------------------------------------------------------------- /include/plasma_zc.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA header. 6 | * PLASMA is a software package provided by Univ. of Tennessee, 7 | * Univ. of Manchester, Univ. of California Berkeley and 8 | * Univ. of Colorado Denver. 
9 | * 10 | * @precisions mixed zc -> ds 11 | * 12 | **/ 13 | #ifndef PLASMA_ZC_H 14 | #define PLASMA_ZC_H 15 | 16 | #include "plasma_async.h" 17 | #include "plasma_descriptor.h" 18 | #include "plasma_workspace.h" 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | /***************************************************************************//** 25 | * Standard interface 26 | **/ 27 | int plasma_zcgesv( 28 | int n, int nrhs, 29 | plasma_complex64_t *pA, int lda, int *ipiv, 30 | plasma_complex64_t *pB, int ldb, 31 | plasma_complex64_t *pX, int ldx, int *iter); 32 | 33 | int plasma_zcposv( 34 | plasma_enum_t uplo, int n, int nrhs, 35 | plasma_complex64_t *pA, int lda, 36 | plasma_complex64_t *pB, int ldb, 37 | plasma_complex64_t *pX, int ldx, int *iter); 38 | 39 | int plasma_zcgbsv( 40 | int n, int kl, int ku, int nrhs, 41 | plasma_complex64_t *pAB, int ldab, int *ipiv, 42 | plasma_complex64_t *pB, int ldb, 43 | plasma_complex64_t *pX, int ldx, int *iter); 44 | 45 | int plasma_zlag2c( 46 | int m, int n, 47 | plasma_complex64_t *pA, int lda, 48 | plasma_complex32_t *pAs, int ldas); 49 | 50 | int plasma_clag2z( 51 | int m, int n, 52 | plasma_complex32_t *pAs, int ldas, 53 | plasma_complex64_t *pA, int lda); 54 | 55 | /***************************************************************************//** 56 | * Tile asynchronous interface 57 | **/ 58 | void plasma_omp_zcgesv( 59 | plasma_desc_t A, int *ipiv, 60 | plasma_desc_t B, plasma_desc_t X, 61 | plasma_desc_t As, plasma_desc_t Xs, plasma_desc_t R, 62 | double *work, double *Rnorm, double *Xnorm, int *iter, 63 | plasma_sequence_t *sequence, 64 | plasma_request_t *request); 65 | 66 | void plasma_omp_zcposv( 67 | plasma_enum_t uplo, 68 | plasma_desc_t A, plasma_desc_t B, plasma_desc_t X, 69 | plasma_desc_t As, plasma_desc_t Xs, plasma_desc_t R, 70 | double *W, double *Rnorm, double *Xnorm, int *iter, 71 | plasma_sequence_t *sequence, 72 | plasma_request_t *request); 73 | 74 | void plasma_omp_zcgbsv( 75 | 
plasma_desc_t A, int *ipiv, 76 | plasma_desc_t B, plasma_desc_t X, 77 | plasma_desc_t As, plasma_desc_t Xs, plasma_desc_t R, 78 | double *work, double *Rnorm, double *Xnorm, int *iter, 79 | plasma_sequence_t *sequence, 80 | plasma_request_t *request); 81 | 82 | void plasma_omp_zlag2c( 83 | plasma_desc_t A, plasma_desc_t As, 84 | plasma_sequence_t *sequence, plasma_request_t *request); 85 | 86 | void plasma_omp_clag2z( 87 | plasma_desc_t As, plasma_desc_t A, 88 | plasma_sequence_t *sequence, plasma_request_t *request); 89 | 90 | #ifdef __cplusplus 91 | } // extern "C" 92 | #endif 93 | 94 | #endif // PLASMA_ZC_H 95 | -------------------------------------------------------------------------------- /include/plasma_zlaebz2_work.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA header. 6 | * PLASMA is a software package provided by Univ. of Tennessee, 7 | * Univ. of Manchester, Univ. of California Berkeley and 8 | * Univ. of Colorado Denver. 9 | * 10 | * @precisions normal z -> s d c 11 | * 12 | **/ 13 | #ifndef PLASMA_ZLAEBZ2_H 14 | #define PLASMA_ZLAEBZ2_H 15 | /******************************************************************************* 16 | * These structures support the ZLAEBZ2 code and ZSTEVX2 code, for eigenvalue 17 | * and eigenvector discovery. 18 | *******************************************************************************/ 19 | 20 | /******************************************************************************* 21 | * zstein needs work areas to function. Instead of allocating and deallocating 22 | * these work areas for every vector, we provide a set of work areas per thread. 23 | * They are allocated as needed; so we don't allocate more often than we need, 24 | * and only allocate at most once per thread and not once per eigenvector. 
25 | *******************************************************************************/ 26 | 27 | typedef struct 28 | { 29 | int *IBLOCK; 30 | int *ISPLIT; 31 | plasma_complex64_t *WORK; 32 | int *IWORK; 33 | int *IFAIL; 34 | } zlaebz2_Stein_Array_t; 35 | 36 | /******************************************************************************* 37 | * Control is all the global variables needed. 38 | *******************************************************************************/ 39 | 40 | typedef struct 41 | { 42 | int N; 43 | plasma_complex64_t *diag; /* pointers the threads need. */ 44 | plasma_complex64_t *offd; 45 | plasma_enum_t range; /* PlasmaRangeV or PlasmaRangeI. */ 46 | plasma_enum_t jobtype; /* PlasmaNoVec, PlasmaVec, PlasmaCount */ 47 | int il; /* For PlasmaRangeI, least index desired. */ 48 | int iu; /* For PlasmaRangeI, max index desired. */ 49 | zlaebz2_Stein_Array_t *stein_arrays; /* Workspaces per thread for useStein.*/ 50 | int baseIdx; /* Number of EV less than user's low threshold. */ 51 | int error; /* first error, if non-zero. */ 52 | plasma_complex64_t *pVal; /* where to store eigenvalues. */ 53 | plasma_complex64_t *pVec; /* where to store eigenvectors. */ 54 | int *pMul; /* where to store Multiplicity. 
*/ 55 | } zlaebz2_Control_t; 56 | 57 | #endif /* PLASMA_ZLAEBZ2_H */ 58 | -------------------------------------------------------------------------------- /lib/pkgconfig/plasma.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | libdir=${exec_prefix}/lib 4 | includedir=${prefix}/include 5 | 6 | Name: plasma 7 | Description: Parallel Linear Algebra Software for Multicore Architectures 8 | Version: 3.0.0 9 | Cflags: -I${includedir} @CFLAGS@ 10 | Libs: -L${libdir} -lplasma -lcoreblas @LIBS@ 11 | Libs.private: 12 | Requires: @REQUIRES@ 13 | Requires.private: 14 | -------------------------------------------------------------------------------- /share/cmake/plasma.cmakein: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | list(APPEND CMAKE_MODULE_PATH "@PACKAGE_cmakeModulesDir@") 4 | -------------------------------------------------------------------------------- /share/pkgconfig/plasma.pcin: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ 3 | libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ 4 | bindir=${prefix}/@CMAKE_INSTALL_BINDIR@ 5 | 6 | Name: @PROJECT_NAME@ 7 | Description: @PROJECT_DESCRIPTION@ 8 | URL: @PROJECT_HOMEPAGE_URL@ 9 | Version: @PROJECT_VERSION@ 10 | # Cflags is declared exactly once: pkg-config reports an error when the Cflags field occurs twice in a .pc file. 11 | Cflags: -I"${includedir}" @CMAKE_CFLAGS@ 12 | Requires: 13 | Libs: -L"${libdir}" -lplasma -lplasma_core_blas @plasma_libs_spaced@ 14 | -------------------------------------------------------------------------------- /test/test_z.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | #ifndef TEST_Z_H 13 | #define TEST_Z_H 14 | 15 | #include "test.h" 16 | 17 | //============================================================================== 18 | // test routines 19 | //============================================================================== 20 | void test_dzamax(param_value_t param[], bool run); 21 | void test_zgbmm(param_value_t param[], bool run); 22 | void test_zgbsv(param_value_t param[], bool run); 23 | void test_zgbtrf(param_value_t param[], bool run); 24 | void test_zgeadd(param_value_t param[], bool run); 25 | void test_zgeinv(param_value_t param[], bool run); 26 | void test_zgelqf(param_value_t param[], bool run); 27 | void test_zgelqs(param_value_t param[], bool run); 28 | void test_zgels(param_value_t param[], bool run); 29 | void test_zgemm(param_value_t param[], bool run); 30 | void test_zgeqrf(param_value_t param[], bool run); 31 | void test_zgeqrs(param_value_t param[], bool run); 32 | void test_zgesdd(param_value_t param[], bool run); 33 | void test_zgesv(param_value_t param[], bool run); 34 | void test_zgetrf(param_value_t param[], bool run); 35 | void test_zgetri(param_value_t param[], bool run); 36 | void test_zgetri_aux(param_value_t param[], bool run); 37 | void test_zgetrs(param_value_t param[], bool run); 38 | void test_zhbtrd(param_value_t param[], bool run); 39 | void test_zheevd(param_value_t param[], bool run); 40 | void test_zhemm(param_value_t param[], bool run); 41 | void test_zher2k(param_value_t param[], bool run); 42 | void test_zherk(param_value_t param[], bool run); 43 | void test_zhetrf(param_value_t param[], bool run); 44 | void test_zhesv(param_value_t param[], bool run); 45 | void test_zlacpy(param_value_t param[], bool run); 46 | void test_zlag2c(param_value_t param[], bool run); 47 | void test_zlange(param_value_t param[], bool run); 48 | void test_zlangb(param_value_t param[], bool run); 49 | void test_zlanhe(param_value_t param[], bool 
run); 50 | void test_zlansy(param_value_t param[], bool run); 51 | void test_zlantr(param_value_t param[], bool run); 52 | void test_zlascl(param_value_t param[], bool run); 53 | void test_zlaset(param_value_t param[], bool run); 54 | void test_zgeswp(param_value_t param[], bool run); 55 | void test_zlauum(param_value_t param[], bool run); 56 | void test_zpbsv(param_value_t param[], bool run); 57 | void test_zpbtrf(param_value_t param[], bool run); 58 | void test_zpoinv(param_value_t param[], bool run); 59 | void test_zposv(param_value_t param[], bool run); 60 | void test_zpotrf(param_value_t param[], bool run); 61 | void test_zpotri(param_value_t param[], bool run); 62 | void test_zpotrs(param_value_t param[], bool run); 63 | void test_zsymm(param_value_t param[], bool run); 64 | void test_zstevx2(param_value_t param[], bool run); 65 | void test_zsyr2k(param_value_t param[], bool run); 66 | void test_zsyrk(param_value_t param[], bool run); 67 | void test_ztradd(param_value_t param[], bool run); 68 | void test_ztrmm(param_value_t param[], bool run); 69 | void test_ztrsm(param_value_t param[], bool run); 70 | void test_ztrtri(param_value_t param[], bool run); 71 | void test_zunmlq(param_value_t param[], bool run); 72 | void test_zunmqr(param_value_t param[], bool run); 73 | 74 | //============================================================================== 75 | // utilities 76 | //============================================================================== 77 | void plasma_zprint_matrix( 78 | const char* label, int m, int n, plasma_complex64_t* A, int lda ); 79 | 80 | void plasma_zprint_vector( 81 | const char* label, int n, plasma_complex64_t* x, int incx ); 82 | 83 | #endif // TEST_Z_H 84 | -------------------------------------------------------------------------------- /test/test_zc.h: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * 
University of Tennessee, US, 7 | * University of Manchester, UK. 8 | * 9 | * @precisions mixed zc -> ds 10 | * 11 | **/ 12 | #ifndef TEST_ZC_H 13 | #define TEST_ZC_H 14 | 15 | #include "test.h" 16 | 17 | //============================================================================== 18 | // test routines 19 | //============================================================================== 20 | void test_zcgesv(param_value_t param[], bool run); 21 | void test_zcposv(param_value_t param[], bool run); 22 | void test_zcgbsv(param_value_t param[], bool run); 23 | void test_zlag2c(param_value_t param[], bool run); 24 | void test_clag2z(param_value_t param[], bool run); 25 | 26 | #endif // TEST_ZC_H 27 | -------------------------------------------------------------------------------- /test/test_zprint.c: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | * @file 4 | * 5 | * PLASMA is a software package provided by: 6 | * University of Tennessee, US, 7 | * University of Manchester, UK. 
8 | * 9 | * @precisions normal z -> s d c 10 | * 11 | **/ 12 | #include "plasma.h" 13 | 14 | #include <stdio.h> 15 | 16 | #define COMPLEX 17 | #undef REAL 18 | 19 | #define A( i, j ) A[ (i) + (j)*lda ] 20 | 21 | //------------------------------------------------------------------------------ 22 | void plasma_zprint_matrix( 23 | const char* label, int m, int n, plasma_complex64_t* A, int lda ) 24 | { 25 | const plasma_complex64_t zero = 0; 26 | printf( "%s = [\n", label ); 27 | for (int i = 0; i < m; ++i) { 28 | printf( " " ); 29 | for (int j = 0; j < n; ++j) { 30 | plasma_complex64_t Aij = A( i, j ); 31 | #ifdef COMPLEX 32 | if (Aij == zero) { 33 | printf( " 0.0 " ); 34 | } 35 | else { 36 | printf( " %9.4f + %9.4fi", creal( Aij ), cimag( Aij ) ); 37 | } 38 | #else 39 | if (Aij == zero) { 40 | printf( " 0.0 " ); 41 | } 42 | else { 43 | printf( " %9.4f", Aij ); 44 | } 45 | #endif 46 | } 47 | printf( "\n" ); 48 | } 49 | printf( "];\n" ); 50 | } 51 | 52 | //------------------------------------------------------------------------------ 53 | void plasma_zprint_vector( 54 | const char* label, int n, plasma_complex64_t* x, int incx ) 55 | { 56 | const plasma_complex64_t zero = 0; 57 | printf( "%s = [\n ", label ); 58 | for (int i = 0; i < n; ++i) { 59 | plasma_complex64_t xi = x[ i*incx ]; 60 | #ifdef COMPLEX 61 | if (xi == zero) { 62 | printf( " 0.0 " ); 63 | } 64 | else { 65 | printf( " %9.4f + %9.4fi", creal( xi ), cimag( xi ) ); 66 | } 67 | #else 68 | if (xi == zero) { 69 | printf( " 0.0 " ); 70 | } 71 | else { 72 | printf( " %9.4f", xi ); 73 | } 74 | #endif 75 | } 76 | printf( "\n];\n" ); 77 | } 78 | -------------------------------------------------------------------------------- /tools/create_release_file.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # 3 | # This script is to be run in PLASMA's top-level directory. 4 | # It will create a release file in the current directory. 
5 | # 6 | # The script will search for the version numbers in the include directory. 7 | # The script will check if the version numbers match in the CMake installation script. 8 | # The script relies on using a modern TAR with advanced options that minimize the size of the resulting archive file. 9 | # 10 | 11 | MJR=`grep -r PLASMA_VERSION_MAJOR include | awk '{print $NF;}'` 12 | MNR=`grep -r PLASMA_VERSION_MINOR include | awk '{print $NF;}'` 13 | PTC=`grep -r PLASMA_VERSION_PATCH include | awk '{print $NF;}'` 14 | 15 | # this can come from include/plasma_types.h 16 | VERSION=${MJR}.${MNR}.$PTC 17 | 18 | if test -z "`grep -i plasma.version CMakeLists.txt | grep $VERSION`" ; then 19 | echo Version mismatch between headers $VERSION and CMakeLists.txt 20 | grep -i plasma.version CMakeLists.txt 21 | exit 127 22 | fi 23 | 24 | DIR=plasma-${VERSION} 25 | 26 | if test ! -e $DIR ; then 27 | ln -s . $DIR 28 | fi 29 | 30 | echo Preparing $DIR ... 31 | 32 | find -H ${DIR} -maxdepth 1 -type f -name '[A-Za-z0-9]*' | \ 33 | xargs echo ${DIR}/*/*.hin ${DIR}/*/*.[hc] ${DIR}/config/*.py ${DIR}/tools/*.py ${DIR}/cmake/*.cmake ${DIR}/*/*.f90 ${DIR}/*/*.lua ${DIR}/*/doxygen* ${DIR}/share/pkgconfig/plasma.pcin ${DIR}/share/cmake/plasma.cmakein | \ 34 | xargs tar --exclude=.hgtags --exclude=plasma_config.h --exclude=Makefile.\*.gen --owner=root --group=root --mtime=1970-01-01 -chof ${DIR}.tar 35 | gzip --best --rsyncable --verbose ${DIR}.tar 36 | 37 | rm -r $DIR 38 | -------------------------------------------------------------------------------- /tools/doxygen_filter: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -0777 -p 2 | # 3 | # Used as INPUT_FILTER in Doxyfile to strip out: 4 | # @generated 5 | # @precisions 6 | # from PLASMA docs, as doxygen yields warnings and they end up in the docs. 
7 | # 8 | # -0777 slurps whole files; see 'man perlrun' 9 | 10 | s/\@(generated|precisions).*//g; 11 | -------------------------------------------------------------------------------- /tools/doxygen_groups.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Finds doxygen groups that are in use, sorts & puts in file "ingroup" 4 | # Finds doxygen groups that are defined, sorts & puts in file "defgroup" 5 | # Doing 6 | # diff ingroup defgroup 7 | # provides an easy way to see what groups are used vs. defined. 8 | # 9 | # Usage, from top level plasma directory: 10 | # ./tools/doxygen_groups.sh 11 | # 12 | # On MacOS, uses opendiff; otherwise uses diff. 13 | 14 | grep -E -h '@ingroup' */*.h */*.c | \ 15 | perl -pe 's/^ *\*//; s@^ *///@@; s/^ +//; s/\@ingroup/\@group/;' | \ 16 | sort --unique > ingroup 17 | 18 | grep -E -h '^ *@defgroup' docs/doxygen/groups.dox | \ 19 | grep -E -v 'group_|core_blas' | \ 20 | perl -pe 's/^ *\@defgroup +(\w+).*/\@group $1/;' | \ 21 | sort > defgroup 22 | 23 | command -v opendiff > /dev/null 2>&1 24 | if [ $? -eq 0 ]; then 25 | opendiff ingroup defgroup 26 | else 27 | diff ingroup defgroup 28 | fi 29 | -------------------------------------------------------------------------------- /tools/lua-5.3.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/icl-utk-edu/plasma/4ef66b8f24f52b0097fd81803e6788a81fd7468d/tools/lua-5.3.4.tar.gz -------------------------------------------------------------------------------- /tools/makespackrelease.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | 4 | import hashlib 5 | import os 6 | import pathlib 7 | import shutil 8 | import subprocess 9 | import sys 10 | import time 11 | 12 | 13 | def log(*args): 14 | print(*args) 15 | 16 | 17 | def trashit(filename): 18 | trash = "trash" 19 | if not os.path.exists(trash): 20 | os.mkdir(trash) 21 | if os.path.exists(filename): 22 | os.rename(filename, trash + "/" + filename + "_" + str(int(time.time()))) 23 | 24 | 25 | def main(argv): 26 | package_name = os.path.split(os.path.abspath(os.path.curdir))[1] 27 | 28 | for line in open("CMakeLists.txt"): 29 | package_name + "version" 30 | filename = argv[1] 31 | version = filename.replace(package_name, "").replace("-", "").replace(".tar.gz", "") 32 | log("Processing", package_name, "package version", version) 33 | 34 | if 0 and os.path.exists(filename): # FIXME: add proper creation of new file 35 | if "--remove" in argv: 36 | trashit(filename) 37 | else: 38 | if 0:subprocess.call(["tar", "--exclude=.git", "--exclude=tools", "-chzf", filename, "xsdk-examples-0.2.0"]) 39 | if 0:subprocess.call(["sh", "tools/create_release_file.sh"]) 40 | 41 | dgst = hashlib.sha256(open(filename,"rb").read()).hexdigest() 42 | dstdir = pathlib.Path(os.environ["SPACK_ROOT"]) / pathlib.Path("var/spack/cache/_source-cache/archive/"+dgst[:2]) 43 | log("New SHA256", dgst) 44 | if not dstdir.exists(): 45 | log("Created ", dstdir) 46 | os.makedirs(dstdir, exist_ok=True) 47 | dstfn = dstdir / pathlib.Path(dgst+".tar.gz") 48 | if not os.path.exists(dstfn): 49 | log("Copying ", filename) 50 | shutil.copy(filename, dstfn) 51 | 52 | package = pathlib.Path(os.environ["SPACK_ROOT"]) / pathlib.Path("var/spack/repos/builtin/packages/{}/package.py".format(package_name)) 53 | 54 | op = "" 55 | pkgdt = open(package).read() 56 | idx = pkgdt.find("version('{}'".format(version)) 57 | if idx > 0: 58 | eqidx = pkgdt.find("=", idx) # = 59 | qtidx = pkgdt.find("'", eqidx) # ' 60 | q2idx = pkgdt.find("'", qtidx+1) # ' 61 | 
sha256=pkgdt[qtidx+1:q2idx] 62 | if sha256 == dgst: 63 | log("{} already available".format(sha256)) 64 | else: 65 | with open(package, "w") as fd: 66 | fd.write(pkgdt.replace(sha256, dgst)) 67 | op = "Replacing" 68 | 69 | elif -1 == idx: 70 | # find first version spec 71 | idx = pkgdt.find("version(") 72 | indent = 4 * " " 73 | if idx > 0: 74 | with open(package, "w") as fd: 75 | fd.write(pkgdt[:idx] + "version('{}', sha256='{}')\n".format(version, dgst) + indent + pkgdt[idx:]) 76 | op = "Adding" 77 | sha256 = dgst 78 | 79 | else: 80 | raise RuntimeError("Didn't find spot for version {}".format(version)) 81 | 82 | if op: 83 | log("{} SHA256={} in Spack's {}".format(op, sha256, package)) 84 | 85 | if "--remove" in argv: 86 | trashit(filename) 87 | 88 | return 0 89 | 90 | 91 | if "__main__" == __name__: 92 | sys.exit(main(sys.argv)) 93 | -------------------------------------------------------------------------------- /tools/old_build_system/config/__init__.py: -------------------------------------------------------------------------------- 1 | # PLASMA is a software package provided by: 2 | # University of Tennessee, US, 3 | # University of Manchester, UK. 4 | 5 | from config import * 6 | -------------------------------------------------------------------------------- /tools/old_build_system/config/environment.py: -------------------------------------------------------------------------------- 1 | # PLASMA is a software package provided by: 2 | # University of Tennessee, US, 3 | # University of Manchester, UK. 
4 | 5 | import os 6 | 7 | # ------------------------------------------------------------------------------ 8 | class Environment: 9 | def __init__( self ): 10 | self.stack = [ os.environ, {} ] 11 | # end 12 | 13 | # push( self, env={} ) would use the same hash each time; 14 | # this pushes a new hash each time 15 | def push( self, env=None ): 16 | if (not env): 17 | env = {} 18 | self.stack.append( env ) 19 | 20 | def top( self ): 21 | return self.stack[-1] 22 | 23 | def pop( self ): 24 | if (len(self.stack) == 2): 25 | raise Exception( "attempting to pop last user environment" ) 26 | return self.stack.pop() 27 | 28 | # compared to __getitem__, returns None if key doesn't exist 29 | def get( self, key ): 30 | for env in self.stack[::-1]: 31 | if (env.has_key( key )): 32 | return env[key] 33 | return None 34 | 35 | def __getitem__( self, key ): 36 | for env in self.stack[::-1]: 37 | if (env.has_key( key )): 38 | return env[key] 39 | return '' # or None? 40 | 41 | # todo: should val = None delete the key? 
42 | def __setitem__( self, key, val ): 43 | self.stack[-1][ key ] = val 44 | 45 | def append( self, key, val ): 46 | orig = self[ key ] #self.get( key ) 47 | if (val): 48 | if (orig): 49 | val = orig + ' ' + val 50 | self[key] = val 51 | return orig 52 | 53 | def prepend( self, key, val ): 54 | orig = self[ key ] #self.get( key ) 55 | if (val): 56 | if (orig): 57 | val = val + ' ' + orig 58 | self[key] = val 59 | return orig 60 | # end 61 | 62 | # ------------------------------------------------------------------------------ 63 | def test(): 64 | env = Environment() 65 | print env.stack 66 | print 67 | 68 | CC = env['CC'] 69 | CXX = env['CXX'] 70 | print 'CC <' + CC + '>' 71 | print 'CXX <' + CXX + '>' 72 | print 73 | 74 | env.push() 75 | print env.stack 76 | print 77 | 78 | env['CC'] = 'icc' 79 | print env.stack 80 | print 81 | 82 | CC = env['CC'] 83 | env['CC'] += 'foo' 84 | print env.stack 85 | print 86 | 87 | CXX = env['CXX'] 88 | env['CXX'] += 'foo' 89 | print env.stack 90 | print 91 | 92 | save_CFLAGS = env.prepend( 'CFLAGS', '-O2' ) 93 | print env.stack 94 | env['CFLAGS'] = save_CFLAGS 95 | print env.stack 96 | print 97 | 98 | save_CFLAGS = env.append( 'CFLAGS', '-O2' ) 99 | print env.stack 100 | env['CFLAGS'] = save_CFLAGS 101 | print env.stack 102 | print 103 | 104 | save_CXXFLAGS = env.prepend( 'CXXFLAGS', '-O2' ) 105 | print env.stack 106 | env['CXXFLAGS'] = save_CXXFLAGS 107 | print env.stack 108 | print 109 | 110 | save_CXXFLAGS = env.prepend( 'CXXFLAGS', '-g' ) 111 | print env.stack 112 | print 113 | 114 | save_CXXFLAGS = env.append( 'CXXFLAGS', '-Wshadow' ) 115 | print env.stack 116 | env['CXXFLAGS'] = save_CXXFLAGS 117 | print env.stack 118 | print 119 | # end 120 | 121 | # ------------------------------------------------------------------------------ 122 | if (__name__ == '__main__'): 123 | test() 124 | -------------------------------------------------------------------------------- /tools/old_build_system/configure.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # PLASMA is a software package provided by: 4 | # University of Tennessee, US, 5 | # University of Manchester, UK. 6 | 7 | from __future__ import print_function 8 | 9 | import sys 10 | import re 11 | 12 | import config 13 | from config import Error, red, font_bold, font_normal 14 | 15 | print( 16 | font_bold + red + 17 | ''' 18 | Welcome to PLASMA! 19 | ''' + font_normal + ''' 20 | This script, ''' + sys.argv[0] + ''', will create a make.inc configuration file, 21 | which you can then edit if needed. 22 | 23 | PLASMA requires a C compiler that supports C99 and OpenMP 4.0 with task depend. 24 | This will search for available compilers. Alternatively, set $CC to your C 25 | compiler. Variables can be set in your environment: 26 | export CC=gcc # in sh/bash 27 | or on the make or configure command line: 28 | make CC=gcc 29 | python ''' + sys.argv[0] + ''' CC=gcc 30 | 31 | PLASMA can optionally compile Fortran 2008 interfaces, if a suitable Fortran 32 | compiler is available. You can set $FC to your Fortran compiler. 33 | 34 | PLASMA requires BLAS, CBLAS, LAPACK, and LAPACKE libraries. These are often 35 | provided by optimized vendor math libraries such as AMD ACML, Cray LibSci, 36 | IBM ESSL, Intel MKL, or MacOS Accelerate; or open source libraries such as 37 | ATLAS or OpenBLAS. 
The open source reference version of LAPACK, with CBLAS 38 | and LAPACKE, is available at: 39 | http://www.netlib.org/lapack/ 40 | 41 | If the (C)BLAS and LAPACK(E) libraries are not in the compiler's default path, 42 | specify their location using one or more of the variables below, again set 43 | either in the environment or on the configure command line: 44 | $ACML_DIR, $ATLAS_DIR, $MKLROOT, $OPENBLAS_DIR, $CBLAS_DIR, $LAPACK_DIR 45 | $MKLROOT is often set in ~/.bash_profile or ~/.cshrc by one of: 46 | source /path/to/intel/compilers/bin/compilervars.sh intel64 47 | source /path/to/intel/compilers/bin/compilervars.csh intel64 48 | 49 | Alternatively, specify the necessary flags with: 50 | $LAPACK_CFLAGS Include paths, e.g., -I/opt/lapack/include 51 | $LAPACK_LIBS Library paths and libraries, e.g., 52 | -L/opt/lapack/lib -llapacke -llapack -lcblas -lblas 53 | 54 | [return to continue, q to quit] ''', end='' ) 55 | reply = raw_input() 56 | if (re.search( 'q|quit', reply, re.I )): 57 | exit(1) 58 | 59 | try: 60 | config.init() 61 | 62 | config.prog_cc() 63 | #config.prog_cxx() 64 | config.prog_fortran( required=False ) 65 | #config.prog_f77( required=False ) 66 | 67 | config.blas() 68 | print() 69 | config.blas_return_float( required=False ) 70 | config.blas_return_complex( required=False ) 71 | 72 | config.cblas() 73 | config.cblas_enum() 74 | config.lapack() 75 | 76 | #config.set_verbose() 77 | config.lapacke() 78 | print() 79 | config.lapacke_dlascl( required=False ) 80 | config.lapacke_dlantr( required=False ) 81 | config.lapacke_dlassq( required=False ) 82 | 83 | #config.output_headers( 'config.h' ) 84 | config.output_files( 'make.inc' ) 85 | config.print_header( '' ) 86 | 87 | except Error, e: 88 | config.print_error( 'Configuration aborted: ' + str(e) ) 89 | exit(1) 90 | -------------------------------------------------------------------------------- /tools/old_build_system/make.inc.in: 
-------------------------------------------------------------------------------- 1 | # PLASMA make.inc template, processed by configure.py 2 | # 3 | # PLASMA is a software package provided by: 4 | # University of Tennessee, US, 5 | # University of Manchester, UK. 6 | 7 | # ------------------------------------------------------------------------------ 8 | # programs and flags 9 | 10 | CC = @CC@ 11 | FC = @FC@ 12 | RANLIB = ranlib 13 | AR = ar 14 | 15 | # Use -fPIC to make shared (.so) and static (.a) libraries; 16 | # can be commented out if making only static libraries. 17 | FPIC = -fPIC 18 | 19 | CFLAGS = ${FPIC} @CFLAGS@ @OPENMP_CFLAGS@ @DEFS@ 20 | FCFLAGS = ${FPIC} @FCFLAGS@ @OPENMP_FCFLAGS@ 21 | LDFLAGS = ${FPIC} @LDFLAGS@ @OPENMP_CFLAGS@ 22 | LIBS = @LIBS@ 23 | 24 | # Enable or disable compiling Fortran 2008 interfaces into PLASMA library 25 | # 0 - disabled 26 | # 1 - enabled; build Fortran interfaces and examples 27 | fortran ?= 0 28 | 29 | # where to install PLASMA 30 | prefix ?= /usr/local/plasma 31 | 32 | # one of: aix bsd c89 freebsd generic linux macosx mingw posix solaris 33 | # usually generic is fine 34 | lua_platform ?= generic 35 | -------------------------------------------------------------------------------- /tools/old_build_system/make.inc.mkl-gcc: -------------------------------------------------------------------------------- 1 | # PLASMA example make.inc, using Intel MKL and gcc 2 | # 3 | # PLASMA is a software package provided by: 4 | # University of Tennessee, US, 5 | # University of Manchester, UK. 6 | 7 | # -------------------- 8 | # programs 9 | 10 | CC = gcc 11 | FC = gfortran 12 | AR = ar 13 | RANLIB = ranlib 14 | 15 | 16 | # -------------------- 17 | # flags 18 | 19 | # Use -fPIC to make shared (.so) and static (.a) libraries; 20 | # can be commented out if making only static libraries. 
21 | FPIC = -fPIC 22 | 23 | CFLAGS = -fopenmp $(FPIC) -O3 -std=c99 -Wall -pedantic -Wshadow -Wno-unused-function 24 | FCFLAGS = -fopenmp $(FPIC) -O3 -std=f2008 -Wall 25 | LDFLAGS = -fopenmp $(FPIC) 26 | 27 | # options for MKL 28 | CFLAGS += -DPLASMA_HAVE_MKL 29 | 30 | # one of: aix bsd c89 freebsd generic linux macosx mingw posix solaris 31 | # usually generic is fine 32 | lua_platform = linux 33 | 34 | # -------------------- 35 | # PLASMA is a library in C, but can also be used from Fortran. 36 | # In this case, the Fortran interface needs to be built. 37 | # 0 = no - Fortran codes will not be touched, the FC and FCFLAGS variables 38 | # will not be referenced 39 | # 1 = yes - Fortran interface will be compiled and included into the library 40 | 41 | fortran ?= 1 42 | 43 | # -------------------- 44 | # libraries 45 | # This assumes $MKLROOT is set in your environment. 46 | # Add these to your .cshrc or .bashrc, adjusting for where MKL is installed: 47 | # in .cshrc: source /opt/intel/bin/compilervars.csh intel64 48 | # in .bashrc: source /opt/intel/bin/compilervars.sh intel64 49 | 50 | # With gcc OpenMP (libgomp), use -lmkl_sequential or (-lmkl_gnu_thread with MKL_NUM_THREADS=1). 51 | # With icc OpenMP (libiomp5), use -lmkl_sequential or (-lmkl_intel_thread with MKL_NUM_THREADS=1). 52 | LIBS = -L$(MKLROOT)/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lm -ldl 53 | 54 | INC = -I$(MKLROOT)/include 55 | -------------------------------------------------------------------------------- /tools/old_build_system/make.inc.power8-gcc: -------------------------------------------------------------------------------- 1 | # PLASMA example make.inc, using IBM ESSL and gcc 2 | # 3 | # PLASMA is a software package provided by: 4 | # University of Tennessee, US, 5 | # University of Manchester, UK. 
6 | 7 | # -------------------- 8 | # programs 9 | 10 | CC = gcc 11 | FC = gfortran 12 | AR = ar 13 | RANLIB = ranlib 14 | 15 | 16 | # -------------------- 17 | # flags 18 | 19 | # Use -fPIC to make shared (.so) and static (.a) libraries; 20 | # can be commented out if making only static libraries. 21 | FPIC = -fPIC 22 | 23 | CFLAGS = -fopenmp $(FPIC) -O3 -std=c99 -Wall -pedantic -Wshadow -Wno-unused-function 24 | FCFLAGS = -fopenmp $(FPIC) -O3 -std=f2008 -Wall 25 | LDFLAGS = -fopenmp $(FPIC) 26 | 27 | # options for ESSL 28 | CFLAGS += -DPLASMA_HAVE_ESSL 29 | 30 | # one of: aix bsd c89 freebsd generic linux macosx mingw posix solaris 31 | # usually generic is fine 32 | lua_platform = posix 33 | 34 | # -------------------- 35 | # PLASMA is a library in C, but can also be used from Fortran. 36 | # In this case, the Fortran interface needs to be built. 37 | # 0 = no - Fortran codes will not be touched, the FC and FCFLAGS variables 38 | # will not be referenced 39 | # 1 = yes - Fortran interface will be compiled and included into the library 40 | 41 | fortran ?= 0 42 | 43 | # -------------------- 44 | # libraries 45 | 46 | # Use the LAPACKE and CBLAS libraries, and their headers, distributed with Netlib LAPACK. 47 | # Link with ESSL for most of its BLAS/LAPACK functions. 48 | # Link with Netlib LAPACK/BLAS for the functions missing from ESSL. 49 | LIBS = ~/lapack-3.7.0-US/liblapacke.a ~/lapack-3.7.0-US/libcblas.a /sw/summitdev/essl/5.5.0-20161110/lib64/libessl.so /sw/summitdev/xl/20161123/xlf/15.1.5/lib/libxlf90.so /sw/summitdev/xl/20161123/xlf/15.1.5/lib/libxlfmath.so ~/lapack-3.7.0-US/liblapack.a ~/lapack-3.7.0-US/librefblas.a -lm -lgfortran 50 | INC = -I ~/lapack-3.7.0-US/CBLAS/include -I ~/lapack-3.7.0-US/LAPACKE/include -I$(OLCF_ESSL_ROOT)/include 51 | --------------------------------------------------------------------------------