├── SymILDL ├── SymILDL │ ├── lilc_matrix.h │ ├── lilc_matrix_to_string.h │ ├── block_diag_matrix_to_string.h │ ├── lilc_matrix_find_level_set.h │ ├── block_diag_matrix_save.h │ ├── lilc_matrix_find_root.h │ ├── lilc_matrix_save.h │ ├── lilc_matrix_sym_perm.h │ ├── lilc_matrix_sym_rcm.h │ ├── swap_struct.h │ ├── lilc_matrix_sym_equil.h │ ├── lil_sparse_matrix.h │ ├── solver_sqmr.h │ ├── lilc_matrix_load.h │ ├── solver_minres.h │ ├── lilc_matrix_pivot.h │ ├── block_diag_matrix.h │ ├── lilc_matrix_ildl_helpers.h │ ├── lilc_matrix_sym_amd.h │ ├── solver.h │ ├── lilc_matrix_declarations.h │ └── lilc_matrix_ildl.h ├── .gitignore_global ├── .gitignore ├── README.md └── License.md ├── test ├── CMakeLists.txt ├── cmake │ ├── googletest-download.cmake │ └── googletest.cmake ├── LSCholTest.cpp └── ILDLTest.cpp ├── Preconditioners ├── ILDL │ ├── include │ │ └── ILDL │ │ │ ├── ILDL_utils.h │ │ │ └── ILDL.h │ └── src │ │ ├── ILDL_utils.cpp │ │ └── ILDL.cpp ├── Types.h └── LSChol │ ├── src │ └── LSChol.cpp │ └── include │ └── LSChol │ └── LSChol.h ├── cmake ├── FindSPQR.cmake ├── FindCholmod.cmake └── FindEigen3.cmake ├── README.md ├── CMakeLists.txt └── LICENSE /SymILDL/SymILDL/lilc_matrix.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++ -*- 2 | #ifndef _LILC_MATRIX_H_ 3 | #define _LILC_MATRIX_H_ 4 | 5 | #include "block_diag_matrix.h" 6 | #include "lil_sparse_matrix.h" 7 | 8 | template 9 | class lilc_matrix; 10 | 11 | #include "lilc_matrix_declarations.h" 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/googletest.cmake) 2 | fetch_googletest( 3 | ${CMAKE_CURRENT_SOURCE_DIR}/cmake 4 | ${PROJECT_BINARY_DIR}/googletest 5 | ) 6 | 7 | 8 | # Enable GTest unit testing 9 | enable_testing() 10 | 11 | add_executable(ILDLTest 
ILDLTest.cpp) 12 | target_link_libraries(ILDLTest ILDL gtest_main) 13 | 14 | add_executable(LSCholTest LSCholTest.cpp) 15 | target_link_libraries(LSCholTest LSChol gtest_main) 16 | -------------------------------------------------------------------------------- /test/cmake/googletest-download.cmake: -------------------------------------------------------------------------------- 1 | # code copied from https://crascit.com/2015/07/25/cmake-gtest/ 2 | cmake_minimum_required(VERSION 3.5 FATAL_ERROR) 3 | 4 | project(googletest-download NONE) 5 | 6 | include(ExternalProject) 7 | 8 | ExternalProject_Add( 9 | googletest 10 | SOURCE_DIR "@GOOGLETEST_DOWNLOAD_ROOT@/googletest-src" 11 | BINARY_DIR "@GOOGLETEST_DOWNLOAD_ROOT@/googletest-build" 12 | GIT_REPOSITORY 13 | https://github.com/google/googletest.git 14 | GIT_TAG 15 | release-1.8.0 16 | CONFIGURE_COMMAND "" 17 | BUILD_COMMAND "" 18 | INSTALL_COMMAND "" 19 | TEST_COMMAND "" 20 | ) 21 | -------------------------------------------------------------------------------- /Preconditioners/ILDL/include/ILDL/ILDL_utils.h: -------------------------------------------------------------------------------- 1 | /** This file provides several convenient utility functions for working with the 2 | * SymILDL library. 3 | * 4 | * Copyright (C) 2019 by David M. Rosen (dmrosen@mit.edu) 5 | */ 6 | 7 | #pragma once 8 | 9 | #include "Preconditioners/Types.h" 10 | #include 11 | 12 | namespace Preconditioners { 13 | /** Given a SYMMETRIC, ROW-MAJOR sparse Eigen matrix S, this function constructs 14 | * and returns the compressed sparse row (CSR) representation of S. Note that 15 | * since S is assumed to be symmetric, only the UPPER TRIANGLE of S is 16 | * referenced. 
17 | */ 18 | void toCSR(const SparseMatrix &S, std::vector &row_ptr, 19 | std::vector &col_idx, std::vector &val); 20 | 21 | } // namespace Preconditioners 22 | -------------------------------------------------------------------------------- /SymILDL/.gitignore_global: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | *.deps 29 | 30 | # OS generated files # 31 | ###################### 32 | .DS_Store 33 | .DS_Store? 34 | ._* 35 | .Spotlight-V100 36 | .Trashes 37 | Icon? 38 | ehthumbs.db 39 | Thumbs.db 40 | 41 | # Notes to self # 42 | ################# 43 | notes_to_self.txt 44 | output.txt 45 | gmon.out -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_to_string.h: -------------------------------------------------------------------------------- 1 | //-*- mode: c++ -*- 2 | #ifndef _LIL_MATRIX_TO_STRING_H_ 3 | #define _LIL_MATRIX_TO_STRING_H_ 4 | 5 | #include 6 | #include 7 | /// Returns a printable summary of the matrix: its dimensions and nnz, then the row indices and values of every column. The string is empty unless SYM_ILDL_DEBUG is defined. 8 | template 9 | std::string lilc_matrix :: to_string() const 10 | { 11 | std::ostringstream os; 12 | #ifdef SYM_ILDL_DEBUG 13 | os << "List of Lists Matrix (" << m_n_rows << ", " << m_n_cols << ", " << nnz() << ")" << std::endl; 14 | // one section per column: its row indices followed by its values 15 | for (int i = 0; i < n_cols(); i++) { 16 | os << "Column " << i << ":" << std::endl; 17 | os << "Row Indices = " << m_idx[i] << std::endl; 18 | os << "Non-zero Values = " << m_x[i] << std::endl; 19 | os << std::endl; 20 | } 21 | #endif 22 | return os.str(); 23 | } 24 | 25 | #endif // _LIL_MATRIX_TO_STRING_H_ 26 | 
-------------------------------------------------------------------------------- /SymILDL/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.rar 20 | *.tar 21 | *.zip 22 | 23 | # Logs and databases # 24 | ###################### 25 | *.log 26 | *.sql 27 | *.sqlite 28 | *.deps 29 | 30 | # OS generated files # 31 | ###################### 32 | .DS_Store 33 | .DS_Store? 34 | ._* 35 | .Spotlight-V100 36 | .Trashes 37 | Icon? 38 | ehthumbs.db 39 | Thumbs.db 40 | *.stackdump 41 | 42 | # Notes to self # 43 | ################# 44 | notes_to_self.txt 45 | output.txt 46 | gmon.out 47 | build* 48 | ldl_driver.exe* -------------------------------------------------------------------------------- /SymILDL/SymILDL/block_diag_matrix_to_string.h: -------------------------------------------------------------------------------- 1 | //-*- mode: c++ -*- 2 | #ifndef _BLOCK_DIAG_MATRIX_TO_STRING_H_ 3 | #define _BLOCK_DIAG_MATRIX_TO_STRING_H_ 4 | 5 | #include 6 | #include 7 | /// Returns a printable summary of the block diagonal matrix: its dimensions and nnz, the main diagonal, and one (col, val) pair per 2x2 block. The string is empty unless SYM_ILDL_DEBUG is defined. 8 | template 9 | std::string block_diag_matrix :: to_string() const 10 | { 11 | std::ostringstream os; 12 | #ifdef SYM_ILDL_DEBUG 13 | os << "Block Diagonal Matrix (" << n_rows() << ", " << n_cols() << ", " << nnz() << ")" << std::endl; 14 | // main diagonal first, then the off-diagonal entry of every 2x2 block 15 | os << "Main Diagonal Values = " << main_diag << std::endl; 16 | os << "Off Diagonal (col, val) = " << "["; 17 | for (int i = 0; i < n_cols(); i++) { 18 | if (block_size(i) == 2) { 19 | os << "(" << i << ", " << off_diag.find(i)->second << "), "; 20 | i++; // skip the second index covered by this 2x2 block 21 | } 22 | } 23 | os << "]"; 24 | #endif 25 | 26 | return os.str(); 27 | } 28 | 29 | #endif 30 | 
-------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_find_level_set.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++ -*- 2 | #ifndef _LILC_MATRIX_FIND_LEVEL_SET_H_ 3 | #define _LILC_MATRIX_FIND_LEVEL_SET_H_ 4 | /// Advances a breadth-first search by one level: collects every not-yet-visited index referenced by list[] or m_idx[] of the nodes in lvl_set, marks it visited, and replaces lvl_set with that collection. Returns false (leaving lvl_set unchanged) when nothing new is found. 5 | template 6 | inline bool lilc_matrix :: find_level_set(vector& lvl_set, vector& visited) { 7 | vector new_set; 8 | for (idx_it node_it = lvl_set.begin(); node_it != lvl_set.end(); node_it++) { 9 | // neighbours recorded in list[] 10 | for (idx_it it = list[*node_it].begin(); it != list[*node_it].end(); it++) { 11 | if (!visited[*it]) { 12 | visited[*it] = true; 13 | new_set.push_back(*it); 14 | } 15 | } 16 | // neighbours recorded in m_idx[] 17 | for (idx_it it = m_idx[*node_it].begin(); it != m_idx[*node_it].end(); it++) { 18 | if (!visited[*it]) { 19 | visited[*it] = true; 20 | new_set.push_back(*it); 21 | } 22 | } 23 | } 24 | // nothing new was reached: report it and keep lvl_set as it was 25 | if (new_set.empty()) return false; 26 | 27 | lvl_set.swap(new_set); 28 | return true; 29 | } 30 | 31 | #endif -------------------------------------------------------------------------------- /SymILDL/README.md: -------------------------------------------------------------------------------- 1 | sym-ildl 2 | ======== 3 | 4 | sym-ildl is a C++ package for producing fast incomplete factorizations of symmetric indefinite matrices. Given an n x n symmetric indefinite matrix A, this package produces an incomplete LDL' factorization. To improve stability, the matrix is equilibriated in the max-norm and preordered using the Reverse Cuthill-McKee algorithm prior to factorization. To maintain stability, we use Bunch-Kaufman partial pivoting during the factorization process. 5 | 6 | More details as well as extensive documentation can be found at http://www.cs.ubc.ca/~inutard/html/index.html. 
7 | 8 | ### Authors: Chen Greif, Shiwen He, Paul Liu 9 | 10 | Quick Start 11 | ======================================================================= -------------------------------------------------------------------------------- /SymILDL/SymILDL/block_diag_matrix_save.h: -------------------------------------------------------------------------------- 1 | //-*-mode:c++-*- 2 | #ifndef _BLOCK_DIAG_MATRIX_SAVE_H_ 3 | #define _BLOCK_DIAG_MATRIX_SAVE_H_ 4 | /// Writes the matrix to `filename` in MatrixMarket coordinate format ("real symmetric" banner, 1-based indices, scientific notation with precision 16). Returns false if the file cannot be opened, true otherwise. 5 | template 6 | bool block_diag_matrix :: save(std::string filename) const 7 | { 8 | std::ofstream out(filename.c_str(), std::ios::out | std::ios::binary); 9 | if(!out) 10 | return false; 11 | 12 | out.flags(std::ios_base::scientific); 13 | out.precision(16); 14 | std::string header; 15 | 16 | header= "%%MatrixMarket matrix coordinate "; 17 | header += "real symmetric"; //maybe change later to have general/complex/blah as options 18 | 19 | out << header << std::endl; 20 | out << n_rows() << " " << n_cols() << " " << nnz() << "\n"; 21 | // every diagonal entry, plus the sub-diagonal and second diagonal entry of each 2x2 block 22 | for(int i = 0; i < n_cols(); i++) { 23 | out << i+1 << " " << i+1 << " " << main_diag[i] << "\n"; 24 | if (block_size(i) == 2) { 25 | out << i+2 << " " << i+1 << " " << off_diag.find(i)->second << "\n"; 26 | out << i+2 << " " << i+2 << " " << main_diag[i+1] << "\n"; 27 | i++; 28 | } 29 | } 30 | 31 | out.close(); 32 | return true; 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_find_root.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++ -*- 2 | #ifndef _LILC_MATRIX_FIND_ROOT_H_ 3 | #define _LILC_MATRIX_FIND_ROOT_H_ 4 | /// Refines the start node s in place: builds level sets from s with find_level_set() and, as long as that yields more levels than any earlier attempt, restarts from the minimum-degree node of the last level set. 5 | template 6 | inline void lilc_matrix :: find_root(int& s) { 7 | vector visited(m_n_cols, false); 8 | vector lvl_set; 9 | int ls_max = 0, ls = 0; 10 | 11 | while (true) { 12 | lvl_set.clear(); 13 | std::fill(visited.begin(), visited.end(), false); 14 | ls = 0; 15 | // seed the search with the current candidate root 16 | lvl_set.push_back(s); 17 | visited[s] = true; 18 
| while (find_level_set(lvl_set, visited)) 19 | ls++; 20 | // more levels than any earlier attempt: move s to the lowest-degree node of the last level set; otherwise stop 21 | if (ls > ls_max) { 22 | ls_max = ls; 23 | int deg, min_deg = m_n_cols; 24 | for (idx_it it = lvl_set.begin(); it != lvl_set.end(); it++) { 25 | deg = list[*it].size() + m_idx[*it].size(); 26 | if (m_idx[*it].size() > 0 && m_idx[*it][0] == *it) deg--; // the diagonal entry is not counted 27 | if (deg < min_deg) { //should consider tie breaking by index later if needed. 28 | min_deg = deg; 29 | s = *it; 30 | } 31 | } 32 | } else { 33 | break; 34 | } 35 | } 36 | } 37 | 38 | #endif -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_save.h: -------------------------------------------------------------------------------- 1 | //-*-mode:c++-*- 2 | #ifndef _LILC_MATRIX_SAVE_H_ 3 | #define _LILC_MATRIX_SAVE_H_ 4 | /// Sets `header` to the MatrixMarket banner for a real coordinate matrix, ending in "symmetric" when sym is true and in "general" otherwise. 5 | inline void put_header(std::string& header, bool sym = false) 6 | { 7 | header= "%%MatrixMarket matrix coordinate real "; 8 | if (sym) 9 | header += "symmetric"; //maybe change later to have symmetric/complex/blah as options 10 | else 11 | header += "general"; 12 | } 13 | /// Writes the stored entries to `filename` in MatrixMarket coordinate format (1-based indices, scientific notation with precision 16). Returns false if the file cannot be opened, true otherwise. 14 | template 15 | bool lilc_matrix :: save(std::string filename, bool sym) 16 | { 17 | std::ofstream out(filename.c_str(), std::ios::out | std::ios::binary); 18 | if(!out) 19 | return false; 20 | 21 | out.flags(std::ios_base::scientific); 22 | out.precision(16); 23 | std::string header; 24 | put_header(header, sym); 25 | 26 | out << header << std::endl; 27 | out << n_rows() << " " << n_cols() << " " << nnz() << "\n"; 28 | // one line per stored entry: row, column, value 29 | for(int i = 0; i < n_cols(); i++) { 30 | for(unsigned int j = 0; j < m_idx[i].size(); j++) { 31 | out << m_idx[i][j]+1 << " " << i+1 << " " << m_x[i][j] << "\n"; 32 | } 33 | } 34 | 35 | out.close(); 36 | return true; 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /test/cmake/googletest.cmake: -------------------------------------------------------------------------------- 1 | # the following code to fetch googletest 2 | # 
is inspired by and adapted after https://crascit.com/2015/07/25/cmake-gtest/ 3 | # download and unpack googletest at configure time 4 | 5 | macro(fetch_googletest _download_module_path _download_root) 6 | set(GOOGLETEST_DOWNLOAD_ROOT ${_download_root}) 7 | configure_file( 8 | ${_download_module_path}/googletest-download.cmake 9 | ${_download_root}/CMakeLists.txt 10 | @ONLY 11 | ) 12 | unset(GOOGLETEST_DOWNLOAD_ROOT) 13 | 14 | execute_process( 15 | COMMAND 16 | "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . 17 | WORKING_DIRECTORY 18 | ${_download_root} 19 | ) 20 | execute_process( 21 | COMMAND 22 | "${CMAKE_COMMAND}" --build . 23 | WORKING_DIRECTORY 24 | ${_download_root} 25 | ) 26 | 27 | # adds the targets: gtest, gtest_main, gmock, gmock_main 28 | add_subdirectory( 29 | ${_download_root}/googletest-src 30 | ${_download_root}/googletest-build 31 | ) 32 | endmacro() 33 | -------------------------------------------------------------------------------- /SymILDL/License.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | =============== 3 | 4 | Copyright (c) 2011-2015 Paul Liu, Chen Greif, Shiwen He 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /Preconditioners/ILDL/src/ILDL_utils.cpp: -------------------------------------------------------------------------------- 1 | #include "ILDL/ILDL_utils.h" 2 | 3 | namespace Preconditioners { 4 | /// Builds the CSR arrays of the upper triangle (diagonal included) of S: col_idx and val are cleared and refilled, row_ptr is resized to S.rows() + 1 and overwritten. 5 | void toCSR(const SparseMatrix &S, std::vector &row_ptr, 6 | std::vector &col_idx, std::vector &val) { 7 | 8 | size_t n = S.rows(); 9 | size_t nnz = S.nonZeros(); 10 | 11 | // Preallocate storage for the vectors containing the CSR representation of S 12 | row_ptr.resize(n + 1); 13 | col_idx.clear(); 14 | col_idx.reserve(nnz); 15 | val.clear(); 16 | val.reserve(nnz); 17 | // idx = number of upper-triangular entries stored so far 18 | size_t idx = 0; 19 | for (size_t r = 0; r < S.outerSize(); ++r) { 20 | // Store starting index for the current (rth) row. 21 | row_ptr[r] = idx; 22 | 23 | for (SparseMatrix::InnerIterator it(S, r); it; ++it) { 24 | // Check whether the current element belongs to the upper triangle of S 25 | if (it.col() >= r) { 26 | // This element belongs to the upper triangle of S 27 | col_idx.emplace_back(it.col()); 28 | val.emplace_back(it.value()); 29 | ++idx; 30 | } 31 | } 32 | } 33 | 34 | // Don't forget the last element of row_ptr! 
35 | row_ptr[S.rows()] = idx; 36 | } 37 | 38 | } // namespace Preconditioners 39 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_sym_perm.h: -------------------------------------------------------------------------------- 1 | //-*- mode: c++ -*- 2 | #ifndef _LIL_MATRIX_SYM_PERM_H_ 3 | #define _LIL_MATRIX_SYM_PERM_H_ 4 | /// Applies the symmetric permutation perm in place. With pinv the inverse of perm, an entry stored at (row r, column c) is re-filed in column min(pinv[r], pinv[c]) with row index max(pinv[r], pinv[c]); list[] is rebuilt so that list[row] holds the column of each off-diagonal entry. 5 | template 6 | void lilc_matrix :: sym_perm(std::vector& perm) { 7 | vector m_idx_new(m_n_cols); 8 | vector m_x_new(m_n_cols); 9 | // pinv becomes the inverse of perm; list[] is emptied here and rebuilt below 10 | int i, j, pi, pj; 11 | el_type px; 12 | vector pinv(m_n_cols); 13 | for (i = 0; i < m_n_cols; i++) { 14 | pinv[perm[i]] = i; 15 | list[i].clear(); 16 | } 17 | // re-file each stored entry (row m_idx[j][i], column j) at its permuted position 18 | for (j = 0; j < m_n_cols; j++) { //no need to use function call n_cols() every iter 19 | pj = pinv[j]; 20 | 21 | for (i = 0; i < (int) m_idx[j].size(); i++) { 22 | pi = pinv[m_idx[j][i]]; 23 | px = m_x[j][i]; 24 | 25 | if (pi < pj) { 26 | m_idx_new[ pi ].push_back(pj); 27 | m_x_new[ pi ].push_back(px); 28 | list[pj].push_back(pi); 29 | 30 | } else { 31 | m_idx_new[ pj ].push_back(pi); 32 | m_x_new[ pj ].push_back(px); 33 | 34 | if (pi != pj) 35 | list[pi].push_back(pj); 36 | 37 | } 38 | } 39 | } 40 | // install the permuted columns 41 | m_idx.swap(m_idx_new); 42 | m_x.swap(m_x_new); 43 | // NOTE(review): ensure_invariant is defined elsewhere; presumably it restores the expected element order within each column/list - confirm 44 | for (i = 0; i < m_n_cols; i++) { 45 | ensure_invariant(i, i, m_idx[i]); 46 | ensure_invariant(i, i, list[i], true); 47 | } 48 | } 49 | 50 | #endif // _LIL_MATRIX_SYM_PERM_H_ -------------------------------------------------------------------------------- /Preconditioners/Types.h: -------------------------------------------------------------------------------- 1 | /** A set of typedefs describing the basic types that will be used throughout 2 | * the Preconditioners library. 3 | * 4 | * Copyright (C) 2020 by David M. 
Rosen (dmrosen@mit.edu) 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace Preconditioners { 14 | 15 | /// Some useful typedefs for the Preconditioners library 16 | 17 | /// Linear algebra types 18 | typedef double Scalar; 19 | typedef Eigen::Matrix Vector; 20 | typedef Eigen::Matrix Matrix; 21 | 22 | /// Typedef for a (fixed-size) 2x2 matrix 23 | typedef Eigen::Matrix2d Matrix2d; 24 | 25 | /** We use row-major storage order to take advantage of fast (sparse-matrix) 26 | * (dense-vector) multiplications when OpenMP is available (cf. the Eigen 27 | * documentation page on "Eigen and Multithreading") */ 28 | typedef Eigen::SparseMatrix SparseMatrix; 29 | 30 | typedef Eigen::PermutationMatrix Permutation; 31 | typedef Permutation::IndicesType PermutationVector; 32 | 33 | /// Convenience typedef for a pair containing the number of positive and 34 | /// negative eigenvalues of a matrix 35 | typedef std::pair Inertia; 36 | 37 | } // namespace Preconditioners 38 | -------------------------------------------------------------------------------- /cmake/FindSPQR.cmake: -------------------------------------------------------------------------------- 1 | # SPQR lib usually requires linking to a blas and lapack library. 2 | # It is up to the user of this module to find a BLAS and link to it. 3 | 4 | # SPQR lib requires Cholmod, colamd and amd as well. 
5 | # FindCholmod.cmake can be used to find those packages before finding spqr 6 | 7 | if (SPQR_INCLUDES AND SPQR_LIBRARIES) 8 | set(SPQR_FIND_QUIETLY TRUE) 9 | endif (SPQR_INCLUDES AND SPQR_LIBRARIES) 10 | 11 | find_path(SPQR_INCLUDES 12 | NAMES 13 | SuiteSparseQR.hpp 14 | PATHS 15 | $ENV{SPQRDIR} 16 | ${INCLUDE_INSTALL_DIR} 17 | PATH_SUFFIXES 18 | suitesparse 19 | ufsparse 20 | ) 21 | 22 | find_library(SPQR_LIBRARIES spqr $ENV{SPQRDIR} ${LIB_INSTALL_DIR}) 23 | 24 | if(SPQR_LIBRARIES) 25 | 26 | find_library(SUITESPARSE_LIBRARY SuiteSparse PATHS $ENV{SPQRDIR} ${LIB_INSTALL_DIR}) 27 | if (SUITESPARSE_LIBRARY) 28 | set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${SUITESPARSE_LIBRARY}) 29 | endif() 30 | 31 | find_library(CHOLMOD_LIBRARY cholmod PATHS $ENV{UMFPACK_LIBDIR} $ENV{UMFPACKDIR} ${LIB_INSTALL_DIR}) 32 | if(CHOLMOD_LIBRARY) 33 | set(SPQR_LIBRARIES ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARY}) 34 | endif() 35 | 36 | endif(SPQR_LIBRARIES) 37 | 38 | include(FindPackageHandleStandardArgs) 39 | find_package_handle_standard_args(SPQR DEFAULT_MSG SPQR_INCLUDES SPQR_LIBRARIES) 40 | 41 | mark_as_advanced(SPQR_INCLUDES SPQR_LIBRARIES) -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_sym_rcm.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++ -*- 2 | #ifndef _LILC_MATRIX_SYM_RCM_H_ 3 | #define _LILC_MATRIX_SYM_RCM_H_ 4 | 5 | namespace { 6 | /*! \brief Functor for comparing elements by degree (in increasing order) instead of by index. 7 | \param A a pointer to the matrix being reordered. 
8 | */ 9 | template 10 | struct by_degree { 11 | lilc_matrix* A; 12 | by_degree(lilc_matrix* mat) : A(mat) {} 13 | bool operator()(int const &a, int const &b) const { 14 | int deg_a = A->list[a].size() + A->m_idx[a].size(); 15 | int deg_b = A->list[b].size() + A->m_idx[b].size(); 16 | // a diagonal entry stored first in the column does not count towards the degree 17 | if (A->m_idx[a].size() > 0 && A->m_idx[a][0] == a) deg_a--; 18 | if (A->m_idx[b].size() > 0 && A->m_idx[b][0] == b) deg_b--; 19 | // equal degrees: the larger index sorts first 20 | if (deg_a == deg_b) return a > b; 21 | return deg_a < deg_b; 22 | } 23 | }; 24 | } 25 | /// Appends to perm a level-by-level (breadth-first) ordering of all nodes - each component rooted at the node chosen by find_root(), each level sorted with by_degree - and then reverses perm. 26 | template 27 | inline void lilc_matrix :: sym_rcm(vector& perm) { 28 | int i, s; 29 | vector visited(m_n_cols, false); 30 | vector lvl_set; 31 | for (i = 0; i < m_n_cols; i++) { 32 | if (visited[i]) continue; 33 | // i has not been reached yet: start a new search, rooted at the node find_root() picks 34 | lvl_set.clear(); 35 | s = i; 36 | find_root(s); 37 | lvl_set.push_back(s); 38 | perm.push_back(s); 39 | 40 | by_degree sorter(this); 41 | 42 | visited[s] = true; 43 | while (find_level_set(lvl_set, visited)) { 44 | sort(lvl_set.begin(), lvl_set.end(), sorter); 45 | perm.insert( perm.end(), lvl_set.begin(), lvl_set.end() ); 46 | } 47 | } 48 | // the final ordering is the reverse of the order in which nodes were appended 49 | reverse(perm.begin(), perm.end()); 50 | } 51 | 52 | #endif -------------------------------------------------------------------------------- /SymILDL/SymILDL/swap_struct.h: -------------------------------------------------------------------------------- 1 | /*! \brief A structure containing variables used in pivoting a LIL-C matrix. 2 | 3 | Storing these variables in a combined structure reduces memory requirements and bundles together all temporary structures needed during pivoting. 
4 | */ 5 | template 6 | class swap_struct 7 | { 8 | //---------- useful typedefs (to keep consistent with lilc_matrix) -----------// 9 | typedef vector idx_vector_type; 10 | typedef vector elt_vector_type; 11 | typedef typename idx_vector_type::iterator idx_it; 12 | typedef typename elt_vector_type::iterator elt_it; 13 | 14 | public: 15 | vector swapk; /// swapr; /// 8 | void lilc_matrix :: sym_equil() { 9 | 10 | //find termination points for loops with binary search later. 11 | int i, ncols = n_cols(); 12 | // this is required since we do S[i] = max(S[i], ...) 13 | S.resize(ncols, 0); 14 | 15 | std::pair elem_its; 16 | for (i = 0; i < ncols; i++) { 17 | //assumes diag elem is always in 0th pos. if possible. 18 | if (!m_idx[i].empty() && m_idx[i][0] == i) 19 | S[i] = sqrt(abs(m_x[i][0])); 20 | 21 | //assumes indices are ordered. since this procedure is run 22 | //before factorization pivots matrix, this is a fair assumption 23 | //for most matrix market matrices. 24 | for (idx_it it = list[i].begin(); it != list[i].end(); it++) { 25 | S[i] = std::max(S[i], abs(coeff(i, *it))); 26 | } 27 | 28 | //S[i] > 0 since its the square root of a +ve number 29 | if (S[i] > eps) { 30 | for (idx_it it = list[i].begin(); it != list[i].end(); it++) { 31 | coeffRef(i, *it, elem_its); 32 | 33 | //can use bin. search on coeff since no reordering is done yet. 34 | *(elem_its.second) /= S[i]; 35 | } 36 | 37 | if (!m_idx[i].empty() && (m_idx[i][0] == i) ) 38 | m_x[i][0] /= S[i]; 39 | for (elt_it it = m_x[i].begin(); it != m_x[i].end(); it++) { 40 | *it /= S[i]; 41 | } 42 | } 43 | } 44 | 45 | for (i = 0; i < ncols; i++) { 46 | if (S[i] < eps) { 47 | for (elt_it it = m_x[i].begin(); it != m_x[i].end(); it++) { 48 | S[i] = std::max(S[i], abs(*it)); 49 | } 50 | 51 | if (S[i] < eps) { 52 | std::cerr << "Error: Matrix has a null column/row." 
<< std::endl; 53 | return; 54 | } 55 | 56 | for (elt_it it = m_x[i].begin(); it != m_x[i].end(); it++) { 57 | *it /= S[i]; 58 | } 59 | } 60 | } 61 | 62 | for (i = 0; i < ncols; i++) { 63 | S[i] = 1.0/S[i]; 64 | } 65 | } 66 | 67 | #endif // _LIL_MATRIX_SYM_EQUIL_H_ 68 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lil_sparse_matrix.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++ -*- 2 | #ifndef _LIL_SPARSE_MATRIX_H_ 3 | #define _LIL_SPARSE_MATRIX_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | using std::vector; 12 | 13 | /*! \brief The abstract parent of all sparse matrices */ 14 | template 15 | class lil_sparse_matrix 16 | { 17 | 18 | public: 19 | 20 | typedef vector idx_vector_type; 21 | typedef vector elt_vector_type; 22 | 23 | /*! \brief Allows outputting the contents of the matrix via << operators. */ 24 | friend std::ostream & operator<<(std::ostream& os, const lil_sparse_matrix& A) 25 | { 26 | os << A.to_string(); 27 | return os; 28 | }; 29 | 30 | int m_n_rows;/// m_idx;/// m_x;///::epsilon(); 12 | useDefaultThreshold_ = true; 13 | 14 | // Initialize Cholmod environment 15 | cholmod_l_start(&chol_com_); 16 | } 17 | 18 | /// Constructors 19 | 20 | LSChol::LSChol() { init(); } 21 | 22 | LSChol::LSChol(const SparseMatrix &A) { 23 | init(); 24 | compute(A); 25 | } 26 | 27 | void LSChol::compute(const SparseMatrix &A) { 28 | 29 | LSCholSparseMatrix LSA(A); 30 | 31 | /// Get a view of mat as a cholmod_sparse matrix. NB: this does *NOT* 32 | /// allocate new memory, rather it just wrap's A's 33 | cholmod_sparse Achol = Eigen::viewAsCholmod(LSA); 34 | 35 | /// Set threshold for numerical pivoting 36 | 37 | /* Compute the default threshold as in MatLab, see: 38 | * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal 39 | * Multithreaded Rank-Revealing Sparse QR Factorization, ACM Trans. on 40 | * Math. Soft. 
38(1), 2011, Page 8:3 41 | */ 42 | RealScalar pivotThreshold = pivot_tol_; 43 | if (useDefaultThreshold_) { 44 | RealScalar max2Norm = 0.0; 45 | for (int j = 0; j < LSA.cols(); j++) 46 | max2Norm = std::max(max2Norm, LSA.col(j).norm()); 47 | if (max2Norm == RealScalar(0)) 48 | max2Norm = RealScalar(1); 49 | pivotThreshold = 20 * (LSA.rows() + LSA.cols()) * max2Norm * 50 | std::numeric_limits::epsilon(); 51 | } 52 | 53 | // Cholmod output 54 | cholmod_sparse *cR; // The sparse triangular factor R in cholmod's format 55 | StorageIndex *E; // The permutation applied to columns 56 | 57 | // Compute factorization! 58 | rank_ = SuiteSparseQR(ordering_, pivotThreshold, Achol.ncol, &Achol, 59 | &cR, &E, &chol_com_); 60 | 61 | // Store upper-triangular factor R as a standard Eigen matrix 62 | R_ = Eigen::viewAsEigen(*cR); 63 | 64 | // Store permutation vector in P 65 | P_.resize(R_.cols()); 66 | for (size_t k = 0; k < R_.cols(); ++k) 67 | P_.indices()(k) = E[k]; 68 | 69 | // Release cR and E 70 | cholmod_l_free_sparse(&cR, &chol_com_); 71 | 72 | if (E != NULL) 73 | std::free(E); 74 | 75 | initialized_ = true; 76 | } 77 | 78 | Vector LSChol::PRinv(const Vector &x) const { 79 | return P_ * R_.triangularView().solve(x); 80 | } 81 | 82 | Vector LSChol::RinvTPinv(const Vector &x) const { 83 | return R_.transpose().triangularView().solve(P_.transpose() * 84 | x); 85 | } 86 | 87 | Vector LSChol::RPinv(const Vector &x) const { 88 | return R_ * (P_.transpose() * x); 89 | } 90 | 91 | Vector LSChol::solve(const Vector &x) const { return PRinv(RinvTPinv(x)); } 92 | 93 | LSChol::~LSChol() { cholmod_l_finish(&chol_com_); } 94 | 95 | } // namespace Preconditioners 96 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/solver_sqmr.h: -------------------------------------------------------------------------------- 1 | //-*- mode: c++ -*- 2 | #ifndef _SOLVER_SQMR_H_ 3 | #define _SOLVER_SQMR_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | 
template 10 | int solver::sqmr(int max_iter, double stop_tol) { 11 | // Preconditioned SQMR for A*x = rhs from a zero initial guess: iterates until res/||rhs|| <= stop_tol or max_iter iterations have run, keeps the lowest-residual iterate in sol_vec, and returns the iteration count. First, zero out the solution vector (assign() also resets entries left over from a previous solve, which resize() would keep) 12 | int n = A.n_rows(); 13 | sol_vec.assign(n, 0); 14 | 15 | // ---------- set initial values and allocate memory for variables ---------// 16 | 17 | // temporary vectors for calculations 18 | vector x(n), q(n), t(n), r(n), tmp(n); 19 | 20 | // residual = b - A*x0 21 | r = rhs; 22 | 23 | // search direction 24 | vector d(n); 25 | 26 | // set up initial values for variables above 27 | double eps = A.eps; 28 | double norm_rhs = norm(rhs, 2.0); 29 | 30 | double res = norm_rhs; 31 | double resmin = res; 32 | 33 | // Our preconditioner M = LDL'. 34 | auto Minv = [&](vector &in, vector &out) { 35 | L.backsolve(in, out); 36 | D.solve(out, tmp); 37 | L.forwardsolve(tmp, out); 38 | }; 39 | 40 | // compute t = M^(-1) * r 41 | Minv(r, t); 42 | double tau = norm(t, 2.0); 43 | 44 | q = t; 45 | double thet = 0; 46 | double rho = dot_product(r, q); 47 | 48 | double sigma, alpha, thet1, c2, rho1, beta; 49 | 50 | // -------------- begin sqmr iterations --------------// 51 | int k = 1; // iteration number 52 | while (res / norm_rhs > stop_tol && k <= max_iter) { 53 | // t = A * q 54 | // sigma = q'*t 55 | A.multiply(q, t); 56 | sigma = dot_product(q, t); 57 | alpha = rho / sigma; 58 | 59 | // r = r - alpha * t 60 | vector_sum(1, r, -alpha, t, r); 61 | 62 | // t = Minv(r) 63 | Minv(r, t); 64 | 65 | thet1 = thet; 66 | thet = norm(t, 2.0) / tau; 67 | 68 | c2 = 1.0 / (1 + thet * thet); 69 | 70 | tau = tau * thet * sqrt(c2); 71 | if (k == 1) { 72 | // d = c^2 * alpha * q 73 | for (int i = 0; i < n; i++) { 74 | d[i] = c2 * alpha * q[i]; 75 | } 76 | } else { 77 | // d = c^2 * thet1^2 * d + c^2 * alpha * q 78 | vector_sum(c2 * thet1 * thet1, d, c2 * alpha, q, d); 79 | } 80 | 81 | // update x 82 | vector_sum(1, x, 1, d, x); 83 | 84 | // update residual and norms 85 | res = norm(r, 2.0); 86 | /* 87 | // the true residual 88 | A.multiply(x, tmp); 89 | vector_sum(1, rhs, -1, tmp, tmp); 90 | res = 
norm(tmp, 2.0); 91 | */ 92 | 93 | if (res < resmin) { 94 | resmin = res; 95 | sol_vec = x; 96 | } 97 | 98 | rho1 = rho; 99 | rho = dot_product(r, t); 100 | beta = rho / rho1; 101 | vector_sum(1, t, beta, q, q); 102 | 103 | k++; 104 | // ------------- end update ------------- // 105 | } 106 | 107 | std::string iter_str = "iterations"; 108 | if (k - 1 == 1) 109 | iter_str = "iteration"; 110 | 111 | if (msg_lvl) 112 | printf("SQMR took %i %s and got down to relative residual %e.\n", k - 1, 113 | iter_str.c_str(), resmin / norm_rhs); 114 | return k - 1; 115 | } 116 | 117 | #endif // _SOLVER_SQMR_H_ 118 | -------------------------------------------------------------------------------- /test/LSCholTest.cpp: -------------------------------------------------------------------------------- 1 | #include "LSChol/LSChol.h" 2 | #include "gtest/gtest.h" 3 | 4 | using namespace Preconditioners; 5 | using namespace std; 6 | 7 | class LSCholTest : public testing::Test { 8 | protected: 9 | /// Test configuration 10 | 11 | double rel_tol = 1e-6; // Relative error tolerance 12 | double eps = 1e-6; // Absolute error tolerance 13 | 14 | /// Test data 15 | 16 | SparseMatrix A; 17 | LSChol Afact; 18 | 19 | void SetUp() override { 20 | A.resize(4, 3); 21 | 22 | A.insert(0, 0) = 1; 23 | A.insert(0, 1) = 10; 24 | A.insert(0, 2) = 5; 25 | 26 | A.insert(1, 0) = 3; 27 | A.insert(1, 1) = 2; 28 | A.insert(1, 2) = 9; 29 | 30 | A.insert(2, 0) = 6; 31 | A.insert(2, 2) = 2; 32 | 33 | A.insert(3, 0) = 10; 34 | } 35 | }; 36 | 37 | /// Basic test: check computation of factorization 38 | TEST_F(LSCholTest, compute) { 39 | 40 | /// Perform factorization 41 | Afact.compute(A); 42 | 43 | /// Extract triangular factor 44 | const SparseMatrix &R = Afact.R(); 45 | 46 | /// Check output 47 | EXPECT_EQ(R.rows(), A.cols()); 48 | EXPECT_EQ(R.cols(), A.cols()); 49 | EXPECT_EQ(Afact.rank(), A.cols()); 50 | EXPECT_EQ(Afact.P().size(), A.cols()); 51 | } 52 | 53 | /// Check computation of products with PR^{-1} 54 | 
TEST_F(LSCholTest, PRinv) { 55 | 56 | /// Perform factorization 57 | Afact.compute(A); 58 | size_t d = Afact.R().rows(); 59 | 60 | /// Compute the product matrix PRinv column-by-column 61 | Matrix Id = Matrix::Identity(Afact.R().rows(), Afact.R().cols()); 62 | Matrix PRinv(d, d); 63 | for (size_t k = 0; k < d; ++k) 64 | PRinv.col(k) = Afact.PRinv(Id.col(k)); 65 | 66 | /// Compute the product A*P*R^{-1} 67 | Matrix APRinv = A * PRinv; 68 | 69 | // AP = QR => APR^{-1} = Q, and therefore: 70 | // 71 | // (APR^{-1})^T * (APR^{-1}) = Q^T * Q = Id 72 | // 73 | // So we check that this identity holds 74 | 75 | Matrix S = APRinv.transpose() * APRinv; 76 | 77 | EXPECT_LT((S - Id).norm(), eps); 78 | } 79 | 80 | /// Check computation of products with R^{-T} * P^{-1} 81 | TEST_F(LSCholTest, RinvTPinv) { 82 | 83 | /// Perform factorization 84 | Afact.compute(A); 85 | size_t d = Afact.R().rows(); 86 | 87 | /// Compute the product matrix R^{-T} * P^{-1} column-by-column 88 | Matrix Id = Matrix::Identity(Afact.R().rows(), Afact.R().cols()); 89 | Matrix RinvTPinv(d, d); 90 | for (size_t k = 0; k < d; ++k) 91 | RinvTPinv.col(k) = Afact.RinvTPinv(Id.col(k)); 92 | 93 | /// Compute the product R^{-T} * P^{-1} * A^T 94 | Matrix RinvTPinvAT = RinvTPinv * A.transpose(); 95 | 96 | // AP = QR => R^{-T} * Pinv * A^T = Q^T, and therefore: 97 | // 98 | // (R^{-T} * Pinv * A^T) * (R^{-T} * Pinv * A^T)^T = Q^T * Q = Id 99 | // 100 | // So we check that this identity holds 101 | 102 | Matrix S = RinvTPinvAT * RinvTPinvAT.transpose(); 103 | 104 | EXPECT_LT((S - Id).norm(), eps); 105 | } 106 | 107 | /// Check computation of products with (A^T * A)^{-1} via solve() 108 | TEST_F(LSCholTest, solve) { 109 | 110 | /// Perform factorization 111 | Afact.compute(A); 112 | size_t d = Afact.R().rows(); 113 | 114 | /// Compute product A'*A 115 | Matrix M = A.transpose() * A; 116 | 117 | /// Verify that solve(A'A) = Id 118 | 119 | // Compute this result column-by-column 120 | Matrix prod(d, d); 121 | for (size_t k = 0; k 
< d; ++k) 122 | prod.col(k) = Afact.solve(M.col(k)); 123 | 124 | Matrix Id = Matrix::Identity(d, d); 125 | 126 | EXPECT_LT((Id - prod).norm(), eps); 127 | } 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Preconditioners 2 | 3 | This library implements a set of useful algebraic (incomplete factorization-based) [preconditioners](https://en.wikipedia.org/wiki/Preconditioner) that can be used to accelerate the convergence of many iterative numerical linear-algebraic algorithms (such as the [conjugate gradient](https://en.wikipedia.org/wiki/Conjugate_gradient_method), [Lanczos](https://en.wikipedia.org/wiki/Lanczos_algorithm), or [LOBPCG](https://en.wikipedia.org/wiki/LOBPCG) methods). It builds upon the incomplete Crout symmetric indefinite LDL' factorization provided by the [sym-ildl](https://cs.stanford.edu/people/paulliu/sym-ildl/html/index.html) library, extending its functionality to implement *inertia correction* (so that the constructed preconditioners are *positive-definite*, as required by many inexact linear-algebraic methods), as well as presenting an interface that permits easy integration with [Eigen](https://eigen.tuxfamily.org/index.php). 4 | 5 | ## Getting Started 6 | 7 | This library can be built and exported as a CMake project. 
The following installation instructions have been verified on Ubuntu 22.04: 8 | 9 | *Step 1:* Install dependencies 10 | 11 | ``` 12 | $ sudo apt-get install build-essential cmake-gui libeigen3-dev liblapack-dev libblas-dev libsuitesparse-dev 13 | ``` 14 | 15 | *Step 2:* Clone the repository 16 | 17 | ``` 18 | $ git clone https://github.com/david-m-rosen/Preconditioners Preconditioners 19 | ``` 20 | 21 | *Step 3:* Create build directory 22 | 23 | ``` 24 | $ cd Preconditioners && mkdir build 25 | ``` 26 | 27 | *Step 4:* Configure build and generate Makefiles 28 | ``` 29 | $ cd build && cmake .. 30 | ``` 31 | 32 | *Step 5:* Build the library 33 | 34 | ``` 35 | $ make -j 36 | ``` 37 | 38 | ## References 39 | 40 | We are making this software freely available in the hope that it will be useful to others. If you use our code in your own work, please cite our [paper](https://arxiv.org/abs/2207.05257), which describes the design of the inertia-corrected incomplete symmetric indefinite preconditioner implemented in the `ILDL` class: 41 | 42 | ``` 43 | @misc{Rosen2022Accelerating, 44 | title = {Accelerating Certifiable Estimation with Preconditioned Eigensolvers}, 45 | author = {Rosen, David M.}, 46 | month = may, 47 | year = {2022}, 48 | publisher = {arXiv}, 49 | doi = {10.48550/ARXIV.2207.05257}, 50 | url = {https://arxiv.org/abs/2207.05257}, 51 | } 52 | ``` 53 | 54 | and the following [paper](https://dl.acm.org/doi/abs/10.1145/3054948) of Greif et al., which describes the design of the `sym-ildl` library that this project includes: 55 | 56 | ``` 57 | @article{Greif2017SymILDL, 58 | title = {{SYM-ILDL}: Incomplete {$LDL\transpose$} Factorization of Symmetric Indefinite and Skew-Symmetric Matrices}, 59 | author = {Greif, C. and He, S. and Liu, P.}, 60 | journal = {{ACM} Trans. Math. 
Softw.}, 61 | volume = {44}, 62 | number = {1}, 63 | month = apr, 64 | year = {2017}, 65 | } 66 | ``` 67 | 68 | ## Copyright and License 69 | 70 | The `Preconditioners` software contained herein is copyright (C) 2016-2022 by David M. Rosen, and is distributed under the terms of the GNU Lesser General Public License (LGPL) version 3 (or later). Please see the [LICENSE](https://github.com/david-m-rosen/Preconditioners/blob/master/LICENSE) for more information. 71 | 72 | The modified version of the [sym-ildl](https://cs.stanford.edu/people/paulliu/sym-ildl/html/index.html) library redistributed with this project is released under the MIT license. Please refer to the [license](https://github.com/david-m-rosen/Preconditioners/blob/master/SymILDL/License.md) file distributed with that project. 73 | 74 | Contact: d.rosen@northeastern.edu 75 | -------------------------------------------------------------------------------- /cmake/FindEigen3.cmake: -------------------------------------------------------------------------------- 1 | # - Try to find Eigen3 lib 2 | # 3 | # This module supports requiring a minimum version, e.g. you can do 4 | # find_package(Eigen3 3.1.2) 5 | # to require version 3.1.2 or newer of Eigen3. 6 | # 7 | # Once done this will define 8 | # 9 | # EIGEN3_FOUND - system has eigen lib with correct version 10 | # EIGEN3_INCLUDE_DIR - the eigen include directory 11 | # EIGEN3_VERSION - eigen version 12 | # 13 | # This module reads hints about search locations from 14 | # the following environment variables: 15 | # 16 | # EIGEN3_ROOT 17 | # EIGEN3_ROOT_DIR 18 | 19 | # Copyright (c) 2006, 2007 Montel Laurent, 20 | # Copyright (c) 2008, 2009 Gael Guennebaud, 21 | # Copyright (c) 2009 Benoit Jacob 22 | # Redistribution and use is allowed according to the terms of the 2-clause BSD license. 
23 | 24 | if(NOT Eigen3_FIND_VERSION) 25 | if(NOT Eigen3_FIND_VERSION_MAJOR) 26 | set(Eigen3_FIND_VERSION_MAJOR 2) 27 | endif(NOT Eigen3_FIND_VERSION_MAJOR) 28 | if(NOT Eigen3_FIND_VERSION_MINOR) 29 | set(Eigen3_FIND_VERSION_MINOR 91) 30 | endif(NOT Eigen3_FIND_VERSION_MINOR) 31 | if(NOT Eigen3_FIND_VERSION_PATCH) 32 | set(Eigen3_FIND_VERSION_PATCH 0) 33 | endif(NOT Eigen3_FIND_VERSION_PATCH) 34 | 35 | set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") 36 | endif(NOT Eigen3_FIND_VERSION) 37 | 38 | macro(_eigen3_check_version) 39 | file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) 40 | 41 | string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") 42 | set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") 43 | string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") 44 | set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") 45 | string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") 46 | set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") 47 | 48 | set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) 49 | if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 50 | set(EIGEN3_VERSION_OK FALSE) 51 | else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 52 | set(EIGEN3_VERSION_OK TRUE) 53 | endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) 54 | 55 | if(NOT EIGEN3_VERSION_OK) 56 | 57 | message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " 58 | "but at least version ${Eigen3_FIND_VERSION} is required") 59 | endif(NOT EIGEN3_VERSION_OK) 60 | endmacro(_eigen3_check_version) 61 | 62 | if (EIGEN3_INCLUDE_DIR) 63 | 64 | # in cache already 65 | _eigen3_check_version() 66 | set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) 67 | 68 
| else (EIGEN3_INCLUDE_DIR) 69 | 70 | # search first if an Eigen3Config.cmake is available in the system, 71 | # if successful this would set EIGEN3_INCLUDE_DIR and the rest of 72 | # the script will work as usual 73 | find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET) 74 | 75 | if(NOT EIGEN3_INCLUDE_DIR) 76 | find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library 77 | HINTS 78 | ENV EIGEN3_ROOT 79 | ENV EIGEN3_ROOT_DIR 80 | PATHS 81 | ${CMAKE_INSTALL_PREFIX}/include 82 | ${KDE4_INCLUDE_DIR} 83 | PATH_SUFFIXES eigen3 eigen 84 | ) 85 | endif(NOT EIGEN3_INCLUDE_DIR) 86 | 87 | if(EIGEN3_INCLUDE_DIR) 88 | _eigen3_check_version() 89 | endif(EIGEN3_INCLUDE_DIR) 90 | 91 | include(FindPackageHandleStandardArgs) 92 | find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) 93 | 94 | mark_as_advanced(EIGEN3_INCLUDE_DIR) 95 | 96 | endif(EIGEN3_INCLUDE_DIR) 97 | 98 | -------------------------------------------------------------------------------- /Preconditioners/LSChol/include/LSChol/LSChol.h: -------------------------------------------------------------------------------- 1 | /** This class provides functionality for computing the (sparse) upper 2 | * triangular factor R and permutation P from a sparse QR factorization of the 3 | * form: 4 | * 5 | * AP = QR 6 | * 7 | * This is useful for solving large-scale linear least-squares problems via 8 | * (well-conditioned) orthogonal decomposition. 9 | * 10 | * This small module is based upon Eigen's SPQRSupport, but avoids the 11 | * computation and storage of the orthogonal factor Q. 
12 | * 13 | * Copyright (C) 2020 by David M. Rosen (dmrosen@mit.edu) 14 | **/ 15 | 16 | #pragma once 17 | 18 | #include "Eigen/SPQRSupport" 19 | #include "Preconditioners/Types.h" 20 | 21 | namespace Preconditioners { 22 | 23 | class LSChol { 24 | 25 | // Column normalization requires COLUMN-MAJOR storage order 26 | typedef SuiteSparse_long StorageIndex; 27 | typedef Eigen::SparseMatrix 28 | LSCholSparseMatrix; 29 | typedef SparseMatrix::RealScalar RealScalar; 30 | typedef SuiteSparse_long Index; 31 | 32 | private: 33 | /// Data members 34 | 35 | /// FACTORIZATION ELEMENTS 36 | 37 | /// Cholmod configuration struct 38 | cholmod_common chol_com_; 39 | 40 | SparseMatrix R_; // The sparse matrix R in Eigen format 41 | Permutation P_; // Permutation 42 | Index rank_; // The rank of the matrix 43 | 44 | // Treat columns with 2-norm below this tolerance as zero during 45 | // factorization. 46 | RealScalar pivot_tol_; 47 | int ordering_; // Ordering method to use, see SPQR's manual 48 | int num_fact_tol_; // Allow to use some tolerance during numerical factorization 49 | bool useDefaultThreshold_; // Use default threshold 50 | 51 | // Boolean value indicating whether the object contains a valid cached 52 | // factorization 53 | bool initialized_ = false; 54 | 55 | /// Helper function: initialize Cholmod 56 | void init(); 57 | 58 | public: 59 | /// Constructors 60 | 61 | /** Construct an empty LSChol object */ 62 | LSChol(); 63 | 64 | /** Construct an LSChol object containing a factorization 65 | * of the passed matrix A */ 66 | LSChol(const SparseMatrix &A); 67 | 68 | /// Accessors 69 | 70 | /** Return the R-factor of the QR decomposition */ 71 | const SparseMatrix &R() const { return R_; } 72 | 73 | /** Return the permutation P used in this factorization */ 74 | const Permutation &P() const { return P_; } 75 | 76 | /** Get rank of computed triangular factor */ 77 | Index rank() const { return rank_; } 78 | 79 | /// Mutators 80 | 81 | /// Set the fill-reducing ordering method to be used 
82 | void setSPQROrdering(int ordering) { ordering_ = ordering; } 83 | 84 | /// Set the tolerance tol to treat columns with 2-norm <= tol as zero 85 | void setPivotThreshold(const RealScalar &tol) { 86 | useDefaultThreshold_ = false; 87 | pivot_tol_ = tol; 88 | } 89 | 90 | /** Compute the "Q-less" QR factorization of the matrix A. */ 91 | void compute(const SparseMatrix &A); 92 | 93 | /** Return a *mutable* pointer to the SPQR workspace */ 94 | cholmod_common *cholmodCommon() { return &chol_com_; } 95 | 96 | /// Linear-algebraic operations 97 | 98 | /** Computes and returns the product P * R^{-1} * x */ 99 | Vector PRinv(const Vector &x) const; 100 | 101 | /** Computes and returns the product R^{-T} * P^{-1} * x */ 102 | Vector RinvTPinv(const Vector &x) const; 103 | 104 | /** Computes and returns the product R * P^{-1} * x */ 105 | Vector RPinv(const Vector &x) const; 106 | 107 | /** Computes and returns the product 108 | * 109 | * P * R^{-1} * R^{-T} * P^{-1} * x = (A^T * A)^{-1} * x 110 | */ 111 | Vector solve(const Vector &x) const; 112 | 113 | /// Destructor 114 | ~LSChol(); 115 | }; 116 | 117 | } // namespace Preconditioners 118 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_load.h: -------------------------------------------------------------------------------- 1 | //-*-mode:c++-*- 2 | #ifndef _LILC_MATRIX_LOAD_H_ 3 | #define _LILC_MATRIX_LOAD_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | template 12 | inline bool readline (std::stringstream& line, int& n_rows, int& n_cols, int& i, int& j, el_type& value) { 13 | line >> i >> j >> value; 14 | i--; 15 | j--; 16 | if(i>=0 && j>=0 && i 24 | bool lilc_matrix :: load (std::string filename) 25 | { 26 | std::ifstream input(filename.c_str(), std::ios::in); 27 | //input.sync_with_stdio(0); 28 | 29 | if(!input) return false; 30 | 31 | const int maxBuffersize = 2048; 32 | char buffer[maxBuffersize]; 33 | 34 | bool 
readsizes = false; 35 | 36 | int n_rows(-1), n_cols(-1), n_nzs(-1), i(-1), j(-1); 37 | int count = 0; 38 | el_type value; 39 | 40 | bool full_detected = false; 41 | while(input.getline(buffer, maxBuffersize)) 42 | { 43 | // skip comments 44 | //NOTE An appropriate test should be done on the header to get the symmetry 45 | if(buffer[0]=='%') 46 | continue; 47 | 48 | std::stringstream line(buffer); 49 | //line.sync_with_stdio(0); 50 | if(!readsizes) 51 | { 52 | line >> n_rows >> n_cols >> n_nzs; 53 | if(n_rows > 0 && n_cols > 0 && n_nzs > 0) 54 | { 55 | readsizes = true; 56 | 57 | resize(n_rows, n_cols); 58 | std::fill(row_first.begin(), row_first.end(), 0); //a bit of optimization could be used here since resize sets all elem in first to 1 59 | std::fill(col_first.begin(), col_first.end(), 0); //a bit of optimization could be used here since resize sets all elem in first to 1 60 | } 61 | } 62 | else 63 | { 64 | i = -1; 65 | j = -1; 66 | if( readline(line, n_rows, n_cols, i, j, value) ) 67 | { 68 | if (j > i) { 69 | full_detected = true; 70 | continue; 71 | } 72 | m_idx[j].push_back(i); 73 | m_x[j].push_back(value); 74 | ++count; 75 | assert(i >= j); 76 | if (i != j) list[i].push_back(j); 77 | 78 | } 79 | else 80 | std::cerr << "Invalid read: " << i << "," << j << "\n"; 81 | } 82 | 83 | } 84 | 85 | if (!full_detected && count != n_nzs) std::cout << "Expected " << n_nzs << " elems but read " << count << "." << std::endl; 86 | 87 | if (full_detected) { 88 | std::cout << "Full matrix detected, assuming matrix is symmetric and loading lower-half of the matrix only." << std::endl; 89 | } 90 | nnz_count = count; 91 | std::cout << "Load succeeded. " << "File " << filename << " was loaded." 
<< std::endl; 92 | input.close(); 93 | return true; 94 | } 95 | 96 | template 97 | bool lilc_matrix :: load (const std::vector& ptr, const std::vector& row, const std::vector& val) { 98 | if (ptr.size() == 0 || ptr.back() != row.size() || val.size() != ptr.back()) { 99 | std::cout << "Error in CSC format detected. Matrix failed to load." << std::endl; 100 | return false; 101 | } 102 | return load(ptr.data(), row.data(), val.data(), ptr.size()-1); 103 | } 104 | 105 | template 106 | bool lilc_matrix :: load (const int* ptr, const int* row, const el_type* val, int dim) { 107 | bool full_detected = false; 108 | int n_rows = dim, n_cols = dim; 109 | 110 | resize(n_rows, n_cols); 111 | std::fill(row_first.begin(), row_first.end(), 0); //a bit of optimization could be used here since resize sets all elem in first to 1 112 | std::fill(col_first.begin(), col_first.end(), 0); //a bit of optimization could be used here since resize sets all elem in first to 1 113 | 114 | int count = 0; 115 | for (int i = 0; i < dim; i++) { 116 | for (int j = ptr[i]; j < ptr[i+1]; j++) { 117 | if (i > row[j]) { 118 | full_detected = true; 119 | continue; 120 | } 121 | m_idx[i].push_back(row[j]); 122 | m_x[i].push_back(val[j]); 123 | if (i != row[j]) list[row[j]].push_back(i); 124 | ++count; 125 | } 126 | } 127 | 128 | if (full_detected) { 129 | std::cout << "Full matrix detected, assuming matrix is symmetric and loading lower-half of the matrix only." 
<< std::endl; 130 | } 131 | 132 | nnz_count = count; 133 | return true; 134 | } 135 | 136 | #endif 137 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # dmrosen 4-July-2022 2 | 3 | cmake_minimum_required(VERSION 3.1) 4 | 5 | # PROJECT CONFIGURATION 6 | project(Preconditioners LANGUAGES C CXX VERSION 1.0.0) 7 | set(CMAKE_CXX_STANDARD 17) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) # We require C++ 17 or later 9 | 10 | 11 | # Set build type to 'RelWithDebInfo' if one was not specified by the user 12 | if(NOT CMAKE_BUILD_TYPE) 13 | set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) 14 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS RelWithDebInfo Release Debug MinSizeRel) 15 | message(STATUS "Setting build type to ${CMAKE_BUILD_TYPE}, as none was specified\n") 16 | else() 17 | message(STATUS "Building in ${CMAKE_BUILD_TYPE} mode\n") 18 | endif() 19 | 20 | 21 | # Build the unit tests? 22 | set(BUILD_PRECONDITIONERS_TESTS OFF CACHE BOOL "Build unit tests? [disabled by default]") 23 | 24 | # Directory for built libraries 25 | set(LIBRARY_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/lib CACHE PATH "The directory in which to place the libraries built by this project") 26 | # Directory for built executables 27 | set(EXECUTABLE_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/bin CACHE PATH "The directory in which to place executables built by this project") 28 | 29 | # BUILD CONFIGURATIONS 30 | # Enable faster instruction sets (SIMD/AVX) 31 | set(ENABLE_VECTORIZATION OFF CACHE BOOL "Enable vectorized instruction sets (SIMD/AVX)? 
[disabled by default]") 32 | # Enable code profiling using gperftools 33 | set(ENABLE_PROFILING OFF CACHE BOOL "Enable code profiling using gperftools") 34 | 35 | # Add the .cmake files that ship with Eigen3 to the CMake module path (useful for finding other stuff) 36 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" CACHE STRING "The CMake module path used for this project") 37 | 38 | 39 | # PRECONDITIONERS DEPENDENCIES 40 | # Find Eigen library 41 | find_package(Eigen3 REQUIRED) 42 | if(EIGEN3_FOUND) 43 | message(STATUS "Found Eigen3 library: ${EIGEN3_INCLUDE_DIR}") 44 | endif() 45 | 46 | # FIND ADDITIONAL LIBRARIES 47 | # These next operations make use of the .cmake files shipped with Eigen3 48 | find_package(OpenMP REQUIRED) 49 | find_package(SPQR REQUIRED) 50 | 51 | # Set location of the include directory for the SYM-ILDL library 52 | set(SymILDL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/SymILDL/SymILDL) 53 | 54 | # Set the various directories for this project 55 | set(ILDL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Preconditioners/ILDL/include) 56 | set(ILDL_HDR_DIR ${ILDL_INCLUDE_DIR}/ILDL) 57 | set(ILDL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Preconditioners/ILDL/src) 58 | 59 | # Get the set of ILDL header and source files 60 | set(ILDL_HDRS 61 | ${CMAKE_CURRENT_SOURCE_DIR}/Preconditioners/Types.h 62 | ${ILDL_HDR_DIR}/ILDL_utils.h 63 | ${ILDL_HDR_DIR}/ILDL.h 64 | ) 65 | 66 | set(ILDL_SRCS 67 | ${ILDL_SOURCE_DIR}/ILDL_utils.cpp 68 | ${ILDL_SOURCE_DIR}/ILDL.cpp 69 | ) 70 | 71 | # The Sym-ILDL headers and Eigen 3 are both referenced by the header files of the ILDL library, hence must be PUBLICLY included (i.e. 
clients using the SYMILDLSupport headers must also include these headers) 72 | set(ILDL_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} ${ILDL_INCLUDE_DIR} ${EIGEN3_INCLUDE_DIR} CACHE INTERNAL "") 73 | 74 | add_library(ILDL SHARED ${ILDL_HDRS} ${ILDL_SRCS}) 75 | target_include_directories(ILDL PUBLIC ${SymILDL_INCLUDE_DIR} ${ILDL_INCLUDES}) 76 | target_link_libraries(ILDL ${BLAS_LIBRARIES}) 77 | 78 | 79 | 80 | # SET UP LSChol LIBRARY 81 | 82 | # Set the various directories for this project 83 | set(LSChol_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Preconditioners/LSChol/include) 84 | set(LSChol_HDR_DIR ${LSChol_INCLUDE_DIR}/LSChol) 85 | set(LSChol_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Preconditioners/LSChol/src) 86 | 87 | # Get the set of LSChol header and source files 88 | set(LSChol_HDRS 89 | ${CMAKE_CURRENT_SOURCE_DIR}/Preconditioners/Types.h 90 | ${LSChol_HDR_DIR}/LSChol.h 91 | ) 92 | 93 | set(LSChol_SRCS 94 | ${LSChol_SOURCE_DIR}/LSChol.cpp 95 | ) 96 | 97 | # Eigen 3 is referenced by the header files of the LSChol library, hence must be PUBLICLY included (i.e. 
clients using the SYMILDLSupport headers must also include these headers) 98 | set(LSChol_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} ${EIGEN3_INCLUDE_DIR} ${SPQR_INCLUDES} ${LSChol_INCLUDE_DIR} CACHE INTERNAL "") 99 | 100 | add_library(LSChol SHARED ${LSChol_HDRS} ${LSChol_SRCS}) 101 | target_include_directories(LSChol PUBLIC ${LSChol_INCLUDES}) 102 | target_link_libraries(LSChol ${BLAS_LIBRARIES} ${SPQR_LIBRARIES} ${M} ${LAPACK}) 103 | set_target_properties(LSChol PROPERTIES COMPILE_FLAGS ${OpenMP_CXX_FLAGS}) 104 | set_target_properties(LSChol PROPERTIES LINK_FLAGS "-fopenmp") 105 | 106 | # Build unit tests 107 | if(${BUILD_PRECONDITIONERS_TESTS}) 108 | message(STATUS "Adding unit tests to build") 109 | add_subdirectory(test) 110 | endif() 111 | 112 | # EXPORT THIS LIBRARY 113 | 114 | # Add an entry for this project into CMake's package registry, so that this project can be found by other CMake projects 115 | set(CMAKE_EXPORT_PACKAGE_REGISTRY TRUE) # This flag is required to actually export the package for CMake versions >= 3.15 116 | export(PACKAGE ${PROJECT_NAME}) 117 | 118 | # Create a configuration file for this project, so that it can be imported by other CMake projects 119 | export(TARGETS ILDL LSChol FILE ${PROJECT_NAME}Config.cmake) 120 | 121 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/solver_minres.h: -------------------------------------------------------------------------------- 1 | //-*- mode: c++ -*- 2 | #ifndef _SOLVER_MINRES_H_ 3 | #define _SOLVER_MINRES_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | int solver::minres(int max_iter, double stop_tol, 11 | double shift) { 12 | // MINRES on the preconditioned, shifted operator M^(-1) A M^(-t) - shift*I: iterates until the residual estimate relative to ||rhs|| drops to stop_tol or max_iter iterations have run, accumulates the solution in sol_vec, and returns the iteration count. First, zero out the solution vector (assign() also clears entries left over from a previous solve, which resize() would keep and the updates below would add to) 13 | int n = A.n_rows(); 14 | sol_vec.assign(n, 0); 15 | 16 | // ---------- set initial values and allocate memory for variables ---------// 17 | el_type alpha[2], beta[2]; // the last two entries of the T matrix 18 | el_type res[2]; // the last two residuals 19 | 20 | // the last 
two vectors of the lanczos iteration 21 | vector> v(2, vector(n)); 22 | 23 | double norm_A = 0; // matrix norm estimate 24 | double cond_A = 1; // condition number estimate 25 | double c = -1, s = 0; // givens rotation elements 26 | 27 | // temporary variables to store the corner of the matrix we're factoring 28 | double gamma_min = 1e99; 29 | el_type delta1[2], delta2[2], ep[2], gamma1[2], gamma2[2]; 30 | delta1[1] = 0; 31 | 32 | // temporary vectors for lanczos calculations 33 | vector pk(n), tk(n); 34 | 35 | // step size in the current search direction (xk = x_{k-1} + tau*dk) 36 | double tau = 0; 37 | 38 | // the last 2 search directions 39 | vector> d(2, vector(n)); 40 | 41 | // set up initial values for variables above 42 | double eps = A.eps; 43 | beta[0] = 0; 44 | beta[1] = norm(rhs, 2.0); 45 | 46 | double norm_rhs = beta[1]; 47 | 48 | // v[1] = rhs/beta[1] 49 | for (int i = 0; i < n; i++) { 50 | v[1][i] = rhs[i] / beta[1]; 51 | } 52 | 53 | res[0] = beta[1]; 54 | tau = beta[1]; 55 | 56 | auto sign = [&](double x) { return (abs(x) < eps ? 0 : x / abs(x)); }; 57 | 58 | // -------------- begin minres iterations --------------// 59 | int k = 1; // iteration number 60 | while (res[(k + 1) % 2] / norm_rhs > stop_tol && k <= max_iter) { 61 | int cur = k % 2, nxt = (k + 1) % 2; 62 | // ---------- begin lanczos step ----------// 63 | // pk = (M^(-1) A M^(-t) - shift*I) * v[cur], where M = L|D|^(1/2) where 64 | // |D|^(1/2) = Q|V|^(1/2) we do this in steps. first, tk = L^(-t) * 65 | // |D|^(-t/2) pk 66 | D.sqrt_solve(v[cur], pk, true); 67 | L.forwardsolve(pk, tk); 68 | 69 | // pk = A*tk 70 | A.multiply(tk, pk); 71 | 72 | // pk = |D|^(-1/2) L^(-1) pk. 
after this step, pk = M^(-1) A M^(-t) v[cur] 73 | L.backsolve(pk, tk); 74 | D.sqrt_solve(tk, pk, false); 75 | 76 | // finally, pk = pk - shift*I * v[cur]; 77 | for (int i = 0; i < n; i++) { 78 | pk[i] -= shift * v[cur][i]; 79 | } 80 | 81 | // alpha = v[cur]' * pk 82 | alpha[cur] = dot_product(v[cur], pk); 83 | 84 | // pk = pk - alpha*v[cur] 85 | vector_sum(1, pk, -alpha[cur], v[cur], pk); 86 | // v[nxt] = pk - beta[cur]*v[nxt] 87 | vector_sum(1, pk, -beta[cur], v[nxt], v[nxt]); 88 | beta[nxt] = norm(v[nxt], 2.0); 89 | 90 | // scale v[nxt] if beta[nxt] is not zero 91 | if (abs(beta[nxt]) > eps) { 92 | for (int i = 0; i < n; i++) { 93 | v[nxt][i] /= beta[nxt]; 94 | } 95 | } 96 | // ---------- end lanczos step ----------// 97 | 98 | // left orthogonalization on the middle two entries in the last column of Tk 99 | delta2[cur] = c * delta1[cur] + s * alpha[cur]; 100 | gamma1[cur] = s * delta1[cur] - c * alpha[cur]; 101 | 102 | // left orthogonalization to produce the first two entries of T_{k+1} and 103 | // ep_{k+1} 104 | ep[nxt] = s * beta[nxt]; 105 | delta1[nxt] = -c * beta[nxt]; 106 | 107 | // ---------- begin givens rotation ----------// 108 | double a = gamma1[cur], b = beta[nxt]; 109 | if (abs(b) < eps) { 110 | s = 0; 111 | gamma2[cur] = abs(a); 112 | if (abs(a) < eps) { 113 | c = 1; 114 | } else { 115 | c = sign(a); 116 | } 117 | } else if (abs(a) < eps) { 118 | c = 0; 119 | s = sign(b); 120 | gamma2[cur] = abs(b); 121 | } else if (abs(b) > abs(a)) { 122 | double t = a / b; 123 | s = sign(b) / sqrt(1 + t * t); 124 | c = s * t; 125 | gamma2[cur] = b / s; 126 | } else { // abs(a) >= abs(b) 127 | double t = b / a; 128 | c = sign(a) / sqrt(1 + t * t); 129 | s = c * t; 130 | gamma2[cur] = a / c; 131 | } 132 | // ---------- end givens rotation ----------// 133 | 134 | // update residual norms and estimate for matrix norm 135 | tau = c * res[nxt]; 136 | res[cur] = s * res[nxt]; 137 | 138 | if (k == 1) 139 | norm_A = sqrt(alpha[cur] * alpha[cur] + beta[nxt] * beta[nxt]); 140 
| else { 141 | double tnorm = sqrt(alpha[cur] * alpha[cur] + beta[nxt] * beta[nxt] + 142 | beta[cur] * beta[cur]); 143 | norm_A = std::max(norm_A, tnorm); 144 | } 145 | 146 | // ------ update solution and matrix condition number ------ // 147 | if (abs(gamma2[cur]) > eps) { 148 | // compute new search direction 149 | // d[cur] = (v[cur] - delta2[cur]*d[nxt] - ep[cur]*d[cur])/gamma2[cur]; 150 | vector_sum(1, v[cur], -ep[cur], d[cur], d[cur]); 151 | vector_sum(1, d[cur], -delta2[cur], d[nxt], d[cur]); 152 | 153 | for (int i = 0; i < n; i++) { 154 | d[cur][i] /= gamma2[cur]; 155 | } 156 | 157 | // sol = sol + tau*d[cur] 158 | vector_sum(1, sol_vec, tau, d[cur], sol_vec); 159 | gamma_min = std::min(gamma_min, gamma2[cur]); 160 | cond_A = norm_A / gamma_min; 161 | } 162 | 163 | k++; 164 | 165 | // ------------- end update ------------- // 166 | // cout << "current residual " << res[cur]/norm_rhs << endl; 167 | } 168 | 169 | if (msg_lvl) 170 | printf("The estimated condition number of the matrix is %e.\n", cond_A); 171 | 172 | std::string iter_str = "iterations"; 173 | if (k - 1 == 1) 174 | iter_str = "iteration"; 175 | 176 | if (msg_lvl) 177 | printf("MINRES took %i %s and got down to relative residual %e.\n", k - 1, 178 | iter_str.c_str(), res[(k + 1) % 2] / norm_rhs); 179 | return k - 1; 180 | } 181 | 182 | #endif // _SOLVER_MINRES_H_ 183 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 
7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /Preconditioners/ILDL/include/ILDL/ILDL.h: -------------------------------------------------------------------------------- 1 | /** This class provides functionality for computing an incomplete LDL^T 2 | * factorization of a symmetric indefinite matrix using the SYM-ILDL library. 3 | * 4 | * Specifically, we compute an incomplete factorization of the form: 5 | * 6 | * P'SASP ~ LDL' 7 | * 8 | * where: 9 | * 10 | * - P is a permutation of the rows and columns of A 11 | * - S is a diagonal scaling matrix used to equilibrate A 12 | * - L is a unit lower-triangular matrix 13 | * - D is a block-diagonal matrix comprised of 1x1 and 2x2 blocks 14 | * 15 | * The interface it provides is based upon the ones used by the Eigen library's 16 | * built-in matrix factorization types. 17 | * 18 | * Copyright (C) 2020 by David M. 
Rosen (dmrosen@mit.edu) 19 | */ 20 | 21 | #pragma once 22 | 23 | #include 24 | 25 | #include "lilc_matrix.h" // SYM-ILDL matrix type 26 | 27 | #include "Preconditioners/Types.h" 28 | 29 | namespace Preconditioners { 30 | 31 | /// Enum class that sets the type of pivoting to use during factorization 32 | enum class PivotType { Rook, BunchKaufman }; 33 | 34 | /// Enum class that sets the fill-reducing ordering to apply during 35 | /// factorization 36 | enum class Ordering { AMD, RCM, None }; 37 | 38 | /// Enum class that determines the type of equilibration (scaling) to apply to 39 | /// the matrix before factorization 40 | enum class Equilibration { Bunch, None }; 41 | 42 | /// This lightweight struct contains a simplified set of configuration options 43 | /// for the SYM-ILDL library, as it is used in the SymILDLSupport 44 | struct ILDLOpts { 45 | 46 | /** Parameter controlling the maximum fill-in for the incomplete 47 | * lower-triangular factor L: each column of L is guaranteed to have at most 48 | * max_fill_factor * (nnz(A) / dim(A)) nonzero elements. */ 49 | double max_fill_factor = 3.0; 50 | 51 | /** Drop tolerance for elements of the incomplete lower-triangular factor L: 52 | * any elements l in L_k (the kth column of L) satisfying 53 | * |l| <= drop_tol * |L_k|_1 54 | * will be set to 0. */ 55 | double drop_tol = 1e-3; 56 | 57 | /** This parameter controls the aggressiveness of the Bunch-Kaufman pivoting 58 | * procedure. When BK_pivot_tol = 1, full Bunch-Kaufman pivoting is used; 59 | * if BK_pivot_tol = 0, partial pivoting is turned off, and the first non-zero 60 | * pivot under the diagonal will be used. Intermediate values continuously 61 | * vary the aggressiveness of the pivoting: with values closer to 0 favoring 62 | * locality in pivoting (pivots closer to the diagonal are used), and values 63 | * closer to 1 increasing the stability of the selected pivots.
64 | * 65 | * This parameter is useful for trading off preservation of the *structure* of 66 | * the incomplete factor L vs. controlling the magnitudes of its elements */ 67 | double BK_pivot_tol = 1.0; 68 | 69 | /** This parameter determines the type of pivoting strategy to use during 70 | * factorization */ 71 | PivotType pivot_type = PivotType::Rook; 72 | 73 | /** This parameter determines the fill-reducing variable reordering strategy 74 | * to use when factoring the matrix */ 75 | Ordering order = Ordering::AMD; 76 | 77 | /** This parameter determines the equilibration (scaling) strategy to apply 78 | * when factoring the matrix */ 79 | Equilibration equilibration = Equilibration::Bunch; 80 | }; 81 | 82 | /** This lightweight class computes an incomplete LDL^T factorization of the 83 | * form: 84 | * 85 | * P'SASP ~ LDL' 86 | * 87 | * where: 88 | * 89 | * - P is a permutation of the rows and columns of A 90 | * - S is an [optional] diagonal scaling matrix used to equilibrate A 91 | * - L is a unit lower-triangular matrix 92 | * - D is a block-diagonal matrix comprised of 1x1 and 2x2 blocks 93 | */ 94 | class ILDL { 95 | private: 96 | /// Data members 97 | 98 | /** Dimension of the matrix stored in this factorization */ 99 | size_t dim_; 100 | 101 | /** Structure containing options for the SYM-ILDL library */ 102 | ILDLOpts opts_; 103 | 104 | /// FACTORIZATION ELEMENTS: Elements of the factorization P'SASP ~ LDL' 105 | 106 | /** Permutation P */ 107 | PermutationVector P_; 108 | 109 | /** Inverse permutation Pinv */ 110 | PermutationVector Pinv_; 111 | 112 | /** Diagonal scaling matrix S */ 113 | Vector S_; 114 | 115 | /** Unit lower-triangular factor L */ 116 | SparseMatrix L_; 117 | 118 | /// We store an eigendecomposition of the block-diagonal matrix D 119 | 120 | // The vector of eigenvalues of the block-diagonal matrix D 121 | Vector Lambda_; 122 | 123 | /** These vectors keep track of the starting (upper-left) index of each of the 124 | * blocks on D's
diagonal, and the dimension (1 or 2) of that block */ 125 | std::vector block_start_idxs_; 126 | std::vector block_sizes_; 127 | 128 | /** This map associates to each 2x2 block Di the orthogonal matrix Qi such 129 | * that Di = Qi * Lambda_i * Qi' */ 130 | std::unordered_map Q_; 131 | 132 | // Boolean value indicating whether the object contains a valid cached 133 | // factorization 134 | bool initialized_ = false; 135 | 136 | public: 137 | /// Constructors 138 | 139 | /** Construct an empty ILDL object */ 140 | ILDL(const ILDLOpts &options = ILDLOpts()); 141 | 142 | /** Construct an ILDL object containing a factorization 143 | * of the passed matrix A */ 144 | ILDL(const SparseMatrix &A, const ILDLOpts &options = ILDLOpts()); 145 | 146 | /// Mutators 147 | 148 | /** Set the options for the incomplete LDLT factorization. Note that calling 149 | * this function will release any cached factorizations currently held 150 | * by the ILDL object */ 151 | void setOptions(const ILDLOpts &options); 152 | 153 | /** Compute an incomplete LDL^T factorization of the matrix A. */ 154 | void compute(const SparseMatrix &A); 155 | 156 | /** Frees any cached factorizations currently held by the 157 | * ILDL object */ 158 | void clear(); 159 | 160 | /// Accessors 161 | 162 | /** Return the dimension of the matrix stored in this factorization */ 163 | const size_t dim() const { return dim_; } 164 | 165 | /** Return the fill-reducing permutation P used in the factorization */ 166 | const PermutationVector &P() const { return P_; } 167 | 168 | /** Return the equilibration (scaling) matrix S (stored as a Vector) */ 169 | const Vector &S() const { return S_; } 170 | 171 | /** Return the unit lower-triangular factor L */ 172 | const SparseMatrix &L() const { return L_; } 173 | 174 | /** Return the block-diagonal matrix D.
If pos_def_mod = true, the returned 175 | * matrix is modified to ensure that it is positive-definite */ 176 | SparseMatrix D(bool pos_def_mod = false) const; 177 | 178 | /** Return the total number of blocks in the block-diagonal matrix D */ 179 | size_t num_blocks() const { return block_sizes_.size(); } 180 | 181 | /** Return the number of 2x2 blocks in the block-diagonal matrix D */ 182 | size_t num_2x2_blocks() const { return Q_.size(); } 183 | 184 | /** Returns the inertia (number of positive and negative eigenvalues) of the 185 | * block-diagonal matrix D in the factorization */ 186 | Inertia inertia() const; 187 | 188 | /// Linear-algebraic operations 189 | 190 | /** Compute the matrix-vector product D*x. If pos_def_mod is 'true', the 191 | * product is computed with a positive-definite modification of D */ 192 | Vector Dproduct(const Vector &x, bool pos_def_mod = false) const; 193 | 194 | /** Solve the linear system Dx = b. If pos_def_mode is 'true', the system is 195 | * solved with D replaced by its positive-definite modification */ 196 | Vector Dsolve(const Vector &b, bool pos_def_mode = false) const; 197 | 198 | /** Solve the linear system (D+)^{1/2} * x = b, where (D+)^{1/2} is the 199 | * symmetric square-root of the positive-definite modification of the 200 | * block-diagonal matrix D. */ 201 | Vector sqrtDsolve(const Vector &b) const; 202 | 203 | /** Solve the linear system LDL'x = b. If pos_def_mode is 'true', the 204 | * system is solved using a positive-definite modification of D */ 205 | Vector LDLTsolve(const Vector &b, bool pos_def_mode = false) const; 206 | 207 | /** Solve the linear system (D+)^{1/2}L' * x = b, where (D+)^{1/2} is the 208 | * symmetric square-root of the positive-definite modification of the 209 | * block-diagonal matrix D. If transpose = true, this function instead solves 210 | * the linear system L (D+)^{1/2} x = b (corresponding to transposing the 211 | * coefficient matrix).
*/ 212 | Vector sqrtDLTsolve(const Vector &b, bool transpose = false) const; 213 | 214 | /** Compute an approximate solution of Ax = b using the incomplete LDLT 215 | * factorization. If pos_def_mod is 'true', the block-diagonal matrix D is 216 | * modified to ensure that the corresponding modification of A is 217 | * positive-definite */ 218 | Vector solve(const Vector &b, bool pos_def_mod = false) const; 219 | }; 220 | 221 | } // namespace Preconditioners 222 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_pivot.h: -------------------------------------------------------------------------------- 1 | using std::abs; 2 | 3 | /*! There are four parts to this pivoting algorithm. 4 | For A, due to storing only the lower half, there are three steps to performing a symmetric permutation: 5 | -# A(k, 1:k) must be swapped with A(r, 1:k) (row-row swap). 6 | -# A(k:r, k) must be swapped with A(r, k:r) (row-column swap). 7 | -# A(k:r, k) must be swapped with A(k:r, r) (column-column swap). 8 | The steps above are implemented in the pivotA function. 9 | 10 | For L, since columns k and r are not yet formed, there is only one step (a row permutation): 11 | -# L(k, 1:k) must be swapped with L(r, 1:k) (row-row swap).
12 | */ 13 | template 14 | inline void lilc_matrix :: pivot(swap_struct& s, vector& in_set, lilc_matrix& L, const int& k, const int& r) { 15 | //declare temp variables (only i is used in this function; j, idx and offset are unused) 16 | int i, j, idx, offset; 17 | 18 | //----------pivot A ----------// 19 | this->pivotA(s, in_set, k, r); 20 | //--------end pivot A---------// 21 | 22 | //----------pivot L ----------// 23 | s.swap_clear(); 24 | 25 | // -------------------- (1) for L ------------------------// 26 | //push back pointers to L(k, i) onto s.swapr (these entries are relabelled to r below) 27 | for (idx_it it = L.list[k].begin(); it != L.list[k].end(); it++) 28 | { 29 | for (i = L.col_first[*it]; i < (int) L.m_idx[*it].size(); i++) { 30 | if (L.m_idx[*it][i] == k) { 31 | s.swapr.push_back(L.m_idx[*it].begin() + i); 32 | break; 33 | } 34 | } 35 | } 36 | 37 | //push back pointers to L(r, i) onto s.swapk (these entries are relabelled to k below) 38 | for (idx_it it = L.list[r].begin(); it != L.list[r].end(); it++) { 39 | for (i = L.col_first[*it]; i < (int) L.m_idx[*it].size(); i++) { 40 | if (L.m_idx[*it][i] == r) { 41 | s.swapk.push_back(L.m_idx[*it].begin() + i); 42 | break; 43 | } 44 | } 45 | } 46 | 47 | //swap rows k and r by overwriting the collected row indices in place 48 | for (typename vector::iterator it = s.swapk.begin(); it != s.swapk.end(); it++) { 49 | **it = k; 50 | } 51 | for (typename vector::iterator it = s.swapr.begin(); it != s.swapr.end(); it++) { 52 | **it = r; 53 | } 54 | 55 | //row swap on row non-zero indices stored in L.list 56 | L.list[k].swap(L.list[r]); 57 | 58 | //--------end pivot L---------// 59 | } 60 | 61 | /*! There are three parts to this pivoting algorithm. 62 | For A, due to storing only the lower half, there are three steps to performing a symmetric permutation: 63 | -# A(k, 1:k) must be swapped with A(r, 1:k) (row-row swap). 64 | -# A(k:r, k) must be swapped with A(r, k:r) (row-column swap). 65 | -# A(k:r, k) must be swapped with A(k:r, r) (column-column swap).
66 | */ 67 | template 68 | inline void lilc_matrix :: pivotA(swap_struct& s, vector& in_set, const int& k, const int& r) { 69 | assert(k < r); // this algorithm implicitly assumes k < r 70 | 71 | //declare temp variables (its_r is declared but not used in this function) 72 | std::pair its_k, its_r; 73 | int i, j, idx, offset; 74 | 75 | //----------- clear out old variables from last pivot -------------- // 76 | //for vectors of primitive types, clear is always constant time regardless of how many elements are in the container. 77 | s.col_clear(); 78 | s.row_clear(); 79 | 80 | //----------pivot A ----------// 81 | s.swap_clear(); 82 | 83 | //------------- row-row swap (1) for A -------------// 84 | 85 | //pushes column indices (which contain non-zero elements) of A(k, 1:k) onto row_r 86 | for (idx_it it = list[k].begin(); it != list[k].begin() + row_first[k]; ++it) { 87 | s.row_r.push_back(*it); 88 | } 89 | 90 | //pushes column indices (which contain non-zero elements) of A(r, 1:k) onto row_k 91 | for (idx_it it = list[r].begin(); it != list[r].begin() + row_first[r]; ++it) { 92 | s.row_k.push_back(*it); 93 | } 94 | 95 | //merge these two sets of indices together 96 | s.all_swaps.assign(list[k].begin(), list[k].begin() + row_first[k]); 97 | unordered_inplace_union(s.all_swaps, list[r].begin(), list[r].begin() + row_first[r], in_set); 98 | 99 | //do row swaps in A (i.e. swap A(k, 1:k) with A(r, 1:k)) 100 | for (idx_it it = s.all_swaps.begin(), end = s.all_swaps.end(); it != end; ++it) { 101 | safe_swap(m_idx[*it], k, r); 102 | } 103 | s.all_swaps.clear(); 104 | 105 | //----------------------------------------------------// 106 | 107 | 108 | //---------------------- (2) and (3) for A --------------------------// 109 | 110 | //after sym. perm, a_rr will be swapped to a_kk, so we put a_rr as first 111 | //elem of col k if it's non-zero. this also means that we ensure the first 112 | //elem of col k is the diagonal element if it exists.
113 | el_type elem = coeff(r, r); 114 | if (abs(elem) > eps){ 115 | s.col_k_nnzs.push_back(k); 116 | s.col_k.push_back(elem); 117 | } 118 | 119 | //same as above, put a_kk in new col r if it exists. 120 | elem = coeff(k, k); 121 | if (abs(elem) > eps){ 122 | s.col_r_nnzs.push_back(r); 123 | s.col_r.push_back(elem); 124 | } 125 | 126 | 127 | //row_first[r] should have # of nnz of A(r, 0:k-1) 128 | for (i = row_first[r]; i < (int) list[r].size(); i++) { 129 | j = list[r][i]; 130 | assert(j >= k); 131 | if (coeffRef(r, j, its_k)) { 132 | if (j == k) { 133 | s.col_k_nnzs.push_back(r); //A(r, k) is fixed upon permutation so its index stays r 134 | s.row_r.push_back(k); 135 | } else { 136 | s.col_k_nnzs.push_back(j); //place A(r, j) (where k < j < r) into A(j, k) 137 | } 138 | s.col_k.push_back(*its_k.second); 139 | 140 | //delete A(r,j) from A by overwriting it with the last entry of column j and popping the back. 141 | *its_k.first = m_idx[j].back(); 142 | *its_k.second = m_x[j].back(); 143 | 144 | m_idx[j].pop_back(); 145 | m_x[j].pop_back(); 146 | } 147 | } 148 | 149 | if (m_idx[r].size() > 0) { 150 | 151 | //place A(r:n, r) into A(r:n, k). since we already took care of A(r,r) above, 152 | //we need to offset by 1 if necessary. 153 | ensure_invariant(r, r, m_idx[r], false); 154 | offset = (m_idx[r][0] == r ?
1 : 0); 155 | std::copy(m_x[r].begin()+offset, m_x[r].end(), std::back_inserter(s.col_k)); 156 | std::copy(m_idx[r].begin()+offset, m_idx[r].end(), std::back_inserter(s.col_k_nnzs)); 157 | 158 | for (idx_it it = m_idx[r].begin() + offset; it != m_idx[r].end(); it++) { 159 | 160 | //for each non-zero row index in the rth column, find a pointer to it in list 161 | //these pointers will be used to perform column swaps on list 162 | for (i = row_first[*it]; i < (int) list[*it].size(); i++) { 163 | if (list[*it][i] == r) { 164 | s.swapk.push_back(list[*it].begin() + i); 165 | break; 166 | } 167 | } 168 | } 169 | } 170 | 171 | //swap A(k:r, k) with A(r, k:r) 172 | if (m_idx[k].size() > 0) { 173 | 174 | //since we already took care of A(k,k), we need an offset of 1 if necessary 175 | ensure_invariant(k, k, m_idx[k], false); 176 | offset = (m_idx[k][0] == k ? 1 : 0); 177 | for (i = offset; i < (int) m_idx[k].size(); i++) { 178 | idx = m_idx[k][i]; 179 | 180 | //if idx < r, we are in (2) (row-col swap); if idx > r we are in (3) (col-col swap); idx == r is skipped here (A(r, k) was already handled in the loop over row r above) 181 | if (idx < r) { 182 | 183 | //swap A(i, k) with A(r, i) where k < i < r. 184 | m_idx[idx].push_back(r); 185 | m_x[idx].push_back(m_x[k][i]); 186 | 187 | //we also have to ensure that list is updated by popping off old entries 188 | //that were meant for the A(i, k)'s before they were swapped. 189 | ensure_invariant(idx, k, list[idx], true); 190 | std::swap(list[idx][row_first[idx]], list[idx][list[idx].size() - 1]); 191 | list[idx].pop_back(); 192 | 193 | //push back new elements on row_r 194 | s.row_r.push_back(idx); 195 | 196 | } else if (idx > r) { 197 | 198 | //swap A(i, k) with A(i, r) where r < i.
199 | s.col_r.push_back(m_x[k][i]); 200 | s.col_r_nnzs.push_back(idx); 201 | 202 | //for each non-zero row index in the kth column, find a pointer to it in list 203 | //these pointers will be used to perform column swaps on list 204 | for (j = row_first[idx]; j < (int) list[idx].size(); j++) { 205 | if (list[idx][j] == k) { 206 | s.swapr.push_back(list[idx].begin() + j); 207 | break; 208 | } 209 | } 210 | } 211 | } 212 | } 213 | 214 | //swap all A(i, k) with A(i, r) in list. 215 | for (typename vector::iterator it = s.swapk.begin(); it != s.swapk.end(); it++) { 216 | **it = k; 217 | } 218 | for (typename vector::iterator it = s.swapr.begin(); it != s.swapr.end(); it++) { 219 | **it = r; 220 | } 221 | 222 | //add new entries for new col k into list 223 | for (idx_it it = s.col_k_nnzs.begin(); it != s.col_k_nnzs.end(); it++) { 224 | if ((*it > k) && (*it < r)) { 225 | list[*it].push_back(k); 226 | } 227 | } 228 | 229 | //set the kth col 230 | m_idx[k].swap(s.col_k_nnzs); 231 | m_x[k].swap(s.col_k); 232 | 233 | //set the rth col 234 | m_idx[r].swap(s.col_r_nnzs); 235 | m_x[r].swap(s.col_r); 236 | 237 | //set the kth row and rth row 238 | list[k].swap(s.row_k); 239 | list[r].swap(s.row_r); 240 | 241 | //swap the row_first entries of rows k and r 242 | std::swap(row_first[k], row_first[r]); 243 | //--------end pivot A---------// 244 | } 245 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/block_diag_matrix.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++ -*- 2 | #ifndef _BLOCK_DIAG_MATRIX_H_ 3 | #define _BLOCK_DIAG_MATRIX_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #ifdef SYM_ILDL_DEBUG 15 | template 16 | std::ostream &operator<<(std::ostream &os, const std::vector &vec) { 17 | os << "["; 18 | if (!vec.empty()) { 19 | for (typename std::vector::size_type index = 0; 20 | index < vec.size() - 1; index++) { 21 |
os << vec[index] << ", "; 22 | } 23 | 24 | os << vec[vec.size() - 1]; 25 | } 26 | os << "]"; 27 | return os; 28 | } 29 | #endif 30 | 31 | using std::abs; 32 | 33 | /*! \brief A quick implementation of a block-diagonal matrix with 1x1 and 2x2 blocks. 34 | */ 35 | template class block_diag_matrix { 36 | public: 37 | typedef std::unordered_map int_elt_map; 38 | typedef std::vector elt_vector_type; 39 | 40 | /*! Allows outputting the contents of the matrix via << operators. */ 41 | friend std::ostream &operator<<(std::ostream &os, 42 | const block_diag_matrix &D) { 43 | os << D.to_string(); 44 | return os; 45 | }; 46 | 47 | int m_n_size; ///< Dimension of the matrix. 48 | int nnz_count; ///< Number of non-zeros in the matrix (the n diagonal entries plus each off-diagonal entry created through off_diagonal()). 49 | elt_vector_type main_diag; ///< Stores main diagonal elements. 50 | int_elt_map off_diag; ///< Stores off-diagonal elements of 2x2 pivots. 51 | 52 | /*! \brief Constructor for the block-diagonal class. Initializes a 0x0 matrix when 53 | * given no arguments. n_rows and n_cols must be equal (asserted). 54 | */ 55 | block_diag_matrix(int n_rows = 0, int n_cols = 0) : m_n_size(n_rows) { 56 | assert(n_rows == n_cols); 57 | nnz_count = n_rows; 58 | main_diag.clear(); 59 | main_diag.resize(n_rows); 60 | } 61 | 62 | /*! \brief Resizes this matrix to an n*n matrix with default_value on the 63 | * main diagonal. Any stored off-diagonal entries are discarded. 64 | */ 65 | void resize(int n, el_type default_value) { 66 | m_n_size = n; 67 | main_diag.clear(); 68 | main_diag.resize(n, default_value); 69 | off_diag.clear(); 70 | nnz_count = n; 71 | } 72 | 73 | /*! \brief Resizes this matrix to an n*n matrix with zeros on the main diagonal (delegates to resize(n, 0)). 74 | */ 75 | void resize(int n) { 76 | m_n_size = n; 77 | resize(n, 0); 78 | nnz_count = n; 79 | } 80 | 81 | /*! \return Number of rows in the matrix. */ 82 | int n_rows() const { return m_n_size; } 83 | 84 | /*! \return Number of cols in the matrix. */ 85 | int n_cols() const { return m_n_size; } 86 | 87 | /*! \return Number of nonzeros in the matrix. */ 88 | int nnz() const { return nnz_count; }; 89 | 90 | /*! \param i the index of the element.
91 | \return The D(i,i)th element (bounds-checked via at()). 92 | */ 93 | el_type &operator[](int i) { return main_diag.at(i); } 94 | 95 | /*! \param i the index of the element. 96 | \return The D(i+1,i)th element; the entry is created with value 0 (and nnz_count incremented) if it does not exist yet. 97 | */ 98 | el_type &off_diagonal(int i) { 99 | if (!off_diag.count(i)) { 100 | off_diag.insert(std::make_pair(i, 0)); 101 | nnz_count++; 102 | } 103 | 104 | return off_diag[i]; 105 | } 106 | 107 | /*! \param i the index of the element. 108 | \return 2 if there is a 2x2 pivot at D(i,i) and D(i+1,i+1). 109 | -2 if there is a 2x2 pivot at D(i-1,i-1) and 110 | D(i,i). 1 if the pivot is only a 1x1 block. 111 | */ 112 | int block_size(int i) const { 113 | if (off_diag.count(i)) { 114 | return 2; 115 | } else if (off_diag.count(i - 1)) { 116 | return -2; 117 | } else { 118 | return 1; 119 | } 120 | } 121 | 122 | /*! \brief Solves a system with a square-root factor of |D| = Q|V|Q', where QVQ' is the 123 | eigendecomposition of D, and |.| replaces each eigenvalue by its absolute value. \param b the right 124 | hand side. \param x a storage vector for the solution (must be same size as 125 | b). \param transposed solves |V|^(1/2)Q' x = b if true, Q|V|^(1/2) x = b if false.
126 | */ 127 | void sqrt_solve(const elt_vector_type &b, elt_vector_type &x, 128 | bool transposed = false) { 129 | assert(b.size() == x.size()); 130 | 131 | const double eps = 1e-8; 132 | double alpha, beta, gamma, eig0, eig1, disc; 133 | double Q[2][2], tx[2]; 134 | for (int i = 0; i < m_n_size; i += block_size(i)) { 135 | if (block_size(i) == 2) { 136 | alpha = main_diag[i]; 137 | beta = main_diag[i + 1]; 138 | gamma = off_diag[i]; 139 | // eigenvalues of the symmetric 2x2 block (alpha gamma; gamma beta) 140 | disc = sqrt((alpha - beta) * (alpha - beta) + 4 * gamma * gamma); 141 | eig0 = 0.5 * (alpha + beta + disc); 142 | eig1 = 0.5 * (alpha + beta - disc); 143 | // if gamma is negligible relative to min(alpha, beta), treat the block as already diagonal (Q = I) 144 | if (abs(gamma / std::min(alpha, beta)) < eps) { 145 | eig0 = alpha; 146 | eig1 = beta; 147 | Q[0][0] = 1; 148 | Q[1][0] = 0; 149 | Q[0][1] = 0; 150 | Q[1][1] = 1; 151 | } else { 152 | double sin2t = 2 * gamma / disc, cos2t = (alpha - beta) / disc; 153 | double theta = 0.5 * atan2(sin2t, cos2t); 154 | 155 | Q[0][0] = cos(theta); 156 | Q[0][1] = -sin(theta); 157 | Q[1][0] = sin(theta); 158 | Q[1][1] = cos(theta); 159 | } 160 | 161 | // solves Q|V|^(1/2) x = b or solves the transposed version |V|^(1/2)Q' 162 | // x = b.
163 | if (!transposed) { 164 | // tx = Q'*b 165 | tx[0] = Q[0][0] * b[i] + Q[1][0] * b[i + 1]; 166 | tx[1] = Q[0][1] * b[i] + Q[1][1] * b[i + 1]; 167 | 168 | // x = |V|^(-1/2)*tx 169 | x[i] = tx[0] / sqrt(abs(eig0)); 170 | x[i + 1] = tx[1] / sqrt(abs(eig1)); 171 | } else { 172 | // tx = |V|^(-1/2)*b 173 | tx[0] = b[i] / sqrt(abs(eig0)); 174 | tx[1] = b[i + 1] / sqrt(abs(eig1)); 175 | 176 | // x = Q*tx 177 | x[i] = Q[0][0] * tx[0] + Q[0][1] * tx[1]; 178 | x[i + 1] = Q[1][0] * tx[0] + Q[1][1] * tx[1]; 179 | } 180 | } else { 181 | x[i] = b[i] / sqrt(abs(main_diag[i])); 182 | } 183 | } 184 | } 185 | 186 | /** Solves Q|V|Q'x = b, where Q V Q' = D is the eigendecomposition of 187 | * D, and |V| is the matrix obtained by replacing the eigenvalues V of D with 188 | * their absolute values 189 | * 190 | * Added by DM Rosen 6-11-2020 191 | */ 192 | void pos_def_solve(const elt_vector_type &b, elt_vector_type &x) { 193 | assert(b.size() == x.size()); 194 | 195 | const double eps = 1e-8; 196 | double alpha, beta, gamma, eig0, eig1, disc; 197 | double Q[2][2], tx[2]; 198 | for (int i = 0; i < m_n_size; i += block_size(i)) { 199 | if (block_size(i) == 2) { 200 | alpha = main_diag[i]; 201 | beta = main_diag[i + 1]; 202 | gamma = off_diag[i]; 203 | 204 | /// Compute eigendecomposition of this 2x2 block 205 | 206 | disc = sqrt((alpha - beta) * (alpha - beta) + 4 * gamma * gamma); 207 | eig0 = 0.5 * (alpha + beta + disc); 208 | eig1 = 0.5 * (alpha + beta - disc); 209 | // if gamma is negligible relative to min(alpha, beta), treat the block as already diagonal (Q = I) 210 | if (abs(gamma / std::min(alpha, beta)) < eps) { 211 | eig0 = alpha; 212 | eig1 = beta; 213 | Q[0][0] = 1; 214 | Q[1][0] = 0; 215 | Q[0][1] = 0; 216 | Q[1][1] = 1; 217 | } else { 218 | double sin2t = 2 * gamma / disc, cos2t = (alpha - beta) / disc; 219 | double theta = 0.5 * atan2(sin2t, cos2t); 220 | 221 | Q[0][0] = cos(theta); 222 | Q[0][1] = -sin(theta); 223 | Q[1][0] = sin(theta); 224 | Q[1][1] = cos(theta); 225 | } 226 | 227 | /// Now compute x = Q |V|^-1 Q' b 228 | 229 | // tx = Q'*b 230 | tx[0] = Q[0][0]
* b[i] + Q[1][0] * b[i + 1]; 231 | tx[1] = Q[0][1] * b[i] + Q[1][1] * b[i + 1]; 232 | 233 | // tx = |V|^-1 tx 234 | tx[0] /= abs(eig0); 235 | tx[1] /= abs(eig1); 236 | 237 | // x = Q*tx 238 | x[i] = Q[0][0] * tx[0] + Q[0][1] * tx[1]; 239 | x[i + 1] = Q[1][0] * tx[0] + Q[1][1] * tx[1]; 240 | } else { 241 | x[i] = b[i] / abs(main_diag[i]); 242 | } 243 | } 244 | } 245 | 246 | /** \brief Solves the system Dx = b. No check is made for a zero 1x1 pivot or a singular 2x2 block (det == 0). 247 | * \param b the right hand side. 248 | * \param x a storage vector for the solution (must be same size as b). 249 | */ 250 | void solve(const elt_vector_type &b, elt_vector_type &x) { 251 | assert(b.size() == x.size()); 252 | 253 | double a, d, c, det; 254 | for (int i = 0; i < m_n_size; i += block_size(i)) { 255 | if (block_size(i) == 2) { 256 | a = main_diag[i]; 257 | d = main_diag[i + 1]; 258 | c = off_diag[i]; 259 | det = a * d - c * c; 260 | // system is (a c; c d) 261 | // inverse is 1/(ad - c^2) * (d -c; -c a) 262 | x[i] = (d * b[i] - c * b[i + 1]) / det; 263 | x[i + 1] = (-c * b[i] + a * b[i + 1]) / det; 264 | } else { 265 | x[i] = b[i] / main_diag[i]; 266 | } 267 | } 268 | } 269 | 270 | /*! \return A string representation of this matrix. 271 | */ 272 | std::string to_string() const; 273 | 274 | /*! \param filename the filename of the matrix to be saved. All matrices saved 275 | are in matrix market format (.mtx). \return True if the save succeeded, 276 | false otherwise. 277 | */ 278 | bool save(std::string filename) const; 279 | 280 | /*! \brief Generic class destructor.
#ifndef _LILC_MATRIX_ILDL_HELPERS_H
#define _LILC_MATRIX_ILDL_HELPERS_H

// Standard facilities used below, so these helpers do not depend on what the
// including file happened to pull in first.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <vector>

using std::abs;
using std::vector;

typedef vector<int>::iterator idx_it;

/*! \brief Computes the dot product of v and w. Only works when el_type is real right now.
    \param v the first vector whose dot product we wish to compute
    \param w the second vector whose dot product we wish to compute; must hold
             at least v.size() elements.
    \return the sum of v[i]*w[i] over the indices of v, accumulated in double.
*/
template <class el_type>
inline double dot_product(const vector<el_type>& v, const vector<el_type>& w) {
  assert(w.size() >= v.size());

  double res = 0;
  for (std::size_t i = 0; i < v.size(); ++i) {
    res += v[i] * w[i];
  }
  return res;
}

/*! \brief Computes the sum of two vectors: u = a*v + b*w. a and b are scalars, u, v, and w are vectors.
    \param a the scalar multiplying v.
    \param v the first vector; its size determines how many entries are written.
    \param b the scalar multiplying w.
    \param w the second vector; must hold at least v.size() elements.
    \param u the storage vector for the result; must hold at least v.size() elements.
*/
template <class el_type>
inline void vector_sum(double a, vector<el_type>& v, double b, vector<el_type>& w, vector<el_type>& u) {
  assert(w.size() >= v.size() && u.size() >= v.size());

  for (std::size_t i = 0; i < v.size(); ++i) {
    u[i] = a * v[i] + b * w[i];
  }
}

/*! \brief Computes the maximum (in absolute value) element of v(curr_nnzs) and its index.
    \param v the vector whose max element is to be computed.
    \param curr_nnzs a list of indices representing non-zero elements in v.
    \param r the index of the maximum element of v. Left untouched when every
             listed element is zero (or curr_nnzs is empty).

    \return the max element of v (0 if no listed element is nonzero).
*/
template <class el_type>
inline double max(vector<el_type>& v, vector<int>& curr_nnzs, int& r) {
  double res = 0;
  for (int idx : curr_nnzs) {
    if (abs(v[idx]) > res) {
      res = abs(v[idx]);
      r = idx;
    }
  }

  return res;
}

/*! \brief Computes the norm of v(curr_nnzs).
    \param v the vector whose norm is to be computed.
    \param curr_nnzs a list of indices representing non-zero elements in v.
    \param p The norm number.
    \return the norm of v.

    The sum and the exponent 1/p are evaluated in double, so an integral
    el_type no longer turns 1/p into an integer division.
*/
template <class el_type>
inline double norm(vector<el_type>& v, vector<int>& curr_nnzs, el_type p = 1) {
  double res = 0;

  // 1-norm (the default): a plain sum of magnitudes, no pow() needed.
  if (p == 1) {
    for (int idx : curr_nnzs) {
      res += abs(v[idx]);
    }
    return res;
  }

  const double exponent = static_cast<double>(p);
  for (int idx : curr_nnzs) {
    res += std::pow(static_cast<double>(abs(v[idx])), exponent);
  }
  return std::pow(res, 1.0 / exponent);
}

/*! \brief Computes the norm of v.
    \param v the vector whose norm is to be computed.
    \param p The norm number.
    \return the norm of v.

    The sum and the exponent 1/p are evaluated in double (see the indexed
    overload above).
*/
template <class el_type>
inline double norm(vector<el_type>& v, el_type p = 1) {
  double res = 0;

  if (p == 1) {
    for (std::size_t i = 0; i < v.size(); ++i) {
      res += abs(v[i]);
    }
    return res;
  }

  const double exponent = static_cast<double>(p);
  for (std::size_t i = 0; i < v.size(); ++i) {
    res += std::pow(static_cast<double>(abs(v[i])), exponent);
  }
  return std::pow(res, 1.0 / exponent);
}

/*! \brief Performs an inplace union of two sorted lists (a and b), removing duplicates in the final list.
    \param a the sorted list to contain the final merged list.
    \param b_start an iterator to the start of b.
    \param b_end an iterator to the end of b.

    [b_start, b_end) must not refer into a: appending to a may invalidate
    such iterators.
*/
template <class InputContainer, class InputIterator>
inline void inplace_union(InputContainer& a, InputIterator const& b_start, InputIterator const& b_end)
{
  typedef typename InputContainer::difference_type diff_type;

  // store the end of first sorted range
  const diff_type mid = static_cast<diff_type>(a.size());

  // copy the second sorted range into the destination vector
  std::copy(b_start, b_end, std::back_inserter(a));

  // perform the in place merge on the two sub-sorted ranges.
  std::inplace_merge(a.begin(), a.begin() + mid, a.end());

  // remove duplicate elements from the sorted vector
  a.erase(std::unique(a.begin(), a.end()), a.end());
}

/*! \brief Performs an inplace union of two unsorted lists (a and b), removing duplicates in the final list.
    \param a the list to contain the final merged list. Elements of b not
             already in a are appended in the order they appear in b.
    \param b_start an iterator to the start of b.
    \param b_end an iterator to the end of b.
    \param in_set a bitset used to indicate elements present in a and b. Must
                  be all zeros on entry and large enough to be indexed by
                  every element; reset to all zeros after union.

    a is traversed with its own iterator type, so b may come from a different
    container type (or be a const range).
*/
template <class InputContainer, class InputIterator>
inline void unordered_inplace_union(InputContainer& a, InputIterator const& b_start, InputIterator const& b_end, vector<bool>& in_set)
{
  typedef typename InputContainer::const_iterator a_iterator;
  const InputContainer& a_view = a;

  // mark everything already in a
  for (a_iterator it = a_view.begin(), end = a_view.end(); it != end; ++it) {
    assert(*it >= 0 && static_cast<std::size_t>(*it) < in_set.size());
    in_set[*it] = true;
  }

  // append the elements of b that have not been seen yet
  for (InputIterator it = b_start; it != b_end; ++it) {
    if (!in_set[*it]) {
      in_set[*it] = true;
      a.push_back(*it);
    }
  }

  // a now holds every marked index: clear the marks again
  for (a_iterator it = a_view.begin(), end = a_view.end(); it != end; ++it) {
    assert(*it >= 0 && static_cast<std::size_t>(*it) < in_set.size());
    in_set[*it] = false;
  }
}

//-------------Dropping rules-------------//


namespace {
/*! \brief Functor for comparing elements by value (in decreasing order) instead of by index.
    \param v the vector that contains the values being compared.
*/
template <class el_type>
struct by_value {
  const vector<el_type>& v;
  by_value(const vector<el_type>& vec) : v(vec) {}
  inline bool operator()(int const &a, int const &b) const {
    // Not needed if we're using sort. If using this comparator
    // in a set, then uncomment the line below.
    //if (abs(v[a]) == abs(v[b])) return a < b;
    return abs(v[a]) > abs(v[b]);
  }
};

/*! \brief Functor for determining if a variable is below the tolerance given.
    \param v the vector that contains the values being checked.
    \param eps the tolerance given.
*/
template <class el_type>
struct by_tolerance {
  const vector<el_type>& v;
  double eps;
  by_tolerance(const vector<el_type>& vec, const double& eps) : v(vec), eps(eps) {}
  inline bool operator()(int const &i) const {
    return abs(v[i]) < eps;
  }
};
}

/*! \brief Performs the dual-dropping criteria outlined in Li & Saad (2005).
    \param v the vector whose elements will be selectively dropped.
    \param curr_nnzs the non-zeros in the vector v. On return it lists, sorted
                     by index, the entries that were kept.
    \param lfil a parameter to control memory usage. At most lfil elements are
                kept; a non-positive lfil keeps none.
    \param tol a parameter to control aggressiveness of dropping. Elements less than tol*norm(v) are dropped.

    The lfil rule always keeps the largest entries in magnitude: the index
    list is ordered by value whenever it has to be truncated, regardless of
    whether the tolerance rule was active.
*/
template <class el_type>
inline void drop_tol(vector<el_type>& v, vector<int>& curr_nnzs, const int& lfil, const double& tol) {
  const double eps = 1e-13; //TODO: fix later. need to make this a global thing
  const std::size_t keep = lfil > 0 ? static_cast<std::size_t>(lfil) : 0;

  // Rule 1: drop everything smaller than tolerance = tol * norm(v).
  const el_type tolerance = tol * norm<el_type>(v, curr_nnzs);
  if (tolerance > eps) {
    for (int idx : curr_nnzs) {
      if (abs(v[idx]) < tolerance) v[idx] = 0;
    }
  }

  // Rule 2: keep at most lfil entries. Order by decreasing magnitude first so
  // that the entries cut off are the smallest ones.
  if (curr_nnzs.size() > keep) {
    by_value<el_type> sorter(v);
    std::sort(curr_nnzs.begin(), curr_nnzs.end(), sorter);

    for (std::size_t i = keep; i < curr_nnzs.size(); ++i) {
      v[curr_nnzs[i]] = 0;
    }
  }

  // Forget the indices of everything that is now (numerically) zero. All
  // truncated entries were zeroed above, so at most `keep` indices remain.
  by_tolerance<el_type> is_zero(v, eps);
  curr_nnzs.erase(std::remove_if(curr_nnzs.begin(), curr_nnzs.end(), is_zero), curr_nnzs.end());

  // sort the surviving elements by index, only these will be assigned into L.
  std::sort(curr_nnzs.begin(), curr_nnzs.end());
}

//----------------Column updates------------------//

/*! \brief Subtracts (l_ki * d) * L(:, j) from the work vector, starting at position L.col_first[j] of column j.
    \param k row index handed to L.ensure_invariant together with j.
    \param j the column of L used for the update.
    \param l_ki the multiplier taken from L.
    \param d the entry of D paired with l_ki.
    \param work the vector being updated.
    \param curr_nnzs the nonzero indices of work; indices touched for the first time are appended.
    \param L the (partial) lower triangular factor.
    \param in_set flags marking the indices already present in curr_nnzs.
*/
template <class el_type>
inline void update_single(const int& k, const int& j, const el_type& l_ki, const el_type& d, vector<el_type>& work, vector<int>& curr_nnzs, lilc_matrix<el_type>& L, vector<bool>& in_set) {
  // find where L(k, k+1:n) starts (read before ensure_invariant, as the
  // start offset must be the one recorded on entry)
  const int offset = L.col_first[j];

  L.ensure_invariant(j, k, L.m_idx[j]);

  const el_type factor = l_ki * d;
  const int col_len = static_cast<int>(L.m_idx[j].size());
  for (int i = offset; i < col_len; ++i) {
    const int x = L.m_idx[j][i];
    work[x] -= factor * L.m_x[j][i];
    if (!in_set[x]) {
      curr_nnzs.push_back(x);
      in_set[x] = true;
    }
  }
}

/*! \brief Performs a delayed update of subcolumn A(k:n,r). Result is stored in work vector. Nonzero elements of the work vector are stored in curr_nnzs.
    \param r the column number to be updated.
    \param work the vector for which all delayed-updates are computed to.
    \param curr_nnzs the nonzero elements of work.
    \param L the (partial) lower triangular factor of A.
    \param D the (partial) diagonal factor of A.
    \param in_set temporary storage for use in merging two lists of nonzero indices. Must be all false on entry; it is all false again on return.
*/
template <class el_type>
inline void update(const int& r, vector<el_type>& work, vector<int>& curr_nnzs, lilc_matrix<el_type>& L, block_diag_matrix<el_type>& D, vector<bool>& in_set) {
  unsigned int j;
  int blk_sz;
  el_type d_12, l_ri;

  // precondition: in_set is all false.
  // TODO: if we are for sure doing sequential code, make in_set a static var.
  for (int x : curr_nnzs) {
    in_set[x] = true;
  }

  // iterate across non-zeros of row r using L.list
  for (int i = 0; i < (int) L.list[r].size(); ++i) {
    j = L.list[r][i];
    assert(j < r);
    l_ri = L.coeff(r, j, L.col_first[j]);

    update_single(r, j, l_ri, D[j], work, curr_nnzs, L, in_set); //update col using d11

    // a block size of 2 / -2 marks j as the first / second index of a 2x2
    // pivot: the partner column is updated with the off-diagonal entry.
    blk_sz = D.block_size(j);
    if (blk_sz == 2) {
      d_12 = D.off_diagonal(j);
      update_single(r, j + 1, l_ri, d_12, work, curr_nnzs, L, in_set);
    } else if (blk_sz == -2) {
      d_12 = D.off_diagonal(j-1);
      update_single(r, j - 1, l_ri, d_12, work, curr_nnzs, L, in_set); //update col using d12
    }

  }

  for (int x : curr_nnzs) {
    in_set[x] = false;
  }
}

//not needed anymore
/*! \brief Adds v2 to v1 over the union of their nonzero index lists.
    \param v1 the vector accumulated into.
    \param v1_nnzs sorted nonzero indices of v1; replaced by the merged list.
    \param v2 the vector to add.
    \param v2_nnzs sorted nonzero indices of v2.
*/
template <class el_type>
inline void vec_add(vector<el_type>& v1, vector<int>& v1_nnzs, vector<el_type>& v2, vector<int>& v2_nnzs) {
  //merge current non-zeros of col k with nonzeros of col *it.
  inplace_union(v1_nnzs, v2_nnzs.begin(), v2_nnzs.end());
  for (int idx : v1_nnzs) {
    v1[idx] += v2[idx];
  }
}

/*! \brief Exchanges the indices k and r inside an index list.
    \param curr_nnzs the index list to edit in place.
    \param k the first index.
    \param r the second index.

    The last occurrence of k (if any) becomes r and the last occurrence of r
    (if any) becomes k; an index that is absent is simply not inserted.
*/
inline void safe_swap(vector<int>& curr_nnzs, const int& k, const int& r) {
  const idx_it none = curr_nnzs.end();
  idx_it k_pos = none, r_pos = none;

  for (idx_it it = curr_nnzs.begin(); it != none; ++it) {
    if (*it == k) k_pos = it;
    if (*it == r) r_pos = it;
  }

  if (k_pos != none) *k_pos = r; //if we have k we'll swap index to r
  if (r_pos != none) *r_pos = k; //if we have r we'll swap index to k
}

#endif
#include "solver.h" 5 | 6 | #include 7 | #include 8 | 9 | #include "gtest/gtest.h" 10 | 11 | using namespace Preconditioners; 12 | using namespace std; 13 | 14 | class ILDLTest : public testing::Test { 15 | protected: 16 | /// Test configuration 17 | 18 | double rel_tol = 1e-6; // Relative error tolerance 19 | double eps = 1e-6; // Absolute error tolerance 20 | 21 | /// Test data 22 | 23 | // Coefficient matrix 24 | SparseMatrix A; 25 | 26 | // Test vector x 27 | Vector xtest; 28 | 29 | // Pardiso options struct 30 | ILDLOpts opts; 31 | 32 | ILDL Afact; 33 | 34 | void SetUp() override { 35 | /// Set the upper triangle of A to be: 36 | /// 37 | /// A = 1 2 0 3 38 | /// -5 0 0 39 | /// 0 4 40 | /// 7 41 | 42 | SparseMatrix AUT(4, 4); 43 | AUT.resize(4, 4); 44 | 45 | AUT.insert(0, 0) = 1; 46 | AUT.insert(0, 1) = 2; 47 | AUT.insert(0, 3) = 3; 48 | 49 | AUT.insert(1, 1) = -5; 50 | 51 | AUT.insert(2, 3) = 4; 52 | 53 | AUT.insert(3, 3) = 7; 54 | 55 | A = AUT.selfadjointView(); 56 | 57 | // Randomly sample test vector x 58 | xtest = Vector::Random(A.rows()); 59 | 60 | /// Set factorization configurations 61 | // Setting max-fill to a be huge and drop tol = 0 results in an exact LDL 62 | // factorization 63 | opts.equilibration = Equilibration::Bunch; 64 | opts.order = Ordering::AMD; 65 | opts.pivot_type = PivotType::BunchKaufman; 66 | opts.max_fill_factor = 1e3; 67 | opts.BK_pivot_tol = 0; 68 | opts.drop_tol = 0; 69 | } 70 | }; 71 | 72 | TEST_F(ILDLTest, toCSR) { 73 | 74 | // Construct CSR representation of A 75 | 76 | std::vector row_ptr; 77 | std::vector col_idx; 78 | std::vector val; 79 | 80 | toCSR(A, row_ptr, col_idx, val); 81 | 82 | /// Verify that these vectors are what they should be 83 | 84 | // Check row_ptr 85 | std::vector row_ptr_true = {0, 3, 4, 5, 6}; 86 | for (size_t i = 0; i < row_ptr_true.size(); ++i) { 87 | EXPECT_EQ(row_ptr_true[i], row_ptr[i]); 88 | } 89 | 90 | // Check col_idx 91 | std::vector col_idx_true = {0, 1, 3, 1, 3, 3}; 92 | for (size_t i = 0; 
i < col_idx_true.size(); ++i) { 93 | EXPECT_EQ(col_idx_true[i], col_idx[i]); 94 | } 95 | 96 | // Check val 97 | std::vector val_true = {1, 2, 3, -5, 4, 7}; 98 | for (size_t i = 0; i < val_true.size(); ++i) { 99 | EXPECT_FLOAT_EQ(val_true[i], val[i]); 100 | } 101 | } 102 | 103 | /// Compute an *exact* LDL factorization, and verify that the elements P, S, L, 104 | /// and D are computed correctly 105 | TEST_F(ILDLTest, ExactFactorizationElements) { 106 | 107 | /// Compute factorization using SYM-ILDL's built-in solver 108 | 109 | // Construct CSR representation of A 110 | std::vector row_ptr; 111 | std::vector col_idx; 112 | std::vector val; 113 | toCSR(A, row_ptr, col_idx, val); 114 | 115 | symildl::solver solver; 116 | 117 | // Turn off messaging 118 | solver.msg_lvl = 0; 119 | 120 | // Load in initial matrix 121 | solver.load(row_ptr, col_idx, val); 122 | 123 | // Save initial matrix A 124 | solver.A.save("A.txt", true); 125 | 126 | // Set reordering scheme 127 | switch (opts.order) { 128 | case Ordering::AMD: 129 | solver.reorder_type = symildl::reordering_type::AMD; 130 | break; 131 | case Ordering::RCM: 132 | solver.reorder_type = symildl::reordering_type::RCM; 133 | break; 134 | case Ordering::None: 135 | solver.reorder_type = symildl::reordering_type::NONE; 136 | break; 137 | } 138 | 139 | // Set equilibration scheme 140 | solver.equil_type = (opts.equilibration == Equilibration::Bunch 141 | ? symildl::equilibration_type::BUNCH 142 | : symildl::equilibration_type::NONE); 143 | 144 | // Set pivoting type 145 | solver.piv_type = (opts.pivot_type == PivotType::Rook 146 | ? 
lilc_matrix::pivot_type::ROOK 147 | : lilc_matrix::pivot_type::BKP); 148 | 149 | solver.has_rhs = false; 150 | solver.perform_inplace = false; 151 | solver.solve(opts.max_fill_factor, opts.drop_tol, opts.BK_pivot_tol); 152 | 153 | /// Compute factorization using ILDL 154 | 155 | // Set factorization options 156 | Afact.setOptions(opts); 157 | 158 | // Compute factorization 159 | Afact.compute(A); 160 | 161 | // Extract elements of this factorization 162 | SparseMatrix D = Afact.D(); 163 | const SparseMatrix &L = Afact.L(); 164 | 165 | /// Ensure that the elements P, S, L, and D of the factorizations computed by 166 | /// SYM-ILDL and ILDL coincide 167 | 168 | /// Ensure that the permutations P agree 169 | EXPECT_EQ(Afact.P().size(), solver.perm.size()); 170 | for (int k = 0; k < Afact.P().size(); ++k) 171 | EXPECT_EQ(Afact.P()(k), solver.perm[k]); 172 | 173 | /// Ensure that the scaling matrices agree 174 | EXPECT_EQ(Afact.S().size(), solver.A.S.main_diag.size()); 175 | for (int k = 0; k < Afact.S().size(); ++k) 176 | EXPECT_FLOAT_EQ(Afact.S()(k), solver.A.S.main_diag[k]); 177 | 178 | /// Ensure that the lower-triangular factors agree 179 | EXPECT_EQ(Afact.L().nonZeros(), solver.L.nnz()); 180 | for (int k = 0; k < Afact.L().outerSize(); ++k) 181 | for (SparseMatrix::InnerIterator it(Afact.L(), k); it; ++it) 182 | EXPECT_FLOAT_EQ(it.value(), solver.L.coeff(it.row(), it.col())); 183 | 184 | /// Ensure that the block-diagonal matrices D agree 185 | 186 | // Extract lower triangle from D 187 | SparseMatrix DLT = D.triangularView(); 188 | EXPECT_EQ(DLT.nonZeros(), solver.D.nnz()); 189 | for (int k = 0; k < DLT.outerSize(); ++k) 190 | for (SparseMatrix::InnerIterator it(DLT, k); it; ++it) { 191 | int i = it.row(); 192 | int j = it.col(); 193 | if (i == j) { 194 | // This is a diagonal element 195 | EXPECT_LT(fabs(it.value() - solver.D.main_diag.at(i)), eps); 196 | } else { 197 | // This is the off-diagonal element *below* the element D(j,j) 198 | 
EXPECT_LT(fabs(it.value() - solver.D.off_diag.at(j)), eps); 199 | } 200 | } 201 | 202 | /// Save the matrices constructed by the SYM-ILDL solver 203 | 204 | std::ofstream perm_file("P.txt"); 205 | for (const auto &i : solver.perm) 206 | perm_file << i << " "; 207 | perm_file << std::endl; 208 | perm_file.close(); 209 | 210 | solver.A.S.save("S.txt"); 211 | solver.L.save("L.txt", true); 212 | solver.D.save("D.txt"); 213 | 214 | /// Verify that the elements of the factorization satisfy P'SASP = LDL' 215 | 216 | SparseMatrix SAS = Afact.S().asDiagonal() * A * Afact.S().asDiagonal(); 217 | SparseMatrix PtSASP; 218 | PtSASP = SAS.twistedBy(Afact.P().asPermutation().inverse()); 219 | 220 | SparseMatrix LDLt = Afact.L() * Afact.D() * Afact.L().transpose(); 221 | 222 | EXPECT_LT((PtSASP - LDLt).norm(), rel_tol * PtSASP.norm()); 223 | } 224 | 225 | /// Compute a modified LDL factorization, modifying D to ensure that it is 226 | /// positive-definite 227 | TEST_F(ILDLTest, PositiveDefiniteModification) { 228 | 229 | // Set factorization options 230 | Afact.setOptions(opts); 231 | 232 | // Compute factorization 233 | Afact.compute(A); 234 | 235 | // Extract diagonal matrix D 236 | Matrix D = Afact.D(); 237 | 238 | // Extract diagonal matrix D, modifying it to ensure positive-definiteness 239 | Matrix Dpos = Afact.D(true); 240 | 241 | // Compute the matrix P = Dinv*Dpos 242 | Matrix P = D.inverse() * Dpos; 243 | 244 | /// We modify D by taking the absolute values of its eigenvalues -- therefore, 245 | /// the only eigenvalues of Dinv*Dpos should be +/- 1 246 | Eigen::EigenSolver eigs(P); 247 | 248 | for (int k = 0; k < eigs.eigenvalues().size(); ++k) 249 | EXPECT_TRUE(fabs(eigs.eigenvalues()(k) - 1.0) < eps || 250 | (fabs(eigs.eigenvalues()(k) + 1.0) < eps)); 251 | } 252 | 253 | TEST_F(ILDLTest, Inertia) { 254 | 255 | // Set factorization options 256 | Afact.setOptions(opts); 257 | 258 | // Compute factorization 259 | Afact.compute(A); 260 | 261 | Inertia inertia = 
Afact.inertia(); 262 | 263 | // The test matrix A has 2 positive and 2 negative eigenvalues 264 | EXPECT_EQ(inertia.first, 2); 265 | EXPECT_EQ(inertia.second, 2); 266 | } 267 | 268 | /// Test computation of products with the diagonal matrix D 269 | TEST_F(ILDLTest, DProduct) { 270 | 271 | // Set factorization options 272 | Afact.setOptions(opts); 273 | 274 | // Compute factorization 275 | Afact.compute(A); 276 | 277 | // Extract diagonal matrix D 278 | Matrix D = Afact.D(); 279 | 280 | // Extract diagonal matrix D, modifying it to ensure positive-definiteness 281 | Matrix Dpos = Afact.D(true); 282 | 283 | /// Test computation of products with diagonal matrix 284 | Vector ygt = D * xtest; 285 | Vector y = Afact.Dproduct(xtest); 286 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 287 | 288 | /// Test computation of products with positive-definite modification of 289 | /// diagonal matrix 290 | ygt = Dpos * xtest; 291 | y = Afact.Dproduct(xtest, true); 292 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 293 | } 294 | 295 | /// Test solving linear systems of the form Dx = b 296 | TEST_F(ILDLTest, Dsolve) { 297 | 298 | // Set factorization options 299 | Afact.setOptions(opts); 300 | 301 | // Compute factorization 302 | Afact.compute(A); 303 | 304 | // Extract diagonal matrix D 305 | Matrix D = Afact.D(); 306 | 307 | // Extract diagonal matrix D, modifying it to ensure positive-definiteness 308 | Matrix Dpos = Afact.D(true); 309 | 310 | /// Test computation of products with diagonal matrix 311 | Vector ygt = D.inverse() * xtest; 312 | Vector y = Afact.Dsolve(xtest); 313 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 314 | 315 | /// Test computation of products with positive-definite modification of 316 | /// diagonal matrix 317 | ygt = Dpos.inverse() * xtest; 318 | y = Afact.Dsolve(xtest, true); 319 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 320 | } 321 | 322 | /// Test solving linear systems of the form (D+)^{1/2} x = b, were D+ is the 323 | 
/// positive-definite modification of the block-diagonal matrix Ds 324 | TEST_F(ILDLTest, sqrtDsolve) { 325 | 326 | // Set factorization options 327 | Afact.setOptions(opts); 328 | 329 | // Compute factorization 330 | Afact.compute(A); 331 | 332 | // Extract diagonal matrix D 333 | Matrix D = Afact.D(); 334 | 335 | // Compute symmetric eigendecomposition of D 336 | Eigen::SelfAdjointEigenSolver eig(D); 337 | 338 | Matrix Q = eig.eigenvectors(); 339 | Vector Lambda = eig.eigenvalues(); 340 | 341 | // Compute ground-truth solution ygt 342 | Vector ygt = Q * Lambda.cwiseAbs().cwiseSqrt().cwiseInverse().asDiagonal() * 343 | Q.transpose() * xtest; 344 | 345 | Vector y = Afact.sqrtDsolve(xtest); 346 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 347 | } 348 | 349 | /// Test solving linear systems of the form LDL' x = b 350 | TEST_F(ILDLTest, LDLTsolve) { 351 | 352 | // Set factorization options 353 | Afact.setOptions(opts); 354 | 355 | // Compute factorization 356 | Afact.compute(A); 357 | 358 | // Extract diagonal matrix D 359 | Matrix D = Afact.D(); 360 | Matrix Dpos = Afact.D(true); 361 | 362 | Matrix LDLt = Afact.L() * D * Afact.L().transpose(); 363 | Matrix LDposLt = Afact.L() * Dpos * Afact.L().transpose(); 364 | 365 | /// Compute ground-truth solution 366 | Vector ygt = LDLt.inverse() * xtest; 367 | 368 | Vector y = Afact.LDLTsolve(xtest); 369 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 370 | 371 | /// Compute ground-truth solution for positive-definite modification 372 | ygt = LDposLt.inverse() * xtest; 373 | 374 | y = Afact.LDLTsolve(xtest, true); 375 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 376 | } 377 | 378 | /// Test solving linear systems of the form (D+)^{1/2} L' x = b 379 | TEST_F(ILDLTest, sqrtDLTsolve) { 380 | 381 | // Set factorization options 382 | Afact.setOptions(opts); 383 | 384 | // Compute factorization 385 | Afact.compute(A); 386 | 387 | // Extract diagonal matrix Dpos 388 | Matrix Dpos = Afact.D(true); 389 | 390 | // 
Compute symmetric square square root of D 391 | Eigen::SelfAdjointEigenSolver eig(Dpos); 392 | Matrix sqrtD = eig.eigenvectors() * 393 | eig.eigenvalues().cwiseSqrt().asDiagonal() * 394 | eig.eigenvectors().transpose(); 395 | 396 | Matrix sqrtDLt = sqrtD * Afact.L().transpose(); 397 | Matrix sqrtDLt_inv = sqrtDLt.inverse(); 398 | 399 | /// Compute ground-truth solution for (D+)^{1/2} L' x = b 400 | Vector ygt = sqrtDLt_inv * xtest; 401 | Vector y = Afact.sqrtDLTsolve(xtest); 402 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 403 | 404 | /// Compute ground-truth solution for L(D+)^{1/2} x = b 405 | ygt = sqrtDLt_inv.transpose() * xtest; 406 | y = Afact.sqrtDLTsolve(xtest, true); 407 | EXPECT_LT((ygt - y).norm(), rel_tol * ygt.norm()); 408 | } 409 | 410 | /// Test approximate solution of Ax = b using incomplete factorization 411 | TEST_F(ILDLTest, solve) { 412 | 413 | // Set factorization options 414 | Afact.setOptions(opts); 415 | 416 | // Compute factorization 417 | Afact.compute(A); 418 | 419 | // Compute A^-1 420 | Matrix Ainv_gt = A.toDense().inverse(); 421 | 422 | // Compute A^-1 by applying the preconditioner Afact to each column of the 423 | // identity matrix Id 424 | Matrix Id = Matrix::Identity(A.rows(), A.cols()); 425 | 426 | Matrix Ainv(Afact.dim(), Afact.dim()); 427 | 428 | for (int k = 0; k < Afact.dim(); ++k) 429 | Ainv.col(k) = Afact.solve(Id.col(k)); 430 | 431 | EXPECT_LT((Ainv - Ainv_gt).norm(), rel_tol * Ainv_gt.norm()); 432 | } 433 | -------------------------------------------------------------------------------- /Preconditioners/ILDL/src/ILDL.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "Eigen/Eigenvalues" 5 | #include "ILDL/ILDL.h" 6 | #include "ILDL/ILDL_utils.h" 7 | 8 | namespace Preconditioners { 9 | 10 | /// Constructors 11 | 12 | // Basic constructor: just set the options 13 | ILDL::ILDL(const ILDLOpts &options) { setOptions(options); } 14 | 15 | // 
Advanced constructor: set the options, and then call compute() function to 16 | // factor the passed matrix A 17 | ILDL::ILDL(const SparseMatrix &A, const ILDLOpts &options) { 18 | setOptions(options); 19 | compute(A); 20 | } 21 | 22 | void ILDL::setOptions(const ILDLOpts &options) { 23 | /// Release any currently-held (cached) factorizations 24 | clear(); 25 | 26 | /// Input checking 27 | 28 | if (options.max_fill_factor <= 0) 29 | throw std::invalid_argument("Maximum fill-factor must be a positive value"); 30 | 31 | if (options.drop_tol < 0 || options.drop_tol > 1) 32 | throw std::invalid_argument( 33 | "Drop tolerance must be a value in the range [0,1]"); 34 | 35 | if (options.BK_pivot_tol < 0 || options.BK_pivot_tol > 1) 36 | throw std::invalid_argument( 37 | "Bunch-Kaufman pivoting tolerance must be a value in the range [0,1]"); 38 | 39 | // Save the passed options 40 | opts_ = options; 41 | } 42 | 43 | void ILDL::compute(const SparseMatrix &A) { 44 | 45 | // If we already have a cached factorization stored ... 46 | if (initialized_) { 47 | // Release it 48 | clear(); 49 | } 50 | 51 | /// Argument checking 52 | if (A.rows() != A.cols()) 53 | throw std::invalid_argument("Argument A must be a symmetric matrix!"); 54 | 55 | // Dimension of A 56 | dim_ = A.rows(); 57 | 58 | /// Preallocate storage to hold the incomplete factorization of A, computed 59 | /// using SYM-ILDL 60 | 61 | // LIL-C representation of A used by SYM-ILDL 62 | lilc_matrix Alilc; 63 | 64 | // Lower-triangular factor L 65 | lilc_matrix L; 66 | 67 | // Block-diagonal factor D 68 | block_diag_matrix D; 69 | 70 | // Row/col permutation 71 | std::vector perm(dim_); 72 | 73 | /// Construct representation of A 74 | 75 | // Construct CSR representation of 76 | std::vector row_ptr, col_idx; 77 | std::vector val; 78 | toCSR(A, row_ptr, col_idx, val); 79 | 80 | // Construct SYM-ILDL representation of passed matrix A. 
Note that although 81 | // SYM-ILDL expects compressed COLUMN storage arguments, here we take 82 | // advantage of the fact that the CSR representation of A's UPPER TRIANGLE 83 | // actually coincides with the CSC representation of A's LOWER TRIANGLE :-) 84 | Alilc.load(row_ptr, col_idx, val); 85 | 86 | /// Equilibrate A using a diagonal scaling matrix S, if requested. 87 | // This will overwrite Alilc with SAS, and save the diagonal scaling matrix as 88 | // Alilc.S 89 | if (opts_.equilibration == Equilibration::Bunch) 90 | Alilc.sym_equil(); 91 | 92 | /// Record scaling matrix S 93 | S_.resize(dim_); 94 | for (int k = 0; k < dim_; ++k) 95 | S_(k) = Alilc.S.main_diag[k]; 96 | 97 | /// Compute fill-reducing reordering of A, if requested 98 | switch (opts_.order) { 99 | case Ordering::AMD: 100 | Alilc.sym_amd(perm); 101 | break; 102 | case Ordering::RCM: 103 | Alilc.sym_rcm(perm); 104 | break; 105 | case Ordering::None: 106 | // Set perm to be the identity permutation 107 | perm.resize(dim_); 108 | for (int k = 0; k < dim_; ++k) 109 | perm[k] = k; 110 | break; 111 | } 112 | 113 | // Apply this permutation to A_, if one was requested 114 | if (opts_.order != Ordering::None) 115 | Alilc.sym_perm(perm); 116 | 117 | /// Compute in-place LDL factorization of P*S*A*S*P 118 | Alilc.ildl(L, D, perm, opts_.max_fill_factor, opts_.drop_tol, 119 | opts_.BK_pivot_tol, 120 | (opts_.pivot_type == PivotType::Rook 121 | ? 
lilc_matrix::pivot_type::ROOK 122 | : lilc_matrix::pivot_type::BKP)); 123 | 124 | /// Record the final permutation in P and Pinv 125 | P_.resize(dim_); 126 | Pinv_.resize(dim_); 127 | for (int k = 0; k < dim_; ++k) { 128 | P_(k) = perm[k]; 129 | Pinv_[P_(k)] = k; 130 | } 131 | 132 | /// Construct lower-triangular Eigen matrix L_ from L 133 | std::vector> triplets; 134 | triplets.reserve(L.nnz()); 135 | 136 | // From the lilc_matrix documentation: A(m_idx[k][j], k) = m_x[k][j] 137 | for (int k = 0; k < L.n_cols(); ++k) 138 | for (int j = 0; j < L.m_idx[k].size(); ++j) 139 | triplets.emplace_back(L.m_idx[k][j], k, L.m_x[k][j]); 140 | 141 | L_.resize(dim_, dim_); 142 | L_.setFromTriplets(triplets.begin(), triplets.end()); 143 | 144 | /// Construct and record eigendecomposition for block diagonal matrix D 145 | 146 | // Get the number of 1- and 2-d blocks in D 147 | size_t num_2d_blocks = D.off_diag.size(); 148 | size_t num_1d_blocks = dim_ - 2 * num_2d_blocks; 149 | size_t num_blocks = num_1d_blocks + num_2d_blocks; 150 | 151 | // Preallocate storage for this computation 152 | Lambda_.resize(dim_); 153 | block_start_idxs_.resize(num_blocks); 154 | block_sizes_.resize(num_blocks); 155 | 156 | // 2x2 matrix we will use to store any 2x2 blocks of D 157 | Matrix2d Di; 158 | // Eigensolver for computing an eigendecomposition of the 2x2 blocks of D 159 | Eigen::SelfAdjointEigenSolver eig; 160 | 161 | int idx = 0; // Starting (upper-left) index of the current block 162 | for (size_t i = 0; i < num_blocks; ++i) { 163 | // Record the starting index of this block 164 | block_start_idxs_[i] = idx; 165 | 166 | if (D.block_size(idx) > 1) { 167 | // This is a 2x2 block 168 | block_sizes_[i] = 2; 169 | 170 | // Extract 2x2 block from D 171 | 172 | // Extract diagonal elements 173 | Di(0, 0) = D.main_diag[idx]; 174 | Di(1, 1) = D.main_diag[idx + 1]; 175 | // Extract off-diagonal elements 176 | Di(0, 1) = D.off_diag.at(idx); 177 | Di(1, 0) = D.off_diag.at(idx); 178 | 179 | // Compute 
eigendecomposition of Di 180 | eig.compute(Di); 181 | 182 | // Record eigenvalues of this block 183 | Lambda_.segment<2>(idx) = eig.eigenvalues(); 184 | 185 | // Record eigenvectors of this block 186 | Q_[i] = eig.eigenvectors(); 187 | 188 | // Increment index 189 | idx += 2; 190 | } else { 191 | /// This is a 1x1 block 192 | block_sizes_[i] = 1; 193 | 194 | // Record eigenvalue 195 | Lambda_(idx) = D.main_diag[idx]; 196 | 197 | // Increment index 198 | ++idx; 199 | } 200 | } 201 | 202 | // Record the fact that we now have a valid cached factorization 203 | initialized_ = true; 204 | } 205 | 206 | void ILDL::clear() { 207 | 208 | // If we have a cached factorization ... 209 | if (initialized_) { 210 | // Release the memory associated with this factorization 211 | block_start_idxs_.clear(); 212 | block_sizes_.clear(); 213 | Q_.clear(); 214 | } 215 | 216 | // Record the fact that we no longer have a valid cached factorization 217 | initialized_ = false; 218 | } 219 | 220 | SparseMatrix ILDL::D(bool pos_def_mod) const { 221 | 222 | if (!initialized_) 223 | throw std::invalid_argument("Factorization has not yet been computed"); 224 | 225 | // We rebuild D from its eigendecomposition according to whether we are 226 | // enforcing positive-definiteness 227 | 228 | std::vector> triplets; 229 | triplets.reserve(dim_ + 2 * num_2x2_blocks()); 230 | 231 | // Preallocate working variables 232 | int idx; // Starting index of current block 233 | 234 | Matrix2d Di; // Working space for reconstructing 2x2 blocks 235 | 236 | // Iterate over the blocks of D 237 | for (size_t i = 0; i < num_blocks(); ++i) { 238 | idx = block_start_idxs_[i]; 239 | if (block_sizes_[i] == 1) { 240 | triplets.emplace_back(idx, idx, 241 | pos_def_mod ? 
fabs(Lambda_(idx)) : Lambda_(idx)); 242 | } else { 243 | // Reconstruct the 2x2 block here 244 | const Matrix2d &Qi = Q_.at(i); 245 | 246 | if (pos_def_mod) 247 | Di = Qi * Lambda_.segment<2>(idx).cwiseAbs().asDiagonal() * 248 | Qi.transpose(); 249 | else 250 | Di = Qi * Lambda_.segment<2>(idx).asDiagonal() * Qi.transpose(); 251 | 252 | for (int r = 0; r < 2; ++r) 253 | for (int c = 0; c < 2; ++c) 254 | triplets.emplace_back(idx + r, idx + c, Di(r, c)); 255 | } 256 | } 257 | 258 | /// Reconstruct and return D 259 | SparseMatrix D(dim_, dim_); 260 | D.setFromTriplets(triplets.begin(), triplets.end()); 261 | 262 | return D; 263 | } 264 | 265 | Inertia ILDL::inertia() const { 266 | if (!initialized_) 267 | throw std::invalid_argument("Factorization has not yet been computed"); 268 | 269 | // Calculate number of positive eigenvalues 270 | size_t npos = (Lambda_.array() > 0.0).count(); 271 | 272 | return std::make_pair(npos, dim_ - npos); 273 | } 274 | 275 | Vector ILDL::Dproduct(const Vector &x, bool pos_def_mod) const { 276 | /// Error checking 277 | if (!initialized_) 278 | throw std::invalid_argument("Factorization has not yet been computed"); 279 | 280 | if (x.size() != dim_) 281 | throw std::invalid_argument("Argument x has incorrect dimension"); 282 | 283 | // Preallocate output vector y = D*x 284 | Vector y(dim_); 285 | 286 | // We compute the output vector y blockwise 287 | for (int k = 0; k < num_blocks(); ++k) { 288 | 289 | // Get the starting index for this block 290 | const int &idx = block_start_idxs_[k]; 291 | 292 | if (block_sizes_[k] == 1) { 293 | // This is a 1x1 block 294 | y(idx) = (pos_def_mod ? 
fabs(Lambda_[idx]) : Lambda_[idx]) * x(idx); 295 | } else { 296 | // This is a 2x2 block 297 | const Matrix2d &Qk = Q_.at(k); 298 | 299 | if (!pos_def_mod) { 300 | y.segment<2>(idx) = Qk * Lambda_.segment<2>(idx).asDiagonal() * 301 | Qk.transpose() * x.segment<2>(idx); 302 | } else { 303 | // Take the absolute values of the eigenvalues of this block to enforce 304 | // positive-definiteness 305 | y.segment<2>(idx) = Qk * 306 | Lambda_.segment<2>(idx).cwiseAbs().asDiagonal() * 307 | Qk.transpose() * x.segment<2>(idx); 308 | } 309 | } 310 | } 311 | 312 | return y; 313 | } 314 | 315 | Vector ILDL::Dsolve(const Vector &b, bool pos_def_mod) const { 316 | /// Error checking 317 | if (!initialized_) 318 | throw std::invalid_argument("Factorization has not yet been computed"); 319 | 320 | if (b.size() != dim_) 321 | throw std::invalid_argument("Argument b has incorrect dimension"); 322 | 323 | // Preallocate output vector x = D^-1 * b 324 | Vector x(dim_); 325 | 326 | // We compute the output vector y blockwise 327 | for (int k = 0; k < num_blocks(); ++k) { 328 | 329 | // Get the starting index for this block 330 | const int &idx = block_start_idxs_[k]; 331 | 332 | if (block_sizes_[k] == 1) { 333 | // This is a 1x1 block 334 | x(idx) = b(idx) / (pos_def_mod ? 
fabs(Lambda_[idx]) : Lambda_[idx]); 335 | } else { 336 | // This is a 2x2 block 337 | const Matrix2d &Qk = Q_.at(k); 338 | 339 | if (!pos_def_mod) { 340 | x.segment<2>(idx) = 341 | Qk * Lambda_.segment<2>(idx).cwiseInverse().asDiagonal() * 342 | Qk.transpose() * b.segment<2>(idx); 343 | } else { 344 | // Take the absolute values of the eigenvalues of this block to enforce 345 | // positive-definiteness 346 | x.segment<2>(idx) = 347 | Qk * 348 | Lambda_.segment<2>(idx).cwiseInverse().cwiseAbs().asDiagonal() * 349 | Qk.transpose() * b.segment<2>(idx); 350 | } 351 | } 352 | } 353 | 354 | return x; 355 | } 356 | 357 | Vector ILDL::sqrtDsolve(const Vector &b) const { 358 | /// Error checking 359 | if (!initialized_) 360 | throw std::invalid_argument("Factorization has not yet been computed"); 361 | 362 | if (b.size() != dim_) 363 | throw std::invalid_argument("Argument b has incorrect dimension"); 364 | 365 | // Preallocate output vector x = (D+)^{-1/2} * b 366 | Vector x(dim_); 367 | 368 | // We compute the output vector y blockwise 369 | for (int k = 0; k < num_blocks(); ++k) { 370 | 371 | // Get the starting index for this block 372 | const int &idx = block_start_idxs_[k]; 373 | 374 | if (block_sizes_[k] == 1) { 375 | // This is a 1x1 block 376 | x(idx) = b(idx) / sqrt(fabs(Lambda_[idx])); 377 | } else { 378 | // This is a 2x2 block 379 | const Matrix2d &Qk = Q_.at(k); 380 | 381 | // Take the absolute values of the eigenvalues of this block to enforce 382 | // positive-definiteness 383 | x.segment<2>(idx) = Qk * 384 | Lambda_.segment<2>(idx) 385 | .cwiseAbs() 386 | .cwiseSqrt() 387 | .cwiseInverse() 388 | .asDiagonal() * 389 | Qk.transpose() * b.segment<2>(idx); 390 | } 391 | } 392 | 393 | return x; 394 | } 395 | 396 | Vector ILDL::LDLTsolve(const Vector &b, bool pos_def_mode) const { 397 | /// Error checking 398 | if (!initialized_) 399 | throw std::invalid_argument("Factorization has not yet been computed"); 400 | 401 | if (b.size() != dim_) 402 | throw 
std::invalid_argument("Argument b has incorrect dimension"); 403 | 404 | return L_.transpose().triangularView().solve( 405 | Dsolve(L_.triangularView().solve(b), pos_def_mode)); 406 | } 407 | 408 | Vector ILDL::sqrtDLTsolve(const Vector &b, bool transpose) const { 409 | if (!transpose) 410 | return L_.transpose().triangularView().solve( 411 | sqrtDsolve(b)); 412 | else 413 | return sqrtDsolve(L_.triangularView().solve(b)); 414 | } 415 | 416 | Vector ILDL::solve(const Vector &b, bool pos_def_mod) const { 417 | /// If P'SASP ~ LDL', then A^-1 ~ SP (LDL')^-1 P'S 418 | return S_.cwiseProduct( 419 | P_.asPermutation() * 420 | LDLTsolve(Pinv_.asPermutation() * S_.cwiseProduct(b), pos_def_mod)); 421 | } 422 | 423 | } // namespace Preconditioners 424 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_sym_amd.h: -------------------------------------------------------------------------------- 1 | //-*- mode: c++ -*- 2 | #ifndef _LIL_MATRIX_SYM_AMD_H_ 3 | #define _LIL_MATRIX_SYM_AMD_H_ 4 | 5 | //adapted from EIGEN 6 | //need to include license file later 7 | 8 | namespace amd { 9 | inline int amd_flip(const int& i) { return -i-2; } 10 | 11 | /* clear w */ 12 | inline int wclear (int mark, int lemax, int *w, int n) 13 | { 14 | int k; 15 | if(mark < 2 || (mark + lemax < 0)) 16 | { 17 | for(k = 0; k < n; k++) 18 | if(w[k] != 0) 19 | w[k] = 1; 20 | mark = 2; 21 | } 22 | return (mark); /* at this point, w[0..n-1] < mark holds */ 23 | } 24 | 25 | /* depth-first search and postorder of a tree rooted at node j */ 26 | inline int tdfs(int j, int k, int *head, const int *next, int *post, int *stack) 27 | { 28 | int i, p, top = 0; 29 | if(!head || !next || !post || !stack) return (-1); /* check inputs */ 30 | stack[0] = j; /* place j on the stack */ 31 | while (top >= 0) /* while (stack is not empty) */ 32 | { 33 | p = stack[top]; /* p = top of stack */ 34 | i = head[p]; /* i = youngest child of p */ 35 | if(i == -1) 36 | { 37 
| top--; /* p has no unordered children left */ 38 | post[k++] = p; /* node p is the kth postordered node */ 39 | } 40 | else 41 | { 42 | head[p] = next[i]; /* remove i from children of p */ 43 | stack[++top] = i; /* start dfs on child node i */ 44 | } 45 | } 46 | return k; 47 | } 48 | } 49 | 50 | /** \internal 51 | * Approximate minimum degree ordering algorithm. 52 | * \returns the permutation P reducing the fill-in of the input matrix \a C 53 | * The input matrix \a C must be a selfadjoint compressed column major SparseMatrix object. Both the upper and lower parts have to be stored, but the diagonal entries are optional. 54 | * On exit the values of C are destroyed */ 55 | template 56 | inline void lilc_matrix :: sym_amd(vector& perm) { 57 | using std::sqrt; 58 | using std::min; 59 | using std::max; 60 | 61 | int d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1, 62 | k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi, 63 | ok, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, t; 64 | unsigned int h; 65 | 66 | int n = this->n_rows(); 67 | dense = max(16, int(10 * sqrt(double(n)))); /* find dense threshold */ 68 | dense = min(n-2, dense); 69 | 70 | perm.resize(n); 71 | 72 | int* W = new int[8*(n+1)]; /* get workspace */ 73 | int* len = W; 74 | int* nv = W + (n+1); 75 | int* next = W + 2*(n+1); 76 | int* head = W + 3*(n+1); 77 | int* elen = W + 4*(n+1); 78 | int* degree = W + 5*(n+1); 79 | int* w = W + 6*(n+1); 80 | int* hhead = W + 7*(n+1); 81 | 82 | int* temp_perm = new int[n+1]; 83 | int* last = temp_perm; //using temp_perms storage space as workspace for last 84 | 85 | /* --- Initialize quotient graph ---------------------------------------- */ 86 | int cnz = this->nnz(); 87 | t = 3*cnz + 3*cnz/5 + 2*n; /* add elbow room to C */ 88 | int* Cp = new int[n+1]; 89 | int* Ci = new int[t]; 90 | 91 | int cnt = 0; 92 | Cp[0] = 0; 93 | for (int i = 0; i < n; i++) { 94 | for (int j = 0; j < (int) list[i].size(); j++) { 95 | Ci[cnt++] = 
list[i][j]; 96 | } 97 | for (int j = 0; j < (int) m_idx[i].size(); j++) { 98 | Ci[cnt++] = m_idx[i][j]; 99 | } 100 | Cp[i+1] = cnt; 101 | } 102 | cnz = cnt; 103 | 104 | for(k = 0; k < n; k++) 105 | len[k] = Cp[k+1] - Cp[k]; 106 | len[n] = 0; 107 | nzmax = t; 108 | 109 | for(i = 0; i <= n; i++) 110 | { 111 | head[i] = -1; // degree list i is empty 112 | last[i] = -1; 113 | next[i] = -1; 114 | hhead[i] = -1; // hash list i is empty 115 | nv[i] = 1; // node i is just one node 116 | w[i] = 1; // node i is alive 117 | elen[i] = 0; // Ek of node i is empty 118 | degree[i] = len[i]; // degree of node i 119 | } 120 | mark = amd::wclear(0, 0, w, n); /* clear w */ 121 | elen[n] = -2; /* n is a dead element */ 122 | Cp[n] = -1; /* n is a root of assembly tree */ 123 | w[n] = 0; /* n is a dead element */ 124 | 125 | /* --- Initialize degree lists ------------------------------------------ */ 126 | for(i = 0; i < n; i++) 127 | { 128 | d = degree[i]; 129 | if(d == 0) /* node i is empty */ 130 | { 131 | elen[i] = -2; /* element i is dead */ 132 | nel++; 133 | Cp[i] = -1; /* i is a root of assembly tree */ 134 | w[i] = 0; 135 | } 136 | else if(d > dense) /* node i is dense */ 137 | { 138 | nv[i] = 0; /* absorb i into element n */ 139 | elen[i] = -1; /* node i is dead */ 140 | nel++; 141 | Cp[i] = amd::amd_flip (n); 142 | nv[n]++; 143 | } 144 | else 145 | { 146 | if(head[d] != -1) last[head[d]] = i; 147 | next[i] = head[d]; /* put node i in degree list d */ 148 | head[d] = i; 149 | } 150 | } 151 | 152 | while (nel < n) /* while (selecting pivots) do */ 153 | { 154 | /* --- Select node of minimum approximate degree -------------------- */ 155 | for(k = -1; mindeg < n && (k = head[mindeg]) == -1; mindeg++) {} 156 | if(next[k] != -1) last[next[k]] = -1; 157 | head[mindeg] = next[k]; /* remove k from degree list */ 158 | elenk = elen[k]; /* elenk = |Ek| */ 159 | nvk = nv[k]; /* # of nodes k represents */ 160 | nel += nvk; /* nv[k] nodes of A eliminated */ 161 | 162 | /* --- Garbage 
collection ------------------------------------------- */ 163 | if(elenk > 0 && cnz + mindeg >= nzmax) 164 | { 165 | for(j = 0; j < n; j++) 166 | { 167 | if((p = Cp[j]) >= 0) /* j is a live node or element */ 168 | { 169 | Cp[j] = Ci[p]; /* save first entry of object */ 170 | Ci[p] = amd::amd_flip (j); /* first entry is now amd::amd_flip(j) */ 171 | } 172 | } 173 | for(q = 0, p = 0; p < cnz; ) /* scan all of memory */ 174 | { 175 | if((j = amd::amd_flip (Ci[p++])) >= 0) /* found object j */ 176 | { 177 | Ci[q] = Cp[j]; /* restore first entry of object */ 178 | Cp[j] = q++; /* new pointer to object j */ 179 | for(k3 = 0; k3 < len[j]-1; k3++) Ci[q++] = Ci[p++]; 180 | } 181 | } 182 | cnz = q; /* Ci[cnz...nzmax-1] now free */ 183 | } 184 | 185 | /* --- Construct new element ---------------------------------------- */ 186 | dk = 0; 187 | nv[k] = -nvk; /* flag k as in Lk */ 188 | p = Cp[k]; 189 | pk1 = (elenk == 0) ? p : cnz; /* do in place if elen[k] == 0 */ 190 | pk2 = pk1; 191 | 192 | for(k1 = 1; k1 <= elenk + 1; k1++) 193 | { 194 | if(k1 > elenk) 195 | { 196 | e = k; /* search the nodes in k */ 197 | pj = p; /* list of nodes starts at Ci[pj]*/ 198 | ln = len[k] - elenk; /* length of list of nodes in k */ 199 | } 200 | else 201 | { 202 | e = Ci[p++]; /* search the nodes in e */ 203 | pj = Cp[e]; 204 | ln = len[e]; /* length of list of nodes in e */ 205 | } 206 | for(k2 = 1; k2 <= ln; k2++) 207 | { 208 | i = Ci[pj++]; 209 | if((nvi = nv[i]) <= 0) continue; /* node i dead, or seen */ 210 | dk += nvi; /* degree[Lk] += size of node i */ 211 | nv[i] = -nvi; /* negate nv[i] to denote i in Lk*/ 212 | Ci[pk2++] = i; /* place i in Lk */ 213 | if(next[i] != -1) last[next[i]] = last[i]; 214 | if(last[i] != -1) /* remove i from degree list */ 215 | { 216 | next[last[i]] = next[i]; 217 | } 218 | else 219 | { 220 | head[degree[i]] = next[i]; 221 | } 222 | } 223 | if(e != k) 224 | { 225 | Cp[e] = amd::amd_flip (k); /* absorb e into k */ 226 | w[e] = 0; /* e is now a dead element */ 
227 | } 228 | } 229 | if(elenk != 0) cnz = pk2; /* Ci[cnz...nzmax] is free */ 230 | degree[k] = dk; /* external degree of k - |Lk\i| */ 231 | Cp[k] = pk1; /* element k is in Ci[pk1..pk2-1] */ 232 | len[k] = pk2 - pk1; 233 | elen[k] = -2; /* k is now an element */ 234 | 235 | /* --- Find set differences ----------------------------------------- */ 236 | mark = amd::wclear(mark, lemax, w, n); /* clear w if necessary */ 237 | for(pk = pk1; pk < pk2; pk++) /* scan 1: find |Le\Lk| */ 238 | { 239 | i = Ci[pk]; 240 | if((eln = elen[i]) <= 0) continue;/* skip if elen[i] empty */ 241 | nvi = -nv[i]; /* nv[i] was negated */ 242 | wnvi = mark - nvi; 243 | for(p = Cp[i]; p <= Cp[i] + eln - 1; p++) /* scan Ei */ 244 | { 245 | e = Ci[p]; 246 | if(w[e] >= mark) 247 | { 248 | w[e] -= nvi; /* decrement |Le\Lk| */ 249 | } 250 | else if(w[e] != 0) /* ensure e is a live element */ 251 | { 252 | w[e] = degree[e] + wnvi; /* 1st time e seen in scan 1 */ 253 | } 254 | } 255 | } 256 | 257 | /* --- Degree update ------------------------------------------------ */ 258 | for(pk = pk1; pk < pk2; pk++) /* scan2: degree update */ 259 | { 260 | i = Ci[pk]; /* consider node i in Lk */ 261 | p1 = Cp[i]; 262 | p2 = p1 + elen[i] - 1; 263 | pn = p1; 264 | for(h = 0, d = 0, p = p1; p <= p2; p++) /* scan Ei */ 265 | { 266 | e = Ci[p]; 267 | if(w[e] != 0) /* e is an unabsorbed element */ 268 | { 269 | dext = w[e] - mark; /* dext = |Le\Lk| */ 270 | if(dext > 0) 271 | { 272 | d += dext; /* sum up the set differences */ 273 | Ci[pn++] = e; /* keep e in Ei */ 274 | h += e; /* compute the hash of node i */ 275 | } 276 | else 277 | { 278 | Cp[e] = amd::amd_flip (k); /* aggressive absorb. 
e->k */ 279 | w[e] = 0; /* e is a dead element */ 280 | } 281 | } 282 | } 283 | elen[i] = pn - p1 + 1; /* elen[i] = |Ei| */ 284 | p3 = pn; 285 | p4 = p1 + len[i]; 286 | for(p = p2 + 1; p < p4; p++) /* prune edges in Ai */ 287 | { 288 | j = Ci[p]; 289 | if((nvj = nv[j]) <= 0) continue; /* node j dead or in Lk */ 290 | d += nvj; /* degree(i) += |j| */ 291 | Ci[pn++] = j; /* place j in node list of i */ 292 | h += j; /* compute hash for node i */ 293 | } 294 | if(d == 0) /* check for mass elimination */ 295 | { 296 | Cp[i] = amd::amd_flip (k); /* absorb i into k */ 297 | nvi = -nv[i]; 298 | dk -= nvi; /* |Lk| -= |i| */ 299 | nvk += nvi; /* |k| += nv[i] */ 300 | nel += nvi; 301 | nv[i] = 0; 302 | elen[i] = -1; /* node i is dead */ 303 | } 304 | else 305 | { 306 | degree[i] = std::min(degree[i], d); /* update degree(i) */ 307 | Ci[pn] = Ci[p3]; /* move first node to end */ 308 | Ci[p3] = Ci[p1]; /* move 1st el. to end of Ei */ 309 | Ci[p1] = k; /* add k as 1st element in of Ei */ 310 | len[i] = pn - p1 + 1; /* new len of adj. 
list of node i */ 311 | h %= n; /* finalize hash of i */ 312 | next[i] = hhead[h]; /* place i in hash bucket */ 313 | hhead[h] = i; 314 | last[i] = h; /* save hash of i in last[i] */ 315 | } 316 | } /* scan2 is done */ 317 | degree[k] = dk; /* finalize |Lk| */ 318 | lemax = std::max(lemax, dk); 319 | mark = amd::wclear(mark+lemax, lemax, w, n); /* clear w */ 320 | 321 | /* --- Supernode detection ------------------------------------------ */ 322 | for(pk = pk1; pk < pk2; pk++) 323 | { 324 | i = Ci[pk]; 325 | if(nv[i] >= 0) continue; /* skip if i is dead */ 326 | h = last[i]; /* scan hash bucket of node i */ 327 | i = hhead[h]; 328 | hhead[h] = -1; /* hash bucket will be empty */ 329 | for(; i != -1 && next[i] != -1; i = next[i], mark++) 330 | { 331 | ln = len[i]; 332 | eln = elen[i]; 333 | for(p = Cp[i]+1; p <= Cp[i] + ln-1; p++) w[Ci[p]] = mark; 334 | jlast = i; 335 | for(j = next[i]; j != -1; ) /* compare i with all j */ 336 | { 337 | ok = (len[j] == ln) && (elen[j] == eln); 338 | for(p = Cp[j] + 1; ok && p <= Cp[j] + ln - 1; p++) 339 | { 340 | if(w[Ci[p]] != mark) ok = 0; /* compare i and j*/ 341 | } 342 | if(ok) /* i and j are identical */ 343 | { 344 | Cp[j] = amd::amd_flip (i); /* absorb j into i */ 345 | nv[i] += nv[j]; 346 | nv[j] = 0; 347 | elen[j] = -1; /* node j is dead */ 348 | j = next[j]; /* delete j from hash bucket */ 349 | next[jlast] = j; 350 | } 351 | else 352 | { 353 | jlast = j; /* j and i are different */ 354 | j = next[j]; 355 | } 356 | } 357 | } 358 | } 359 | 360 | /* --- Finalize new element------------------------------------------ */ 361 | for(p = pk1, pk = pk1; pk < pk2; pk++) /* finalize Lk */ 362 | { 363 | i = Ci[pk]; 364 | if((nvi = -nv[i]) <= 0) continue;/* skip if i is dead */ 365 | nv[i] = nvi; /* restore nv[i] */ 366 | d = degree[i] + dk - nvi; /* compute external degree(i) */ 367 | d = std::min(d, n - nel - nvi); 368 | if(head[d] != -1) last[head[d]] = i; 369 | next[i] = head[d]; /* put i back in degree list */ 370 | last[i] = 
-1; 371 | head[d] = i; 372 | mindeg = std::min(mindeg, d); /* find new minimum degree */ 373 | degree[i] = d; 374 | Ci[p++] = i; /* place i in Lk */ 375 | } 376 | nv[k] = nvk; /* # nodes absorbed into k */ 377 | if((len[k] = p-pk1) == 0) /* length of adj list of element k*/ 378 | { 379 | Cp[k] = -1; /* k is a root of the tree */ 380 | w[k] = 0; /* k is now a dead element */ 381 | } 382 | if(elenk != 0) cnz = p; /* free unused space in Lk */ 383 | } 384 | 385 | 386 | /* --- Postordering ----------------------------------------------------- */ 387 | for(i = 0; i < n; i++) Cp[i] = amd::amd_flip (Cp[i]);/* fix assembly tree */ 388 | for(j = 0; j <= n; j++) head[j] = -1; 389 | for(j = n; j >= 0; j--) /* place unordered nodes in lists */ 390 | { 391 | if(nv[j] > 0) continue; /* skip if j is an element */ 392 | next[j] = head[Cp[j]]; /* place j in list of its parent */ 393 | head[Cp[j]] = j; 394 | } 395 | for(e = n; e >= 0; e--) /* place elements in lists */ 396 | { 397 | if(nv[e] <= 0) continue; /* skip unless e is an element */ 398 | if(Cp[e] != -1) 399 | { 400 | next[e] = head[Cp[e]]; /* place e in list of its parent */ 401 | head[Cp[e]] = e; 402 | } 403 | } 404 | 405 | for(k = 0, i = 0; i <= n; i++) /* postorder the assembly tree */ 406 | { 407 | if(Cp[i] == -1) k = amd::tdfs(i, k, head, next, temp_perm, w); 408 | } 409 | 410 | for (int i = 0; i < n; i++) { 411 | perm[i] = temp_perm[i]; 412 | } 413 | 414 | delete[] W; 415 | delete[] temp_perm; 416 | delete[] Cp; 417 | delete[] Ci; 418 | 419 | } 420 | 421 | #endif -------------------------------------------------------------------------------- /SymILDL/SymILDL/solver.h: -------------------------------------------------------------------------------- 1 | #ifndef _SOLVER_H 2 | #define _SOLVER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "lilc_matrix.h" 10 | 11 | namespace symildl { 12 | 13 | // Using struct'd enums to achieve a C++11 style enum class without C++11 14 | struct reordering_type 
// Using struct'd enums to achieve a C++11 style enum class without C++11
struct reordering_type {
    enum {
        NONE,
        AMD,
        RCM,
        MC64
    };
};

struct equilibration_type {
    enum {
        NONE,
        BUNCH,
        RUIZ,
        MC64
    };
};

struct solver_type {
    enum {
        NONE,
        MINRES,
        SQMR,
        FULL
    };
};

struct message_level {
    enum {
        NONE,
        STATISTICS,
        DEBUG
    };
};


/*! \brief Saves a vector as an n-by-1 matrix in matrix market (.mtx) coordinate format.
    \param vec the vector to write; entry i is written as "i+1 1 vec[i]".
    \param filename the filename the vector will be saved under.
    \return true if the file was opened and every write succeeded, false otherwise.
*/
template <class el_type>
bool save_vector(const std::vector<el_type>& vec, std::string filename) {
    std::ofstream out(filename.c_str(), std::ios::out | std::ios::binary);
    if(!out)
        return false;

    out.flags(std::ios_base::scientific);
    out.precision(12);
    const std::string header = "%%MatrixMarket matrix coordinate real general";

    out << header << std::endl;
    out << vec.size() << " " << 1 << " " << vec.size() << "\n";

    for(int i = 0; i < (int) vec.size(); i++) {
        out << i+1 << " " << 1 << " " << vec[i] << "\n";
    }

    out.close();
    // A failed write or flush leaves the stream in a failed state; report it
    // instead of claiming success unconditionally.
    return !out.fail();
}

/*! \brief Reads in a dense row or column vector vec in matrix market (.mtx) format.

    Lines beginning with '%' are skipped. The first remaining line with two positive
    integers gives the dimensions; each following line contributes one value.

    \param vec the vector to fill; resized to max(n_rows, n_cols).
    \param filename the file to read from.
    \param msg_lvl if non-zero, a confirmation message is printed to stdout.
    \return false if the file cannot be opened or contains no size line, true otherwise
            (a count mismatch is reported on stderr but still returns true).
*/
template <class el_type>
bool read_vector(std::vector<el_type>& vec, std::string filename, int msg_lvl = message_level::STATISTICS) {
    std::ifstream input(filename.c_str(), std::ios::in);

    if(!input) return false;

    const int maxBuffersize = 2048;
    char buffer[maxBuffersize];

    bool readsizes = false;
    el_type value;

    // Initialized so that a file without a size line is detected rather than
    // read through indeterminate values.
    int i = 0, n_rows = 0, n_cols = 0;
    while(input.getline(buffer, maxBuffersize)) {
        // skip comments
        //NOTE An appropriate test should be done on the header to get the symmetry
        if(buffer[0]=='%') continue;

        std::stringstream line(buffer);

        if(!readsizes) {
            line >> n_rows >> n_cols;
            if(n_rows > 0 && n_cols > 0) {
                readsizes = true;
                vec.resize(std::max(n_rows, n_cols));
            }
        } else {
            // Ignore blank or unparsable lines instead of storing a stale value
            if(!(line >> value)) continue;

            // Never write past the declared length; surplus entries are only
            // counted so that the mismatch is reported below.
            if(i < (int) vec.size()) vec[i] = value;
            i++;
        }
    }
    input.close();

    if(!readsizes) return false;

    if (i != std::max(n_rows, n_cols)) {
        std::cerr << "Expected " << std::max(n_rows, n_cols) << " elems but read " << i << "." << std::endl;
    }

    if (msg_lvl) std::cout << "Load succeeded. " << "Vector file " << filename << " was loaded." << std::endl;
    return true;
}
179 | */ 180 | void load(const int* ptr, const int* row, const el_type* val, int dim) { 181 | bool result = A.load(ptr, row, val, dim); 182 | assert(result); 183 | if (msg_lvl) printf("A is %d by %d with %d non-zeros.\n", A.n_rows(), A.n_cols(), A.nnz() ); 184 | } 185 | 186 | 187 | /*! \brief Loads a right hand side b into the solver. 188 | \param b a vector of the right hand side. 189 | */ 190 | void set_rhs(vector b) { 191 | rhs = b; 192 | has_rhs = true; 193 | if (msg_lvl) printf("Right hand side has %d entries.\n", rhs.size() ); 194 | } 195 | 196 | /*! \brief Sets the reordering scheme for the solver. 197 | */ 198 | void set_reorder_scheme(const char* ordering) { 199 | if (strcmp(ordering, "rcm") == 0) { 200 | reorder_type = reordering_type::RCM; 201 | } else if (strcmp(ordering, "amd") == 0) { 202 | reorder_type = reordering_type::AMD; 203 | } else if (strcmp(ordering, "none") == 0) { 204 | reorder_type = reordering_type::NONE; 205 | } 206 | } 207 | 208 | /*! \brief Decides whether we should use equilibriation on the matrix or not. 209 | */ 210 | void set_equil(const char* equil) { 211 | if (strcmp(equil, "bunch") == 0) { 212 | equil_type = equilibration_type::BUNCH; 213 | } else if (strcmp(equil, "none") == 0) { 214 | equil_type = equilibration_type::NONE; 215 | } 216 | } 217 | 218 | /*! \brief Decides whether we perform a full solve or not. 219 | */ 220 | void set_solver(const char* solver) { 221 | if (strcmp(solver, "minres") == 0) { 222 | solve_type = solver_type::MINRES; 223 | } else if (strcmp(solver, "sqmr") == 0) { 224 | solve_type = solver_type::SQMR; 225 | } else if (strcmp(solver, "full") == 0) { 226 | solve_type = solver_type::FULL; 227 | } else if (strcmp(solver, "none") == 0) { 228 | solve_type = solver_type::NONE; 229 | } 230 | } 231 | 232 | /*! \brief Controls how much information gets printed to stdout. 
233 | */ 234 | void set_message_level(const char* msg) { 235 | if (strcmp(msg, "none") == 0) { 236 | msg_lvl = message_level::NONE; 237 | } else if (strcmp(msg, "statistics") == 0) { 238 | msg_lvl = message_level::STATISTICS; 239 | } else if (strcmp(msg, "debug") == 0) { 240 | msg_lvl = message_level::DEBUG; 241 | } 242 | } 243 | 244 | /*! \brief Decides whether we perform the factorization inplace or not. 245 | */ 246 | void set_inplace(bool inplace) { 247 | perform_inplace = inplace; 248 | } 249 | 250 | /*! \brief Decides the kind of partial pivoting we should use. 251 | */ 252 | void set_pivot(const char* pivot) { 253 | if (strcmp(pivot, "rook") == 0) { 254 | piv_type = pivot_type::ROOK; 255 | } else if (strcmp(pivot, "bunch") == 0) { 256 | piv_type = pivot_type::BKP; 257 | } 258 | } 259 | 260 | /*! \brief Factors the matrix A into P' * S * A * S * P = LDL' in addition to printing some timing data to screen. 261 | 262 | More information about the parameters can be found in the documentation for the ildl() function. 263 | 264 | \param fill_factor a factor controling memory usage of factorization. 265 | \param tol a factor controling accuracy of factorization. 266 | \param pp_tol a factor controling the aggresiveness of Bunch-Kaufman pivoting. 267 | \param max_iter the maximum number of iterations for minres (ignored if no right hand side). 
268 | */ 269 | void solve(double fill_factor, double tol, double pp_tol, int max_iter = -1, double minres_tol = 1e-6, double shift = 0.0) { 270 | // A full factorization is equivalent to a fill factor of n and tol of 0 271 | if (solve_type == solver_type::FULL) { 272 | tol = 0.0; 273 | fill_factor = A.n_cols(); 274 | } 275 | 276 | perm.reserve(A.n_cols()); 277 | cout << std::fixed << std::setprecision(3); 278 | 279 | double dif, total = 0; 280 | clock_t start; 281 | 282 | if (equil_type == equilibration_type::BUNCH) { 283 | start = clock(); 284 | A.sym_equil(); 285 | dif = clock() - start; total += dif; 286 | if (msg_lvl) printf(" Equilibration:\t\t%.3f seconds.\n", dif/CLOCKS_PER_SEC); 287 | } 288 | 289 | if (reorder_type != reordering_type::NONE) { 290 | start = clock(); 291 | std::string perm_name; 292 | switch (reorder_type) { 293 | case reordering_type::AMD: 294 | A.sym_amd(perm); 295 | perm_name = "AMD"; 296 | break; 297 | case reordering_type::RCM: 298 | A.sym_rcm(perm); 299 | perm_name = "RCM"; 300 | break; 301 | } 302 | 303 | dif = clock() - start; total += dif; 304 | if (msg_lvl) printf(" %s:\t\t\t\t%.3f seconds.\n", perm_name.c_str(), dif/CLOCKS_PER_SEC); 305 | 306 | start = clock(); 307 | A.sym_perm(perm); 308 | dif = clock() - start; total += dif; 309 | if (msg_lvl) printf(" Permutation:\t\t\t%.3f seconds.\n", dif/CLOCKS_PER_SEC); 310 | } else { 311 | // no permutation specified, store identity permutation instead. 
312 | for (int i = 0; i < A.n_cols(); i++) { 313 | perm.push_back(i); 314 | } 315 | } 316 | 317 | start = clock(); 318 | if (perform_inplace) { 319 | A.ildl_inplace(D, perm, fill_factor, tol, pp_tol, piv_type); 320 | } else { 321 | A.ildl(L, D, perm, fill_factor, tol, pp_tol, piv_type); 322 | } 323 | dif = clock() - start; total += dif; 324 | 325 | std::string pivot_name; 326 | if (piv_type == pivot_type::BKP) { 327 | pivot_name = "BK"; 328 | } else if (piv_type == pivot_type::ROOK) { 329 | pivot_name = "Rook"; 330 | } 331 | 332 | if (msg_lvl) printf(" Factorization (%s pivoting):\t%.3f seconds.\n", pivot_name.c_str(), dif/CLOCKS_PER_SEC); 333 | if (msg_lvl) printf("Total time:\t\t\t%.3f seconds.\n", total/CLOCKS_PER_SEC); 334 | if (perform_inplace) { 335 | if (msg_lvl) printf("L is %d by %d with %d non-zeros.\n", A.n_rows(), A.n_cols(), A.nnz() ); 336 | } else { 337 | if (msg_lvl) printf("L is %d by %d with %d non-zeros.\n", L.n_rows(), L.n_cols(), L.nnz() ); 338 | } 339 | if (msg_lvl) printf("\n"); 340 | fflush(stdout); 341 | 342 | // if there is a right hand side, it means the user wants a solve. 343 | // TODO: refactor this solve to be in its own method, and separate 344 | // factoring/minres solve phase 345 | if (has_rhs) { 346 | if (perform_inplace) { 347 | if (msg_lvl) printf("Inplace factorization cannot be used with the solver. Please try again without -inplace.\n"); 348 | } else { 349 | // start timer in case we're doing a full solve 350 | start = clock(); 351 | 352 | // we've permuted and equilibrated the matrix, so we gotta apply 353 | // the same permutation and equilibration to the right hand side. 354 | // i.e. rhs = P'S*rhs 355 | // 0. apply S 356 | for (int i = 0; i < A.n_cols(); i++) { 357 | rhs[i] = A.S[i]*rhs[i]; 358 | } 359 | 360 | // 1. apply P' (takes rhs[perm[i]] to rhs[i], i.e. 
inverse of perm, 361 | // where perm takes i to perm[i]) 362 | vector tmp(A.n_cols()); 363 | for (int i = 0; i < A.n_cols(); i++) { 364 | tmp[i] = rhs[perm[i]]; 365 | } 366 | rhs = tmp; 367 | 368 | if (solve_type == solver_type::FULL) { 369 | if (msg_lvl) printf("Solving matrix with direct solver...\n"); 370 | sol_vec.resize(A.n_cols(), 0); 371 | // MINRES uses the preconditioned solver that 372 | // splits the block D into |D|^(1/2). 373 | // For the full solver we'll just solve D directly. 374 | L.backsolve(rhs, sol_vec); 375 | D.solve(sol_vec, tmp); 376 | L.forwardsolve(tmp, sol_vec); 377 | } else { 378 | start = clock(); 379 | 380 | if (solve_type == solver_type::MINRES) { 381 | // finally, since we're preconditioning with M = L|D|^(1/2), we have 382 | // to multiply M^(-1) to the rhs and solve the system 383 | // M^(-1) * B * M'^(-1) y = M^(-1)P'*S*b 384 | L.backsolve(rhs, tmp); 385 | D.sqrt_solve(tmp, rhs, false); 386 | 387 | if (msg_lvl) printf("Solving matrix with MINRES...\n"); 388 | // solve the equilibrated, preconditioned, and permuted linear system 389 | minres(max_iter, minres_tol, shift); 390 | 391 | // now we've solved M^(-1)*B*M'^(-1)y = M^(-1)P'*S*b 392 | // where B = P'SASPy. 393 | 394 | // but the actual solution is y = M' * P'S^(-1)*x 395 | // so x = S*P*M'^(-1)*y 396 | 397 | // 0. apply M'^(-1) 398 | D.sqrt_solve(sol_vec, tmp, true); 399 | L.forwardsolve(tmp, sol_vec); 400 | } else if (solve_type == solver_type::SQMR) { 401 | if (msg_lvl) printf("Solving matrix with SQMR...\n"); 402 | sqmr(max_iter, minres_tol); 403 | } 404 | } 405 | 406 | // 1. apply P 407 | for (int i = 0; i < A.n_cols(); i++) { 408 | tmp[perm[i]] = sol_vec[i]; 409 | } 410 | sol_vec = tmp; 411 | 412 | // 2. 
apply S 413 | for (int i = 0; i < A.n_cols(); i++) { 414 | sol_vec[i] = A.S[i]*sol_vec[i]; 415 | } 416 | dif = clock() - start; 417 | if (msg_lvl) printf("Solve time:\t%.3f seconds.\n", dif/CLOCKS_PER_SEC); 418 | if (msg_lvl) printf("\n"); 419 | 420 | if (save_sol) { 421 | // save results 422 | // TODO: refactor this to be in its own method 423 | if (msg_lvl) printf("Solution saved to output_matrices/outsol.mtx.\n"); 424 | save_vector(sol_vec, "output_matrices/outsol.mtx"); 425 | } 426 | 427 | } 428 | } 429 | } 430 | 431 | /*! \brief Applies minres on A, preconditioning with factors L and D. 432 | 433 | \param max_iter the maximum number of minres iterations. 434 | \param stop_tol the stopping tolerance of minres. i.e. we stop as soon as the residual goes below stop_tol. 435 | \param shift shifts A by shift*(identity matrix) to make it more positive definite. This sometimes helps. 436 | */ 437 | int minres(int max_iter = 1000, double stop_tol = 1e-6, double shift = 0.0); 438 | 439 | /*! \brief Applies SQMR on A, preconditioning with factors L and D. 440 | 441 | \param max_iter the maximum number of SQMR iterations. 442 | \param stop_tol the stopping tolerance of SQMR. i.e. we stop as soon as the residual goes below stop_tol. 443 | */ 444 | int sqmr(int max_iter = 1000, double stop_tol = 1e-6); 445 | 446 | /*! \brief Save results of factorization (automatically saved into the output_matrices folder). 447 | 448 | The names of the output matrices follow the format out{}.mtx, where {} describes what the file contains (B, L, D, S, or P). When the factorization was performed in place, no outB.mtx is written and A itself is saved as outL.mtx. 449 | */ 450 | void save() { // TODO: refactor this as a "save factors" method 451 | if (msg_lvl) cout << "Saving matrices..." 
<< endl; 452 | if (!perform_inplace) { 453 | A.save("output_matrices/outB.mtx", true); 454 | L.save("output_matrices/outL.mtx", false); 455 | } else { 456 | A.save("output_matrices/outL.mtx", false); 457 | } 458 | 459 | A.S.save("output_matrices/outS.mtx"); 460 | save_vector(perm, "output_matrices/outP.mtx"); 461 | 462 | D.save("output_matrices/outD.mtx"); 463 | if (msg_lvl) cout << "Save complete." << endl; 464 | } 465 | 466 | /*! \brief Prints the L and D factors and the permutation to stdout. Does nothing unless SYM_ILDL_DEBUG is defined; for in-place factorizations A is printed instead of L. 467 | */ 468 | void display() { 469 | #ifdef SYM_ILDL_DEBUG 470 | if (perform_inplace) { 471 | cout << A << endl; 472 | } else { 473 | cout << L << endl; 474 | } 475 | cout << D << endl; 476 | cout << perm << endl; 477 | #endif 478 | } 479 | }; 480 | 481 | #include "solver_minres.h" 482 | #include "solver_sqmr.h" 483 | 484 | } 485 | 486 | #endif 487 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_declarations.h: -------------------------------------------------------------------------------- 1 | // -*- mode: c++ -*- 2 | #ifndef _LILC_MATRIX_DECLARATIONS_H_ 3 | #define _LILC_MATRIX_DECLARATIONS_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "swap_struct.h" 13 | 14 | /*! \brief A list-of-lists (LIL) matrix in column oriented format. 15 | 16 | For convenience, the matrix this class represents will be referred to as matrix A. 17 | In LIL-C format, each column of A (an n*n matrix) is stored as a separate vector. The row indices of the nonzeros are stored in m_idx while the nonzero values are stored in m_x. Both m_x and m_idx are initialized to a list of n lists. m_idx and m_x are ordered dependent on each other, in that A(m_idx[k][j], k) = m_x[k][j]. 
18 | 19 | */ 20 | 21 | template 22 | class lilc_matrix : public lil_sparse_matrix 23 | { 24 | public: 25 | 26 | //-------------- typedefs and inherited variables --------------// 27 | using lil_sparse_matrix::m_idx; 28 | using lil_sparse_matrix::m_x; 29 | using lil_sparse_matrix::m_n_rows; 30 | using lil_sparse_matrix::m_n_cols; 31 | using lil_sparse_matrix::n_rows; 32 | using lil_sparse_matrix::n_cols; 33 | using lil_sparse_matrix::nnz; 34 | using lil_sparse_matrix::nnz_count; 35 | using lil_sparse_matrix::eps; 36 | 37 | 38 | typedef typename lil_sparse_matrix::idx_vector_type idx_vector_type; 39 | typedef typename lil_sparse_matrix::elt_vector_type elt_vector_type; 40 | 41 | typedef typename idx_vector_type::iterator idx_it; 42 | typedef typename elt_vector_type::iterator elt_it; 43 | 44 | std::vector< std::vector< int > > list; /// row_first; /// col_first; /// S; /// (n_rows, n_cols) 67 | { 68 | m_x.reserve(n_cols); 69 | m_idx.reserve(n_cols); 70 | } 71 | 72 | //----Matrix referencing/filling----// 73 | 74 | /*! \brief Finds the (i,j)th coefficient of the matrix. 75 | \param i the row of the (i,j)th element (zero-indexed). 76 | \param j the col of the (i,j)th element (zero-indexed). 77 | \param offset an optional search offset for use in linear search (start at offset instead of 0). Ignored when i == j. 78 | \return The (i,j)th element of the matrix, or 0 if no such entry is stored in column j. 79 | */ 80 | inline virtual el_type coeff(const int& i, const int& j, int offset = 0) const 81 | { 82 | //invariant: first elem in each col of A is the diagonal elem if it exists, so a diagonal lookup only inspects slot 0. 83 | if (i == j) { 84 | if (m_idx[j].size() == 0) return 0; 85 | return (m_idx[j][0] == i ? m_x[j][0] : 0); 86 | } 87 | 88 | for (int k = offset, end = m_idx[j].size(); k < end; k++) { 89 | if (m_idx[j][k] == i) return m_x[j][k]; 90 | } 91 | 92 | return 0; 93 | } 94 | 95 | /*! \brief Finds the index/value pointers to (i,j)th coefficient of the matrix. 96 | \param i the row of the (i,j)th element (zero-indexed). 
97 | \param j the col of the (i,j)th element (zero-indexed). 98 | \param its a pair of pointers, one for the index of the found element, and the other for the value of the element. If the element is not found, the pointers point to the end of column j. 99 | 100 | \return True if (i,j)th element is nonzero, false otherwise. 101 | */ 102 | inline bool coeffRef(const int& i, const int& j, std::pair& its) 103 | { 104 | for (unsigned int k = 0; k < m_idx[j].size(); k++) { 105 | if (m_idx[j][k] == i) { 106 | its = make_pair(m_idx[j].begin() + k, m_x[j].begin() + k); 107 | return true; 108 | } 109 | } 110 | 111 | its = make_pair(m_idx[j].end(), m_x[j].end()); 112 | return false; 113 | } 114 | 115 | /*! \brief Resizes the matrix. For use in preallocating space before factorization begins. All previously stored entries, row lists and first-offsets are discarded; row_first and col_first are refilled with 1. 116 | \param n_rows the number of rows in the resized matrix. 117 | \param n_cols the number of cols in the resized matrix. 118 | */ 119 | void resize(int n_rows, int n_cols) 120 | { 121 | m_n_rows = n_rows; 122 | m_n_cols = n_cols; 123 | 124 | m_x.clear(); 125 | m_idx.clear(); 126 | row_first.clear(); 127 | col_first.clear(); 128 | list.clear(); 129 | 130 | m_x.resize(n_cols); 131 | m_idx.resize(n_cols); 132 | 133 | row_first.resize(n_cols, 1); 134 | col_first.resize(n_cols, 1); 135 | list.resize(n_cols); 136 | 137 | S.resize(n_cols, 1); 138 | for (int i = 0; i < n_cols; i++) { 139 | m_x[i].clear(); 140 | m_idx[i].clear(); 141 | list[i].clear(); 142 | } 143 | } 144 | 145 | //-----Reorderings/Rescalings------// 146 | /*! \brief Returns a pseudo-peripheral root of A. This is essentially many chained breadth-first searches across the graph of A (where A is viewed as an adjacency matrix). 147 | 148 | \param s contains the initial node to seed the algorithm. A pseudo-peripheral root of A is stored in s at the end of the algorithm. 149 | */ 150 | void find_root(int& s); 151 | 152 | /*! \brief Returns the next level set given the current level set of A. 
This is essentially all neighbours of the currently enqueued nodes in breadth-first search. 153 | 154 | \param lvl_set the current level set (a list of nodes). 155 | \param visited all previously visited nodes. 156 | */ 157 | inline bool find_level_set(vector& lvl_set, vector& visited); 158 | 159 | /*! \brief Returns a Reverse Cuthill-McKee ordering of the matrix A (stored in perm). 160 | 161 | A detailed description of this function as well as all its subfunctions can be found in "Computer Solution of Large Sparse Positive Definite Systems" by George and Liu (1981). 162 | \param perm An empty permutation vector (filled on function completion). 163 | */ 164 | void sym_rcm(vector& perm); 165 | 166 | /*! \brief Returns an Approximate Minimum Degree ordering of the matrix A (stored in perm). 167 | 168 | A detailed description of this function as well as all its subfunctions can be found in "An Approximate Minimum Degree Ordering Algorithm" by Amestoy, Davis, and Duff (1996). 169 | \param perm An empty permutation vector (filled on function completion). 170 | */ 171 | inline void sym_amd(vector& perm); 172 | 173 | /*! \brief Given a permutation vector perm, A is permuted to P'AP, where P is the permutation matrix associated with perm. 174 | \param perm the permutation vector. 175 | */ 176 | void sym_perm(vector& perm); 177 | 178 | /*! \brief The symmetric matrix A is equilibrated and the symmetric equilibrated matrix SAS is stored in A, where S is a diagonal scaling matrix. 179 | 180 | This algorithm is based on the one outlined in "Equilibration of Symmetric Matrices in the Max-Norm" by Bunch (1971). 181 | */ 182 | void sym_equil(); 183 | 184 | //----Factorizations----// 185 | /*! \brief Performs an LDL' factorization of this matrix. 186 | 187 | The pivoted matrix P'AP will be stored in place of A. In addition, the L and D factors of P'AP will be stored in L and D (so that P'AP = LDL'). 
The factorization is performed in Crout order and follows the algorithm outlined in "Crout versions of the ILU factorization with pivoting for sparse symmetric matrices" by Li and Saad (2005). 188 | 189 | \param L the L factor of this matrix. 190 | \param D the D factor of this matrix. 191 | \param perm the current permutation of A. 192 | \param fill_factor a parameter to control memory usage. Each column is guaranteed to have fewer than fill_factor*(nnz(A)/n_col(A)) elements. 193 | \param tol a parameter to control aggressiveness of dropping. In each column, elements less than tol*norm(column) are dropped. 194 | \param pp_tol a parameter to control aggressiveness of pivoting. Allowable ranges are [0,inf). If the parameter is >= 1, Bunch-Kaufman pivoting will be done in full. If the parameter is 0, partial pivoting will be turned off and the first non-zero pivot under the diagonal will be used. Choices close to 0 increase locality in pivoting (pivots closer to the diagonal are used) while choices closer to 1 increase the stability of pivoting. Small values are useful for situations where you care more about preserving the structure of the matrix than about bounding the size of its elements. 195 | \param piv_type chooses the type of pivoting procedure used: threshold Bunch-Kaufman (pivot_type::BKP, the default), or rook pivoting (pivot_type::ROOK). If rook pivoting is chosen, pp_tol is ignored. 196 | */ 197 | void ildl(lilc_matrix& L, block_diag_matrix& D, idx_vector_type& perm, const double& fill_factor, const double& tol, const double& pp_tol, int piv_type = pivot_type::BKP); 198 | 199 | /*! \brief Performs an _inplace_ LDL' factorization of this matrix. 200 | 201 | The pivoted matrix P'AP will be stored in place of A. In addition, the D factor of P'AP will be stored in D (so that P'AP = LDL'). NOTE(review): this overload takes no L argument, so the L factor is presumably written over this matrix itself - confirm against lilc_matrix_ildl_inplace.h. The factorization is performed in Crout order and follows the algorithm outlined in "Crout versions of the ILU factorization with pivoting for sparse symmetric matrices" by Li and Saad (2005). 
202 | 203 | \param D the D factor of this matrix. 204 | \param perm the current permutation of A. 205 | \param fill_factor a parameter to control memory usage. Each column is guaranteed to have fewer than fill_factor*(nnz(A)/n_col(A)) elements. 206 | \param tol a parameter to control agressiveness of dropping. In each column, elements less than tol*norm(column) are dropped. 207 | \param pp_tol a parameter to control aggresiveness of pivoting. Allowable ranges are [0,inf). If the parameter is >= 1, Bunch-Kaufman pivoting will be done in full. If the parameter is 0, partial pivoting will be turned off and the first non-zero pivot under the diagonal will be used. Choices close to 0 increase locality in pivoting (pivots closer to the diagonal are used) while choices closer to 1 increase the stability of pivoting. Useful for situations where you care more about preserving the structure of the matrix rather than bounding the size of its elements. 208 | */ 209 | void ildl_inplace(block_diag_matrix& D, idx_vector_type& perm, const double& fill_factor, const double& tol, const double& pp_tol, int piv_type = pivot_type::BKP); 210 | 211 | //------Helpers------// 212 | /*! \brief Solves L*x = b by forward substitution, where L is this matrix, assumed to be lower triangular (stored column major) with the diagonal entry stored first in every column. 213 | 214 | \param b the right hand side. 215 | \param x a storage vector for the solution (must be same size as b). 216 | */ 217 | void backsolve(const elt_vector_type& b, elt_vector_type& x) { 218 | assert(b.size() == x.size()); 219 | x = b; 220 | // simple forward substitution: finish x[i], then eliminate it from the rows below 221 | for (int i = 0; i < m_n_cols; i++) { 222 | x[i] /= m_x[i][0]; 223 | for (int k = 1; k < m_idx[i].size(); k++) { 224 | x[m_idx[i][k]] -= x[i]*m_x[i][k]; 225 | } 226 | } 227 | } 228 | 229 | /*! \brief Solves L'*x = b by back substitution, where L is this matrix; i.e. the transpose of this lower triangular, column-stored matrix is treated as an upper triangular matrix (stored row major). The diagonal entry is assumed to be stored first in every column. 230 | 231 | \param b the right hand side. 232 | \param x a storage vector for the solution (must be same size as b). 
233 | */ 234 | void forwardsolve(const elt_vector_type& b, elt_vector_type& x) { 235 | assert(b.size() == x.size()); 236 | // simple back substitution: the sub-diagonal entries of column i act as row i of the transpose 237 | for (int i = m_n_cols-1; i >= 0; i--) { 238 | x[i] = b[i]/m_x[i][0]; 239 | for (int k = 1; k < m_idx[i].size(); k++) { 240 | x[i] -= x[m_idx[i][k]]*m_x[i][k]/m_x[i][0]; 241 | } 242 | } 243 | } 244 | 245 | /*! \brief Performs a matrix-vector product with this matrix. 246 | 247 | \param x the vector to be multiplied. 248 | \param y a storage vector for the result (cleared and resized to the size of x before accumulation). 249 | \param full_mult if true, we assume that only half the matrix is stored and do two operations per off-diagonal element of the matrix to account for the unstored other half. 250 | */ 251 | void multiply(const elt_vector_type& x, elt_vector_type& y, bool full_mult = true) { 252 | y.clear(); y.resize(x.size(), 0); 253 | for (int i = 0; i < m_n_cols; i++) { 254 | for (int k = 0; k < m_idx[i].size(); k++) { 255 | y[m_idx[i][k]] += x[i]*m_x[i][k]; 256 | if (full_mult && i != m_idx[i][k]) { 257 | y[i] += x[m_idx[i][k]]*m_x[i][k]; 258 | } 259 | } 260 | } 261 | } 262 | 263 | /*! \brief Performs a symmetric permutation between row/col k & r of A. 264 | 265 | \param s a struct containing temporary variables needed during pivoting. 266 | \param in_set a bitset needed for unordered unions during pivoting. 267 | \param L the lower triangular factor of A. 268 | \param k index of row/col k. 269 | \param r index of row/col r. 270 | */ 271 | inline void pivot(swap_struct& s, vector& in_set, lilc_matrix& L, const int& k, const int& r); 272 | 273 | /*! \brief The inplace version of the function above. 274 | 275 | \param s a struct containing temporary variables needed during pivoting. 276 | \param in_set a bitset needed for unordered unions during pivoting. 277 | \param k index of row/col k. 278 | \param r index of row/col r. 279 | */ 280 | inline void pivotA(swap_struct& s, vector& in_set, const int& k, const int& r); 281 | 282 | /*! 
\brief Ensures two the invariants observed by A.first and A.list are held. 283 | 284 | \invariant 285 | If this matrix is a lower triangular factor of another matrix: 286 | -# On iteration k, first[i] will give the number of non-zero elements on col i of A before A(k, i). 287 | -# On iteration k, list[i][ first[i] ] will contain the first element below or on index k of column i of A. 288 | 289 | \invariant 290 | If this matrix is the matrix to be factored: 291 | -# On iteration k, first[i] will give the number of non-zero elements on row i of A before A(i, k). 292 | -# On iteration k, list[i][ first[i] ] will contain the first element right of or on index k of row i of A. 293 | 294 | \param j the column of con. 295 | \param k the iteration number. 296 | \param con the container to be swapped. 297 | \param update_list boolean indicating whether list or m_x/m_idx should be updated. 298 | */ 299 | template 300 | inline void ensure_invariant(const int& j, const int& k, Container& con, bool update_list = false) { 301 | int offset; 302 | if (update_list) offset = row_first[j]; 303 | else offset = col_first[j]; 304 | 305 | if ((offset >= (int) con.size()) || con.empty() || con[offset] == k) return; 306 | 307 | int i, min(offset); 308 | for (i = offset; i < (int) con.size(); i++) { 309 | if (con[i] == k) { 310 | min = i; 311 | break; 312 | } else if ( con[i] < con[min] ) { 313 | min = i; 314 | } 315 | } 316 | 317 | if (update_list) 318 | std::swap(con[offset], con[min]); 319 | else { 320 | std::swap(con[offset], con[min]); 321 | std::swap(m_x[j][offset], m_x[j][min]); 322 | } 323 | } 324 | 325 | /*! \brief Updates A.first for iteration k: for every index stored in list[k], ensure_invariant is applied to that column's index list (m_idx) and the column's col_first entry is then incremented. 326 | \param k current iteration index. 327 | */ 328 | inline void advance_first(const int& k) { 329 | for (idx_it it = list[k].begin(); it != list[k].end(); it++) { 330 | ensure_invariant(*it, k, m_idx[*it]); //make sure next element is good before we increment. 
331 | col_first[*it]++; //should have ensured invariant now 332 | } 333 | } 334 | 335 | /*! \brief Updates A.list for iteration k: for every off-diagonal index stored in column k, ensure_invariant is applied to that row's list and the row's row_first entry is then incremented. 336 | \param k current iteration index. 337 | */ 338 | inline void advance_list(const int& k) { 339 | for (idx_it it = m_idx[k].begin(); it != m_idx[k].end(); it++) { 340 | if (*it == k) continue; 341 | ensure_invariant(*it, k, list[*it], true); //make sure next element is good. 342 | row_first[*it]++; //invariant ensured. 343 | } 344 | } 345 | 346 | //----IO Functions----// 347 | 348 | /*! \brief Returns a string representation of A, with each column and its corresponding indices & non-zero values printed. 349 | \return A string representation of this matrix. 350 | */ 351 | 352 | std::string to_string () const; 353 | 354 | /*! \brief Loads a matrix in matrix market format. 355 | \param filename the filename of the matrix to be loaded. Must be in matrix market format (.mtx). 356 | */ 357 | bool load(std::string filename); 358 | 359 | /*! \brief Loads a matrix in CSC format. 360 | \param ptr A vector containing the ranges of indices in each col. 361 | \param row A vector containing the row indices of the nnz. 362 | \param val A vector containing the values of the non-zeros. 363 | */ 364 | bool load(const std::vector& ptr, const std::vector& row, const std::vector& val); 365 | 366 | /*! \brief Loads a matrix in CSC format from raw arrays. Does no error checking on the input arrays. 367 | \param ptr An array containing the ranges of indices in each col. 368 | \param row An array containing the row indices of the nnz. 369 | \param val An array containing the values of the non-zeros. 370 | \param dim The dimension of the matrix. 371 | */ 372 | bool load(const int* ptr, const int* row, const el_type* val, int dim); 373 | 374 | /*! \brief Saves a matrix in matrix market format. 375 | \param filename the filename of the matrix to be saved. All matrices saved are in matrix market format (.mtx). 376 | \param sym flags whether the matrix is symmetric or not. 
377 | */ 378 | bool save(std::string filename, bool sym = false); 379 | 380 | }; 381 | 382 | //------------------ include files for class functions -------------------// 383 | 384 | #include "lilc_matrix_find_level_set.h" 385 | #include "lilc_matrix_find_root.h" 386 | #include "lilc_matrix_sym_rcm.h" 387 | #include "lilc_matrix_sym_amd.h" 388 | #include "lilc_matrix_sym_perm.h" 389 | #include "lilc_matrix_sym_equil.h" 390 | #include "lilc_matrix_ildl_helpers.h" 391 | #include "lilc_matrix_ildl.h" 392 | #include "lilc_matrix_ildl_inplace.h" 393 | #include "lilc_matrix_pivot.h" 394 | #include "lilc_matrix_load.h" 395 | #include "lilc_matrix_save.h" 396 | #include "lilc_matrix_to_string.h" 397 | 398 | #endif 399 | -------------------------------------------------------------------------------- /SymILDL/SymILDL/lilc_matrix_ildl.h: -------------------------------------------------------------------------------- 1 | #ifndef _LILC_MATRIX_ILDL_H_ 2 | #define _LILC_MATRIX_ILDL_H_ 3 | 4 | 5 | using std::endl; 6 | using std::cout; 7 | using std::abs; 8 | // Crout-order incomplete LDL' factorization with threshold Bunch-Kaufman or rook pivoting; the parameters are documented on the declaration in lilc_matrix_declarations.h. 9 | template 10 | void lilc_matrix :: ildl(lilc_matrix& L, block_diag_matrix& D, idx_vector_type& perm, const double& fill_factor, const double& tol, const double& pp_tol, int piv_type) 11 | { 12 | 13 | //----------------- initialize temporary variables --------------------// 14 | const int ncols = n_cols(); //number of cols in A. 15 | 16 | int lfil; 17 | if (fill_factor > 1e4) lfil = ncols; //just in case users decide to enter a giant fill factor for fun... 18 | else lfil = 2*fill_factor*nnz()/ncols; //roughly a factor of 2 since only lower tri. of A is stored 19 | 20 | const el_type alpha = (1.0+sqrt(17.0))/8.0; //threshold constant (1+sqrt(17))/8 used by the pivoting tests below. 
21 | el_type w1(-1), wr(-1), d1(-1), dr(-1); //for use in bk-pivoting 22 | el_type det_D, D_inv11, D_inv22, D_inv12; //for use in 2x2 pivots 23 | el_type l_11, l_12; //for use in 2x2 pivots 24 | 25 | vector in_set(ncols, false); //bitset used for unsorted merges 26 | swap_struct s; //struct containing temp vars used in pivoting. 27 | 28 | elt_vector_type work(ncols, 0), temp(ncols, 0); //work: dense vector for the current column k; temp: dense vector for the candidate pivot column r 29 | idx_vector_type curr_nnzs, temp_nnzs; //non-zero indices of work (current col) and of temp (candidate col). 30 | curr_nnzs.reserve(ncols); //reserves space for worst case (entire col is non-zero) 31 | 32 | int count = 0; //the total number of nonzeros stored in L. 33 | int i, j, k, r, offset, col_size, col_size2(-1); 34 | bool size_two_piv = false; //boolean indicating if the pivot is 2x2 or 1x1 35 | 36 | //--------------- allocate memory for L and D ------------------// 37 | L.resize(ncols, ncols); //allocate a vector of size n for Llist as well 38 | D.resize(ncols ); 39 | 40 | //------------------- main loop: factoring begins -------------------------// 41 | for (k = 0; k < ncols; k++) { 42 | 43 | //curr nnz vector starts out empty and is cleared at the end of each loop iteration. 44 | //assign nonzeros indices of A(k:n, k) to curr_nnzs 45 | curr_nnzs.assign (m_idx[k].begin(), m_idx[k].end()); 46 | 47 | //assign nonzero values of A(k:n, k) to work 48 | for (j = 0; j < (int) curr_nnzs.size(); j++) { 49 | work[curr_nnzs[j]] = m_x[k][j]; 50 | } 51 | sort(curr_nnzs.begin(), curr_nnzs.end()); 52 | 53 | //--------------begin pivoting--------------// 54 | // the pivoting below DEFINITELY needs to be refactored into a separate function 55 | 56 | //do delayed updates on current column. work = Sum_{i=0}^{k-1} L(k,i) * D(i,i) * L(k:n, i) 57 | //(the formula above generalizes to block matrix form in the case of 2x2 pivots). 58 | update(k, work, curr_nnzs, L, D, in_set); 59 | 60 | //store diagonal element in d1. 
set diagonal element in work vector to 0 61 | //since we want to find the maximum off-diagonal element. 62 | d1 = work[k]; 63 | work[k] = 0; 64 | 65 | //find maximum element in work and store its index in r. 66 | w1 = max(work, curr_nnzs, r); 67 | 68 | if (piv_type == pivot_type::BKP) { 69 | //we do partial pivoting here, where we take the first element u in the column that satisfies 70 | //|u| > pp_tol*|w1|. for more information, consult "A Partial Pivoting Strategy for Sparse 71 | //Symmetric Matrix Decomposition" by J.H. Liu (1987). 72 | int t = r; //stores location of u 73 | el_type u = w1; //stores value of u 74 | for (i = 0; i < (int) curr_nnzs.size(); i++) { 75 | if (abs(work[curr_nnzs[i]])-pp_tol*w1 > eps ) { 76 | t = curr_nnzs[i]; 77 | u = work[t]; 78 | break; 79 | } 80 | } 81 | 82 | //bunch-kaufman partial pivoting is used below. for a more detailed reference, 83 | //refer to "Accuracy and Stability of Numerical Algorithms." by Higham (2002). 84 | //------------------- begin bunch-kaufman pivoting ------------------// 85 | if (w1 < eps) { 86 | //case 0: do nothing. pivot is k. 87 | } else if ( (alpha * w1 - abs(d1)) < eps ) { 88 | //case 1: do nothing. pivot is k. 89 | } else { 90 | //since we are doing partial pivoting, we should treat u and t like w1 and r, so 91 | //we'll just reassign w1 and r. note: this has to go in the else clause since 92 | //we still use the old w1 for case 0 and case 1. 
93 | w1 = u; 94 | r = t; 95 | 96 | offset = row_first[r]; 97 | //assign all nonzero indices and values in A(r, k:r) 98 | //( not including A(r,r) ) to temp and temp_nnzs 99 | for (j = offset; j < (int) list[r].size(); j++) { 100 | temp_nnzs.push_back(list[r][j]); 101 | temp[list[r][j]] = coeff(r, list[r][j]); 102 | } 103 | 104 | //assign nonzero indices of A(r:n, r) to temp_nnzs 105 | temp_nnzs.insert(temp_nnzs.end(), m_idx[r].begin(), m_idx[r].end()); 106 | 107 | //assign nonzero values of A(r:n, r) to temp 108 | for (j = 0; j < (int) m_idx[r].size(); j++) { 109 | temp[m_idx[r][j]] = m_x[r][j]; 110 | } 111 | 112 | //perform delayed updates on temp. temp = Sum_{i=0}^{k-1} L(r,i) * D(i,i) * L(k:n, i). 113 | //(the formula above generalizes to block matrix form in the case of 2x2 pivots). 114 | update(r, temp, temp_nnzs, L, D, in_set); 115 | 116 | dr = temp[r]; 117 | temp[r] = 0; 118 | 119 | //find maximum element in temp. 120 | wr = max(temp, temp_nnzs, j); 121 | 122 | if ((alpha*w1*w1 - abs(d1)*wr) < eps) { 123 | //case 2: do nothing. pivot is k. 124 | 125 | } else if ( (alpha * wr - abs(dr)) < eps) { 126 | //case 3: pivot is k with r: 1x1 pivot case. 127 | temp[r] = dr; 128 | work[k] = d1; 129 | 130 | //--------pivot A and L ---------// 131 | pivot(s, in_set, L, k, r); 132 | 133 | //----------pivot rest ----------// 134 | 135 | //permute perm 136 | std::swap(perm[k], perm[r]); 137 | 138 | work.swap(temp); //swap work with temp. 139 | std::swap(work[k], work[r]); //swap kth and rth row of work 140 | 141 | curr_nnzs.swap(temp_nnzs); //swap curr_nnzs with temp_nnzs 142 | 143 | safe_swap(curr_nnzs, k, r); //swap k and r if they are present in curr_nnzs 144 | 145 | d1 = work[k]; 146 | //--------end pivot rest---------// 147 | 148 | } else { 149 | //case 4: pivot is k+1 with r: 2x2 pivot case. 
150 | 151 | //must advance list for 2x2 pivot since we are pivoting on col k+1 152 | advance_list(k); 153 | //for the same reason as above, we must advance L.first as well 154 | L.advance_first(k); 155 | 156 | //restore diagonal elements in work and temp 157 | temp[r] = dr; 158 | work[k] = d1; 159 | 160 | //indicate that pivot is 2x2 161 | size_two_piv = true; 162 | 163 | if (k+1 < r) { 164 | //symmetrically permute row/col k+1 and r. 165 | pivot(s, in_set, L, k+1, r); 166 | 167 | //----------pivot rest ----------// 168 | 169 | //permute perm 170 | std::swap(perm[k+1], perm[r]); 171 | 172 | //swap rows k+1 and r of work and temp 173 | std::swap(work[k+1], work[r]); 174 | std::swap(temp[k+1], temp[r]); 175 | 176 | //swap k+1 and r in curr_nnzs and temp_nnzs 177 | safe_swap(curr_nnzs, k+1, r); 178 | safe_swap(temp_nnzs, k+1, r); 179 | } 180 | 181 | d1 = work[k]; 182 | dr = temp[k+1]; 183 | } 184 | } 185 | //--------------end bkp pivoting--------------// 186 | } else if (piv_type == pivot_type::ROOK) { 187 | //--------------begin rook pivoting--------------// 188 | i = k; 189 | work[k] = d1; 190 | 191 | if (alpha * w1 <= abs(d1) + eps) { 192 | // do nothing 193 | } else { 194 | while (true) { 195 | // zero out the entries of temp left over from the previous candidate column and forget their indices 196 | for (idx_it it = temp_nnzs.begin(); it != temp_nnzs.end(); it++) { 197 | temp[*it] = 0; 198 | } 199 | temp_nnzs.clear(); 200 | 201 | offset = row_first[r]; 202 | //assign all nonzero indices and values in A(r, k:r) 203 | //( not including A(r,r) ) to temp and temp_nnzs 204 | for (j = offset; j < (int) list[r].size(); j++) { 205 | temp_nnzs.push_back(list[r][j]); 206 | temp[list[r][j]] = coeff(r, list[r][j]); 207 | } 208 | 209 | //assign nonzero indices of A(r:n, r) to temp_nnzs 210 | temp_nnzs.insert(temp_nnzs.end(), m_idx[r].begin(), m_idx[r].end()); 211 | 212 | //assign nonzero values of A(r:n, r) to temp 213 | for (j = 0; j < (int) m_idx[r].size(); j++) { 214 | temp[m_idx[r][j]] = m_x[r][j]; 215 | } 216 | 217 | 
//perform delayed updates on temp. temp = Sum_{i=0}^{k-1} L(r,i) * D(i,i) * L(k:n, i). 218 | //(the formula above generalizes to block matrix form in the case of 2x2 pivots). 219 | update(r, temp, temp_nnzs, L, D, in_set); 220 | 221 | dr = temp[r]; 222 | temp[r] = 0; 223 | 224 | //find maximum element in temp. 225 | wr = max(temp, temp_nnzs, j); 226 | temp[r] = dr; 227 | 228 | if (alpha * wr <= abs(dr) + eps) { 229 | // swap rows and columns k and r 230 | pivot(s, in_set, L, k, r); 231 | 232 | std::swap(perm[k], perm[r]); 233 | 234 | std::swap(temp[k], temp[r]); 235 | work.swap(temp); 236 | 237 | safe_swap(temp_nnzs, k, r); 238 | curr_nnzs.swap(temp_nnzs); 239 | 240 | d1 = work[k]; 241 | break; 242 | } else if (abs(w1 - wr) < eps) { 243 | size_two_piv = true; 244 | // swap rows and columns k and i, k+1 and r 245 | if (k != i) { 246 | //symmetrically permute row/col k and i. 247 | pivot(s, in_set, L, k, i); 248 | 249 | //----------pivot rest ----------// 250 | 251 | //permute perm 252 | std::swap(perm[k], perm[i]); 253 | 254 | //swap rows k and i of work and temp 255 | std::swap(work[k], work[i]); 256 | std::swap(temp[k], temp[i]); 257 | 258 | //swap k and i in curr_nnzs and temp_nnzs 259 | safe_swap(curr_nnzs, k, i); 260 | safe_swap(temp_nnzs, k, i); 261 | 262 | d1 = work[k]; 263 | } 264 | 265 | advance_list(k); 266 | L.advance_first(k); 267 | 268 | if (k+1 < r) { 269 | //symmetrically permute row/col k+1 and r. 
270 | pivot(s, in_set, L, k+1, r); 271 | 272 | //----------pivot rest ----------// 273 | 274 | //permute perm 275 | std::swap(perm[k+1], perm[r]); 276 | 277 | //swap rows k+1 and r of work and temp 278 | std::swap(work[k+1], work[r]); 279 | std::swap(temp[k+1], temp[r]); 280 | 281 | //swap k+1 and r in curr_nnzs and temp_nnzs 282 | safe_swap(curr_nnzs, k+1, r); 283 | safe_swap(temp_nnzs, k+1, r); 284 | 285 | dr = temp[k+1]; 286 | } 287 | break; 288 | } else { //no acceptable pivot yet: column r becomes the current candidate and the search moves on to the index of its largest entry 289 | i = r; 290 | w1 = wr; 291 | r = j; 292 | work.swap(temp); 293 | curr_nnzs.swap(temp_nnzs); 294 | } 295 | } 296 | } 297 | //--------------end rook pivoting--------------// 298 | } 299 | 300 | //erase diagonal element from non-zero indices (to exclude it from being dropped) 301 | curr_nnzs.erase(std::remove(curr_nnzs.begin(), curr_nnzs.end(), k), curr_nnzs.end()); 302 | 303 | //performs the dual dropping procedure. 304 | if (!size_two_piv) { 305 | //perform dual dropping criteria on work 306 | drop_tol(work, curr_nnzs, lfil, tol); 307 | 308 | } else { 309 | //erase diagonal 2x2 block from non-zero indices (to exclude it from being dropped) 310 | temp_nnzs.erase(std::remove(temp_nnzs.begin(), temp_nnzs.end(), k), temp_nnzs.end()); 311 | curr_nnzs.erase(std::remove(curr_nnzs.begin(), curr_nnzs.end(), k+1), curr_nnzs.end()); 312 | temp_nnzs.erase(std::remove(temp_nnzs.begin(), temp_nnzs.end(), k+1), temp_nnzs.end()); 313 | 314 | //compute inverse of the 2x2 block diagonal pivot. 
315 | det_D = d1*dr - work[k+1]*work[k+1]; 316 | if ( abs(det_D) < eps) det_D = 1e-6; //statically pivot: a determinant smaller than eps in magnitude is replaced by 1e-6 317 | D_inv11 = dr/det_D; 318 | D_inv22 = d1/det_D; 319 | D_inv12 = -work[k+1]/det_D; 320 | 321 | //assign pivot to D (d1 is assigned to D(k,k) later) 322 | D.off_diagonal(k) = work[k+1]; 323 | D[k+1] = dr; 324 | 325 | //merge nonzeros of curr and temp together so iterating through them will be easier 326 | unordered_inplace_union(curr_nnzs, temp_nnzs.begin(), temp_nnzs.end(), in_set); 327 | 328 | 329 | //multiply inverse of pivot to work and temp (gives us two columns of l) 330 | for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) { 331 | l_11 = work[*it]*D_inv11 + temp[*it]*D_inv12; 332 | l_12 = work[*it]*D_inv12 + temp[*it]*D_inv22; 333 | 334 | //note that work and temp roughly share the same non-zero indices 335 | work[*it] = l_11; 336 | temp[*it] = l_12; 337 | } 338 | 339 | //since the work and temp non-zero indices are roughly the same, 340 | //we can copy it over to temp_nnzs 341 | temp_nnzs.assign(curr_nnzs.begin(), curr_nnzs.end()); 342 | 343 | //perform dual dropping procedure on work and temp 344 | drop_tol(temp, temp_nnzs, lfil, tol); 345 | drop_tol(work, curr_nnzs, lfil, tol); 346 | 347 | 348 | } 349 | 350 | //resize kth column of L to proper size. 351 | L.m_idx[k].resize(curr_nnzs.size()+1); 352 | L.m_x[k].resize(curr_nnzs.size()+1); 353 | 354 | //assign diagonal element to D 355 | D[k] = d1; 356 | 357 | //assign 1s to diagonal of L. 
358 | L.m_x[k][0] = 1; 359 | L.m_idx[k][0] = k; 360 | count++; 361 | 362 | if (!size_two_piv) { 363 | if ( abs(D[k]) < eps) D[k] = 1e-6; //statically pivot 364 | i = 1; 365 | for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) { 366 | if ( abs(work[*it]) > eps) { 367 | L.m_idx[k][i] = *it; //col k nonzero indices of L are stored 368 | L.m_x[k][i] = work[*it]/D[k]; //col k nonzero values of L are stored 369 | 370 | L.list[*it].push_back(k); //update Llist 371 | count++; 372 | i++; 373 | } 374 | } 375 | 376 | col_size = i; 377 | 378 | //advance list and L.first 379 | L.advance_first(k); 380 | advance_list(k); 381 | } else { 382 | //resize k+1th column of L to proper size. 383 | L.m_idx[k+1].resize(temp_nnzs.size()+1); 384 | L.m_x[k+1].resize(temp_nnzs.size()+1); 385 | 386 | //assign 1s to diagonal of L. 387 | L.m_x[k+1][0] = 1; 388 | L.m_idx[k+1][0] = k+1; 389 | count++; 390 | 391 | i = 1; 392 | for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) { 393 | if ( abs(work[*it]) > eps) { 394 | L.m_x[k][i] = work[*it]; //col k nonzero values of L are stored (already multiplied by the inverse of the 2x2 pivot) 395 | L.m_idx[k][i] = *it; //col k nonzero indices of L are stored 396 | 397 | L.list[*it].push_back(k); //update L.list 398 | count++; 399 | i++; 400 | } 401 | 402 | } 403 | 404 | j = 1; 405 | for (idx_it it = temp_nnzs.begin(); it != temp_nnzs.end(); it++) { 406 | if ( abs(temp[*it]) > eps) { 407 | L.m_x[k+1][j] = temp[*it]; //col k+1 nonzero values of L are stored (already multiplied by the inverse of the 2x2 pivot) 408 | L.m_idx[k+1][j] = *it; //col k+1 nonzero indices of L are stored 409 | 410 | L.list[*it].push_back(k+1); //update L.list 411 | count++; 412 | j++; 413 | } 414 | 415 | } 416 | 417 | col_size = i; 418 | col_size2 = j; 419 | 420 | //update list and L.first 421 | L.advance_first(k+1); 422 | advance_list(k+1); 423 | 424 | } 425 | 426 | // ------------- reset temp and work back to zero -----------------// 427 | work[k] = 0; 428 | temp[k] = 0; 429 | 430 | if (k + 1 < ncols) { 431 | temp[k+1] = 0; 432 | work[k+1] = 0; 433 | } 434 | 
435 | for (idx_it it = curr_nnzs.begin(); it != curr_nnzs.end(); it++) { 436 | work[*it] = 0; 437 | } 438 | curr_nnzs.clear(); //work vector is zeroed out above; now forget its nonzero pattern 439 | 440 | for (idx_it it = temp_nnzs.begin(); it != temp_nnzs.end(); it++) { 441 | temp[*it] = 0; 442 | } 443 | temp_nnzs.clear(); //temp vector is zeroed out above; now forget its nonzero pattern 444 | 445 | //-------------------------------------------------------------------// 446 | 447 | //resize columns of L to correct size 448 | L.m_x[k].resize(col_size); 449 | L.m_idx[k].resize(col_size); 450 | 451 | if (size_two_piv) { 452 | L.m_x[k+1].resize(col_size2); 453 | L.m_idx[k+1].resize(col_size2); 454 | k++; //a 2x2 pivot fills columns k and k+1, so skip the next column 455 | 456 | size_two_piv = false; 457 | } 458 | } 459 | 460 | //assign number of non-zeros in L to L.nnz_count 461 | L.nnz_count = count; 462 | 463 | } 464 | 465 | #endif 466 | --------------------------------------------------------------------------------