├── .gitignore
├── src
    ├── Resources
    │   ├── fem.png
    │   └── fem2.png
    ├── test
    │   ├── test_data
    │   │   ├── simple.mat
    │   │   ├── simpleb.mat
    │   │   ├── tetVolA.mat
    │   │   ├── tetVolb.mat
    │   │   ├── simpleAns.mat
    │   │   ├── simpleTri.mat
    │   │   ├── tetVolAns.mat
    │   │   ├── simpleTriAns.mat
    │   │   └── simpleTrib.mat
    │   ├── sanity3D.cc
    │   ├── tetVol.cc
    │   ├── sanity2D.cc
    │   └── CMakeLists.txt
    ├── core
    │   ├── include
    │   │   ├── cuda_resources.h
    │   │   ├── cycles
    │   │   │   ├── vcycle.h
    │   │   │   ├── fcycle.h
    │   │   │   ├── wcycle.h
    │   │   │   ├── cgcycle.h
    │   │   │   └── cycle.h
    │   │   ├── amg_signal.h
    │   │   ├── Logger.h
    │   │   ├── Helper.h
    │   │   ├── allocator.h
    │   │   ├── FEM
    │   │   │   ├── FEM2D.h
    │   │   │   └── FEM3D.h
    │   │   ├── error.h
    │   │   ├── util.h
    │   │   ├── smoothedMG
    │   │   │   ├── aggregators
    │   │   │   │   ├── Timer.h
    │   │   │   │   ├── aggregator.h
    │   │   │   │   └── mis.h
    │   │   │   └── smoothedMG_amg_level.h
    │   │   ├── types.h
    │   │   ├── amg.h
    │   │   ├── Color.h
    │   │   ├── my_timer.h
    │   │   ├── cutil.h
    │   │   ├── amg_level.h
    │   │   ├── tetmesh.h
    │   │   └── smoothers
    │   │   │   ├── gauss_seidel.h
    │   │   │   └── smoother.h
    │   ├── cuda
    │   │   ├── cuda_resources.cu
    │   │   ├── aggregator.cu
    │   │   ├── smoother.cu
    │   │   ├── allocator.cu
    │   │   ├── amg_signal.cu
    │   │   ├── cgcycle.cu
    │   │   ├── amg_level.cu
    │   │   ├── FEM2D.cu
    │   │   └── randomizedMIS_GPU.cu
    │   ├── aggmis
    │   │   ├── include
    │   │   │   ├── AggMIS_MIS_CPU.h
    │   │   │   ├── AggMIS_IOHelpers.h
    │   │   │   ├── AggMIS_MIS_GPU.h
    │   │   │   ├── Timer.h
    │   │   │   ├── AggMIS_Metrics.h
    │   │   │   ├── AggMIS_FileIO.h
    │   │   │   ├── AggMIS_Aggregation_GPU.h
    │   │   │   ├── AggMIS_Aggregation_CPU.h
    │   │   │   ├── AggMIS_Types.h
    │   │   │   ├── AggMIS_GraphHelpers.h
    │   │   │   └── AggMIS_MergeSplitConditioner_CPU.h
    │   │   └── cuda
    │   │   │   ├── AggMIS_IOHelpers.cu
    │   │   │   ├── TriMesh_connectivity.cu
    │   │   │   ├── AggMIS_MIS_CPU.cu
    │   │   │   └── AggMIS_Aggregation_GPU.cu
    │   └── CMakeLists.txt
    ├── CTestConfig.cmake
    ├── examples
    │   ├── CMakeLists.txt
    │   ├── example2.cu
    │   └── example1.cu
    ├── cuda_compute_capability.c
    ├── CMakeLists.txt
    ├── FEMSolver.h
    └── CodeCoverage.cmake
├── LICENSE
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | msvc
2 | build
3 | Debug
4 | Release
5 | *.swp
6 | ~*
7 | *.project
8 | *.cproject
9 | 


--------------------------------------------------------------------------------
/src/Resources/fem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/Resources/fem.png


--------------------------------------------------------------------------------
/src/Resources/fem2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/Resources/fem2.png


--------------------------------------------------------------------------------
/src/test/test_data/simple.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simple.mat


--------------------------------------------------------------------------------
/src/test/test_data/simpleb.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleb.mat


--------------------------------------------------------------------------------
/src/test/test_data/tetVolA.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/tetVolA.mat


--------------------------------------------------------------------------------
/src/test/test_data/tetVolb.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/tetVolb.mat


--------------------------------------------------------------------------------
/src/test/test_data/simpleAns.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleAns.mat


--------------------------------------------------------------------------------
/src/test/test_data/simpleTri.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleTri.mat


--------------------------------------------------------------------------------
/src/test/test_data/tetVolAns.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/tetVolAns.mat


--------------------------------------------------------------------------------
/src/test/test_data/simpleTriAns.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleTriAns.mat


--------------------------------------------------------------------------------
/src/test/test_data/simpleTrib.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/HEAD/src/test/test_data/simpleTrib.mat


--------------------------------------------------------------------------------
/src/core/include/cuda_resources.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CUDA_RESOURCES_H__
 2 | #define __CUDA_RESOURCES_H__
 3 | 
 4 | #include <stdio.h>
 5 | #include <cutil.h>
 6 | 
 7 | int getMaxThreads(const int max_regs_per_thread, int cuda_device);
 8 | 
 9 | #endif //end __CUDA_RESOURCES_H__
10 | 


--------------------------------------------------------------------------------
/src/CTestConfig.cmake:
--------------------------------------------------------------------------------
1 | SET(CTEST_PROJECT_NAME "SCI-Solver_FEM")
2 | SET(CTEST_NIGHTLY_START_TIME "00:00:00 MDT")
3 | SET(CTEST_DROP_METHOD "http")
4 | SET(CTEST_DROP_SITE "my.cdash.org")
5 | SET(CTEST_DROP_LOCATION "/submit.php?project=SCI-Solver_FEM")
6 | SET(CTEST_DROP_SITE_CDASH TRUE)
7 | 


--------------------------------------------------------------------------------
/src/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | include_directories("${CMAKE_SOURCE_DIR}/core/include")
2 | include_directories("${CMAKE_SOURCE_DIR}")
3 | 
4 | CUDA_ADD_EXECUTABLE(Example1 example1.cu)
5 | TARGET_LINK_LIBRARIES(Example1 FEM_SOLVER FEM_CORE)
6 | 
7 | CUDA_ADD_EXECUTABLE(Example2 example2.cu)
8 | TARGET_LINK_LIBRARIES(Example2 FEM_SOLVER FEM_CORE)


--------------------------------------------------------------------------------
/src/core/cuda/cuda_resources.cu:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <cuda.h>
 3 | #include <cuda_runtime_api.h>
 4 | 
 5 | /**
 6 |  * Estimates how many threads fit in one block on `cuda_device` when every
 7 |  * thread may use up to `max_regs_per_thread` registers: the device's
 8 |  * per-block register count divided by the per-thread register budget.
 9 |  *
10 |  * @param max_regs_per_thread  register budget per thread; must be > 0
11 |  * @param cuda_device          ordinal of the CUDA device to query
12 |  * @return the thread count, or 0 if the budget is not positive or the
13 |  *         device properties could not be read
14 |  */
15 | int getMaxThreads(
16 |     const int max_regs_per_thread,
17 |     int cuda_device) {
18 |   // A zero budget would divide by zero; a negative one is meaningless.
19 |   if (max_regs_per_thread <= 0)
20 |     return 0;
21 |   cudaDeviceProp d;
22 |   // On failure `d` is left unfilled, so its fields must not be read.
23 |   if (cudaGetDeviceProperties(&d, cuda_device) != cudaSuccess)
24 |     return 0;
25 |   return d.regsPerBlock / max_regs_per_thread;
26 | }
27 | 


--------------------------------------------------------------------------------
/src/core/include/cycles/vcycle.h:
--------------------------------------------------------------------------------
 1 | #ifndef __VCYCLE_H__
 2 | #define __VCYCLE_H__
 3 | 
 4 | #include <cycles/cycle.h>
 5 | template <class Matrix, class Vector>
 6 | class V_Cycle {  // V-cycle driver: all of the work happens in the constructor
 7 |   public:
 8 |   inline V_Cycle(AMG_Level<Matrix,Vector> *next, const Vector& b, Vector &x) {
 9 |     next->cycle(V_CYCLE,b,x);  // exactly one cycle() call on `next`, forwarding b and x
10 |   }
11 | };
12 | #endif
13 | 


--------------------------------------------------------------------------------
/src/core/include/amg_signal.h:
--------------------------------------------------------------------------------
 1 | #ifndef __AMG_SIGNAL_H__
 2 | #define __AMG_SIGNAL_H__
 3 | 
 4 | /*****************************************
 5 |  * A class that will install all signal
 6 |  * handlers when constructed
 7 |  ****************************************/
 8 | class SignalHandler {
 9 |   public:
10 |     SignalHandler();  // declared only; the definition is not part of this header
11 | };
12 | #endif
13 | 


--------------------------------------------------------------------------------
/src/core/include/cycles/fcycle.h:
--------------------------------------------------------------------------------
 1 | #ifndef __FCYCLE_H__
 2 | #define __FCYCLE_H__
 3 | // F-cycle driver: the constructor runs the cycle on `next`, forwarding b and x to AMG_Level::cycle.
 4 | template <class Matrix, class Vector> class F_Cycle {
 5 |   public:
 6 |   inline F_Cycle(AMG_Level<Matrix,Vector> *next, const Vector& b, Vector &x) {
 7 |     if(next->isFinest()) {  // finest level: a single F_CYCLE pass and nothing else
 8 |       next->cycle(F_CYCLE,b,x);
 9 |       return;
10 |     }
11 |     next->cycle(W_CYCLE,b,x);  // any other level: one W_CYCLE pass ...
12 |     next->cycle(V_CYCLE,b,x);  // ... followed by one V_CYCLE pass
13 |   }
14 | };
15 | #endif
16 | 


--------------------------------------------------------------------------------
/src/core/include/cycles/wcycle.h:
--------------------------------------------------------------------------------
 1 | #ifndef __WCYCLE_H__
 2 | #define __WCYCLE_H__
 3 | 
 4 | 
 5 | // W-cycle driver: the constructor calls cycle(W_CYCLE) on `next` once on the finest level, twice otherwise.
 6 | template <class Matrix, class Vector> class W_Cycle {
 7 |   public:
 8 |   inline W_Cycle(AMG_Level<Matrix,Vector> *next, const Vector& b, Vector &x) {
 9 |     // isFinest() is queried exactly once, before any cycle() call.
10 |     if(!next->isFinest()) {
11 |       // extra pass taken only away from the finest level
12 |       next->cycle(W_CYCLE,b,x);
13 |     }
14 |     next->cycle(W_CYCLE,b,x);
15 |   }
16 | };
17 | 
18 | #endif 
19 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_MIS_CPU.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * File:   AggMIS_MIS_CPU.h
 3 |  * Author: T. James Lewis
 4 |  *
 5 |  * Created on June 25, 2013, 6:13 PM
 6 |  */
 7 | 
 8 | #ifndef AGGMIS_MIS_CPU_H
 9 | #define	AGGMIS_MIS_CPU_H
10 | #include <AggMIS_Types.h>
11 | #include <queue>
12 | namespace AggMIS {
13 |     namespace MIS {
14 |       Types::IntVector_h* FloodFillMIS(int k, Types::Graph_h &graph);  // returns a raw pointer; NOTE(review): ownership presumably passes to the caller - confirm
15 |         Types::IntVector_h* NaiveMIS(int k, Types::Graph_h &graph);  // same signature as FloodFillMIS; both are only declared in this header
16 |     }
17 | }
18 | #endif	/* AGGMIS_MIS_CPU_H */
19 | 
20 | 


--------------------------------------------------------------------------------
/src/core/include/Logger.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * File:   Logger.h
 3 |  * Author: T. James Lewis
 4 |  *
 5 |  * Created on July 25, 2013, 12:44 PM
 6 |  */
 7 | 
 8 | #ifndef LOGGER_H
 9 | #define	LOGGER_H
10 | 
11 | #include <string>
12 | #include <iostream>
13 | #include <fstream>
14 | /// Appends `output` and a newline to the file `fileName`; a file that cannot be opened is silently skipped.
15 | inline void Log(const std::string& fileName, const std::string& output) {
16 |     // Arguments are taken by const reference so a call no longer copies both strings.
17 |     // Append mode keeps earlier entries; the stream flushes and closes itself when it leaves scope.
18 |     std::ofstream outputFile(fileName.c_str(), std::ofstream::app);
19 |     outputFile << output << "\n";
20 | }
21 | #endif	/* LOGGER_H */
22 | 
23 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_IOHelpers.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * File:   AggMIS_IOHelpers.h
 3 |  * Author: T. James Lewis
 4 |  *
 5 |  * Created on May 25, 2013, 4:13 PM
 6 |  */
 7 | 
 8 | #ifndef AGGMIS_IOHELPERS_H
 9 | #define	AGGMIS_IOHELPERS_H
10 | 
11 | #include <iostream>
12 | #include <fstream>
13 | #include <sstream>
14 | #include <vector>
15 | 
16 | namespace AggMIS {
17 |     namespace InputHelpers {        // declarations only; NOTE(review): the CIN suffix suggests these read from std::cin - confirm in the .cu
18 |         std::string GetNonEmptyLineCIN();  // note: this header does not include <string> itself
19 |         int GetSingleIntegerValueCIN();
20 |         std::vector<int> GetIntegerValuesCIN();
21 |     }
22 | }
23 | 
24 | #endif	/* AGGMIS_IOHELPERS_H */
25 | 
26 | 


--------------------------------------------------------------------------------
/src/core/include/cycles/cgcycle.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CGCYCLE_H__
 2 | #define __CGCYCLE_H__
 3 | 
 4 | template <class Matrix, class Vector> class CG_Cycle;
 5 | template <class Matrix, class Vector> class CG_Flex_Cycle;
 6 | 
 7 | #include <cycles/cycle.h>
 8 | #include <amg_level.h>
 9 | #include <types.h>
10 | 
11 | template <class Matrix, class Vector>
12 |   class CG_Flex_Cycle {  // only a constructor is declared; it receives every argument of the solve
13 |     public:
14 |       typedef typename Matrix::value_type ValueType; // scalar type of Matrix; not used by the constructor signature below
15 |       CG_Flex_Cycle(CycleType next_cycle, int num_iters, 
16 |         AMG_Level<Matrix_h,Vector_h> *next, const Matrix_hyb_d_CG &Aell,  // `next` is fixed to Matrix_h/Vector_h, independent of the Matrix/Vector template arguments
17 |         const Vector_d_CG &b, Vector_d_CG &x, CGType tol, int maxiters, bool verbose = false);  // verbose is the only defaulted parameter
18 |  };
19 | #endif 
20 | 


--------------------------------------------------------------------------------
/src/core/cuda/aggregator.cu:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <smoothedMG/aggregators/mis.h>
 3 | /*********************************************
 4 |  * Allocates an aggregator; `type` is currently ignored and a RandMIS_Aggregator is always returned
 5 |  *********************************************/
 6 | template <class Matrix, class Vector>
 7 | Aggregator<Matrix, Vector>* Aggregator<Matrix, Vector>::allocate(int type)
 8 | {
 9 |   //if (type == 0)
10 |   //  return new MIS_Aggregator < Matrix, Vector > ;
11 |   //else
12 |   return new RandMIS_Aggregator < Matrix, Vector > ;  // created with new; NOTE(review): the caller presumably deletes it - confirm
13 | }
14 | 
15 | /****************************************
16 |  * Explicit instantiations
17 |  ***************************************/
18 | template class Aggregator < Matrix_h, Vector_h > ;
19 | template class Aggregator < Matrix_d, Vector_d > ;
20 | 


--------------------------------------------------------------------------------
/src/core/include/Helper.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * File:   Helper.h
 3 |  * Author: T. James Lewis
 4 |  *
 5 |  * Created on July 17, 2013, 12:24 PM
 6 |  */
 7 | 
 8 | #ifndef HELPER_H
 9 | #define	HELPER_H
10 | namespace Helper {
11 |     /// Binary search over array[0..size), which must be sorted ascending under operator<.
12 |     /// Returns the index of the first element equal to `value`, or -1 if it is absent or size <= 0.
13 |     template <class T>
14 |     int BinarySearch(T value, T* array, int size) {
15 |         int lo = 0;
16 |         int hi = size - 1;
17 |         while (lo < hi) {
18 |             const int mid = lo + (hi - lo) / 2;  // cannot overflow, unlike (hi + lo) / 2 on very large arrays
19 |             if (array[mid] < value)
20 |                 lo = mid + 1;
21 |             else
22 |                 hi = mid;
23 |         }
24 |         // lo == hi only holds for a non-empty range, so array[lo] is never read when size <= 0.
25 |         return (hi == lo && array[lo] == value) ? lo : -1;
26 |     }
27 | }
28 | #endif	/* HELPER_H */
29 | 
30 | 


--------------------------------------------------------------------------------
/src/core/include/allocator.h:
--------------------------------------------------------------------------------
 1 | #ifndef __ALLOCATOR_H__
 2 | #define __ALLOCATOR_H__
 3 | 
 4 | #include <stack>
 5 | #include <map>
 6 | /***********************************************************
 7 |  * Class to allocate arrays of memory for temporary use.
 8 |  * The allocator will hold onto the memory for the next call.
 9 |  * This allows memory like Vectors to be reused in different
10 |  * parts of the algorithm without having to store it in 
11 |  * a class and hold onto even when it isn't being used.
12 |  ***********************************************************/
13 | template<typename T>
14 | class Allocator {
15 |   typedef std::stack<T*> FreeList;  // stack of released objects waiting to be reused
16 |   typedef std::map<int,FreeList> FreeMap;  // one FreeList per size key
17 |     
18 |   public:
19 |     static T* allocate(int size);  // obtain an object for `size`
20 |     static void free(T *v,int size);  // hand `v` back to the pool kept for `size`
21 |     static void clear();  // empty the whole pool
22 | 
23 |   private:
24 |     static FreeMap free_vars;  //a map of vector lists, shared by all users of Allocator<T>
25 | };
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/core/include/FEM/FEM2D.h:
--------------------------------------------------------------------------------
 1 | #ifndef __FEM2D_H__
 2 | #define __FEM2D_H__
 3 | 
 4 | #include <TriMesh.h>
 5 | #include <types.h>
 6 | #include <vector>
 7 | 
 8 | class FEM2D  // FEM helper working on a TriMesh; every member below is public
 9 | {
10 | public:
11 |   FEM2D() : nv(0), ne(0) {}  // counts start at zero instead of holding indeterminate values
12 |   FEM2D(TriMesh* meshPtr);
13 |   void initializeWithTriMesh(TriMesh* meshPtr);
14 |   void assemble(TriMesh* meshPtr, Matrix_ell_d_CG &A, Vector_d_CG &b);  // overload writing into a Matrix_ell_d_CG
15 |   void assemble(TriMesh* meshPtr, Matrix_d_CG &A, Vector_d_CG &b);  // overload writing into a Matrix_d_CG
16 |   void JacobiGLZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta);
17 |   void JacobiGRZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta);
18 |   void JacobiPoly(int degree, Vector_h_CG x, int alpha, int beta, Vector_h_CG &y);  // x is taken by value (copied), unlike JacobiPolyDerivative
19 |   void JacobiPolyDerivative(int degree, Vector_h_CG &x, int alpha, int beta, Vector_h_CG &y);
20 |   void JacobiGZeros(int degree, int alpha, int beta, Vector_h_CG &z);
21 | 
22 |   IdxVector_d d_tri0;  // NOTE(review): d_tri0..2 presumably hold the three vertex indices of each triangle - confirm
23 |   IdxVector_d d_tri1;
24 |   IdxVector_d d_tri2;
25 |   Vector_d_CG d_vx;  // NOTE(review): d_vx/d_vy presumably hold vertex coordinates - confirm
26 |   Vector_d_CG d_vy;
27 |   int nv;  // NOTE(review): presumably the vertex count - confirm
28 |   int ne;  // NOTE(review): presumably the element count - confirm
29 | };
30 | #endif
31 | 


--------------------------------------------------------------------------------
/src/test/sanity3D.cc:
--------------------------------------------------------------------------------
 1 | #include "gtest/gtest.h"
 2 | #include "FEMSolver.h"
 3 | TEST(SanityTests, EggCarton3D) {
 4 |   //solve the system from simple.mat / simpleb.mat and compare with the stored answer
 5 |   FEMSolver cfg(std::string(TEST_DATA_DIR) + "/simple", true, true);
 6 |   //read the A matrix
 7 |   cfg.readMatlabSparseMatrix(std::string(TEST_DATA_DIR) + "/simple.mat");
 8 |   //read the b vector
 9 |   Vector_h_CG b_h(cfg.getMatrixRows(), 1.0), x_h(cfg.getMatrixRows(), 0.), x_answer;
10 |   cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleb.mat", &b_h);
11 |   //solve
12 |   cfg.solveFEM(&x_h, &b_h);
13 |   //read in known answer
14 |   cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleAns.mat", &x_answer);
15 |   //a short or unreadable answer file must fail here rather than be indexed out of range below
16 |   const std::size_t rows = static_cast<std::size_t>(cfg.getMatrixRows());
17 |   ASSERT_GE(x_answer.size(), rows);
18 |   double error = 0.;  //sum of squared differences between solution and answer
19 |   for (std::size_t i = 0; i < rows; i++) {
20 |     const double diff = x_h[i] - x_answer[i];
21 |     error += diff * diff;
22 |   }
23 |   std::cout << "The error is : " << std::sqrt(error) << std::endl;
24 |   ASSERT_TRUE(std::sqrt(error) < 1.);
25 | }
26 | 


--------------------------------------------------------------------------------
/src/test/tetVol.cc:
--------------------------------------------------------------------------------
 1 | #include "gtest/gtest.h"
 2 | #include "FEMSolver.h"
 3 | TEST(SanityTests, EggCarton3D) {
 4 |   //tetVol data set. NOTE(review): the test name is identical to the one in sanity3D.cc - confirm they never link into one binary
 5 |   FEMSolver cfg(std::string(TEST_DATA_DIR) + "/tetVol", true, true);
 6 |   //read the A matrix
 7 |   cfg.readMatlabSparseMatrix(std::string(TEST_DATA_DIR) + "/tetVolA.mat");
 8 |   //read the b vector
 9 |   Vector_h_CG b_h(cfg.getMatrixRows(), 1.0), x_h(cfg.getMatrixRows(), 0.), x_answer;
10 |   cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/tetVolb.mat", &b_h);
11 |   //solve
12 |   cfg.solveFEM(&x_h, &b_h);
13 |   //read in known answer
14 |   cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/tetVolAns.mat", &x_answer);
15 |   //a short or unreadable answer file must fail here rather than be indexed out of range below
16 |   const std::size_t rows = static_cast<std::size_t>(cfg.getMatrixRows());
17 |   ASSERT_GE(x_answer.size(), rows);
18 |   double error = 0.;  //sum of squared differences between solution and answer
19 |   for (std::size_t i = 0; i < rows; i++) {
20 |     const double diff = x_h[i] - x_answer[i];
21 |     error += diff * diff;
22 |   }
23 |   std::cout << "The error is : " << std::sqrt(error) << std::endl;
24 |   ASSERT_TRUE(std::sqrt(error) < 25.);
25 | }
26 | 


--------------------------------------------------------------------------------
/src/test/sanity2D.cc:
--------------------------------------------------------------------------------
 1 | #include "gtest/gtest.h"
 2 | #include "FEMSolver.h"
 3 | TEST(SanityTests, EggCarton2D) {
 4 |   //solve the system from simpleTri.mat / simpleTrib.mat and compare with the stored answer
 5 |   FEMSolver cfg(std::string(TEST_DATA_DIR) + "/simple.ply", false, true);
 6 |   //read the A matrix
 7 |   cfg.readMatlabSparseMatrix(std::string(TEST_DATA_DIR) + "/simpleTri.mat");
 8 |   //read the b vector
 9 |   Vector_h_CG b_h(cfg.getMatrixRows(), 1.0), x_h(cfg.getMatrixRows(), 0.), x_answer;
10 |   cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleTrib.mat", &b_h);
11 |   //solve
12 |   cfg.solveFEM(&x_h, &b_h);
13 |   //read in known answer
14 |   cfg.readMatlabArray(std::string(TEST_DATA_DIR) + "/simpleTriAns.mat", &x_answer);
15 |   //the loop is bounded by the answer length, so x_h must hold at least that many entries
16 |   const std::size_t count = x_answer.size();
17 |   ASSERT_LE(count, x_h.size());
18 |   double error = 0.;  //sum of squared differences between solution and answer
19 |   for (std::size_t i = 0; i < count; i++) {
20 |     const double diff = x_h[i] - x_answer[i];
21 |     error += diff * diff;
22 |   }
23 |   std::cout << "The error is : " << std::sqrt(error) << std::endl;
24 |   ASSERT_TRUE(std::sqrt(error) < 100.);
25 | }
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2014 The Scientific Computing and Imaging Institute
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/core/include/cycles/cycle.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CYCLE_H__
 2 | #define __CYCLE_H__
 3 | 
 4 | enum CycleType {V_CYCLE,W_CYCLE,F_CYCLE,K_CYCLE};
 5 | 
 6 | #include <error.h>
 7 | #include <amg_level.h>
 8 | 
 9 | template <class Matrix, class Vector> void dispatch_cycle(int num_iters, 
10 |   CycleType cycle, AMG_Level<Matrix,Vector> *level, const Vector& b, Vector &x);
11 | 
12 | #include <amg.h>
13 | 
14 | #include <cycles/vcycle.h>
15 | #include <cycles/wcycle.h>
16 | #include <cycles/fcycle.h>
17 | #include <cycles/cgcycle.h>
18 | /*******************************************************
19 |  * Runs the cycle selected by `cycle` on `level` by constructing the matching *_Cycle object
20 |  *******************************************************/
21 | template <class Matrix, class Vector>
22 | void dispatch_cycle(int num_iters, CycleType cycle, AMG_Level<Matrix,Vector> 
23 |   *level, const Vector& b, Vector &x) {
24 |   switch(cycle) {
25 |     case V_CYCLE:
26 |       V_Cycle<Matrix,Vector>(level,b,x);  // the temporary's constructor performs the cycle
27 |       break;
28 |     case W_CYCLE:
29 |       W_Cycle<Matrix,Vector>(level,b,x);
30 |       break;
31 |     case F_CYCLE:
32 |       F_Cycle<Matrix,Vector>(level,b,x);
33 |       break;
34 |     case K_CYCLE:  // the only branch that uses num_iters
35 |       CG_Flex_Cycle<Matrix,Vector>(K_CYCLE,num_iters,level,b,x);  // NOTE(review): 5 arguments, but the constructor declared in cgcycle.h needs 8 - confirm this compiles when instantiated
36 |       break;
37 |     default:
38 |       FatalError("dispatch_cycle not defined for cycle type");  // prints the message and a stack trace, then exits
39 |   }
40 | }
41 | 
42 | #endif 
43 | 


--------------------------------------------------------------------------------
/src/core/cuda/smoother.cu:
--------------------------------------------------------------------------------
 1 | #include <smoothers/smoother.h>
 2 | #include <cusp/blas.h>
 3 | #include <types.h>
 4 | 
 5 | /***************************************
 6 |  * Source Definitions
 7 |  ***************************************/
 8 | 
 9 | template<class Matrix, class Vector>
10 | Smoother<Matrix,Vector>::~Smoother() {  // empty out-of-line destructor
11 | };
12 | 
13 | template<class Matrix, class Vector>
14 | void Smoother<Matrix,Vector>::smooth_with_0_initial_guess(const Matrix &A, const Vector &b, Vector &x) {
15 |   //default path: zero-fill x, then call the regular smooth().  smoothers can optimize this path if they wish
16 |   cusp::blas::fill(x,0);  // overwrite every entry of x with 0
17 |   smooth(A,b,x);  // smooth starting from that zero guess
18 | };
19 | 
20 | #include <smoothers/gauss_seidel.h>
21 | /*********************************************
22 |  * Allocates a smoother; there is no type selector, a gauss_seidel is always created
23 |  *********************************************/
24 | template <class Matrix, class Vector>
25 | Smoother<Matrix, Vector>* Smoother<Matrix, Vector>::allocate(double smootherWeight,
26 |   int preInnerIters, int postInnerIters, int postRelaxes, const Matrix_d& A)  // A is always a Matrix_d, whatever Matrix is
27 | {
28 |   return new gauss_seidel<Matrix, Vector>( smootherWeight,  // all five arguments are forwarded unchanged
29 |     preInnerIters, postInnerIters, postRelaxes, A);  // created with new; NOTE(review): the caller presumably deletes it - confirm
30 | }
31 | 
32 | /****************************************
33 |  * Explicit instantiations
34 |  ***************************************/
35 | //template class Smoother<Matrix_h,Vector_h>;
36 | template class Smoother<Matrix_d,Vector_d>;
37 | 
38 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_MIS_GPU.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * File:   AggMIS_MIS_GPU.h
 3 |  * Author: T. James Lewis
 4 |  *
 5 |  * Created on April 17, 2013, 12:49 PM
 6 |  */
 7 | 
 8 | #ifndef AGGMIS_MIS_GPU_H
 9 | #define	AGGMIS_MIS_GPU_H
10 | #include "AggMIS_Types.h"
11 | namespace AggMIS {
12 |   namespace MIS {
13 |     namespace Kernels {  // CUDA kernels (__global__); declared here only
14 |       __global__ void GenerateRandoms(int size,
15 |         int iterations,
16 |         unsigned int *randoms,
17 |         unsigned int *seeds);
18 |       __global__ void PreInitialize(int size,
19 |         unsigned int *randoms,
20 |         int *bestSeen,
21 |         int *origin,
22 |         int *mis);
23 |       __global__ void Initialize(int size,  // same parameters as PreInitialize plus `incomplete`
24 |         unsigned int *randoms,
25 |         int *bestSeen,
26 |         int *origin,
27 |         int *mis,
28 |         int *incomplete);
29 |       __global__ void Iterate(int size,
30 |         int *originIn,
31 |         int *originOut,
32 |         int *bestSeenIn,
33 |         int *bestSeenOut,
34 |         int *adjIndexes,  // NOTE(review): adjIndexes/adjacency presumably form a CSR-style adjacency list - confirm
35 |         int *adjacency);
36 |       __global__ void Finalize(int size,  // same parameters as Iterate plus `mis` and `incomplete`
37 |         int *originIn,
38 |         int *originOut,
39 |         int *bestSeenIn,
40 |         int *bestSeenOut,
41 |         int *adjIndexes,
42 |         int *adjacency,
43 |         int *mis,
44 |         int *incomplete);
45 |     }
46 |     Types::IntVector_d* RandomizedMIS(int k, Types::Graph_d &graph);  // returns a raw pointer; NOTE(review): ownership presumably passes to the caller - confirm
47 |     bool IsValidKMIS(Types::IntVector_d &misIn, 
48 |       Types::Graph_d &graph, int k, bool verbose);  // takes a candidate `misIn` together with the same graph/k inputs as RandomizedMIS
49 |   }
50 | }
51 | 
52 | 
53 | #endif	/* AGGMIS_MIS_GPU_H */
54 | 
55 | 


--------------------------------------------------------------------------------
/src/core/include/FEM/FEM3D.h:
--------------------------------------------------------------------------------
 1 | #ifndef __FEM3D_H__
 2 | #define __FEM3D_H__
 3 | 
 4 | #include <tetmesh.h>
 5 | #include <types.h>
 6 | #define DEGREE 4
 7 | 
 8 | class FEM3D  // FEM helper working on a TetMesh; every member below is public
 9 | {
10 | public:
11 |   FEM3D() : nv(0), ne(0) {}  // counts start at zero instead of holding indeterminate values
12 |   FEM3D(TetMesh* meshPtr);
13 |   void initializeWithTetMesh(TetMesh* meshPtr);
14 |   void assemble(TetMesh* meshPtr, Matrix_ell_d_CG &A, Vector_d_CG &b, bool isdevice);
15 |   void JacobiGLZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta);
16 |   void JacobiGRZW(Vector_h_CG& Z, Vector_h_CG& weight, int degree, int alpha, int beta);
17 |   void JacobiPoly(int degree, Vector_h_CG x, int alpha, int beta, Vector_h_CG &y);  // x is taken by value (copied), unlike JacobiPolyDerivative
18 |   void JacobiPolyDerivative(int degree, Vector_h_CG &x, int alpha, int beta, Vector_h_CG &y);
19 |   void JacobiGZeros(int degree, int alpha, int beta, Vector_h_CG &z);
20 |   void Transform2StdTetSpace(const Vector_h_CG &z_x, const Vector_h_CG &z_y, const Vector_h_CG &z_z, CGType(*VecXYZ)[DEGREE][DEGREE][3]);  // array extents come from the DEGREE macro (4)
21 |   void EvalBasisTet(CGType(*coefmatBaseTet)[4], const CGType(*qdTet)[DEGREE][DEGREE][3], CGType(*phiTet)[DEGREE][DEGREE][4]);
22 |   void IntegrationInTet(Vector_h_CG &phi, Vector_h_CG &weight_x, Vector_h_CG &weight_y, Vector_h_CG &weight_z, Vector_h_CG &integralMass);
23 |   CGType Integration_Quadrilateral_3d(Matrix_ell_d_CG::value_type(*fx)[DEGREE][DEGREE], Vector_h_CG &w_x, Vector_h_CG &w_y, Vector_h_CG &w_z);
24 | 
25 |   IdxVector_d d_tri0;  // NOTE(review): d_tri0..3 presumably hold the four vertex indices of each tetrahedron - confirm
26 |   IdxVector_d d_tri1;
27 |   IdxVector_d d_tri2;
28 |   IdxVector_d d_tri3;
29 |   Vector_d_CG d_vx;  // NOTE(review): d_vx/d_vy/d_vz presumably hold vertex coordinates - confirm
30 |   Vector_d_CG d_vy;
31 |   Vector_d_CG d_vz;
32 |   int nv;  // NOTE(review): presumably the vertex count - confirm
33 |   int ne;  // NOTE(review): presumably the element count - confirm
34 | };
35 | #endif
36 | 


--------------------------------------------------------------------------------
/src/core/cuda/allocator.cu:
--------------------------------------------------------------------------------
 1 | #include <allocator.h>
 2 | 
 3 | #include<types.h>
 4 | 
 5 | #include <stack>
 6 | #include <map>
 7 | 
 8 | template<typename T>
 9 | typename Allocator<T>::FreeMap Allocator<T>::free_vars;  // storage for the static pool; `typename` is needed because FreeMap is a dependent type
10 | 
11 | template<typename T> 
12 | inline T* allocate(int size) {  // generic helper used by Allocator<T>::allocate
13 |   return new T(size);  // ONE object of type T, constructed with `size` as its argument
14 | };
15 | 
16 | template<>
17 | inline int* allocate<int>(int size) {  // specialization for int
18 |   return new int[size];  // an ARRAY of `size` ints; NOTE(review): Allocator<T>::clear() uses plain delete, which would not match new[] if Allocator<int> were instantiated
19 | }
20 | 
21 | //Hands out an object for `size`: a previously freed one if the pool for that
22 | //size is non-empty, otherwise a new one from the file-level ::allocate<T>.
23 | //Give it back with free(v,size), using the same size, to make it reusable.
24 | template<typename T>
25 | T* Allocator<T>::allocate(int size) {
26 |   //pool of released objects registered under exactly this size
27 |   //(map::operator[] inserts an empty pool the first time a size is seen)
28 |   FreeList &pool=free_vars[size];
29 | 
30 |   //nothing to recycle, so build a brand-new object
31 |   if(pool.empty())
32 |     return ::allocate<T>(size);
33 | 
34 |   //otherwise reuse the most recently freed object ...
35 |   T *recycled=pool.top();
36 |   //... and drop it from the pool so it is not handed out twice
37 |   pool.pop();
38 |   return recycled;
39 | }
40 | 
41 | template<typename T>
42 | void Allocator<T>::free(T* v,int size) {
43 |   //park v in the pool for `size` so a later allocate(size) can hand it out again; nothing is deleted here
44 |   free_vars[size].push(v);
45 | }
46 | 
47 | template<typename T>
48 | void Allocator<T>::clear() {  // deletes every pooled object and empties the map
49 |   for(typename FreeMap::iterator m_iter=free_vars.begin();m_iter!=free_vars.end();m_iter++)  // one pass per size key
50 |   {
51 |     FreeList &stack=m_iter->second;
52 |     while(!stack.empty()) {
53 |       delete stack.top();  // scalar delete: matches `new T(size)` from the generic ::allocate helper
54 |       stack.pop();
55 |     }
56 |   }
57 |   free_vars.clear();  // finally drop the (now empty) per-size stacks themselves
58 | }
59 | 
60 | /****************************************
61 |  * Explicit instantiations
62 |  ***************************************/
63 | template class Allocator<Vector_h>;
64 | template class Allocator<Vector_d>;
65 | 


--------------------------------------------------------------------------------
/src/core/include/error.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MYERROR_H__
 2 | #define __MYERROR_H__
 3 | #ifndef WIN32
 4 | #include <execinfo.h>
 5 | #include <dlfcn.h>
 6 | #include <cxxabi.h>
 7 | #include <unistd.h>
 8 | #endif
 9 | #include <stdio.h>
10 | #include <stdlib.h>
/******************************************************
 * Prints the current stack trace to stdout.
 *
 * Frame 0 (this function) is skipped. For each remaining
 * frame the symbol is demangled when possible; otherwise
 * the raw symbol name, the backtrace_symbols() string, or
 * just the address is printed. Does nothing on WIN32.
 *****************************************************/
inline void printStackTrace() {
#ifndef WIN32
  const int MAX_STACK=30;
  static void *addresses[MAX_STACK];
  const int n=backtrace(addresses,MAX_STACK);

  if(n<2)
    return;

  //one malloc'ed block owned by us; may be NULL if the allocation failed
  char **names=backtrace_symbols( addresses, n );

  printf("Backtrace for pid %d:\n",getpid());

  for(int i=1;i<n;i++)
  {
    Dl_info info;
    if(dladdr(addresses[i],&info) != 0)
    {
      if(info.dli_sname!=0) {
        //attempt to demangle the symbol; on failure (e.g. a plain C
        //symbol) __cxa_demangle returns NULL, so fall back to the raw name
        int stat=0;
        char *demangled = abi::__cxa_demangle(info.dli_sname,0,0,&stat);
        const char *shown = (stat==0 && demangled!=0) ? demangled : info.dli_sname;
        printf("    %d: %p - %s\n",i-1,addresses[i],shown);
        free(demangled); //buffer was malloc'ed by __cxa_demangle
      }
      else if(names!=0) //couldn't locate the symbol so just print the mangled name
        printf("    %d: %p - %s\n",i-1,addresses[i],names[i]);
      else
        printf("    %d: %p\n",i-1,addresses[i]);
    }
    else
      printf("    %d: %p\n",i-1,addresses[i]);
  }

  //only the array itself must be freed, not the individual strings
  free(names);
#endif
}
48 | 
/********************************************************
 * Prints the error message, the stack trace, and exits.
 *
 * The body is wrapped in do { } while(0) so the macro
 * expands to a single statement: `if(cond) FatalError("x");`
 * now guards all three actions instead of only the printf.
 * Invoke it with a trailing semicolon.
 * ******************************************************/
#define FatalError(s)                                               \
  do {                                                              \
    printf("Fatal error '%s' at %s:%d\n",s,__FILE__,__LINE__);      \
    printStackTrace();                                              \
    exit(1);                                                        \
  } while(0)
56 | #endif
57 | 


--------------------------------------------------------------------------------
/src/core/include/util.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MYUTIL_H__
 2 | #define __MYUTIL_H__
 3 | 
 4 | #include <iostream>
 5 | #include <iomanip>
 6 | 
 7 | /****************************************************
 8 |  * Debugging tools
 9 |  ***************************************************/
/**
 * Prints `label`, a colon, and then every element of `v` on a single line of
 * std::cout. Each element is written with precision 4 in a field of width 8
 * followed by one space; the line is terminated (and flushed) with std::endl.
 *
 * Vector only needs size() and operator[].
 */
template<class Vector>
void printVector(const char* label, const Vector &v)
{
  std::cout << label;
  std::cout << ": ";

  int idx=0;
  while(idx<v.size())
  {
    std::cout << std::setprecision(4);
    std::cout << std::setw(8) << v[idx];
    std::cout << " ";
    ++idx;
  }

  std::cout << std::endl;
}
20 | 
/**
 * Prints a CSR matrix to stdout as a dense num_rows x num_cols grid.
 * Stored entries are printed with "%5.2f "; positions with no stored entry
 * are printed as "   X  ". A blank line follows the last row.
 *
 * Matrix must expose num_rows, num_cols, row_offsets, column_indices and
 * values (CSR layout).
 */
template<class Matrix>
void printDense(const Matrix& A)
{
  for(int row=0;row<A.num_rows;row++)
  {
    for(int col=0;col<A.num_cols;col++)
    {
      //position of (row,col) inside the CSR arrays, or -1 when not stored
      int hit=-1;
      for(int k=A.row_offsets[row];k<A.row_offsets[row+1];k++)
      {
        if(col==A.column_indices[k])
        {
          hit=k;
          break;
        }
      }

      if(hit>=0)
        printf("%5.2f ",A.values[hit]);
      else
        printf("   X  ");
    }
    printf("\n");
  }
  printf("\n");
}
45 | 
46 | #include <fstream>
/**
 * Writes a CSR matrix to the file `fname` as coordinate triplets, preceded by
 * a MatrixMarket "coordinate real general" banner and a
 * "rows cols entries" size line. Values are written in fixed notation with
 * 16 digits after the decimal point.
 *
 * If the file cannot be opened a message is written to std::cerr and nothing
 * else happens.
 *
 * NOTE(review): row/column indices are emitted 0-based exactly as stored,
 * whereas the MatrixMarket specification uses 1-based indices. Left unchanged
 * because existing readers of these files may rely on it -- confirm.
 *
 * @param A      matrix exposing num_rows, num_cols, num_entries, row_offsets,
 *               column_indices, values and a value_type typedef
 * @param fname  path of the output file (string literals are accepted)
 */
template<class Matrix>
void printMatrix(const Matrix& A, const char* fname)
{
  std::ofstream fout(fname);
  if(!fout)
  {
    std::cerr << "printMatrix: unable to open '" << fname << "' for writing" << std::endl;
    return;
  }

  fout << "%%MatrixMarket matrix coordinate real general" << '\n';
  fout << std::setprecision(16) << std::fixed << A.num_rows << " " << A.num_cols << " " << A.num_entries << '\n';
  for(int i=0;i<A.num_rows;i++)
  {
    for (int j=A.row_offsets[i];j<A.row_offsets[i+1];j++)
    {
      int c=A.column_indices[j];
      typename Matrix::value_type v=A.values[j];
      //'\n' instead of std::endl: no need to flush after every entry
      fout << i << " " << c << " " << v << '\n';
    }
  }
  fout.close();
}
66 | #endif
67 | 


--------------------------------------------------------------------------------
/src/cuda_compute_capability.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (C) 2011 Florian Rathgeber, florian.rathgeber@gmail.com
 3 |  *
 4 |  * This code is licensed under the MIT License.  See the FindCUDA.cmake script
 5 |  * for the text of the license.
 6 |  *
 7 |  * Based on code by Christopher Bruns published on Stack Overflow (CC-BY):
 8 |  * http://stackoverflow.com/questions/2285185
 9 |  */
10 | 
11 | #include <stdio.h>
12 | #include <cuda_runtime.h>
13 | 
14 | int main() {
15 |   int deviceCount, device, major = 9999, minor = 9999;
16 |   int gpuDeviceCount = 0;
17 |   struct cudaDeviceProp properties;
18 | 
19 |   if (cudaGetDeviceCount(&deviceCount) != cudaSuccess)
20 |     {
21 |       printf("Couldn't get device count: %s\n", cudaGetErrorString(cudaGetLastError()));
22 |       return 1;
23 |     }
24 |   /* machines with no GPUs can still report one emulation device */
25 |   for (device = 0; device < deviceCount; ++device) {
26 |     cudaGetDeviceProperties(&properties, device);
27 |     if (properties.major != 9999) {/* 9999 means emulation only */
28 |       ++gpuDeviceCount;
29 |       /*  get minimum compute capability of all devices */
30 |       if (major > properties.major) {
31 |         major = properties.major;
32 |         minor = properties.minor;
33 |       } else if (minor > properties.minor) {
34 |         minor = properties.minor;
35 |       }
36 |     }
37 |   }
38 | 
39 |   /* don't just return the number of gpus, because other runtime cuda
40 |      errors can also yield non-zero return values */
41 |   if (gpuDeviceCount > 0) {
42 |     if (major == 2 && minor == 1)
43 |       {
44 |         // There is no --arch compute_21 flag for nvcc, so force minor to 0
45 |         minor = 0;
46 |       }
47 |     /* this output will be parsed by FindCUDA.cmake */
48 |     printf("%d%d", major, minor);
49 |     return 0; /* success */
50 |   }
51 |   return 1; /* failure */
52 | }


--------------------------------------------------------------------------------
/src/core/aggmis/include/Timer.h:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Timer.h
 3 | // =======
 4 | // High Resolution Timer.
 5 | // This timer is able to measure the elapsed time with 1 micro-second accuracy
 6 | // in both Windows, Linux and Unix system 
 7 | //
 8 | //  AUTHOR: Song Ho Ahn (song.ahn@gmail.com)
 9 | // CREATED: 2003-01-13
10 | // UPDATED: 2006-01-13
11 | //
12 | // Copyright (c) 2003 Song Ho Ahn
13 | //////////////////////////////////////////////////////////////////////////////
14 | 
15 | #ifndef TIMER_H_DEF
16 | #define TIMER_H_DEF
17 | 
18 | #ifdef WIN32   // Windows system specific
19 | #include <windows.h>
20 | #else          // Unix based system specific
21 | #include <sys/time.h>
22 | #endif
23 | 
24 | 
/**
 * Stopwatch-style timer declaration.
 *
 * Time stamps are kept in the platform's native representation
 * (LARGE_INTEGER counters when WIN32 is defined, timeval otherwise) and
 * exposed as doubles in seconds, milliseconds or microseconds.
 */
class Timer
{
public:
    Timer();
    ~Timer();

    /// Starts the timer.
    void   start();
    /// Stops the timer.
    void   stop();

    /// Elapsed time in seconds.
    double getElapsedTime();
    /// Elapsed time in seconds (same as getElapsedTime).
    double getElapsedTimeInSec();
    /// Elapsed time in milliseconds.
    double getElapsedTimeInMilliSec();
    /// Elapsed time in microseconds.
    double getElapsedTimeInMicroSec();

private:
    double startTimeInMicroSec;     ///< starting time in microseconds
    double endTimeInMicroSec;       ///< ending time in microseconds
    int    stopped;                 ///< stop flag
#ifdef WIN32
    LARGE_INTEGER frequency;        ///< ticks per second
    LARGE_INTEGER startCount;
    LARGE_INTEGER endCount;
#else
    timeval startCount;
    timeval endCount;
#endif
};
55 | 
56 | #endif // TIMER_H_DEF
57 | 


--------------------------------------------------------------------------------
/src/core/include/smoothedMG/aggregators/Timer.h:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////////////
 2 | // Timer.h
 3 | // =======
 4 | // High Resolution Timer.
 5 | // This timer is able to measure the elapsed time with 1 micro-second accuracy
 6 | // in both Windows, Linux and Unix system 
 7 | //
 8 | //  AUTHOR: Song Ho Ahn (song.ahn@gmail.com)
 9 | // CREATED: 2003-01-13
10 | // UPDATED: 2006-01-13
11 | //
12 | // Copyright (c) 2003 Song Ho Ahn
13 | //////////////////////////////////////////////////////////////////////////////
14 | 
15 | #ifndef TIMER_H_DEF
16 | #define TIMER_H_DEF
17 | 
18 | #ifdef WIN32   // Windows system specific
19 | #include <windows.h>
20 | #else          // Unix based system specific
21 | #include <sys/time.h>
22 | #endif
23 | 
24 | 
/**
 * Stopwatch-style timer declaration.
 *
 * Time stamps are kept in the platform's native representation
 * (LARGE_INTEGER counters when WIN32 is defined, timeval otherwise) and
 * exposed as doubles in seconds, milliseconds or microseconds.
 */
class Timer
{
public:
    Timer();
    ~Timer();

    /// Starts the timer.
    void   start();
    /// Stops the timer.
    void   stop();

    /// Elapsed time in seconds.
    double getElapsedTime();
    /// Elapsed time in seconds (same as getElapsedTime).
    double getElapsedTimeInSec();
    /// Elapsed time in milliseconds.
    double getElapsedTimeInMilliSec();
    /// Elapsed time in microseconds.
    double getElapsedTimeInMicroSec();

private:
    double startTimeInMicroSec;     ///< starting time in microseconds
    double endTimeInMicroSec;       ///< ending time in microseconds
    int    stopped;                 ///< stop flag
#ifdef WIN32
    LARGE_INTEGER frequency;        ///< ticks per second
    LARGE_INTEGER startCount;
    LARGE_INTEGER endCount;
#else
    timeval startCount;
    timeval endCount;
#endif
};
55 | 
56 | #endif // TIMER_H_DEF
57 | 


--------------------------------------------------------------------------------
/src/core/aggmis/cuda/AggMIS_IOHelpers.cu:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * File:   AggMIS_IOHelpers.cu
 3 |  * Author: T. James Lewis
 4 |  *
 5 |  * Created on May 25, 2013, 4:43 PM
 6 |  */
 7 | #include "AggMIS_IOHelpers.h"
 8 | namespace AggMIS {
 9 |     namespace InputHelpers {
10 |         std::string GetNonEmptyLineCIN() {
11 |             std::string b;
12 |             char dumb;
13 |             while (std::cin.peek() == '\n')
14 |               std::cin.get(dumb);
15 |             std::getline(std::cin, b);
16 |             return b;
17 |         }
18 |         int GetSingleIntegerValuecin() {
19 |             std::string input;
20 |             char dumb;
21 |             while (true)
22 |             {
23 |               while (std::cin.peek() == '\n')
24 |                     std::cin.get(dumb);
25 |                 std::getline(std::cin, input);
26 |                 std::stringstream str(input);
27 |                 int result;
28 |                 if (str >> result)
29 |                     return result;
30 |                 std::cout << "Please enter a number\n:";
31 |             }
32 |         }
33 |         std::vector<int> GetIntegerValuescin() {
34 |             std::string input;
35 |             char dumb;
36 |             int value;
37 |             std::vector<int> values;
38 |             while (true)
39 |             {
40 |                 while (std::cin.peek() == '\n')
41 |                     std::cin.get(dumb);
42 |                 std::getline(std::cin, input);
43 |                 std::stringstream stream(input);
44 |                 while(!stream.eof())
45 |                 {
46 |                     if (stream >> value)
47 |                         values.push_back(value);
48 |                     else
49 |                     {
50 |                         stream.clear();
51 |                         std::string dumber;
52 |                         stream >> dumber;
53 |                     }
54 |                 }
55 |                 if (values.size() > 0)
56 |                     return values;
57 |                 std::cout << "Please enter at least one number\n:";
58 |             }
59 |         }       
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/src/test/CMakeLists.txt:
--------------------------------------------------------------------------------
# gtest external download
# googletest is fetched and built as an ExternalProject under
# ${EXTERNAL_DIR}/gtest (EXTERNAL_DIR must be set by the including script).
set(GTEST_INSTALL_DIR "${EXTERNAL_DIR}/gtest")
# gtest_force_shared_crt is switched on for Windows builds only.
if (WIN32)
   set(GTEST_SHARED_ARG ON)
else()
   set(GTEST_SHARED_ARG OFF)
endif()

# NOTE(review): no GIT_TAG is given, so whatever the repository's default
# branch currently points at is built -- consider pinning a release.
ExternalProject_Add( gtest
  PREFIX "${GTEST_INSTALL_DIR}"
  GIT_REPOSITORY "https://github.com/google/googletest.git"
  INSTALL_DIR "${GTEST_INSTALL_DIR}/include"
  BINARY_DIR "${GTEST_INSTALL_DIR}/lib"
  INSTALL_COMMAND ""
  CMAKE_ARGS 
    -Dgtest_build_tests:BOOL=OFF
    -DBUILD_GMOCK:BOOL=OFF
    -DBUILD_GTEST:BOOL=ON
    -Dgtest_force_shared_crt:BOOL=${GTEST_SHARED_ARG}
)                   
# The install command above is empty, so the headers are copied by hand
# once the (no-op) install step has run.
ExternalProject_Add_Step( gtest copy_deps
  COMMAND ${CMAKE_COMMAND} -E copy_directory ${GTEST_INSTALL_DIR}/src/gtest/googletest/include ${GTEST_INSTALL_DIR}/include
  DEPENDEES install
)
include_directories("${GTEST_INSTALL_DIR}/include")
# NOTE(review): core2d/core3d are not among the directories visible in this
# source tree listing (only core is) -- confirm these paths are still needed.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../core2d/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../core3d/include)
# Static gtest and gtest_main libraries produced by the external build.
SET(GTEST_LIBRARY "${GTEST_INSTALL_DIR}/lib/googletest/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX};${GTEST_INSTALL_DIR}/lib/googletest/${CMAKE_STATIC_LIBRARY_PREFIX}gtest_main${CMAKE_STATIC_LIBRARY_SUFFIX}")
########################################################################

# Definitions for Tests
# Test binaries go to <build>/test/bin; TEST_DATA_DIR is compiled into the
# tests as a string literal pointing at the checked-in test_data directory.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test/bin)
add_definitions(-DTEST_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/test_data/")
include_directories(${CMAKE_SOURCE_DIR}/core/include)
include_directories(${CMAKE_SOURCE_DIR})
# NEWTEST(<name>)
# Builds <name>.cc into an executable, links it against gtest and the
# FEM_SOLVER / FEM_CORE libraries (plus pthread everywhere except Windows),
# registers it with CTest and makes it depend on the gtest external project.
function(NEWTEST name)
  add_executable(${name} ${name}.cc)

  set(newtest_libs ${GTEST_LIBRARY} FEM_SOLVER FEM_CORE)
  if (NOT WIN32)
    list(APPEND newtest_libs -lpthread)
  endif()
  target_link_libraries(${name} ${newtest_libs})

  add_test(${name} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${name})
  add_dependencies(${name} gtest FEM_SOLVER FEM_CORE)
endfunction()

# Registered tests
newtest(sanity2D)
newtest(sanity3D)
newtest(tetVol)
51 | 


--------------------------------------------------------------------------------
/src/core/include/smoothedMG/smoothedMG_amg_level.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SMOOTHEDMG_AMG_LEVEL_H__
 2 | #define __SMOOTHEDMG_AMG_LEVEL_H__
 3 | 
 4 | template <class Matrix, class Vector> class SmoothedMG_AMG_Level;
 5 | 
 6 | #include <amg_level.h>
 7 | #include <smoothedMG/aggregators/aggregator.h>
 8 | #include <cusp/multiply.h>
 9 | #include <cusp/precond/aggregation/smooth.h>
10 | #include <cusp/transpose.h>
11 | 
12 | /***************************************************
13 |  * Classical AMG Base Class
14 |  *  Defines the AMG solve algorithm, decendents must
15 |  *  define markCoarseFinePoints() and
16 |  *  generateInterpoloationMatrix()
17 |  **************************************************/
18 | template <class Matrix, class Vector>
19 | class SmoothedMG_AMG_Level : public AMG_Level<Matrix, Vector>
20 | {
21 |    friend class AMG<Matrix, Vector>;
22 |    typedef typename Matrix::value_type ValueType;
23 |    typedef typename Matrix::index_type IndexType;
24 |    typedef typename Matrix::memory_space MemorySpace;
25 |    public:
26 |    SmoothedMG_AMG_Level(AMG<Matrix, Vector> *amg);
27 |    ~SmoothedMG_AMG_Level();
28 | 
29 |    //  void setup();
30 |    void createNextLevel(bool verbose = false);
31 |    void restrictResidual(const Vector &r, Vector &rr);
32 |    void prolongateAndApplyCorrection(const Vector &c, Vector &x, Vector &tmp);
33 | 
34 |    protected:
35 | 
36 |    void generateMatrixCsr(IdxVector_d &permutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel);
37 |    void generateMatrixSymmetric_d(IdxVector_d &permutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, bool verbose = false);
38 |    void generateProlongatorFull_d(IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx);
39 |    void computeProlongationOperator();
40 |    void computeRestrictionOperator();
41 |    void generateNextLevelMatrixFull_d(bool verbose = false);
42 |    void computeAOperator();
43 | 
44 |    Matrix P, R;
45 |    Matrix_coo_d P_d, R_d;
46 |    Matrix_coo_h Acoo;
47 |    Matrix_coo_d Acoo_d;
48 | 
49 |    Matrix_coo_h AinCoo;
50 | 
51 |    Aggregator<Matrix, Vector>* aggregator;
52 |    IdxVector_h aggregateIdx;
53 |    IdxVector_h partitionIdx;
54 |    IdxVector_h permutation_h;
55 |    IdxVector_h ipermutation_h;
56 | };
57 | #endif
58 | 


--------------------------------------------------------------------------------
/src/core/include/smoothedMG/aggregators/aggregator.h:
--------------------------------------------------------------------------------
 1 | #ifndef __AGGREGATOR_H__
 2 | #define __AGGREGATOR_H__
 3 | template <class Matrix, class Vector> class Aggregator;
 4 | 
 5 | #include <error.h>
 6 | #include <types.h>
 7 | #include <TriMesh.h>
 8 | #include <tetmesh.h>
 9 | 
10 | template <class Matrix, class Vector>
11 | class Aggregator
12 | {
13 |   typedef typename Matrix::value_type ValueType;
14 | 
15 |   public:
16 |   virtual void computePermutation(TriMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitonlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize) = 0;
17 |   virtual void computePermutation(TetMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitonlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize) = 0;
18 |   virtual void computePermutation(int nn, int* xadj, int* adjncy, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize) = 0;
19 |   virtual void computePermutation_d(IdxVector_d &adjIndexesIn, IdxVector_d &adjacencyIn, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false) = 0;
20 |   virtual void computePermutation_d(TriMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false) = 0;
21 |   virtual void computePermutation_d(TetMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false) = 0;
22 | 
23 |   virtual ~Aggregator()
24 |   {
25 |   }
26 |   static Aggregator<Matrix, Vector>* allocate(int type);
27 | 
28 | };
29 | #endif
30 | 


--------------------------------------------------------------------------------
/src/core/include/types.h:
--------------------------------------------------------------------------------
 1 | #ifndef __TYPES_H__
 2 | #define __TYPES_H__
 3 | #include <cusp/csr_matrix.h>
 4 | #include <cusp/hyb_matrix.h>
 5 | #include <cusp/array1d.h>
 6 | 
// Scalar precisions used throughout the solver. The *_CG container typedefs
// below are built on CGType, the unsuffixed ones on AMGType.
// NOTE(review): presumably CG = outer conjugate-gradient solve, AMG =
// multigrid preconditioner, Assemble = matrix assembly -- confirm.
typedef double CGType;
//typedef float  CGType;
typedef float AMGType;
typedef double AssembleType;
11 | 
12 | template<typename IndexType, typename ValueType, class MemorySpace>
13 | class myEll
14 | {
15 |   IndexType num_rows;
16 |   IndexType num_entries;
17 |   cusp::array1d<IndexType, MemorySpace> column_indices;
18 | 	cusp::array1d<ValueType, MemorySpace> values;
19 | };
20 | 
// Naming convention for the typedefs below:
//   _h = cusp::host_memory, _d = cusp::device_memory,
//   _CG suffix = values of type CGType, no suffix = values of type AMGType.
// All matrices use int indices.

// ---- CGType containers ----
typedef myEll<int, CGType, cusp::host_memory> myEll_h_CG;
typedef myEll<int, CGType, cusp::device_memory> myEll_d_CG;


// CSR matrices
typedef cusp::csr_matrix<int, CGType, cusp::host_memory> Matrix_h_CG;
typedef cusp::csr_matrix<int, CGType, cusp::device_memory> Matrix_d_CG;

// dense vectors
typedef cusp::array1d<CGType, cusp::host_memory> Vector_h_CG;
typedef cusp::array1d<CGType, cusp::device_memory> Vector_d_CG;


// ELL matrices
typedef cusp::ell_matrix<int, CGType, cusp::device_memory> Matrix_ell_d_CG;
typedef cusp::ell_matrix<int, CGType, cusp::host_memory> Matrix_ell_h_CG;

// COO matrices
typedef cusp::coo_matrix<int, CGType, cusp::device_memory> Matrix_coo_d_CG;
typedef cusp::coo_matrix<int, CGType, cusp::host_memory> Matrix_coo_h_CG;

// HYB matrices
typedef cusp::hyb_matrix<int, CGType, cusp::device_memory> Matrix_hyb_d_CG;
typedef cusp::hyb_matrix<int, CGType, cusp::host_memory> Matrix_hyb_h_CG;

// ---- AMGType containers ----
// CSR matrices
typedef cusp::csr_matrix<int, AMGType, cusp::host_memory> Matrix_h;
typedef cusp::csr_matrix<int, AMGType, cusp::device_memory> Matrix_d;

// dense vectors
typedef cusp::array1d<AMGType, cusp::host_memory> Vector_h;
typedef cusp::array1d<AMGType, cusp::device_memory> Vector_d;

// integer index vectors
typedef cusp::array1d<int, cusp::host_memory> IdxVector_h;
typedef cusp::array1d<int, cusp::device_memory> IdxVector_d;

// ELL matrices
typedef cusp::ell_matrix<int, AMGType, cusp::device_memory> Matrix_ell_d;
typedef cusp::ell_matrix<int, AMGType, cusp::host_memory> Matrix_ell_h;

// COO matrices
typedef cusp::coo_matrix<int, AMGType, cusp::device_memory> Matrix_coo_d;
typedef cusp::coo_matrix<int, AMGType, cusp::host_memory> Matrix_coo_h;

// HYB matrices
typedef cusp::hyb_matrix<int, AMGType, cusp::device_memory> Matrix_hyb_d;
typedef cusp::hyb_matrix<int, AMGType, cusp::host_memory> Matrix_hyb_h;
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_Metrics.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * File:   AggMIS_Metrics.h
 3 |  * Author: T. James Lewis
 4 |  *
 5 |  * Created on May 1, 2013, 12:19 PM
 6 |  */
 7 | 
 8 | #ifndef AGGMIS_METRICS_H
 9 | #define	AGGMIS_METRICS_H
10 | #include "AggMIS_Types.h"
11 | #include <vector>
12 | #include <queue>
13 | #include <set>
14 | #include <map>
15 | namespace AggMIS {
16 |   namespace Metrics {
17 |     class MetricsContext {
18 |     public:
19 |       MetricsContext(Types::Graph_h &graph, Types::IntVector_h &aggregation);
20 |       double GetConvexityRatio(int aggId);
21 |       double GetEccentricityRatio(int aggId);
22 |       double GetMinimumEnclosingBallRatio(int aggId);
23 |       int GetAggregateCount();
24 | 
25 |     private:
26 |       // Data structures
27 |       Types::Graph_h *graph;
28 |       Types::IntVector_h aggregation;
29 |       std::vector<std::vector<int> > aggregates;
30 |       std::vector<std::vector<int> > convexAggregates;
31 |       int currentAggregate;
32 | 
33 |       // Counters
34 |       int distanceLookups, makeConvexCalls;
35 | 
36 |       // Internal Methods
37 |       int Distance(int a, int b);
38 |       double GetEccentricityRatio(std::vector<int> &aggregate);
39 |       double GetMinimumEnclosingBallRatio(std::vector<int> &aggregate);
40 |       void MakeConvex(std::vector<int> &aggregate);
41 |       void EnsureConvex(int aggId);
42 |       std::vector<int>* FindCentroid(std::vector<int>& aggregate);
43 |       int FindMassScore(int node, std::vector<int>& aggregate);
44 |       std::map<int, int>* FindDistances(int rootNode, std::vector<int>& aggregate);
45 |       std::vector<std::vector<int> >* GetShortestPaths(int startId, int endId, 
46 |         std::map<int, int> &distances);
47 |       std::vector<std::vector<int> >* FindExternalsInPaths(std::vector<int>& aggregate, 
48 |         std::vector<std::vector<int> >* p);
49 |       bool IsPathSatisfied(std::set<int>& required,
50 |         std::vector<std::vector<int> >& pathOptions);
51 |       std::set<int>* BruteForceMinimalNodes(std::vector<
52 |         std::vector<std::vector<int> > >& pathOptions);
53 |       bool IncrementGuessVector(std::vector<int>& guess, std::vector<std::vector<
54 |         std::vector<int> > >& externalOptions);
55 | 
56 |       // Setup helpers
57 |       void Initialize();
58 |       void GetAggregates();
59 |     };
60 |   }
61 | }
62 | 
63 | #endif	/* AGGMIS_METRICS_H */
64 | 
65 | 


--------------------------------------------------------------------------------
/src/core/cuda/amg_signal.cu:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | #include <signal.h>
 3 | #include <error.h>
 4 | 
 5 | typedef void (*signal_handler)(int);
 6 | #ifndef WIN32
 7 | const int NUM_SIGS=11;
 8 | static int SIGNALS[NUM_SIGS] = {SIGINT,SIGQUIT,SIGILL,SIGABRT,SIGFPE,SIGKILL,SIGSEGV,SIGTERM,SIGPIPE,SIGUSR1,SIGUSR2};
 9 | #endif
/****************************************
 * Maps a signal number to a short
 * human-readable description; returns
 * "UNKNOWN" for any signal not listed.
 ****************************************/
inline const char* getSigString(int sig) {
  struct SigName {
    int number;
    const char* text;
  };
  static const SigName known[] = {
    {SIGINT,  "SIGINT (interrupt)"},
    {SIGILL,  "SIGILL (illegal instruction)"},
    {SIGABRT, "SIGABRT (abort)"},
    {SIGFPE,  "SIGFPE (floating point exception)"},
    {SIGSEGV, "SIGSEGV (segmentation violation)"},
    {SIGTERM, "SIGTERM (terminated)"},
#ifndef WIN32
    //these signals are only looked up when WIN32 is not defined
    {SIGKILL, "SIGKILL (killed)"},
    {SIGQUIT, "SIGQUIT (quit)"},
    {SIGPIPE, "SIGPIPE (broken pipe)"},
    {SIGUSR1, "SIGUSR1 (user 1)"},
    {SIGUSR2, "SIGUSR2 (user 2)"},
#endif
  };
  const int count = sizeof(known) / sizeof(known[0]);
  for(int k=0;k<count;++k)
  {
    if(known[k].number==sig)
      return known[k].text;
  }
  return "UNKNOWN";
}
44 | 
45 | /*****************************************
46 |  * handles the signals by printing the 
47 |  * error message, the stack, and exiting
48 |  * where appropriate
49 |  ****************************************/
50 | inline void handle_signals(int sig) {
51 |   printf("Caught signal %d - %s\n",sig,getSigString(sig));
52 |   switch(sig) {
53 |     case SIGINT:
54 |     case SIGTERM:
55 |       //don't print stack trace since the user interrupted this one
56 |       exit(1);
57 |       break;     
58 | #ifndef WIN32
59 |     case SIGUSR1:  case SIGUSR2: //user defined signal to print the backtrace but continue running
60 |       printStackTrace();
61 | 	  break;
62 | 	case SIGKILL:
63 | 	case SIGQUIT:
64 | #endif
65 |     default:
66 |       printStackTrace();
67 |       exit(1);
68 |   }
69 | }
70 | 
71 | #include <amg_signal.h>
72 | SignalHandler::SignalHandler() {
73 | #ifndef WIN32
74 |   struct sigaction action;
75 |   sigemptyset(&action.sa_mask);
76 |   action.sa_flags=0;
77 |   action.sa_handler = handle_signals;
78 |   for(int i=0;i<NUM_SIGS;i++)
79 |     sigaction(SIGNALS[i],&action,NULL);
80 | #endif
81 | }
82 | 
83 | 


--------------------------------------------------------------------------------
/src/examples/example2.cu:
--------------------------------------------------------------------------------
 1 | #include "FEMSolver.h"
 2 | /**
 3 |  * SCI-Solver_FEM :: Example 2
 4 |  * This example is nearly identical to Example 1, except:
 5 |  *  1. We are using a tri mesh data set.
 6 |  */
 7 | 
 8 | int main(int argc, char** argv)
 9 | {
10 |   //Verbose option
11 |   bool verbose = false;
12 |   std::string fname = "../src/test/test_data/sphere_290verts.ply", bName = "", Aname = "";
13 |   for (int i = 0; i < argc; i++) {
14 |     if (strcmp(argv[i], "-v") == 0) {
15 |       verbose = true;
16 |     } else if (strcmp(argv[i], "-i") == 0) {
17 |       if (i + 1 >= argc) break;
18 |       fname = std::string(argv[i + 1]);
19 |       i++;
20 |     } else if (strcmp(argv[i], "-b") == 0) {
21 |       if (i + 1 >= argc) break;
22 |       bName = std::string(argv[i + 1]);
23 |       i++;
24 |     } else if (strcmp(argv[i], "-A") == 0) {
25 |       if (i + 1 >= argc) break;
26 |       Aname = std::string(argv[i + 1]);
27 |       i++;
28 |     }
29 |   }
30 |   //Our main configuration object. We will set aspects where the
31 |   // default values are not what we desire.
32 |   FEMSolver cfg(fname, false, verbose);
33 |   if (!Aname.empty()) {
34 |     //Import stiffness matrix (A)
35 |     if (cfg.readMatlabSparseMatrix(Aname) != 0)
36 |       std::cerr << "Failed to read in A matrix: " << Aname << std::endl;
37 |   }
38 |   //intialize the b matrix to ones for now.
39 |   Vector_h_CG b_h(cfg.getMatrixRows(), 1.0);
40 |   if (!bName.empty()) {
41 |     //Import right-hand-side single-column array (b)
42 |     if (cfg.readMatlabArray(bName, &b_h) != 0)
43 |       std::cerr << "Failed to read in b array: " << bName << std::endl;
44 |   }
45 |   //The answer vector.
46 |   Vector_h_CG x_h(cfg.getMatrixRows(), 0.0); //intial X vector
47 |   //The final call to the solver
48 |   cfg.solveFEM(&x_h, &b_h);
49 |   //At this point, you can do what you need with the matrices.
50 |   cfg.writeMatlabArray("output.mat", x_h);
51 |   //write the VTK
52 |   std::vector<double> vals;
53 |   for (size_t i = 0; i < x_h.size(); i++){
54 |     vals.push_back(x_h[i]);
55 |   }
56 |   int pos = cfg.filename_.find_last_of("/");
57 |   if (pos == std::string::npos)
58 |     pos = cfg.filename_.find_last_of("\\");
59 |   std::string outname = cfg.filename_.substr(pos + 1,
60 |     cfg.filename_.size() - 1);
61 |   pos = outname.find_last_of(".");
62 |   outname = outname.substr(0, pos);
63 |   cfg.writeVTK(vals, outname);
64 |   return 0;
65 | }
66 | 


--------------------------------------------------------------------------------
/src/core/cuda/cgcycle.cu:
--------------------------------------------------------------------------------
 1 | #include <allocator.h>
 2 | #include <cycles/cgcycle.h>
 3 | #include <cusp/multiply.h>
 4 | #include <cusp/blas.h>
 5 | 
 6 | template <class Matrix, class Vector>
 7 | CG_Flex_Cycle<Matrix, Vector>::CG_Flex_Cycle(CycleType next_cycle, int num_iters, AMG_Level<Matrix_h, Vector_h> *next, const Matrix_hyb_d_CG &Aell, const Vector_d_CG &b, Vector_d_CG &x, CGType tol, int maxiters, bool verbose)
 8 | {
 9 | 
10 |    typedef typename Matrix::value_type ValueType;
11 |    typedef typename Matrix::index_type IndexType;
12 |    typedef typename Matrix::memory_space MemorySpace;
13 | 
14 | 
15 |    int N = b.size();
16 |    ValueType bnorm = cusp::blas::nrm2(b);
17 |    Vector_d_CG y(N);
18 | 
19 |    Vector_d_CG z(N);
20 |    Vector_d_CG r(N);
21 |    Vector_d_CG d(N);
22 |    Vector_d_CG p(N);
23 | 
24 |    cusp::multiply(Aell, x, y);
25 |    cusp::blas::axpby(b, y, r, ValueType(1), ValueType(-1));
26 |    next->cycle_level0(next_cycle, r, z);
27 |    cusp::blas::copy(z, p);
28 | 
29 |    ValueType rzold = cusp::blas::dotc(r, z);
30 |    ValueType rznew;
31 | 
32 |    int niter = 0;
33 |    double iter_start, iter_stop;
34 |    iter_start = CLOCK();
35 |    while(niter < maxiters)
36 |    {
37 | 
38 |       cusp::multiply(Aell, p, y);
39 |       ValueType yp = cusp::blas::dotc(y, p);
40 |       ValueType alpha = rzold / yp;
41 |       cusp::blas::axpy(p, x, alpha);
42 |       cusp::blas::axpy(y, r, -alpha);
43 |       ValueType normr = cusp::blas::nrm2(r);
44 |       if (verbose)
45 |          std::cout << "normr=" << std::scientific << normr << "  niter=" << niter << std::endl;
46 | 
47 |       if( (normr / bnorm) <= tol)
48 |          break;
49 | 
50 |       niter++;
51 |       next->cycle_level0(next_cycle, r, z, verbose);
52 |       rznew = cusp::blas::dotc(z, r);
53 |       ValueType beta = rznew / rzold;
54 |       cusp::blas::axpby(z, p, p, ValueType(1), beta);
55 |       rzold = rznew;
56 |    }
57 |    cudaThreadSynchronize();
58 |    iter_stop = CLOCK();
59 |    if (verbose) {
60 |       std::cout << "average time per iteration:        " << (iter_stop-iter_start) / niter << std::endl;
61 |       std::cout << "total solve time:        " << (iter_stop-iter_start) << std::endl;
62 |    }
63 | 
64 |    y.clear();
65 |    z.clear();
66 |    r.clear();
67 |    d.clear();
68 |    p.clear();
69 | }
70 | 
71 | /****************************************
72 |  * Explict instantiations
73 |  ***************************************/
74 | template class CG_Flex_Cycle<Matrix_h_CG, Vector_h_CG>;
75 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8)
 2 | project(FEM_Solver)
 3 | 
 4 | if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
 5 |   add_definitions(-D_CRT_SECURE_NO_WARNINGS)
 6 | endif()
 7 | 
 8 | SET(BUILD_EXAMPLES false CACHE BOOL "Build sample executables")
 9 | 
10 | find_package(CUDA REQUIRED)
11 | include_directories(${CUDA_INCLUDE_DIRS})
12 | ###################
13 | # Externals
14 | include(ExternalProject)
15 | set(EXTERNAL_DIR "${CMAKE_BINARY_DIR}/externals")
16 | # Metis
17 | set(METIS_INSTALL_DIR "${EXTERNAL_DIR}/metis")
18 | ExternalProject_Add( metis
19 |   PREFIX "${METIS_INSTALL_DIR}"
20 |   GIT_REPOSITORY "https://github.com/cibc-internal/metis-4.0.3.git"
21 |   INSTALL_DIR "${METIS_INSTALL_DIR}/include"
22 |   BINARY_DIR "${METIS_INSTALL_DIR}/lib"
23 |   INSTALL_COMMAND ""
24 |   )
25 | ExternalProject_Add_Step( metis copy_deps
26 |   COMMAND ${CMAKE_COMMAND} -E copy_directory ${METIS_INSTALL_DIR}/src/metis/Lib ${METIS_INSTALL_DIR}/include
27 |   DEPENDEES install
28 | )
29 | include_directories("${METIS_INSTALL_DIR}/include")
30 | SET(METIS_LIBRARY "${METIS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}metis${CMAKE_STATIC_LIBRARY_SUFFIX}")
31 | # cusp
32 | set(CUSP_INSTALL_DIR "${EXTERNAL_DIR}/cusp")
33 | ExternalProject_Add( cusp
34 |   PREFIX "${CUSP_INSTALL_DIR}"
35 |   GIT_REPOSITORY "https://github.com/cibc-internal/cusp.git"
36 |   INSTALL_DIR "${CUSP_INSTALL_DIR}/include"
37 |   BINARY_DIR "${CUSP_INSTALL_DIR}/lib"
38 |   INSTALL_COMMAND ""
39 |   BUILD_COMMAND ""
40 |   CONFIGURE_COMMAND ""
41 |   )
42 | include_directories("${CUSP_INSTALL_DIR}/src")
43 | ###################
44 | # Core / Examples
45 | add_subdirectory(core)
46 | 
# Headers of the core library.
include_directories(core/include)
# FEMSolver.h sits next to this CMakeLists.txt. CMAKE_CURRENT_DIRECTORY is not
# a CMake variable (it expands to nothing); CMAKE_CURRENT_SOURCE_DIR is the
# source directory currently being processed.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
49 | 
50 | CUDA_ADD_LIBRARY(FEM_SOLVER FEMSolver.cu FEMSolver.h)
51 | TARGET_LINK_LIBRARIES(FEM_SOLVER FEM_CORE)
52 | ADD_DEPENDENCIES(FEM_SOLVER FEM_CORE)
53 | 
54 | if (${BUILD_EXAMPLES})
55 |   add_subdirectory(examples)
56 | endif()
57 | ###################
58 | # Testing
59 | INCLUDE(CTest)
60 | if (BUILD_TESTING)
61 |   ENABLE_TESTING()
62 |   # Use gcov for code coverage (useful for seeing test completeness)
63 |   OPTION(USE_GCOV "Enable code coverage for GNUCXX Platforms" OFF)
64 |   if (USE_GCOV)
65 |     if(CMAKE_COMPILER_IS_GNUCXX)
66 |       INCLUDE(${PROJECT_SOURCE_DIR}/CodeCoverage.cmake)
67 |       SET(CMAKE_CXX_FLAGS "-g -O0 -fprofile-arcs -ftest-coverage")
68 |       SET(CMAKE_C_FLAGS "-g -O0 -fprofile-arcs -ftest-coverage")
69 |     endif()
70 |   endif()
71 |   ADD_SUBDIRECTORY(test)
72 | endif()
73 | 


--------------------------------------------------------------------------------
/src/examples/example1.cu:
--------------------------------------------------------------------------------
 1 | #include "FEMSolver.h"
 2 | /**
 3 |  * SCI-Solver_FEM :: Example 1
 4 |  * This example is the basic steps for running the solver:
 5 |  *  1. We define our main FEMSolver object.
 6 |  *  2. We set all of the parameters we want. (Otherwise defaults used.)
 7 |  *  3. We read in our input data mesh.
 8 |  *  4. We declare all the variables we need for the solver (matrices).
 9 |  *  5. We invoke the "setupFEM" call, which does all of the work.
10 |  */
11 | 
12 | int main(int argc, char** argv)
13 | {
14 |   //option
15 |   std::string Aname = "", bName, fname = "../src/test/test_data/CubeMesh_size256step16";
16 |   bool verbose = false;
17 |   for (int i = 0; i < argc; i++) {
18 |     if (strcmp(argv[i], "-v") == 0) {
19 |       verbose = true;
20 |     } else if (strcmp(argv[i], "-i") == 0) {
21 |       if (i + 1 >= argc) break;
22 |       fname = std::string(argv[i + 1]);
23 |       i++;
24 |     } else if (strcmp(argv[i], "-b") == 0) {
25 |       if (i + 1 >= argc) break;
26 |       bName = std::string(argv[i + 1]);
27 |       i++;
28 |     } else if (strcmp(argv[i], "-A") == 0) {
29 |       if (i + 1 >= argc) break;
30 |       Aname = std::string(argv[i + 1]);
31 |       i++;
32 |     }
33 |   }
34 |   //Our main configuration object. We will set aspects where the
35 |   // default values are not what we desire.
36 |   FEMSolver cfg(fname, true, verbose);
37 |   if (!Aname.empty()) {
38 |     //Import stiffness matrix (A)
39 |     if (cfg.readMatlabSparseMatrix(Aname) != 0)
40 |       std::cerr << "Failed to read in A matrix: " << Aname << std::endl;
41 |   }
42 |   //intialize the b matrix to ones for now.
43 |   Vector_h_CG b_h(cfg.getMatrixRows(), 1.0);
44 |   if (!bName.empty()) {
45 |     //Import right-hand-side single-column array (b)
46 |     if (cfg.readMatlabArray(bName, &b_h) != 0)
47 |       std::cerr << "Failed to read in b array: " << bName << std::endl;
48 |   }
49 |   //The answer vector.
50 |   Vector_h_CG x_h(cfg.getMatrixRows(), 0.0); //intial X vector
51 |   //The final call to the solver
52 |   cfg.solveFEM(&x_h, &b_h);
53 |   //At this point, you can do what you need with the matrices.
54 |   if (cfg.writeMatlabArray("output.mat", x_h)) {
55 | 	  std::cerr << "failed to write matlab file." << std::endl;
56 |   }
57 |   //write the VTK
58 |   std::vector<double> vals;
59 |   for (size_t i = 0; i < x_h.size(); i++){
60 |     vals.push_back(x_h[i]);
61 |   }
62 |   int pos = cfg.filename_.find_last_of("/");
63 |   if (pos == std::string::npos)
64 |     pos = cfg.filename_.find_last_of("\\");
65 |   std::string outname = cfg.filename_.substr(pos + 1,
66 |     cfg.filename_.size() - 1);
67 |   cfg.writeVTK(vals, outname);
68 |   return 0;
69 | }
70 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_FileIO.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * File:   AggMIS_FileIO.h
 3 |  * Author: nachtluce
 4 |  *
 5 |  * Created on April 17, 2013, 4:23 PM
 6 |  */
 7 | 
 8 | #ifndef AGGMIS_FILEIO_H
 9 | #define	AGGMIS_FILEIO_H
10 | 
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "TriMesh.h"
#include "tetmesh.h"
#include "AggMIS_Types.h"
18 | namespace AggMIS {
19 |     namespace FileIO
20 |     {
21 |         class DataCollector {
22 |         public:
23 |             DataCollector(std::string title);
24 |             void set(std::string name, double value);
25 |             void set(std::string name, int value);
26 |             void set(std::string name, std::string value);
27 |             void set(std::string name, double value, bool keep);
28 |             void set(std::string name, int value, bool keep);
29 |             void set(std::string name, std::string value, bool keep);
30 |             void closeRow();
31 |             void blankRow();
32 |             void writeOutCSV(std::ostream *outputStream);
33 |         private:
34 |             vector<vector<std::string> > data;
35 |             vector<int> keeping;
36 |             std::string title;
37 |             bool dirty;
38 |         };
39 | 
40 |         // Takes a filename and tries to load a graph from it
41 |         // by automatically detecting the file type.
42 |         Types::Graph_h* GetGraphFromFile_Auto(std::string filename);
43 | 
44 |         // Takes an input stream and reads in a graph in text csr format
45 |         Types::Graph_h* GetGraphFromFile_CSR(std::istream *theInput);
46 | 
47 |         // Takes an input stream and reads in a graph in .MSH format
48 |         Types::Graph_h* GetGraphFromFile_MSH(std::istream *theInput);
49 | 
50 |         // Takes a filename and loads the graph from a triangular mesh
51 |         // stored in .ply format using Trimesh library
52 |         Types::Graph_h* GetGraphFromFile_TriMesh(std::string filename);
53 | 
54 |         // Takes a filename and loads the graph from a tetrahedral mesh
55 |         // stored in .node/.ele format using the tetmesh library
56 |         Types::Graph_h* GetGraphFromFile_TetMesh(std::string filename);
57 | 
58 |         // Takes a filename and loads a vector from it
59 |         Types::IntVector_h* GetVectorFromFile_BIN(std::string filename);
60 | 
61 |         // Writes out the graph to the specified file in CSR format
62 |         void WriteGraphToFile_CSR(Types::Graph_h graph, std::string filename);
63 | 
64 |         // Writes out vector to the specified file
65 |         void WriteVectorToFile_BIN(Types::IntVector_h toWrite, std::string filename);
66 |     }
67 | }
68 | #endif	/* AGGMIS_FILEIO_H */
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/src/core/include/amg.h:
--------------------------------------------------------------------------------
 1 | #ifndef __AMG_H__
 2 | #define __AMG_H__
 3 | template <class Matrix, class Vector> class AMG;
 4 | 
 5 | enum SolverType {AMG_SOLVER,PCG_SOLVER};
 6 | 
 7 | enum ConvergenceType { ABSOLUTE_CONVERGENCE, RELATIVE_CONVERGENCE };
 8 | 
 9 | #include <cusp/detail/lu.h>
10 | #include <error.h>
11 | #include <cycles/cycle.h>
12 | #include <smoothedMG/smoothedMG_amg_level.h>
13 | #include "TriMesh.h"
14 | #include "tetmesh.h"
15 | 
16 | /*********************************************************
17 |  * AMG Class
18 |  *  This class provides the user interface to the AMG 
19 |  *  solver library. 
20 |  ********************************************************/
21 | template <class Matrix, class Vector>
22 | class AMG 
23 | {
24 |   typedef typename Matrix::value_type ValueType;
25 |   friend class AMG_Level<Matrix,Vector>;
26 |   friend class SmoothedMG_AMG_Level<Matrix,Vector>;
27 | 
28 |   public:
29 |     AMG(bool verbose, int convergeType, int cycleType,
30 |       int solverType, double tolerance, int cycleIters, int maxIters,
31 |       int maxLevels, int topSize, double smootherWeight,
32 |     int preInnerIters, int postInnerIters, int postRelaxes,
33 |     int dsType, int randMisParameters, int partitionMaxSize, double proOmega,
34 |     int aggregatorType, int blockSize, TriMesh* triMesh, TetMesh* tetMesh);
35 |   ~AMG();
36 | 
37 |   void solve(const Vector_d_CG &b, Vector_d_CG &x);
38 |   void solve_iteration(const Vector_d_CG &b, Vector_d_CG &x);
39 | 
40 |   void setup(const Matrix_d &Acsr_d);
41 | 
42 |   void printGridStatistics();
43 | 
44 |   // profiling & debug output
45 |   void printProfile();
46 |   void printCoarsePoints();
47 |   void printConnections();
48 |   //config parameters
49 |   bool verbose_;
50 |   ConvergenceType convergeType_;
51 |   CycleType cycleType_;
52 |   SolverType solverType_;
53 |   double tolerance_;
54 |   int cycleIters_;
55 |   int maxIters_;
56 |   int maxLevels_;
57 |   int topSize_;
58 |   double smootherWeight_;
59 |   int preInnerIters_;             // the pre inner iterations for GSINNER
60 |   int postInnerIters_;            // the post inner iterations for GSINNER
61 |   int postRelaxes_;               // the number of post relax iterations
62 |   int dsType_;
63 |   int randMisParameters_;
64 |   int partitionMaxSize_;
65 |   double proOmega_;
66 |   int aggregatorType_;
67 |   int blockSize_;
68 |   TriMesh* triMesh_;
69 |   TetMesh* tetMesh_;
70 |   private:
71 |   bool converged(const Vector &r, ValueType &nrm);
72 | 
73 | 	cusp::detail::lu_solver<ValueType, cusp::host_memory> LU;
74 | 
75 |   AMG_Level<Matrix,Vector>* fine;
76 |   ValueType initial_nrm;
77 |   int iterations;
78 |   int num_levels;
79 | 	int coarsestlevel;
80 | 
81 | 	Matrix_hyb_d_CG Ahyb_d_CG;
82 | 
83 |   double solve_start, solve_stop;
84 |   double setup_start, setup_stop;
85 | };
86 | #endif
87 | 


--------------------------------------------------------------------------------
/src/FEMSolver.h:
--------------------------------------------------------------------------------
 1 | #ifndef __FEMSOLVER_H__
 2 | #define __FEMSOLVER_H__
 3 | 
 4 | #include <cstdlib>
 5 | #include <cstdio>
 6 | #include <iostream>
 7 | #include <fstream>
 8 | #include <vector>
 9 | #include "TriMesh.h"
10 | #include "tetmesh.h"
11 | #include "types.h"
12 | 
/**
 * Front end of the FEM solver: holds every tunable option as a public data
 * member, the input mesh pointers, and the system matrix A_h_.
 *
 * Typical use, as shown in the bundled examples: construct with a mesh file
 * name, optionally load A and b from MATLAB files, call solveFEM(), then
 * write the result with writeMatlabArray() / writeVTK().
 */
class FEMSolver {
private:
  // One (row, column, value) triple of a sparse matrix.
  class SparseEntry_t {
  public:
    int32_t row_;
    int32_t col_;
    float   val_;
    SparseEntry_t(int32_t r, int32_t c, float v) : row_(r), col_(c), val_(
      static_cast<float>(v)) {}
    ~SparseEntry_t() {}
  };
  bool InitCUDA();
  // Ordering predicate over sparse entries; the ordering itself is defined
  // in the implementation file.
  static bool compare_sparse_entry(SparseEntry_t a, SparseEntry_t b);
public:
  // fname: mesh file name (stored in filename_); isTetMesh selects between
  // the tetrahedral and triangular mesh paths; verbose sets verbose_.
  // NOTE(review): the member assignments live in the .cu file - confirm.
  FEMSolver(std::string fname = "../src/test/test_data/simple",
      bool isTetMesh = true, bool verbose = false);
  virtual ~FEMSolver();
  // Solves for x_h given right-hand side b_h; x_h is both the starting
  // guess and the output in the examples.
  void solveFEM(Vector_h_CG* x_h, Vector_h_CG* b_h);
  void getMatrixFromMesh();
  // The read/write helpers return an int status; the examples treat a
  // non-zero return as failure.
  int readMatlabSparseMatrix(const std::string &filename);
  int readMatlabArray(const std::string &filename, Vector_h_CG* rhs);
  int writeMatlabArray(const std::string &filename, const Vector_h_CG &array);
  void checkMatrixForValidContents(Matrix_ell_h* A_h);
  void writeVTK(std::vector <double> values, std::string fname);
  size_t getMatrixRows();
  //data members
  bool verbose_;                  // output verbosity
  std::string filename_;          // mesh file name
  int maxLevels_;                 // the maximum number of levels
  int maxIters_;                  // the maximum solve iterations
  int preInnerIters_;             // the pre inner iterations for GSINNER
  int postInnerIters_;            // the post inner iterations for GSINNER
  int postRelaxes_;               // the number of post relax iterations
  int cycleIters_;                // the number of CG iterations per outer iteration
  int dsType_;                    // data structure type
  int topSize_;                   // max size of coarsest level
  int randMisParameters_;         // randomized MIS parameter - NOTE(review): exact meaning not visible here; confirm
  int partitionMaxSize_;          // max size of the partition
  int aggregatorType_;            // aggregator oldMis (0), metis bottom up (1), 
                                  //   metis top down (2), aggMisGPU (3), aggMisCPU (4), newMisLight (5)
  int convergeType_;              // the convergence tolerance algorithm <absolute (0)|relative (1)>
  double tolerance_;              // the convergence tolerance
  int cycleType_;                 // the cycle algorithm <V (0) | W (1) | F (2) | K (3)>
  int solverType_;                // the solving algorithm <AMG (0) | PCG (1)>
  double smootherWeight_;         // the weight parameter used in a smoother
  double proOmega_;               // the weight parameter used in prolongator smoother
  int device_;                    // the GPU device number to specify
  int blockSize_;                 // block size - NOTE(review): units/use not visible here; confirm
  //The pointers to the meshes
  TetMesh * tetMesh_;
  TriMesh * triMesh_;
  //The A matrix used by the solver
  Matrix_ell_h A_h_;
};
68 | 
69 | #endif
70 | 


--------------------------------------------------------------------------------
/src/core/include/Color.h:
--------------------------------------------------------------------------------
  1 | #ifndef COLOR_H
  2 | #define COLOR_H
  3 | /*
  4 | Szymon Rusinkiewicz
  5 | Princeton University
  6 | 
  7 | Color.h
  8 | Random class for encapsulating colors...
  9 | */
 10 | 
 11 | #include "Vec.h"
 12 | #include <cmath>
 13 | #include <algorithm>
 14 | #ifndef M_PI
 15 | # define M_PI 3.14159265358979323846
 16 | #endif
 17 | 
 18 | 
 19 | class Color : public Vec<3,double> {
 20 | public:
 21 | 	Color()
 22 | 		{}
 23 | 	Color(const Vec<3,float> &v_) : Vec<3,double>(v_)
 24 | 		{}
 25 | 	Color(const Vec<3,double> &v_) : Vec<3,double>((double)v_[0], (double)v_[1], (double)v_[2])
 26 | 		{}
 27 | 	Color(float r, float g, float b) : Vec<3,double>(r,g,b)
 28 | 		{}
 29 | 	Color(double r, double g, double b) : Vec<3,double>((double)r, (double)g, (double)b)
 30 | 		{}
 31 | 	explicit Color(const float *rgb) : Vec<3,double>(rgb[0], rgb[1], rgb[2])
 32 | 		{}
 33 | 	explicit Color(const double *rgb) : Vec<3,double>((double)rgb[0], (double)rgb[1], (double)rgb[2])
 34 | 		{}
 35 | 
 36 | 	// Implicit conversion from float would be bad, so we have an
 37 | 	// explicit constructor and an assignment statement.
 38 | 	explicit Color(float c) : Vec<3,double>(c,c,c)
 39 | 		{}
 40 | 	explicit Color(double c) : Vec<3,double>((double)c, (double)c, (double)c)
 41 | 		{}
 42 | 	Color &operator = (float c)
 43 | 		{ return *this = Color(c); }
 44 | 	Color &operator = (double c)
 45 | 		{ return *this = Color(c); }
 46 | 
 47 | 	// Assigning from ints divides by 255
 48 | 	Color(int r, int g, int b)
 49 | 	{
 50 | 		const float mult = 1.0f / 255.0f;
 51 | 		*this = Color(mult*r, mult*g, mult*b);
 52 | 	}
 53 | 	explicit Color(const int *rgb)
 54 | 		{ *this = Color(rgb[0], rgb[1], rgb[2]); }
 55 | 	explicit Color(const unsigned char *rgb)
 56 | 		{ *this = Color(rgb[0], rgb[1], rgb[2]); }
 57 | 	explicit Color(int c)
 58 | 		{ *this = Color(c,c,c); }
 59 | 	Color &operator = (int c)
 60 | 		{ return *this = Color(c); }
 61 | 
 62 | 	static Color black()
 63 | 		{ return Color(0.0f, 0.0f, 0.0f); }
 64 | 	static Color white()
 65 | 		{ return Color(1.0f, 1.0f, 1.0f); }
 66 | 	static Color red()
 67 | 		{ return Color(1.0f, 0.0f, 0.0f); }
 68 | 	static Color green()
 69 | 		{ return Color(0.0f, 1.0f, 0.0f); }
 70 | 	static Color blue()
 71 | 		{ return Color(0.0f, 0.0f, 1.0f); }
 72 | 	static Color yellow()
 73 | 		{ return Color(1.0f, 1.0f, 0.0f); }
 74 | 	static Color cyan()
 75 | 		{ return Color(0.0f, 1.0f, 1.0f); }
 76 | 	static Color magenta()
 77 | 		{ return Color(1.0f, 0.0f, 1.0f); }
 78 | 	static Color hsv(float h, float s, float v)
 79 | 	{
 80 | 		// From FvD
 81 | 		if (s <= 0.0f)
 82 | 			return Color(v,v,v);
 83 | 		h = fmod(h, float(2.0f * M_PI));
 84 | 		if (h < 0.0)
 85 | 			h += (float)(2.0 * M_PI);
 86 | 		h /= (float)(M_PI / 3.0);
 87 | 		int i = int(std::floor(h));
 88 | 		float f = h - i;
 89 | 		float p = v * (1.0f - s);
 90 | 		float q = v * (1.0f - (s*f));
 91 | 		float t = v * (1.0f - (s*(1.0f-f)));
 92 | 		switch(i) {
 93 | 			case 0: return Color(v, t, p);
 94 | 			case 1: return Color(q, v, p);
 95 | 			case 2: return Color(p, v, t);
 96 | 			case 3: return Color(p, q, v);
 97 | 			case 4: return Color(t, p, v);
 98 | 			default: return Color(v, p, q);
 99 | 		}
100 | 	}
101 | };
102 | 
103 | #endif
104 | 


--------------------------------------------------------------------------------
/src/core/include/my_timer.h:
--------------------------------------------------------------------------------
  1 | #ifndef __MY_TIMER_H__
  2 | #define __MY_TIMER_H__
  3 | #ifdef __MACH__
  4 | #include <mach/mach_time.h>
  5 | #define CLOCK_REALTIME 0
  6 | #define CLOCK_MONOTONIC 0
  7 | int inline clock_gettime(int clk_id, struct timespec *t){
  8 |   mach_timebase_info_data_t timebase;
  9 |   mach_timebase_info(&timebase);
 10 |   uint64_t time;
 11 |   time = mach_absolute_time();
 12 |   double nseconds = ((double)time * (double)timebase.numer)/((double)timebase.denom);
 13 |   double seconds = ((double)time * (double)timebase.numer)/((double)timebase.denom * 1e9);
 14 |   t->tv_sec = seconds;
 15 |   t->tv_nsec = nseconds;
 16 |   return seconds+nseconds*1e-9;;
 17 | }
 18 | #else
 19 | #include <time.h>
 20 | #endif
 21 | 
 22 | 
 23 | /**********************************************
 24 |  * A simple high resolution timer
 25 |  *********************************************/
 26 | double inline CLOCK() {
 27 | #ifdef WIN32
 28 | #include <windows.h>
 29 | #include <stdio.h>
 30 | 	SYSTEMTIME st;
 31 | 	GetSystemTime(&st);
 32 | 	return ((st.wDay * 24. + st.wHour) * 60. + st.wMinute) * 60. + st.wSecond + st.wMilliseconds / 1000.;
 33 | #else
 34 |   timespec ts;
 35 |   clock_gettime(CLOCK_REALTIME,&ts);
 36 |   return ts.tv_sec+ts.tv_nsec*1e-9;
 37 | #endif
 38 | }
 39 | 
 40 | 
 41 | /**********************************************
 42 |  *  class for holding profiling data if desired
 43 |  *********************************************/
 44 | 
 45 | #include <vector>
 46 | #include <map>
 47 | #include <iostream>
 48 | #include <iomanip>
 49 | 
 50 | typedef std::map<const char *, double> Event;
 51 | typedef std::map<const char *, double>::iterator Eiter;
 52 | 
/**
 * Per-level accumulator of named timings.
 *
 * All state and all work are compiled in only when PROFILE is defined.
 * Without it, tic()/toc()/resetTimer() are empty and getHeaders()/getTimes()
 * are declared to return void.
 *
 * Events are keyed by the `const char *` pointer itself (see the Event
 * typedef), so the map compares addresses, not string contents: tic() and
 * toc() for one event must be given the same pointer.
 */
class levelProfile {
  private:
#ifdef PROFILE
    Event Times;   // accumulated seconds per event
    Event Tic;     // CLOCK() reading taken by the latest tic() per event
#endif

  public:
    levelProfile() { }
    ~levelProfile() {}

    // Records the start time of `event`.
    inline void tic(const char *event)
    {
#ifdef PROFILE
//      cudaThreadSynchronize();
      Tic[event] = CLOCK();
#endif
    }

    // Adds the time elapsed since the matching tic() to the total of `event`.
    inline void toc(const char *event) {
#ifdef PROFILE
//      cudaThreadSynchronize();
      double t = CLOCK();
      Times[event] += t-Tic[event];
#endif
    }

    // Returns the event keys in map iteration order (PROFILE builds only).
#ifdef PROFILE
    std::vector<const char *>
#else
      void
#endif
      inline getHeaders()
      {
#ifdef PROFILE
        std::vector<const char *> headerVec;
        for (Eiter it=Times.begin(); it!=Times.end(); ++it) {
          headerVec.push_back(it->first);
        }
        return headerVec;
#endif
      }

    // Returns the accumulated times in the same order as getHeaders()
    // (PROFILE builds only).
#ifdef PROFILE
    std::vector<double>
#else
      void
#endif
      inline getTimes()
      {
#ifdef PROFILE
        std::vector<double> times;
        for (Eiter it=Times.begin(); it!=Times.end(); ++it) {
          times.push_back(it->second);
        }
        return times;
#endif
      }

    /********************************************
     * Reset all events
     *******************************************/
    // Zeroes every accumulated total; the set of known events is kept.
    inline void resetTimer() {
#ifdef PROFILE
      for (Eiter it=Times.begin(); it!=Times.end(); ++it) {
        it->second = 0.0;
      }
#endif
    }
};
123 | #endif
124 | 


--------------------------------------------------------------------------------
/src/core/include/cutil.h:
--------------------------------------------------------------------------------
  1 | #ifndef __CUTIL_H__
  2 | #define __CUTIL_H__
  3 | 
  4 | #include <TriMesh.h>
  5 | #include <tetmesh.h>
  6 | 
  7 | 
  8 | 
  9 | /**********************************************************
 10 |  * Checks for a cuda error and if one exists prints it,
 11 |  * the stack trace, and exits
 12 |  *********************************************************/
 13 | #define cudaCheckError() {                              \
 14 |   cudaError_t e=cudaGetLastError();                                 \
 15 |   char error_str[100];                                              \
 16 |   if(e!=cudaSuccess) {                                              \
 17 |     sprintf(error_str,"Cuda failure: '%s'",cudaGetErrorString(e));  \
 18 |     FatalError(error_str);                                          \
 19 |   }                                                                 \
 20 | }
 21 | 
 22 | #define cudaSafeCall(x) {(x); cudaCheckError()}
 23 | 
 24 | template <class Matrix, class Vector>
 25 | void computeResidual(const Matrix& A, const Vector& x, const Vector& b, Vector& r);
 26 | 
 27 | template<typename IndexType, typename ValueType>
 28 | __global__ void find_diag_kernel(const IndexType num_rows, const IndexType num_cols, const IndexType num_cols_per_row, const IndexType pitch,
 29 |                                  const IndexType * Aj,
 30 |                                  const ValueType* Ax,
 31 |                                  ValueType* diag)
 32 | {
 33 |   const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
 34 |   const IndexType grid_size = gridDim.x * blockDim.x;
 35 | 
 36 |   for (IndexType row = thread_id; row < num_rows; row += grid_size)
 37 |   {
 38 |     IndexType offset = row;
 39 | 
 40 |     for (IndexType n = 0; n < num_cols_per_row; n++)
 41 |     {
 42 |       const IndexType col = Aj[offset];
 43 | 
 44 |       if (col == row)
 45 |       {
 46 |         const ValueType A_ij = Ax[offset];
 47 |         diag[row] = A_ij;
 48 |       }
 49 | 
 50 |       offset += pitch;
 51 |     }
 52 |   }
 53 | }
 54 | 
 55 | /**************************************************
 56 |  * structs for converting between signed and unsigned values without 
 57 |  * type casting.
 58 |  * ************************************************/
 59 | 
 60 | /*****************************
 61 |  * Generic converter for unsigned types.
 62 |  * This becomes a no op
 63 |  *****************************/
 64 | template <class GlobalOrdinal>
 65 | struct intuint
 66 | {
 67 | 
 68 |   union
 69 |   {
 70 |     GlobalOrdinal ival;
 71 |     GlobalOrdinal uval;
 72 |   };
 73 | };
 74 | 
 75 | /***************************
 76 |  * char converter
 77 |  **************************/
 78 | template <>
 79 | struct intuint<char>
 80 | {
 81 | 
 82 |   union
 83 |   {
 84 |     char ival;
 85 |     unsigned char uval;
 86 |   };
 87 | };
 88 | 
 89 | /***************************
 90 |  * Short converter
 91 |  **************************/
 92 | template <>
 93 | struct intuint<short>
 94 | {
 95 | 
 96 |   union
 97 |   {
 98 |     short ival;
 99 |     unsigned short uval;
100 |   };
101 | };
102 | 
103 | /***************************
104 |  * Integer converter
105 |  **************************/
106 | template <>
107 | struct intuint<int>
108 | {
109 | 
110 |   union
111 |   {
112 |     int ival;
113 |     unsigned int uval;
114 |   };
115 | };
116 | 
117 | /***************************
118 |  * long converter
119 |  **************************/
120 | template <>
121 | struct intuint<long>
122 | {
123 | 
124 |   union
125 |   {
126 |     long ival;
127 |     unsigned long uval;
128 |   };
129 | };
130 | 
131 | struct metisinput
132 | {
133 |   int nn;
134 |   int* xadj;
135 |   int* adjncy;
136 | };
137 | 
138 | struct cudaCSRGraph
139 | {
140 |   int nn;
141 |   int* xadj;
142 |   int* adjncy;
143 | };
144 | 
145 | template<class Matrix>
146 | void trimesh2ell(TriMesh* meshPtr, Matrix &A);
147 | 
148 | template<class Matrix>
149 | void trimesh2csr(TriMesh* meshPtr, Matrix &A);
150 | 
151 | template<class Matrix>
152 | void tetmesh2ell(TetMesh* meshPtr, Matrix &A, bool verbose);
153 | 
154 | void convertSym2gen(Matrix_d_CG &Acsr, Matrix_coo_d_CG &Aout);
155 | #endif
156 | 


--------------------------------------------------------------------------------
/src/core/include/amg_level.h:
--------------------------------------------------------------------------------
  1 | #ifndef __AMG_LEVEL_H__
  2 | #define __AMG_LEVEL_H__
  3 | template <class Matrix, class Vector> class AMG_Level;
  4 | 
  5 | #include <amg.h>
  6 | #include <smoothers/smoother.h>
  7 | #include <cycles/cycle.h>
  8 | #include <my_timer.h>
  9 | #include <vector>
 10 | #include <types.h>
 11 | #include "TriMesh.h"
 12 | #include "tetmesh.h"
 13 | #include <cusp/print.h>
 14 | 
 15 | /********************************************************
 16 |  * AMG Level class:
 17 |  *  This class is a base class for AMG levels.  This
 18 |  *  class is a linked list of levels where each
 19 |  *  level contains the solution state for that level.
 20 |  ********************************************************/
 21 | template <class Matrix, class Vector>
 22 |     class AMG_Level
 23 | {
 24 |    friend class AMG<Matrix, Vector>;
 25 |    public:
 26 | 
 27 |    AMG_Level(AMG<Matrix, Vector> *amg) : smoother(0), amg(amg), next(0), init(false)
 28 |    {};
 29 |    virtual ~AMG_Level();
 30 | 
 31 |    virtual void restrictResidual(const Vector &r, Vector &rr) = 0;
 32 |    virtual void prolongateAndApplyCorrection(const Vector &c, Vector &x, Vector &tmp) = 0;
 33 |    virtual void createNextLevel(bool verbose = false) = 0;
 34 | 
 35 |    void setup();
 36 |    void cycle(CycleType cycle, Vector_d &b, Vector_d &x, bool verbose = false);
 37 |    void cycle_level0(CycleType cycle, Vector_d_CG &b, Vector_d_CG &x, bool verbose = false);
 38 | 
 39 |    void setInitCycle()
 40 |    {
 41 |       init = true;
 42 |    }
 43 | 
 44 |    void unsetInitCycle()
 45 |    {
 46 |       init = false;
 47 |    }
 48 | 
 49 |    int getLevel()
 50 |    {
 51 |       return level_id;
 52 |    }
 53 | 
 54 |    bool isInitCycle()
 55 |    {
 56 |       return init;
 57 |    }
 58 | 
 59 |    inline Matrix_d& getA_d()
 60 |    {
 61 |       return A_d;
 62 |    }
 63 | 
 64 |    inline bool isFinest()
 65 |    {
 66 |       return level_id == 0;
 67 |    }
 68 | 
 69 |    inline bool isCoarsest()
 70 |    {
 71 |       return next == NULL;
 72 |    }
 73 | 
 74 |    static AMG_Level<Matrix, Vector>* allocate(AMG<Matrix, Vector>*amg);
 75 | 
 76 |    protected:
 77 |    typedef typename Matrix::index_type IndexType;
 78 |    typedef typename Matrix::value_type ValueType;
 79 |    typedef typename Matrix::memory_space MemorySpace;
 80 |    levelProfile Profile;
 81 |    std::vector<int> originalRow;
 82 |    std::vector<int> getOriginalRows();
 83 | 
 84 |    protected:
 85 |    TriMesh* m_meshPtr;
 86 |    TetMesh* m_tetmeshPtr;
 87 |    int nn;
 88 |    IdxVector_h m_xadj;
 89 |    IdxVector_h m_adjncy;
 90 | 
 91 |    IdxVector_d m_xadj_d;
 92 |    IdxVector_d m_adjncy_d;
 93 | 
 94 |    int nnout;
 95 |    //  int* m_xadjout;
 96 |    //  int* m_adjncyout;
 97 |    IdxVector_h m_xadjout;
 98 |    IdxVector_h m_adjncyout;
 99 |    IdxVector_d m_xadjout_d;
100 |    IdxVector_d m_adjncyout_d;
101 | 
102 |    int largestblock;
103 |    int largestblocksize;
104 |    //  Matrix A;
105 |    Vector prolongator; //incomplete prolongator
106 |    Matrix_coo_h prolongatorFull;
107 |    Matrix_ell_h AinEll;
108 |    Matrix_h     AinCsr;
109 |    //  Matrix_coo_h AinSysCoo;
110 |    Matrix_coo_h Aout;
111 |    //  Matrix_coo_h AoutSys;
112 |    IdxVector_h partSyncIdx_h;
113 |    IdxVector_h segSyncIdx_h;
114 | 
115 | 
116 |    Vector_d prolongator_d; //incomplete prolongator
117 |    Matrix_hyb_d prolongatorFull_d;
118 |    Matrix_hyb_d restrictorFull_d;
119 |    Matrix_d A_d;
120 |    Matrix_ell_d AinEll_d;
121 |    Matrix_d AinCSR_d;
122 |    Matrix_coo_d Aout_d;
123 |    IdxVector_d AinBlockIdx_d;
124 |    IdxVector_d AoutBlockIdx_d;
125 |    Matrix_coo_d AinSysCoo_d;
126 |    Matrix_coo_d AoutSys_d;
127 |    Vector_d bc_d, xc_d, r_d;
128 |    IdxVector_d aggregateIdx_d;
129 |    IdxVector_d partitionIdx_d;
130 |    IdxVector_d permutation_d;
131 |    IdxVector_d ipermutation_d;
132 |    IdxVector_d partSyncIdx_d;
133 |    IdxVector_d segSyncIdx_d;
134 |    Smoother<Matrix_d, Vector_d>* smoother; // assigned in setup(); deleted in ~AMG_Level()
135 | 
136 | 
137 |    AMG<Matrix, Vector>* amg; // settings object read by cycle()/setup() (dsType_, smootherWeight_, ...)
138 |    AMG_Level* next; // following level; NULL on the coarsest level (see isCoarsest); deleted in ~AMG_Level()
139 |    int largest_num_entries; // passed to the smoother pre/post calls in cycle()
140 |    int largest_num_per_row; // passed to the CSR smoother pre/post calls in cycle()
141 |    int largest_num_segment;
142 |    int level_id; // 0 on the finest level (see isFinest)
143 |    bool init; //marks if the x vector needs to be initialized
144 | };
145 | #endif
146 | 


--------------------------------------------------------------------------------
/src/core/include/tetmesh.h:
--------------------------------------------------------------------------------
  1 | #ifndef TETMESH_H
  2 | #define TETMESH_H
  3 | /*
  4 |    TetMesh: Class for tetrahedral meshes based on TriMesh by
  5 |    Szymon Rusinkiewicz
  6 |    Princeton University
  7 | 
  8 |    TriMesh.h
  9 |    Class for triangle meshes.
 10 |    */
 11 | 
 12 | #define  LARGENUM  10000000.0
 13 | #define  SMALLNUM  0.00000001
 14 | #define  ONE       1
 15 | #define  CURVATURE 2
 16 | #define  NOISE     3
 17 | #define  SPEEDTYPE ONE
 18 | #ifndef M_PI
 19 | #define  M_PI      3.14159265358979323846
 20 | #endif
 21 | 
 22 | #include "Vec.h"
 23 | #include <math.h>
 24 | #include <vector>
 25 | #include <list>
 26 | 
 27 | class TetMesh // Tetrahedral mesh: vertex positions, tets (4 vertex indices each) and connectivity tables
 28 | {
 29 | 
 30 | public:
 31 |   // Types
 32 | 
 33 |   struct Tet // one tetrahedron, stored as four vertex indices
 34 |   {
 35 |     int v[4];
 36 | 
 37 |     Tet() // note: leaves v[] uninitialized
 38 |     {
 39 |     }
 40 | 
 41 |     Tet(const int &v0, const int &v1, const int &v2, const int &v3)
 42 |     {
 43 |       v[0] = v0;
 44 |       v[1] = v1;
 45 |       v[2] = v2;
 46 |       v[3] = v3;
 47 |     }
 48 | 
 49 |     Tet(const int *v_) // copies v_[0..3]; v_ must point to at least four ints
 50 |     {
 51 |       v[0] = v_[0];
 52 |       v[1] = v_[1];
 53 |       v[2] = v_[2];
 54 |       v[3] = v_[3];
 55 |     }
 56 | 
 57 |     int &operator[] (int i) // unchecked access to v[i]
 58 |     {
 59 |       return v[i];
 60 |     }
 61 | 
 62 |     const int &operator[] (int i)const
 63 |     {
 64 |       return v[i];
 65 |     }
 66 | 
 67 |     operator const int * () const // implicit conversion to a pointer to v[0]
 68 |     {
 69 |       return &(v[0]);
 70 |     }
 71 | 
 72 |     operator const int * ()
 73 |     {
 74 |       return &(v[0]);
 75 |     }
 76 | 
 77 |     operator int * ()
 78 |     {
 79 |       return &(v[0]);
 80 |     }
 81 | 
 82 |     int indexof(int v_) const // position (0..3) of the first slot equal to v_, or -1 if none matches
 83 |     {
 84 |       return (v[0] == v_) ? 0 :
 85 |         (v[1] == v_) ? 1 :
 86 |         (v[2] == v_) ? 2 :
 87 |         (v[3] == v_) ? 3 : -1;
 88 |     }
 89 |   };
 90 | 
 91 |   // The basics: vertices and tets
 92 |   std::vector<point> vertices;
 93 |   std::vector<Tet> tets;
 94 |   std::vector<int> matlabels;
 95 |   // Connectivity structures:
 96 |   //  For each vertex, all neighboring vertices
 97 |   std::vector< std::vector<int> > neighbors;
 98 |   //  For each vertex, all adjacent tets (presumably indices into tets; confirm in need_adjacenttets)
 99 |   std::vector< std::vector<int> > adjacenttets;
100 |   std::vector<Tet> across_face;
101 | 
102 |   std::vector<double> radiusInscribe;
103 | 
104 |   void need_meshquality();
105 | 
106 |   void need_neighbors();
107 |   void need_adjacenttets();
108 |   void need_across_face();
109 |   void need_meshinfo();
110 |   void need_Rinscribe();
111 |   void rescale(int size);
112 | 
113 |   //Tet mesh reader (static; returns a TetMesh pointer)
114 |   //  nodefilename: file containing the XYZ position of each node or point.
115 |   //    This must have the extension .node, and have the following
116 |   //    characteristics: ASCII text with one node per line. Values are space-
117 |   //    delimited. First line is a header line with 4 values: 'n 3 0 0'
118 |   //    where n is the total number of nodes. Subsequent lines have the
119 |   //    format 'i x y z' where i is the node number (starts at 1),
120 |   //    and xyz are floats representing the node position in 3D space.
121 |   //  elefilename: file containing the 4 nodes that define each tetrahedron.
122 |   //    This must have the extension .ele, and have the following
123 |   //    characteristics: ASCII text with one element per line. Values are
124 |   //    space delimited. First line is a header line with 3 values: 't 4 0'
125 |   //    where t is the total number of elements. Subsequent lines have the
126 |   //    format 't a b c d' where t is the element number (starts at 1),
127 |   //    and abcd are integers representing the node numbers from that file.
128 |   //  NOTE(review): this comment used to describe a `zero_based` flag (default
129 |   //    false) for zero-based element numbers; read() below has no such parameter.
130 |   //  verbose: set to true for verbose output
131 |   static TetMesh *read(const char *nodefilename, const char* elefilename, const bool verbose = false);
132 |   //void write(const char *filename);
133 | 
134 |   // Debugging printout, controllable by a "verbose"ness parameter
135 |   static int verbose;
136 |   static void set_verbose(int);
137 |   static int dprintf(const char *format, ...);
138 | 
139 |   //Constructor
140 | 
141 |   TetMesh()
142 |   {
143 |   }
144 | };
145 | 
146 | #endif
147 | 


--------------------------------------------------------------------------------
/src/core/include/smoothedMG/aggregators/mis.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MIS_H__
 2 | #define __MIS_H__
 3 | 
 4 | #include <TriMesh.h>
 5 | #include "types.h"
 6 | 
 7 | extern "C"
 8 | {
 9 | #include "metis.h"
10 | }
11 | 
12 | #include <vector>
13 | #include <set>
14 | template <class Matrix, class Vector> class MIS_Aggregator;
15 | template <class Matrix, class Vector> class RandMIS_Aggregator;
16 | 
17 | #include <smoothedMG/aggregators/aggregator.h>
18 | 
19 | template <class Matrix, class Vector>
20 | class MIS_Aggregator : public Aggregator<Matrix, Vector> // aggregator declaring permutation builders for TriMesh, TetMesh and raw adjacency input
21 | {
22 |   typedef typename Matrix::value_type ValueType;
23 | public: // computePermutation takes IdxVector_h/int* arguments; computePermutation_d takes IdxVector_d arguments
24 |   void computePermutation(TriMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize);
25 |   void computePermutation(TetMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize);
26 |   void computePermutation(int nn, int* xadj, int* adjncy, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize);
27 |   void computePermutation_d(IdxVector_d &adjIndexesIn, IdxVector_d &adjacencyIn, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false);
28 |   void computePermutation_d(TriMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false);
29 |   void computePermutation_d(TetMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false);
30 | private: // helper working on raw adjacency arrays (adjIndexes/adjacency) for n nodes
31 |   void aggregateGraphMIS(int n, int *adjIndexes, int *adjacency, int *partition, int *partCount);
32 | };
33 | 
34 | template <class Matrix, class Vector>
35 | class RandMIS_Aggregator : public Aggregator<Matrix, Vector> // declares the same computePermutation/computePermutation_d overload set as MIS_Aggregator
36 | {
37 |   typedef typename Matrix::value_type ValueType;
38 | public: // computePermutation takes IdxVector_h/int* arguments; computePermutation_d takes IdxVector_d arguments
39 |   void computePermutation(TriMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize);
40 |   void computePermutation(TetMesh* meshPtr, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize);
41 |   void computePermutation(int nn, int* xadj, int* adjncy, IdxVector_h &permutation, IdxVector_h &ipermutation, IdxVector_h &aggregateIdx, IdxVector_h &partitionIdx, int* partitionlabel, int* nnout, int* &xadjout, int* &adjncyout, int metissize);
42 |   void computePermutation_d(IdxVector_d &adjIndexesIn, IdxVector_d &adjacencyIn, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false);
43 |   void computePermutation_d(TriMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false);
44 |   void computePermutation_d(TetMesh* meshPtr, IdxVector_d &permutation, IdxVector_d &ipermutation, IdxVector_d &aggregateIdx, IdxVector_d &partitionIdx, IdxVector_d &partitionlabel, IdxVector_d &adjIndexesOut, IdxVector_d &adjacencyOut, int aggregation_type, int parameters, int part_max_size, bool verbose = false);
45 | private: // helper working on raw adjacency arrays for n nodes; depth presumably bounds the MIS extension (confirm in the definition)
46 |   void extendedMIS(int n, int depth, int *adjIndexes, int *adjacency, int *partition, int *partCount, bool verbose = false);
47 | };
48 | #endif
49 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_Aggregation_GPU.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * File:   AggMIS_Aggregation_GPU.h
  3 |  * Author: T. James Lewis
  4 |  *
  5 |  * Created on April 19, 2013, 11:30 AM
  6 |  */
  7 | 
  8 | #ifndef AGGMIS_AGGREGATION_GPU_H
  9 | #define	AGGMIS_AGGREGATION_GPU_H
 10 | #include "AggMIS_Types.h"
 11 | #include "AggMIS_GraphHelpers.h"
 12 | #include "thrust/scan.h"
 13 | #include "thrust/count.h"
 14 | namespace AggMIS {
 15 |   namespace Aggregation {
 16 |     namespace Kernels {
 17 |       /**
 18 |        * In this kernel each node checks if it has been allocated
 19 |        * to an aggregate, if it has not, it checks to see if any
 20 |        * of its neighbors have been. If they have, it finds the
 21 |        * aggregate to which more neighbors belong to and assigns
 22 |        * itself to it.
 23 |        * @param size The number of nodes in the graph
 24 |        * @param adjIndexes The adjacency indices of the graph
 25 |        * @param adjacency The adjacency list of the graph
 26 |        * @param partIn The aggregation labeling from last cycle
 27 |        * @param partOut The aggregation labeling being found
 28 |        * @param aggregated Flags whether node has been aggregated
 29 |        */
 30 |       __global__ void allocateNodesKernel(int size,
 31 |         int *adjIndexes,
 32 |         int *adjacency,
 33 |         int *partIn,
 34 |         int *partOut,
 35 |         int *aggregated);
 36 |       /**
 37 |        * This kernel does the same flood filling as the allocate
 38 |        * kernel, but only propagates between nodes of the same
 39 |        * aggregate. Used to verify that an aggregation consists of
 40 |        * aggregates which are connected components.
 41 |        * @param size Number of nodes in graph
 42 |        * @param adjIndices Graph adjacency indices
 43 |        * @param adjacency Graph adjacency
 44 |        * @param aggregation Current aggregation
 45 |        * @param valuesIn The values from last cycle
 46 |        * @param valuesOut The values to write this cycle
 47 |        * @param incomplete Flag which indicates whether done
 48 |        */
 49 |       __global__ void checkAggregationFillAggregates(int size,
 50 |         int *adjIndices,
 51 |         int *adjacency,
 52 |         int* aggregation,
 53 |         int* valuesIn,
 54 |         int* valuesOut,
 55 |         int* incomplete);
 56 |     }
 57 |     namespace Functors {
 58 |       /**
 59 |        * This functor is used to sequentially number elements in a
 60 |        * vector. It returns -1 when b is 0 and a - 1 otherwise, so b
 61 |        * acts as the label flag and a as the prefix sum of the labels.
 62 |        * NOTE(review): this comment previously named a as the label and
 63 |        * b as the prefix sum; confirm the argument order at the call site.
 64 |        */
 65 |       struct NumberParts {
 66 |         __host__ __device__
 67 |           int operator()(const int &a, const int &b) const
 68 |         {
 69 |           // A zero in b marks an entry that receives no id.
 70 |           const bool unlabeled = (b == 0);
 71 |           // Every other entry gets a shifted down by one.
 72 |           return unlabeled ? -1 : a - 1;
 73 |         }
 74 |       };
 75 |     }
 76 |     /**
 77 |      * This method allocates each node of the graph to the nearest
 78 |      * root node using simple path distance and breaking ties by
 79 |      * adjacency.
 80 |      * @param graph The input graph to be aggregated
 81 |      * @param roots The set of initial root points as a vector the size
 82 |      * of the number of graph nodes, with entries being either 0=non root
 83 |      * or 1=root
 84 |      * @return A vector with an entry for each graph node indicating which
 85 |      * zero indexed aggregate it was allocated to.
 86 |      */
 87 |     Types::IntVector_d* AggregateToNearest(Types::Graph_d &graph,
 88 |       Types::IntVector_d &roots);
 89 |     /**
 90 |      * Checks if the given labeling constitutes a valid aggregation
 91 |      * of the graph. Checks that the aggregate ID's form an
 92 |      * uninterrupted sequence starting from zero, and that each
 93 |      * aggregate is a connected component.
 94 |      * @param graph The graph
 95 |      * @param aggregation The node labeling to verify
 96 |      * @param verbose Prints more info if true
 97 |      * @return True if valid aggregation false otherwise
 98 |      */
 99 |     bool IsValidAggregation(Types::Graph_d &graph,
100 |       Types::IntVector_d &aggregation,
101 |       bool verbose);
102 |     Types::Graph_d* GetAggregateMap(Types::IntVector_d& aggregation);
103 |     //	double GetEdgeCutRatio(Graph_d &graph, IntVector_d& aggregation);
104 |   }
105 | }
106 | 
107 | #endif	/* AGGMIS_AGGREGATION_GPU_H */
108 | 
109 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_Aggregation_CPU.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * File:   AggMIS_Aggregation_CPU.h
  3 |  * Author: T. James Lewis
  4 |  *
  5 |  * Created on July 3, 2013, 4:21 PM
  6 |  */
  7 | 
  8 | #ifndef AGGMIS_AGGREGATION_CPU_H
  9 | #define	AGGMIS_AGGREGATION_CPU_H
 10 | #include <AggMIS_Types.h>
 11 | #include <Helper.h>
 12 | #include <queue>
 13 | #include <set>
 14 | #include <vector>
 15 | 
 16 | namespace AggMIS {
 17 |   namespace Aggregation {
 18 |     /**
 19 |      * This method returns an aggregation where each node in the graph is
 20 |      * assigned to the nearest root point.
 21 |      * @param graph The graph being aggregated
 22 |      * @param roots The root points for the aggregation
 23 |      * @return An array where return[i] is the ID of the aggregate to which
 24 |      * node i is assigned.
 25 |      */
 26 |     Types::IntVector_h* AggregateToNearest(Types::Graph_h &graph,
 27 |       Types::IntVector_h &roots);
 28 |     /**
 29 |      * This method checks if the given aggregation is valid.
 30 |      * @param graph The graph that was aggregated.
 31 |      * @param aggregation The aggregation array to check
 32 |      * @param verbose If true, more output will be printed.
 33 |      * @return True if aggregation is valid, False otherwise.
 34 |      */
 35 |     bool IsValidAggregation(Types::Graph_h &graph,
 36 |       Types::IntVector_h &aggregation,
 37 |       bool verbose);
 38 |     /**
 39 |      * This method returns an array where each element is the count of
 40 |      * nodes in the corresponding aggregate.
 41 |      * @param aggregation The aggregation array.
 42 |      * @return An array giving the size of each aggregate.
 43 |      */
 44 |     Types::IntVector_h* GetPartSizes(Types::IntVector_h &aggregation);
 45 |     /**
 46 |      * This method returns an array where each element is the sum of the
 47 |      * weights of all nodes in the corresponding aggregate.
 48 |      * @param aggregation The aggregation array.
 49 |      * @param nodeWeights The weights of each node.
 50 |      * @return An array giving the total weight of each aggregate.
 51 |      */
 52 |     Types::IntVector_h* GetPartSizes(Types::IntVector_h &aggregation,
 53 |       Types::IntVector_h &nodeWeights);
 54 |     /**
 55 |      * This method returns a sub-graph of the input graph with only nodes
 56 |      * contained in nodeList, and only edges between nodes both contained
 57 |      * in nodeList.
 58 |      * @param graph The graph to get sub-graph of.
 59 |      * @param nodeList The list of nodes defining the subgraph
 60 |      * @return A graph where return[i][j] is the ID of the j'th neighbor
 61 |      * of the i'th node of the graph.
 62 |      */
 63 |     std::vector<std::vector<int> >* GetAggregateGraph(Types::Graph_h &graph,
 64 |       std::vector<int> &nodeList);
 65 |     /**
 66 |      * Returns a node in the given graph with maximal path distance from the
 67 |      * specified start node.
 68 |      * @param graph The graph.
 69 |      * @param start The start node.
 70 |      * @return The ID of a node such that no other node has a higher
 71 |      * distance to the start node.
 72 |      */
 73 |     int FindFarthestNode(std::vector<std::vector<int> > &graph,
 74 |       int start);
 75 |     /**
 76 |      * Marks the distance of all nodes in the given graph from the start
 77 |      * point. After completion distances[i] will contain the distance from
 78 |      * node i to the startPoint.
 79 |      * @param graph The input graph.
 80 |      * @param distances The array to mark distances in.
 81 |      * @param startPoint The starting point.
 82 |      */
 83 |     void MarkDistances(std::vector<std::vector<int> > &graph,
 84 |       std::vector<int> &distances,
 85 |       int startPoint);
 86 |     /**
 87 |      * Marks the distance of all nodes in the given graph from the set of
 88 |      * nodes specified in startPoints. After completion distances[i] will
 89 |      * contain the distance from node i to the nearest node in startPoints.
 90 |      * @param graph The input graph.
 91 |      * @param distances The array to mark distances in.
 92 |      * @param startPoints The set of starting nodes.
 93 |      */
 94 |     void MarkDistances(std::vector<std::vector<int> > &graph,
 95 |       std::vector<int> &distances,
 96 |       std::vector<int> startPoints);
 97 |     /**
 98 |      * NOTE(review): the text below describes a set of nodes, which matches
 99 |      * GetCentroid's vector result rather than FindMassScore's int; confirm.
100 |      * @param graph The input graph.
101 |      * @return The set of nodes in the given graph for which the sum of all
102 |      * distances from them to all other nodes is minimal.
103 |      */
104 |     int FindMassScore(std::vector<std::vector<int> > &graph,
105 |       int startPoint);
106 |     std::vector<int>* GetCentroid(std::vector<std::vector<int> > &graph,
107 |       int startPoint);
108 |   }
109 | }
110 | 
111 | #endif	/* AGGMIS_AGGREGATION_CPU_H */
112 | 
113 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_Types.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * File:   AggMIS_Types.h
  3 |  * Author: T. James Lewis
  4 |  *
  5 |  * Created on April 15, 2013, 2:18 PM
  6 |  */
  7 | 
  8 | #ifndef AGGMIS_TYPES_H
  9 | #define  AGGMIS_TYPES_H
 10 | #include "thrust/device_vector.h"
 11 | #include "thrust/host_vector.h"
 12 | #include "thrust/sort.h"
 13 | #include "thrust/functional.h"
 14 | #include "thrust/unique.h"
 15 | #include "my_timer.h"
 16 | #include <time.h>
 17 | #include <string>
 18 | #include <sstream>
 19 | #include <vector>
 20 | 
 21 | namespace AggMIS {
 22 |   bool CheckCudaError(cudaError_t code, const char *file, int line);
 23 |   namespace Types {
 24 |     typedef thrust::device_vector<int> IntVector_d;
 25 |     typedef thrust::device_vector<unsigned int> UIntVector_d;
 26 |     typedef thrust::host_vector<int> IntVector_h;
 27 |     typedef thrust::host_vector<IntVector_h*> IntVector_h_ptr;
 28 |     typedef thrust::host_vector<IntVector_d*> IntVector_d_ptr;
 29 | 
 30 |     struct DGraph {
 31 |       // Stores the three arguments as given; nothing is copied or checked.
 32 |       __host__ __device__ DGraph(int _size, int *_ind, int *_adj)
 33 |         : size(_size), ind(_ind), adj(_adj) {}
 34 |       // Returns adj[ind[node] + neighbor]: ind[node] is where node's
 35 |       // entries begin in adj, and neighbor is an offset from there.
 36 |       __device__ int getNeighbor(int node, int neighbor) {
 37 |         const int slot = ind[node] + neighbor;
 38 |         return adj[slot];
 39 |       }
 40 |       int size;
 41 |       int *ind;
 42 |       int *adj;
 43 |     };
 44 | 
 45 |     class JTimer {
 46 |       public:
 47 |         JTimer();
 48 |         ~JTimer();
 49 |         void start();
 50 |         void stop();
 51 |         double getElapsedTimeInSec(bool host);
 52 |         double getElapsedTimeInMilliSec(bool host);
 53 |       private:
 54 | 
 55 |         double startTimeHost, endTimeHost;
 56 |         cudaEvent_t startTimeCuda, endTimeCuda;
 57 |         bool started, stopped;
 58 |         float elapsedCudaTime;
 59 |     };
 60 | 
 61 |     // Forward declarations of classes so the conversion constructors
 62 |     // will compile.
 63 |     class Graph_d;
 64 |     class Graph_h;
 65 | 
 66 |     class Graph_d { // graph held as two thrust::device_vector<int> arrays (indices, adjacency)
 67 |       public:
 68 |         Graph_d(IntVector_d &indices, IntVector_d &adjacency);
 69 |         Graph_d(IntVector_h &indices, IntVector_h &adjacency);
 70 |         Graph_d(IntVector_d *indices, IntVector_d *adjacency);
 71 |         Graph_d(Graph_h &graph); // conversion from the host-vector graph type
 72 |         Graph_d();
 73 |         ~Graph_d();
 74 |         int Size();
 75 |         int* indStart();
 76 |         int* adjStart();
 77 |         DGraph GetD(); // returns the plain DGraph struct (size plus two raw int pointers)
 78 |         IntVector_d *indices;
 79 |         IntVector_d *adjacency;
 80 |       private:
 81 |         bool willClean; // presumably whether the destructor frees indices/adjacency -- TODO confirm in the implementation
 82 |     };
 83 |     class Graph_h { // graph held as two thrust::host_vector<int> arrays (indices, adjacency)
 84 |       public:
 85 |         Graph_h(IntVector_d &indices, IntVector_d &adjacency);
 86 |         Graph_h(IntVector_h &indices, IntVector_h &adjacency);
 87 |         Graph_h(IntVector_h *indices, IntVector_h *adjacency);
 88 |         Graph_h(Graph_d &graph); // conversion from the device-vector graph type
 89 |         Graph_h();
 90 |         ~Graph_h();
 91 |         int Size();
 92 |         int* nStart(int node); // presumably a pointer to the first neighbor entry of node -- TODO confirm
 93 |         int* nEnd(int node); // presumably the matching end pointer for node's neighbor entries -- TODO confirm
 94 |         IntVector_h *indices;
 95 |         IntVector_h *adjacency;
 96 |       private:
 97 |         bool willClean; // presumably whether the destructor frees indices/adjacency -- TODO confirm in the implementation
 98 |     };
 99 | 
100 |     int* StartOf(IntVector_d &target);
101 |     int* StartOf(IntVector_d *target);
102 | 
103 |     namespace Compare {
104 |       bool AreEqual(IntVector_h& a,
105 |           IntVector_h& b,
106 |           bool verbose);
107 |       bool AreEqual(IntVector_d& a,
108 |           IntVector_d& b,
109 |           bool verbose);
110 |       bool AreEqual(IntVector_h& a,
111 |           IntVector_d& b,
112 |           bool verbose);
113 |       bool AreEqual(IntVector_d& a,
114 |           IntVector_h& b,
115 |           bool verbose);
116 |       bool AreEqual(std::vector<std::vector<int> > &a,
117 |         std::vector<std::vector<int> > &b,
118 |           bool verbose);
119 |       bool AreEqual(Graph_h& a,
120 |           Graph_h& b,
121 |           bool verbose);
122 |       bool AreEqual(Graph_d& a,
123 |           Graph_d& b,
124 |           bool verbose);
125 |       bool AreEqual(Graph_h& a,
126 |           Graph_d& b,
127 |           bool verbose);
128 |       bool AreEqual(Graph_d& a,
129 |           Graph_h& b,
130 |           bool verbose);
131 |     }
132 |     namespace Display {
133 |       void Print(IntVector_h& toPrint,
134 |           int start,
135 |           int end,
136 |           std::string message);
137 |       void Print(IntVector_d& toPrint,
138 |           int start,
139 |           int end,
140 |           std::string message);
141 |       void Print(IntVector_d& toPrint,
142 |         std::string message);
143 |       void Print(IntVector_h& toPrint,
144 |         std::string message);
145 |       void Print(std::vector<std::vector<std::vector<int> > >& toPrint, std::string message);
146 |       void Print(std::vector<std::vector<int> >& toPrint,
147 |         std::string message);
148 |       void Print(std::vector<int> &toPrint,
149 |           int start,
150 |           int end,
151 |           std::string message);
152 |       void Print(std::vector<int> &toPrint,
153 |         std::string message);
154 |     }
155 |   }
156 | }
157 | #endif  /* AGGMIS_TYPES_H */
158 | 


--------------------------------------------------------------------------------
/src/core/cuda/amg_level.cu:
--------------------------------------------------------------------------------
  1 | #include <amg_level.h>
  2 | #include <types.h>
  3 | #include <cutil.h>
  4 | 
  5 | template <class Matrix, class Vector>
  6 | AMG_Level<Matrix, Vector>::~AMG_Level()
  7 | { // frees this level's smoother, then the next level (whose own destructor continues down the chain)
  8 |   delete smoother; // deleting a null pointer does nothing, so no guard is needed
  9 |   delete next;
 10 | }
 11 | 
 12 | #include<smoothedMG/smoothedMG_amg_level.h>
 13 | template <class Matrix, class Vector>
 14 | AMG_Level<Matrix, Vector>* AMG_Level<Matrix, Vector>::allocate(AMG<Matrix, Vector>*amg) {
 15 |   AMG_Level<Matrix, Vector>* level = new SmoothedMG_AMG_Level<Matrix, Vector >(amg); // always the SmoothedMG level type
 16 |   return level;
 17 | }
 18 | 
 19 | /******************************************************
 20 |  * Recursively solves the system on this level
 21 |  ******************************************************/
 22 | template <class Matrix, class Vector>
 23 | void AMG_Level<Matrix, Vector>::cycle(CycleType cycle, Vector_d& b_d, Vector_d& x_d, bool verbose)
 24 | {
 25 |   // Coarsest level: copy b_d into a host array, solve with amg->LU and
 26 |   // copy the host result back into x_d. No smoother is involved.
 27 |   if (isCoarsest())
 28 |   {
 29 |     cusp::array1d<ValueType, cusp::host_memory> temp_b(b_d);
 30 |     cusp::array1d<ValueType, cusp::host_memory> temp_x(x_d.size());
 31 |     amg->LU(temp_b, temp_x);
 32 |     x_d = temp_x;
 33 |     return;
 34 |   }
 35 |   // Pre step: amg->dsType_ selects the smoother routine; its bc_d argument is the coarse right-hand side used below.
 36 |   switch (amg->dsType_)
 37 |   {
 38 |   case 0:
 39 |     smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d,
 40 |       level_id, largestblocksize, largest_num_entries, verbose);
 41 |     break;
 42 |   case 1:
 43 |     smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize);
 44 |     break;
 45 |   case 2:
 46 |     smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row);
 47 |     break;
 48 |   default:
 49 |     std::cout << "Wrong DStype 1!" << std::endl;
 50 |     exit(1); // unsupported dsType_ is an error, so terminate with a non-zero status
 51 |   }
 52 |   // Solve the coarse system on the next level. NOTE(review): V_CYCLE is hard-coded; the `cycle` argument is not forwarded.
 53 |   next->cycle(V_CYCLE, bc_d, xc_d, verbose);
 54 |   // Post step: the matching routine for the same dsType_, fed with the coarse result xc_d.
 55 |   switch (amg->dsType_)
 56 |   {
 57 |   case 0:
 58 |     smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d,
 59 |       level_id, largestblocksize, largest_num_entries);
 60 |     break;
 61 |   case 1:
 62 |     smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize);
 63 |     break;
 64 |   case 2:
 65 |     smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row);
 66 |     break;
 67 |   default:
 68 |     std::cout << "Wrong DStype 0!" << std::endl;
 69 |     exit(1); // same failure status as the pre-step default branch
 70 |   }
 71 | }
 72 | 
 73 | 
 74 | template <class Matrix, class Vector>
 75 | void AMG_Level<Matrix, Vector>::cycle_level0(CycleType cycle, Vector_d_CG &b_d_CG, Vector_d_CG &x_d_CG, bool verbose)
 76 | {
 77 |   // Coarsest level: solve on the host with amg->LU and copy the result into x_d_CG.
 78 |   if (isCoarsest())
 79 |   {
 80 |     cusp::array1d<ValueType, cusp::host_memory> temp_b = b_d_CG;
 81 |     cusp::array1d<ValueType, cusp::host_memory> temp_x(x_d_CG.size());
 82 |     amg->LU(temp_b, temp_x);
 83 |     x_d_CG = temp_x;
 84 |     return;
 85 |   }
 86 |   // Work on Vector_d copies of the CG vectors; x_d starts from zeros rather than from x_d_CG's contents.
 87 |   Vector_d b_d = b_d_CG;
 88 |   Vector_d x_d(x_d_CG.size(), 0.0);
 89 |   // Pre step: amg->dsType_ selects the smoother routine; its bc_d argument is the coarse right-hand side used below.
 90 |   switch (amg->dsType_)
 91 |   {
 92 |   case 0:
 93 |     smoother->preRRRFullSymmetric(AinSysCoo_d, AoutSys_d, AinBlockIdx_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d,
 94 |       level_id, largestblocksize, largest_num_entries, verbose);
 95 |     break;
 96 |   case 1:
 97 |     smoother->preRRRFull(AinEll_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize);
 98 |     break;
 99 |   case 2:
100 |     smoother->preRRRFullCsr(AinCSR_d, Aout_d, aggregateIdx_d, partitionIdx_d, restrictorFull_d, ipermutation_d, b_d, x_d, bc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row);
101 |     break;
102 |   default:
103 |     std::cout << "Wrong DStype 1!" << std::endl;
104 |     exit(1); // unsupported dsType_ is an error, so terminate with a non-zero status
105 |   }
106 |   // Solve the coarse system on the next level. NOTE(review): V_CYCLE is hard-coded; the `cycle` argument is not forwarded.
107 |   next->cycle(V_CYCLE, bc_d, xc_d, verbose);
108 |   // Post step: the matching routine for the same dsType_, fed with the coarse result xc_d.
109 |   switch (amg->dsType_)
110 |   {
111 |   case 0:
112 |     smoother->postPCRFullSymmetric(AinSysCoo_d, AinBlockIdx_d, AoutSys_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d,
113 |       level_id, largestblocksize, largest_num_entries);
114 |     break;
115 |   case 1:
116 |     smoother->postPCRFull(AinEll_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize);
117 |     break;
118 |   case 2:
119 |     smoother->postPCRFullCsr(AinCSR_d, Aout_d, AoutBlockIdx_d, aggregateIdx_d, partitionIdx_d, prolongatorFull_d, ipermutation_d, b_d, x_d, xc_d, level_id, largestblocksize, largest_num_entries, largest_num_per_row);
120 |     break;
121 |   default:
122 |     std::cout << "Wrong DStype 0!" << std::endl;
123 |     exit(1); // same failure status as the pre-step default branch
124 |   }
125 |   // Copy both working vectors back into the caller's CG vectors (b is written back as well as x).
126 |   x_d_CG = x_d;
127 |   b_d_CG = b_d;
128 | }
129 | 
130 | #include<smoothers/smoother.h>
131 | 
132 | template <class Matrix, class Vector>
133 | void AMG_Level<Matrix, Vector>::setup() {
134 |   // Create this level's smoother from the AMG settings and this level's A_d.
135 |   typedef Smoother<Matrix_d, Vector_d> LevelSmoother;
136 |   smoother = LevelSmoother::allocate(amg->smootherWeight_, amg->preInnerIters_, amg->postInnerIters_, amg->postRelaxes_, A_d);
137 | }
138 | 
139 | template <class Matrix, class Vector>
140 | std::vector<int> AMG_Level<Matrix, Vector>::getOriginalRows()
141 | { // returns a copy of originalRow; the member itself is not exposed
142 |   return std::vector<int>(this->originalRow);
143 | }
144 | 
145 | /****************************************
146 |  * Explicit instantiations
147 |  ***************************************/
148 | template class AMG_Level < Matrix_h, Vector_h > ; // instantiation for the Matrix_h/Vector_h pair
149 | template class AMG_Level < Matrix_d, Vector_d > ; // instantiation for the Matrix_d/Vector_d pair
150 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_GraphHelpers.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * File:   AggMIS_GraphHelpers.h
  3 |  * Author: T. James Lewis
  4 |  *
  5 |  * Created on April 16, 2013, 2:58 PM
  6 |  */
  7 | #ifndef AGGMIS_GRAPHHELPERS_H
  8 | #define	AGGMIS_GRAPHHELPERS_H
  9 | #include <AggMIS_Types.h>
 10 | #include <thrust/scan.h>
 11 | #include <thrust/adjacent_difference.h>
 12 | namespace AggMIS {
 13 |     namespace GraphHelpers {
 14 |         // Declarations only: every function and kernel below is defined outside this header.
 15 |         // GPU kernels (__global__ entry points). Each takes an element count `size` plus raw int buffers.
 16 |         namespace Kernels {
 17 |             __global__ void mapAdjacencyToBlockKernel(int size, 
 18 |                                             int *adjIndexes, 
 19 |                                             int *adjacency, 
 20 |                                             int *adjacencyBlockLabel, 
 21 |                                             int *blockMappedAdjacency, 
 22 |                                             int *fineAggregate); // NOTE(review): presumably the worker behind mapAdjacencyToBlock() below - confirm
 23 |             __global__ void findPartIndicesNegStartKernel(int size, 
 24 |                                             int *array, 
 25 |                                             int *partIndices);
 26 |             __global__ void findPartIndicesKernel(int size, 
 27 |                                             int *array, 
 28 |                                             int *partIndices);
 29 |             __global__ void findAdjacencySizesKernel(int size, 
 30 |                                             int *adjIndexes, 
 31 |                                             int *output);
 32 |             __global__ void accumulatedPartSizesKernel(int size, 
 33 |                                             int *part, 
 34 |                                             int *weights, 
 35 |                                             int *accumulatedSize);
 36 |             __global__ void unaccumulatedPartSizesKernel(int size, 
 37 |                                             int *accumulatedSize, 
 38 |                                             int *sizes);            
 39 |         }
 40 |         Types::Graph_d* GetInducedGraph(Types::Graph_d &graph, // Graph_d overload; returns a raw pointer - ownership not shown here, TODO confirm
 41 |                             Types::IntVector_d &aggregation);
 42 |         Types::Graph_h* GetInducedGraph(Types::Graph_h &graph, // Graph_h overload with the same shape of interface
 43 |                             Types::IntVector_h &aggregation);
 44 |         void mapAdjacencyToBlock(Types::IntVector_d &adjIndexes, // all five vectors are taken by non-const reference; which ones are outputs is not visible here
 45 |                             Types::IntVector_d &adjacency, 
 46 |                             Types::IntVector_d &adjacencyBlockLabel, 
 47 |                             Types::IntVector_d &blockMappedAdjacency, 
 48 |                             Types::IntVector_d &fineAggregate);
 49 |         void getPartIndicesNegStart(Types::IntVector_d& sortedPartition, // NOTE(review): name suggests the input must already be sorted - confirm
 50 |                             Types::IntVector_d& partIndices);
 51 |         /**
 52 |          * Gets a vector whose values are the indices of its own elements.
 53 |          * @param size Size of vector to create
 54 |          * @return A pointer to the newly created vector
 55 |          */
 56 |         Types::IntVector_d* GetIndicesVector(int size);
 57 |         /**
 58 |          * Writes the index of each vector element as its value
 59 |          * @param size The size the vector should be
 60 |          * @param toSet The vector to set (Overwritten)
 61 |          */
 62 |         void SetToIndicesVector(int size, 
 63 |                             Types::IntVector_d& toSet);
 64 |         /**
 65 |          * Gets the size (count of nodes) of each aggregate. 
 66 |          * @param aggregation Labels each node with its aggregate ID
 67 |          * @param sizes Vector to output the computed sizes (Overwritten)
 68 |          */
 69 |         void getPartSizes(Types::IntVector_d &aggregation, 
 70 |                             Types::IntVector_d &sizes);
 71 |         /**
 72 |          * Gets the size of each aggregate, taking into account the weight of 
 73 |          * each node.
 74 |          * @param aggregation Labels each node with its aggregate ID
 75 |          * @param sizes Vector to put the computed sizes into (Overwritten)
 76 |          * @param weights The weights of each graph node
 77 |          */
 78 |         void getPartSizes(Types::IntVector_d &aggregation, 
 79 |                             Types::IntVector_d &sizes, 
 80 |                             Types::IntVector_d &weights);
 81 |         /**
 82 |          * Finds the valence of each node in the given graph.
 83 |          * @param graph Input graph
 84 |          * @return A pointer to a vector containing the valence of each node
 85 |          */
 86 |         Types::IntVector_d* GetValences(Types::Graph_d &graph);
 87 |         Types::IntVector_h* GetValences(Types::Graph_h &graph); // Graph_h overload of the function documented above
 88 |         /**
 89 |          * Checks if a graph is a valid undirected graph. Valid being that each 
 90 |          * node listing a node as neighbor is a neighbor of the listed node, and
 91 |          * that all listed neighbors are valid graph nodes.
 92 |          * @param graph The graph to check
 93 |          * @return True if graph is valid, false otherwise
 94 |          */
 95 |         bool IsGraphValid(Types::Graph_d &graph);
 96 |         /**
 97 |          * Checks if a graph is a valid undirected graph. Valid being that each 
 98 |          * node listing a node as neighbor is a neighbor of the listed node, and
 99 |          * that all listed neighbors are valid graph nodes.
100 |          * @param graph The graph to check
101 |          * @return True if graph is valid, false otherwise
102 |          */
103 |         bool IsGraphValid(Types::Graph_h &graph);
104 |         /**
105 |          * Checks if a graph is a valid undirected graph. Valid being that each 
106 |          * node listing a node as neighbor is a neighbor of the listed node, and
107 |          * that all listed neighbors are valid graph nodes.
108 |          * @param indices The vector of indices into adjacency list
109 |          * @param adjacency The adjacency list
110 |          * @return True if graph is valid, false otherwise
111 |          */
112 |         bool IsGraphValid(Types::IntVector_d &indices, Types::IntVector_d &adjacency);
113 |         /**
114 |          * Checks if a graph is a valid undirected graph. Valid being that each 
115 |          * node listing a node as neighbor is a neighbor of the listed node, and
116 |          * that all listed neighbors are valid graph nodes.
117 |          * @param indices The vector of indices into adjacency list
118 |          * @param adjacency The adjacency list
119 |          * @return True if graph is valid, false otherwise
120 |          */
121 |         bool IsGraphValid(Types::IntVector_h &indices, Types::IntVector_h &adjacency);
122 |     }
123 | }
124 | #endif	/* AGGMIS_GRAPHHELPERS_H */
125 | 
126 | 


--------------------------------------------------------------------------------
/src/CodeCoverage.cmake:
--------------------------------------------------------------------------------
  1 | #
  2 | # 2012-01-31, Lars Bilke
  3 | # - Enable Code Coverage
  4 | #
  5 | # 2013-09-17, Joakim Söderberg
  6 | # - Added support for Clang.
  7 | # - Some additional usage instructions.
  8 | #
  9 | # USAGE:
 10 | 
 11 | # 0. (Mac only) If you use Xcode 5.1 make sure to patch geninfo as described here:
 12 | #      http://stackoverflow.com/a/22404544/80480
 13 | #
 14 | # 1. Copy this file into your cmake modules path.
 15 | #
 16 | # 2. Add the following line to your CMakeLists.txt:
 17 | #      INCLUDE(CodeCoverage)
 18 | #
 19 | # 3. Set compiler flags to turn off optimization and enable coverage:
 20 | #    SET(CMAKE_CXX_FLAGS "-g -O0 -fprofile-arcs -ftest-coverage")
 21 | #    SET(CMAKE_C_FLAGS "-g -O0 -fprofile-arcs -ftest-coverage")
 22 | #
 23 | # 4. Use the function SETUP_TARGET_FOR_COVERAGE to create a custom make target
 24 | #    which runs your test executable and produces a lcov code coverage report:
 25 | #    Example:
 26 | #  SETUP_TARGET_FOR_COVERAGE(
 27 | #       my_coverage_target  # Name for custom target.
 28 | #       test_driver         # Name of the test driver executable that runs the tests.
 29 | #                 # NOTE! This should always have a ZERO as exit code
 30 | #                 # otherwise the coverage generation will not complete.
 31 | #       coverage            # Name of output directory.
 32 | #       )
 33 | #
 34 | # 5. Build a Debug build:
 35 | #  cmake -DCMAKE_BUILD_TYPE=Debug ..
 36 | #  make
 37 | #  make my_coverage_target
 38 | #
 39 | #
 40 | 
 41 | # Check prereqs: locate the coverage tools; only gcov is mandatory at include time
 42 | FIND_PROGRAM( GCOV_PATH gcov )
 43 | FIND_PROGRAM( LCOV_PATH lcov )
 44 | FIND_PROGRAM( GENHTML_PATH genhtml )
 45 | FIND_PROGRAM( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/tests) # also searched for under <source>/tests
 46 | 
 47 | IF(NOT GCOV_PATH)
 48 |   MESSAGE(FATAL_ERROR "gcov not found! Aborting...")
 49 | ENDIF() # NOT GCOV_PATH
 50 | 
 51 | IF(NOT CMAKE_COMPILER_IS_GNUCXX)
 52 |   # Clang version 3.0.0 and greater now supports gcov as well.
 53 |   MESSAGE(WARNING "Compiler is not GNU gcc! Clang Version 3.0.0 and greater supports gcov as well, but older versions don't.")
 54 | 
 55 |   IF(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # any compiler other than GNU or Clang is rejected
 56 |     MESSAGE(FATAL_ERROR "Compiler is not GNU gcc! Aborting...")
 57 |   ENDIF()
 58 | ENDIF() # NOT CMAKE_COMPILER_IS_GNUCXX
 59 | 
 60 | SET(CMAKE_CXX_FLAGS_COVERAGE # per-configuration flags for a build type named "Coverage"; FORCE overwrites any cached value
 61 |   "-g -O0 --coverage -fprofile-arcs -ftest-coverage"
 62 |   CACHE STRING "Flags used by the C++ compiler during coverage builds."
 63 |   FORCE )
 64 | SET(CMAKE_C_FLAGS_COVERAGE
 65 |   "-g -O0 --coverage -fprofile-arcs -ftest-coverage"
 66 |   CACHE STRING "Flags used by the C compiler during coverage builds."
 67 |   FORCE )
 68 | SET(CMAKE_EXE_LINKER_FLAGS_COVERAGE # linker flag variables are forced to the empty string
 69 |   ""
 70 |   CACHE STRING "Flags used for linking binaries during coverage builds."
 71 |   FORCE )
 72 | SET(CMAKE_SHARED_LINKER_FLAGS_COVERAGE
 73 |   ""
 74 |   CACHE STRING "Flags used by the shared libraries linker during coverage builds."
 75 |   FORCE )
 76 | MARK_AS_ADVANCED(
 77 |   CMAKE_CXX_FLAGS_COVERAGE
 78 |   CMAKE_C_FLAGS_COVERAGE
 79 |   CMAKE_EXE_LINKER_FLAGS_COVERAGE
 80 |   CMAKE_SHARED_LINKER_FLAGS_COVERAGE )
 81 | 
 82 | IF ( NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "Coverage")) # warn only; configuration continues
 83 |   MESSAGE( WARNING "Code coverage results with an optimized (non-Debug -- currently '${CMAKE_BUILD_TYPE}') build may be misleading" )
 84 | ENDIF() # NOT CMAKE_BUILD_TYPE STREQUAL "Debug"
 85 | 
 86 | 
 87 | # Param _targetname     The name of the new custom make target
 88 | # Param _testrunner     The name of the target which runs the tests.
 89 | #           MUST return ZERO always, even on errors.
 90 | #           If not, no coverage report will be created!
 91 | # Param _outputname     lcov output is generated as _outputname.info
 92 | #                       HTML report is generated in _outputname/index.html
 93 | # Optional fourth parameter is passed as arguments to _testrunner
 94 | #   Pass them in list form, e.g.: "-j;2" for -j 2
 95 | FUNCTION(SETUP_TARGET_FOR_COVERAGE _targetname _testrunner _outputname)
 96 | 
 97 |   IF(NOT LCOV_PATH) # lcov and genhtml are only required once this function is actually called
 98 |     MESSAGE(FATAL_ERROR "lcov not found! Aborting...")
 99 |   ENDIF() # NOT LCOV_PATH
100 | 
101 |   IF(NOT GENHTML_PATH)
102 |     MESSAGE(FATAL_ERROR "genhtml not found! Aborting...")
103 |   ENDIF() # NOT GENHTML_PATH
104 | 
105 |   # Setup target: the commands below run in order from CMAKE_BINARY_DIR
106 |   ADD_CUSTOM_TARGET(${_targetname}
107 | 
108 |     # Cleanup lcov (this first command is given without the COMMAND keyword)
109 |     ${LCOV_PATH} --directory . --zerocounters
110 | 
111 |     # Run tests; ARGV3 is the optional fourth argument described above
112 |     COMMAND ${_testrunner} ${ARGV3}
113 | 
114 |     # Capturing lcov counters and generating report
115 |     COMMAND ${LCOV_PATH} --directory . --capture --output-file ${_outputname}.info
116 |     COMMAND ${LCOV_PATH} --remove ${_outputname}.info 'tests/*' '/usr/*' --output-file ${_outputname}.info.cleaned # drop tests/ and /usr/ entries from the capture
117 |     COMMAND ${GENHTML_PATH} -o ${_outputname} ${_outputname}.info.cleaned
118 |     COMMAND ${CMAKE_COMMAND} -E remove ${_outputname}.info ${_outputname}.info.cleaned # delete both intermediate .info files once the HTML exists
119 | 
120 |     WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
121 |     COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report."
122 |     )
123 | 
124 |   # Show info where to find the report
125 |   ADD_CUSTOM_COMMAND(TARGET ${_targetname} POST_BUILD
126 |     COMMAND ; # NOTE(review): presumably a placeholder so that only the COMMENT is printed - confirm
127 |     COMMENT "Open ./${_outputname}/index.html in your browser to view the coverage report."
128 |     )
129 | 
130 | ENDFUNCTION() # SETUP_TARGET_FOR_COVERAGE
131 | 
132 | # Param _targetname     The name of the new custom make target
133 | # Param _testrunner     The name of the target which runs the tests
134 | # Param _outputname     cobertura output is generated as _outputname.xml
135 | # Optional fourth parameter is passed as arguments to _testrunner
136 | #   Pass them in list form, e.g.: "-j;2" for -j 2
137 | FUNCTION(SETUP_TARGET_FOR_COVERAGE_COBERTURA _targetname _testrunner _outputname)
138 | 
139 |   IF(NOT PYTHON_EXECUTABLE) # required here, although the commands below never reference PYTHON_EXECUTABLE
140 |     MESSAGE(FATAL_ERROR "Python not found! Aborting...")
141 |   ENDIF() # NOT PYTHON_EXECUTABLE
142 | 
143 |   IF(NOT GCOVR_PATH)
144 |     MESSAGE(FATAL_ERROR "gcovr not found! Aborting...")
145 |   ENDIF() # NOT GCOVR_PATH
146 | 
147 |   ADD_CUSTOM_TARGET(${_targetname}
148 | 
149 |     # Run tests; ARGV3 is the optional fourth argument described above
150 |     ${_testrunner} ${ARGV3}
151 | 
152 |     # Running gcovr on the source tree, excluding <source>/tests/, writing _outputname.xml
153 |     COMMAND ${GCOVR_PATH} -x -r ${CMAKE_SOURCE_DIR} -e '${CMAKE_SOURCE_DIR}/tests/'  -o ${_outputname}.xml
154 |     WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
155 |     COMMENT "Running gcovr to produce Cobertura code coverage report."
156 |     )
157 | 
158 |   # Show info where to find the report
159 |   ADD_CUSTOM_COMMAND(TARGET ${_targetname} POST_BUILD
160 |     COMMAND ; # NOTE(review): presumably a placeholder so that only the COMMENT is printed - confirm
161 |     COMMENT "Cobertura code coverage report saved in ${_outputname}.xml."
162 |     )
163 | 
164 | ENDFUNCTION() # SETUP_TARGET_FOR_COVERAGE_COBERTURA
165 | 


--------------------------------------------------------------------------------
/src/core/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | #CMAKE SETTING CUDA_NVCC_FLAGS
  2 | #############################
  3 | # Check for GPUs present and their compute capability
  4 | # based on http://stackoverflow.com/questions/2285185/easiest-way-to-test-for-existence-of-cuda-capable-gpu-from-cmake/2297877#2297877 (Christopher Bruns)
  5 | if(CUDA_FOUND AND NOT CUDA_HAVE_GPU)
  6 |     message(STATUS "Checking CUDA compute capability.")
  7 |     try_run(RUN_RESULT_VAR COMPILE_RESULT_VAR
  8 |         ${CMAKE_BINARY_DIR} 
  9 |         ${CMAKE_SOURCE_DIR}/cuda_compute_capability.c
 10 |         CMAKE_FLAGS 
 11 |         -DINCLUDE_DIRECTORIES:STRING=${CUDA_TOOLKIT_INCLUDE}
 12 |         -DLINK_LIBRARIES:STRING=${CUDA_CUDART_LIBRARY}
 13 |         COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR
 14 |         RUN_OUTPUT_VARIABLE RUN_OUTPUT_VAR)
 15 |     # COMPILE_RESULT_VAR is TRUE when compile succeeds
 16 |     # RUN_RESULT_VAR is zero when a GPU is found; the probe's output is used as the capability number
 17 |     if(COMPILE_RESULT_VAR AND NOT RUN_RESULT_VAR)
 18 |         set(CUDA_HAVE_GPU TRUE CACHE INTERNAL "Whether CUDA-capable GPU is present") # exactly one cache type: with "INTERNAL BOOL" the call is not parsed as a cache entry at all
 19 |         set(CUDA_COMPUTE_CAPABILITY ${RUN_OUTPUT_VAR} CACHE STRING "Compute capability of CUDA-capable GPU present")
 20 |         set(CUDA_GENERATE_CODE "arch=compute_${CUDA_COMPUTE_CAPABILITY},code=sm_${CUDA_COMPUTE_CAPABILITY}" CACHE STRING "Which GPU architectures to generate code for (each arch/code pair will be passed as --generate-code option to nvcc, separate multiple pairs by ;)")
 21 |         mark_as_advanced(CUDA_COMPUTE_CAPABILITY CUDA_GENERATE_CODE)
 22 |         LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21" "-arch=sm_30" "-arch=sm_31")
 23 |         set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-arch=sm_${CUDA_COMPUTE_CAPABILITY}"  CACHE LIST "Semi-colon delimit multiple arguments." FORCE)
 24 | 	message(STATUS "Automatic Compute Capability Set: ${CUDA_COMPUTE_CAPABILITY}")
 25 |     else()
 26 |         message(STATUS "You must set your compute capability manually")
 27 |         set(CUDA_HAVE_GPU FALSE CACHE INTERNAL "Whether CUDA-capable GPU is present") # must evaluate as false so the manual selection below actually runs
 28 |     endif()
 29 | endif()
 30 | 
 31 | IF (NOT CUDA_HAVE_GPU) # manual fallback: the first enabled CUDA_USE_COMPUTE_CAPABILITY_xx option wins
 32 |     SET(CUDA_USE_COMPUTE_CAPABILITY_10 false CACHE BOOL "Use Compute Capability 1.0")
 33 |     SET(CUDA_USE_COMPUTE_CAPABILITY_11 false CACHE BOOL "Use Compute Capability 1.1")
 34 |     SET(CUDA_USE_COMPUTE_CAPABILITY_12 false CACHE BOOL "Use Compute Capability 1.2")  
 35 |     SET(CUDA_USE_COMPUTE_CAPABILITY_13 false CACHE BOOL "Use Compute Capability 1.3")
 36 |     SET(CUDA_USE_COMPUTE_CAPABILITY_20 true CACHE BOOL "Use Compute Capability 2.0")
 37 |     SET(CUDA_USE_COMPUTE_CAPABILITY_21 false CACHE BOOL "Use Compute Capability 2.1")
 38 |     
 39 |     IF(CUDA_USE_COMPUTE_CAPABILITY_10)
 40 |       LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21")
 41 |       LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_10")
 42 |       SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE)
 43 |       set(CUDA_HAVE_GPU TRUE) # normal (non-cached) variable on purpose: the manual options are re-read on every configure
 44 |     ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_11)
 45 |       LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21")
 46 |       LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_11")
 47 |       SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE)
 48 |       set(CUDA_HAVE_GPU TRUE)
 49 |     ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_12)
 50 |       LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21")
 51 |       LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_12")
 52 |       SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE)
 53 |       set(CUDA_HAVE_GPU TRUE)
 54 |     ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_13)
 55 |       LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21")
 56 |       LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_13")
 57 |       SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE)
 58 |       set(CUDA_HAVE_GPU TRUE)
 59 |     ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_20)
 60 |       LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21")
 61 |       LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_20")
 62 |       SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE)
 63 |       set(CUDA_HAVE_GPU TRUE)
 64 |     ELSEIF(CUDA_USE_COMPUTE_CAPABILITY_21)
 65 |       LIST(REMOVE_ITEM CUDA_NVCC_FLAGS "-arch=sm_10" "-arch=sm_11" "-arch=sm_12" "-arch=sm_13" "-arch=sm_20" "-arch=sm_21")
 66 |       LIST(APPEND CUDA_NVCC_FLAGS "-arch=sm_21")
 67 |       SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} CACHE LIST "Semi-colon delimit multiple arguments." FORCE)
 68 |       set(CUDA_HAVE_GPU TRUE)
 69 |     ENDIF()
 70 | ENDIF ()
 71 | 
 72 | IF(NOT CUDA_HAVE_GPU) # reached only when detection failed (or was skipped) and no manual option is enabled
 73 | 	message( FATAL_ERROR "You must set your compute capability" )
 74 | ENDIF()
 75 | #END CMAKE SETTING CUDA_NVCC_FLAGS
 76 | ###################
 77 | 
 78 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) # header search paths for the core sources
 79 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/cycles)
 80 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/smoothers)
 81 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/aggmis/include)
 82 | include_directories(${METIS_PATH}/Lib) # METIS_PATH is expected to be set outside this file
 83 | include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 84 | 
 85 | FILE(GLOB CORE_CUDA_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cuda/*.cu
 86 | 							${CMAKE_CURRENT_SOURCE_DIR}/aggmis/cuda/*.cu)
 87 | 							
 88 | FILE(GLOB CORE_CUDA_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h
 89 | 							${CMAKE_CURRENT_SOURCE_DIR}/include/smoothers/*.h
 90 | 							${CMAKE_CURRENT_SOURCE_DIR}/include/smoothedMG/*.h
 91 | 							${CMAKE_CURRENT_SOURCE_DIR}/include/smoothedMG/aggregators/*.h
 92 | 							${CMAKE_CURRENT_SOURCE_DIR}/include/FEM/*.h
 93 | 							${CMAKE_CURRENT_SOURCE_DIR}/include/cycles/*.h
 94 | 							${CMAKE_CURRENT_SOURCE_DIR}/aggmis/include/*.h
 95 | 							${CMAKE_CURRENT_SOURCE_DIR}/cuda/perform_element_loop_3D.cuh
 96 | 							${CMAKE_CURRENT_SOURCE_DIR}/cuda/perform_element_loop_2D.cuh)
 97 | 
 98 | CUDA_ADD_LIBRARY(FEM_CORE STATIC ${CORE_CUDA_SOURCES} ${CORE_CUDA_HEADERS}) # static library built from the globbed .cu sources and headers
 99 | ADD_DEPENDENCIES(FEM_CORE metis) # metis and cusp are targets defined outside this file
100 | ADD_DEPENDENCIES(FEM_CORE cusp)
101 | if (CMAKE_SYSTEM_NAME MATCHES "Linux") # on Linux the library is additionally linked with -ldl
102 |   TARGET_LINK_LIBRARIES(FEM_CORE -ldl ${METIS_LIBRARY})
103 | else()
104 |   TARGET_LINK_LIBRARIES(FEM_CORE ${METIS_LIBRARY})
105 | endif()
106 | 


--------------------------------------------------------------------------------
/src/core/aggmis/include/AggMIS_MergeSplitConditioner_CPU.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * File:   AggMIS_MergeSplitConditioner_CPU.h
  3 |  * Author: T. James Lewis
  4 |  *
  5 |  * Created on July 4, 2013, 1:14 PM
  6 |  */
  7 | 
  8 | #ifndef AGGMIS_MERGESPLITCONDITIONER_CPU_H
  9 | #define	AGGMIS_MERGESPLITCONDITIONER_CPU_H
 10 | 
 11 | #include "AggMIS_Types.h"
 12 | #include "AggMIS_Aggregation_CPU.h"
 13 | #include "AggMIS_GraphHelpers.h"
 14 | #include "thrust/count.h"
 15 | #include "AggMIS_IOHelpers.h"
 16 | 
 17 | namespace AggMIS {
 18 |     namespace MergeSplitCPU {
 19 |         
 20 |         // Functors for Thrust calls: unary predicates over int, callable on host and device
 21 |         namespace Functors {
 22 |             struct isOutSized:public thrust::unary_function<int,int> { // NOTE(review): base declares an int result but operator() returns bool
 23 |                 const int a, b;
 24 |                 isOutSized(int _a, int _b):a(_a),b(_b){}
 25 | 
 26 |                 __host__ __device__ 
 27 |                 bool operator()(const int &x) const
 28 |                 {
 29 |                     return x < a || x > b; // true when x lies outside the closed range [a, b]
 30 |                 }
 31 |             };
 32 |             struct lessThan:public thrust::unary_function<int,int> {
 33 |                 const int a;
 34 | 
 35 |                 lessThan(int _a): a(_a){}
 36 | 
 37 |                 __host__ __device__
 38 |                 int operator()(const int &x) const
 39 |                 {
 40 |                     return x < a; // 1 when x is strictly below the threshold, else 0
 41 |                 }
 42 |             };
 43 |             struct greaterThan:public thrust::unary_function<int,int> {
 44 |                 const int a;
 45 | 
 46 |                 greaterThan(int _a): a(_a){}
 47 | 
 48 |                 __host__ __device__
 49 |                 int operator()(const int &x) const
 50 |                 {
 51 |                     return x > a; // 1 when x is strictly above the threshold, else 0
 52 |                 }
 53 |             };
 54 |             struct NotNegOne:public thrust::unary_function<int,int> {
 55 |                 __host__ __device__
 56 |                 int operator()(int a) const
 57 |                 {
 58 |                     // Returns 0 for any negative value (not only -1), otherwise 1
 59 |                     return a < 0 ? 0 : 1;
 60 |                 }
 61 |             };
 62 |             struct EqualTo:public thrust::unary_function<int,int> {
 63 |                 const int a;
 64 | 
 65 |                 EqualTo(int _a): a(_a){}
 66 | 
 67 |                 __host__ __device__
 68 |                 int operator()(const int &x) const
 69 |                 {
 70 |                     return x == a; // 1 when x equals the stored value, else 0
 71 |                 }
 72 |             };
 73 |         }
 74 |         
 75 |         // Merge and Split conditioner main container (declaration only; members are defined elsewhere)
 76 |         class MergeSplitConditionerCPU {
 77 |         public:
 78 |             /**
 79 |             * The primary constructor: it creates a conditioner given the 
 80 |             * specified graph and aggregation. It makes an internal copy of
 81 |             * the aggregation vector, but it uses a pointer back to the given 
 82 |             * Graph object
 83 |             * @param graph The graph that the aggregation being conditioned
 84 |             * is an aggregation of.
 85 |             * @param aggregation A vector which labels every node in the graph
 86 |             * with an aggregate ID.
 87 |             */            
 88 |             MergeSplitConditionerCPU(Types::Graph_h &graph, 
 89 |                     Types::IntVector_h &aggregation);
 90 |             void SetSizeBounds(int min, int max); // presumably stores minSize/maxSize - confirm in the implementation
 91 |             void SetVerbose(bool v); // presumably sets the `verbose` member - confirm
 92 |             /**
 93 |              * Sets the node weights by swapping the contents of the provided 
 94 |              * vector into the nodeWeights member. Then it re-computes the part
 95 |              * sizes with weighting.
 96 |              * @param nodeWeights A vector containing the weights of each node
 97 |              * the contents of this vector are swapped out by the method.
 98 |              */
 99 |             void SetNodeWeights(Types::IntVector_h &nodeWeights);
100 |             /**
101 |              * Getter for the aggregation vector.
102 |              * @return A pointer to the current aggregation vector
103 |              */
104 |             Types::IntVector_h* GetAggregation();
105 |             /**
106 |              * Getter for the NodeWeights vector
107 |              * @return A pointer to the current NodeWeights vector
108 |              */
109 |             Types::IntVector_h* GetNodeWeights();
110 |             void CycleMerges(bool force); // NOTE(review): effect of `force` is not visible in this header
111 |             void CycleSplits(bool force);
112 |             void CycleMergeSplits(float minImprove, 
113 |                     int desiredSize);
114 |             bool Condition(int desiredSize, // NOTE(review): meaning of the bool result is not visible here - confirm
115 |                     bool respectUpper, 
116 |                     float tolerance, 
117 |                     float minImprove, 
118 |                     int maxCycles);
119 |             void PrintProgress(std::ostream* output, // the three bool flags presumably select which stat groups are printed - confirm
120 |                     std::string note,
121 |                     bool graphStat,
122 |                     bool progressStat,
123 |                     bool sizeStat);
124 |             void PrintSizeStats(std::ostream* output,
125 |                     bool makeHeader);
126 |             void PrintProgressStats(std::ostream* output,
127 |                     bool makeHeader);
128 |             void PrintGraphStats(std::ostream* output,
129 |                     bool makeHeader);
130 |             void InteractiveConsole(std::string message);
131 |         private:
132 |             bool MarkMerges(bool force); // internal helpers; their behaviour is defined outside this header
133 |             bool MarkSplits(bool force);
134 |             void MarkMergeSplits(int desiredSize);
135 |             void MakeSplits();
136 |             void MakeMerges(bool markSplits);
137 |             void MakeMergesDirect(bool force);
138 |             int MergeAggregates(int aggA, int aggB);
139 |             int MergeAggregates(int aggA, int aggB, bool fillSpot);
140 |             void MakeSplitsDirect(bool force);
141 |             void SplitAggregate(int agg, int newAgg);
142 |             void MakeMergeSplits(int desiredSize);
143 |             void UnlinkAggregate(int aggId);
144 |             void FixSizesFromAggMap(int aggId);
145 |             void LinkAggregate(int aggId);
146 |             void FillAggAdjacency();
147 |             void FillAggMap();
148 |             void ValidateAggAdjacency();
149 |             void ValidateAggMap();
150 |             void ValidatePartSizes();
151 |             void ValidateArraySizes(std::string message);
152 |             
153 |             // Data members
154 |             Types::Graph_h *graph; // pointer back to the caller's graph (see the constructor comment)
155 |             Types::Graph_h *inducedGraph; // NOTE(review): ownership/lifetime not visible in this header
156 |             
157 |             int minSize, 
158 |                 maxSize, 
159 |                 outsizedParts,
160 |                 merges,
161 |                 mergeSplits,
162 |                 splits;
163 |             
164 |             Types::IntVector_h aggregation, // internal copy of the aggregation labels (see the constructor comment)
165 |                 nodeWeights,
166 |                 distances,
167 |                 partSizes,
168 |                 weightedSizes,
169 |                 splitsToMake, 
170 |                 mergesToMake,
171 |                 mergeOffsets;
172 |             
173 |             // Stores lists of nodes in each aggregate
174 |             std::vector<std::vector<int> > aggMap;
175 |             
176 |             // Stores the neighbors of each aggregate
177 |             std::vector<std::vector<int> > aggAdjacency;
178 |             
179 |             // Stores the root point sets for each aggregate
180 |             std::vector<std::vector<int> > rootPoints;
181 |             
182 |             bool verbose;
183 |         };
184 |     }
185 | }
186 | #endif	/* AGGMIS_MERGESPLITCONDITIONER_CPU_H */
187 | 
188 | 


--------------------------------------------------------------------------------
/src/core/include/smoothers/gauss_seidel.h:
--------------------------------------------------------------------------------
  1 | #ifndef __GAUSSSEIDEL_H__
  2 | #define __GAUSSSEIDEL_H__
  3 | 
  4 | #include <string>
  5 | #include <smoothers/smoother.h>
  6 | #include <cusp/multiply.h>
  7 | #include <my_timer.h>
  8 | 
  9 | /*****************************************************
 10 |  * Gauss-Seidel smoother
 11 |  ****************************************************/
 12 | template<class Matrix, class Vector>
 13 | class gauss_seidel : public Smoother < Matrix, Vector >
 14 | {
 15 | public:
 16 |   typedef typename Matrix::value_type ValueType;
 17 |   typedef typename Matrix::index_type IndexType;
 18 |   typedef typename Matrix::memory_space MemorySpace;
 19 | 
 20 |   gauss_seidel(double smootherWeight,
 21 |     int preInnerIters, int postInnerIters, int postRelaxes, 
 22 |     const Matrix_d& Ainit);
 23 |   void find_diag(const Matrix_ell_d& A);
 24 |   void smooth(const Matrix& A, const Vector& b, Vector& x);
 25 |   void smooth_with_0_initial(const Matrix& A, const Vector &b, Vector &x);
 26 | 
 27 |   void preRRRFull(const cusp::ell_matrix<IndexType, ValueType, MemorySpace>& AinEll,
 28 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 29 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 30 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 31 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 32 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
 33 |     cusp::array1d<ValueType, MemorySpace>& b,
 34 |     cusp::array1d<ValueType, MemorySpace>& x,
 35 |     cusp::array1d<ValueType, MemorySpace>& bc,
 36 |     int level_id,
 37 |     int largestblksz);
 38 | 
 39 |   void preRRRFullCsr(const cusp::csr_matrix<IndexType, ValueType, MemorySpace>& AinCsr,
 40 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 41 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 42 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 43 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 44 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
 45 |     cusp::array1d<ValueType, MemorySpace>& b,
 46 |     cusp::array1d<ValueType, MemorySpace>& x,
 47 |     cusp::array1d<ValueType, MemorySpace>& bc,
 48 |     int level_id,
 49 |     int largestblksize,
 50 |     int largestnumentries,
 51 |     int largestnumperrow);
 52 | 
 53 |   void preRRRFullSymmetric(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo,
 54 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
 55 |     const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
 56 |     const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
 57 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 58 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 59 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 60 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
 61 |     cusp::array1d<ValueType, MemorySpace>& b,
 62 |     cusp::array1d<ValueType, MemorySpace>& x,
 63 |     cusp::array1d<ValueType, MemorySpace>& bc,
 64 |     int level_id,
 65 |     int largestblksz,
 66 |     int largestnumentries,
 67 |     bool verbose = false);
 68 |   void preRRRFullSymmetricSync(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo,
 69 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
 70 |     const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
 71 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 72 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 73 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 74 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
 75 |     cusp::array1d<ValueType, MemorySpace>& b,
 76 |     cusp::array1d<ValueType, MemorySpace>& x,
 77 |     cusp::array1d<ValueType, MemorySpace>& bc,
 78 |     const cusp::array1d<IndexType, MemorySpace>& segSyncIdx,
 79 |     const cusp::array1d<IndexType, MemorySpace>& partSyncIdx,
 80 |     int level_id,
 81 |     int largestblksz,
 82 |     int largestnumentries);
 83 | 
 84 |   void postPCR(const cusp::ell_matrix<IndexType, ValueType, MemorySpace>& AinEll,
 85 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 86 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 87 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 88 |     const cusp::array1d<ValueType, MemorySpace>& P,
 89 |     const cusp::array1d<ValueType, MemorySpace>& b,
 90 |     cusp::array1d<ValueType, MemorySpace>& x,
 91 |     cusp::array1d<ValueType, MemorySpace>& xc);
 92 | 
 93 |   void postPCRFull(const cusp::ell_matrix<IndexType, ValueType, MemorySpace>& AinEll,
 94 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 95 |     const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
 96 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 97 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 98 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
 99 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
100 |     const cusp::array1d<ValueType, MemorySpace>& b,
101 |     cusp::array1d<ValueType, MemorySpace>& x,
102 |     cusp::array1d<ValueType, MemorySpace>& xc,
103 |     int level_id,
104 |     int largestblksz);
105 | 
106 |   void postPCRFullCsr(const cusp::csr_matrix<IndexType, ValueType, MemorySpace>& AinCsr,
107 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
108 |     const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
109 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
110 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
111 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
112 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
113 |     const cusp::array1d<ValueType, MemorySpace>& b,
114 |     cusp::array1d<ValueType, MemorySpace>& x,
115 |     cusp::array1d<ValueType, MemorySpace>& xc,
116 |     int level_id,
117 |     int largestblksz,
118 |     int largestnumentries,
119 |     int largestnumperrow);
120 | 
121 |   void postPCRFullSymmetric(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo,
122 |     const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
123 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
124 |     const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
125 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
126 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
127 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
128 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
129 |     const cusp::array1d<ValueType, MemorySpace>& b,
130 |     cusp::array1d<ValueType, MemorySpace>& x,
131 |     cusp::array1d<ValueType, MemorySpace>& xc,
132 |     int level_id,
133 |     int largestblksz,
134 |     int largestnumentries);
135 | 
136 |   void postPCRFullSymmetricSync(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo,
137 |     const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
138 |     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
139 |     const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
140 |     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
141 |     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
142 |     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
143 |     const cusp::array1d<IndexType, MemorySpace>& permutation,
144 |     const cusp::array1d<ValueType, MemorySpace>& b,
145 |     cusp::array1d<ValueType, MemorySpace>& x,
146 |     cusp::array1d<ValueType, MemorySpace>& xc,
147 |     const cusp::array1d<IndexType, MemorySpace>& segSyncIdx,
148 |     const cusp::array1d<IndexType, MemorySpace>& partSyncIdx,
149 |     int level_id,
150 |     int largestblksz,
151 |     int largestnumentries);
152 | 
153 | public:
154 |   double smootherWeight_;
155 |   int nPreInnerIter_;
156 |   int nPostInnerIter_;
157 |   int post_relaxes_;
158 | 
159 | };
160 | #endif
161 | 


--------------------------------------------------------------------------------
/src/core/aggmis/cuda/TriMesh_connectivity.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |    Szymon Rusinkiewicz
  3 |    Princeton University
  4 | 
  5 |    TriMesh_connectivity.cc
  6 |    Manipulate data structures that describe connectivity between faces and verts.
  7 |  */
  8 | 
  9 | 
 10 | #include <stdio.h>
 11 | #include "TriMesh.h"
 12 | #include <algorithm>
 13 | 
 14 | void TriMesh::need_meshquality()
 15 | {
 16 |   int max_valance = 0;
 17 |   int sum = 0;
 18 |   int avg_valance = 0;
 19 |   FILE *valancefile, *reratiofile;
 20 |   valancefile = fopen("valance.txt", "w");
 21 |   reratiofile = fopen("reratio.txt", "w");
 22 | 
 23 | 
 24 |   for(int i =0; i<neighbors.size(); i++)
 25 |   {
 26 |     int val = neighbors[i].size();
 27 |     sum += val;
 28 |     max_valance = max(max_valance, val);
 29 |     fprintf(valancefile, "%d\n", val);
 30 |   }
 31 |   avg_valance = sum / neighbors.size();
 32 |   if (this->verbose) {
 33 |     printf("Max valance is %d\n", max_valance);
 34 |     printf("average valance is %d\n", avg_valance);
 35 |   }
 36 |   int ne = faces.size();
 37 |   for(int i =0; i<ne; i++)
 38 |   {
 39 |     //compute circum sphere radius
 40 |     Face t = faces[i];
 41 |     point A = vertices[t[0]];
 42 |     point B = vertices[t[1]];
 43 |     point C = vertices[t[2]];
 44 |     point ab = B - A;
 45 |     point ac = C - A;
 46 |     point bc = C - B;
 47 | 
 48 |     double a = len(bc);
 49 |     double b = len(ac);
 50 |     double c = len(ab);
 51 | 
 52 |     double radius = a*b*c / sqrt((a+b+c)*(b+c-a)*(c+a-b)*(a+b-c));
 53 | 
 54 |     double min_edge = (double)INT_MAX;
 55 |     min_edge = min(min_edge, a);
 56 |     min_edge = min(min_edge, b);
 57 |     min_edge = min(min_edge, c);
 58 | 
 59 |     double radius_edge = radius / min_edge;
 60 | 
 61 |     fprintf(reratiofile, "%.3f\n", radius_edge);
 62 | 
 63 |   }
 64 | 
 65 |   fclose(valancefile);
 66 |   fclose(reratiofile);
 67 | 
 68 | 
 69 | 
 70 | }
 71 | 
 72 | void TriMesh::need_faceedges()
 73 | {
 74 |   if (faces.empty())
 75 |   {
 76 |     printf("No faces to compute face edges!!!\n");
 77 |     return;
 78 |   }
 79 |   int numFaces = faces.size();
 80 |   for (int i = 0; i < numFaces; i++)
 81 |   {
 82 |     Face f = faces[i];
 83 |     point edge01 = vertices[f[1]] - vertices[f[0]];
 84 |     point edge12 = vertices[f[2]] - vertices[f[1]];
 85 |     point edge20 = vertices[f[0]] - vertices[f[2]];
 86 |     faces[i].edgeLens[0] =sqrt(edge01[0]*edge01[0] + edge01[1]*edge01[1] + edge01[2]*edge01[2]);
 87 |     faces[i].edgeLens[1] =sqrt(edge12[0]*edge12[0] + edge12[1]*edge12[1] + edge12[2]*edge12[2]);
 88 |     faces[i].edgeLens[2] =sqrt(edge20[0]*edge20[0] + edge20[1]*edge20[1] + edge20[2]*edge20[2]);
 89 | 
 90 | 
 91 |   }
 92 | 
 93 | }
 94 | 
 95 | // Find the direct neighbors of each vertex
 96 | void TriMesh::need_neighbors()
 97 | {
 98 |   if (!neighbors.empty())
 99 |     return;
100 |   if (this->verbose) {
101 |     printf("Finding vertex neighbors... ");
102 |   }
103 |   int nv = vertices.size(), nf = faces.size();
104 | 
105 |   std::vector<int> numneighbors(nv);
106 |   for (int i = 0; i < nf; i++) {
107 |     numneighbors[faces[i][0]]++;
108 |     numneighbors[faces[i][1]]++;
109 |     numneighbors[faces[i][2]]++;
110 |   }
111 | 
112 |   neighbors.resize(nv);
113 |   for (int i = 0; i < nv; i++)
114 |     neighbors[i].reserve(numneighbors[i]+2); // Slop for boundaries
115 | 
116 |   for (int i = 0; i < nf; i++) {
117 |     for (int j = 0; j < 3; j++) {
118 |       std::vector<int> &me = neighbors[faces[i][j]];
119 |       int n1 = faces[i][(j+1)%3];
120 |       int n2 = faces[i][(j+2)%3];
121 |       if (std::find(me.begin(), me.end(), n1) == me.end())
122 |         me.push_back(n1);
123 |       if (std::find(me.begin(), me.end(), n2) == me.end())
124 |         me.push_back(n2);
125 |     }
126 |   }
127 | 
128 |   if (this->verbose) {
129 |     printf("Done.\n");
130 |   }
131 | }
132 | 
133 | void TriMesh::rescale(int size)
134 | {
135 | 
136 |   double minx = LARGENUM;
137 |   double miny = LARGENUM;
138 |   double minz = LARGENUM;
139 |   double maxx = -LARGENUM;
140 |   double maxy = -LARGENUM;
141 |   double maxz = -LARGENUM;
142 |   for(int v = 0; v < vertices.size(); v++)
143 |   {
144 |     double x = vertices[v][0];
145 |     double y = vertices[v][1];
146 |     double z = vertices[v][2];
147 |     if(x < minx)
148 |       minx = x;
149 |     if(y < miny)
150 |       miny= y;
151 |     if(z < minz)
152 |       minz = z;
153 | 
154 |     if(x> maxx)
155 |       maxx = x;
156 |     if(y> maxy)
157 |       maxy = y;
158 |     if(z> maxz)
159 |       maxz = z;
160 |   }
161 |   for(int v = 0; v < vertices.size(); v++)
162 |   {
163 | 
164 |     vertices[v][0] -= minx;
165 |     vertices[v][1] -= miny;
166 |     //vertices[v][2] -= minz;
167 | 
168 | 
169 |     vertices[v][0] = vertices[v][0] / (maxx - minx) * size;
170 |     vertices[v][1] = vertices[v][1] / (maxy - miny) * size;
171 |     //vertices[v][2] = vertices[v][2] / (maxz - minz) * size;
172 | 
173 | 
174 |   }
175 | }
176 | 
177 | void TriMesh::meshoptimization(int iterNum)
178 | {
179 |   need_neighbors();
180 |   for(int i=0; i<iterNum; i++)
181 |   {
182 |     for(int v = 0; v<vertices.size(); v++)
183 |     {
184 |       double x = vertices[v][0];
185 |       double y = vertices[v][1];
186 |       double z = vertices[v][2];
187 |       double newx = 0.0, newy=0.0,newz=0.0;
188 |       for(int b =0; b<neighbors[v].size(); b++)
189 |       {
190 |         newx += vertices[neighbors[v][b]][0];
191 |         newy += vertices[neighbors[v][b]][1];
192 |         newz += vertices[neighbors[v][b]][2];
193 |       }
194 |       newx /= neighbors[v].size();
195 |       newy /= neighbors[v].size();
196 |       newz /= neighbors[v].size();
197 | 
198 |       if(x != 0.0 && x != 16.0)
199 |         vertices[v][0] = newx;
200 | 
201 |       if(y != 0.0 && y != 16.0)
202 |         vertices[v][1] = newy;
203 | 
204 |       if(z != 0.0 && z != 16.0)
205 |         vertices[v][2] = newz;
206 |     }
207 | 
208 |   }
209 | 
210 | }
211 | // Find the faces touching each vertex
212 | void TriMesh::need_adjacentfaces()
213 | {
214 |   if (!adjacentfaces.empty())
215 |     return;
216 |   //  need_faces();
217 | 
218 |   if (this->verbose) {
219 |     printf("Finding vertex to triangle maps... ");
220 |   }
221 |   int nv = vertices.size(), nf = faces.size();
222 | 
223 |   std::vector<int> numadjacentfaces(nv);
224 |   for (int i = 0; i < nf; i++) {
225 |     numadjacentfaces[faces[i][0]]++;
226 |     numadjacentfaces[faces[i][1]]++;
227 |     numadjacentfaces[faces[i][2]]++;
228 |   }
229 | 
230 |   adjacentfaces.resize(vertices.size());
231 |   for (int i = 0; i < nv; i++)
232 |     adjacentfaces[i].reserve(numadjacentfaces[i]);
233 | 
234 |   for (int i = 0; i < nf; i++) {
235 |     for (int j = 0; j < 3; j++)
236 |       adjacentfaces[faces[i][j]].push_back(i);
237 |   }
238 | 
239 |   if (this->verbose) {
240 |     printf("Done.\n");
241 |   }
242 | }
243 | 
244 | // Find the face across each edge from each other face (-1 on boundary)
245 | // If topology is bad, not necessarily what one would expect...
246 | void TriMesh::need_across_edge()
247 | {
248 |   if (!across_edge.empty())
249 |     return;
250 |   need_adjacentfaces();
251 | 
252 |   if (this->verbose) {
253 |     printf("Finding across-edge maps... ");
254 |   }
255 |   int nf = faces.size();
256 |   across_edge.resize(nf, Face(-1,-1,-1));
257 | 
258 |   for (int i = 0; i < nf; i++) {
259 |     for (int j = 0; j < 3; j++) {
260 |       if (across_edge[i][j] != -1)
261 |         continue;
262 |       int v1 = faces[i][(j+1)%3];
263 |       int v2 = faces[i][(j+2)%3];
264 |       const std::vector<int> &a1 = adjacentfaces[v1];
265 |       const std::vector<int> &a2 = adjacentfaces[v2];
266 |       for (int k1 = 0; k1 < a1.size(); k1++) {
267 |         int other = a1[k1];
268 |         if (other == i)
269 |           continue;
270 |         std::vector<int>::const_iterator it =
271 |           std::find(a2.begin(), a2.end(), other);
272 |         if (it == a2.end())
273 |           continue;
274 |         int ind = (faces[other].indexof(v1)+1)%3;
275 |         if (faces[other][(ind+1)%3] != v2)
276 |           continue;
277 |         across_edge[i][j] = other;
278 |         across_edge[other][ind] = i;
279 |         break;
280 |       }
281 |     }
282 |   }
283 | 
284 |   if (this->verbose) {
285 |     printf("Done.\n");
286 |   }
287 | }
288 | 
289 | 


--------------------------------------------------------------------------------
/src/core/aggmis/cuda/AggMIS_MIS_CPU.cu:
--------------------------------------------------------------------------------
  1 | #include <AggMIS_MIS_CPU.h>
  2 | namespace AggMIS {
  3 |     namespace MIS {
  4 |         struct fringeNode
  5 |         {
  6 |             int nodeIdx;
  7 |             int visits;
  8 |             fringeNode(int n, int v)
  9 |             {
 10 |                 nodeIdx = n;
 11 |                 visits = v;
 12 |             }
 13 |         };
 14 | 
 15 |         class fringeNodeComparer
 16 |         {
 17 |         public:
 18 |             bool operator()(fringeNode &f1, fringeNode &f2)
 19 |             {
 20 |                 if (f1.visits < f2.visits)
 21 |                     return true;
 22 |                 return false;
 23 |             }
 24 |         };
 25 |         AggMIS::Types::IntVector_h* FloodFillMIS(int k, AggMIS::Types::Graph_h &graph) {
 26 |             AggMIS::Types::IntVector_h *m = new AggMIS::Types::IntVector_h(graph.Size(), -1);
 27 |             AggMIS::Types::IntVector_h &mis = *m; 
 28 |             AggMIS::Types::IntVector_h visited(graph.Size(), 0);
 29 |             AggMIS::Types::IntVector_h distances(graph.Size(), 1000);
 30 |             std::queue<int> frontier;
 31 |             std::priority_queue<fringeNode, std::vector<fringeNode>, fringeNodeComparer> fringe;
 32 | 
 33 | 
 34 |             // Picking a random starting point:
 35 |             srand(time(NULL));
 36 |             int starter = rand() % graph.Size();
 37 | 
 38 |             bool incomplete = true;
 39 |             while (incomplete)
 40 |             {   
 41 |                 if (mis[starter] == -1)
 42 |                 {
 43 |                     fringeNode toAdd(starter, 1);
 44 |                     fringe.push(toAdd);
 45 |                 }
 46 | 
 47 |                 while (!fringe.empty())
 48 |                 {
 49 |                     // finding best fringe node
 50 |                     int nodeToAdd = -1;
 51 |                     while (!fringe.empty())
 52 |                     {
 53 |                         fringeNode candidate = fringe.top();
 54 |                         fringe.pop();
 55 |                         if (distances[candidate.nodeIdx] > k)
 56 |                         {
 57 |                             nodeToAdd = candidate.nodeIdx;
 58 |                             break;
 59 |                         }
 60 |                     }
 61 |                     if (nodeToAdd == -1)
 62 |                     {
 63 |                         break;
 64 |                     }
 65 | 
 66 |                     mis[nodeToAdd] = 1;
 67 |                     distances[nodeToAdd] = 0;
 68 | 
 69 |                     // Pushing neighbors of mis node onto frontier to start out
 70 |                     int start = (*(graph.indices))[nodeToAdd];
 71 |                     int end = (*(graph.indices))[nodeToAdd + 1];
 72 |                     for (int nIt = start; nIt < end; nIt++)
 73 |                     {
 74 | 
 75 |                         int neighbor = (*(graph.adjacency))[nIt];
 76 |                         if (distances[neighbor] > 1)
 77 |                         {
 78 |                             distances[neighbor] = 1;
 79 |                             frontier.push(neighbor);
 80 |                         }
 81 |                     }
 82 | 
 83 |                     // Exploring to the end of the frontier:
 84 |                     while (!frontier.empty())
 85 |                     {
 86 |                         int exploring = frontier.front();
 87 |                         frontier.pop();
 88 | 
 89 |                         int distance = distances[exploring];
 90 | 
 91 |                         // Mark out the node from the MIS
 92 |                         mis[exploring] = 0;
 93 | 
 94 |                         // Add the neighbors
 95 |                         if (distance < k)
 96 |                         {
 97 |                             int start = (*(graph.indices))[exploring];
 98 |                             int end = (*(graph.indices))[exploring + 1];
 99 |                             for (int nIt = start; nIt < end; nIt++)
100 |                             {
101 |                                 int neighbor = (*(graph.adjacency))[nIt];
102 |                                 if (distances[neighbor] > distance + 1)
103 |                                 {
104 |                                     distances[neighbor] = distance + 1;
105 |                                     frontier.push(neighbor);
106 |                                 }
107 |                             }
108 |                         }
109 |                         if (distance == k)
110 |                         {
111 |                             int start = (*(graph.indices))[exploring];
112 |                             int end = (*(graph.indices))[exploring + 1];
113 |                             for (int nIt = start; nIt < end; nIt++)
114 |                             {
115 |                                 int neighbor = (*(graph.adjacency))[nIt];
116 |                                 if (distances[neighbor] >= distance + 1)
117 |                                 {
118 |                                     distances[neighbor] = distance + 1;
119 |                                     fringeNode toAdd(neighbor, ++visited[neighbor]);
120 |                                     fringe.push(toAdd);
121 |                                 }
122 |                             }
123 |                         }
124 |                     }
125 |                 }
126 | 
127 |                 incomplete = false;
128 |                 for (int i = 0; i < graph.Size(); i++)
129 |                 {
130 |                     if (mis[i] == -1)
131 |                     {
132 |                         incomplete = true;
133 |                         starter = i;
134 |                         break;
135 |                     }
136 |                 }
137 |             }
138 |             visited.clear();
139 |             distances.clear();
140 |             return m;
141 |         }
142 |         AggMIS::Types::IntVector_h* NaiveMIS(int k, AggMIS::Types::Graph_h graph) {
143 |             AggMIS::Types::IntVector_h *m = new AggMIS::Types::IntVector_h(graph.Size(), -1);
144 |             AggMIS::Types::IntVector_h &mis = *m;  
145 |             AggMIS::Types::IntVector_h distances(graph.Size(), 1000);
146 |             for (int i = 0; i < graph.Size(); i++)
147 |             {
148 |                 if (mis[i] == -1)
149 |                 {
150 |                     mis[i] = 1;
151 |                     distances[i] = 0;
152 | 
153 |                     std::queue<int> frontier;
154 | 
155 |                     // Pushing neighbors of mis node onto frontier to start out
156 |                     int start = (*(graph.indices))[i];
157 |                     int end = (*(graph.indices))[i + 1];
158 |                     for (int nIt = start; nIt < end; nIt++)
159 |                     {
160 |                         int neighbor = (*(graph.adjacency))[nIt];
161 |                         if (distances[neighbor] > 1)
162 |                         {
163 |                             frontier.push(neighbor);
164 |                             distances[neighbor] = 1;
165 |                         }
166 |                     }
167 | 
168 |                     // Exploring to the end of the frontier:
169 |                     while (!frontier.empty())
170 |                     {
171 |                         int exploring = frontier.front();
172 |                         int distance = distances[exploring];
173 |                         frontier.pop();
174 | 
175 |                         // Set node out of mis
176 |                         mis[exploring] = 0;
177 | 
178 |                         // Add the neighbors
179 |                         if (distance < k)
180 |                         {
181 |                             int start = (*(graph.indices))[exploring];
182 |                             int end = (*(graph.indices))[exploring + 1];
183 |                             for (int nIt = start; nIt < end; nIt++)
184 |                             {
185 |                                 int neighbor = (*(graph.adjacency))[nIt];
186 |                                 if (distances[neighbor] > distance + 1)
187 |                                 {
188 |                                     distances[neighbor] = distance + 1;
189 |                                     frontier.push(neighbor);
190 |                                 }
191 |                             }
192 |                         }
193 |                     }
194 |                 }
195 |             }
196 |             distances.clear();
197 |             return m;
198 |         }
199 |     }
200 | }


--------------------------------------------------------------------------------
/src/core/cuda/FEM2D.cu:
--------------------------------------------------------------------------------
  1 | #include <FEM/FEM2D.h>
  2 | #include <cuda/perform_element_loop_2D.cuh>
  3 | #include <util.h>
  4 | 
  5 | #define  PI 3.1415927
  6 | 
  7 | FEM2D::FEM2D(TriMesh* meshPtr)
  8 | {
  9 |   initializeWithTriMesh(meshPtr);
 10 | }
 11 | 
 12 | void FEM2D::initializeWithTriMesh(TriMesh* meshPtr)
 13 | {
 14 |   nv = meshPtr->vertices.size();
 15 |   ne = meshPtr->faces.size();
 16 |   IdxVector_h tri0(ne);
 17 |   IdxVector_h tri1(ne);
 18 |   IdxVector_h tri2(ne);
 19 | 
 20 |   for(int i = 0; i < ne; i++)
 21 |   {
 22 |     tri0[i] = meshPtr->faces[i][0];
 23 |     tri1[i] = meshPtr->faces[i][1];
 24 |     tri2[i] = meshPtr->faces[i][2];
 25 |   }
 26 | 
 27 |   Vector_h_CG vx(nv);
 28 |   Vector_h_CG vy(nv);
 29 | 
 30 |   for(int i = 0; i < nv; i++)
 31 |   {
 32 |     vx[i] = meshPtr->vertices[i][0];
 33 |     vy[i] = meshPtr->vertices[i][1];
 34 |   }
 35 | 
 36 |   d_tri0 = tri0;
 37 |   d_tri1 = tri1;
 38 |   d_tri2 = tri2;
 39 | 
 40 |   d_vx = vx;
 41 |   d_vy = vy;
 42 | 
 43 |   tri0.resize(0);
 44 |   tri1.resize(0);
 45 |   tri2.resize(0);
 46 |   vx.resize(0);
 47 |   vy.resize(0);
 48 | }
 49 | 
 50 | double compute_gamma(double x)
 51 | {
 52 | 	int i,k,m;
 53 | 	double ga,gr,r,z;
 54 | 
 55 | 	static double g[] = {
 56 | 		1.0,
 57 | 		0.5772156649015329,
 58 | 		-0.6558780715202538,
 59 | 		-0.420026350340952e-1,
 60 | 		0.1665386113822915,
 61 | 		-0.421977345555443e-1,
 62 | 		-0.9621971527877e-2,
 63 | 		0.7218943246663e-2,
 64 | 		-0.11651675918591e-2,
 65 | 		-0.2152416741149e-3,
 66 | 		0.1280502823882e-3,
 67 | 		-0.201348547807e-4,
 68 | 		-0.12504934821e-5,
 69 | 		0.1133027232e-5,
 70 | 		-0.2056338417e-6,
 71 | 		0.6116095e-8,
 72 | 		0.50020075e-8,
 73 | 		-0.11812746e-8,
 74 | 		0.1043427e-9,
 75 | 		0.77823e-11,
 76 | 		-0.36968e-11,
 77 | 		0.51e-12,
 78 | 		-0.206e-13,
 79 | 		-0.54e-14,
 80 | 		0.14e-14};
 81 | 
 82 | 		if (x > 171.0) return 1e308;    // This value is an overflow flag.
 83 | 		if (x == (int)x) {
 84 | 			if (x > 0.0) {
 85 | 				ga = 1.0;               // use factorial
 86 | 				for (i=2;i<x;i++) {
 87 | 					ga *= i;
 88 | 				}
 89 | 			}
 90 | 			else
 91 | 				ga = 1e308;
 92 | 		}
 93 | 		else {
 94 | 			if (fabs(x) > 1.0) {
 95 | 				z = fabs(x);
 96 | 				m = (int)z;
 97 | 				r = 1.0;
 98 | 				for (k=1;k<=m;k++) {
 99 | 					r *= (z-k);
100 | 				}
101 | 				z -= m;
102 | 			}
103 | 			else
104 | 				z = x;
105 | 			gr = g[24];
106 | 			for (k=23;k>=0;k--) {
107 | 				gr = gr*z+g[k];
108 | 			}
109 | 			ga = 1.0/(gr*z);
110 | 			if (fabs(x) > 1.0) {
111 | 				ga *= r;
112 | 				if (x < 0.0) {
113 | 					ga = -M_PI/(x*ga*sin(M_PI*x));
114 | 				}
115 | 			}
116 | 		}
117 | 		return ga;
118 | }
119 | 
120 | void FEM2D::JacobiPoly(int degree, Vector_h_CG x,int alpha,int beta, Vector_h_CG &y)
121 | {
122 | 	int s = x.size();
123 | 	if (degree == 0)
124 | 	{
125 | 		
126 | 		y.resize(s);
127 | 		for (int i =0; i< s; i++)
128 | 		{
129 | 			y[i] = 1.0;
130 | 		}
131 | 
132 | 	}
133 | 	else if(degree == 1)
134 | 	{
135 | 		y.resize(s);
136 | 		for (int i =0; i<s; i++)
137 | 		{
138 | 
139 | 			y[i] = 0.5*(alpha-beta+(alpha+beta+2.0)*x[i]);
140 | 
141 | 		}
142 | 		
143 | 	}
144 | 	else
145 | 	{
146 | 		double degm1 = degree-1.0; 
147 | 		double tmp = 2.0*degm1+alpha+beta;
148 | 		double a1= 2.0*(degm1+1)*(degm1+alpha+beta+1)*tmp;
149 | 		double a2= (tmp+1)*(alpha*alpha-beta*beta);
150 | 		double a3= tmp*(tmp+1.0)*(tmp+2.0);
151 | 		double a4= 2.0*(degm1+alpha)*(degm1+beta)*(tmp+2.0);
152 | 		Vector_h_CG poly1, poly2;
153 | 		JacobiPoly(degree-1,x,alpha,beta, poly1);
154 | 		JacobiPoly(degree-2,x,alpha,beta, poly2);
155 | 
156 | 		int plolysize = poly1.size();
157 | 		y.resize(plolysize);
158 | 
159 | 
160 | 
161 | 		for (int i=0; i<plolysize; i++)
162 | 		{
163 | 			y[i] = ((a2+a3*x[i])*poly1[i]- a4*poly2[i] )/a1;
164 | 		}
165 | 
166 | 		
167 | 	}
168 | 
169 | 
170 | }
171 | 
172 | void FEM2D::JacobiPolyDerivative(int degree, Vector_h_CG &x, int alpha,int beta, Vector_h_CG &y)
173 | {
174 | 	int s = x.size();
175 | 	if (degree == 0)
176 | 	{
177 | 
178 | 		y.resize(s);
179 | 		for (int i =0; i< s; i++)
180 | 		{
181 | 			y[i] = 0.0;
182 | 		}
183 | 
184 | 	}
185 | 	else
186 | 	{
187 | 		Vector_h_CG poly;
188 | 		JacobiPoly(degree-1,x,alpha+1,beta+1, poly);
189 | 		y.resize(poly.size());
190 | 		for (int i =0; i<poly.size(); i++)
191 | 		{
192 | 			y[i] = 0.5*(alpha+beta+degree+1)*poly[i];
193 | 		}
194 | 	}
195 | 	//y = 0.5*(alpha+beta+degree+1)*JacobiPoly(degree-1,x,alpha+1,beta+1);
196 | }
197 | 
198 | void FEM2D::JacobiGZeros(int degree,int alpha,int beta, Vector_h_CG &z)
199 | {
200 | 	z.resize(degree);
201 | 	if (degree == 0)
202 | 	{
203 | 		for (int i =0; i<degree; i++)
204 | 		{
205 | 			z[i] =0.0;
206 | 		}
207 | 		return;
208 | 	} 
209 | 	int	maxit = 60;
210 | 	double EPS = 1.0e-6;
211 | 	double dth =double(PI)/(2.0*degree);
212 | 
213 | 	double rlast=0.0;
214 | 	double one = 1.0;
215 | 	double two = 2.0;
216 | 
217 | 
218 | 	Vector_h_CG r;
219 | 	Vector_h_CG poly, pder;
220 | 	r.resize(1);
221 | 	poly.resize(1);
222 | 	pder.resize(1);
223 | 
224 | 
225 | 
226 | 	double sum = 0;
227 | 	double delr;
228 | 	for (int k=0; k< degree; k++) 
229 | 	{
230 | 		r[0] = -cos((two*k + one) * dth);
231 | 		if (k)
232 | 			r[0] = 0.5*(r[0] + rlast);
233 | 		
234 | 
235 | 		for(int j = 0; j < maxit; j++) 
236 | 		{
237 | 
238 | 			JacobiPoly(degree,r,alpha,beta, poly);
239 | 			JacobiPolyDerivative(degree,r,alpha,beta, pder);
240 | 
241 | 			sum = 0.0;
242 | 			for (int i=0; i< k; i++)
243 | 				sum = sum + one/(r[0] - z[i]);
244 | 			  
245 | 			delr = -poly[0] / (pder[0] - sum * poly[0]);
246 | 			r[0]  = r[0] + delr;
247 | 			if (fabs(delr) < EPS)
248 | 				break;
249 | 		}
250 | 	
251 | 		z[k]  = r[0];
252 | 		rlast = r[0];
253 | 		
254 | 	}
255 | 
256 | }
257 | 
258 | 
259 | 
260 | void FEM2D::JacobiGLZW(Vector_h_CG&  Z,Vector_h_CG& weight,  int degree, int alpha, int beta)
261 | {
262 | 	Z.resize(degree);
263 | 	weight.resize(degree);
264 | 
265 | 	double  fac=0 ;
266 | 
267 | 
268 | 	if (degree == 1)
269 | 	{
270 | 		Z[0] = 0.0;
271 | 		weight[0] = 0.0;
272 | 	}
273 | 	else
274 | 	{
275 | 		//one = 1.0;
276 | 		int apb = alpha + beta;
277 | 		//two = 2.0;
278 | 
279 | 		Z[0] = -1;
280 | 		Z[degree-1] = 1;
281 | 
282 | 		Vector_h_CG tmppoly; 
283 | 	  JacobiGZeros(degree-2,alpha+1,beta+1, tmppoly);
284 | 
285 | 		for (int i = 1; i< degree-1; i++)
286 | 		{
287 | 			Z[i] = tmppoly[i-1];
288 | 		}
289 | 		//Z(2:degree-1) = JacobiGZeros(degree-2,alpha+one,beta+one);    
290 | 		JacobiPoly(degree-1,Z,alpha,beta, weight);
291 | 
292 |     Matrix_ell_d_CG::value_type tmp1 = pow(Matrix_ell_d_CG::value_type(2), Matrix_ell_d_CG::value_type(apb + 1));
293 |     Matrix_ell_d_CG::value_type tmp2 = compute_gamma(alpha + degree);
294 | 
295 | 		 fac =  tmp1 * tmp2 * compute_gamma(beta + degree);
296 | 		fac = fac / ((degree-1)*compute_gamma(degree)*compute_gamma(alpha + beta + degree + 1));
297 | 
298 | 		for (int j =0; j< degree; j++)
299 | 		{
300 |       weight[j] = Matrix_ell_d_CG::value_type(fac) / (weight[j] * weight[j]);
301 | 		}
302 | 		//weight = fac./(w.*w);
303 | 		weight[0] = weight[0]*(beta+1);
304 | 		weight[degree-1] = weight[degree-1]*(alpha+1);
305 | 	}
306 | 
307 | 
308 | }
309 | 
310 | 
311 | void FEM2D::JacobiGRZW(Vector_h_CG&  Z, Vector_h_CG& weight,  int degree, int alpha, int beta)
312 | {
313 | 	Z.resize(degree);
314 | 	weight.resize(degree);
315 | 
316 |   Matrix_ell_d_CG::value_type fac = 0;
317 | 
318 | 
319 | 	if (degree == 1)
320 | 	{
321 | 		Z[0] = 0.0;
322 | 		weight[0] = 2.0;
323 | 	}
324 | 	else
325 | 	{
326 | 		//one = 1.0;
327 | 		int apb = alpha + beta;
328 | 		//two = 2.0;
329 | 
330 | 		Z[0] = -1;
331 | 
332 | 		Vector_h_CG tmpPoly;
333 | 	  JacobiGZeros(degree-1,alpha,beta+1, tmpPoly);
334 | 
335 | 		for (int i = 1; i< degree; i++)
336 | 		{
337 | 			Z[i] = tmpPoly[i-1];
338 | 		}
339 | 		//Z(2:degree-1) = JacobiGZeros(degree-1,alpha+one,beta+one);    
340 | 		JacobiPoly(degree-1,Z,alpha,beta, weight);
341 |     Matrix_ell_d_CG::value_type tmp = compute_gamma(alpha + degree);
342 | 
343 |     fac = pow(Matrix_ell_d_CG::value_type(2), Matrix_ell_d_CG::value_type(apb)) * tmp *compute_gamma(beta + degree);
344 | 		fac = fac / (compute_gamma(degree)*(beta+degree)*compute_gamma(apb+degree + 1));
345 | 
346 | 		for (int j =0; j< degree; j++)
347 | 		{
348 |       weight[j] = Matrix_ell_d_CG::value_type(fac)*(1 - Z[j]) / (weight[j] * weight[j]);
349 | 		}
350 | 		
351 | 		weight[0] = weight[0]*(beta+1);
352 | 		
353 | 	}
354 | 
355 | }
356 | 
357 | 
358 | void FEM2D::assemble(TriMesh* meshPtr, Matrix_ell_d_CG &A, Vector_d_CG &b)
359 | {
360 | 	int degree_x = 6;
361 | 	int degree_y = 6;
362 | 
363 | 	int alpha1 = 0,  beta1 = 0;
364 | 	int alpha2 = 1,  beta2 = 0;
365 | 
366 | 	Vector_h_CG z_x, z_y;
367 | 	Vector_h_CG weight_x, weight_y;
368 | 
369 | 	JacobiGLZW(z_x,  weight_x, degree_x, alpha1, beta1);
370 | 	JacobiGRZW(z_y,  weight_y, degree_y, alpha2, beta2);
371 | 
372 |   Matrix_ell_d_CG::value_type* tmp_w_x = thrust::raw_pointer_cast(&weight_x[0]);
373 |   Matrix_ell_d_CG::value_type* tmp_w_y = thrust::raw_pointer_cast(&weight_y[0]);
374 |   Matrix_ell_d_CG::value_type* tmp_z_x = thrust::raw_pointer_cast(&z_x[0]);
375 |   Matrix_ell_d_CG::value_type* tmp_z_y = thrust::raw_pointer_cast(&z_y[0]);
376 | 
377 |  perform_element_loop_2d(d_vx, d_vy, d_tri0, d_tri1, d_tri2, A, b, z_x, z_y, weight_x, weight_y);
378 | }
379 | 
380 | void FEM2D::assemble(TriMesh* meshPtr, Matrix_d_CG &A, Vector_d_CG &b)
381 | {
382 | 	int degree_x = 6;
383 | 	int degree_y = 6;
384 | 
385 | 	int alpha1 = 0,  beta1 = 0;
386 | 	int alpha2 = 1,  beta2 = 0;
387 | 
388 | 	Vector_h_CG z_x, z_y;
389 | 	Vector_h_CG weight_x, weight_y;
390 | 
391 | 	JacobiGLZW(z_x,  weight_x, degree_x, alpha1, beta1);
392 | 	JacobiGRZW(z_y,  weight_y, degree_y, alpha2, beta2);
393 | 
394 | 
395 |  perform_element_loop_2d_coo(d_vx, d_vy, d_tri0, d_tri1, d_tri2, A, b, z_x, z_y, weight_x, weight_y);
396 |  
397 | 
398 | }
399 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | GPUTUM : FEM Solver
  2 | ==============
  3 | 
  4 | GPUTUM FEM Solver is a C++/CUDA library written to solve an FEM linear system. It is designed to solve the FEM system quickly by using GPU hardware.
  5 | 
  6 | The code was written by Zhisong Fu and T. James Lewis at the Scientific Computing and Imaging Institute, 
University of Utah, Salt Lake City, USA. The theory behind this code is published in the papers linked below. 

Table of Contents
  9 | ========
 10 | <img src="https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/master/src/Resources/fem.png"  align="right" hspace="20" width=350>
- [FEM Acknowledgements](#fem-acknowledgements)
- [Requirements](#requirements)
- [Building](#building)<br/>
		- [Linux and OSX](#linux-and-osx)<br/>
		- [Windows](#windows)<br/>
- [Running Examples](#running-examples)
- [Using the Library](#using-the-library)
- [Testing](#testing)<br/>

<br/>

<h4>FEM Acknowledgements</h4>
 23 | **<a href ="http://www.sciencedirect.com/science/article/pii/S0377042713004470">
 24 | Architecting the Finite Element Method Pipeline for the GPU</a>**<br/>
 25 | <img src="https://raw.githubusercontent.com/SCIInstitute/SCI-Solver_FEM/master/src/Resources/fem2.png"  align="right" hspace="20" width=360>
 26 | 
 27 | **AUTHORS:**
 28 | <br/>Zhisong Fu(*a*) <br/>
 29 | T. James Lewis(*b*) <br/>
 30 | Robert M. Kirby(*a*) <br/>
 31 | Ross T. Whitaker(*a*) <br/>
 32 | 
 33 | This library solves for the partial differential equations and coefficients values 
 34 | on vertices located on a tetrahedral or triangle mesh on the GPU. Several mesh formats
 35 | are supported, and are read by the <a href="http://wias-berlin.de/software/tetgen/">TetGen Library</a> and the
 36 |  <a href="http://graphics.stanford.edu/software/trimesh/">TriMesh Library</a>. 
 37 | The <a href="http://glaros.dtc.umn.edu/gkhome/metis/metis/download">METIS library</a> is used to partition unstructured 
 38 | meshes. <a href="https://code.google.com/p/googletest/">
 39 | Google Test</a> is used for testing.
<br/><br/>

Requirements
 42 | ==============
 43 | 
 44 |  * Git, CMake (3.0+ recommended), and the standard system build environment tools.
 * You will need a CUDA Compatible Graphics card. See <a href="https://developer.nvidia.com/cuda-gpus">here</a>. You will also need to be sure your card has CUDA compute capability of at least 2.0.
 46 |  * SCI-Solver_FEM is compatible with the latest CUDA toolkit (7.0). Download <a href="https://developer.nvidia.com/cuda-downloads">here</a>.
 47 |  * This project has been tested on OpenSuse 12.3 (Dartmouth) on NVidia GeForce GTX 570 HD, Ubuntu 14.04 on NVidia GeForce GTX 560 Ti, Windows 7 on NVidia GeForce GTX 775M, and OSX 10.10 on NVidia GeForce GTX 775M. 
 48 |  * If you have a CUDA compatible card with the above operating systems, and are experiencing issues, please contact the repository owners.
 49 |  * Windows: You will need Microsoft Visual Studio 2013 build tools. This document describes the "NMake" process.
 * OSX: Please be sure to follow setup for CUDA <a href="http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-mac-os-x/#axzz3W4nXNNin">here</a>. There are several compatibility requirements for different Mac machines, including using a different version of CUDA (e.g. 5.5).
 51 | 
 52 | Building
 53 | ==============
 54 | 
 55 | <h3>Linux and OSX</h3>
 56 | In a terminal:
 57 | ```c++
mkdir SCI-Solver_FEM/build
cd SCI-Solver_FEM/build
 60 | cmake ../src
 61 | make
 62 | ```
 63 | 
 64 | <h3>Windows</h3>
 65 | Open a Visual Studio (32 or 64 bit) Native Tools Command Prompt. 
 66 | Follow these commands:
 67 | ```c++
 68 | mkdir C:\Path\To\SCI-Solver_FEM\build
 69 | cd C:\Path\To\SCI-Solver_FEM\build
 70 | cmake -G "NMake Makefiles" ..\src
 71 | nmake
 72 | ```
 73 | 
 74 | **Note:** For all platforms, you may need to specify your CUDA toolkit location (especially if you have multiple CUDA versions installed):
 75 | ```c++
 76 | cmake -DCUDA_TOOLKIT_ROOT_DIR="~/NVIDIA/CUDA-7.0" ../src
 77 | ```
 78 | (Assuming this is the location).
 79 | 
 80 | **Note:** If you have compile errors such as <code>undefined reference: atomicAdd</code>, it is likely you need to set your compute capability manually. CMake outputs whether compute capability was determined automatically, or if you need to set it manually. The default (and known working) minimum compute capability is 2.0.
 81 | 
 82 | ```c++
 83 | cmake -DCUDA_COMPUTE_CAPABILITY=20 ../src
 84 | make
 85 | ```
 86 | 
 87 | 
 88 | Running Examples
 89 | ==============
 90 | 
You will need to enable examples in your build to compile and run them:
 92 | 
 93 | ```c++
 94 | cmake -DBUILD_EXAMPLES=ON ../src
 95 | make
 96 | ```
 97 | 
 98 | You will find the example binaries built in the <code>build/examples</code> directory.
 99 | 
100 | Run the examples in the build directory:
101 | 
102 | ```c++
103 | examples/Example1
104 | examples/Example2
105 | ...
106 | ```
107 | Each example has a <code>-h</code> flag that prints options for that example. <br/>
108 | 
109 | Follow the example source code in <code>src/examples</code> to learn how to use the library.
110 | <br/>
111 | To run examples similar to the paper, the following example calls would do so:<br/>
112 | <b>2D FEM, Egg Carton </b><br/>
113 | <code>examples/Example2 -v -i ../src/test/test_data/simple.ply -A "../src/test/test_data/simpleTri.mat" -b "../src/test/test_data/simpleTrib.mat"</code><br/>
114 | 
115 | **NOTE** All examples output a set of <code>result.vtk</code> (name based off input 
116 | filename) VTK files in the current directory. These files are easily viewed via VTK readers like Paraview.
117 | You can clip and add iso-values to more distinctly visualize the result. An <code>output.mat</code>
118 | MATLAB file is also written to file (solution coefficients).
119 | 
120 | Using the Library
121 | ==============
122 | 
123 | A basic usage of the library links to the <code>libFEM_CORE</code> library during build and 
124 | includes the headers needed, which are usually no more than:
125 | 
126 | ```c++
127 | #include "FEMSolver.h"
128 | ```
129 | 
Then a program would set up the FEM parameters using a 
<code>FEMSolver</code> object and call
<code>object.solveFEM()</code> to generate
the answer matrices.
134 | 
135 | Here is a minimal usage example (using a tet mesh).<br/>
136 | ```c++
137 | #include <FEMSolver.h>
138 | int main(int argc, char *argv[])
139 | {
140 |   //the filename in the constructor below means ~/myTetMesh.node & ~/myTetMesh.ele
141 |   FEMSolver data("~/myTetMesh", false, true); // tet mesh, not a tri mesh, and verbose
142 |   //read in your Matrices (A matrix object is a member of FEMSolver)
143 |   data.readMatlabSparseMatrix("~/A_MATRIX.mat");
144 |   Vector_h_CG b_h(cfg.getMatrixRows(), 1.0);
  data.readMatlabArray("~/b_array.mat", &b_h);
146 |   //The answer vector.
147 |   Vector_h_CG x_h(cfg.getMatrixRows(), 0.0);
148 |   //Run the solver
149 |   data.solveFEM(&x_h, &b_h);
150 |   //now use the result
151 |   data.writeMatlabArray("outputName.mat", x_h);
152 |   //write the VTK
153 |   std::vector<double> vals;
154 |   for (size_t i = 0; i < x_h.size(); i++){
155 |     vals.push_back(x_h[i]);
156 |   }
157 |   data.writeVTK(vals, "outputName");
158 |   return 0;
159 | }
160 | ```
161 | 
162 | You can access the A matrix and meshes directly:
163 | ```c++
164 | TetMesh * FEMSolver::tetMesh_;
165 | TriMesh * FEMSolver::triMesh_;
166 | ```
167 | 
168 | <h3>FEM Solver Parameters</h3>
169 | 
170 | ```C++
171 |   class FEMSolver {
172 | 	  bool verbose_;                  // output verbosity
173 | 	  std::string filename_;          // mesh file name
174 | 	  int maxLevels_;                 // the maximum number of levels
175 | 	  int maxIters_;                  // the maximum solve iterations
176 | 	  int preInnerIters_;             // the pre inner iterations for GSINNER
177 | 	  int postInnerIters_;            // the post inner iterations for GSINNER
178 | 	  int postRelaxes_;               // the number of post relax iterations
179 | 	  int cycleIters_;                // the number of CG iterations per outer iteration
180 | 	  int dsType_;                    // data structure type
181 | 	  int topSize_;                   // max size of coarsest level
182 | 	  int randMisParameters_;         // max size of coarsest level
	  int partitionMaxSize_;          // max size of the partition
184 | 	  int aggregatorType_;            // aggregator oldMis (0), metis bottom up (1), 
185 | 									  //   metis top down (2), aggMisGPU (3), aggMisCPU (4), newMisLight (5)
186 | 	  int convergeType_;              // the convergence tolerance algorithm <absolute (0)|relative (1)>
187 | 	  double tolerance_;              // the convergence tolerance
188 | 	  int cycleType_;                 // the cycle algorithm <V (0) | W (1) | F (2) | K (3)>
189 | 	  int solverType_;                // the solving algorithm <AMG (0) | PCG (1)>
190 | 	  double smootherWeight_;         // the weight parameter used in a smoother
191 | 	  double proOmega_;               // the weight parameter used in prolongator smoother
192 | 	  int device_;                    // the GPU device number to specify
193 | 	  int blockSize_;
194 |       ...
195 |   };
196 | ```
197 | <br/>
You will need to make sure your CMake/Makefile/Build setup knows where 
to point for the library and header files. See the examples and their CMakeLists.txt.<br/><br/>

Testing
201 | ==============
202 | The repo comes with a set of regression tests to see if recent changes break
203 | expected results. To build the tests, you will need to set
204 | <code>BUILD_TESTING</code> to "ON" in either <code>ccmake</code> or when calling CMake:
205 | 
206 | ```c++
207 | cmake -DBUILD_TESTING=ON ../src
208 | ```
209 | After building, run <code>make test</code> or <code>ctest</code> in the build directory to run tests.<br/>
210 | <h4>Windows</h4>
211 | The gtest library included in the repo needs to be built with 
212 | forced shared libraries on Windows, so use the following:
213 | 
214 | ```c++
215 | cmake -DBUILD_TESTING=ON -Dgtest_forced_shared_crt=ON ../src
216 | ```
217 | Be sure to include all other necessary CMake definitions as annotated above.
218 | 


--------------------------------------------------------------------------------
/src/core/cuda/randomizedMIS_GPU.cu:
--------------------------------------------------------------------------------
  1 | #include <smoothedMG/aggregators/misHelpers.h>
  2 | 
  3 | __global__ void Generate_Randoms_Kernel(int size, int iterations, unsigned int *randoms, unsigned int *seeds)
  4 | {
  5 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
  6 |     unsigned int z = seeds[idx];
  7 |     int offset = idx;
  8 |     int step = 32768;
  9 |     
 10 |     for (int i = 0; i < iterations; i++)
 11 |     {
 12 |         if (offset < size)
 13 |         {
 14 |             unsigned int b = (((z << 13) ^ z) >> 19);
 15 |             z = (((z & UINT_MAX) << 12) ^ b);
 16 |             randoms[offset] = z;
 17 |             offset += step;
 18 |         }
 19 |     }
 20 | }
 21 | 
 22 | __global__ void First_Initialize_Kernel(int size, unsigned int *randoms, int *bestSeen, int *origin)
 23 | {
 24 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 25 |     if (idx < size)
 26 |     {   
 27 |         // Set the origin to be self
 28 |         origin[idx] = idx;
 29 |         
 30 |         // Set the bestSeen value to be random
 31 |         bestSeen[idx] = randoms[idx] % 1000000;
 32 |     } 
 33 | }
 34 | 
 35 | __global__ void Initialize_Kernel(int size, unsigned int *randoms, int *bestSeen, int *origin, int *mis, int *incomplete)
 36 | {
 37 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 38 |     if (idx < size)
 39 |     {
 40 |         // Taustep is performed with S1=13, S2=19, S3=12, and M=UINT_MAX coded into kernel
 41 |         unsigned int z = randoms[idx];
 42 |         unsigned int b = (((z << 13) ^ z) >> 19);
 43 |         z = (((z & UINT_MAX) << 12) ^ b);
 44 |         
 45 |         // Set the origin to be self
 46 |         origin[idx] = idx;
 47 |         
 48 |         // Set the bestSeen value to be either random from 0-1000000 or 1000001 if in MIS
 49 |         int status = mis[idx];
 50 |         int value = 0;
 51 |         if (status == 1)
 52 |             value = 1000001;
 53 |         
 54 |         bestSeen[idx] = (mis[idx] == -1) ? (z % 1000000) : value;
 55 |         
 56 |         // Write out new random value for seeding
 57 |         randoms[idx] = z;
 58 |     } 
 59 |     
 60 |     // Reset incomplete value
 61 |     if (idx == 0)
 62 |         incomplete[0] = 0;
 63 | }
 64 | 
 65 | __global__ void Iterate_Kernel(int size, int *originIn, int *originOut, int *bestSeenIn, int *bestSeenOut, int *adjIndexes, int *adjacency)
 66 | {
 67 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
 68 |     if (idx < size)
 69 |     {
 70 |         int bestSeen = bestSeenIn[idx];
 71 |         int origin = originIn[idx];
 72 |         if (bestSeen < 1000001)
 73 |         {
 74 |             int start = adjIndexes[idx];
 75 |             int end = adjIndexes[idx + 1];
 76 | 
 77 |             // Look at all the neighbors and take best values:
 78 |             for (int i = start; i < end; i++)
 79 |             {
 80 |                 int neighbor = adjacency[i];
 81 |                 int challenger = bestSeenIn[neighbor];
 82 |                 int challengerOrigin = originIn[neighbor];
 83 | 
 84 |                 if (challenger > 0 && challenger == bestSeen && challengerOrigin > origin)
 85 |                 {
 86 |                     origin = challengerOrigin;
 87 |                 }
 88 | 
 89 | 
 90 |                 if (challenger > bestSeen)
 91 |                 {
 92 |                     bestSeen = challenger;
 93 |                     origin = challengerOrigin;
 94 |                 }
 95 |             }
 96 |         }
 97 |         
 98 |         // Write out the best values found
 99 |         bestSeenOut[idx] = bestSeen;
100 |         originOut[idx] = origin;
101 |     }  
102 | }
103 |     
104 | __global__ void Final_Iterate_Kernel(int size, int *originIn, int *originOut, int *bestSeenIn, int *bestSeenOut, int *adjIndexes, int *adjacency, int *mis, int *incomplete)
105 | {
106 |     int idx = blockIdx.x * blockDim.x + threadIdx.x;
107 |     if (idx < size)
108 |     {
109 |         int bestSeen = bestSeenIn[idx];
110 |         int origin = originIn[idx];
111 |         if (bestSeen < 1000001)
112 |         {
113 |             int start = adjIndexes[idx];
114 |             int end = adjIndexes[idx + 1];
115 | 
116 |             // Look at all the neighbors and take best values:
117 |             for (int i = start; i < end; i++)
118 |             {
119 |                 int neighbor = adjacency[i];
120 |                 unsigned int challenger = bestSeenIn[neighbor];
121 |                 int challengerOrigin = originIn[neighbor];
122 | 
123 |                 if (challenger > 0 && challenger == bestSeen && challengerOrigin > origin)
124 |                 {
125 |                     origin = challengerOrigin;
126 |                 }
127 | 
128 |                 if (challenger > bestSeen)
129 |                 {
130 |                     bestSeen = challenger;
131 |                     origin = challengerOrigin;
132 |                 }
133 |             }
134 |         }
135 |         
136 |         // Write new MIS status 
137 |         int misStatus = -1;
138 |         if (origin == idx)
139 |             misStatus = 1;
140 |         else if (bestSeen == 1000001)
141 |             misStatus = 0;
142 | 
143 |         mis[idx] = misStatus;
144 | 
145 | 
146 |         // If this node is still unassigned mark
147 |         if (misStatus == -1)
148 |         {
149 |             incomplete[0] = 1;
150 |         }
151 |     }    
152 | }
153 | 
154 | void misHelpers::randomizedMIS(IdxVector_d adjIndexes, IdxVector_d adjacency, IdxVector_d &mis, int k)
155 | {   
156 |     // Setting to prefer the cache:
157 |     cudaFuncSetCacheConfig(Initialize_Kernel, cudaFuncCachePreferL1);
158 |     cudaFuncSetCacheConfig(Iterate_Kernel, cudaFuncCachePreferL1);
159 |     cudaFuncSetCacheConfig(Final_Iterate_Kernel, cudaFuncCachePreferL1);
160 |     
161 |     cudaEvent_t s, e;
162 |     cudaEventCreate(&s);
163 |     cudaEventCreate(&e);
164 |     
165 |     int size = adjIndexes.size() - 1;
166 |     mis.resize(size);
167 |     thrust::fill(mis.begin(),mis.end(), -1);
168 |     
169 |     IntVector_d incomplete(1);            // This is a single value that will be marked with 1 by initialize kernel if there are unallocated nodes    //IdxVector_d misIn(size, -1);          // The current MIS assignments 1 = in MIS, 0 = not in MIS, -1 = undetermined 
170 |     cusp::array1d<unsigned int, cusp::device_memory> randoms(size);           // Set of random values generated by each threads random generator                      
171 |     IntVector_d bestSeenIn(size);         // Holds the highest value seen so far propogated through neigbhors each iteration
172 |     IntVector_d bestSeenOut(size);        // Holds the highest value seen so far propogated through neigbhors each iteration
173 |     IntVector_d originIn(size);           // The index where the best seen value originated     
174 |     IntVector_d originOut(size);          // The index where the best seen value originated  
175 |     cusp::array1d<unsigned int, cusp::device_memory> seeds(32768);              // Host side vector of initial random values
176 |     
177 |     // Getting raw pointers:
178 |     int *incomplete_d = thrust::raw_pointer_cast(&incomplete[0]);
179 |     int *misIn_d = thrust::raw_pointer_cast(&mis[0]);
180 |     unsigned int *randoms_d = thrust::raw_pointer_cast(&randoms[0]);
181 |     unsigned int *seeds_d = thrust::raw_pointer_cast(&seeds[0]);
182 |     int *bestSeenIn_d = thrust::raw_pointer_cast(&bestSeenIn[0]);
183 |     int *bestSeenOut_d = thrust::raw_pointer_cast(&bestSeenOut[0]);
184 |     int *originIn_d = thrust::raw_pointer_cast(&originIn[0]);
185 |     int *originOut_d = thrust::raw_pointer_cast(&originOut[0]);
186 |     int *adjIndexes_d = thrust::raw_pointer_cast(&(adjIndexes[0]));
187 |     int *adjacency_d = thrust::raw_pointer_cast(&(adjacency[0]));
188 |     
189 |     // Setting up for kernel launches
190 |     int blockSize = 256;
191 |     int nBlocks = size / blockSize + (size % blockSize == 0 ? 0 : 1);
192 |     
193 |     
194 |     // Seeding the randoms array:    
195 |     srand(time(NULL));
196 |     unsigned *seeds_h = new unsigned[32768];
197 |     for (int i = 0; i < 32768; i++)
198 |         seeds_h[i] = (unsigned)rand();
199 |     thrust::copy(seeds_h, seeds_h + 32768, seeds.begin());
200 |     int iterations = (size + 32767) / 32768;
201 |     Generate_Randoms_Kernel <<<128, 256>>> (size, iterations, randoms_d, seeds_d);
202 |     
203 |     // Running the initialize kernel:
204 |     First_Initialize_Kernel <<< nBlocks, blockSize >>> (size, randoms_d, bestSeenIn_d, originIn_d);
205 |     
206 |     // Running the iteration kernel k times swapping in and out for each iteration
207 |     for (int i = 0; i < k; i++)
208 |     {
209 |         if (i < k - 1)
210 |         {
211 |             Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d);
212 |         }
213 |         else 
214 |         {       
215 |             Final_Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d, misIn_d, incomplete_d);
216 |         }  
217 |         
218 |         // Swap the pointers for the next iteration:
219 |         int *temp = originIn_d;
220 |         originIn_d = originOut_d;
221 |         originOut_d = temp;
222 |         
223 |         int *temp2 = bestSeenIn_d;
224 |         bestSeenIn_d = bestSeenOut_d;
225 |         bestSeenOut_d = temp2;
226 |     }
227 |     
228 |     // If not complete get new randoms and repeat  
229 |     cudaThreadSynchronize();
230 |     int unallocated = incomplete[0];
231 |     
232 |     while (unallocated == 1)
233 |     {   
234 |         // Initialize kernel
235 |         Initialize_Kernel <<< nBlocks, blockSize >>> (size, randoms_d, bestSeenIn_d, originIn_d, misIn_d, incomplete_d);
236 |         
237 |         // Running the iteration kernel k times swapping in and out for each iteration
238 |         for (int i = 0; i < k; i++)
239 |         {
240 |             if (i < k - 1)
241 |             {
242 |                 Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d);
243 |             }
244 |             else 
245 |             {       
246 |                 Final_Iterate_Kernel <<< nBlocks, blockSize >>> (size, originIn_d, originOut_d, bestSeenIn_d, bestSeenOut_d, adjIndexes_d, adjacency_d, misIn_d, incomplete_d);
247 |             }
248 | 
249 | 
250 |             // Swap the pointers for the next iteration:
251 |             int *temp = originIn_d;
252 |             originIn_d = originOut_d;
253 |             originOut_d = temp;
254 | 
255 |             int *temp2 = bestSeenIn_d;
256 |             bestSeenIn_d = bestSeenOut_d;
257 |             bestSeenOut_d = temp2;
258 |         }        
259 |                 
260 |         // Checking if done:
261 |         cudaThreadSynchronize();
262 |         unallocated = incomplete[0];
263 |     }
264 |     
265 |     // Deallocating temporary arrays:
266 |     incomplete.resize(0);
267 |     randoms.resize(0);
268 |     bestSeenIn.resize(0);
269 |     bestSeenOut.resize(0);
270 |     originIn.resize(0);
271 |     originOut.resize(0);
272 | }
273 | 


--------------------------------------------------------------------------------
/src/core/include/smoothers/smoother.h:
--------------------------------------------------------------------------------
  1 | #ifndef __SMOOTHER_H__
  2 | #define __SMOOTHER_H__
  3 | template <class Matrix, class Vector> class Smoother;
  4 | 
// Identifiers for the smoother variants.
// NOTE(review): the meaning of each value is inferred from its name only;
// GSINNER is presumably the smoother whose pre/post inner iteration counts the
// README describes - confirm against the code that switches on this enum.
enum SmootherType
{
  JACOBI, JACOBI_NO_CUSP, GAUSSSEIDEL, POLYNOMIAL, GSINNER
};
  9 | 
 10 | #include <error.h>
 11 | #include <types.h>
 12 | 
 13 | class FEMSolver;
 14 | 
 15 | /*************************************
 16 |  * Smoother base class
 17 |  *************************************/
 18 | template<class Matrix, class Vector>
 19 | class Smoother
 20 | {
 21 |   typedef typename Matrix::value_type ValueType;
 22 |   typedef typename Matrix::index_type IndexType;
 23 |   typedef typename Matrix::memory_space MemorySpace;
 24 | public:
 25 |   virtual void preRRRFull(const cusp::ell_matrix<IndexType, ValueType, MemorySpace>& AinEll,
 26 |                           const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 27 |                           const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 28 |                           const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 29 |                           const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 30 |                           const cusp::array1d<IndexType, MemorySpace>& permutation,
 31 |                           cusp::array1d<ValueType, MemorySpace>& b,
 32 |                           cusp::array1d<ValueType, MemorySpace>& x,
 33 |                           cusp::array1d<ValueType, MemorySpace>& bc,
 34 |                           int level_id,
 35 |                           int largestblksz) = 0;
 36 |   virtual void preRRRFullCsr(const cusp::csr_matrix<IndexType, ValueType, MemorySpace>& AinCsr,
 37 |                              const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 38 |                              const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 39 |                              const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 40 |                              const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 41 |                              const cusp::array1d<IndexType, MemorySpace>& permutation,
 42 |                              cusp::array1d<ValueType, MemorySpace>& b,
 43 |                              cusp::array1d<ValueType, MemorySpace>& x,
 44 |                              cusp::array1d<ValueType, MemorySpace>& bc,
 45 |                              int level_id,
 46 |                              int largestblksz,
 47 | 														 int largestnumentries,
 48 |                              int largestnumperrow) = 0;
 49 | 
 50 |   virtual void preRRRFullSymmetric(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo,
 51 |                                    const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
 52 |                                    const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
 53 |                                    const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
 54 |                                    const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 55 |                                    const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 56 |                                    const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 57 |                                    const cusp::array1d<IndexType, MemorySpace>& permutation,
 58 |                                    cusp::array1d<ValueType, MemorySpace>& b,
 59 |                                    cusp::array1d<ValueType, MemorySpace>& x,
 60 |                                    cusp::array1d<ValueType, MemorySpace>& bc,
 61 |                                    int level_id,
 62 |                                    int largestblksz,
 63 |                                    int largestnumentries,
 64 |                                    bool verbose = false) = 0;
 65 |   virtual void preRRRFullSymmetricSync(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo,
 66 |                                        const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
 67 |                                        const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
 68 |                                        const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 69 |                                        const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 70 |                                        const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& restrictor,
 71 |                                        const cusp::array1d<IndexType, MemorySpace>& permutation,
 72 |                                        cusp::array1d<ValueType, MemorySpace>& b,
 73 |                                        cusp::array1d<ValueType, MemorySpace>& x,
 74 |                                        cusp::array1d<ValueType, MemorySpace>& bc,
 75 |                                        const cusp::array1d<IndexType, MemorySpace>& segSyncIdx,
 76 |                                        const cusp::array1d<IndexType, MemorySpace>& partSyncIdx,
 77 |                                        int level_id,
 78 |                                        int largestblksz,
 79 |                                        int largestnumentries) = 0;
 80 | 
 81 |   virtual void postPCR(const cusp::ell_matrix<IndexType, ValueType, MemorySpace>& AinEll,
 82 |                        const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 83 |                        const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 84 |                        const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 85 |                        const cusp::array1d<ValueType, MemorySpace>& P,
 86 |                        const cusp::array1d<ValueType, MemorySpace>& b,
 87 |                        cusp::array1d<ValueType, MemorySpace>& x,
 88 |                        cusp::array1d<ValueType, MemorySpace>& xc) = 0;
 89 | 
 90 |   virtual void postPCRFull(const cusp::ell_matrix<IndexType, ValueType, MemorySpace>& AinEll,
 91 |                            const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
 92 |                            const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
 93 |                            const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
 94 |                            const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
 95 |                            const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
 96 |                            const cusp::array1d<IndexType, MemorySpace>& permutation,
 97 |                            const cusp::array1d<ValueType, MemorySpace>& b,
 98 |                            cusp::array1d<ValueType, MemorySpace>& x,
 99 |                            cusp::array1d<ValueType, MemorySpace>& xc,
100 |                            int level_id,
101 |                            int largestblksz) = 0;
102 |   virtual void postPCRFullCsr(const cusp::csr_matrix<IndexType, ValueType, MemorySpace>& AinCsr, // pure virtual (= 0); same parameter list as postPCRFull except that the first matrix is CSR and two size hints are appended
103 |                               const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutCoo,
104 |                               const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
105 |                               const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
106 |                               const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
107 |                               const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
108 |                               const cusp::array1d<IndexType, MemorySpace>& permutation,
109 |                               const cusp::array1d<ValueType, MemorySpace>& b,
110 |                               cusp::array1d<ValueType, MemorySpace>& x, // non-const reference: the implementation may write to x
111 |                               cusp::array1d<ValueType, MemorySpace>& xc, // non-const reference: the implementation may write to xc
112 |                               int level_id,
113 |                               int largestblksz,
114 | 															int largestnumentries, // NOTE(review): presumably the largest number of matrix entries in any block -- confirm
115 | 															int largestnumperrow) = 0; // NOTE(review): presumably the largest number of entries in any row -- confirm
116 | 
117 |   virtual void postPCRFullSymmetric(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo, // pure virtual (= 0); here both matrices are COO and each comes with its own block index array
118 |                                     const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
119 |                                     const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
120 |                                     const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
121 |                                     const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
122 |                                     const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
123 |                                     const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
124 |                                     const cusp::array1d<IndexType, MemorySpace>& permutation,
125 |                                     const cusp::array1d<ValueType, MemorySpace>& b,
126 |                                     cusp::array1d<ValueType, MemorySpace>& x, // non-const reference: the implementation may write to x
127 |                                     cusp::array1d<ValueType, MemorySpace>& xc, // non-const reference: the implementation may write to xc
128 |                                     int level_id,
129 |                                     int largestblksz,
130 |                                     int largestnumentries) = 0; // NOTE(review): "Sys" in the matrix names presumably means only one triangle of a symmetric matrix is stored -- confirm
131 |   virtual void postPCRFullSymmetricSync(const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AinSysCoo, // pure virtual (= 0); parameter list of postPCRFullSymmetric plus segSyncIdx and partSyncIdx
132 |                                         const cusp::array1d<IndexType, MemorySpace>& AinBlockIdx,
133 |                                         const cusp::coo_matrix<IndexType, ValueType, MemorySpace>& AoutSysCoo,
134 |                                         const cusp::array1d<IndexType, MemorySpace>& AoutBlockIdx,
135 |                                         const cusp::array1d<IndexType, MemorySpace>& aggregateIdx,
136 |                                         const cusp::array1d<IndexType, MemorySpace>& partitionIdx,
137 |                                         const cusp::hyb_matrix<IndexType, ValueType, MemorySpace>& prolongator,
138 |                                         const cusp::array1d<IndexType, MemorySpace>& permutation,
139 |                                         const cusp::array1d<ValueType, MemorySpace>& b,
140 |                                         cusp::array1d<ValueType, MemorySpace>& x, // non-const reference: the implementation may write to x
141 |                                         cusp::array1d<ValueType, MemorySpace>& xc, // non-const reference: the implementation may write to xc
142 |                                         const cusp::array1d<IndexType, MemorySpace>& segSyncIdx, // NOTE(review): presumably index data used to synchronize segments -- confirm
143 |                                         const cusp::array1d<IndexType, MemorySpace>& partSyncIdx, // NOTE(review): presumably index data used to synchronize partitions -- confirm
144 |                                         int level_id,
145 |                                         int largestblksz,
146 |                                         int largestnumentries) = 0;
147 | 
148 | 
149 |   virtual void smooth(const Matrix &A, const Vector &b, Vector &x) = 0; // pure virtual; A and b are read-only, x is the only argument an implementation can modify
150 |   virtual void smooth_with_0_initial_guess(const Matrix &A, const Vector &b, Vector &x); // not pure virtual: the default implementation initializes the vector to 0 and then calls smooth()
151 |   virtual ~Smoother(); // virtual so that deleting through a Smoother pointer runs the derived class's destructor
152 |   static Smoother<Matrix, Vector>* allocate(double smootherWeight, // static factory: returns a raw pointer to a Smoother
153 |     int preInnerIters, int postInnerIters, int postRelaxes, const Matrix_d& A); // NOTE(review): presumably the caller owns (and must delete) the returned object -- confirm
154 | 	Vector diag; // public data member; NOTE(review): presumably holds the diagonal of the system matrix -- confirm
155 | };
156 | #endif
157 | 


--------------------------------------------------------------------------------
/src/core/aggmis/cuda/AggMIS_Aggregation_GPU.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * File:   AggMIS_Aggregation_GPU.cu
  3 |  * Author: T. James Lewis
  4 |  *
  5 |  * Created on April 19, 2013, 11:30 AM
  6 |  */
  7 | #include "AggMIS_Aggregation_GPU.h"
  8 | #include "AggMIS_Types.h"
  9 | #include "AggMIS_GraphHelpers.h"
 10 | namespace AggMIS {
 11 |   namespace Aggregation {
 12 |     namespace Kernels {
 13 |       /**
 14 |        * Attaches every not-yet-aggregated node to the neighboring aggregate it
 15 |        * shares the most edges with. One thread handles one node.
 16 |        *
 17 |        * Each distinct neighboring aggregate occupies exactly one tally slot, so
 18 |        * the edge counts of different aggregates can actually be compared. At
 19 |        * most kMaxCandidates distinct aggregates are tracked per node.
 20 |        *
 21 |        * @param size        number of nodes; threads with idx >= size do nothing
 22 |        * @param adjIndexes  node i's neighbors are adjacency[adjIndexes[i] .. adjIndexes[i + 1])
 23 |        * @param adjacency   concatenated neighbor lists
 24 |        * @param partIn      aggregate id per node on entry, -1 if the node has none
 25 |        * @param partOut     receives the chosen id (or -1) for every processed node
 26 |        * @param aggregated  per-node flag; only nodes holding 0 are processed, and
 27 |        *                    the flag becomes 1 once an aggregate was chosen
 28 |        */
 29 |       __global__ void allocateNodesKernel(int size,
 30 |         int *adjIndexes,
 31 |         int *adjacency,
 32 |         int *partIn,
 33 |         int *partOut,
 34 |         int *aggregated) {
 35 |         // Upper bound on the distinct neighboring aggregates tracked per node.
 36 |         const int kMaxCandidates = 10;
 37 |         int idx = blockIdx.x * blockDim.x + threadIdx.x;
 38 |         if (idx >= size || aggregated[idx] != 0)
 39 |           return;
 40 | 
 41 |         // Distinct aggregate ids seen among the neighbors, with their tallies.
 42 |         int candidates[kMaxCandidates];
 43 |         int candidateCounts[kMaxCandidates];
 44 |         int numCandidates = 0;
 45 | 
 46 |         int start = adjIndexes[idx];
 47 |         int end = adjIndexes[idx + 1];
 48 |         for (int i = start; i < end; i++) {
 49 |           int candidate = partIn[adjacency[i]];
 50 |           if (candidate == -1)
 51 |             continue;  // this neighbor does not belong to an aggregate yet
 52 |           // Look for the slot that already holds this aggregate.
 53 |           int slot = 0;
 54 |           while (slot < numCandidates && candidates[slot] != candidate)
 55 |             slot++;
 56 |           if (slot < numCandidates) {
 57 |             candidateCounts[slot] += 1;
 58 |           } else if (numCandidates < kMaxCandidates) {
 59 |             candidates[numCandidates] = candidate;
 60 |             candidateCounts[numCandidates] = 1;
 61 |             numCandidates++;
 62 |           }
 63 |           // else: every slot is taken, so this extra aggregate is not tallied
 64 |         }
 65 | 
 66 |         // The most adjacent aggregate wins; ties go to the one recorded first.
 67 |         int addTo = -1;
 68 |         int count = 0;
 69 |         for (int i = 0; i < numCandidates; i++) {
 70 |           if (candidateCounts[i] > count) {
 71 |             count = candidateCounts[i];
 72 |             addTo = candidates[i];
 73 |           }
 74 |         }
 75 |         partOut[idx] = addTo;
 76 |         if (addTo != -1)
 77 |           aggregated[idx] = 1;
 78 |       }
 79 |       // One flood-fill sweep: every node takes the largest value held by itself
 80 |       // or by any neighbor carrying the same aggregate id. Reads valuesIn,
 81 |       // writes valuesOut, and stores 1 in incomplete[0] whenever a node's value
 82 |       // grew, which tells the host that another sweep is required.
 83 |       __global__ void checkAggregationFillAggregates(int size,
 84 |         int *adjIndices,
 85 |         int *adjacency,
 86 |         int* aggregation,
 87 |         int* valuesIn,
 88 |         int* valuesOut,
 89 |         int* incomplete) {
 90 |         const int node = blockIdx.x * blockDim.x + threadIdx.x;
 91 |         if (node >= size)
 92 |           return;
 93 | 
 94 |         const int ownAgg = aggregation[node];
 95 |         int best = valuesIn[node];
 96 | 
 97 |         // Walk this node's slice of the adjacency array.
 98 |         const int first = adjIndices[node];
 99 |         const int last = adjIndices[node + 1];
100 |         for (int e = first; e < last; ++e) {
101 |           const int other = adjacency[e];
102 |           const int otherVal = valuesIn[other];
103 |           if (aggregation[other] == ownAgg && otherVal > best) {
104 |             best = otherVal;
105 |             incomplete[0] = 1;  // same flag for all threads; they only ever store 1
106 |           }
107 |         }
108 | 
109 |         valuesOut[node] = best;
110 |       }
111 |     }
112 | 
113 |     // Functions
114 |     // Labels every node with an aggregate id by growing the aggregates outward
115 |     // from the roots. Returns a new vector owned by the caller; nodes that no
116 |     // root can reach keep -1 instead of stalling the loop.
117 |     AggMIS::Types::IntVector_d* AggregateToNearest(AggMIS::Types::Graph_d &graph,
118 |       AggMIS::Types::IntVector_d &roots) {
119 |       int size = graph.Size();
120 |       // Scratch vectors are automatic objects so every exit path releases them.
121 |       AggMIS::Types::IntVector_d aggregated(roots);
122 |       AggMIS::Types::IntVector_d *partIn = new AggMIS::Types::IntVector_d(roots);
123 | 
124 |       // Prefix sum to number aggregate roots:
125 |       thrust::inclusive_scan(partIn->begin(), partIn->end(), partIn->begin());
126 | 
127 |       // Transform non root nodes to -1
128 |       thrust::transform(partIn->begin(), partIn->end(), aggregated.begin(), partIn->begin(), Functors::NumberParts());
129 |       AggMIS::Types::IntVector_d partOut(*partIn);
130 | 
131 |       // An empty graph needs no kernel launch (a grid of zero blocks is invalid).
132 |       if (size <= 0)
133 |         return partIn;
134 | 
135 |       // Raw pointers handed to the kernel:
136 |       int *partIn_d = thrust::raw_pointer_cast(partIn->data());
137 |       int *partOut_d = thrust::raw_pointer_cast(partOut.data());
138 |       int *adjIndexes_d = thrust::raw_pointer_cast(graph.indices->data());
139 |       int *adjacency_d = thrust::raw_pointer_cast(graph.adjacency->data());
140 |       int *aggregated_d = thrust::raw_pointer_cast(aggregated.data());
141 | 
142 |       // One thread per node:
143 |       int blockSize = 256;
144 |       int nBlocks = size / blockSize + (size%blockSize == 0 ? 0 : 1);
145 | 
146 |       int unallocatedNodes = thrust::count(aggregated.begin(), aggregated.end(), 0);
147 |       while (unallocatedNodes > 0) {
148 |         // Allocating nodes
149 |         Kernels::allocateNodesKernel<<<nBlocks, blockSize>>>(size, adjIndexes_d, adjacency_d, partIn_d, partOut_d, aggregated_d);
150 | 
151 |         // Copying partOut to partIn
152 |         thrust::copy(partOut.begin(), partOut.end(), partIn->begin());
153 | 
154 |         // A pass that aggregates nothing means the rest is unreachable: stop.
155 |         int stillUnallocated = thrust::count(aggregated.begin(), aggregated.end(), 0);
156 |         if (stillUnallocated == unallocatedNodes)
157 |           break;
158 |         unallocatedNodes = stillUnallocated;
159 |       }
160 |       return partIn;
161 |     }
162 |     // Validates an aggregation of graph. It passes when (1) there is exactly one
163 |     // label per graph node, (2) the distinct labels are exactly 0..k-1, and (3)
164 |     // the nodes of each aggregate form a single connected component. With
165 |     // verbose set, the reason for a failure is printed to stdout.
166 |     bool IsValidAggregation(AggMIS::Types::Graph_d &graph,
167 |       AggMIS::Types::IntVector_d &aggregation,
168 |       bool verbose) {
169 |       // The checks below index both the graph and the labels by node, and read
170 |       // the first and last label, so refuse empty or mismatched input up front.
171 |       int size = graph.Size();
172 |       int labelCount = static_cast<int>(aggregation.size());
173 |       if (labelCount == 0 || labelCount != size) {
174 |         if (verbose)
175 |           printf("Error found in aggregation: %d labels for a graph of %d nodes\n", labelCount, size);
176 |         return false;
177 |       }
178 | 
179 |       // Check to make sure that the aggregate id's are sequential
180 |       AggMIS::Types::IntVector_d scratch(aggregation);
181 |       thrust::sort(scratch.begin(), scratch.end());
182 |       int newLength = thrust::unique(scratch.begin(), scratch.end()) - scratch.begin();
183 |       scratch.resize(newLength);
184 | 
185 |       // Fetch the smallest and largest id once; they are reused for the report.
186 |       int firstId = scratch[0];
187 |       int lastId = scratch[newLength - 1];
188 |       if (firstId != 0 || lastId != newLength - 1) {
189 |         if (verbose) {
190 |           printf("Error found in aggregation: improper aggregate indices:\n");
191 |           printf("\tFirst index is %d, last index is %d, there are %d unique id's\n", firstId, lastId, newLength);
192 |         }
193 |         return false;
194 |       }
195 | 
196 |       // Check to make sure each aggregate is a connected component. valuesIn
197 |       // comes from GetIndicesVector (presumably the values 0..n-1 -- confirm).
198 |       AggMIS::Types::IntVector_d *valuesIn = GraphHelpers::GetIndicesVector(aggregation.size());
199 |       AggMIS::Types::IntVector_d valuesOut(aggregation.size());
200 |       AggMIS::Types::IntVector_d incomplete(1, 1);
201 | 
202 |       // Figuring out block sizes for kernel call:
203 |       int blockSize = 256;
204 |       int nBlocks = size / blockSize + (size%blockSize == 0 ? 0 : 1);
205 | 
206 |       // Getting raw pointers
207 |       int *valuesIn_d = thrust::raw_pointer_cast(valuesIn->data());
208 |       int *valuesOut_d = thrust::raw_pointer_cast(&valuesOut[0]);
209 |       int *incomplete_d = thrust::raw_pointer_cast(&incomplete[0]);
210 |       int *adjacency_d = thrust::raw_pointer_cast(graph.adjacency->data());
211 |       int *adjIndices_d = thrust::raw_pointer_cast(graph.indices->data());
212 |       int *aggregation_d = thrust::raw_pointer_cast(&aggregation[0]);
213 | 
214 |       // Flood filling within each aggregate. The two buffers swap roles after
215 |       // every sweep, so valuesIn_d always points at the most recent result.
216 |       int *originalOut = valuesIn_d;
217 |       while (incomplete[0] == 1) {
218 |         incomplete[0] = 0;
219 |         Kernels::checkAggregationFillAggregates<<<nBlocks, blockSize>>>
220 |           (size, adjIndices_d, adjacency_d, aggregation_d, valuesIn_d, valuesOut_d, incomplete_d);
221 |         int *temp = valuesIn_d;
222 |         valuesIn_d = valuesOut_d;
223 |         valuesOut_d = temp;
224 |       }
225 | 
226 |       // Put the final values in valuesOut, then reuse valuesIn for the labels.
227 |       if (originalOut != valuesOut_d)
228 |         valuesOut.assign(valuesIn->begin(), valuesIn->end());
229 |       valuesIn->assign(aggregation.begin(), aggregation.end());
230 | 
231 |       // Count the distinct (aggregate, fill value) pairs: a connected aggregate
232 |       // ends up with a single fill value and therefore contributes one pair.
233 |       int correctLength = newLength;
234 |       thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(valuesIn->begin(), valuesOut.begin())),
235 |         thrust::make_zip_iterator(thrust::make_tuple(valuesIn->end(), valuesOut.end())));
236 |       newLength = thrust::unique(thrust::make_zip_iterator(thrust::make_tuple(valuesOut.begin(), valuesIn->begin())),
237 |         thrust::make_zip_iterator(thrust::make_tuple(valuesOut.end(), valuesIn->end())))
238 |         - thrust::make_zip_iterator(thrust::make_tuple(valuesOut.begin(), valuesIn->begin()));
239 | 
240 |       valuesIn->resize(newLength);
241 |       valuesOut.resize(newLength);
242 | 
243 |       bool valid = newLength == correctLength;
244 |       if (!valid && verbose) {
245 |         printf("Error: there were %d connected components found and %d aggregates\n", newLength, correctLength);
246 |         // Host copies are only needed for the report, so make them here.
247 |         AggMIS::Types::IntVector_h aggIds(*valuesIn);
248 |         AggMIS::Types::IntVector_h nodeIds(valuesOut);
249 |         for (int i = 0; i + 1 < newLength; i++)
250 |           if (aggIds[i] == aggIds[i + 1])
251 |             printf("Aggregate %d was filled from %d and %d\n", aggIds[i], nodeIds[i], nodeIds[i + 1]);
252 |       }
253 | 
254 |       delete valuesIn;
255 |       return valid;
256 |     }
257 |     // Builds a Graph_d that groups node ids by aggregate label: adjacency is
258 |     // filled by SetToIndicesVector and then reordered so that equal labels are
259 |     // contiguous; indices gets (largest label + 2) entries that are filled in by
260 |     // findPartIndicesKernel. The caller owns the returned object.
261 |     AggMIS::Types::Graph_d* GetAggregateMap(AggMIS::Types::IntVector_d& aggregation) {
262 |       AggMIS::Types::Graph_d* result = new AggMIS::Types::Graph_d();
263 | 
264 |       // Node ids, paired with a private copy of the labels that serves as sort key.
265 |       GraphHelpers::SetToIndicesVector(aggregation.size(), *(result->adjacency));
266 |       AggMIS::Types::IntVector_d labels(aggregation.begin(), aggregation.end());
267 |       thrust::sort_by_key(labels.begin(), labels.end(), result->adjacency->begin());
268 | 
269 |       // After sorting, the last label is the largest one.
270 |       const int largestLabel = labels[labels.size() - 1];
271 |       result->indices->resize(largestLabel + 2, 0);
272 | 
273 |       // Launch configuration: 256 threads per block, enough blocks for all nodes.
274 |       const int nodeCount = aggregation.size();
275 |       const int threadsPerBlock = 256;
276 |       const int blockCount = nodeCount / threadsPerBlock + (nodeCount % threadsPerBlock != 0 ? 1 : 0);
277 | 
278 |       // Calling kernel to find indices for each part:
279 |       GraphHelpers::Kernels::findPartIndicesKernel<<<blockCount, threadsPerBlock>>>(
280 |         nodeCount, AggMIS::Types::StartOf(labels), result->indStart());
281 | 
282 |       labels.clear();
283 | 
284 |       return result;
285 |     }
286 |   }
287 | }
288 | 


--------------------------------------------------------------------------------