├── configs ├── Makefile ├── Millennium2.conf ├── AqB4.conf ├── MilliMill.conf ├── AqA5.conf ├── AqA4.conf ├── Apostle_S1_LR.conf ├── Apostle_V1_HR.conf ├── MADinfall.conf ├── EagleL100N1504.conf ├── AqA3.conf ├── DynamicMerger.conf ├── AqA2.conf └── Example.conf ├── src ├── Makefile ├── test │ ├── test_printf.cpp │ ├── Makefile │ ├── test_closestfactor.cpp │ ├── test_accumulate.cpp │ ├── test_getenv.cpp │ ├── test_mpi_reduce.cpp │ ├── test_ompvector.cpp │ ├── testnew.cpp │ ├── test_init.cpp │ ├── test_ompfor.cpp │ ├── test_boostmpi_sendvec.cpp │ ├── test_ompfirstprivate.cpp │ ├── test_ompnest.cpp │ ├── test_VecAllToAll.cpp │ ├── test_boostmpi_mem.cpp │ ├── test_mpi_packed.cpp │ ├── test_template_inheritance.cpp │ ├── testSharedPtr.cpp │ ├── test_boostmpi_serialize.cpp │ ├── test_mpi.cpp │ ├── testHBTxyz.cpp │ ├── test_parse.cpp │ ├── ph5File_create.c │ ├── test_boostmpi_partialsend.cpp │ ├── test_omptask.cpp │ ├── testHBTxyz2.cpp │ ├── test_h5compound2.cpp │ ├── ph5Dataset.c │ └── test_h5subset.cpp ├── io │ ├── Makefile │ ├── gadget_group_io.h │ ├── gadget_io.h │ ├── snapshot_io.cpp │ ├── apostle_io.h │ ├── halo_io.cpp │ └── gadget_virial_io.h ├── hdf_wrapper.cpp ├── gravity_tree.h ├── particle_exchanger.cpp ├── geometric_tree.h ├── mpi_wrapper.cpp ├── snapshot_number.h ├── halo.h ├── hdf_wrapper.h ├── oct_tree.h ├── halo_particle_iterator.h ├── hash_remote.tpp ├── hash.h ├── linkedlist_base.h ├── hash.tpp ├── halo.cpp ├── linkedlist.cpp ├── oct_tree.tpp ├── datatypes.h ├── config_parser.h ├── snapshot_exchanger.cpp ├── geometric_tree.cpp ├── gravity_tree.cpp ├── linkedlist.h ├── subhalo_merge.cpp ├── mymath.h ├── mymath.cpp ├── snapshot.h └── mpi_wrapper.h ├── ToDo ├── README.md ├── toolbox ├── Makefile ├── analysis │ └── Makefile ├── dump_a_sub.cpp ├── CreateSnapshotList.py ├── SplitSubSnap.py └── combineSubSnap.py ├── HBTjob_mpi.slurm ├── HBTjob_hybrid.slurm ├── HBTjob_mpi.bsub ├── HBTjob_hybrid.bsub ├── KnownIssues ├── Makefile.inc ├── HBT.cpp ├── CMakeLists.txt └── Makefile /configs/Makefile: -------------------------------------------------------------------------------- 1 | clean: clean_curdir 2 | 3 | clean_curdir: 4 | rm -rf *~ 5 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | SRC=$(wildcard *.cpp) 2 | OBJS=$(SRC:%.cpp=%.o) 3 | TEST=$(SRC:%.cpp=%.test) 4 | 5 | gravity_tree.test: config_parser.o mymath.o halo.o snapshot.o io/halo_io.o io/snapshot_io.o 6 | 7 | $(TEST): HDFLIB= 8 | 9 | include ../Makefile.inc 10 | 11 | -------------------------------------------------------------------------------- /ToDo: -------------------------------------------------------------------------------- 1 | new particle query algorithm 2 | 3 | 1) distribute snapshot in blocks 4 | 2) distribute halo in blocks 5 | 3) compute location of halo using queried particles 6 | 4) guess target node of unqueried particles, fetch particles to local buffer (can have a guessed buffer first) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | New implementation of HBT in C++ . This is the hybrid MPI/OpenMP parallelized version. Check the [Hydro](https://github.com/Kambrian/HBT2/tree/Hydro) branch for a pure OpenMP version. 2 | 3 | Documentation is available on the [wiki](https://github.com/Kambrian/HBT2/wiki). 
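As a rough quick start (this is only a sketch: it assumes the top-level Makefile produces the `HBTi8` executable used by the bundled job scripts, and `configs/Example.conf` stands in for your own configuration file):

```bash
# edit Makefile.inc first to point at your MPI compiler, HDF5 and (optionally) GSL
make

# process snapshots 0..63 of the simulation described in the config file;
# omit the two snapshot numbers to run from 0 to MaxSnapshotIndex
mpirun -np 16 ./HBTi8 configs/Example.conf 0 63
```

See `HBTjob_mpi.slurm` and `HBTjob_hybrid.slurm` for complete batch-job examples.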
-------------------------------------------------------------------------------- /src/test/test_printf.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main() 8 | { 9 | std::string fmt="%d/%s.0"; 10 | char buf[1024]; 11 | sprintf(buf, fmt.c_str(), 3, "hi"); 12 | std::cout< 2 | #include 3 | #include "../mymath.h" 4 | 5 | int main(int argc, char **argv) 6 | { 7 | #define test(x,y) cout< 2 | #include 3 | #include 4 | using namespace std; 5 | int main() 6 | { 7 | vector x(10,1); 8 | int s=0; 9 | s=accumulate(x.begin(),x.end(),s); 10 | cout< 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char **argv) 8 | { 9 | 10 | string dir="."; 11 | char * versionstr=getenv("USER"); 12 | if(versionstr) 13 | { 14 | cout< 3 | #include 4 | #include "mpi.h" 5 | #include 6 | #include 7 | 8 | int main(int argc, char **argv) 9 | { 10 | int myrank, x; 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 13 | 14 | x=myrank; 15 | if(myrank==0) 16 | MPI_Reduce(MPI_IN_PLACE, &x, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); 17 | else 18 | MPI_Reduce(&x, &x, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); 19 | 20 | cout<<"thread "< 2 | #include 3 | #include 4 | #include 5 | using namespace std; 6 | int main(int argc, char **argv) 7 | { 8 | int n=3; 9 | if(argc>1) n=atoi(argv[1]); 10 | #pragma omp parallel num_threads(3) 11 | { 12 | static vector x(3, 1); //by default, x is private, and is initialized properly 13 | int y[n];//this is allowed by C99 and g++ 14 | for(int i=0;i &Halos); 28 | extern bool IsGadgetGroup(const string &GroupFileFormat); 29 | 30 | } 31 | 32 | #endif -------------------------------------------------------------------------------- /configs/EagleL100N1504.conf: -------------------------------------------------------------------------------- 1 | #sample config file. Case sensitive 2 | 3 | [Compulsary Params] 4 | SnapshotPath /gpfs/data/jvbq85/HBT/data/eagle/L0100N1504/simu 5 | HaloPath /gpfs/data/jvbq85/HBT/data/eagle/L0100N1504/simu 6 | SubhaloPath /gpfs/data/jvbq85/HBT/data/eagle/L0100N1504/subcat 7 | SnapshotFileBase snap #irrelevant 8 | MaxSnapshotIndex 365 9 | BoxSize 67.77 10 | SofteningHalo 0.00180239 11 | 12 | 13 | [Reader] 14 | SnapshotFormat apostle 15 | GroupFileFormat apostle_particle_index 16 | 17 | [Units] 18 | MassInMsunh 1e10 19 | LengthInMpch 1 20 | VelInKmS 1 21 | 22 | MinNumPartOfSub 20 23 | #MinSnapshotIndex 6 24 | 25 | #BoundMassPrecision 0.995 26 | #PeriodicBoundaryOn 1 27 | 28 | SaveSubParticleProperties 0 29 | 30 | MaxConcurrentIO 16 #the maximum number of concurrent io processes -------------------------------------------------------------------------------- /src/test/testnew.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // #include "../datatypes.h" 4 | class GB 5 | { 6 | long data[1024*1024*1024]; 7 | }; 8 | int main() 9 | { 10 | const int x=1; 11 | const int &y=x; 12 | int *p0=new int[10]; 13 | int *p1=new (p0+5) int[3]; 14 | // delete [] p1; 15 | // std::cout<<"p1 deleted\n"; 16 | delete [] p0; 17 | std::cout<<"p0 deleted\n"; 18 | std::cout< 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | class myclass 7 | { 8 | public: 9 | int x,y,z; 10 | myclass(): x(0),y(1) //z is uninitialized. can be anything. 
11 | { 12 | printf("default called from %d\n", omp_get_thread_num()); 13 | } 14 | myclass(int a, int b, int c):x(a),y(b),z(c) 15 | { 16 | printf("initialized from %d\n", omp_get_thread_num()); 17 | } 18 | }; 19 | int main() 20 | { 21 | #pragma omp parallel num_threads(2) 22 | { 23 | myclass c=myclass(),d; 24 | myclass a(0,1,2);//every thread executes the initializer 25 | #pragma omp single 26 | { 27 | // cout< 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | int main() 7 | { 8 | #pragma omp parallel num_threads(3) 9 | for(int i=0;i<4;i++)//each thread will loop through i individually 10 | { 11 | int ithread=omp_get_thread_num(); 12 | printf("Thread %d: i=%d\n", ithread,i); 13 | if(1==ithread) sleep(2); 14 | // #pragma omp barrier 15 | #pragma omp for //this will start to create tasks 16 | for(int j=0;j 3 | #include 4 | #include "mpi.h" 5 | 6 | #include 7 | #include 8 | namespace mpi = boost::mpi; 9 | 10 | int main(int argc, char **argv) 11 | { 12 | mpi::environment env; 13 | mpi::communicator world; 14 | 15 | #define MSG_LEN 100 16 | vector sendbuf(MSG_LEN, 1), recvbuf(MSG_LEN); 17 | 18 | if(world.rank()==0) 19 | world.send(1, 0, sendbuf.data(), sendbuf.size()); 20 | else if(world.rank()==1) 21 | //you cannot send array and receive vec. have to match types: world.recv(0, 0, recvbuf) will fail. 22 | world.recv(0, 0, recvbuf.data(), recvbuf.size()); 23 | 24 | if(world.rank()==1) 25 | cout< 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | class myclass 7 | { 8 | public: 9 | int x,y,z; 10 | myclass(): x(0),y(1) //z is uninitialized. can be anything. 11 | { 12 | printf("default called from %d\n", omp_get_thread_num()); 13 | } 14 | myclass(int a, int b, int c):x(a),y(b),z(c) 15 | { 16 | printf("initialized from %d\n", omp_get_thread_num()); 17 | } 18 | }; 19 | int main() 20 | { 21 | #pragma omp parallel num_threads(3) 22 | { 23 | int x=omp_get_thread_num(); 24 | #pragma omp for 25 | for(int i=0;i<3;i++) 26 | { 27 | static myclass a(1,2,3);//static var is initialized only once! 
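      //note: all threads share this single static instance, so the a.x=i write below
      //races across threads, and later printf calls may show a value set by another thread.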
28 | printf("%d, %d: %d, %d\n", omp_get_thread_num(), i, x, a.x); 29 | a.x=i; 30 | printf("%d, %d: %d\n", omp_get_thread_num(), i, a.x); 31 | } 32 | } 33 | return 0; 34 | } -------------------------------------------------------------------------------- /src/test/test_ompnest.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | 7 | int main(int argc, char **argv) 8 | { 9 | int n=atoi(argv[1]); 10 | //omp_set_nested(0); 11 | omp_set_max_active_levels(1); //max_active_level 0: no para; 1: single layer; 12 | cout<<"InPara:"< 3 | #include 4 | #include "../boost_mpi.h" 5 | 6 | int main(int argc, char **argv) 7 | { 8 | mpi::environment env; 9 | mpi::communicator world; 10 | 11 | vector > Send(world.size()), Receive(world.size()); 12 | for(int i=0;i(cout, ", ")); 21 | cout<(cout, ", ")); 28 | cout< 3 | #include 4 | #include "mpi.h" 5 | 6 | #include 7 | #include 8 | namespace mpi = boost::mpi; 9 | 10 | int main(int argc, char **argv) 11 | { 12 | mpi::environment env; 13 | mpi::communicator world; 14 | 15 | #define MSG_LEN 100000 16 | vector sendbuf(MSG_LEN, 1), recvbuf(MSG_LEN); 17 | 18 | #define USE_BOOST 19 | 20 | #ifndef USE_BOOST 21 | if(world.rank()==0) 22 | MPI_Send(sendbuf.data(), MSG_LEN, MPI_INT, 1, 0, MPI_COMM_WORLD); 23 | else if(world.rank()==1) 24 | MPI_Recv(recvbuf.data(), MSG_LEN, MPI_INT, 0, 0, MPI_COMM_WORLD, NULL); 25 | #else 26 | if(world.rank()==0) 27 | world.send(1, 0, sendbuf); 28 | else if(world.rank()==1) 29 | world.recv(0, 0, recvbuf); 30 | #endif 31 | 32 | if(world.rank()==1) 33 | cout<<"Data received: "< GravityTreeCell_t; 8 | class GravityTree_t:public OctTree_t 9 | { 10 | private: 11 | void ProcessNode(HBTInt nodeid, HBTInt nextid, int sonid, double &mass, double CoM[3], double len, const double center[3]); 12 | void FillNodeCenter(HBTInt nodeid, const double center[3], double CoM[3], double mass); 13 | void UpdateInternalNodes(HBTInt no,HBTInt sib,double len, const double center[3]); 14 | public: 15 | double EvaluatePotential(const HBTxyz &targetPos, const HBTReal targetMass=0.); 16 | double BindingEnergy(const HBTxyz &targetPos, const HBTxyz &targetVel, const HBTxyz &refPos, const HBTxyz &refVel, const HBTReal targetMass=0.); 17 | }; 18 | 19 | #endif 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /HBTjob_mpi.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=PMO3000 # 作业名 3 | #SBATCH --no-requeue 4 | #SBATCH --partition=debug # cpu 队列 5 | #SBATCH --ntasks=1920 # 32 nodes 6 | #SBATCH --ntasks-per-node=60 # 每节点核数 7 | #SBATCH --output=logs/%x.%j 8 | #SBATCH --error=logs/%x.%j 9 | #ls /public/home/jiaxinhan/PMO3000/simu/snapdir_* 10 | #ls /public/home/liming/data/Simu_n3072_b500_pl13/snapdir_* -d 11 | 12 | ulimit -s unlimited 13 | module load mpi/intelmpi compiler/intel mathlib/gsl mathlib/hdf5 14 | #~ source /etc/profile.d/modules.csh 15 | 16 | # by default, openmp is not enabled in the MPI version of HBT+. If you want to enable it, uncomment the openmp flags in Makefile.inc and rebuild. 17 | # export OMP_NUM_THREADS=20 18 | 19 | # specify the first and last snapshot to process. useful for restarting from a specific snapshot. if not specified, then snapstart=0 and snapend=MaxSnapshotIndex. 
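# (to process the whole run instead, drop "$snapstart $snapend" from the mpirun command at the bottom)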
20 | snapstart=62 21 | snapend=63 22 | 23 | NP=$SLURM_NTASKS 24 | echo $NP "tasks" 25 | 26 | mpirun -np $NP ./HBTi8 configs/${SLURM_JOB_NAME}.conf $snapstart $snapend 27 | -------------------------------------------------------------------------------- /toolbox/dump_a_sub.cpp: -------------------------------------------------------------------------------- 1 | using namespace std; 2 | #include 3 | // #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "../src/snapshot.h" 11 | #include "../src/subhalo.h" 12 | #include "../src/mymath.h" 13 | 14 | int main(int argc, char **argv) 15 | { 16 | int isnap=32, subid=22; 17 | HBTConfig.ParseConfigFile(argv[1]); 18 | SubhaloSnapshot_t subsnap; 19 | subsnap.Load(isnap, true); 20 | 21 | cout< 3 | #include 4 | #include "mpi.h" 5 | 6 | int main(int argc, char **argv) 7 | { 8 | #define MSG_LEN 1000000 9 | #define BUF_LEN (4*(MSG_LEN+2)) 10 | int position, i, j[MSG_LEN], a[2]; 11 | char buff[BUF_LEN]; 12 | int myrank; 13 | 14 | MPI_Init(&argc, &argv); 15 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 17 | if(myrank == 0) 18 | { 19 | position = 0; 20 | MPI_Pack(&a, 2, MPI_INT, buff, BUF_LEN, &position, MPI_COMM_WORLD); 21 | // cout< 2 | #include 3 | 4 | typedef int HBTInt; 5 | 6 | template 7 | class List_t 8 | { 9 | HBTInt N; 10 | public: 11 | T * Data; 12 | List_t(HBTInt n=0, T *data=NULL) 13 | { 14 | N=n; 15 | if(data)//memory can be shared, not always allocated. ToDo: implement shared_ptr? 16 | Data=data; 17 | else 18 | Data=new T[n]; 19 | } 20 | T & operator [](HBTInt i) 21 | { 22 | return Data[i]; 23 | } 24 | HBTInt size() 25 | { 26 | return N; 27 | } 28 | void clear()//the user is responsible for cleaning up. 29 | { 30 | delete [] Data; 31 | N=0; 32 | } 33 | }; 34 | struct ParticleReference_t 35 | { 36 | HBTInt Id; 37 | HBTInt Index; 38 | }; 39 | template 40 | class ParticleList_t: public List_t 41 | { 42 | public: 43 | T & operator [](ParticleReference_t ref) //overload for reference 44 | { 45 | return List_t ::Data[ref.Index]; //or this->Data could also work; but Data along is unqualified. 46 | } 47 | }; 48 | int main() 49 | { 50 | List_t L(10); 51 | std::cout< 2 | #include "../datatypes.h" 3 | template 4 | class SharedList_t 5 | { 6 | HBTInt N; 7 | std::shared_ptr Data; 8 | public: 9 | SharedList_t(): N(0), Data(NULL) 10 | { 11 | } 12 | SharedList_t(HBTInt n) 13 | { 14 | Data=new T[n]; 15 | N=n; 16 | } 17 | SharedList_t(HBTInt n, void *data, std::shared_ptr storage): 18 | N(n), Data(storage, data) 19 | { 20 | } 21 | //the default copy constructor would be shallow copy. 
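  //(copying is cheap: Data is a shared_ptr, so a copy only shares ownership with the source;
  // the (n, data, storage) constructor above uses the shared_ptr aliasing constructor, letting a
  // sub-list point into the middle of another list's buffer while keeping that buffer alive.)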
22 | SharedList_t(SharedList_t & list): N(list.N), Data(list.Data) 23 | { 24 | } 25 | void deep_copy(SharedList_t & list) 26 | {//deep copy 27 | N=list.N; 28 | Data=new T[N]; 29 | memcpy(Data, list.Data, sizeof(T)*N); 30 | } 31 | void reset() 32 | { 33 | N=0; 34 | Data.reset(); 35 | } 36 | HBTInt size() 37 | { 38 | return N; 39 | } 40 | T & operator [] (HBTInt i) 41 | { 42 | return ((T *)Data)[i]; 43 | } 44 | }; 45 | struct HaloCatalogue 46 | { 47 | SharedList_t * Halos; 48 | int Nhalos; 49 | }; 50 | int main() 51 | { 52 | // SharedList_t H1(10),H2(5,(void *)&(H1[5]),(std::shared_ptr)&H1); 53 | return 0; 54 | } -------------------------------------------------------------------------------- /HBTjob_hybrid.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=PMO3000.hybrid # 作业名 3 | #SBATCH --no-requeue 4 | #SBATCH --partition=debug # cpu 队列 5 | #SBATCH --ntasks=64 # mpi tasks 6 | #SBATCH --cpus-per-task=30 #threads; each node 64cpus, so 2 tasks 7 | #SBATCH --output=logs/%x.%j 8 | #SBATCH --error=logs/%x.%j 9 | #ls /public/home/jiaxinhan/PMO3000/simu/snapdir_* 10 | #ls /public/home/liming/data/Simu_n3072_b500_pl13/snapdir_* -d 11 | 12 | ulimit -s unlimited 13 | module load mpi/intelmpi compiler/intel mathlib/gsl mathlib/hdf5 14 | #~ source /etc/profile.d/modules.csh 15 | 16 | # by default, openmp is not enabled in the MPI version of HBT+. If you want to enable it, uncomment the openmp flags in Makefile.inc and rebuild. 17 | export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK 18 | 19 | # specify the first and last snapshot to process. useful for restarting from a specific snapshot. if not specified, then snapstart=0 and snapend=MaxSnapshotIndex. 20 | snapstart=59 21 | snapend=63 22 | 23 | NP=$SLURM_NTASKS 24 | echo $NP tasks each $OMP_NUM_THREADS threads 25 | 26 | mpirun -np $NP ./HBTi8.hybrid configs/${SLURM_JOB_NAME}.conf $snapstart $snapend 27 | -------------------------------------------------------------------------------- /src/test/test_boostmpi_serialize.cpp: -------------------------------------------------------------------------------- 1 | using namespace std; 2 | #include 3 | #include 4 | // #include "mpi.h" 5 | 6 | #include 7 | namespace mpi = boost::mpi; 8 | 9 | int main(int argc, char **argv) 10 | { 11 | mpi::environment env; 12 | mpi::communicator world; 13 | 14 | #define MSG_LEN 1000000 15 | vector sendbuf(MSG_LEN, 1), recvbuf(MSG_LEN); 16 | 17 | MPI_Comm comm=world; 18 | if(world.rank()==0) 19 | { 20 | mpi::packed_oarchive oa(comm); 21 | oa << sendbuf; 22 | auto sendptr = const_cast(oa.address()); 23 | // cast to int because MPI uses ints for sizes like it's still 1990 24 | int sendsize = static_cast(oa.size()); 25 | MPI_Send(&sendsize, 1, MPI_INT, 1, 0, comm); 26 | MPI_Ssend(sendptr, sendsize, MPI_PACKED, 1, 0, comm); 27 | cout<> recvbuf; 38 | cout<<"Data received: "< 3 | #include 4 | #include "mpi.h" 5 | #include 6 | #include 7 | 8 | int main(int argc, char **argv) 9 | { 10 | int myrank, x; 11 | MPI_Init(&argc, &argv); 12 | MPI_Comm_rank(MPI_COMM_WORLD, &myrank); 13 | 14 | if(myrank == 0) 15 | { 16 | x=1000; 17 | } 18 | MPI_Bcast(&x, 1, MPI_INT, 0, MPI_COMM_WORLD); 19 | cout<<"x= "< y(nmax); 33 | vector Reqs(nmax); 34 | for(int i=0;i(cout, ", ")); 43 | cout< 2 | #include 3 | // #include "../datatypes.h" 4 | template 5 | class XYZ 6 | { 7 | std::array data; 8 | public: 9 | XYZ(T x=0, T y=0, T z=0) //:data{x,y,z} 10 | { 11 | std::cout<<"new\n"; 12 | data[0]=x; 13 | data[1]=y; 14 | data[2]=z; 15 | } 16 | 
XYZ(const XYZ &s) 17 | { 18 | std::cout<<"assign"< 31 | std::ostream& operator << (std::ostream& o, XYZ &a) 32 | { 33 | 34 | o << "(" << a[0] << ", " << a[1] << ", " << a[2] << ")"; 35 | return o; 36 | }; 37 | typedef XYZ HBTxyz; 38 | class myxyz: public XYZ 39 | { 40 | public: 41 | myxyz(double x,double y, double z): XYZ (x,y,z) 42 | { 43 | } 44 | //default destructor is auto called. 45 | }; 46 | int main() 47 | { 48 | XYZ x={1.,2.,3.}; 49 | std::cout< y(x); 51 | HBTxyz &z=y; 52 | myxyz b(1.,2.,3.); 53 | std::cout< x; 60 | std::cout< 3 | #include 4 | #include 5 | #include 6 | #include "mpi.h" 7 | 8 | #include "../datatypes.h" 9 | #include "../config_parser.h" 10 | #include "../boost_mpi.h" 11 | #include "../mymath.h" 12 | 13 | int main(int argc, char **argv) 14 | { 15 | mpi::environment env; 16 | mpi::communicator world; 17 | #ifdef _OPENMP 18 | omp_set_nested(0); 19 | #endif 20 | 21 | int snapshot_start, snapshot_end; 22 | if(0==world.rank()) 23 | { 24 | ParseHBTParams(argc, argv, HBTConfig, snapshot_start, snapshot_end); 25 | mkdir(HBTConfig.SubhaloPath.c_str(), 0755); 26 | MarkHBTVersion(); 27 | } 28 | HBTConfig.BroadCast(world, 0, snapshot_start, snapshot_end); 29 | 30 | cout<< HBTConfig.SnapshotPath<< " from "< GeoTreeCell_t; 8 | 9 | class GeoTree_t: public OctTree_t 10 | { 11 | private: 12 | void ProcessNode(HBTInt nodeid, HBTInt nextid, int sonid, HBTInt &mass, double len, const double center[3]); 13 | void FillNodeCenter(HBTInt nodeid, const double center[3]); 14 | void UpdateInternalNodes(HBTInt no,HBTInt sib,double len, const double center[3]); 15 | int NumNeighbourSPH; 16 | public: 17 | GeoTree_t():OctTree_t(), NumNeighbourSPH(64) 18 | { 19 | } 20 | void Search(const HBTxyz &searchcenter, HBTReal radius, ParticleCollector_t &collector); 21 | HBTInt NearestNeighbour(const HBTxyz &searchcenter, HBTReal rguess); 22 | double SphDensity(const HBTxyz &cen, HBTReal & rguess); 23 | int GetNumNeighbourSPH() 24 | { 25 | return NumNeighbourSPH; 26 | } 27 | void SetNumNeighbourSPH(int num_neighbour) 28 | { 29 | NumNeighbourSPH=num_neighbour; 30 | } 31 | }; 32 | 33 | inline HBTReal GuessNeighbourRange(HBTInt n_neighbours, HBTReal number_density_guess) 34 | { 35 | return pow(3 * n_neighbours / (4 * 3.141593) / number_density_guess, 1.0 / 3); 36 | } 37 | 38 | #endif 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /src/test/ph5File_create.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This example creates an HDF5 file. 3 | */ 4 | #include "mpi.h" 5 | #include "hdf5.h" 6 | 7 | #define H5FILE_NAME "SDS_row.h5" 8 | 9 | int 10 | main (int argc, char **argv) 11 | { 12 | /* 13 | * HDF5 APIs definitions 14 | */ 15 | hid_t file_id; /* file and dataset identifiers */ 16 | hid_t plist_id; /* property list identifier( access template) */ 17 | herr_t status; 18 | 19 | /* 20 | * MPI variables 21 | */ 22 | int mpi_size, mpi_rank; 23 | MPI_Comm comm = MPI_COMM_WORLD; 24 | MPI_Info info = MPI_INFO_NULL; 25 | 26 | /* 27 | * Initialize MPI 28 | */ 29 | MPI_Init(&argc, &argv); 30 | MPI_Comm_size(comm, &mpi_size); 31 | MPI_Comm_rank(comm, &mpi_rank); 32 | 33 | /* 34 | * Set up file access property list with parallel I/O access 35 | */ 36 | plist_id = H5Pcreate(H5P_FILE_ACCESS); 37 | H5Pset_fapl_mpio(plist_id, comm, info); 38 | 39 | /* 40 | * Create a new file collectively. 
41 | */ 42 | file_id = H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id); 43 | 44 | /* 45 | * Close property list. 46 | */ 47 | H5Pclose(plist_id); 48 | 49 | /* 50 | * Close the file. 51 | */ 52 | H5Fclose(file_id); 53 | 54 | MPI_Finalize(); 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /src/mpi_wrapper.cpp: -------------------------------------------------------------------------------- 1 | #include "mpi_wrapper.h" 2 | #include 3 | // using namespace std; 4 | 5 | void MpiWorker_t::SyncAtomBool(bool& x, int root) 6 | { 7 | char y; 8 | if(rank()==root) 9 | y=x; 10 | MPI_Bcast(&y, 1, MPI_CHAR, root, Communicator); 11 | x=y; 12 | } 13 | void MpiWorker_t::SyncVectorBool(vector< bool >& x, int root) 14 | { 15 | vector y; 16 | if(rank()==root) 17 | y.assign(x.begin(),x.end()); 18 | SyncContainer(y, MPI_CHAR, root); 19 | if(rank()!=root) 20 | x.assign(y.begin(),y.end()); 21 | } 22 | 23 | void MpiWorker_t::SyncVectorString(vector< string >& x, int root) 24 | { 25 | string buffer; 26 | 27 | if(rank()==root) 28 | { 29 | ostringstream file; 30 | for(auto &s: x) 31 | file< &Particles; 29 | Cosmology_t &Cosmology; 30 | 31 | int SnapshotId; 32 | GadgetHeader_t Header; 33 | bool NeedByteSwap; 34 | int IntTypeSize; 35 | int RealTypeSize; 36 | vector NumberOfParticleInFiles; 37 | vector OffsetOfParticleInFiles; 38 | void ReadGadgetFile(int ifile); 39 | void LoadGadgetHeader(int ifile=0); 40 | bool ReadGadgetFileHeader(FILE *fp, GadgetHeader_t &header); 41 | HBTInt ReadGadgetNumberOfParticles(int ifile); 42 | void GetGadgetFileName(int ifile, string &filename); 43 | void Load(MpiWorker_t &world); 44 | 45 | public: 46 | GadgetReader_t(MpiWorker_t &world, int snapshot_id, vector &particles, Cosmology_t & cosmology); 47 | }; 48 | 49 | #endif -------------------------------------------------------------------------------- /src/test/test_boostmpi_partialsend.cpp: -------------------------------------------------------------------------------- 1 | /* it is valid to only serialize part of the struct */ 2 | using namespace std; 3 | #include 4 | #include 5 | #include "mpi.h" 6 | 7 | #include 8 | #include 9 | namespace mpi = boost::mpi; 10 | 11 | struct TestStruct_t 12 | { 13 | int a; 14 | vector c; 15 | double b; 16 | TestStruct_t(): a(0), b(0), c(2,1) 17 | { 18 | } 19 | private: 20 | friend class boost::serialization::access; 21 | template 22 | void serialize(Archive & ar, const unsigned int version) 23 | { 24 | ar & a; 25 | ar & b; 26 | // ar & c; 27 | } 28 | }; 29 | BOOST_IS_MPI_DATATYPE(TestStruct_t) 30 | std::ostream& operator << (std::ostream& o, vector &x) 31 | { 32 | for(auto &&a: x) 33 | { 34 | o << "[" << a.a << "; " << a.b << "; "; 35 | 36 | auto &vec=a.c; 37 | copy(vec.cbegin(), vec.cend(), ostream_iterator(o, ", ")); 38 | 39 | o<< "] "; 40 | } 41 | 42 | return o; 43 | }; 44 | int main(int argc, char **argv) 45 | { 46 | mpi::environment env; 47 | mpi::communicator world; 48 | 49 | #define MSG_LEN 3 50 | vector sendbuf(MSG_LEN), recvbuf; 51 | 52 | if(world.rank()==0) 53 | { 54 | sendbuf[1].a=1; 55 | sendbuf[2].b=2; 56 | sendbuf[1].c.push_back(-1); 57 | cout<size > 0 && entry_ptr->size < ((size_t)(32 * 1024 * 1024))' failed" 18 | 19 | When the number of particles in a subhalo (or src subhalo) is very large, one may encounter the above error when saving the particles to hdf5 file. This is probably due to a bug in hdf5-1.12.0. The error goes away when using other versions of hdf5 including 1.8 and 1.12.1. 
20 | -------------------------------------------------------------------------------- /src/test/test_omptask.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #define N 10000 7 | using namespace std; 8 | int work(int i, int j) 9 | { 10 | int x=i+j*2+j*j*j+i*i; 11 | x*=x; 12 | return x; 13 | } 14 | int main(int argc, char **argv) 15 | { 16 | time_t t0,t1; 17 | 18 | t0=time(NULL); 19 | #pragma omp parallel num_threads(4) 20 | { 21 | #pragma omp for //collapse(2) //the use of collapse is unspecified here? 22 | for(int i=0;i 2 | #include 3 | #include 4 | #include "../datatypes.h" 5 | template 6 | class XYZ 7 | { 8 | T data[3]; 9 | public: 10 | XYZ(T x, T y, T z) //:data{x,y,z} 11 | { 12 | data[0]=x; 13 | data[1]=y; 14 | data[2]=z; 15 | } 16 | XYZ(const XYZ &s) 17 | { 18 | std::cout<<"assign"< 36 | std::ostream& operator << (std::ostream& o, XYZ &a) 37 | { 38 | 39 | o << "(" << a[0] << ", " << a[1] << ", " << a[2] << ")"; 40 | return o; 41 | }; 42 | std::ostream& operator << (std::ostream& o, HBTxyz &a) 43 | { 44 | 45 | o << "(" << a[0] << ", " << a[1] << ", " << a[2] << ")"; 46 | return o; 47 | }; 48 | typedef double dxyz[3]; 49 | std::ostream& operator << (std::ostream& o, dxyz &a) 50 | { 51 | 52 | o << "(" << a[0] << ", " << a[1] << ", " << a[2] << ")"; 53 | return o; 54 | }; 55 | 56 | int main() 57 | { 58 | XYZ x(1.,2.,3.); 59 | std::cout< y(x); 61 | XYZ &z=y; 62 | HBTxyz a={-1,-2,-3}, b={-4,-5,-6}; 63 | std::cout< v; 70 | float data[]={1,2,3,4,5,6}; 71 | // v.assign((float (*)[3])data, (float (*)[3])data+2); 72 | v.resize(2); 73 | copy(data, data+6, (HBTReal *) v.data()); 74 | cout< 5 | #include 6 | #include 7 | #include 8 | // #include 9 | // #include 10 | // #include 11 | #include "datatypes.h" 12 | #include "config_parser.h" 13 | 14 | class SnapshotNumber_t 15 | { 16 | protected: 17 | int SnapshotIndex; 18 | int SnapshotId; 19 | public: 20 | SnapshotNumber_t() 21 | { 22 | SnapshotIndex=SpecialConst::NullSnapshotId; 23 | SnapshotId=SpecialConst::NullSnapshotId; 24 | } 25 | SnapshotNumber_t(SnapshotNumber_t & sn):SnapshotId(sn.SnapshotId), SnapshotIndex(sn.SnapshotIndex) 26 | { 27 | } 28 | SnapshotNumber_t & operator=(SnapshotNumber_t &sn) 29 | { 30 | SnapshotIndex=sn.SnapshotIndex; 31 | SnapshotId=sn.SnapshotId; 32 | return *this; 33 | } 34 | void ResetSnapshotNumber() 35 | {//reset is not destructon! when destructor is called, the data content no matter matters. 
36 | SnapshotIndex=SpecialConst::NullSnapshotId; 37 | SnapshotId=SpecialConst::NullSnapshotId; 38 | } 39 | void FormatSnapshotId(std::stringstream &ss); 40 | void SetSnapshotIndex(int snapshot_index); 41 | int GetSnapshotIndex() const; 42 | int GetSnapshotId() const; 43 | }; 44 | inline int SnapshotNumber_t::GetSnapshotIndex() const 45 | { 46 | return SnapshotIndex; 47 | } 48 | inline int SnapshotNumber_t::GetSnapshotId() const 49 | { 50 | return SnapshotId; 51 | } 52 | inline void SnapshotNumber_t::FormatSnapshotId(stringstream& ss) 53 | { 54 | ss << std::setw(3) << std::setfill('0') << SnapshotId; 55 | } 56 | inline void SnapshotNumber_t::SetSnapshotIndex(int snapshot_index) 57 | { 58 | assert(snapshot_index>=HBTConfig.MinSnapshotIndex&&snapshot_index<=HBTConfig.MaxSnapshotIndex); 59 | // assert(SpecialConst::NullSnapshotId!=snapshot_index); 60 | SnapshotIndex=snapshot_index; 61 | if(HBTConfig.SnapshotIdList.empty()) 62 | SnapshotId=SnapshotIndex; 63 | else 64 | SnapshotId=HBTConfig.SnapshotIdList[SnapshotIndex]; 65 | } 66 | 67 | #endif -------------------------------------------------------------------------------- /src/io/snapshot_io.cpp: -------------------------------------------------------------------------------- 1 | using namespace std; 2 | #include 3 | // #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "../mpi_wrapper.h" 14 | #include "../snapshot.h" 15 | #include "../mymath.h" 16 | #include "gadget_io.h" 17 | #include "apostle_io.h" 18 | 19 | void ParticleSnapshot_t::Load(MpiWorker_t & world, int snapshot_index, bool fill_particle_hash) 20 | { 21 | Clear(); 22 | SetSnapshotIndex(snapshot_index); 23 | 24 | if(HBTConfig.SnapshotFormat=="gadget") 25 | { 26 | GadgetReader_t(world, SnapshotId, Particles, Cosmology); 27 | } 28 | else if(HBTConfig.SnapshotFormat=="apostle") 29 | { 30 | ApostleReader_t().LoadSnapshot(world, SnapshotId, Particles, Cosmology); 31 | } 32 | else if(HBTConfig.SnapshotFormat=="mysnapshot") 33 | {/*insert your snapshot reader here, and include relevant header in the header if necessary 34 | you need to fill up Particles vector, and set the cosmology, e.g., 35 | 36 | LoadMySnapshot(SnapshotId, Particles, Cosmology); 37 | 38 | */ 39 | } 40 | else 41 | throw(runtime_error("unknown SnapshotFormat "+HBTConfig.SnapshotFormat)); 42 | 43 | #ifdef DM_ONLY 44 | // assert(Cosmology.ParticleMass>0); 45 | #endif 46 | 47 | ExchangeParticles(world); 48 | 49 | if(fill_particle_hash) 50 | FillParticleHash(); 51 | 52 | if(world.rank()==0) cout< np_file; 41 | vector offset_file; 42 | ApostleHeader_t Header; 43 | void ReadHeader(int ifile, ApostleHeader_t &header); 44 | HBTInt CompileFileOffsets(int nfiles); 45 | void ReadSnapshot(int ifile, Particle_t * ParticlesInFile); 46 | void ReadGroupParticles(int ifile, ParticleHost_t * ParticlesInFile, bool FlagReadParticleId); 47 | void GetFileName(int ifile, string &filename); 48 | void SetSnapshot(int snapshotId); 49 | void GetParticleCountInFile(hid_t file, int np[]); 50 | void ExchangeAndMerge(MpiWorker_t &world, vector< Halo_t >& Halos); 51 | 52 | MPI_Datatype MPI_ApostleHeader_t; 53 | 54 | public: 55 | ApostleReader_t() 56 | { 57 | create_ApostleHeader_MPI_type(MPI_ApostleHeader_t); 58 | } 59 | ~ApostleReader_t() 60 | { 61 | My_Type_free(&MPI_ApostleHeader_t); 62 | } 63 | void LoadSnapshot(MpiWorker_t &world, int snapshotId, vector &Particles, Cosmology_t &Cosmology); 64 | void LoadGroups(MpiWorker_t &world, int snapshotId, vector &Halos); 65 | 
}; 66 | 67 | extern bool IsApostleGroup(const string &GroupFileFormat); 68 | #endif 69 | -------------------------------------------------------------------------------- /Makefile.inc: -------------------------------------------------------------------------------- 1 | #HDFINC=-I/usr/include/hdf5/serial/ 2 | #HDFLIB=-L/usr/lib/x86_64-linux-gnu/hdf5/serial/ -lhdf5_hl -lhdf5 3 | #or set CPATH and LIBRARY_PATH variable locally 4 | HDFINC= 5 | HDFLIB=-lhdf5_hl -lhdf5 6 | #HDFINC+=-Wno-literal-suffix #to disable the annoying literal-suffix warning with the hdf header 7 | 8 | ###comment out the following lines if you do not have GSL (for eigenvalue decomposition of inertial tensors) 9 | #GSLINC= 10 | #GSLINC+=-DHAS_GSL 11 | #GSLLIB=-lgsl -lgslcblas 12 | ###comment up to here to disable GSL. 13 | 14 | #CC=mpicc 15 | CXX=mpiCC 16 | CXXFLAGS+=-std=c++0x $(HDFINC) $(GSLINC) $(OMPFLAG) -O3 -Wno-deprecated -g 17 | 18 | ###for Dark Matter only simulations 19 | #CXXFLAGS+=-DDM_ONLY 20 | ### 21 | 22 | ##enable these if you want to obtain (thermal) binding energy for each particle 23 | ###save binding energy: 24 | #CXXFLAGS+=-DSAVE_BINDING_ENERGY 25 | ### 26 | ###load thermal energy (also set SaveSubParticleProperties to 1 in config file if you want to save thermal energy) 27 | #CXXFLAGS+=-DHAS_THERMAL_ENERGY 28 | ### 29 | 30 | ###for thermal unbinding: 31 | #CXXFLAGS+=-DUNBIND_WITH_THERMAL_ENERGY 32 | ### alternatively, you can save both the non-thermal binding energy and the thermal energy and do it yourself. see above. 33 | 34 | #CXXFLAGS+=-DMPICH_IGNORE_CXX_SEEK -DMPICH_SKIP_MPICXX -idirafter /usr/include/linux #to work around intel compiler bugs 35 | #CXXFLAGS+=-xHost #to get it runnable on cosma-e 36 | 37 | LDLIBS+=$(HDFLIB) $(OMPFLAG) $(GSLLIB) 38 | LDFLAGS+=-g 39 | LINK.o=$(CXX) $(LDFLAGS) $(TARGET_ARCH) 40 | 41 | ifneq (,$(filter $(CXX),mpic++ mpicxx mpiCC)) 42 | BASECXX=$(shell $(CXX) -show|cut -f 1 -d ' '| rev |cut -c1-3|rev) 43 | else 44 | BASECXX=$(CXX) 45 | endif 46 | 47 | ifeq ($(BASECXX), g++) 48 | #OMPFLAG=-fopenmp 49 | else 50 | #OMPFLAG=-qopenmp 51 | CXXFLAGS+=-diag-disable 3180 #to disable omp warnings by icc when not using omp; comment this out if you enabled OMPFLAG 52 | endif 53 | 54 | %.test.o: %.cpp 55 | $(CXX) $^ -DTEST_$(basename $<) $(CXXFLAGS) -c -o $@ 56 | %.test: %.test.o 57 | $(CXX) $^ $(LDFLAGS) $(LDLIBS) -o $@ 58 | $(TEST): CXXFLAGS+=-g -gdwarf-2 -O0 59 | 60 | SUBDIRS:=$(wildcard */.) 
61 | SUBDIRSCLEAN=$(addsuffix clean,$(SUBDIRS)) 62 | 63 | clean: clean_curdir $(SUBDIRSCLEAN) 64 | 65 | clean_curdir: 66 | rm -rf *.o *~ core a.out $(EXE) $(OUTPUTS) *.test 67 | 68 | %clean: % 69 | $(MAKE) -C $< clean 70 | 71 | depend_curdir: 72 | makedepend --$(CXXFLAGS)-- -Y $(SRC) 73 | -------------------------------------------------------------------------------- /src/halo.h: -------------------------------------------------------------------------------- 1 | #ifndef HALO_H_INCLUDED 2 | #define HALO_H_INCLUDED 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "datatypes.h" 11 | #include "snapshot_number.h" 12 | #include "snapshot.h" 13 | #include "mpi_wrapper.h" 14 | 15 | class Halo_t 16 | { 17 | public: 18 | typedef vector ParticleList_t; 19 | ParticleList_t Particles; 20 | HBTInt HaloId; 21 | HBTxyz ComovingAveragePosition; 22 | HBTxyz PhysicalAverageVelocity; 23 | HBTReal Mass; 24 | void AverageCoordinates(); 25 | /* deprecated; use move assignment instead; 26 | * shall not define destructor in order for default move to be implemented by the compiler. 27 | void MoveTo(Halo_t & dest, bool MoveParticle=true) 28 | { 29 | dest.HaloId=HaloId; 30 | copyHBTxyz(dest.ComovingPosition, ComovingPosition); 31 | copyHBTxyz(dest.PhysicalVelocity, PhysicalVelocity); 32 | if(MoveParticle) 33 | dest.Particles.swap(Particles); 34 | } 35 | */ 36 | HBTInt KickNullParticles(); 37 | }; 38 | extern void create_MPI_Halo_Id_type(MPI_Datatype &MPI_HBTHalo_Id_t); 39 | 40 | class HaloSnapshot_t: public Snapshot_t 41 | { 42 | typedef vector HaloList_t; 43 | MPI_Datatype MPI_HBT_HaloId_t;//MPI datatype ignoring the particle list 44 | void BuildMPIDataType(); 45 | public: 46 | HaloList_t Halos; 47 | HBTInt TotNumberOfParticles; 48 | HBTInt NumPartOfLargestHalo; 49 | MappedIndexTable_t ParticleHash; 50 | 51 | HaloSnapshot_t(): Snapshot_t(), Halos(), TotNumberOfParticles(0), NumPartOfLargestHalo(0) 52 | { 53 | BuildMPIDataType(); 54 | } 55 | ~HaloSnapshot_t() 56 | { 57 | // Clear(); 58 | My_Type_free(&MPI_HBT_HaloId_t); 59 | } 60 | void Load(MpiWorker_t & world, int snapshot_index); 61 | void Clear(); 62 | void UpdateParticles(MpiWorker_t & world, const ParticleSnapshot_t & snapshot); 63 | // void ParticleIndexToId(); 64 | void FillParticleHash(); 65 | void ClearParticleHash(); 66 | HBTInt size() const 67 | { 68 | return Halos.size(); 69 | } 70 | HBTInt GetId(HBTInt index) const 71 | { 72 | return Halos[index].HaloId; 73 | } 74 | const HBTxyz & GetComovingPosition(HBTInt index) const 75 | { 76 | return Halos[index].ComovingAveragePosition; 77 | } 78 | const HBTxyz & GetPhysicalVelocity(HBTInt index) const 79 | { 80 | return Halos[index].PhysicalAverageVelocity; 81 | } 82 | HBTReal GetMass(HBTInt index) const 83 | { 84 | return Halos[index].Mass; 85 | } 86 | }; 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /src/io/halo_io.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "../mymath.h" 15 | #include "../halo.h" 16 | #include "gadget_group_io.h" 17 | #include "apostle_io.h" 18 | 19 | void HaloSnapshot_t::Load(MpiWorker_t &world, int snapshot_index) 20 | { 21 | SetSnapshotIndex(snapshot_index); 22 | 23 | string GroupFileFormat=HBTConfig.GroupFileFormat; 24 | 25 | if(GadgetGroup::IsGadgetGroup(GroupFileFormat)) 26 | 
GadgetGroup::Load(world, SnapshotId, Halos); 27 | else if(IsApostleGroup(GroupFileFormat)) 28 | ApostleReader_t().LoadGroups(world, SnapshotId, Halos); 29 | else if(GroupFileFormat=="my_group_format") 30 | {/*extend your own group reader here, input SnapshotId and output filled Halo list, e.g.: 31 | 32 | MyGroupReader(world, SnapshotId, Halos) 33 | 34 | */ 35 | } 36 | else 37 | throw(runtime_error("unknown GroupFileFormat "+GroupFileFormat)); 38 | 39 | NumPartOfLargestHalo=0; 40 | TotNumberOfParticles=0; 41 | for(auto && h: Halos) 42 | { 43 | auto np=h.Particles.size(); 44 | TotNumberOfParticles+=np; 45 | if(np>NumPartOfLargestHalo) NumPartOfLargestHalo=np; 46 | } 47 | 48 | HBTInt NumHalos=Halos.size(), NumHalosAll=0; 49 | MPI_Reduce(&NumHalos, &NumHalosAll, 1, MPI_HBT_INT, MPI_SUM, 0, world.Communicator); 50 | if(world.rank()==0) 51 | cout<1) 74 | { 75 | auto & h=halo.Halos[1]; 76 | cout<<" Halo 1 from thread "< 8 | 9 | #ifdef HBT_REAL8 10 | #define H5T_HBTReal H5T_NATIVE_DOUBLE 11 | #else 12 | #define H5T_HBTReal H5T_NATIVE_FLOAT 13 | #endif 14 | #ifdef HBT_INT8 15 | #define H5T_HBTInt H5T_NATIVE_LONG 16 | #else 17 | #define H5T_HBTInt H5T_NATIVE_INT 18 | #endif 19 | 20 | extern void writeHDFmatrix(hid_t file, const void * buf, const char * name, hsize_t ndim, const hsize_t *dims, hid_t dtype, hid_t dtype_file); 21 | 22 | inline int GetDatasetDims(hid_t dset, hsize_t dims[]) 23 | { 24 | hid_t dspace=H5Dget_space(dset); 25 | int ndim=H5Sget_simple_extent_dims(dspace, dims, NULL); 26 | H5Sclose(dspace); 27 | return ndim; 28 | } 29 | inline herr_t ReclaimVlenData(hid_t dset, hid_t dtype, void * buf) 30 | { 31 | herr_t status; 32 | hid_t dspace=H5Dget_space(dset); 33 | status=H5Dvlen_reclaim(dtype, dspace, H5P_DEFAULT, buf); 34 | status=H5Sclose(dspace); 35 | return status; 36 | } 37 | inline herr_t ReadDataset(hid_t file, const char *name, hid_t dtype, void *buf) 38 | /* read named dataset from file into buf. 
39 | * dtype specifies the datatype of buf; it does not need to be the same as the storage type in file*/ 40 | { 41 | herr_t status; 42 | hid_t dset=H5Dopen2(file, name, H5P_DEFAULT); 43 | status=H5Dread(dset, dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf); 44 | if(status<0) 45 | { 46 | const int bufsize=1024; 47 | char grpname[bufsize],filename[bufsize]; 48 | H5Iget_name(file, grpname, bufsize); 49 | H5Fget_name(file, filename, bufsize); 50 | std::cerr<<"####ERROR READING "< 6 | #include "datatypes.h" 7 | #include "snapshot.h" 8 | 9 | class OctTreeExceeded_t : public exception 10 | { 11 | private: 12 | string msg; 13 | public: 14 | OctTreeExceeded_t(const string & message) 15 | { 16 | msg=message; 17 | } 18 | const char * what () const throw () 19 | { 20 | return msg.c_str(); 21 | } 22 | ~OctTreeExceeded_t() throw() 23 | {} 24 | }; 25 | 26 | template union TreeCell_t 27 | { 28 | typedef T MassType_t; 29 | HBTInt sons[8]; /*!< temporary pointers to daughter nodes */ 30 | struct 31 | { 32 | HBTReal s[3]; /*!< center of mass of node (gravity tree); geocenter for geotree*/ 33 | HBTReal len; /*!< sidelength of treenode */ 34 | T mass; /*!< mass of node (gravity tree); counts of particles for geotree */ 35 | HBTInt sibling; /*!< this gives the next node in the walk in case the current node can be used */ 36 | HBTInt nextnode; /*!< this gives the next node in case the current node needs to be opened */ 37 | }way; 38 | TreeCell_t(){}; 39 | TreeCell_t(HBTInt i): sons{i,i,i,i,i,i,i,i} 40 | { 41 | } 42 | }; 43 | 44 | template 45 | class OctTree_t 46 | { 47 | protected: 48 | typedef CellT OctTreeCell_t; 49 | /*the storage*/ 50 | vector Cells; 51 | OctTreeCell_t *Nodes; /* =Cells-NumberOfParticles. the nodes are labelled from 0 to NumPart+NumNodes-1, so that nodeid=0~NumPart-1 are particles, and nodeid>=NumPart are cells */ 52 | vector NextnodeFromParticle; /* next node for each particle. Particles are the first NumPart nodes, and cells are the remaining nodes.*/ 53 | const Snapshot_t * Snapshot; 54 | HBTInt NumberOfParticles; //alias to Snapshot->GetSize(). 
55 | HBTInt & RootNodeId; //alias to NumberOfParticles 56 | private: 57 | virtual void UpdateInternalNodes(HBTInt no,HBTInt sib,double len, const double center[3])=0; 58 | public: 59 | OctTree_t(): NumberOfParticles(0), RootNodeId(NumberOfParticles) 60 | { 61 | } 62 | void Reserve(const size_t max_num_part); 63 | HBTInt Build(const Snapshot_t &snapshot, HBTInt num_part=0); 64 | void AppendCell(); 65 | void Clear(); 66 | ~OctTree_t() 67 | { 68 | Clear(); 69 | } 70 | }; 71 | 72 | #include "oct_tree.tpp" 73 | 74 | #endif 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /src/halo_particle_iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef HALO_PARTICLE_ITERATOR_INCLUDED 2 | #define HALO_PARTICLE_ITERATOR_INCLUDED 3 | 4 | template 5 | class HaloParticleIterator_t 6 | { 7 | typedef vector::iterator particle_iterator; 8 | HaloIterator FirstHalo, EndHalo, CurrHalo; 9 | particle_iterator CurrPart; 10 | public: 11 | HaloParticleIterator_t(){}; 12 | HaloParticleIterator_t(const HaloIterator &begin, const HaloIterator &end) 13 | { 14 | init(begin, end); 15 | } 16 | void init(HaloIterator begin, HaloIterator end) 17 | { 18 | while((begin!=end)&&(begin->Particles.size()==0))//skip empty ones, though not necessary for current HBT2 19 | ++begin; 20 | FirstHalo=begin; 21 | EndHalo=end; 22 | reset(); 23 | } 24 | void reset() 25 | { 26 | CurrHalo=FirstHalo; 27 | if(CurrHalo!=EndHalo) 28 | CurrPart=FirstHalo->Particles.begin(); 29 | } 30 | particle_iterator begin() 31 | { 32 | return FirstHalo->Particles.begin(); 33 | } 34 | HaloParticleIterator_t & operator ++()//left operator 35 | { 36 | ++CurrPart; 37 | while(CurrPart==CurrHalo->Particles.end())//increment halo and skip empty haloes 38 | { 39 | ++CurrHalo; 40 | if(CurrHalo==EndHalo) break; 41 | CurrPart=CurrHalo->Particles.begin(); 42 | } 43 | return *this; 44 | } 45 | Particle_t & operator *() 46 | { 47 | return *CurrPart; 48 | } 49 | bool is_end() 50 | { 51 | return CurrHalo==EndHalo; 52 | } 53 | }; 54 | 55 | 56 | template 57 | class HaloNestIterator_t 58 | { 59 | typedef HBTInt NestMember_t; 60 | typedef vector::iterator nest_iterator; 61 | HaloIterator FirstHalo, EndHalo, CurrHalo; 62 | nest_iterator CurrPart; 63 | public: 64 | HaloNestIterator_t(){}; 65 | HaloNestIterator_t(const HaloIterator &begin, const HaloIterator &end) 66 | { 67 | init(begin, end); 68 | } 69 | void init(HaloIterator begin, HaloIterator end) 70 | { 71 | while((begin!=end)&&(begin->NestedSubhalos.size()==0))//skip empty ones, though not necessary for current HBT2 72 | ++begin; 73 | FirstHalo=begin; 74 | EndHalo=end; 75 | reset(); 76 | } 77 | void reset() 78 | { 79 | CurrHalo=FirstHalo; 80 | if(CurrHalo!=EndHalo) 81 | CurrPart=FirstHalo->NestedSubhalos.begin(); 82 | } 83 | nest_iterator begin() 84 | { 85 | return FirstHalo->NestedSubhalos.begin(); 86 | } 87 | HaloNestIterator_t & operator ++()//left operator 88 | { 89 | ++CurrPart; 90 | while(CurrPart==CurrHalo->NestedSubhalos.end())//increment halo and skip empty haloes 91 | { 92 | ++CurrHalo; 93 | if(CurrHalo==EndHalo) break; 94 | CurrPart=CurrHalo->NestedSubhalos.begin(); 95 | } 96 | return *this; 97 | } 98 | NestMember_t & operator *() 99 | { 100 | return *CurrPart; 101 | } 102 | bool is_end() 103 | { 104 | return CurrHalo==EndHalo; 105 | } 106 | }; 107 | 108 | #endif -------------------------------------------------------------------------------- /HBT.cpp: 
-------------------------------------------------------------------------------- 1 | using namespace std; 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "src/mpi_wrapper.h" 8 | #include "src/datatypes.h" 9 | #include "src/config_parser.h" 10 | #include "src/snapshot.h" 11 | #include "src/halo.h" 12 | #include "src/subhalo.h" 13 | #include "src/mymath.h" 14 | #include "src/particle_exchanger.h" 15 | 16 | int main(int argc, char **argv) 17 | { 18 | MPI_Init(&argc, &argv); 19 | MpiWorker_t world(MPI_COMM_WORLD); 20 | #ifdef _OPENMP 21 | //omp_set_nested(0); 22 | omp_set_max_active_levels(1); //max_active_level 0: no para; 1: single layer; >1: nest enabled 23 | #endif 24 | 25 | int snapshot_start, snapshot_end; 26 | if(0==world.rank()) 27 | { 28 | ParseHBTParams(argc, argv, HBTConfig, snapshot_start, snapshot_end); 29 | mkdir(HBTConfig.SubhaloPath.c_str(), 0755); 30 | HBTConfig.DumpParameters(); 31 | 32 | cout< 3 | #else 4 | #include 5 | #endif 6 | #include 7 | #include 8 | 9 | #ifndef H5_NO_NAMESPACE 10 | #ifndef H5_NO_STD 11 | using std::cout; 12 | using std::endl; 13 | using std::vector; 14 | #endif // H5_NO_STD 15 | #endif 16 | #include "H5Cpp.h" 17 | 18 | #ifndef H5_NO_NAMESPACE 19 | using namespace H5; 20 | #endif 21 | 22 | const H5std_string FILE_NAME( "test_compound2.hdf5" ); 23 | const H5std_string DATASET_NAME( "data" ); 24 | const int LENGTH = 5; 25 | const int RANK = 1; 26 | 27 | #define ShowField(s,f){\ 28 | cout << endl<<"Field "<<#f<<" : " << endl; \ 29 | for(int i = 0; i < LENGTH; i++)\ 30 | cout< datain(LENGTH); 55 | for(int i=0; i dataout(LENGTH); 78 | 79 | dset.read( dataout.data(), mtype ); 80 | 81 | cout<<"\n===========Data Read==========\n"; 82 | ShowField(dataout,a); 83 | ShowField(dataout,b); 84 | ShowField(dataout,c); 85 | 86 | CompType btype(sizeof(float)); 87 | btype.insertMember("b", 0, PredType::NATIVE_FLOAT); 88 | float b[LENGTH]; 89 | dset.read(b, btype); 90 | cout<<"b \n"; 91 | for(int i=0;i 5 | inline int CompPairWithValue(const Pair_t a, const Val_t b) 6 | { 7 | return (a.Key 10 | template 11 | void MappedIndexTable_t::GetIndices(ParticleIdList_T &particles) const 12 | { 13 | #define ALWAYS_BATCH_BINARY_SEARCH 14 | 15 | #ifdef ALWAYS_BATCH_BINARY_SEARCH 16 | GetIndicesRecursive(particles, 0, particles.size(), Map.begin(), Map.end());//batch-binary-search: is this always faster? 17 | #else 18 | if(particles.size()IdKey) 36 | { 37 | it_p->Id=null; 38 | ++it_p; 39 | } 40 | else if(it_p->Id==it_map->Key) 41 | { 42 | it_p->Id=it_map->Index; 43 | ++it_p; 44 | } 45 | else 46 | ++it_map; 47 | } 48 | 49 | while(true) 50 | { 51 | it_p->Id=null; 52 | ++it_p; 53 | if(it_p==particles.end()) return; 54 | } 55 | #endif 56 | } 57 | 58 | template 59 | template 60 | void MappedIndexTable_t::GetIndicesRecursive(ParticleIdList_T &particles, HBTInt imin, HBTInt imax, MapIter_t MapBegin, MapIter_t MapEnd) const 61 | { 62 | //GetIndices of particles in storage range [imin, imax) from map [MapBegin, MapEnd). 
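  //Strategy: binary-search the key of the middle particle in the current map range, then recurse on
  //the left half of the particles with the map entries below the match and on the right half with the
  //entries above it, so each level of recursion shrinks both the particle and the map ranges.
  //(This narrowing is only valid when the ids in [imin, imax) are in ascending order.)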
63 | auto &null=BaseClass_t::NullIndex; 64 | 65 | if(MapBegin==MapEnd) 66 | { 67 | for(HBTInt i=imin;i=imax) return; 73 | 74 | HBTInt imid; 75 | if(imax-imin==1) 76 | imid=imin; 77 | else 78 | imid=(imin+imax)/2; 79 | Key_t key=particles[imid].Id; 80 | MapIter_t MapMid=lower_bound(MapBegin, MapEnd, key, CompPairWithValue); 81 | MapIter_t MapEndLeft=MapMid, MapBeginRight=MapMid; 82 | if(MapMid==MapEnd||MapMid->Key>key) 83 | particles[imid].Id=null; 84 | else 85 | { 86 | particles[imid].Id=MapMid->Index; 87 | ++MapEndLeft; 88 | } 89 | 90 | GetIndicesRecursive(particles, imin, imid, MapBegin, MapEndLeft); 91 | GetIndicesRecursive(particles, imid+1, imax, MapBeginRight, MapEnd); 92 | } 93 | 94 | template 95 | template 96 | void FlatIndexTable_t::GetIndices(ParticleIdList_T &particles) const 97 | { 98 | for(auto &&p: particles) 99 | p.Id=GetIndex(p.Id); 100 | } 101 | 102 | template 103 | void ParticleSnapshot_t::GetIndices(ParticleIdList_t& particles) const 104 | {//ParticleIdList_t is a list of particle structs containing at least an Id field 105 | if(HBTConfig.ParticleIdNeedHash) 106 | MappedHash.GetIndices(particles); 107 | else 108 | FlatHash.GetIndices(particles); 109 | } 110 | -------------------------------------------------------------------------------- /src/io/gadget_virial_io.h: -------------------------------------------------------------------------------- 1 | /*utility function for reading gadget output of halo virial mass and radius. 2 | *This file is not used by main program of HBT+, and is provided for convenience of analysing gadget group data*/ 3 | #ifndef GADGET_VIRIAL_IO_HEADER_INCLUDED 4 | #define GADGET_VIRIAL_IO_HEADER_INCLUDED 5 | 6 | #include "gadget_group_io.h" 7 | #define myfread(buf,size,count,fp) fread_swap(buf,size,count,fp,NeedByteSwap) 8 | namespace GadgetVirialIO{ 9 | template 10 | void LoadVirial(int SnapshotId, vector &Mvir, vector &Rvir, const string & virtype) 11 | { 12 | // typedef float MyReal; 13 | 14 | int itype; 15 | if(virtype=="Mean200") 16 | itype=0; 17 | else if(virtype=="Crit200") 18 | itype=1; 19 | else if(virtype=="TopHat") 20 | itype=2; 21 | else 22 | throw(runtime_error("unknow virtype"+virtype)); 23 | 24 | FILE *fd; 25 | char filename[1024]; 26 | vector Len; 27 | vector Offset; 28 | int Ngroups, TotNgroups, Nids, NFiles; 29 | HBTInt NumberOfHaloes; 30 | long long TotNids; 31 | bool NeedByteSwap; 32 | bool IsGroupV3=("gadget3_int"==HBTConfig.GroupFileFormat||"gadget3_long"==HBTConfig.GroupFileFormat); 33 | 34 | string filename_format; 35 | bool IsSubFile; 36 | int FileCounts; 37 | GadgetGroup::GetFileNameFormat(SnapshotId, filename_format, FileCounts, IsSubFile, NeedByteSwap); 38 | assert(IsSubFile); 39 | 40 | long long Nload=0; 41 | for(int iFile=0;iFile1) 44 | sprintf(filename, filename_format.c_str(), "tab", iFile); 45 | else 46 | sprintf(filename, filename_format.c_str(), "tab"); 47 | 48 | myfopen(fd,filename,"r"); 49 | myfread(&Ngroups, sizeof(Ngroups), 1, fd); 50 | if(IsGroupV3) 51 | { 52 | myfread(&TotNgroups, sizeof(TotNgroups), 1, fd); 53 | myfread(&Nids, sizeof(Nids), 1, fd); 54 | myfread(&TotNids,sizeof(TotNids),1,fd); 55 | } 56 | else 57 | { 58 | myfread(&Nids, sizeof(Nids), 1, fd); 59 | myfread(&TotNgroups, sizeof(TotNgroups), 1, fd); 60 | } 61 | myfread(&NFiles, sizeof(NFiles), 1, fd); 62 | int Nsub,TotNsub; 63 | myfread(&Nsub,sizeof(int),1,fd); 64 | myfread(&TotNsub,sizeof(int),1,fd); 65 | if(FileCounts!=NFiles) 66 | { 67 | cout<<"File count mismatch for file "< 8 | #include 9 | 10 | template 11 | class KeyList_t 12 | { 13 | 
public: 14 | virtual Key_t GetKey(const Index_t i) const=0; 15 | virtual Index_t GetIndex(const Index_t i) const=0; 16 | virtual Index_t size() const=0; 17 | }; 18 | 19 | class InvalidPIdException_t : public exception 20 | { 21 | private: 22 | HBTInt PId; 23 | public: 24 | InvalidPIdException_t(HBTInt pid) 25 | { 26 | PId=pid; 27 | }; 28 | const char * what () const throw () 29 | { 30 | stringstream msg; 31 | msg<<"Invalid Particle Id "< 39 | struct IndexedKey_t 40 | { 41 | Key_t Key; 42 | Index_t Index; 43 | IndexedKey_t(){};//add default constructor for quick memory alloc 44 | }; 45 | 46 | template 47 | class IndexTable_t 48 | { 49 | public: 50 | // typedef HBTInt Key_t; 51 | // typedef HBTInt Index_t; 52 | Index_t NullIndex; 53 | 54 | virtual void Fill(const KeyList_t &Keys, Index_t null_index=SpecialConst::NullParticleId)=0; 55 | virtual void Clear()=0; 56 | virtual Index_t GetIndex(const Key_t key) const =0; 57 | virtual void GetKeyMinMax(Key_t &key_min, Key_t &key_max) const=0; 58 | }; 59 | 60 | template 61 | class FlatIndexTable_t: public IndexTable_t 62 | { 63 | private: 64 | typedef IndexTable_t BaseClass_t; 65 | Index_t * Index; 66 | Index_t Offset; 67 | Key_t KeySpan, KeyMax, KeyMin; 68 | public: 69 | FlatIndexTable_t(): Index(), Offset(0), KeySpan(0) 70 | { 71 | } 72 | void Fill(const KeyList_t &Keys, Index_t null_index=SpecialConst::NullParticleId); 73 | void Clear(); 74 | Index_t GetIndex(const Key_t key) const; 75 | template 76 | void GetIndices(ParticleIdList_T &particles) const; 77 | void GetKeyMinMax(Key_t &key_min, Key_t &key_max) const 78 | { 79 | key_min=KeyMin; 80 | key_max=KeyMax; 81 | } 82 | ~FlatIndexTable_t() 83 | { 84 | Clear(); 85 | } 86 | }; 87 | 88 | template 89 | class MappedIndexTable_t: public IndexTable_t 90 | { 91 | public: 92 | typedef IndexedKey_t Pair_t; 93 | private: 94 | HBTInt NumQueryCrit; 95 | typedef IndexTable_t BaseClass_t; 96 | vector Map; 97 | typedef typename vector ::const_iterator MapIter_t; 98 | template 99 | void GetIndicesRecursive(ParticleIdList_T &particles, HBTInt imin, HBTInt imax, MapIter_t MapBegin, MapIter_t MapEnd) const; 100 | public: 101 | MappedIndexTable_t(): Map(), NumQueryCrit() 102 | { 103 | } 104 | void Fill(const KeyList_t &Keys, Index_t null_index=SpecialConst::NullParticleId); 105 | void Clear(); 106 | Index_t GetIndex(const Key_t key) const; 107 | template 108 | void GetIndices(ParticleIdList_T &particles) const; 109 | void GetKeyMinMax(Key_t &key_min, Key_t &key_max) const 110 | { 111 | if(Map.empty()) return; 112 | key_min=Map.front().Key; 113 | key_max=Map.back().Key; 114 | } 115 | ~MappedIndexTable_t() 116 | { 117 | Clear(); 118 | } 119 | }; 120 | 121 | #include "hash.tpp" 122 | 123 | #endif -------------------------------------------------------------------------------- /src/linkedlist_base.h: -------------------------------------------------------------------------------- 1 | #ifndef LINKEDLIST_BASE_HEADER_INCLUDED 2 | #define LINKEDLIST_BASE_HEADER_INCLUDED 3 | #include "mymath.h" 4 | #include "snapshot.h" 5 | 6 | //TODO:discard the fortran-style ll; use struct or indexed table to parallelize the linklist! 
7 | class PositionData_t 8 | { 9 | public: 10 | virtual const HBTxyz & operator [](HBTInt i) const=0; 11 | /*virtual const HBTReal GetPos(HBTInt i, int j) const 12 | { 13 | return (*this)[i][j]; 14 | }*/ 15 | virtual size_t size() const=0; 16 | }; 17 | class SnapshotPos_t: public PositionData_t 18 | { 19 | const Snapshot_t &Snap; 20 | public: 21 | SnapshotPos_t(const Snapshot_t &snap):Snap(snap) 22 | {} 23 | const HBTxyz & operator [](HBTInt i) const 24 | { return Snap.GetComovingPosition(i); } 25 | size_t size() const 26 | { return Snap.size(); } 27 | }; 28 | class LinkedlistBase_t 29 | /*the particle ids used and returned refer to the index of particles in the input position data*/ 30 | { 31 | private: 32 | int NDiv, NDiv2; 33 | bool PeriodicBoundary; 34 | HBTReal BoxSize, BoxHalf; 35 | HBTReal Range[3][2]; 36 | HBTReal Step[3]; 37 | PositionData_t *Particles; 38 | int RoundGridId(int i); 39 | int ShiftGridId(int i); 40 | int FixGridId(int i); 41 | HBTInt Sub2Ind(int i, int j, int k); 42 | HBTInt GetHOC(int i, int j, int k); 43 | HBTInt GetHOCSafe(int i, int j, int k); 44 | HBTReal Distance2(const HBTxyz &x, const HBTxyz &y); 45 | protected: 46 | void init(int ndiv, PositionData_t *data, HBTReal boxsize, bool periodic); 47 | public: 48 | vector HOC; 49 | vector List; 50 | LinkedlistBase_t()=default; 51 | LinkedlistBase_t(int ndiv, PositionData_t *data, HBTReal boxsize=0., bool periodic=false) 52 | { 53 | build(ndiv, data, boxsize, periodic); 54 | } 55 | void build(int ndiv, PositionData_t *data, HBTReal boxsize=0., bool periodic=false); 56 | void SearchShell(HBTReal rmin, HBTReal rmax, const HBTxyz &searchcenter, ParticleCollector_t &collector); 57 | void SearchSphere(HBTReal radius, const HBTxyz &searchcenter, ParticleCollector_t &colletor); 58 | void SearchCylinder(HBTReal radius_z, HBTReal radius_p, const HBTxyz &searchcenter, ParticleCollector_t &collector);//search within +-radius_z along z and projected radius_p 59 | HBTInt TagFriendsOfFriends(HBTInt seed, HBTInt grpid, vector &group_tags, HBTReal LinkLength); 60 | HBTInt get_chain_length(int i) 61 | { 62 | HBTInt pid=HOC[i]; 63 | HBTInt n=0; 64 | while(pid>=0) 65 | { 66 | n++; 67 | pid=List[pid]; 68 | } 69 | return n; 70 | } 71 | void print_chain(int i) 72 | { 73 | auto pid=HOC[i]; 74 | while(pid>=0) 75 | { 76 | cout<=NDiv?NDiv-1:i); 87 | } 88 | inline int LinkedlistBase_t::ShiftGridId(int i) 89 | /*to correct for periodic conditions; 90 | only applicable when def PERIODIC_BDR and ll.UseFullBox=1 */ 91 | { 92 | i=i%NDiv; 93 | if(i<0) i+=NDiv; 94 | return i; 95 | } 96 | inline int LinkedlistBase_t::FixGridId(int i) 97 | { 98 | if(PeriodicBoundary) 99 | return ShiftGridId(i); 100 | return RoundGridId(i); 101 | } 102 | inline HBTInt LinkedlistBase_t::Sub2Ind(int i, int j, int k) 103 | { 104 | return i+j*NDiv+k*NDiv2; 105 | } 106 | inline HBTInt LinkedlistBase_t::GetHOC(int i, int j, int k) 107 | { 108 | return HOC[Sub2Ind(i,j,k)]; 109 | } 110 | inline HBTInt LinkedlistBase_t::GetHOCSafe(int i, int j, int k) 111 | { 112 | return HOC[Sub2Ind(FixGridId(i), FixGridId(j), FixGridId(k))]; 113 | } 114 | 115 | #endif 116 | -------------------------------------------------------------------------------- /src/hash.tpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "hash.h" 8 | //=====general ID2Index table======// 9 | /* the hash-table implementation here is by sorting Id and use binsearch to locate keys*/ 10 | /* more general 
functions hcreate(),hsearch()... exists in glibc; but binsearch should be 11 | * more efficient than the general hsearch() I guess?*/ 12 | 13 | template 14 | inline bool CompPair(const IndexedKey_t & a, const IndexedKey_t & b) 15 | { 16 | return (a.Key 19 | void MappedIndexTable_t::Fill(const KeyList_t &Keys, Index_t null_index) 20 | { 21 | BaseClass_t::NullIndex=null_index; 22 | Index_t n=Keys.size(); 23 | Map.resize(n); 24 | #pragma omp parallel for 25 | for(Index_t i=0;i); 31 | NumQueryCrit=n/log2(n); 32 | } 33 | template 34 | void MappedIndexTable_t::Clear() 35 | { 36 | vector ().swap(Map); 37 | NumQueryCrit=0; 38 | } 39 | template 40 | inline int CompKeyWithPair(const void *a, const void *b)//used to sort Id in ascending order; 41 | { 42 | Key_t va=* static_cast(a); 43 | Key_t vb=static_cast *> (b)->Key; 44 | if(va>vb) return 1; 45 | if(va 50 | Index_t MappedIndexTable_t::GetIndex(const Key_t key) const 51 | {//maybe implement the exception here? could be slow... test it first. 52 | if(key<0) return BaseClass_t::NullIndex; 53 | Pair_t *p=(Pair_t *) bsearch(&key,Map.data(),Map.size(),sizeof(Pair_t),CompKeyWithPair); 54 | if(NULL==p) return BaseClass_t::NullIndex; //no match 55 | return p->Index; 56 | } 57 | 58 | template 59 | void FlatIndexTable_t::Fill(const KeyList_t &Keys, Index_t null_index) 60 | { 61 | BaseClass_t::NullIndex=null_index; 62 | Clear(); 63 | Index_t n=Keys.size(); 64 | if(0==n) return; 65 | 66 | Key_t keymin, keymax; 67 | keymin=keymax=Keys.GetKey(0); 68 | #pragma omp parallel for reduction(min:keymin) reduction(max:keymax) 69 | for(Index_t i=1;ikeymax) 73 | keymax=key; 74 | if(key 96 | void FlatIndexTable_t::Clear() 97 | { 98 | if(KeySpan) 99 | { 100 | KeySpan=0; 101 | Index+=Offset; 102 | delete [] Index; 103 | } 104 | } 105 | template 106 | Index_t FlatIndexTable_t::GetIndex(const Key_t key) const 107 | { 108 | if(KeySpan==0||keyKeyMax) return BaseClass_t::NullIndex;//no match 109 | return Index[key]; 110 | } 111 | -------------------------------------------------------------------------------- /src/halo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "mpi_wrapper.h" 12 | #include "mymath.h" 13 | #include "halo.h" 14 | #include "particle_exchanger.h" 15 | 16 | // #include 17 | // #include 18 | 19 | void create_MPI_Halo_Id_type(MPI_Datatype &MPI_HBTHalo_Id_t) 20 | { 21 | /*to create the struct containing only haloid*/ 22 | Halo_t p; 23 | #define NumAttr 4 24 | MPI_Datatype oldtypes[NumAttr]; 25 | int blockcounts[NumAttr]; 26 | MPI_Aint offsets[NumAttr], origin,extent; 27 | 28 | MPI_Get_address(&p,&origin); 29 | MPI_Get_address((&p)+1,&extent);//to get the extent of s 30 | extent-=origin; 31 | 32 | int i=0; 33 | #define RegisterAttr(x, type, count) {MPI_Get_address(&(p.x), offsets+i); offsets[i]-=origin; oldtypes[i]=type; blockcounts[i]=count; i++;} 34 | RegisterAttr(HaloId, MPI_HBT_INT, 1) 35 | RegisterAttr(ComovingAveragePosition[0], MPI_HBT_REAL, 3) 36 | RegisterAttr(PhysicalAverageVelocity[0], MPI_HBT_REAL, 3) 37 | RegisterAttr(Mass, MPI_HBT_REAL, 1) 38 | // assert(offsets[i-1]-offsets[i-2]==sizeof(HBTReal)*3);//to make sure HBTxyz is stored locally. 
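/* For clarity, a single registration such as RegisterAttr(Mass, MPI_HBT_REAL, 1) expands
 * (up to the surrounding braces) to roughly:
 *   MPI_Get_address(&(p.Mass), offsets+i);  // absolute address of the member
 *   offsets[i]-=origin;                     // convert to a displacement within the Halo_t instance p
 *   oldtypes[i]=MPI_HBT_REAL;
 *   blockcounts[i]=1;
 *   i++;
 * i.e. each call records one (displacement, type, count) triple, which are then combined
 * by MPI_Type_create_struct() below into the reduced halo datatype. */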
39 | #undef RegisterAttr 40 | assert(i==NumAttr); 41 | 42 | MPI_Type_create_struct(i,blockcounts,offsets,oldtypes, &MPI_HBTHalo_Id_t);//some padding is added automatically by MPI as well 43 | MPI_Type_create_resized(MPI_HBTHalo_Id_t,(MPI_Aint)0, extent, &MPI_HBTHalo_Id_t); 44 | MPI_Type_commit(&MPI_HBTHalo_Id_t); 45 | #undef NumAttr 46 | } 47 | 48 | void Halo_t::AverageCoordinates() 49 | { 50 | AveragePosition(ComovingAveragePosition, Particles.data(), Particles.size()); 51 | Mass=AverageVelocity(PhysicalAverageVelocity, Particles.data(), Particles.size()); 52 | } 53 | 54 | void HaloSnapshot_t::BuildMPIDataType() 55 | { 56 | create_MPI_Halo_Id_type(MPI_HBT_HaloId_t); 57 | } 58 | void HaloSnapshot_t::UpdateParticles(MpiWorker_t &world, const ParticleSnapshot_t &snap) 59 | { 60 | Cosmology=snap.Cosmology; 61 | if(!HBTConfig.GroupLoadedFullParticle) 62 | { 63 | HaloList_t LocalHalos; 64 | snap.ExchangeHalos(world, Halos, LocalHalos, MPI_HBT_HaloId_t); 65 | Halos.swap(LocalHalos); 66 | } 67 | 68 | TotNumberOfParticles=0; 69 | NumPartOfLargestHalo=0; 70 | for(auto &&h: Halos) 71 | { 72 | HBTInt np=h.Particles.size(); 73 | TotNumberOfParticles+=np;//local 74 | if(NumPartOfLargestHalo 79 | { 80 | typedef HBTInt Index_t; 81 | typedef HBTInt Key_t; 82 | vector ParticleIds; 83 | vector HaloIds;//local haloid 84 | public: 85 | HaloParticleKeyList_t(HaloSnapshot_t &snap) 86 | { 87 | ParticleIds.reserve(snap.TotNumberOfParticles); 88 | HaloIds.reserve(snap.TotNumberOfParticles); 89 | for(HBTInt i=0;iId!=SpecialConst::NullParticleId)//there will be consumed particles 140 | { 141 | if(it!=it_save) 142 | *it_save=move(*it); 143 | ++it_save; 144 | } 145 | } 146 | Particles.resize(it_save-Particles.begin()); 147 | 148 | return it-it_save; 149 | #endif 150 | } 151 | -------------------------------------------------------------------------------- /src/linkedlist.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "linkedlist.h" 3 | 4 | LinkedlistPara_t::LinkedlistPara_t(int ndiv, PositionData_t *data, HBTReal boxsize, bool periodic) 5 | { 6 | #pragma omp parallel 7 | { 8 | #ifdef _OPENMP 9 | int thread_id=omp_get_thread_num(); 10 | int thread_num=omp_get_num_threads(); 11 | #else 12 | int thread_id=0, thread_num=1; 13 | #endif 14 | #pragma omp single 15 | { 16 | LLs.resize(thread_num); 17 | Samples.resize(thread_num); 18 | } 19 | Samples[thread_id].init(thread_id, thread_num, data); 20 | LLs[thread_id].build(ndiv, &(Samples[thread_id]), boxsize, periodic); 21 | } 22 | } 23 | class SampleCollector_t: public ParticleCollector_t 24 | { 25 | ParticleCollector_t &Collector; 26 | PositionSampleBase_t &Sample; 27 | public: 28 | SampleCollector_t(ParticleCollector_t &collector, PositionSampleBase_t &sample):Collector(collector), Sample(sample) 29 | {} 30 | void Collect(HBTInt pid, HBTReal d2) 31 | { 32 | Collector.Collect(Sample.restore_id(pid), d2); 33 | } 34 | }; 35 | void LinkedlistPara_t::SearchShellSerial(HBTReal rmin, HBTReal rmax, const HBTxyz &searchcenter, ParticleCollector_t &collector) 36 | {//serial version, which can be safely run inside another parallel region 37 | for(int thread_id=0;thread_id0);//do not support auto-boxsize; has to make sure each thread use the same boxsize 55 | #pragma omp parallel 56 | { 57 | #ifdef _OPENMP 58 | int thread_id=omp_get_thread_num(); 59 | int thread_num=omp_get_num_threads(); 60 | #else 61 | int thread_id=0, thread_num=1; 62 | #endif 63 | #pragma omp single 64 | { 65 | LLs.resize(thread_num); 66 | 
Samples.resize(thread_num); 67 | } 68 | Samples[thread_id].init(thread_id, thread_num, data); 69 | LLs[thread_id].build(ndiv, &(Samples[thread_id]), boxsize, periodic); 70 | } 71 | 72 | init(ndiv, data, boxsize, periodic); 73 | merge(); 74 | } 75 | void Linkedlist_t::merge() 76 | {//merge the LLs into the main list 77 | #pragma omp parallel for 78 | for(int ichain=0;ichain=0) 85 | { 86 | {//push back 87 | auto true_pid=Samples[ithread].restore_id(pid); 88 | List[true_pid]=hoc; 89 | hoc=true_pid; 90 | } 91 | pid=LLs[ithread].List[pid];//next particle 92 | } 93 | } 94 | } 95 | 96 | //equivalent way: 97 | // #pragma omp parallel for 98 | // for(int ichain=0;ichain=0)//copy till tail 105 | // { 106 | // *p=Samples[ithread].restore_id(pid);//copy 107 | // pid=LLs[ithread].List[pid];//next value 108 | // p=&List[*p];//next storage 109 | // } 110 | // } 111 | // *p=-1;//close the chain 112 | // } 113 | 114 | LLs.clear(); 115 | Samples.clear(); 116 | } 117 | 118 | void LinkedlistLinkGroup(HBTReal radius, const Snapshot_t &snapshot, vector &GrpLen, vector &GrpTags, int ndiv) 119 | /* link particles in the given snapshot into groups. 120 | * Output: filled GrpLen and GrpTags (0~Ngroups-1), down to mass=1 (diffuse particles) 121 | * */ 122 | { 123 | GrpTags.assign(snapshot.size(), -1); 124 | 125 | cout<<"Building linkedlist...\n"<=progress) 139 | { 140 | cout<<"\b\b\b"< 2 | // #include 3 | // #include 4 | // #include 5 | 6 | #include "mymath.h" 7 | #include "config_parser.h" 8 | 9 | template 10 | inline void OctTree_t::AppendCell() 11 | { 12 | Cells.emplace_back(-1); 13 | Nodes=Cells.data()-RootNodeId;//always update reference 14 | } 15 | 16 | template 17 | HBTInt OctTree_t::Build(const Snapshot_t &snapshot, HBTInt num_part) 18 | /* build tree for a snapshot (or SnapshotView); automatically resize memory if necessary. 
19 | * if num_part>0 is given, then only use the first num_part particles in the snapshot 20 | */ 21 | { 22 | HBTInt sub,subid,i,j,nodeid; 23 | double center[3], lenhalf; 24 | double xmin[3], xmax[3],Center[3], Len,Lenhalf; 25 | 26 | Snapshot=&snapshot; 27 | if(!num_part) num_part=snapshot.size(); 28 | Reserve(num_part); 29 | 30 | /* find enclosing rectangle */ 31 | for(j = 0; j < 3; j++) 32 | xmin[j] = xmax[j] = Snapshot->GetComovingPosition(0)[j]; 33 | 34 | for(i = 1; i < NumberOfParticles; i++) 35 | for(j = 0; j < 3; j++) 36 | { 37 | if(Snapshot->GetComovingPosition(i)[j] > xmax[j]) 38 | xmax[j] = Snapshot->GetComovingPosition(i)[j]; 39 | else if(Snapshot->GetComovingPosition(i)[j] < xmin[j]) 40 | xmin[j] = Snapshot->GetComovingPosition(i)[j]; 41 | } 42 | 43 | /* determine maxmimum extension */ 44 | for(j = 1, Len = xmax[0] - xmin[0]; j < 3; j++) 45 | if((xmax[j] - xmin[j]) > Len) 46 | Len = xmax[j] - xmin[j]; 47 | 48 | for(j = 0; j < 3; j++) 49 | Center[j] = 0.5 * (xmax[j] + xmin[j]); 50 | 51 | Lenhalf=0.5*Len; 52 | 53 | /* create an empty root node */ 54 | AppendCell(); 55 | 56 | for(i = 0; i < NumberOfParticles; i++) /* insert all particles */ 57 | { 58 | nodeid = RootNodeId ; /* select index of first node in tree */ 59 | lenhalf = Lenhalf; 60 | for(j = 0; j < 3; j++) 61 | center[j] = Center[j]; 62 | 63 | while(1) 64 | { 65 | //len = lenhalf; 66 | //fprintf(logfile,"%f\n",len); 67 | lenhalf *= 0.5;//halflen for the to-be-found subnode 68 | sub = 0; 69 | if(Snapshot->GetComovingPosition(i)[0] > center[0]) 70 | { 71 | center[0] += lenhalf;//subcenter 72 | sub += 1;//sub index 73 | } 74 | else 75 | { 76 | center[0] -= lenhalf; 77 | } 78 | if(Snapshot->GetComovingPosition(i)[1] > center[1]) 79 | { 80 | center[1] += lenhalf; 81 | sub += 2; 82 | } 83 | else 84 | { 85 | center[1] -= lenhalf; 86 | } 87 | if(Snapshot->GetComovingPosition(i)[2] > center[2]) 88 | { 89 | center[2] += lenhalf; 90 | sub += 4; 91 | } 92 | else 93 | { 94 | center[2] -= lenhalf; 95 | } 96 | 97 | subid=Nodes[nodeid].sons[sub]; 98 | if(subid<0)//an empty node, insert particle as leaf 99 | { 100 | Nodes[nodeid].sons[sub]=i; 101 | break;//finished for this particle, begin to insert a new particle 102 | } 103 | else if(subid= 8) 120 | sub = 7; 121 | //~ fprintf(logfile,"len=%g Len=%g sub=%d i=%d (%g|%g|%g)\n", 122 | //~ lenhalf*2, Len, sub, i, Snapshot->GetComovingPosition(i][0], Snapshot->GetComovingPosition(i][1], Snapshot->GetComovingPosition(i][2]); 123 | } 124 | else 125 | { 126 | sub=0; 127 | if(Snapshot->GetComovingPosition(subid)[0] > center[0]) 128 | sub += 1; 129 | if(Snapshot->GetComovingPosition(subid)[1] > center[1]) 130 | sub += 2; 131 | if(Snapshot->GetComovingPosition(subid)[2] > center[2]) 132 | sub += 4; 133 | } 134 | Nodes[nodeid].sons[sub]=subid;//the disturbing particle inserted 135 | } 136 | else nodeid=subid;//an internal node,take over it; 137 | } 138 | } 139 | 140 | /* finished inserting, now update for walk*/ 141 | UpdateInternalNodes(NumberOfParticles , -1, Len, Center);/*insert sibling and next infomation*/ 142 | 143 | return Cells.size(); 144 | } 145 | 146 | template 147 | void OctTree_t::Reserve(const size_t np) 148 | /* allocate tree memory to hold a maximum of max_num_part particles */ 149 | { 150 | NumberOfParticles=np; 151 | NextnodeFromParticle.resize(NumberOfParticles); 152 | 153 | HBTInt MaxNumberOfCells =HBTConfig.TreeAllocFactor*NumberOfParticles; 154 | if(MaxNumberOfCells 160 | void OctTree_t::Clear() 161 | { 162 | NextnodeFromParticle.clear(); 163 | Cells.clear(); 164 | 
NumberOfParticles=0; 165 | } 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SRC_COMM=$(wildcard src/*.cpp) $(wildcard src/io/*.cpp) 2 | OBJS_COMM=$(SRC_COMM:%.cpp=%.o) 3 | 4 | SRC=$(wildcard *.cpp) 5 | EXE_HBT=HBT HBTdouble HBT_majormerger_test HBTi8 HBT.apostle HBT.apostle_thermal HBT.nostrip 6 | EXE=$(EXE_HBT) 7 | # EXE+=debug 8 | 9 | default: HBT 10 | include Makefile.inc 11 | 12 | $(EXE): $(OBJS_COMM) 13 | 14 | HBT.apostle HBT.apostle_thermal: CXXFLAGS+=-DHBT_INT8 -DUNSIGNED_LONG_ID_OUTPUT 15 | HBT.apostle_thermal: CXXFLAGS+=-DUNBIND_WITH_THERMAL_ENERGY 16 | # debug: CXXFLAGS+=-DHBT_INT8 -DHBT_REAL8 17 | 18 | HBTdouble: CXXFLAGS+=-DHBT_INT8 -DHBT_REAL8 19 | HBTi8: CXXFLAGS+=-DHBT_INT8 20 | HBT_majormerger_test: CXXFLAGS+=-DMAJOR_MERGER_PATCH #-DALLOW_BINARY_SYSTEM 21 | HBT.nostrip: CXXFLAGS+=-DNO_STRIPPING -DHBT_INT8 #track without unbinding. 22 | $(EXE_HBT): HBT.o 23 | $(CXX) $^ $(LDFLAGS) $(LDLIBS) -o $@ 24 | 25 | depend: 26 | makedepend --$(CXXFLAGS)-- -Y $(SRC) $(SRC_COMM) 27 | 28 | #custom command, not needed by a general user 29 | -include .Makefile_sync_mpi.inc 30 | # DO NOT DELETE 31 | 32 | HBT.o: src/mpi_wrapper.h src/datatypes.h src/mymath.h src/datatypes.h 33 | HBT.o: src/config_parser.h src/mpi_wrapper.h src/snapshot.h 34 | HBT.o: src/config_parser.h src/snapshot_number.h src/hash.h src/hash.tpp 35 | HBT.o: src/halo.h src/snapshot.h src/subhalo.h src/halo.h src/hdf_wrapper.h 36 | HBT.o: src/mymath.h src/particle_exchanger.h src/halo_particle_iterator.h 37 | HBT.o: src/subhalo.h src/hash_remote.tpp 38 | src/subhalo_unbind.o: src/datatypes.h src/snapshot_number.h 39 | src/subhalo_unbind.o: src/config_parser.h src/subhalo.h src/gravity_tree.h 40 | src/subhalo_unbind.o: src/oct_tree.h src/snapshot.h src/oct_tree.tpp 41 | src/subhalo_unbind.o: src/mymath.h 42 | src/particle_exchanger.o: src/snapshot.h src/particle_exchanger.h 43 | src/particle_exchanger.o: src/datatypes.h src/mymath.h src/mpi_wrapper.h 44 | src/particle_exchanger.o: src/halo_particle_iterator.h src/subhalo.h 45 | src/particle_exchanger.o: src/hash_remote.tpp src/hash.h src/hash.tpp 46 | src/snapshot.o: src/snapshot.h src/mymath.h src/datatypes.h 47 | src/linkedlist.o: src/linkedlist.h src/mymath.h src/datatypes.h 48 | src/linkedlist.o: src/linkedlist_base.h src/snapshot.h 49 | src/hdf_wrapper.o: src/hdf_wrapper.h 50 | src/halo.o: src/mpi_wrapper.h src/mymath.h src/datatypes.h src/halo.h 51 | src/halo.o: src/particle_exchanger.h src/snapshot.h 52 | src/halo.o: src/halo_particle_iterator.h src/subhalo.h src/hash_remote.tpp 53 | src/halo.o: src/hash.h src/hash.tpp 54 | src/subhalo_merge.o: src/datatypes.h src/snapshot_number.h 55 | src/subhalo_merge.o: src/config_parser.h src/subhalo.h 56 | src/mpi_wrapper.o: src/mpi_wrapper.h 57 | src/snapshot_exchanger.o: src/snapshot.h src/mymath.h src/datatypes.h 58 | src/snapshot_exchanger.o: src/mpi_wrapper.h 59 | src/geometric_tree.o: src/mymath.h src/datatypes.h src/config_parser.h 60 | src/geometric_tree.o: src/geometric_tree.h src/oct_tree.h src/snapshot.h 61 | src/geometric_tree.o: src/oct_tree.tpp 62 | src/subhalo.o: src/datatypes.h src/snapshot_number.h src/config_parser.h 63 | src/subhalo.o: src/subhalo.h src/particle_exchanger.h src/mymath.h 64 | src/subhalo.o: src/mpi_wrapper.h src/snapshot.h src/halo_particle_iterator.h 65 | src/subhalo.o: src/hash_remote.tpp src/hash.h 
src/hash.tpp 66 | src/subhalo_tracking.o: src/datatypes.h src/snapshot_number.h 67 | src/subhalo_tracking.o: src/config_parser.h src/subhalo.h 68 | src/gravity_tree.o: src/mymath.h src/datatypes.h src/config_parser.h 69 | src/gravity_tree.o: src/gravity_tree.h src/oct_tree.h src/snapshot.h 70 | src/gravity_tree.o: src/oct_tree.tpp 71 | src/mymath.o: src/mymath.h src/datatypes.h 72 | src/linkedlist_base.o: src/mymath.h src/datatypes.h src/linkedlist_base.h 73 | src/linkedlist_base.o: src/snapshot.h 74 | src/config_parser.o: src/config_parser.h 75 | src/io/snapshot_io.o: src/mpi_wrapper.h src/datatypes.h src/mymath.h 76 | src/io/snapshot_io.o: src/snapshot.h src/config_parser.h 77 | src/io/snapshot_io.o: src/snapshot_number.h src/hash.h src/hash.tpp 78 | src/io/snapshot_io.o: src/mpi_wrapper.h src/mymath.h src/io/gadget_io.h 79 | src/io/snapshot_io.o: src/io/apostle_io.h src/hdf_wrapper.h src/halo.h 80 | src/io/snapshot_io.o: src/snapshot.h 81 | src/io/gadget_io.o: src/snapshot.h src/datatypes.h src/mymath.h 82 | src/io/gadget_io.o: src/config_parser.h src/snapshot_number.h src/hash.h 83 | src/io/gadget_io.o: src/hash.tpp src/mpi_wrapper.h src/mymath.h 84 | src/io/gadget_io.o: src/io/gadget_io.h src/mpi_wrapper.h 85 | src/io/apostle_io.o: src/snapshot.h src/datatypes.h src/mymath.h 86 | src/io/apostle_io.o: src/config_parser.h src/snapshot_number.h src/hash.h 87 | src/io/apostle_io.o: src/hash.tpp src/mpi_wrapper.h src/mymath.h 88 | src/io/apostle_io.o: src/hdf_wrapper.h src/io/apostle_io.h src/halo.h 89 | src/io/apostle_io.o: src/snapshot.h src/mpi_wrapper.h 90 | src/io/apostle_io.o: src/halo_particle_iterator.h 91 | src/io/subhalo_io.o: src/mpi_wrapper.h src/datatypes.h src/mymath.h 92 | src/io/subhalo_io.o: src/datatypes.h src/snapshot_number.h 93 | src/io/subhalo_io.o: src/config_parser.h src/subhalo.h src/snapshot_number.h 94 | src/io/subhalo_io.o: src/halo.h src/hdf_wrapper.h 95 | src/io/gadget_group_io.o: src/mymath.h src/halo.h src/datatypes.h 96 | src/io/gadget_group_io.o: src/snapshot_number.h src/config_parser.h 97 | src/io/gadget_group_io.o: src/snapshot.h src/io/gadget_group_io.h 98 | src/io/gadget_group_io.o: src/mpi_wrapper.h src/mymath.h 99 | src/io/halo_io.o: src/mymath.h src/halo.h src/datatypes.h 100 | src/io/halo_io.o: src/snapshot_number.h src/config_parser.h src/snapshot.h 101 | src/io/halo_io.o: src/io/gadget_group_io.h src/mpi_wrapper.h src/mymath.h 102 | src/io/halo_io.o: src/io/apostle_io.h src/hdf_wrapper.h 103 | -------------------------------------------------------------------------------- /src/datatypes.h: -------------------------------------------------------------------------------- 1 | #ifndef DATATYPES_INCLUDED 2 | 3 | #include 4 | #include 5 | #include 6 | using namespace std; 7 | #include 8 | #include 9 | // #include 10 | #ifdef DM_ONLY 11 | #undef UNBIND_WITH_THERMAL_ENERGY 12 | #undef HAS_THERMAL_ENERGY 13 | #endif 14 | 15 | #ifdef UNBIND_WITH_THERMAL_ENERGY 16 | #ifndef HAS_THERMAL_ENERGY 17 | #define HAS_THERMAL_ENERGY 18 | #endif 19 | #endif 20 | 21 | /*datatype for input particle data*/ 22 | #ifdef INPUT_REAL8 23 | typedef double IDatReal; 24 | #else 25 | typedef float IDatReal; 26 | #endif 27 | 28 | /*datatype for input particle IDs*/ 29 | #ifdef INPUT_INT8 30 | typedef long IDatInt; 31 | #else 32 | #ifdef INPUT_UINT4 33 | typedef unsigned IDatInt; 34 | #else 35 | typedef int IDatInt; 36 | #endif 37 | #endif 38 | 39 | /*datatype for internal calculation and output*/ 40 | #ifdef HBT_REAL8 41 | typedef double HBTReal; 42 | #define MPI_HBT_REAL 
MPI_DOUBLE 43 | #else 44 | typedef float HBTReal; 45 | #define MPI_HBT_REAL MPI_FLOAT 46 | #endif 47 | 48 | // the user should ganrantee that HBTInt can at least hold NP_DM 49 | #ifdef HBT_INT8 50 | typedef long HBTInt; 51 | #define HBTIFMT "%ld" 52 | #define MPI_HBT_INT MPI_LONG 53 | #else 54 | typedef int HBTInt; 55 | #define HBTIFMT "%d" 56 | #define MPI_HBT_INT MPI_INT 57 | #endif 58 | 59 | // typedef HBTReal HBTxyz[3]; //3-d pos/vel data 60 | /*inline void copyHBTxyz(HBTxyz & dest, const HBTxyz & src) 61 | { 62 | memcpy(dest, src, sizeof(HBTxyz)); 63 | }*/ 64 | typedef array HBTxyz; 65 | inline void copyHBTxyz(HBTxyz &dest, const HBTxyz &src) 66 | { 67 | /*copy for std:arr implementation*/ 68 | dest=src; 69 | } 70 | template 71 | inline void copyHBTxyz(HBTxyz &dest, const T src[3]) 72 | { 73 | dest[0]=src[0]; 74 | dest[1]=src[1]; 75 | dest[2]=src[2]; 76 | } 77 | template 78 | inline void copyXYZ(T1 & dest, const T2 src) 79 | { 80 | dest[0]=src[0]; 81 | dest[1]=src[1]; 82 | dest[2]=src[2]; 83 | } 84 | 85 | namespace SpecialConst 86 | { 87 | const HBTInt NullParticleId=-1;//reserved special id, should not be used by input simulation data 88 | const HBTInt NullSnapshotId=-1; 89 | const HBTInt NullHaloId=-1;//do not change this. 90 | const HBTInt NullSubhaloId=-1; 91 | const HBTInt NullTrackId=-1; 92 | 93 | const HBTxyz NullCoordinate={0.,0.,0.}; 94 | // const Particle_t NullParticle(NullParticleId, NullParticleId, NullCoordinate, NullCoordinate); 95 | }; 96 | 97 | struct IdRank_t 98 | { 99 | HBTInt Id; 100 | int Rank; 101 | IdRank_t(){}; 102 | IdRank_t(HBTInt id, int rank): Id(id), Rank(rank) 103 | { 104 | } 105 | }; 106 | #ifdef HBT_INT8 107 | #define MPI_HBTRankPair MPI_LONG_INT 108 | #else 109 | #define MPI_HBTRankPair MPI_2INT 110 | #endif 111 | inline bool CompareRank(const IdRank_t &a, const IdRank_t &b) 112 | { 113 | return (a.Rank 117 | class VectorView_t 118 | /* similar to vector, but never actively manage memory; only bind to existing memory*/ 119 | { 120 | public: 121 | typedef T * iterator; 122 | HBTInt N; 123 | T * Data; //this is only copied. never allocated by itself. 
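/* A minimal usage sketch (the buffer `buf` is owned by the caller, e.g. a vector<int> of
 * sufficient capacity; names here are illustrative):
 *   VectorView_t<int> view(10, buf.data()); // expose the first 10 elements of buf
 *   view.PushBack(42);                       // writes buf[10]; the caller must guarantee room
 * The view never allocates or frees memory; it only indexes into the buffer it was bound to,
 * so resizing or freeing the underlying buffer invalidates the view. */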
124 | VectorView_t(): N(0), Data(nullptr) 125 | { 126 | } 127 | VectorView_t(const HBTInt n, T * const data): N(n), Data(data) 128 | { 129 | } 130 | void Bind(const HBTInt n, T * const data) 131 | { 132 | N=n; 133 | Data=data; 134 | } 135 | void Bind(T * const data) 136 | { 137 | Data=data; 138 | } 139 | void ReBind(const HBTInt n) 140 | { 141 | N=n; 142 | } 143 | void IncrementBind() 144 | { 145 | N++; 146 | } 147 | T * data() const 148 | { 149 | return Data; 150 | } 151 | T & operator [](const HBTInt index) const 152 | { 153 | return Data[index]; 154 | } 155 | HBTInt size() const 156 | { 157 | return N; 158 | } 159 | void PushBack(T x) 160 | /*memory is never reallocated*/ 161 | { 162 | Data[N]=x; 163 | N++; 164 | } 165 | T * begin() 166 | { 167 | return Data; 168 | } 169 | T* end() 170 | { 171 | return Data+N; 172 | } 173 | T & back() 174 | { 175 | return Data[N-1]; 176 | } 177 | }; 178 | 179 | enum ParticleType_t:int 180 | { 181 | TypeGas=0, 182 | TypeDM, 183 | TypeDisk, 184 | TypeBulge , 185 | TypeStar, 186 | TypeBndry, 187 | TypeMax 188 | }; 189 | 190 | struct LocatedParticle_t 191 | { 192 | HBTInt index; 193 | HBTReal d2; //distance**2 194 | LocatedParticle_t(){}; 195 | LocatedParticle_t(HBTInt index, HBTReal d2):index(index),d2(d2) 196 | {} 197 | }; 198 | inline bool CompLocatedDistance(const LocatedParticle_t &a, const LocatedParticle_t &b) 199 | { 200 | return a.d2 Founds; 212 | LocatedParticleCollector_t(HBTInt n_reserve=0): Founds() 213 | { 214 | Founds.reserve(n_reserve); 215 | } 216 | void Collect(HBTInt index, HBTReal d2) 217 | { 218 | Founds.emplace_back(index, d2); 219 | } 220 | void Clear() 221 | { 222 | Founds.clear(); 223 | } 224 | }; 225 | class NearestNeighbourCollector_t: public ParticleCollector_t 226 | //a collector for nearest neighbour search. keeps the nearest neighbour particle. 227 | { 228 | public: 229 | HBTInt Index; 230 | HBTReal D2;//distance squared 231 | NearestNeighbourCollector_t():Index(0), D2(-1) 232 | {} 233 | void Collect(HBTInt index, HBTReal d2) 234 | { 235 | if(d2 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "datatypes.h" 11 | #include "mpi_wrapper.h" 12 | 13 | #define HBT_VERSION "1.16.2.MPI" 14 | 15 | namespace PhysicalConst 16 | {//initialized after reading parameter file. 17 | extern HBTReal G; 18 | extern HBTReal H0; 19 | } 20 | 21 | #define NumberOfCompulsaryConfigEntries 7 22 | class Parameter_t 23 | {/*!remember to register members in BroadCast() and SetParameterValue() functions if you change them!*/ 24 | public: 25 | //remember to update SetParameterValue() and DumpParameters() accordingly if you change any parameter definition. 26 | /*compulsory parameters*/ 27 | string SnapshotPath; 28 | string HaloPath; 29 | string SubhaloPath; 30 | string SnapshotFileBase; 31 | int MaxSnapshotIndex; 32 | HBTReal BoxSize; //to check the unit of snapshot according to the BoxSize in header 33 | HBTReal SofteningHalo; 34 | vector IsSet; 35 | 36 | /*optional*/ 37 | string SnapshotFormat; 38 | string GroupFileFormat; 39 | int MaxConcurrentIO; 40 | int MinSnapshotIndex; 41 | int MinNumPartOfSub; 42 | long GroupParticleIdMask; //only used for a peculiar gadget format. 43 | HBTReal MassInMsunh; 44 | HBTReal LengthInMpch; 45 | HBTReal VelInKmS; 46 | bool PeriodicBoundaryOn; 47 | bool SnapshotHasIdBlock;//set to False when your snapshot is sorted according to particle id so that no id block is present. 48 | // bool SnapshotNoMassBlock;//to disable checking for presence of mass block, even if some header.mass==0. 
49 | bool ParticleIdRankStyle;//performance related; load particleId as id ranks. not implemented yet. 50 | bool ParticleIdNeedHash;//performance related; disabled if ParticleIdRankStyle is true 51 | bool SnapshotIdUnsigned; 52 | bool SaveSubParticleProperties; 53 | bool MergeTrappedSubhalos;//whether to MergeTrappedSubhalos, see code paper for more info. 54 | vector SnapshotIdList; 55 | vector SnapshotNameList; 56 | 57 | HBTReal MajorProgenitorMassRatio; 58 | HBTReal BoundMassPrecision; 59 | HBTReal SourceSubRelaxFactor; 60 | HBTReal SubCoreSizeFactor; //coresize=Nbound*CoreSizeFactor, to get center coordinates for the KineticDistance test. 61 | HBTInt SubCoreSizeMin; //Minimum coresize 62 | 63 | HBTReal TreeAllocFactor; 64 | HBTReal TreeNodeOpenAngle; 65 | HBTInt TreeMinNumOfCells; 66 | 67 | HBTInt MaxSampleSizeOfPotentialEstimate; 68 | bool RefineMostboundParticle; //whether to further improve mostbound particle accuracy in case a MaxSampleSizeOfPotentialEstimate is used. this introduces some overhead if true, but leads to more accuracy mostbound particle 69 | 70 | /*derived parameters; do not require user input*/ 71 | HBTReal TreeNodeOpenAngleSquare; 72 | HBTReal TreeNodeResolution; 73 | HBTReal TreeNodeResolutionHalf; 74 | HBTReal BoxHalf; 75 | bool GroupLoadedFullParticle;//whether group particles are loaded with full particle properties or just ids. 76 | 77 | Parameter_t(): IsSet(NumberOfCompulsaryConfigEntries, false),SnapshotIdList(), SnapshotNameList() 78 | { 79 | SnapshotFormat="gadget"; //see example config file for alternative formats 80 | GroupFileFormat="gadget3_int"; 81 | MaxConcurrentIO=10; 82 | MinSnapshotIndex=0; 83 | MinNumPartOfSub=20; 84 | GroupParticleIdMask=0; 85 | MassInMsunh=1e10; 86 | LengthInMpch=1; 87 | VelInKmS=1.; 88 | PeriodicBoundaryOn=true; 89 | SnapshotHasIdBlock=true; 90 | ParticleIdRankStyle=false;//to be removed 91 | ParticleIdNeedHash=true; 92 | SnapshotIdUnsigned=false; 93 | SaveSubParticleProperties=false; 94 | #ifdef NO_STRIPPING 95 | MergeTrappedSubhalos=false; 96 | #else 97 | MergeTrappedSubhalos=true; 98 | #endif 99 | MajorProgenitorMassRatio=0.8; 100 | BoundMassPrecision=0.995; 101 | SourceSubRelaxFactor=3.; 102 | SubCoreSizeFactor=0.25; 103 | SubCoreSizeMin=20; 104 | TreeAllocFactor=0.8; /* a value of 2 should be more than sufficient*/ 105 | TreeNodeOpenAngle=0.45; 106 | TreeMinNumOfCells=10; 107 | MaxSampleSizeOfPotentialEstimate=1000;//set to 0 to disable sampling 108 | RefineMostboundParticle=true; 109 | GroupLoadedFullParticle=false; 110 | } 111 | void ReadSnapshotNameList(); 112 | void ParseConfigFile(const char * param_file); 113 | void SetParameterValue(const string &line); 114 | void CheckUnsetParameters(); 115 | void BroadCast(MpiWorker_t &world, int root); 116 | void BroadCast(MpiWorker_t &world, int root, int &snapshot_start, int &snapshot_end) 117 | { 118 | BroadCast(world, root); 119 | world.SyncAtom(snapshot_start, MPI_INT, root); 120 | world.SyncAtom(snapshot_end, MPI_INT, root); 121 | } 122 | void DumpParameters(); 123 | }; 124 | 125 | extern Parameter_t HBTConfig; 126 | extern void ParseHBTParams(int argc, char **argv, Parameter_t &config, int &snapshot_start, int &snapshot_end); 127 | inline void trim_leading_garbage(string &s, const string &garbage_list) 128 | { 129 | int pos= s.find_first_not_of(garbage_list);//look for any good staff 130 | if( string::npos!=pos) 131 | s.erase(0, pos);//s=s.substr(pos); 132 | else //no good staff, clear everything 133 | s.clear(); 134 | } 135 | inline void trim_trailing_garbage(string &s, 
const string &garbage_list) 136 | { 137 | int pos=s.find_first_of(garbage_list); 138 | if(string::npos!=pos) 139 | s.erase(pos); 140 | } 141 | 142 | #define NEAREST(x) (((x)>HBTConfig.BoxHalf)?((x)-HBTConfig.BoxSize):(((x)<-HBTConfig.BoxHalf)?((x)+HBTConfig.BoxSize):(x))) 143 | inline HBTReal PeriodicDistance(const HBTxyz &x, const HBTxyz &y) 144 | { 145 | HBTxyz dx; 146 | dx[0]=x[0]-y[0]; 147 | dx[1]=x[1]-y[1]; 148 | dx[2]=x[2]-y[2]; 149 | if(HBTConfig.PeriodicBoundaryOn) 150 | { 151 | dx[0]=NEAREST(dx[0]); 152 | dx[1]=NEAREST(dx[1]); 153 | dx[2]=NEAREST(dx[2]); 154 | } 155 | return sqrt(dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2]); 156 | } 157 | #endif 158 | -------------------------------------------------------------------------------- /src/snapshot_exchanger.cpp: -------------------------------------------------------------------------------- 1 | using namespace std; 2 | #include 3 | // #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "snapshot.h" 13 | #include "mymath.h" 14 | #include "mpi_wrapper.h" 15 | 16 | inline int GetGrid(HBTReal x, HBTReal step, int dim) 17 | { 18 | int i=floor(x/step); 19 | if(i<0) i=0; 20 | if(i>=dim) i=dim-1; 21 | return i; 22 | } 23 | inline int AssignCell(HBTxyz & Pos, const HBTReal step[3], const vector &dims) 24 | { 25 | #define GRIDtoRank(g0,g1,g2) (((g0)*dims[1]+(g1))*dims[2]+(g2)) 26 | #define GID(i) GetGrid(Pos[i], step[i], dims[i]) 27 | return GRIDtoRank(GID(0), GID(1), GID(2)); 28 | } 29 | 30 | void ParallelStride(MpiWorker_t &world, vector &Particles, HBTInt &offset, HBTInt steps) 31 | { 32 | while(steps) 33 | { 34 | HBTInt nmax=Particles.size()-offset; 35 | MPI_Comm newcomm; 36 | MPI_Comm_split(world.Communicator, nmax==0, 0, &newcomm); 37 | int newcomm_size; 38 | MPI_Comm_size(newcomm, &newcomm_size); 39 | 40 | HBTInt n=0; 41 | if(nmax) 42 | { 43 | n=steps/newcomm_size; 44 | if(n*newcomm_sizenmax) n=nmax; 46 | 47 | HBTInt pid=Particles[offset+n-1].Id; 48 | HBTInt MinId; 49 | MPI_Allreduce(&pid, &MinId, 1, MPI_HBT_INT, MPI_MIN, newcomm); 50 | 51 | if(pid>MinId)//fastforward sparse ranks 52 | { 53 | n=offset; 54 | while(Particles[offset].Id &offset) 107 | { 108 | int nremainder=NumberOfParticlesOnAllNodes%world.size(); 109 | HBTInt nnew=NumberOfParticlesOnAllNodes/world.size()+1; 110 | 111 | HBTInt GlobalIdMin; 112 | if(IsContiguousId(world, GlobalIdMin)) 113 | { 114 | HBTInt & upperbound=GlobalIdMin; 115 | int rank=0, pid=0; 116 | while(pid SendOffsets(world.size()+1), SendSizes(world.size(), 0); 161 | PartitionParticles(world, SendOffsets); 162 | SendOffsets.back()=Particles.size(); 163 | for(int i=0;i ReceiveSizes(world.size(),0), ReceiveOffsets(world.size()); 167 | MPI_Alltoall(SendSizes.data(), 1, MPI_INT, ReceiveSizes.data(), 1, MPI_INT, world.Communicator); 168 | vector ReceivedParticles; 169 | ReceivedParticles.resize(CompileOffsets(ReceiveSizes, ReceiveOffsets)); 170 | 171 | MPI_Datatype MPI_HBT_Particle; 172 | Particle_t().create_MPI_type(MPI_HBT_Particle); 173 | MPI_Alltoallv(Particles.data(), SendSizes.data(), SendOffsets.data(), MPI_HBT_Particle, 174 | ReceivedParticles.data(), ReceiveSizes.data(), ReceiveOffsets.data(), MPI_HBT_Particle, world.Communicator); 175 | 176 | MPI_Type_free(&MPI_HBT_Particle); 177 | 178 | Particles.swap(ReceivedParticles); 179 | 180 | sort(Particles.begin(), Particles.end(), CompParticleId); 181 | IdMin=Particles.front().Id; 182 | IdMax=Particles.back().Id; 183 | ProcessIdRanges.resize(world.size()+1); 184 | MPI_Allgather(&IdMin, 1, 
MPI_HBT_INT, ProcessIdRanges.data(), 1, MPI_HBT_INT, world.Communicator); 185 | ProcessIdRanges.back()=IdMax+1; 186 | MPI_Bcast(&ProcessIdRanges.back(), 1, MPI_HBT_INT, world.size()-1, world.Communicator); 187 | 188 | // cout< 2 | #include 3 | #include 4 | #include 5 | 6 | #include "mymath.h" 7 | #include "config_parser.h" 8 | #include "geometric_tree.h" 9 | 10 | inline void shift_center(const double oldcenter[3], int son, double delta, double newcenter[3]) 11 | { 12 | for(int dim=0;dim<3;dim++) 13 | { 14 | int bit=get_bit(son, dim); 15 | if(bit) 16 | newcenter[dim]=oldcenter[dim]+delta; 17 | else 18 | newcenter[dim]=oldcenter[dim]-delta; 19 | } 20 | } 21 | 22 | void GeoTree_t::ProcessNode(HBTInt nodeid, HBTInt nextid, int sonid, HBTInt &mass, double len, const double center[3]) 23 | { 24 | if(nodeid=HBTConfig.TreeNodeResolution)//only divide if above resolution; 32 | { 33 | double newcenter[3]; 34 | shift_center(center, sonid, len/4., newcenter); 35 | UpdateInternalNodes(nodeid, nextid, len/2., newcenter); 36 | } 37 | else 38 | UpdateInternalNodes(nodeid, nextid, len, center);//otherwise we don't divide the node seriouly so we don't have finer node length 39 | 40 | mass+=Nodes[nodeid].way.mass;//update mass after updating internal nodes 41 | } 42 | } 43 | 44 | inline void GeoTree_t::FillNodeCenter(HBTInt nodeid, const double center[3]) 45 | { 46 | copyXYZ(Nodes[nodeid].way.s, center); 47 | } 48 | 49 | void GeoTree_t::UpdateInternalNodes(HBTInt no, HBTInt sib, double len, const double center[3]) 50 | { 51 | HBTInt p,pp,sons[8]; 52 | int j,jj,i; 53 | HBTInt mass=0; 54 | 55 | for(j=0;j<8;j++) 56 | sons[j]=Nodes[no].sons[j];//backup sons 57 | Nodes[no].way.len=len; 58 | Nodes[no].way.sibling=sib; 59 | for(i=0;sons[i]<0;i++);//find first son 60 | jj=i; 61 | pp=sons[jj]; 62 | Nodes[no].way.nextnode=pp; 63 | for(i++;i<8;i++)//find sons in pairs,ie. find sibling 64 | { 65 | if(sons[i]>=0)//ok, found a sibling 66 | { 67 | j=jj; 68 | p=pp; 69 | jj=i; 70 | pp=sons[jj]; 71 | ProcessNode(p, pp, j, mass, len, center); 72 | } 73 | } 74 | ProcessNode(pp, sib, jj, mass, len, center); 75 | Nodes[no].way.mass=mass; 76 | FillNodeCenter(no, center); 77 | } 78 | 79 | HBTInt GeoTree_t::NearestNeighbour(const HBTxyz & cen, HBTReal rguess) 80 | //return the particle_index of the nearest neighbour 81 | { 82 | NearestNeighbourCollector_t collector; 83 | Search(cen, rguess, collector); 84 | while(collector.IsEmpty()) //WARNING: dead loop if tree is empty. 
85 | { 86 | rguess *= 1.26;//double the guess volume 87 | Search(cen, rguess, collector); 88 | } 89 | return collector.Index; 90 | } 91 | 92 | void GeoTree_t::Search(const HBTxyz & searchcenter, HBTReal radius, ParticleCollector_t &collector) 93 | {/*find a list of particles from the tree, located within radius around searchcenter, 94 | * and process the particles with collector */ 95 | bool IsPeriodic=HBTConfig.PeriodicBoundaryOn; 96 | double x0=searchcenter[0], y0=searchcenter[1], z0=searchcenter[2]; 97 | double h2 = radius * radius; 98 | 99 | HBTInt numngb = 0; 100 | HBTInt node_id = RootNodeId; 101 | 102 | while(node_id >= 0) 103 | { 104 | if(node_id < RootNodeId) /* single particle */ 105 | { 106 | HBTInt pid=node_id; 107 | node_id = NextnodeFromParticle[node_id]; 108 | 109 | auto &pos=Snapshot->GetComovingPosition(pid); 110 | double dx = pos[0] - x0; 111 | if(IsPeriodic) dx=NEAREST(dx); 112 | if(dx > radius || dx < -radius) 113 | continue; 114 | 115 | double dy = pos[1] - y0; 116 | if(IsPeriodic) dy=NEAREST(dy); 117 | if(dy > radius || dy < -radius) 118 | continue; 119 | 120 | double dz = pos[2] - z0; 121 | if(IsPeriodic) dz=NEAREST(dz); 122 | if(dz > radius || dz < -radius) 123 | continue; 124 | 125 | double r2 = dx * dx + dy * dy + dz * dz; 126 | 127 | if(r2 < h2) 128 | collector.Collect(pid, r2); 129 | } 130 | else 131 | { 132 | auto &node = Nodes[node_id]; 133 | 134 | node_id = node.way.sibling; /* in case the node can be discarded */ 135 | double rmax=node.way.len/2.; 136 | rmax+=radius; 137 | 138 | auto &pos=node.way.s; 139 | double dx = pos[0] - x0; 140 | if(IsPeriodic) dx=NEAREST(dx); 141 | if(dx > rmax || dx < -rmax) 142 | continue; 143 | 144 | double dy = pos[1] - y0; 145 | if(IsPeriodic) dy=NEAREST(dy); 146 | if(dy > rmax || dy < -rmax) 147 | continue; 148 | 149 | double dz = pos[2] - z0; 150 | if(IsPeriodic) dz=NEAREST(dz); 151 | if(dz > rmax || dz < -rmax) 152 | continue; 153 | 154 | node_id = node.way.nextnode; /* ok, we need to open the node */ 155 | } 156 | } 157 | } 158 | 159 | double GeoTree_t::SphDensity(const HBTxyz &cen, HBTReal & hguess) 160 | { 161 | LocatedParticleCollector_t collector(NumNeighbourSPH*2); 162 | vector &founds=collector.Founds; 163 | Search(cen, hguess, collector); 164 | int numngb=founds.size(); 165 | while(numngbd2); 180 | // h=sqrtf(h); 181 | hguess=h*1.01; 182 | double hinv3 = 1.0 / (h * h * h); 183 | 184 | double rho=0.; 185 | for(auto it=founds.begin(); it <=pivot_particle; ++it) 186 | { 187 | double r = sqrt(it->d2); 188 | double u = r / h, wk; 189 | 190 | if(u < 0.5) 191 | wk = hinv3 * (2.546479089470 + 15.278874536822 * (u - 1) * u * u); 192 | else 193 | wk = hinv3 * 5.092958178941 * (1.0 - u) * (1.0 - u) * (1.0 - u); 194 | 195 | rho += wk; 196 | } 197 | return rho; 198 | } 199 | 200 | template class OctTree_t;//to wake up the functions for this type; trick! 201 | -------------------------------------------------------------------------------- /src/test/test_h5subset.cpp: -------------------------------------------------------------------------------- 1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 2 | * Copyright by The HDF Group. * 3 | * Copyright by the Board of Trustees of the University of Illinois. * 4 | * All rights reserved. * 5 | * * 6 | * This file is part of HDF5. The full HDF5 copyright notice, including * 7 | * terms governing use, modification, and redistribution, is contained in * 8 | * the files COPYING and Copyright.html. 
COPYING can be found at the root * 9 | * of the source code distribution tree; Copyright.html can be found at the * 10 | * root level of an installed copy of the electronic HDF5 document set and * 11 | * is linked from the top-level documents page. It can also be found at * 12 | * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * 13 | * access to either file, you may request a copy from help@hdfgroup.org. * 14 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 15 | 16 | /* 17 | * This example illustrates how to read/write a subset of data (a slab) 18 | * from/to a dataset in an HDF5 file. It is used in the HDF5 Tutorial. 19 | */ 20 | 21 | #include 22 | #include 23 | 24 | #include "H5Cpp.h" 25 | 26 | #ifndef H5_NO_NAMESPACE 27 | using namespace H5; 28 | #ifndef H5_NO_STD 29 | using std::cout; 30 | using std::endl; 31 | #endif // H5_NO_STD 32 | #endif 33 | 34 | const H5std_string FILE_NAME("h5tutr_subset.h5"); 35 | const H5std_string DATASET_NAME("IntArray"); 36 | 37 | const int RANK = 2; 38 | const int DIM0_SUB = 3; // subset dimensions 39 | const int DIM1_SUB = 4; 40 | const int DIM0 = 8; // size of dataset 41 | const int DIM1 = 10; 42 | 43 | int main (void) 44 | { 45 | int i,j; 46 | int data[DIM0][DIM1], sdata[DIM0_SUB][DIM1_SUB], rdata[DIM0][DIM1]; 47 | 48 | // Try block to detect exceptions raised by any of the calls inside it 49 | try 50 | { 51 | // Turn off the auto-printing when failure occurs so that we can 52 | // handle the errors appropriately 53 | // Exception::dontPrint(); 54 | 55 | // --------------------------------------------------- 56 | // Create a new file using the default property lists. 57 | // Then create a dataset and write data to it. 58 | // Close the file and dataset. 59 | // --------------------------------------------------- 60 | 61 | H5File file(FILE_NAME, H5F_ACC_TRUNC); 62 | 63 | hsize_t dims[2]; 64 | dims[0] = DIM0; 65 | dims[1] = DIM1; 66 | DataSpace dataspace = DataSpace (RANK, dims); 67 | 68 | DataSet dataset(file.createDataSet( DATASET_NAME, 69 | PredType::STD_I32BE, dataspace) ); 70 | 71 | 72 | for (j = 0; j < DIM0; j++) { 73 | for (i = 0; i < DIM1; i++) 74 | data[j][i]=j*10+i; 75 | } 76 | 77 | dataset.write(data, PredType::NATIVE_INT); 78 | 79 | cout << endl << "Data Written to File:" << endl; 80 | for (j = 0; j < DIM0; j++) { 81 | for (i = 0; i < DIM1; i++) 82 | cout << " " << data[j][i]; 83 | cout << endl; 84 | } 85 | 86 | dataspace.close(); 87 | dataset.close(); 88 | file.close(); 89 | 90 | // --------------------------------------------------- 91 | // Reopen the file and dataset and write a subset of 92 | // values to the dataset. 93 | // --------------------------------------------------- 94 | 95 | hsize_t offset[2], count[2], stride[2], block[2]; 96 | hsize_t dimsm[2]; 97 | 98 | file.openFile(FILE_NAME, H5F_ACC_RDWR); 99 | dataset = file.openDataSet(DATASET_NAME); 100 | 101 | // Specify size and shape of subset to write. 102 | 103 | offset[0] = 1; 104 | offset[1] = 2; 105 | 106 | count[0] = 3; 107 | count[1] = 2; 108 | 109 | stride[0] = 2; 110 | stride[1] = 3; 111 | 112 | block[0] = 1; 113 | block[1] = 2; 114 | 115 | // Define Memory Dataspace. Get file dataspace and select 116 | // a subset from the file dataspace. 
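// Note on the hyperslab selected below: starting at `offset`, the selection takes `count`
// blocks along each dimension, each block `block` elements wide, with the starts of
// consecutive blocks separated by `stride` elements. With the values set above
// (offset={1,2}, count={3,2}, stride={2,3}, block={1,2}) this covers 3*1 x 2*2 = 3 x 4
// elements, matching DIM0_SUB x DIM1_SUB.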
117 | 118 | dimsm[0] = DIM0_SUB; 119 | dimsm[1] = DIM1_SUB; 120 | 121 | // DataSpace memspace(RANK, dimsm, NULL); 122 | hsize_t dimtmp=DIM0_SUB*DIM1_SUB; 123 | DataSpace memspace(1, &dimtmp, NULL); 124 | 125 | dataspace = dataset.getSpace();//this is probably a copy rather than a reference, so dataspace now has nothing to do with dataset 126 | dataspace.selectHyperslab(H5S_SELECT_SET, count, offset, stride, block); 127 | 128 | // Write a subset of data to the dataset, then read the 129 | // entire dataset back from the file. 130 | 131 | cout << endl << "Write subset to file specifying: " << endl; 132 | #define PRT(x) x[0]<<"x"< 2 | #include 3 | #include 4 | #include 5 | 6 | #include "mymath.h" 7 | #include "config_parser.h" 8 | #include "gravity_tree.h" 9 | 10 | template 11 | inline void VectorAdd(double x[3], const T &y, double weight) 12 | { 13 | x[0]+=y[0]*weight; 14 | x[1]+=y[1]*weight; 15 | x[2]+=y[2]*weight; 16 | } 17 | 18 | inline void GravityTree_t::FillNodeCenter(HBTInt nodeid, const double center[3], double CoM[3], double mass) 19 | { 20 | Nodes[nodeid].way.s[0]=CoM[0]/mass; 21 | Nodes[nodeid].way.s[1]=CoM[1]/mass; 22 | Nodes[nodeid].way.s[2]=CoM[2]/mass; 23 | } 24 | 25 | void GravityTree_t::ProcessNode(HBTInt nodeid, HBTInt nextid, int sonid, double &mass, double CoM[3], double len, const double center[3]) 26 | { 27 | if(nodeidGetMass(nodeid); 30 | mass+=thismass; 31 | VectorAdd(CoM, Snapshot->GetComovingPosition(nodeid), thismass); 32 | 33 | NextnodeFromParticle[nodeid]=nextid; 34 | } 35 | else 36 | { 37 | if(len>=HBTConfig.TreeNodeResolution)//only divide if above resolution; 38 | UpdateInternalNodes(nodeid, nextid, len/2., center); 39 | else 40 | UpdateInternalNodes(nodeid, nextid, len, center);//otherwise we don't divide the node seriouly so we don't have finer node length 41 | 42 | double thismass=Nodes[nodeid].way.mass;//get mass after updating internal nodes 43 | mass+=thismass; 44 | VectorAdd(CoM, Nodes[nodeid].way.s, thismass); 45 | } 46 | } 47 | 48 | void GravityTree_t::UpdateInternalNodes(HBTInt no, HBTInt sib, double len, const double center[3]) 49 | { 50 | HBTInt p,pp,sons[8]; 51 | int j,jj,i; 52 | double mass=0., thismass; 53 | double CoM[3]={0.}; 54 | 55 | for(j=0;j<8;j++) 56 | sons[j]=Nodes[no].sons[j];//backup sons 57 | Nodes[no].way.len=len; 58 | Nodes[no].way.sibling=sib; 59 | for(i=0;sons[i]<0;i++);//find first son 60 | jj=i; 61 | pp=sons[jj]; 62 | Nodes[no].way.nextnode=pp; 63 | for(i++;i<8;i++)//find sons in pairs,ie. find sibling 64 | { 65 | if(sons[i]>=0)//ok, found a sibling 66 | { 67 | j=jj; 68 | p=pp; 69 | jj=i; 70 | pp=sons[jj]; 71 | ProcessNode(p, pp, j, mass, CoM, len, center); 72 | } 73 | } 74 | ProcessNode(pp, sib, jj, mass, CoM, len, center); 75 | Nodes[no].way.mass=mass; 76 | FillNodeCenter(no, center, CoM, mass); 77 | } 78 | 79 | double GravityTree_t::EvaluatePotential(const HBTxyz &targetPos, const HBTReal targetMass) 80 | /*return specific physical potential, GM/Rphysical. 81 | * targetPos[] is comoving. 82 | * if targetMass!=0, then the self-potential from targetMass is excluded. 83 | * do not set targetMass (i.e., keep to 0.) 
if target is outside the particlelist of tree*/ 84 | { 85 | bool IsPeriodic=HBTConfig.PeriodicBoundaryOn; 86 | OctTreeCell_t *nop = 0; 87 | HBTInt no; 88 | double r2, dx, dy, dz, mass, r, u, h, h_inv, wp; 89 | double pot, pos_x, pos_y, pos_z; 90 | 91 | pos_x = targetPos[0]; 92 | pos_y = targetPos[1]; 93 | pos_z = targetPos[2]; 94 | 95 | h = 2.8 * HBTConfig.SofteningHalo; 96 | h_inv = 1.0 / h; 97 | 98 | pot=targetMass/HBTConfig.SofteningHalo; //to cancle out the self-potential added during tree walk. 99 | 100 | no = NumberOfParticles;//start from root node 101 | 102 | while(no >= 0) 103 | { 104 | if(no < NumberOfParticles) /* single particle */ 105 | { 106 | auto &pos=Snapshot->GetComovingPosition(no); 107 | dx = pos[0] - pos_x; 108 | dy = pos[1] - pos_y; 109 | dz = pos[2] - pos_z; 110 | if(IsPeriodic) 111 | { 112 | dx=NEAREST(dx); 113 | dy=NEAREST(dy); 114 | dz=NEAREST(dz); 115 | } 116 | mass = Snapshot->GetMass(no); 117 | no = NextnodeFromParticle[no]; 118 | r2 = dx * dx + dy * dy + dz * dz; 119 | } 120 | else 121 | { 122 | nop = &Nodes[no]; 123 | dx = nop->way.s[0] - pos_x; 124 | dy = nop->way.s[1] - pos_y; 125 | dz = nop->way.s[2] - pos_z; 126 | if(IsPeriodic) 127 | { 128 | dx=NEAREST(dx); 129 | dy=NEAREST(dy); 130 | dz=NEAREST(dz); 131 | } 132 | mass = nop->way.mass; 133 | r2 = dx * dx + dy * dy + dz * dz; 134 | /* we have an internal node. Need to check opening criterion */ 135 | if((nop->way.len * nop->way.len )>( r2 * HBTConfig.TreeNodeOpenAngleSquare)) 136 | { 137 | /* open cell */ 138 | no = nop->way.nextnode; 139 | continue; 140 | } 141 | no = nop->way.sibling; /* node can be used */ 142 | } 143 | 144 | r = sqrt(r2); 145 | 146 | if(r >= h) 147 | pot -= mass / r; 148 | else 149 | { 150 | u = r * h_inv; 151 | 152 | if(u < 0.5) 153 | wp = -2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6)); 154 | else 155 | wp = 156 | -3.2 + 0.066666666667 / u + u * u * (10.666666666667 + 157 | u * (-16.0 + u * (9.6 - 2.133333333333 * u))); 158 | 159 | pot += mass * h_inv * wp; 160 | } 161 | } 162 | 163 | return pot*PhysicalConst::G/Snapshot->Cosmology.ScaleFactor; 164 | } 165 | 166 | double GravityTree_t::BindingEnergy(const HBTxyz& targetPos, const HBTxyz& targetVel, const HBTxyz& refPos, const HBTxyz& refVel, const HBTReal targetMass) 167 | /* return specific binding energy 168 | * input Pos comoving, Vel physical 169 | * targetMass optional, can be set to exclude self-potential if target is contained in the tree*/ 170 | { 171 | double pot=EvaluatePotential(targetPos, targetMass); 172 | HBTxyz dv; 173 | Snapshot->RelativeVelocity(targetPos, targetVel, refPos, refVel, dv); 174 | return VecNorm(dv)*0.5+pot; 175 | } 176 | 177 | template class OctTree_t;//to wake up the functions for this type; trick! 
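/* Usage sketch (names illustrative; compare the self-test below): the walk in
 * EvaluatePotential() uses the standard Barnes-Hut opening criterion -- an internal node of
 * side length len at distance r is opened when len/r exceeds TreeNodeOpenAngle, otherwise
 * its centre of mass and total mass are used as a single pseudo-particle. A typical call
 * sequence, assuming the tree exposes the base-class Reserve()/Build() as in the test code:
 *
 *   tree.Reserve(np);
 *   tree.Build(treesnap);
 *   double E = tree.BindingEnergy(pos, vel, refPos, refVel, m); // 0.5*|dv|^2 + potential
 *
 * where pos is comoving, vel is physical, refPos/refVel define the reference frame, and a
 * negative E is conventionally interpreted as bound. */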
178 | 179 | #ifdef TEST_gravity_tree 180 | #include "snapshot.h" 181 | #include "halo.h" 182 | 183 | int main(int argc, char **argv) 184 | { 185 | HBTConfig.ParseConfigFile("../configs/AqA5.conf"); 186 | HBTInt isnap=HBTConfig.MinSnapshotIndex; 187 | ParticleSnapshot_t snapshot; 188 | snapshot.Load(isnap); 189 | 190 | HaloSnapshot_t halo; 191 | halo.Load(isnap); 192 | halo.ParticleIdToIndex(snapshot); 193 | 194 | halo.AverageCoordinates(); 195 | Halo_t::ParticleList_t &P=halo.Halos[0].Particles; 196 | 197 | SnapshotView_t treesnap(P, snapshot); 198 | 199 | OctTree_t tree; 200 | tree.Reserve(2); 201 | tree.Build(treesnap); 202 | 203 | for(HBTInt i=0;i &particles) const 24 | { 25 | for(auto &&p: particles) 26 | p.index=restore_id(p.index); 27 | } 28 | }; 29 | class PositionSampleLattice_t: public PositionSampleBase_t 30 | {//sample by skipping. suitable for searching multiple samples in parallel 31 | int ThreadId, NumThreads; 32 | public: 33 | void init(int ithread, int nthread, PositionData_t *data) 34 | { 35 | Data=data; 36 | ThreadId=ithread; 37 | NumThreads=nthread; 38 | HBTInt n0=Data->size(); 39 | np=n0/nthread+((n0%nthread)>ithread); 40 | } 41 | HBTInt restore_id(HBTInt i) const//has to make it virtual to be overriden by derived class. otherwise the derived class will use the wrong []. 42 | { 43 | return i*NumThreads+ThreadId; 44 | } 45 | }; 46 | class PositionSampleBlock_t: public PositionSampleBase_t 47 | {//sample in blocks. suitable for merging. 48 | HBTInt offset; 49 | public: 50 | void init(int ithread, int nthread, PositionData_t *data) 51 | { 52 | AssignTasks(ithread, nthread, data->size(), offset, np); 53 | np-=offset; 54 | Data=data; 55 | } 56 | HBTInt restore_id(HBTInt i) const 57 | { 58 | return offset+i; 59 | } 60 | }; 61 | 62 | class LinkedlistPara_t 63 | {//built and searched in parallel. can be searched in serial as well. lower efficiency than Linkedlist_t, especially when built with larger number of threads. 64 | private: 65 | vector LLs; 66 | vector Samples; 67 | public: 68 | LinkedlistPara_t(int ndiv, PositionData_t *data, HBTReal boxsize=0., bool periodic=false); 69 | template 70 | void SearchSphere(HBTReal radius, const HBTxyz &searchcenter, ParticleCollector_t &collector) 71 | { 72 | SearchShell(-1., radius, searchcenter, collector); 73 | } 74 | void SearchSphereSerial(HBTReal radius, const HBTxyz &searchcenter, ParticleCollector_t &collector) 75 | { 76 | SearchShellSerial(-1., radius, searchcenter, collector); 77 | } 78 | template 79 | void SearchShell(HBTReal rmin, HBTReal rmax, const HBTxyz &searchcenter, ParticleCollector_t &collector); 80 | void SearchShellSerial(HBTReal rmin, HBTReal rmax, const HBTxyz &searchcenter, ParticleCollector_t &collector); 81 | template 82 | void SearchCylinder(HBTReal radius_z, HBTReal radius_p, const HBTxyz &searchcenter, ParticleCollector_t &collector); 83 | void SearchCylinderSerial(HBTReal radius_z, HBTReal radius_p, const HBTxyz &searchcenter, ParticleCollector_t &collector); 84 | }; 85 | 86 | class ReducibleCollector_t: public ParticleCollector_t 87 | { 88 | virtual void Reduce(ParticleCollector_t &final_collector)=0;//defines how to add the results from each thread together into final_collector. 
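/* Sketch of the intended reduction pattern (see SearchShell() below): each thread collects
 * hits into its own ReducibleCollector_t, and Reduce() is then called once per thread,
 * inside a critical section, to merge the thread-local results into the shared collector:
 *
 *   void Reduce(ParticleCollector_t &out)
 *   {
 *     for(auto &&p: MyFounds)            // MyFounds: hypothetical thread-local buffer
 *       out.Collect(p.index, p.d2);
 *   }
 *
 * ReducibleLocatedParticleCollector_t further down implements exactly this. */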
89 | }; 90 | 91 | template 92 | class ReducibleSampleCollector_t: public ParticleCollector_t 93 | { 94 | REDUCIBLECOLLECTOR Collector; 95 | PositionSampleBase_t &Sample; 96 | public: 97 | ReducibleSampleCollector_t(PositionSampleBase_t &sample): Sample(sample), Collector() 98 | {} 99 | void Collect(HBTInt id, HBTReal d2) 100 | { 101 | Collector.Collect(Sample.restore_id(id), d2); 102 | } 103 | void Reduce(ParticleCollector_t &collector) 104 | { 105 | Collector.Reduce(collector); 106 | } 107 | }; 108 | 109 | template 110 | void LinkedlistPara_t::SearchShell(HBTReal rmin, HBTReal rmax, const HBTxyz &searchcenter, ParticleCollector_t &collector) 111 | {//parallel version. not suitable for use inside another parallel region. must specify a ReducibleCollector_t template parameter to define Collect and Reduce method for each thread collector. 112 | #pragma omp parallel for 113 | for(int thread_id=0;thread_id thread_founds(Samples[thread_id]); 116 | LLs[thread_id].SearchShell(rmin, rmax, searchcenter, thread_founds); 117 | #pragma omp critical(insert_linklist_founds) //this prevents nested parallelization 118 | { 119 | thread_founds.Reduce(collector); 120 | } 121 | } 122 | } 123 | 124 | template 125 | void LinkedlistPara_t::SearchCylinder(HBTReal radius_z, HBTReal radius_p, const HBTxyz &searchcenter, ParticleCollector_t &collector) 126 | {//parallel version. not suitable for use inside another parallel region. must specify a ReducibleCollector_t template parameter to define Collect and Reduce method for each thread collector. 127 | #pragma omp parallel for 128 | for(int thread_id=0;thread_id thread_founds(Samples[thread_id]); 131 | LLs[thread_id].SearchCylinder(radius_z, radius_p, searchcenter, thread_founds); 132 | #pragma omp critical(insert_linklist_founds) //this prevents nested parallelization 133 | { 134 | thread_founds.Reduce(collector); 135 | } 136 | } 137 | } 138 | 139 | class ReducibleLocatedParticleCollector_t: public LocatedParticleCollector_t, public ReducibleCollector_t 140 | /* a simple collector to be used for parallel search; it keeps a local vector to store located particles in each thread, and then dump them to the final output collector*/ 141 | { 142 | public: 143 | ReducibleLocatedParticleCollector_t(HBTInt n_reserve = 0):LocatedParticleCollector_t(n_reserve) 144 | {} 145 | void Collect(HBTInt index, HBTReal d2) 146 | { 147 | LocatedParticleCollector_t::Collect(index, d2); 148 | } 149 | void Reduce(ParticleCollector_t &collector) 150 | { 151 | for(auto &&p: Founds) 152 | collector.Collect(p.index, p.d2); 153 | } 154 | }; 155 | 156 | class Linkedlist_t:public LinkedlistBase_t 157 | {//built in parallel 158 | private: 159 | vector LLs; 160 | vector Samples; 161 | void merge(); 162 | public: 163 | Linkedlist_t():LinkedlistBase_t() 164 | {} 165 | Linkedlist_t(int ndiv, PositionData_t *data, HBTReal boxsize=0., bool periodic=false, bool build_in_parallel=true):LinkedlistBase_t() 166 | { 167 | if(build_in_parallel) 168 | parallel_build(ndiv, data, boxsize, periodic); 169 | else 170 | build(ndiv, data, boxsize, periodic); 171 | } 172 | void parallel_build(int ndiv, PositionData_t *data, HBTReal boxsize=0., bool periodic=false); 173 | }; 174 | 175 | extern void LinkedlistLinkGroup(HBTReal radius, const Snapshot_t &snapshot, vector &GrpLen, vector &GrpTags, int ndiv=256); 176 | #endif 177 | -------------------------------------------------------------------------------- /src/subhalo_merge.cpp: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "datatypes.h" 8 | #include "snapshot_number.h" 9 | #include "subhalo.h" 10 | 11 | #define NumPartCoreMax 20 12 | #define DeltaCrit 2. 13 | 14 | struct SubHelper_t 15 | { 16 | HBTInt HostTrackId; 17 | bool IsMerged; 18 | HBTxyz ComovingPosition; 19 | HBTxyz PhysicalVelocity; 20 | float ComovingSigmaR; 21 | float PhysicalSigmaV; 22 | void BuildPosition(const Subhalo_t &sub); 23 | void BuildVelocity(const Subhalo_t &sub); 24 | SubHelper_t(): HostTrackId(-1), IsMerged(false) 25 | { 26 | } 27 | }; 28 | 29 | void SubHelper_t::BuildPosition(const Subhalo_t &sub) 30 | { 31 | if(0==sub.Nbound) 32 | { 33 | ComovingSigmaR=0.; 34 | return; 35 | } 36 | if(1==sub.Nbound) 37 | { 38 | ComovingSigmaR=0.; 39 | copyHBTxyz(ComovingPosition, sub.Particles[0].ComovingPosition); 40 | return; 41 | } 42 | 43 | HBTInt NumPart=sub.Nbound; 44 | if(NumPart>NumPartCoreMax) NumPart=NumPartCoreMax; 45 | 46 | HBTInt i,j; 47 | double sx[3],sx2[3], origin[3],msum; 48 | 49 | sx[0]=sx[1]=sx[2]=0.; 50 | sx2[0]=sx2[1]=sx2[2]=0.; 51 | msum=0.; 52 | if(HBTConfig.PeriodicBoundaryOn) 53 | for(j=0;j<3;j++) 54 | origin[j]=sub.Particles[0].ComovingPosition[j]; 55 | 56 | for(i=0;iNumPartCoreMax) NumPart=NumPartCoreMax; 98 | 99 | HBTInt i,j; 100 | double sx[3],sx2[3],msum; 101 | 102 | sx[0]=sx[1]=sx[2]=0.; 103 | sx2[0]=sx2[1]=sx2[2]=0.; 104 | msum=0.; 105 | 106 | for(i=0;i &Subhalos, vector &Helpers, int isnap) 137 | { 138 | #pragma omp for schedule(dynamic,1) 139 | for(HBTInt i=0;i=0) 145 | { 146 | if(Subhalos[HostId].Nbound>1)//avoid orphans or nulls as hosts 147 | { 148 | float delta=SinkDistance(Subhalos[i], Helpers[HostId]); 149 | if(delta1) //only need to unbind if a real sub sinks 154 | Helpers[HostId].IsMerged=true; 155 | break; 156 | } 157 | } 158 | HostId=Helpers[HostId].HostTrackId; 159 | } 160 | } 161 | } 162 | 163 | void FillHostTrackIds(vector &Helpers, const vector &Subhalos) 164 | { 165 | #pragma omp for schedule(dynamic,1) 166 | for(HBTInt i=0;i &Helpers, const vector &Subhalos) 174 | { 175 | #pragma omp for schedule(dynamic,1) 176 | for(HBTInt i=0;i &Helpers, const vector &Subhalos) 183 | { 184 | FillHostTrackIds(Helpers, Subhalos); 185 | FillCores(Helpers, Subhalos); 186 | } 187 | void SubhaloSnapshot_t::MergeSubhalos() 188 | { 189 | HBTInt NumHalos=MemberTable.SubGroups.size(); 190 | vector Helpers(Subhalos.size()); 191 | int isnap=GetSnapshotIndex(); 192 | 193 | #pragma omp parallel 194 | { 195 | GlueHeadNests(); 196 | FillHelpers(Helpers, Subhalos); 197 | 198 | DetectTraps(Subhalos, Helpers, isnap); 199 | } 200 | 201 | if(HBTConfig.MergeTrappedSubhalos) 202 | { 203 | #pragma omp parallel for schedule(dynamic,1) 204 | for(HBTInt grpid=0;grpid UniqueIds(np_max); 251 | for(auto &&p: host.Particles) UniqueIds.insert(p.Id); 252 | host.Particles.reserve(np_max); 253 | for(auto &&p: Particles) 254 | if(UniqueIds.insert(p.Id).second)//inserted, meaning not excluded 255 | host.Particles.push_back(p); 256 | host.Nbound+=Nbound; 257 | #endif 258 | 259 | Particles.resize(1); 260 | Nbound=1; 261 | CountParticles(); 262 | } 263 | -------------------------------------------------------------------------------- /src/mymath.h: -------------------------------------------------------------------------------- 1 | #ifndef MYMATH_HEADER_INCLUDED 2 | #define MYMATH_HEADER_INCLUDED 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | 
#include "datatypes.h" 19 | 20 | #define VecDot(x,y) ((x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]) 21 | #define VecNorm(x) VecDot(x,x) 22 | 23 | extern int GetGrid(HBTReal x, HBTReal step, int dim); 24 | extern int AssignCell(const HBTxyz & Pos, const HBTxyz &step, const vector &dims); 25 | 26 | template 27 | void VectorFree(vector &x) 28 | { 29 | vector ().swap(x); 30 | } 31 | 32 | template 33 | size_t CompileOffsets(const vector &Counts, vector &Offsets) 34 | { 35 | size_t offset=0; 36 | Offsets.resize(Counts.size()); 37 | for(size_t i=0;i 46 | size_t CompileOffsets(CountIterator_t CountBegin, CountIterator_t CountEnd, OffsetIterator_t OffsetBegin) 47 | { 48 | size_t offset=0; 49 | auto it_off=OffsetBegin; 50 | for(auto it=CountBegin;it!=CountEnd;++it) 51 | { 52 | *it_off++=offset; 53 | offset+=*it; 54 | } 55 | return offset; 56 | } 57 | 58 | template 59 | inline void RemoveFromVector(vector &v, UnaryPredicate p) 60 | { 61 | v.erase(remove_if(v.begin(), v.end(), p), v.end()); 62 | } 63 | 64 | class Timer_t 65 | { 66 | public: 67 | vector tickers; 68 | Timer_t() 69 | { 70 | tickers.reserve(20); 71 | } 72 | void Tick() 73 | { 74 | tickers.push_back(chrono::high_resolution_clock::now()); 75 | } 76 | void Tick(MPI_Comm comm) 77 | //synchronized tick. wait for all processes to tick together. 78 | { 79 | MPI_Barrier(comm); 80 | Tick(); 81 | } 82 | void Reset() 83 | { 84 | tickers.clear(); 85 | } 86 | size_t Size() 87 | { 88 | return tickers.size(); 89 | } 90 | int FixTickNum(int itick) 91 | { 92 | return itick<0?itick+Size():itick; 93 | } 94 | double GetSeconds(int itick=-1) 95 | /*get the time spent from the previous tick to the current tick 96 | * if itick not specified, return the current interval 97 | * if itick<0, it will be interpreted as end()+itick */ 98 | { 99 | itick=FixTickNum(itick); 100 | return GetSeconds(itick, itick-1); 101 | } 102 | double GetSeconds(int itick, int itick0) 103 | /*get the time spent from itick0 to itick*/ 104 | { 105 | itick=FixTickNum(itick); 106 | itick0=FixTickNum(itick0); 107 | if(itick >(tickers[itick]-tickers[itick0]).count(); 111 | } 112 | }; 113 | 114 | #define myfopen(filepointer,filename,filemode) if(!((filepointer)=fopen(filename,filemode))){fprintf(stderr,"Error opening file %s\n",filename); fflush(stderr); exit(1);} 115 | // #ifdef PERIODIC_BDR 116 | // #define NEAREST(x) (((x)>BOXHALF)?((x)-BOXSIZE):(((x)<-BOXHALF)?((x)+BOXSIZE):(x))) 117 | /*this macro can well manipulate boundary condition because 118 | * usually a halo is much smaller than boxhalf 119 | * so that any distance within the halo should be smaller than boxhalf */ 120 | // #endif 121 | #define get_bit(x,k) (((x)&(1<>k) 122 | extern int count_pattern_files(char *filename_pattern); 123 | // extern std::ostream& operator << (std::ostream& o, HBTxyz &a); 124 | extern void swap_Nbyte(void *data2swap,size_t nel,size_t mbyte); 125 | extern size_t SkipFortranBlock(FILE *fp, bool NeedByteSwap); 126 | template 127 | ostream& operator<<(ostream& o, const array& arr) 128 | { 129 | o<<"("; 130 | copy(arr.cbegin(), arr.cend(), ostream_iterator(o, ", ")); 131 | o<<")"; 132 | return o; 133 | } 134 | 135 | template 136 | ostream& operator<<(ostream& o, const vector& vec) 137 | { 138 | o<<"("; 139 | copy(vec.cbegin(), vec.cend(), ostream_iterator(o, ", ")); 140 | o<<")"; 141 | return o; 142 | } 143 | 144 | inline size_t fread_swap(void *buf,const size_t member_size, const size_t member_count,FILE *fp, const bool FlagByteSwap) 145 | { 146 | size_t Nread; 147 | 
Nread=std::fread(buf,member_size,member_count,fp); 148 | if(FlagByteSwap) 149 | swap_Nbyte(buf,member_count,member_size); 150 | return Nread; 151 | } 152 | inline bool file_exist(char * filename) 153 | { struct stat buffer; 154 | return (stat(filename, &buffer) == 0); 155 | } 156 | inline long int BytesToEOF(FILE *fp) 157 | { 158 | fpos_t fpos; 159 | fgetpos (fp,&fpos); 160 | 161 | long int offset=ftell(fp); 162 | fseek(fp, 0L, SEEK_END); 163 | long int offset_end=ftell (fp); 164 | 165 | fsetpos(fp, &fpos); 166 | 167 | return (offset_end-offset); 168 | } 169 | inline HBTReal position_modulus(HBTReal x, HBTReal boxsize) 170 | {//shift the positions to within [0,boxsize) 171 | HBTReal y; 172 | if(x>=0&&x 202 | class FortranBlock 203 | { 204 | vector Data; 205 | typedef T Txyz[3]; 206 | public: 207 | FortranBlock(): Data() 208 | { 209 | } 210 | FortranBlock(FILE *fp, const size_t n_read, const size_t n_skip, bool NeedByteSwap=false): Data(n_read) 211 | { 212 | Read(fp, n_read, n_skip, NeedByteSwap); 213 | } 214 | void Read(FILE *fp, const size_t n_read, const size_t n_skip, bool NeedByteSwap=false) 215 | /*read n_read members from the current block of fp. 216 | * skip n_skip elements before reading. 217 | * T specify the input datatype. if T and U has the same size, read directly into outbuffer; otherwise the elements are converted from type U to type T in a temporary buffer and then copied to outbuffer. 218 | */ 219 | { 220 | #define myfread(buf,size,count,fp) fread_swap(buf,size,count,fp,NeedByteSwap) 221 | #define ReadBlockSize(a) myfread(&a,sizeof(a),1,fp) 222 | int blocksize,blocksize2; 223 | ReadBlockSize(blocksize); 224 | size_t block_member_size=sizeof(T); 225 | Data.resize(n_read); 226 | fseek(fp, n_skip*block_member_size, SEEK_CUR); 227 | myfread(Data.data(), block_member_size, n_read, fp); 228 | fseek(fp, blocksize-(n_skip+n_read)*block_member_size, SEEK_CUR); 229 | ReadBlockSize(blocksize2); 230 | assert(blocksize==blocksize2); 231 | #undef ReadBlockSize 232 | #undef myfread 233 | } 234 | const T * data() 235 | { 236 | return Data.data(); 237 | } 238 | const T & operator [](const size_t index) 239 | { 240 | return Data[index]; 241 | } 242 | HBTInt size() const 243 | { 244 | return Data.size(); 245 | } 246 | T * begin() 247 | { 248 | return Data.data(); 249 | } 250 | T* end() 251 | { 252 | return Data.data()+Data.size(); 253 | } 254 | Txyz * data_reshape() 255 | { 256 | return (Txyz *)Data.data(); 257 | } 258 | }; 259 | 260 | extern int LargestRootFactor(int N, int dim); 261 | extern vector ClosestFactors(int N, int dim); 262 | extern void AssignTasks(HBTInt worker_id, HBTInt nworkers, HBTInt ntasks, HBTInt &task_begin, HBTInt &task_end); 263 | extern void logspace(double xmin,double xmax,int N, vector &x); 264 | 265 | #ifdef HAS_GSL 266 | extern void EigenAxis(double Ixx, double Ixy, double Ixz, double Iyy, double Iyz, double Izz, float Axis[3][3]); 267 | #endif 268 | 269 | #endif -------------------------------------------------------------------------------- /src/mymath.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "mymath.h" 6 | 7 | int GetGrid(HBTReal x, HBTReal step, int dim) 8 | { 9 | int i=floor(x/step); 10 | if(i<0) i=0; 11 | if(i>=dim) i=dim-1; 12 | return i; 13 | } 14 | int AssignCell(const HBTxyz & Pos, const HBTxyz &step, const vector &dims) 15 | { 16 | #define GRIDtoRank(g0,g1,g2) (((g0)*dims[1]+(g1))*dims[2]+(g2)) 17 | #define GID(i) GetGrid(Pos[i], step[i], dims[i]) 18 | 
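// Worked example (illustrative numbers): with dims={4,4,4} and step={25,25,25}
// on a 100 Mpc/h box, a position (10, 60, 99) falls in grid cell (0,2,3) and is
// assigned rank (0*4+2)*4+3 = 11. GetGrid() clamps each index to [0, dim-1], so
// positions marginally outside the box still map to an edge cell.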
return GRIDtoRank(GID(0), GID(1), GID(2)); 19 | #undef GID 20 | #undef GRIDtoRank 21 | } 22 | 23 | int count_pattern_files(char *filename_pattern) 24 | { 25 | glob_t globbuf; 26 | 27 | globbuf.gl_offs = 0; 28 | glob(filename_pattern, GLOB_ERR, NULL, &globbuf); 29 | // printf("couting %s...\n found %d files\n", filename_pattern, globbuf.gl_pathc); 30 | globfree(&globbuf); 31 | return globbuf.gl_pathc; 32 | } 33 | /* 34 | std::ostream& operator << (std::ostream& o, HBTxyz &a) 35 | { 36 | o << "(" << a[0] << ", " << a[1] << ", " << a[2] << ")"; 37 | return o; 38 | }; 39 | */ 40 | HBTInt compile_offset(HBTInt Len[], HBTInt Offset[], HBTInt n) 41 | {//fill offset info, and return total length. 42 | HBTInt i,offset; 43 | for(i=0,offset=0;i> 8) ) 52 | #define SWAP_4(x) ( ((x) << 24) | (((x) << 8) & 0x00ff0000) | (((x) >> 8) & 0x0000ff00) | ((unsigned) (x) >> 24) ) 53 | #define FIX_SHORT(x) (*(unsigned short *)&(x) = SWAP_2(*(unsigned short *)&(x))) 54 | #define FIX_LONG(x) (*(unsigned *)&(x) = SWAP_4(*(unsigned *)&(x))) 55 | //bit shift operation is invalid for 8byte+ data 56 | 57 | void swap_Nbyte(void *data2swap,size_t nel,size_t mbyte) 58 | /*This function is used to switch endian, for data2swap[nel] with element size mbyte*/ 59 | { 60 | size_t i,j; 61 | char *data, *old_data;//by definition, sizeof(char)=1, one byte 62 | 63 | data=(char *)data2swap; 64 | 65 | switch(mbyte) 66 | { 67 | case 1 :break; 68 | case 2 : 69 | for(j=0;j> 1; 165 | SWAP(arr[mid],arr[l+1]) 166 | if (arr[l+1] > arr[ir]) { 167 | SWAP(arr[l+1],arr[ir]) 168 | } 169 | if (arr[l] > arr[ir]) { 170 | SWAP(arr[l],arr[ir]) 171 | } 172 | if (arr[l+1] > arr[l]) { 173 | SWAP(arr[l+1],arr[l]) 174 | } 175 | i=l+1; 176 | j=ir; 177 | a=arr[l]; 178 | for (;;) { 179 | do i++; while (arr[i] < a); 180 | do j--; while (arr[j] > a); 181 | if (j < i) break; 182 | SWAP(arr[i],arr[j]) 183 | } 184 | arr[l]=arr[j]; 185 | arr[j]=a; 186 | if (j >= k) ir=j-1; 187 | if (j <= k) l=i; 188 | } 189 | } 190 | #undef SWAP 191 | } 192 | 193 | int LargestRootFactor(int N, int dim) 194 | /*find an integer factor of N that is the largest subject to x<=N**(1./dim)*/ 195 | { 196 | int x=floor(pow(N, 1./dim)); 197 | for(;x>0;x--) 198 | if(N%x==0) break; 199 | return x; 200 | } 201 | vector ClosestFactors(int N, int dim) 202 | /* return a factorization of `N` into `dim` factors that are as close as possible to each other*/ 203 | { 204 | vector factors; 205 | for(;dim>0;dim--) 206 | { 207 | int x=LargestRootFactor(N, dim); 208 | factors.push_back(x); 209 | N/=x; 210 | } 211 | // sort(factors.begin(), factors.end()); 212 | return factors; 213 | } 214 | 215 | void AssignTasks(HBTInt worker_id, HBTInt nworkers, HBTInt ntasks, HBTInt &task_begin, HBTInt &task_end) 216 | /*distribute ntasks to nworkers approximately fairly (equally if possible, otherwise the leading workers do one more task than others). 217 | * return the tasks assigned to worker_id as [task_begin, task_end). 
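 * e.g. 10 tasks over 4 workers are assigned as [0,3), [3,6), [6,8), [8,10) for worker_id=0..3.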
218 | * worker_id is in the range [0, nworkers).*/ 219 | { 220 | HBTInt ntask_remainder=ntasks%nworkers; 221 | HBTInt ntask_this=ntasks/nworkers;; 222 | task_begin=ntask_this*worker_id+min(ntask_remainder, worker_id);//distribute remainder to leading nodes 223 | if(worker_id 231 | #include 232 | void EigenAxis(double Ixx, double Ixy, double Ixz, double Iyy, double Iyz, double Izz, float Axis[3][3]) 233 | /*find the eigenvector and eigenvalue of the symmetric matrix 234 | *return Axis[3][3]: 3 eigenvectors (Axis[i] being vector-i), normalized such that the norm of each eigenvector gives its eigenvalue. 235 | * the eigenvalues are sorted in descending order 236 | */ 237 | { 238 | array matrix_data={Ixx, Ixy, Ixz, Ixy, Iyy, Iyz, Ixz, Iyz, Izz}; 239 | gsl_matrix_view matrix= gsl_matrix_view_array (matrix_data.data(), 3, 3); 240 | gsl_vector * eigen_values = gsl_vector_alloc (3); 241 | gsl_matrix * eigen_vecs= gsl_matrix_alloc(3,3); 242 | 243 | gsl_eigen_symmv_workspace * workspace= gsl_eigen_symmv_alloc (3);; 244 | gsl_eigen_symmv(&matrix.matrix, eigen_values, eigen_vecs, workspace); 245 | gsl_eigen_symmv_free (workspace); 246 | 247 | gsl_eigen_symmv_sort(eigen_values, eigen_vecs, GSL_EIGEN_SORT_VAL_DESC); 248 | for(int i=0;i<3;i++) 249 | { 250 | HBTReal lambda=gsl_vector_get(eigen_values, i); 251 | gsl_vector_view vec=gsl_matrix_column(eigen_vecs, i); 252 | for(int j=0;j<3;j++) 253 | Axis[i][j]=lambda*gsl_vector_get(&vec.vector, j); 254 | } 255 | gsl_vector_free (eigen_values); 256 | gsl_matrix_free (eigen_vecs); 257 | } 258 | #endif 259 | 260 | size_t SkipFortranBlock(FILE *fp, bool NeedByteSwap) 261 | { 262 | int blocksize,blocksize2; 263 | #define ReadBlockSize(a) fread_swap(&a,sizeof(a),1,fp, NeedByteSwap) 264 | ReadBlockSize(blocksize); 265 | fseek(fp, blocksize, SEEK_CUR); 266 | ReadBlockSize(blocksize2); 267 | assert(blocksize==blocksize2); 268 | return blocksize; 269 | #undef ReadBlockSize 270 | } 271 | 272 | void logspace(double xmin,double xmax,int N, vector &x) 273 | { 274 | x.resize(N); 275 | int i; 276 | double dx; 277 | x[0]=xmin;x[N-1]=xmax; 278 | xmin=log(xmin); 279 | xmax=log(xmax); 280 | dx=exp((xmax-xmin)/(N-1)); 281 | for(i=1;i 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "datatypes.h" 11 | #include "mymath.h" 12 | #include "config_parser.h" 13 | #include "snapshot_number.h" 14 | #include "hash.h" 15 | #include "mpi_wrapper.h" 16 | 17 | struct Cosmology_t 18 | { 19 | HBTReal OmegaM0; 20 | HBTReal OmegaLambda0; 21 | HBTReal ScaleFactor; 22 | 23 | //derived parameters: 24 | HBTReal Hz; //current Hubble param in internal units 25 | HBTReal OmegaZ; 26 | 27 | void Set(double scalefactor, double omega0, double omegaLambda0) 28 | { 29 | OmegaM0=omega0; 30 | OmegaLambda0=omegaLambda0; 31 | ScaleFactor=scalefactor; 32 | double Hratio=sqrt(omega0 / (scalefactor * scalefactor * scalefactor) 33 | + (1 - omega0 - omegaLambda0) / (scalefactor * scalefactor) 34 | + omegaLambda0);//Hubble param for the current catalogue; 35 | 36 | Hz=Hratio*PhysicalConst::H0; 37 | OmegaZ=omega0/(scalefactor*scalefactor*scalefactor)/Hratio/Hratio; 38 | } 39 | }; 40 | 41 | struct RadMassVel_t 42 | { 43 | HBTReal r, m, v; 44 | RadMassVel_t(){}; 45 | RadMassVel_t(HBTReal r, HBTReal m):r(r), m(m) 46 | {} 47 | RadMassVel_t(HBTReal r, HBTReal m, HBTReal v):r(r),m(m),v(v) 48 | {} 49 | }; 50 | 51 | struct Particle_t 52 | { 53 | HBTInt Id; 54 | HBTxyz ComovingPosition; 55 | HBTxyz PhysicalVelocity; 56 | HBTReal Mass; 57 | #ifndef DM_ONLY 58 | #ifdef HAS_THERMAL_ENERGY 59 | HBTReal 
InternalEnergy; 60 | #endif 61 | ParticleType_t Type; 62 | #endif 63 | void create_MPI_type(MPI_Datatype &dtype); 64 | Particle_t(){}; 65 | Particle_t(HBTInt id): Id(id) 66 | { 67 | } 68 | bool operator==(const Particle_t &other) const 69 | { 70 | return Id==other.Id; 71 | } 72 | }; 73 | extern ostream& operator << (ostream& o, Particle_t &p); 74 | 75 | 76 | class Snapshot_t: public SnapshotNumber_t 77 | { 78 | public: 79 | Cosmology_t Cosmology; 80 | // Snapshot_t()=default; 81 | virtual HBTInt size() const=0; 82 | virtual HBTInt GetId(const HBTInt index) const 83 | { 84 | return index; 85 | } 86 | virtual const HBTxyz & GetComovingPosition(const HBTInt index) const=0; 87 | virtual const HBTxyz & GetPhysicalVelocity(const HBTInt index) const=0; 88 | virtual HBTReal GetMass(const HBTInt index) const=0; 89 | virtual HBTReal GetInternalEnergy(HBTInt index) const 90 | { 91 | return 0.; 92 | } 93 | void SphericalOverdensitySize(float &Mvir, float &Rvir, HBTReal VirialFactor, const vector &RSorted, HBTReal ParticleMass) const; 94 | void SphericalOverdensitySize(float &Mvir, float &Rvir, HBTReal VirialFactor, const vector &prof) const; 95 | void SphericalOverdensitySize2(float &Mvir, float &Rvir, HBTReal VirialFactor, const vector &RSorted, HBTReal ParticleMass) const; 96 | void HaloVirialFactors(HBTReal &virialF_tophat, HBTReal &virialF_b200, HBTReal &virialF_c200) const; 97 | void RelativeVelocity(const HBTxyz& targetPos, const HBTxyz& targetVel, const HBTxyz& refPos, const HBTxyz& refVel, HBTxyz& relativeVel) const; 98 | }; 99 | 100 | inline void Snapshot_t::RelativeVelocity(const HBTxyz& targetPos, const HBTxyz& targetVel, const HBTxyz& refPos, const HBTxyz& refVel, HBTxyz& relativeVel) const 101 | { 102 | HBTxyz dx; 103 | HBTxyz &dv=relativeVel; 104 | for(int j=0;j<3;j++) 105 | { 106 | dx[j]=targetPos[j]-refPos[j]; 107 | if(HBTConfig.PeriodicBoundaryOn) dx[j]=NEAREST(dx[j]); 108 | dv[j]=targetVel[j]-refVel[j]; 109 | dv[j]+=Cosmology.Hz*Cosmology.ScaleFactor*dx[j]; 110 | } 111 | } 112 | 113 | class SnapshotView_t: public Snapshot_t 114 | { 115 | public: 116 | HBTInt * Ids; 117 | HBTInt N; 118 | Snapshot_t & Snapshot; 119 | SnapshotView_t(vector & ids, Snapshot_t & fullsnapshot): Ids(ids.data()), N(ids.size()), Snapshot(fullsnapshot), Snapshot_t(fullsnapshot) 120 | { 121 | }; 122 | SnapshotView_t(VectorView_t &ids, Snapshot_t & fullsnapshot): Ids(ids.data()), N(ids.size()), Snapshot(fullsnapshot), Snapshot_t(fullsnapshot) 123 | { 124 | }; 125 | SnapshotView_t(HBTInt *ids, HBTInt n, Snapshot_t & fullsnapshot): Ids(ids), N(n), Snapshot(fullsnapshot), Snapshot_t(fullsnapshot) 126 | { 127 | }; 128 | void ReSize(HBTInt n) 129 | { 130 | N=n; 131 | } 132 | HBTInt size() const 133 | { 134 | return N; 135 | } 136 | HBTInt GetId(HBTInt i) const 137 | { 138 | return Snapshot.GetId(Ids[i]); 139 | } 140 | HBTReal GetMass(HBTInt i) const 141 | { 142 | return Snapshot.GetMass(Ids[i]); 143 | } 144 | const HBTxyz & GetPhysicalVelocity(HBTInt i) const 145 | { 146 | return Snapshot.GetPhysicalVelocity(Ids[i]); 147 | } 148 | const HBTxyz & GetComovingPosition(HBTInt i) const 149 | { 150 | return Snapshot.GetComovingPosition(Ids[i]); 151 | } 152 | }; 153 | 154 | class ParticleSnapshot_t: public Snapshot_t 155 | { 156 | typedef vector IndexList_t; 157 | 158 | FlatIndexTable_t FlatHash; 159 | MappedIndexTable_t MappedHash; 160 | IndexTable_t *ParticleHash; 161 | 162 | void ExchangeParticles(MpiWorker_t &world); 163 | void PartitionParticles(MpiWorker_t &world, vector &offset); 164 | bool IsContiguousId(MpiWorker_t 
&world, HBTInt &GlobalIdMin); 165 | HBTInt IdMin, IdMax; 166 | public: 167 | vector Particles; 168 | HBTInt NumberOfParticlesOnAllNodes; 169 | vector ProcessIdRanges; //IdRange on each processor is [ProcessIdRanges[i], ProcessIdRanges[i+1]). 170 | 171 | ParticleSnapshot_t(): Snapshot_t(), Particles(), ParticleHash(), MappedHash(), FlatHash(), NumberOfParticlesOnAllNodes(0) 172 | { 173 | if(HBTConfig.ParticleIdNeedHash) 174 | ParticleHash=&MappedHash; 175 | else 176 | ParticleHash=&FlatHash; 177 | } 178 | ParticleSnapshot_t(MpiWorker_t &world, int snapshot_index, bool fill_particle_hash=true): ParticleSnapshot_t() 179 | { 180 | Load(world, snapshot_index, fill_particle_hash); 181 | } 182 | ~ParticleSnapshot_t() 183 | { 184 | Clear();//not necessary 185 | } 186 | void FillParticleHash(); 187 | void ClearParticleHash(); 188 | 189 | HBTInt size() const; 190 | HBTInt GetId(HBTInt index) const; 191 | HBTInt GetIndex(HBTInt particle_id) const; 192 | HBTInt GetIndex(Particle_t & particle) const; 193 | template 194 | void GetIndices(ParticleIdList_t &particles) const; 195 | const HBTxyz & GetComovingPosition(HBTInt index) const; 196 | const HBTxyz & GetPhysicalVelocity(HBTInt index) const; 197 | HBTReal GetMass(HBTInt index) const; 198 | HBTReal GetInternalEnergy(HBTInt index) const; 199 | ParticleType_t GetParticleType(HBTInt index) const; 200 | 201 | void Load(MpiWorker_t &world, int snapshot_index, bool fill_particle_hash=true); 202 | void Clear(); 203 | 204 | void AveragePosition(HBTxyz & CoM, const HBTInt Particles[], HBTInt NumPart) const; 205 | void AverageVelocity(HBTxyz & CoV, const HBTInt Particles[], HBTInt NumPart) const; 206 | 207 | template 208 | void ExchangeHalos(MpiWorker_t &world, vector & InHalos, vector & OutHalos, MPI_Datatype MPI_Halo_Shell_Type) const; 209 | }; 210 | inline HBTInt ParticleSnapshot_t::size() const 211 | { 212 | return Particles.size(); 213 | } 214 | inline HBTInt ParticleSnapshot_t::GetId(HBTInt index) const 215 | { 216 | return Particles[index].Id; 217 | } 218 | inline HBTInt ParticleSnapshot_t::GetIndex(HBTInt particle_id) const 219 | { 220 | return ParticleHash->GetIndex(particle_id); 221 | } 222 | inline HBTInt ParticleSnapshot_t::GetIndex(Particle_t & particle) const 223 | { 224 | return ParticleHash->GetIndex(particle.Id); 225 | } 226 | inline const HBTxyz& ParticleSnapshot_t::GetComovingPosition(HBTInt index) const 227 | { 228 | return Particles[index].ComovingPosition; 229 | } 230 | inline const HBTxyz& ParticleSnapshot_t::GetPhysicalVelocity(HBTInt index) const 231 | { 232 | return Particles[index].PhysicalVelocity; 233 | } 234 | inline HBTReal ParticleSnapshot_t::GetMass(HBTInt index) const 235 | { 236 | return Particles[index].Mass; 237 | } 238 | inline HBTReal ParticleSnapshot_t::GetInternalEnergy(HBTInt index) const 239 | { 240 | #if !defined(DM_ONLY) && defined(HAS_THERMAL_ENERGY) 241 | return Particles[index].InternalEnergy; 242 | #else 243 | return 0.; 244 | #endif 245 | } 246 | inline ParticleType_t ParticleSnapshot_t::GetParticleType(HBTInt index) const 247 | { 248 | #ifdef DM_ONLY 249 | return TypeDM; 250 | #else 251 | return Particles[index].Type; 252 | #endif 253 | } 254 | 255 | extern double AveragePosition(HBTxyz& CoM, const Particle_t Particles[], HBTInt NumPart); 256 | extern double AverageVelocity(HBTxyz& CoV, const Particle_t Particles[], HBTInt NumPart); 257 | #endif -------------------------------------------------------------------------------- /src/mpi_wrapper.h: 
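// Notes on snapshot.h above (illustrative, not part of the original sources):
// - Snapshot_t::RelativeVelocity() returns the peculiar velocity difference
//   plus the Hubble flow across the (periodic-wrapped) separation,
//   dv[j] = (targetVel-refVel)[j] + Hz*ScaleFactor*dx[j], with
//   Hz = H0*sqrt(OmegaM0/a^3 + (1-OmegaM0-OmegaLambda0)/a^2 + OmegaLambda0)
//   as computed in Cosmology_t::Set().
// - ParticleSnapshot_t resolves particle ids through ParticleHash, which points
//   to the hash map when HBTConfig.ParticleIdNeedHash is set and to the flat
//   table otherwise. A minimal usage sketch, assuming an MpiWorker_t `world`,
//   a snapshot index `isnap` and a particle id `pid`:
//
//     ParticleSnapshot_t snap(world, isnap);            // loads and fills the hash
//     HBTInt index=snap.GetIndex(pid);                  // id -> local index
//     const HBTxyz &x=snap.GetComovingPosition(index);
//     snap.ClearParticleHash();                         // release the table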
-------------------------------------------------------------------------------- 1 | #ifndef HBT_MPI_WRAPPER_H 2 | #define HBT_MPI_WRAPPER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "datatypes.h" 12 | #include "mymath.h" 13 | 14 | //fix deprecated MPI_Address function 15 | #if defined MPI_VERSION && MPI_VERSION >= 2 16 | #define MPI_Address(a,b) MPI_Get_address(a,b) 17 | #endif 18 | 19 | class MpiWorker_t 20 | { 21 | public: 22 | int NumberOfWorkers, WorkerId, NameLen; 23 | int NextWorkerId, PrevWorkerId;//for ring communication 24 | char HostName[MPI_MAX_PROCESSOR_NAME]; 25 | MPI_Comm Communicator; //do not use reference 26 | MpiWorker_t(MPI_Comm comm): Communicator(comm) //the default initializer will copy a handle? doesn't matter. 27 | { 28 | MPI_Comm_size(comm,&NumberOfWorkers); 29 | MPI_Comm_rank(comm,&WorkerId); 30 | MPI_Get_processor_name(HostName, &NameLen); 31 | NextWorkerId=WorkerId+1; 32 | if(NextWorkerId==NumberOfWorkers) NextWorkerId=0; 33 | PrevWorkerId=WorkerId-1; 34 | if(PrevWorkerId<0) PrevWorkerId=NumberOfWorkers-1; 35 | } 36 | int size() 37 | { 38 | return NumberOfWorkers; 39 | } 40 | int rank() 41 | { 42 | return WorkerId; 43 | } 44 | int next() 45 | { 46 | return NextWorkerId; 47 | } 48 | int prev() 49 | { 50 | return PrevWorkerId; 51 | } 52 | int RankAdd(int diff) 53 | { 54 | return (WorkerId+diff)%NumberOfWorkers; 55 | } 56 | template 57 | void SyncContainer(T &x, MPI_Datatype dtype, int root_worker); 58 | template 59 | void SyncAtom(T &x, MPI_Datatype dtype, int root_worker); 60 | void SyncAtomBool(bool &x, int root); 61 | void SyncVectorBool(vector &x, int root); 62 | void SyncVectorString(vector &x, int root); 63 | }; 64 | 65 | template 66 | void MpiWorker_t::SyncContainer(T &x, MPI_Datatype dtype, int root_worker) 67 | { 68 | int len; 69 | 70 | if(root_worker==WorkerId) 71 | { 72 | len=x.size(); 73 | if(len>=INT_MAX) 74 | throw runtime_error("Error: in SyncContainer(), sending more than INT_MAX elements with MPI causes overflow.\n"); 75 | } 76 | MPI_Bcast(&len, 1, MPI_INT, root_worker, Communicator); 77 | 78 | if(root_worker!=WorkerId) 79 | x.resize(len); 80 | MPI_Bcast((void *)x.data(), len, dtype, root_worker, Communicator); 81 | }; 82 | template 83 | inline void MpiWorker_t::SyncAtom(T& x, MPI_Datatype dtype, int root_worker) 84 | { 85 | MPI_Bcast(&x, 1, dtype, root_worker, Communicator); 86 | } 87 | 88 | template 89 | void VectorAllToAll(MpiWorker_t &world, vector < vector > &SendVecs, vector < vector > &ReceiveVecs, MPI_Datatype dtype) 90 | { 91 | vector SendSizes(world.size()), ReceiveSizes(world.size()); 92 | for(int i=0;i SendTypes(world.size()), ReceiveTypes(world.size()); 101 | for(int i=0;i Counts(world.size(),1), Disps(world.size(),0); 113 | MPI_Alltoallw(MPI_BOTTOM, Counts.data(), Disps.data(), SendTypes.data(), 114 | MPI_BOTTOM, Counts.data(), Disps.data(), ReceiveTypes.data(), world.Communicator); 115 | 116 | for(int i=0;i 125 | void MyAllToAll(MpiWorker_t &world, vector InParticleIterator, const vector &InParticleCount, vector OutParticleIterator, MPI_Datatype MPI_Particle_T) 126 | /*break the task into smaller pieces to avoid message size overflow 127 | * allocate a temporary buffer of type Particle_T to copy from InParticleIterator, send around, and copy out to OutParticleIterator. 128 | InParticleIterator should point to data directly assignable to Particle_T. 129 | OutParticleIterator should point to data directly assignable from Particle_T. 
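The exchange is carried out in Nloop rounds of roughly chunksize particles per process, so each underlying MPI_Alltoallv call keeps its (int-typed) counts small.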
130 | MPI_Particle_T specifies the mpi datatype for Particle_T. 131 | */ 132 | { 133 | //determine loops 134 | const int chunksize=1024*1024; 135 | HBTInt InParticleSum=accumulate(InParticleCount.begin(), InParticleCount.end(), (HBTInt)0); 136 | HBTInt Nloop=ceil(1.*InParticleSum/chunksize); 137 | MPI_Allreduce(MPI_IN_PLACE, &Nloop, 1, MPI_HBT_INT, MPI_MAX, world.Communicator); 138 | if(0==Nloop) return; 139 | //prepare loop size 140 | vector SendParticleCounts(world.size()), RecvParticleCounts(world.size()), SendParticleDisps(world.size()), RecvParticleDisps(world.size()); 141 | vector SendParticleRemainder(world.size()); 142 | for(int rank=0;rank SendBuffer(chunksize+world.size()), RecvBuffer; 150 | for(HBTInt iloop=0;iloop 193 | void MyBcast(MpiWorker_t &world, InParticleIterator_T InParticleIterator, OutParticleIterator_T OutParticleIterator, HBTInt &ParticleCount, MPI_Datatype MPI_Particle_T, int root) 194 | /*break the task into smaller pieces to avoid message size overflow 195 | InParticleIterator only significant at root, and should be different from OutParticleIterator. 196 | ParticleCount automatically broadcasted from root to every process. 197 | */ 198 | { 199 | MPI_Bcast(&ParticleCount, 1, MPI_HBT_INT, root, world.Communicator); 200 | //determine loops 201 | const int chunksize=1024*1024; 202 | HBTInt Nloop=ceil(1.*ParticleCount/chunksize); 203 | if(0==Nloop) return; 204 | int buffersize=ParticleCount/Nloop+1, nremainder=ParticleCount%Nloop; 205 | //transmit 206 | vector buffer(buffersize); 207 | for(HBTInt iloop=0;iloop
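// Usage sketch for MpiWorker_t / SyncContainer() above (illustrative, not part
// of mpi_wrapper.h). SyncContainer() broadcasts a resizable container from one
// rank to all others by first sharing its length and then its contents;
// MPI_HBT_INT is the MPI datatype matching HBTInt used elsewhere in this header.
//
//   MpiWorker_t world(MPI_COMM_WORLD);
//   vector<HBTInt> ids;
//   if(world.rank()==0) ids={1,2,3};
//   world.SyncContainer(ids, MPI_HBT_INT, 0);   // every rank now holds {1,2,3}
//
// VectorAllToAll() above follows the same spirit for all-to-all exchanges: it
// builds MPI_Type_create_hindexed views over the existing vectors and calls
// MPI_Alltoallw with MPI_BOTTOM, so no intermediate packing buffer is needed.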