├── config-makefile
├── Config.sh.template
├── Makefile.systype
├── src
│   ├── domain
│   │   ├── domain_rearrange.c
│   │   ├── domain_counttogo.c
│   │   ├── domain_vars.c
│   │   ├── domain_box.c
│   │   ├── domain_sort_kernels.c
│   │   ├── pqueue.h
│   │   ├── domain_exchange.c
│   │   ├── domain.h
│   │   ├── pqueue.c
│   │   ├── domain.c
│   │   └── domain_toplevel.c
│   ├── mpi_utils
│   │   ├── hypercube_allgatherv.c
│   │   ├── sizelimited_sendrecv.c
│   │   ├── mpi_util.c
│   │   └── checksummed_sendrecv.c
│   ├── disk.c
│   ├── bulge.c
│   ├── forcetree
│   │   ├── forcetree.h
│   │   ├── forcetree_walk.c
│   │   └── forcetree_optimizebalance.c
│   ├── init.c
│   ├── allocate.c
│   ├── structure.c
│   ├── halo.c
│   ├── system.c
│   ├── proto.h
│   ├── set_particles.c
│   ├── orbit_response.c
│   └── allvars.c
├── prepare-config.perl
├── README.md
├── Makefile.lib
├── Makefile.template
├── Model_M1.param
├── Model_H3.param
├── Model_D3.param
├── Model_D1.param
├── Model_H2.param
└── Model_H1.param
/config-makefile:
--------------------------------------------------------------------------------
1 | RESULT := $(shell mkdir -p $(BUILD_DIR) )
2 |
3 | all: $(BUILD_DIR)/galicconfig.h
4 |
5 | $(BUILD_DIR)/galicconfig.h: $(CONFIG)
6 | 	$(PERL) prepare-config.perl $(CONFIG) $(BUILD_DIR)
7 |
--------------------------------------------------------------------------------
/Config.sh.template:
--------------------------------------------------------------------------------
1 | #!/bin/bash    # this line is only here to enable syntax highlighting in this file
2 |
3 |
4 | #---------------------------------------- Single/Double Precision
5 | DOUBLEPRECISION=1
6 | #OUTPUT_IN_DOUBLEPRECISION    # snapshot files will be written in double precision
7 |
8 |
9 | #--------------------------------------- Output/Input options
10 | #HAVE_HDF5    # needed when HDF5 I/O support is desired
11 |
12 |
13 | #DEBUG_ENABLE_FPU_EXCEPTIONS    # enables floating point exceptions
14 |
15 | ##---------------------------- Modifications
16 | VER_1_1    # enables version GALIC 1.1 with velocity dispersions patch
17 | #VAR_1_1_KPARAMETER_MOD    # changes vstr = k*vphi (experimental!)
18 | #VER_1_1_GNUPLOT_LOG
19 |
--------------------------------------------------------------------------------
/Makefile.systype:
--------------------------------------------------------------------------------
1 | # Select Target Computer
2 | #
3 | # Please copy this file to Makefile.systype and uncomment your
4 | # system. Don't commit changes to this file unless you add support for
5 | # a new system.
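# The chosen SYSTYPE can also be given on the make command line, e.g.
#
#     make SYSTYPE="Ubuntu"
#
# since command-line variable assignments override the assignment below
# (standard GNU make behavior).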
6 | 7 | SYSTYPE="APHI" 8 | #SYSTYPE="Curie" 9 | #SYSTYPE="Hermite" 10 | #SYSTYPE="Ranger_pgi" 11 | #SYSTYPE="Ranger_intel" 12 | #SYSTYPE="lonestar" 13 | #SYSTYPE="Kraken_pgi" 14 | #SYSTYPE="aurora" 15 | #SYSTYPE="hecate" 16 | #SYSTYPE="Darwin" 17 | #SYSTYPE="Darwin-mpich" 18 | #SYSTYPE="MBM" 19 | #SYSTYPE="Magny" 20 | #SYSTYPE="Magny-Intel" 21 | #SYSTYPE="Nehalem" 22 | #SYSTYPE="OpenSuse" 23 | #SYSTYPE="OpenSuse64" 24 | #SYSTYPE="OpenSuse64-cuda" 25 | #SYSTYPE="Judge" 26 | #SYSTYPE="HLRB2" 27 | #SYSTYPE="OPA-Cluster64-Intel" 28 | #SYSTYPE="OPA-Cluster64-Gnu" 29 | #SYSTYPE="Odin" 30 | #SYSTYPE="OpteronMPA-Gnu" 31 | #SYSTYPE="OpteronMPA-Intel" 32 | #SYSTYPE="MPA" 33 | #SYSTYPE="VIP" 34 | #SYSTYPE="odyssey" 35 | #SYSTYPE="odyssey-intel" 36 | #SYSTYPE="odyssey-opteron" 37 | #SYSTYPE="Ubuntu" 38 | #SYSTYPE="Centos5-intel" 39 | #SYSTYPE="Centos5-Gnu" 40 | -------------------------------------------------------------------------------- /src/domain/domain_rearrange.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | void domain_rearrange_particle_sequence(void) 15 | { 16 | #ifdef USE_SFR 17 | if(Stars_converted) 18 | { 19 | struct particle_data psave; 20 | peanokey key; 21 | 22 | int i; 23 | for(i = 0; i < NumGas; i++) 24 | if((P[i].Type & 15) != 0) /*If not a gas particle, swap to the end of the list */ 25 | { 26 | psave = P[i]; 27 | key = Key[i]; 28 | 29 | P[i] = P[NumGas - 1]; 30 | SphP[i] = SphP[NumGas - 1]; 31 | Key[i] = Key[NumGas - 1]; 32 | 33 | P[NumGas - 1] = psave; 34 | Key[NumGas - 1] = key; 35 | 36 | NumGas--; 37 | i--; 38 | } 39 | /*Now we have rearranged the particles, 40 | *we don't need to do it again unless there are more stars*/ 41 | Stars_converted = 0; 42 | } 43 | #endif 44 | 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/domain/domain_counttogo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | 15 | /*! This function determines how many particles that are currently stored 16 | * on the local CPU have to be moved off according to the domain 17 | * decomposition. 
18 | */ 19 | int domain_countToGo(void) 20 | { 21 | int n; 22 | 23 | for(n = 0; n < NTask; n++) 24 | { 25 | toGo[n] = 0; 26 | } 27 | 28 | 29 | for(n = 0; n < NumPart; n++) 30 | { 31 | int no = 0; 32 | 33 | while(topNodes[no].Daughter >= 0) 34 | no = topNodes[no].Daughter + (Key[n] - topNodes[no].StartKey) / (topNodes[no].Size / 8); 35 | 36 | no = topNodes[no].Leaf; 37 | 38 | if(DomainTask[no] != ThisTask) 39 | { 40 | toGo[DomainTask[no]] += 1; 41 | } 42 | } 43 | 44 | MPI_Alltoall(toGo, 1, MPI_INT, toGet, 1, MPI_INT, MPI_COMM_WORLD); 45 | 46 | return 0; 47 | } 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /prepare-config.perl: -------------------------------------------------------------------------------- 1 | 2 | # This file processes the configurations options in Config.sh, producing 3 | # two files: 4 | # 5 | # galicconfig.h to be included in each source file (via allvars.h) 6 | # compile_time_info.c code to be compiled in, which will print the configuration 7 | # 8 | if( @ARGV != 2) 9 | { 10 | print "usage: perl prepare-config.perl \n"; 11 | exit; 12 | } 13 | 14 | open(FILE, @ARGV[0]); 15 | $path = @ARGV[1]; 16 | 17 | 18 | open(OUTFILE, ">${path}/galicconfig.h"); 19 | open(COUTF, ">${path}/compile_time_info.c"); 20 | 21 | print COUTF "#include \n"; 22 | print COUTF "void output_compile_time_options(void)\n\{\n"; 23 | print COUTF "printf(\n"; 24 | 25 | while($line=) 26 | { 27 | chop $line; 28 | 29 | @fields = split ' ' , $line; 30 | 31 | if(substr($fields[0], 0, 1) ne "#") 32 | { 33 | if(length($fields[0]) > 0) 34 | { 35 | @subfields = split '=', $fields[0]; 36 | 37 | print OUTFILE "#define $subfields[0] $subfields[1]\n"; 38 | print COUTF "\" $fields[0]\\n\"\n"; 39 | } 40 | } 41 | } 42 | 43 | print COUTF "\"\\n\");\n"; 44 | print COUTF "\}\n"; 45 | -------------------------------------------------------------------------------- /src/mpi_utils/hypercube_allgatherv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | #ifdef MPI_HYPERCUBE_ALLGATHERV 12 | 13 | #define TAG 100 14 | 15 | int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, 16 | MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, MPI_Datatype recvtype, MPI_Comm comm) 17 | { 18 | int ntask, thistask, ptask, ngrp, size_sendtype, size_recvtype; 19 | MPI_Status status; 20 | 21 | MPI_Comm_rank(comm, &thistask); 22 | MPI_Comm_size(comm, &ntask); 23 | 24 | MPI_Type_size(sendtype, &size_sendtype); 25 | MPI_Type_size(recvtype, &size_recvtype); 26 | 27 | for(ptask = 0; ntask > (1 << ptask); ptask++); 28 | 29 | for(ngrp = 1; ngrp < (1 << ptask); ngrp++) 30 | { 31 | int recvtask = thistask ^ ngrp; 32 | 33 | if(recvtask < ntask) 34 | MPI_Sendrecv(sendbuf, sendcount, sendtype, recvtask, TAG, 35 | recvbuf + displs[recvtask] * size_recvtype, recvcount[recvtask], recvtype, recvtask, TAG, comm, &status); 36 | } 37 | 38 | if(sendbuf != recvbuf + displs[thistask] * size_recvtype) 39 | memcpy(recvbuf + displs[thistask] * size_recvtype, sendbuf, sendcount * size_sendtype); 40 | 41 | return 0; 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /src/domain/domain_vars.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | 
#include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | struct domain_peano_hilbert_data *mp; 14 | 15 | struct local_topnode_data *topNodes, *branchNodes; /*!< points to the root node of the top-level tree */ 16 | 17 | 18 | double totpartcount; 19 | 20 | struct domain_cost_data *DomainLeaveNode; 21 | 22 | double fac_load; 23 | 24 | int Nbranch; 25 | 26 | /*! toGo[partner] gives the number of particles on the current task that have to go to task 'partner' 27 | */ 28 | int *toGo; 29 | int *toGet; 30 | int *list_NumPart; 31 | int *list_load; 32 | 33 | 34 | 35 | 36 | void domain_allocate_lists(void) 37 | { 38 | Key = (peanokey *) mymalloc_movable(&Key, "domain_key", (sizeof(peanokey) * All.MaxPart)); 39 | toGo = (int *) mymalloc_movable(&toGo, "toGo", (sizeof(int) * NTask)); 40 | toGet = (int *) mymalloc_movable(&toGet, "toGet", (sizeof(int) * NTask)); 41 | list_NumPart = (int *) mymalloc_movable(&list_NumPart, "list_NumPart", (sizeof(int) * NTask)); 42 | list_load = (int *) mymalloc_movable(&list_load, "list_load", (sizeof(int) * NTask)); 43 | DomainLeaveNode = (struct domain_cost_data *) mymalloc_movable(&DomainLeaveNode, "DomainLeaveNode", (MaxTopNodes * sizeof(struct domain_cost_data))); 44 | } 45 | 46 | void domain_free_lists(void) 47 | { 48 | myfree(DomainLeaveNode); 49 | myfree(list_load); 50 | myfree(list_NumPart); 51 | myfree(toGet); 52 | myfree(toGo); 53 | } 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | GALIC v1.1 - A code for the creation of galaxy inititial conditions 2 | ------------------------------------------------------------------------ 3 | 4 | GALIC v1.1 is an updated version of GALIC code (http://www.h-its.org/tap/galic) 5 | which is implementation of a new iterative method to construct steady state 6 | composite halo-disk-bulge galaxy models with prescribed density distribution 7 | and velocity anisotropy. This update is mainly about the new constraints on the time averaged velocity structure in order to ensure its equality to the target distribution of velocity dispersions. 8 | 9 | The method and the original version of GALIC is described in full in the paper: 10 | Yurin D. & Springel, V. An iterative method for the construction of N-body galaxy models in collisionless equilibrium. MNRAS, 2014. (preprint: http://arxiv.org/abs/1402.1623). Users of the code are kindly asked to cite the paper if they make 11 | use of the code. 12 | 13 | The updated version is not fully tested and released "as is", without any guarantees 14 | or warrantees. To get support, please open a new issue. 15 | 16 | Copyright (c) 2014-2017 by Volker Springel and Denis Yurin 17 | 18 | Known Issues 19 | -------------------------------- 20 | #1. Calculation of target velocity dispersions is faulty beyond 8 Mpc for velocity structure of type 2, so don't use it for now for Halo and Bulge, instead if necessary mimic it with velocity structure of type 3 with dispersion R over Z ratio set to 1. 21 | 22 | #2. Putting the time-averaged velocity dispersions to the target ones is not fully successful in case of the disk. This means that one can still see the rapid change of the velocity structure in the disk which is not related to the disk relaxation. 
23 | -------------------------------------------------------------------------------- /src/domain/domain_box.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | 15 | /*! This routine finds the extent of the global domain grid. 16 | 17 | If periodic is on, the minimum extent is the box size. Otherwise it 18 | looks at the maximum extent of the particles. 19 | */ 20 | void domain_findExtent(void) 21 | { 22 | int i, j; 23 | double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; 24 | 25 | /* determine local extension */ 26 | for(j = 0; j < 3; j++) 27 | { 28 | xmin[j] = MAX_REAL_NUMBER; 29 | xmax[j] = -MAX_REAL_NUMBER; 30 | } 31 | 32 | for(i = 0; i < NumPart; i++) 33 | { 34 | for(j = 0; j < 3; j++) 35 | { 36 | if(xmin[j] > P[i].Pos[j]) 37 | xmin[j] = P[i].Pos[j]; 38 | 39 | if(xmax[j] < P[i].Pos[j]) 40 | xmax[j] = P[i].Pos[j]; 41 | } 42 | } 43 | 44 | MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 45 | MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); 46 | 47 | 48 | len = 0; 49 | for(j = 0; j < 3; j++) 50 | if(xmax_glob[j] - xmin_glob[j] > len) 51 | len = xmax_glob[j] - xmin_glob[j]; 52 | 53 | len *= 1.00001; 54 | 55 | for(j = 0; j < 3; j++) 56 | { 57 | DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); 58 | DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len; 59 | } 60 | 61 | DomainLen = len; 62 | DomainInverseLen = 1.0 / DomainLen; 63 | DomainFac = 1.0 / len * (((peanokey) 1) << (BITS_PER_DIMENSION)); 64 | DomainBigFac = (DomainLen / (((long long) 1) << 52)); 65 | } 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/disk.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "allvars.h" 9 | #include "proto.h" 10 | 11 | 12 | 13 | 14 | 15 | /* this function returns a new random coordinate for the disk */ 16 | 17 | void disk_get_fresh_coordinate(double *pos) 18 | { 19 | double q, f, f_, R, R2, Rold, phi; 20 | 21 | do 22 | { 23 | q = gsl_rng_uniform(random_generator); 24 | 25 | pos[2] = All.Disk_Z0 / 2 * log(q / (1 - q)); 26 | 27 | q = gsl_rng_uniform(random_generator); 28 | 29 | R = 1.0; 30 | do 31 | { 32 | f = (1 + R) * exp(-R) + q - 1; 33 | f_ = -R * exp(-R); 34 | 35 | Rold = R; 36 | R = R - f / f_; 37 | } 38 | while(fabs(R - Rold) / R > 1e-7); 39 | 40 | R *= All.Disk_H; 41 | 42 | phi = gsl_rng_uniform(random_generator) * M_PI * 2; 43 | 44 | pos[0] = R * cos(phi); 45 | pos[1] = R * sin(phi); 46 | 47 | R2 = pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]; 48 | } 49 | while(R2 > All.Rmax * All.Rmax); 50 | 51 | } 52 | 53 | 54 | double disk_get_density(double *pos) 55 | { 56 | if(All.Disk_Mass > 0) 57 | { 58 | double R = sqrt(pos[0] * pos[0] + pos[1] * pos[1]); 59 | double z = pos[2]; 60 | 61 | double rho = All.Disk_Mass / (4 * M_PI * All.Disk_H * All.Disk_H * All.Disk_Z0) * 62 | exp(-R / All.Disk_H) * pow(2 / (exp(z / All.Disk_Z0) + exp(-z / All.Disk_Z0)), 2); 63 | 64 | if ( fabs(rho) < MIN_DENSITY) rho = 0; 65 | 66 | return rho; 67 | } 68 | else 69 | return 0; 70 | } 71 | 72 | 73 | 74 | double disk_get_mass_inside_radius(double R) 75 | { 76 | return All.Disk_Mass * (1 - (1 + R / All.Disk_H) * exp(-R / All.Disk_H)); 77 | } 78 | 
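disk_get_fresh_coordinate() above draws positions by inverse-transform sampling. For the vertical sech^2(z/Z0) profile the cumulative distribution inverts in closed form, q = (1 + tanh(z/Z0))/2, giving z = (Z0/2) ln(q/(1-q)). For the exponential surface density the enclosed-mass fraction q = 1 - (1 + R/H) exp(-R/H) (see disk_get_mass_inside_radius() above) has no closed-form inverse, so the code finds the root of f(x) = (1 + x) exp(-x) + q - 1 by Newton's method, using f'(x) = -x exp(-x). A minimal standalone sketch of the same radial inversion (the helper name is hypothetical; the GSL random generator is used as in the file above):

    /* draw a radius from an exponential disk of scale length H by inverting
     * q = 1 - (1 + x) exp(-x), with x = R/H, via Newton's method, mirroring
     * the loop in disk_get_fresh_coordinate() */
    double sample_disk_radius(gsl_rng *rng, double H)
    {
      double q = gsl_rng_uniform(rng);
      double x = 1.0, xold;

      do
        {
          double f  = (1 + x) * exp(-x) + q - 1;   /* residual */
          double fp = -x * exp(-x);                /* df/dx */

          xold = x;
          x = x - f / fp;
        }
      while(fabs(x - xold) / x > 1e-7);

      return x * H;                                /* apply the scale length */
    }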
-------------------------------------------------------------------------------- /src/mpi_utils/sizelimited_sendrecv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | #ifdef MPISENDRECV_SIZELIMIT 12 | 13 | 14 | #undef MPI_Sendrecv 15 | 16 | 17 | int MPI_Sizelimited_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, 18 | int dest, int sendtag, void *recvbuf, int recvcount, 19 | MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status * status) 20 | { 21 | int iter = 0, size_sendtype, size_recvtype, send_now, recv_now; 22 | int count_limit; 23 | 24 | 25 | if(dest != source) 26 | terminate("dest != source"); 27 | 28 | MPI_Type_size(sendtype, &size_sendtype); 29 | MPI_Type_size(recvtype, &size_recvtype); 30 | 31 | if(dest == ThisTask) 32 | { 33 | memcpy(recvbuf, sendbuf, recvcount * size_recvtype); 34 | return 0; 35 | } 36 | 37 | count_limit = (int) ((((long long) MPISENDRECV_SIZELIMIT) * 1024 * 1024) / size_sendtype); 38 | 39 | while(sendcount > 0 || recvcount > 0) 40 | { 41 | if(sendcount > count_limit) 42 | { 43 | send_now = count_limit; 44 | if(iter == 0) 45 | { 46 | printf("imposing size limit on MPI_Sendrecv() on task=%d (send of size=%d)\n", ThisTask, sendcount * size_sendtype); 47 | myflush(stdout); 48 | } 49 | iter++; 50 | } 51 | else 52 | send_now = sendcount; 53 | 54 | if(recvcount > count_limit) 55 | recv_now = count_limit; 56 | else 57 | recv_now = recvcount; 58 | 59 | MPI_Sendrecv(sendbuf, send_now, sendtype, dest, sendtag, recvbuf, recv_now, recvtype, source, recvtag, comm, status); 60 | 61 | sendcount -= send_now; 62 | recvcount -= recv_now; 63 | 64 | sendbuf += send_now * size_sendtype; 65 | recvbuf += recv_now * size_recvtype; 66 | } 67 | 68 | return 0; 69 | } 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /src/domain/domain_sort_kernels.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | int domain_compare_count(const void *a, const void *b) 15 | { 16 | if(((struct domain_count_data *) a)->count > (((struct domain_count_data *) b)->count)) 17 | return -1; 18 | 19 | if(((struct domain_count_data *) a)->count < (((struct domain_count_data *) b)->count)) 20 | return +1; 21 | 22 | return 0; 23 | } 24 | 25 | int domain_compare_key(const void *a, const void *b) 26 | { 27 | if(((struct domain_peano_hilbert_data *) a)->key < (((struct domain_peano_hilbert_data *) b)->key)) 28 | return -1; 29 | 30 | if(((struct domain_peano_hilbert_data *) a)->key > (((struct domain_peano_hilbert_data *) b)->key)) 31 | return +1; 32 | 33 | return 0; 34 | } 35 | 36 | 37 | static void msort_domain_with_tmp(struct domain_peano_hilbert_data *b, size_t n, struct domain_peano_hilbert_data *t) 38 | { 39 | struct domain_peano_hilbert_data *tmp; 40 | struct domain_peano_hilbert_data *b1, *b2; 41 | size_t n1, n2; 42 | 43 | if(n <= 1) 44 | return; 45 | 46 | n1 = n / 2; 47 | n2 = n - n1; 48 | b1 = b; 49 | b2 = b + n1; 50 | 51 | msort_domain_with_tmp(b1, n1, t); 52 | msort_domain_with_tmp(b2, n2, t); 53 | 54 | tmp = t; 55 | 56 | while(n1 > 0 && n2 > 0) 57 | { 58 | if(b1->key <= b2->key) 59 | { 60 | --n1; 61 | *tmp++ = *b1++; 62 | } 63 | else 64 
| {
65 |       --n2;
66 |       *tmp++ = *b2++;
67 |     }
68 |   }
69 |
70 |   if(n1 > 0)
71 |     memcpy(tmp, b1, n1 * sizeof(struct domain_peano_hilbert_data));
72 |
73 |   memcpy(b, t, (n - n2) * sizeof(struct domain_peano_hilbert_data));
74 | }
75 |
76 | void mysort_domain(void *b, size_t n, size_t s)
77 | {
78 |   /* this function tends to work slightly faster than a call of qsort() for this particular
79 |    * list, at least on most platforms
80 |    */
81 |
82 |   const size_t size = n * s;
83 |   struct domain_peano_hilbert_data *tmp;
84 |
85 |   tmp = (struct domain_peano_hilbert_data *) mymalloc("tmp", size);
86 |
87 |   msort_domain_with_tmp((struct domain_peano_hilbert_data *) b, n, tmp);
88 |
89 |   myfree(tmp);
90 | }
91 |
--------------------------------------------------------------------------------
/src/bulge.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | #include "allvars.h"
9 | #include "proto.h"
10 |
11 |
12 |
13 |
14 | /* this function returns a new random coordinate for the bulge */
15 | void bulge_get_fresh_coordinate(double *pos)
16 | {
17 |   double r;
18 |
19 |   do
20 |     {
21 |       double q = gsl_rng_uniform(random_generator);
22 |
23 |       if(q > 0)
24 |         r = All.Bulge_A * (q + sqrt(q)) / (1 - q);
25 |       else
26 |         r = 0;
27 |     }
28 |   while(r > All.Rmax);
29 |
30 |   double phi = gsl_rng_uniform(random_generator) * M_PI * 2;
31 |   double theta = acos(gsl_rng_uniform(random_generator) * 2 - 1);
32 |
33 |   pos[0] = r * sin(theta) * cos(phi);
34 |   pos[1] = r * sin(theta) * sin(phi);
35 |   pos[2] = r * cos(theta) / All.BulgeStretch;
36 | }
37 |
38 |
39 | double bulge_get_density(double *pos)
40 | {
41 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
42 |
43 |   double rho = All.BulgeStretch * All.Bulge_Mass / (2 * M_PI) * All.Bulge_A / (r + 1.0e-6 * All.Bulge_A) / pow(r + All.Bulge_A, 3);   /* Hernquist profile: rho = M a / (2 pi r (r+a)^3), with a softened center */
44 |
45 |   if(fabs(rho) < MIN_DENSITY) rho = 0;
46 |
47 |   return rho;
48 | }
49 |
50 |
51 | /* Note that the other functions below will only be called in a meaningful way for a spherical system */
52 |
53 |
54 | double bulge_get_mass_inside_radius(double r)
55 | {
56 |   if(All.Bulge_Mass > 0)
57 |     return All.Bulge_Mass * pow(r / (r + All.Bulge_A), 2);
58 |   else
59 |     return 0;
60 | }
61 |
62 |
63 |
64 | double bulge_get_potential(double *pos)
65 | {
66 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
67 |   return bulge_get_potential_from_radius(r);
68 | }
69 |
70 | double bulge_get_potential_from_radius(double r)
71 | {
72 |   double phi = -All.G * All.Bulge_Mass / (r + All.Bulge_A);
73 |   return phi;
74 | }
75 |
76 | /* returns the acceleration at coordinate pos[] */
77 | void bulge_get_acceleration(double *pos, double *acc)
78 | {
79 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
80 |   double fac = All.G * All.Bulge_Mass / ((r + 1.0e-6 * All.Bulge_A) * (r + All.Bulge_A) * (r + All.Bulge_A));
81 |
82 |   acc[0] = -fac * pos[0];
83 |   acc[1] = -fac * pos[1];
84 |   acc[2] = -fac * pos[2];
85 | }
86 |
87 | double bulge_get_escape_speed(double *pos)
88 | {
89 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
90 |   double phi = -All.G * All.Bulge_Mass / (r + All.Bulge_A);
91 |   double vesc = sqrt(-2.0 * phi);
92 |
93 |   return vesc;
94 | }
95 |
--------------------------------------------------------------------------------
/src/forcetree/forcetree.h:
--------------------------------------------------------------------------------
1 | #ifndef FORCETREE_H
2 | #define FORCETREE_H
3 |
4 | #ifndef INLINE_FUNC
5 | #ifdef INLINE
6 | #define INLINE_FUNC inline
7 | #else
8 | #define INLINE_FUNC
9 | #endif
10 | #endif
11 |
12 |
13 | /*! length of look-up table for short-range force kernel in TreePM algorithm */
14 | #define NTAB 1000
15 |
16 | #define MAX_TREE_LEVEL 30
17 | #define MAX_TREE_ALLOC_FACTOR 30.0
18 | #define MAX_IMPACT_BEFORE_OPTIMIZATION 1.03
19 |
20 |
21 | #define BITFLAG_TOPLEVEL 0
22 | #define BITFLAG_DEPENDS_ON_LOCAL_MASS 1
23 | #define BITFLAG_DEPENDS_ON_EXTERN_MASS 2
24 | #define BITFLAG_INTERNAL_TOPLEVEL 6
25 | #define BITFLAG_MULTIPLEPARTICLES 7
26 | #define BITFLAG_NODEHASBEENKICKED 8
27 | #define BITFLAG_CONTAINS_GAS 10
28 |
29 |
30 | #define BITFLAG_MASK ((1 << BITFLAG_CONTAINS_GAS) + (1 << BITFLAG_MULTIPLEPARTICLES))
31 |
32 |
33 | static inline unsigned long long force_double_to_int(double d)
34 | {
35 |   union { double d; unsigned long long ull; } u;
36 |   u.d = d;
37 |   return (u.ull & 0xFFFFFFFFFFFFFllu);   /* keep the 52 mantissa bits */
38 | }
39 |
40 | static inline double force_int_to_double(unsigned long long x)
41 | {
42 |   union { double d; unsigned long long ull; } u;
43 |   u.d = 1.0;
44 |   u.ull |= x;
45 |   return u.d;
46 | }
47 |
48 | int force_treebuild(int npart, int optimized_domain_mapping);
49 | int force_treebuild_construct(int npart, int optimized_domain_mapping);
50 | int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char level);
51 | int force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z);
52 | void force_insert_pseudo_particles(void);
53 | #ifndef GPU_TREE
54 | void force_update_node_recursive(int no, int sib, int father, int *last);
55 | #else
56 | int force_update_node_recursive(int no, int sib, int father, int *last, int depth);
57 | #endif
58 | void force_exchange_topleafdata(void);
59 | void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z);
60 | void force_treeallocate(int maxpart, int maxindex);
61 | void force_treefree(void);
62 | void dump_particles(void);
63 | int force_add_empty_nodes(void);
64 | void force_short_range_init(void);
65 | int force_treeevaluate(int target, int mode, int thread_id);
66 | int force_treeevaluate_shortrange(int target, int mode, int thread_id, int measure_cost_flag);
67 | int force_treeevaluate_ewald_correction(int i, int mode, int thread_id);
68 | int force_treeevaluate_direct(int target, int mode);
69 | void force_assign_cost_values(void);
70 | void force_update_node_recursive_sse(int no, int sib, int father, int *last);
71 | void force_optimize_domain_mapping(void);
72 | double force_get_current_balance(double *impact);
73 | void force_get_global_cost_for_leavenodes(int nexport);
74 |
75 |
76 |
77 | #endif
78 |
79 |
80 |
--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "allvars.h"
8 | #include "proto.h"
9 |
10 |
11 | void init(void)
12 | {
13 |   if(ThisTask == 0)
14 |     {
15 |       char buf[2000];
16 |       sprintf(buf, "%s/memory.txt", All.OutputDir);
17 |       if(!(FdMemory = fopen(buf, "w")))
18 |         terminate("can't open file '%s'", buf);
19 |     }
20 |
21 |   mymalloc_init();
22 |
23 |   set_units();
24 |
25 |   random_generator = gsl_rng_alloc(gsl_rng_ranlxd1);
26 |
27 |   gsl_rng_set(random_generator, 42 + ThisTask);   /* start-up seed */
28 |
29 |   set_softenings();
30 |
31 |   All.TopNodeAllocFactor = 0.1;
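      /* head-room factors for the domain decomposition and the gravity tree;
       * they start small on purpose and, as in the GADGET code family from
       * which GalIC derives, are enlarged at run time if the top-level tree
       * or force tree runs out of space */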
32 |   All.TreeAllocFactor = 0.8;
33 |
34 |
35 | #ifdef DEBUG_ENABLE_FPU_EXCEPTIONS
36 |   enable_core_dumps_and_fpu_exceptions();
37 | #endif
38 | }
39 |
40 |
41 | /*! \brief Computes conversion factors between internal code units and the
42 |  *  cgs-system.
43 |  *
44 |  *  In addition, constants like the gravitational constant are set.
45 |  */
46 | void set_units(void)
47 | {
48 |   All.UnitTime_in_s = All.UnitLength_in_cm / All.UnitVelocity_in_cm_per_s;
49 |   All.UnitTime_in_Megayears = All.UnitTime_in_s / SEC_PER_MEGAYEAR;
50 |
51 |   if(All.GravityConstantInternal == 0)
52 |     All.G = GRAVITY / pow(All.UnitLength_in_cm, 3) * All.UnitMass_in_g * pow(All.UnitTime_in_s, 2);
53 |   else
54 |     All.G = All.GravityConstantInternal;
55 |
56 |   All.UnitDensity_in_cgs = All.UnitMass_in_g / pow(All.UnitLength_in_cm, 3);
57 |   All.UnitPressure_in_cgs = All.UnitMass_in_g / All.UnitLength_in_cm / pow(All.UnitTime_in_s, 2);
58 |   All.UnitCoolingRate_in_cgs = All.UnitPressure_in_cgs / All.UnitTime_in_s;
59 |   All.UnitEnergy_in_cgs = All.UnitMass_in_g * pow(All.UnitLength_in_cm, 2) / pow(All.UnitTime_in_s, 2);
60 |
61 |   /* convert some physical input parameters to internal units */
62 |
63 |   All.Hubble = HUBBLE * All.UnitTime_in_s;
64 |
65 |   if(ThisTask == 0)
66 |     {
67 |       printf("\nHubble (internal units) = %g\n", All.Hubble);
68 |       printf("G (internal units) = %g\n", All.G);
69 |       printf("UnitMass_in_g = %g\n", All.UnitMass_in_g);
70 |       printf("UnitTime_in_s = %g\n", All.UnitTime_in_s);
71 |       printf("UnitVelocity_in_cm_per_s = %g\n", All.UnitVelocity_in_cm_per_s);
72 |       printf("UnitDensity_in_cgs = %g\n", All.UnitDensity_in_cgs);
73 |       printf("UnitEnergy_in_cgs = %g\n", All.UnitEnergy_in_cgs);
74 |       printf("\n");
75 |     }
76 | }
77 |
78 | void set_softenings(void)
79 | {
80 |   int i;
81 |
82 |   for(i = 0; i < 6; i++)
83 |     All.ForceSoftening[i] = 2.8 * All.Softening;   /* same spline softening for all six particle types */
84 | }
85 |
86 |
87 | void endrun(void)
88 | {
89 |   mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
90 |   fflush(stdout);
91 |
92 |   MPI_Finalize();
93 |   exit(0);
94 | }
--------------------------------------------------------------------------------
/src/allocate.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "allvars.h"
8 | #include "proto.h"
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | /* This routine allocates memory for
17 |  * particle storage, both the collisionless and the SPH particles.
18 |  * The memory for the ordered binary tree of the timeline
19 |  * is also allocated.
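 *  (In GalIC itself there are no SPH particles; what is actually allocated
 *  below is the collisionless particle array P[] plus the export and
 *  communication buffers.)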
20 | */ 21 | void allocate_memory(void) 22 | { 23 | int NTaskTimesThreads; 24 | 25 | NTaskTimesThreads = MaxThreads * NTask; 26 | 27 | Exportflag = (int *) mymalloc("Exportflag", NTaskTimesThreads * sizeof(int)); 28 | Exportindex = (int *) mymalloc("Exportindex", NTaskTimesThreads * sizeof(int)); 29 | Exportnodecount = (int *) mymalloc("Exportnodecount", NTaskTimesThreads * sizeof(int)); 30 | 31 | Send_count = (int *) mymalloc("Send_count", sizeof(int) * NTaskTimesThreads); 32 | Send_offset = (int *) mymalloc("Send_offset", sizeof(int) * NTaskTimesThreads); 33 | Recv_count = (int *) mymalloc("Recv_count", sizeof(int) * NTask); 34 | Recv_offset = (int *) mymalloc("Recv_offset", sizeof(int) * NTask); 35 | 36 | Send_count_nodes = (int *) mymalloc("Send_count_nodes", sizeof(int) * NTask); 37 | Send_offset_nodes = (int *) mymalloc("Send_offset_nodes", sizeof(int) * NTask); 38 | Recv_count_nodes = (int *) mymalloc("Recv_count_nodes", sizeof(int) * NTask); 39 | Recv_offset_nodes = (int *) mymalloc("Recv_offset_nodes", sizeof(int) * NTask); 40 | 41 | Mesh_Send_count = (int *) mymalloc("Mesh_Send_count", sizeof(int) * NTask); 42 | Mesh_Send_offset = (int *) mymalloc("Mesh_Send_offset", sizeof(int) * NTask); 43 | Mesh_Recv_count = (int *) mymalloc("Mesh_Recv_count", sizeof(int) * NTask); 44 | Mesh_Recv_offset = (int *) mymalloc("Mesh_Recv_offset", sizeof(int) * NTask); 45 | 46 | P = (struct particle_data *) mymalloc_movable(&P, "P", All.MaxPart * sizeof(struct particle_data)); 47 | 48 | ActiveGravityParticles = (int *) mymalloc_movable(&ActiveGravityParticles, "ActiveGravityParticle", All.MaxPart * sizeof(int)); 49 | 50 | /* set to zero */ 51 | memset(P, 0, All.MaxPart * sizeof(struct particle_data)); 52 | } 53 | 54 | void free_allocated_memory(void) 55 | { 56 | myfree(ActiveGravityParticles); 57 | myfree(P); 58 | 59 | myfree(Mesh_Recv_offset); 60 | myfree(Mesh_Recv_count); 61 | myfree(Mesh_Send_offset); 62 | myfree(Mesh_Send_count); 63 | 64 | myfree(Recv_offset_nodes); 65 | myfree(Recv_count_nodes); 66 | myfree(Send_offset_nodes); 67 | myfree(Send_count_nodes); 68 | 69 | myfree(Recv_offset); 70 | myfree(Recv_count); 71 | myfree(Send_offset); 72 | myfree(Send_count); 73 | 74 | myfree(Exportnodecount); 75 | myfree(Exportindex); 76 | myfree(Exportflag); 77 | } 78 | 79 | 80 | void reallocate_memory_maxpart(void) 81 | { 82 | mpi_printf("ALLOCATE: Changing to MaxPart = %d\n", All.MaxPart); 83 | 84 | P = (struct particle_data *) myrealloc_movable(P, All.MaxPart * sizeof(struct particle_data)); 85 | ActiveGravityParticles = (int *) myrealloc_movable(ActiveGravityParticles, All.MaxPart * sizeof(int)); 86 | 87 | } 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /Makefile.lib: -------------------------------------------------------------------------------- 1 | 2 | LIB_DIR=libs 3 | 4 | FFTW_VERSION=2.1.5 5 | GSL_VERSION=1.15 6 | GMP_VERSION=5.0.5 7 | HDF5_VERSION=1.8.11 8 | HWLOC_VERSION=1.4.2 9 | 10 | build_libs: gmp gsl fftw-single fftw-double hdf5 hwloc 11 | 12 | 13 | 14 | SHELL=/bin/bash 15 | 16 | fftw-single: $(LIB_DIR)/include/sfftw.h 17 | 18 | fftw-double: $(LIB_DIR)/include/dfftw.h 19 | 20 | $(LIB_DIR): 21 | mkdir $(LIB_DIR) 22 | 23 | $(LIB_DIR)/fftw-$(FFTW_VERSION).tar.gz: | $(LIB_DIR) 24 | cd $(LIB_DIR); wget http://www.fftw.org/fftw-$(FFTW_VERSION).tar.gz 25 | 26 | $(LIB_DIR)/fftw-$(FFTW_VERSION)/: $(LIB_DIR)/fftw-$(FFTW_VERSION).tar.gz 27 | cd $(LIB_DIR); tar -xf fftw-$(FFTW_VERSION).tar.gz 28 | 29 | $(LIB_DIR)/include/sfftw.h: | 
$(LIB_DIR)/fftw-$(FFTW_VERSION)/ 30 | cd $(LIB_DIR)/fftw-$(FFTW_VERSION); export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/ --enable-mpi --enable-float --enable-type-prefix --enable-shared; make; make install 31 | 32 | $(LIB_DIR)/include/dfftw.h: | $(LIB_DIR)/fftw-$(FFTW_VERSION)/ 33 | cd $(LIB_DIR)/fftw-$(FFTW_VERSION); export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/ --enable-mpi --enable-type-prefix --enable-shared; make; make install 34 | 35 | gsl: $(LIB_DIR)/include/gsl/ 36 | 37 | $(LIB_DIR)/gsl-$(GSL_VERSION).tar.gz: | $(LIB_DIR) 38 | cd $(LIB_DIR); wget http://ftpmirror.gnu.org/gsl/gsl-$(GSL_VERSION).tar.gz 39 | 40 | $(LIB_DIR)/gsl-$(GSL_VERSION)/: $(LIB_DIR)/gsl-$(GSL_VERSION).tar.gz 41 | cd $(LIB_DIR); tar -xf gsl-$(GSL_VERSION).tar.gz 42 | 43 | $(LIB_DIR)/include/gsl/: | $(LIB_DIR)/gsl-$(GSL_VERSION)/ 44 | cd $(LIB_DIR)/gsl-$(GSL_VERSION);export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/; make; make install 45 | 46 | 47 | hwloc: $(LIB_DIR)/include/hwloc.h 48 | 49 | $(LIB_DIR)/hwloc-$(HWLOC_VERSION).tar.gz: | $(LIB_DIR) 50 | cd $(LIB_DIR); wget http://www.open-mpi.de/software/hwloc/v1.4/downloads/hwloc-$(HWLOC_VERSION).tar.gz 51 | 52 | $(LIB_DIR)/hwloc-$(HWLOC_VERSION)/: $(LIB_DIR)/hwloc-$(HWLOC_VERSION).tar.gz 53 | cd $(LIB_DIR); tar -xf hwloc-$(HWLOC_VERSION).tar.gz 54 | 55 | $(LIB_DIR)/include/hwloc.h: | $(LIB_DIR)/hwloc-$(HWLOC_VERSION)/ 56 | cd $(LIB_DIR)/hwloc-$(HWLOC_VERSION); ./configure --prefix=$(CURDIR)/$(LIB_DIR)/; make; make install 57 | 58 | gmp: $(LIB_DIR)/include/gmp.h 59 | 60 | $(LIB_DIR)/gmp-$(GMP_VERSION).tar.bz2: | $(LIB_DIR) 61 | cd $(LIB_DIR); wget http://ftpmirror.gnu.org/gmp/gmp-$(GMP_VERSION).tar.bz2 62 | 63 | $(LIB_DIR)/gmp-$(GMP_VERSION)/: $(LIB_DIR)/gmp-$(GMP_VERSION).tar.bz2 64 | cd $(LIB_DIR); tar -xf gmp-$(GMP_VERSION).tar.bz2 65 | 66 | $(LIB_DIR)/include/gmp.h: | $(LIB_DIR)/gmp-$(GMP_VERSION)/ 67 | cd $(LIB_DIR)/gmp-$(GMP_VERSION); ./configure --prefix=$(CURDIR)/$(LIB_DIR)/; make; make install 68 | 69 | 70 | hdf5: $(LIB_DIR)/include/hdf5.h 71 | 72 | $(LIB_DIR)/hdf5-$(HDF5_VERSION).tar.gz: | $(LIB_DIR) 73 | cd $(LIB_DIR); wget ftp://ftp.hdfgroup.org/HDF5/current/src/hdf5-$(HDF5_VERSION).tar.gz 74 | 75 | $(LIB_DIR)/hdf5-$(HDF5_VERSION): $(LIB_DIR)/hdf5-$(HDF5_VERSION).tar.gz 76 | cd $(LIB_DIR); tar -xf hdf5-$(HDF5_VERSION).tar.gz 77 | 78 | $(LIB_DIR)/include/hdf5.h: | $(LIB_DIR)/hdf5-$(HDF5_VERSION) 79 | /bin/bash -c 'cd $(LIB_DIR)/hdf5-$(HDF5_VERSION); export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/ --enable-parallel; make; make install' 80 | 81 | 82 | 83 | 84 | .PHONY= gmp gsl fftw-single fftw-double hdf5 hwloc 85 | -------------------------------------------------------------------------------- /Makefile.template: -------------------------------------------------------------------------------- 1 | EXEC = GalIC 2 | CONFIG = Config.sh 3 | BUILD_DIR = build 4 | SRC_DIR = src 5 | 6 | #PARAMFILE = Model_D3.param 7 | #N := 16 8 | 9 | 10 | ifdef SYSTYPE 11 | SYSTYPE := "$(SYSTYPE)" 12 | -include Makefile.systype 13 | else 14 | include Makefile.systype 15 | endif 16 | 17 | MAKEFILES = Makefile config-makefile 18 | ifeq ($(wildcard Makefile.systype), Makefile.systype) 19 | MAKEFILES += Makefile.systype 20 | endif 21 | 22 | 23 | 24 | PERL = /usr/bin/perl 25 | RESULT := $(shell CONFIG=$(CONFIG) PERL=$(PERL) BUILD_DIR=$(BUILD_DIR) make -f config-makefile) 26 | CONFIGVARS := $(shell cat $(BUILD_DIR)/galicconfig.h) 27 | 28 | 29 | 30 
| #MPICHLIB = -lmpich 31 | GMPLIB = -lgmp 32 | GSLLIB = -lgsl -lgslcblas 33 | MATHLIB = -lm 34 | 35 | 36 | 37 | ############################### 38 | # Determine your SYSTEM here # 39 | ############################### 40 | 41 | ifeq ($(SYSTYPE),"APHI") 42 | CC = mpicc 43 | CXX = mpicxx 44 | #OPTIMIZE = -g -w -m64 -O3 -msse3 45 | OPTIMIZE = -g -w -m64 -O3 -march=native 46 | ifeq (NUM_THREADS,$(findstring NUM_THREADS,$(CONFIGVARS))) 47 | OPTIMIZE += -fopenmp 48 | else 49 | OPTIMIZE += -Wno-unknown-pragmas 50 | endif 51 | GSL_INCL = 52 | GSL_LIBS = 53 | FFTW_INCL= 54 | FFTW_LIBS= 55 | GMP_INCL = 56 | GMP_LIBS = 57 | MPICHLIB = 58 | HDF5INCL = 59 | HDF5LIB = 60 | #OPT += -DNOCALLSOFSYSTEM 61 | #OPT += -DIMPOSE_PINNING 62 | #OPT += -DUSE_SSE 63 | endif 64 | 65 | 66 | ifndef LINKER 67 | LINKER = $(CC) 68 | endif 69 | 70 | 71 | 72 | ########################################## 73 | #determine the needed object/header files# 74 | ########################################## 75 | 76 | SUBDIRS = . 77 | 78 | OBJS = main.o allocate.o allvars.o disk.o grid.o bulge.o set_particles.o parallel_sort.o \ 79 | halo.o init.o io.o mymalloc.o orbit_response.o parameters.o structure.o system.o disp_fields.o \ 80 | forcetree/gravtree.o forcetree/forcetree.o forcetree/forcetree_walk.o domain/peano.o domain/pqueue.o \ 81 | domain/domain.o domain/domain_balance.o domain/domain_counttogo.o domain/domain_exchange.o \ 82 | domain/domain_rearrange.o domain/domain_sort_kernels.o domain/domain_toplevel.o domain/domain_vars.o domain/domain_box.o 83 | 84 | 85 | INCL += allvars.h proto.h 86 | 87 | SUBDIRS += forcetree domain 88 | 89 | ################################ 90 | #determine the needed libraries# 91 | ################################ 92 | 93 | 94 | ifneq (HAVE_HDF5,$(findstring HAVE_HDF5,$(CONFIGVARS))) 95 | HDF5LIB = 96 | endif 97 | 98 | ifeq (NUM_THREADS,$(findstring NUM_THREADS,$(CONFIGVARS))) 99 | THREAD_LIB = 100 | endif 101 | 102 | 103 | ########################## 104 | #combine compiler options# 105 | ########################## 106 | 107 | CFLAGS = $(OPTIMIZE) $(OPT) $(HDF5INCL) $(GSL_INCL) $(FFTW_INCL) $(ODE_INCL) $(GMP_INCL) $(MKL_INCL) $(CUDA_INCL) -I$(BUILD_DIR) 108 | 109 | LIBS = $(MATHLIB) $(HDF5LIB) $(MPICHLIB) $(GSL_LIBS) $(GSLLIB) $(FFTW_LIB) $(GMP_LIBS) $(GMPLIB) $(ODE_LIB) $(MKL_LIBS) $(THREAD_LIB) $(CUDA_LIBS) 110 | 111 | 112 | SUBDIRS := $(addprefix $(BUILD_DIR)/,$(SUBDIRS)) 113 | OBJS := $(addprefix $(BUILD_DIR)/,$(OBJS)) $(BUILD_DIR)/compile_time_info.o 114 | INCL := $(addprefix $(SRC_DIR)/,$(INCL)) $(BUILD_DIR)/galicconfig.h 115 | 116 | 117 | ################ 118 | #create subdirs# 119 | ################ 120 | RESULT := $(shell mkdir -p $(SUBDIRS) ) 121 | 122 | 123 | 124 | ############# 125 | #build rules# 126 | ############# 127 | 128 | all: $(EXEC) 129 | 130 | $(EXEC): $(OBJS) 131 | $(LINKER) $(OPTIMIZE) $(OBJS) $(LIBS) -o $(EXEC) 132 | # mpirun -n $(N) -f hostfile ./$(EXEC) $(PARAMFILE) 133 | 134 | #bg: $(OBJS) 135 | # $(LINKER) $(OPTIMIZE) $(OBJS) $(LIBS) -o $(EXEC) 136 | # mpirun -n $(N) -f hostfile ./$(EXEC) $(PARAMFILE) 1> log.out.txt 2> log.err.txt & 137 | 138 | clean: 139 | rm -f $(OBJS) $(EXEC) lib$(LIBRARY).a 140 | rm -f $(BUILD_DIR)/compile_time_info.c $(BUILD_DIR)/galicconfig.h 141 | 142 | $(BUILD_DIR)/%.o: $(SRC_DIR)/%.c $(INCL) $(MAKEFILES) 143 | $(CC) $(CFLAGS) -c $< -o $@ 144 | 145 | $(BUILD_DIR)/compile_time_info.o: $(BUILD_DIR)/compile_time_info.c $(MAKEFILES) 146 | $(CC) $(CFLAGS) -c $< -o $@ 147 | 148 | 
-------------------------------------------------------------------------------- /src/domain/pqueue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Volkan Yazıcı 3 | * Copyright 2006-2010 The Apache Software Foundation 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 6 | * use this file except in compliance with the License. You may obtain a copy of 7 | * the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | * License for the specific language governing permissions and limitations under 15 | * the License. 16 | */ 17 | 18 | 19 | /** 20 | * @file pqueue.h 21 | * @brief Priority Queue function declarations 22 | * 23 | * @{ 24 | */ 25 | 26 | 27 | #ifndef PQUEUE_H 28 | #define PQUEUE_H 29 | 30 | /** priority data type */ 31 | typedef double pqueue_pri_t; 32 | 33 | /** callback functions to get/set/compare the priority of an element */ 34 | typedef pqueue_pri_t (*pqueue_get_pri_f)(void *a); 35 | typedef void (*pqueue_set_pri_f)(void *a, pqueue_pri_t pri); 36 | typedef int (*pqueue_cmp_pri_f)(pqueue_pri_t next, pqueue_pri_t curr); 37 | 38 | 39 | /** callback functions to get/set the position of an element */ 40 | typedef size_t (*pqueue_get_pos_f)(void *a); 41 | typedef void (*pqueue_set_pos_f)(void *a, size_t pos); 42 | 43 | 44 | /** debug callback function to print a entry */ 45 | typedef void (*pqueue_print_entry_f)(FILE *out, void *a); 46 | 47 | 48 | /** the priority queue handle */ 49 | typedef struct pqueue_t 50 | { 51 | size_t size; 52 | size_t avail; 53 | size_t step; 54 | pqueue_cmp_pri_f cmppri; 55 | pqueue_get_pri_f getpri; 56 | pqueue_set_pri_f setpri; 57 | pqueue_get_pos_f getpos; 58 | pqueue_set_pos_f setpos; 59 | void **d; 60 | } pqueue_t; 61 | 62 | 63 | /** 64 | * initialize the queue 65 | * 66 | * @param n the initial estimate of the number of queue items for which memory 67 | * should be preallocated 68 | * @param pri the callback function to run to assign a score to a element 69 | * @param get the callback function to get the current element's position 70 | * @param set the callback function to set the current element's position 71 | * 72 | * @Return the handle or NULL for insufficent memory 73 | */ 74 | pqueue_t * 75 | pqueue_init(size_t n, 76 | pqueue_cmp_pri_f cmppri, 77 | pqueue_get_pri_f getpri, 78 | pqueue_set_pri_f setpri, 79 | pqueue_get_pos_f getpos, 80 | pqueue_set_pos_f setpos); 81 | 82 | 83 | /** 84 | * free all memory used by the queue 85 | * @param q the queue 86 | */ 87 | void pqueue_free(pqueue_t *q); 88 | 89 | 90 | /** 91 | * return the size of the queue. 92 | * @param q the queue 93 | */ 94 | size_t pqueue_size(pqueue_t *q); 95 | 96 | 97 | /** 98 | * insert an item into the queue. 99 | * @param q the queue 100 | * @param d the item 101 | * @return 0 on success 102 | */ 103 | int pqueue_insert(pqueue_t *q, void *d); 104 | 105 | 106 | /** 107 | * move an existing entry to a different priority 108 | * @param q the queue 109 | * @param old the old priority 110 | * @param d the entry 111 | */ 112 | void 113 | pqueue_change_priority(pqueue_t *q, 114 | pqueue_pri_t new_pri, 115 | void *d); 116 | 117 | 118 | /** 119 | * pop the highest-ranking item from the queue. 
120 | * @param p the queue 121 | * @param d where to copy the entry to 122 | * @return NULL on error, otherwise the entry 123 | */ 124 | void *pqueue_pop(pqueue_t *q); 125 | 126 | 127 | /** 128 | * remove an item from the queue. 129 | * @param p the queue 130 | * @param d the entry 131 | * @return 0 on success 132 | */ 133 | int pqueue_remove(pqueue_t *q, void *d); 134 | 135 | 136 | /** 137 | * access highest-ranking item without removing it. 138 | * @param q the queue 139 | * @param d the entry 140 | * @return NULL on error, otherwise the entry 141 | */ 142 | void *pqueue_peek(pqueue_t *q); 143 | 144 | 145 | /** 146 | * print the queue 147 | * @internal 148 | * DEBUG function only 149 | * @param q the queue 150 | * @param out the output handle 151 | * @param the callback function to print the entry 152 | */ 153 | void 154 | pqueue_print(pqueue_t *q, 155 | FILE *out, 156 | pqueue_print_entry_f print); 157 | 158 | 159 | /** 160 | * dump the queue and it's internal structure 161 | * @internal 162 | * debug function only 163 | * @param q the queue 164 | * @param out the output handle 165 | * @param the callback function to print the entry 166 | */ 167 | void 168 | pqueueu_dump(pqueue_t *q, 169 | FILE *out, 170 | pqueue_print_entry_f print); 171 | 172 | 173 | /** 174 | * checks that the pq is in the right order, etc 175 | * @internal 176 | * debug function only 177 | * @param q the queue 178 | */ 179 | int pqueue_is_valid(pqueue_t *q); 180 | 181 | 182 | #endif /* PQUEUE_H */ 183 | /** @} */ 184 | -------------------------------------------------------------------------------- /src/structure.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "allvars.h" 11 | #include "proto.h" 12 | 13 | 14 | 15 | static double fc(double c) 16 | { 17 | return c * (0.5 - 0.5 / pow(1 + c, 2) - log(1 + c) / (1 + c)) / pow(log(1 + c) - c / (1 + c), 2); 18 | } 19 | 20 | static double jdisk_int(double x, void *param) 21 | { 22 | double vc2, Sigma0, vc, y; 23 | 24 | if(x > 1.0e-10 * All.Halo_A) 25 | vc2 = All.G * (halo_get_mass_inside_radius(x) + bulge_get_mass_inside_radius(x)) / x; 26 | else 27 | vc2 = 0; 28 | 29 | if(vc2 < 0) 30 | terminate("vc2 < 0"); 31 | 32 | Sigma0 = All.Disk_Mass / (2 * M_PI * All.Disk_H * All.Disk_H); 33 | y = x / (2 * All.Disk_H); 34 | 35 | if(y > 1e-4) 36 | vc2 += 37 | x * 2 * M_PI * All.G * Sigma0 * y * (gsl_sf_bessel_I0(y) * gsl_sf_bessel_K0(y) - 38 | gsl_sf_bessel_I1(y) * gsl_sf_bessel_K1(y)); 39 | 40 | vc = sqrt(vc2); 41 | 42 | return pow(x / All.Disk_H, 2) * vc * exp(-x / All.Disk_H); 43 | } 44 | 45 | 46 | static double gc_int(double x, void *param) 47 | { 48 | return pow(log(1 + x) - x / (1 + x), 0.5) * pow(x, 1.5) / pow(1 + x, 2); 49 | } 50 | 51 | 52 | 53 | 54 | void structure_determination(void) 55 | { 56 | double jhalo, jdisk, jd; 57 | double hnew, dh; 58 | 59 | /* total galaxy mass */ 60 | All.M200 = pow(All.V200, 3) / (10 * All.G * All.Hubble); 61 | 62 | /* virial radius of galaxy */ 63 | All.R200 = All.V200 / (10 * All.Hubble); 64 | 65 | All.LowerDispLimit = pow(0.01 * All.V200, 2); 66 | 67 | /* halo scale radius */ 68 | All.Halo_Rs = All.R200 / All.Halo_C; 69 | 70 | /* determine the masses of all components */ 71 | All.Disk_Mass = All.MD * All.M200; 72 | All.Bulge_Mass = All.MB * All.M200; 73 | 74 | All.BH_Mass = All.MBH * All.M200; 75 | if(All.MBH > 0) 76 | All.BH_N = 1; 77 | else 78 | All.BH_N = 0; 79 | 80 | All.Halo_Mass = 
All.M200 - All.Disk_Mass - All.Bulge_Mass - All.BH_Mass; 81 | 82 | /* set the scale factor of the hernquist halo */ 83 | All.Halo_A = All.Halo_Rs * sqrt(2 * (log(1 + All.Halo_C) - All.Halo_C / (1 + All.Halo_C))); 84 | 85 | 86 | jhalo = All.Lambda * sqrt(All.G) * pow(All.M200, 1.5) * sqrt(2 * All.R200 / fc(All.Halo_C)); 87 | jdisk = All.JD * jhalo; 88 | 89 | double halo_spinfactor = 90 | 1.5 * All.Lambda * sqrt(2 * All.Halo_C / fc(All.Halo_C)) * pow(log(1 + All.Halo_C) - 91 | All.Halo_C / (1 + All.Halo_C), 92 | 1.5) / structure_gc(All.Halo_C); 93 | 94 | mpi_printf("\nStructural parameters:\n"); 95 | mpi_printf("R200 = %g\n", All.R200); 96 | mpi_printf("M200 = %g (this is the total mass)\n", All.M200); 97 | mpi_printf("A (halo) = %g\n", All.Halo_A); 98 | mpi_printf("halo_spinfactor = %g\n", halo_spinfactor); 99 | 100 | /* first guess for disk scale length */ 101 | All.Disk_H = sqrt(2.0) / 2.0 * All.Lambda / fc(All.Halo_C) * All.R200; 102 | All.Disk_Z0 = All.DiskHeight * All.Disk_H; /* sets disk thickness */ 103 | 104 | All.Bulge_A = All.BulgeSize * All.Halo_A; /* this will be used if no disk is present */ 105 | 106 | MType[1] = All.Halo_Mass; 107 | MType[2] = All.Disk_Mass; 108 | MType[3] = All.Bulge_Mass; 109 | 110 | NType[1] = All.Halo_N; 111 | NType[2] = All.Disk_N; 112 | NType[3] = All.Bulge_N; 113 | 114 | 115 | if(All.Disk_Mass > 0) 116 | { 117 | do 118 | { 119 | jd = structure_disk_angmomentum(); /* computes disk momentum */ 120 | 121 | hnew = jdisk / jd * All.Disk_H; 122 | 123 | dh = hnew - All.Disk_H; 124 | 125 | if(fabs(dh) > 0.5 * All.Disk_H) 126 | dh = 0.5 * All.Disk_H * dh / fabs(dh); 127 | else 128 | dh = dh * 0.1; 129 | 130 | All.Disk_H = All.Disk_H + dh; 131 | 132 | /* mpi_printf("Jd/J=%g hnew: %g \n", jd / jhalo, All.Disk_H); 133 | */ 134 | 135 | All.Disk_Z0 = All.DiskHeight * All.Disk_H; /* sets disk thickness */ 136 | } 137 | while(fabs(dh) / All.Disk_H > 1e-5); 138 | } 139 | 140 | mpi_printf("H (disk) = %g\n", All.Disk_H); 141 | mpi_printf("Z0 (disk) = %g\n", All.Disk_Z0); 142 | mpi_printf("A (bulge) = %g\n", All.Bulge_A); 143 | } 144 | 145 | 146 | double structure_disk_angmomentum(void) 147 | { 148 | gsl_function F; 149 | gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(WORKSIZE); 150 | F.function = &jdisk_int; 151 | 152 | double result, abserr; 153 | 154 | gsl_integration_qag(&F, 0, dmin(30 * All.Disk_H, All.R200), 155 | 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr); 156 | 157 | result *= All.Disk_Mass; 158 | 159 | gsl_integration_workspace_free(workspace); 160 | 161 | return result; 162 | } 163 | 164 | 165 | double structure_gc(double c) 166 | { 167 | gsl_function F; 168 | gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(WORKSIZE); 169 | F.function = &gc_int; 170 | 171 | double result, abserr; 172 | 173 | gsl_integration_qag(&F, 0, c, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr); 174 | 175 | gsl_integration_workspace_free(workspace); 176 | 177 | return result; 178 | } 179 | -------------------------------------------------------------------------------- /src/domain/domain_exchange.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | 15 | int myMPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, void *recvbuf, int *recvcounts, int *rdispls, int len, MPI_Comm comm) 
16 | { 17 | int i, ntask; 18 | MPI_Comm_size(comm, &ntask); 19 | 20 | int *scount = mymalloc("scount", ntask * sizeof(int)); 21 | int *rcount = mymalloc("rcount", ntask * sizeof(int)); 22 | int *soff = mymalloc("soff", ntask * sizeof(int)); 23 | int *roff = mymalloc("roff", ntask * sizeof(int)); 24 | 25 | for(i=0; i < ntask; i++) 26 | { 27 | scount[i] = sendcounts[i] * len; 28 | rcount[i] = recvcounts[i] * len; 29 | soff[i] = sdispls[i] * len; 30 | roff[i] = rdispls[i] * len; 31 | } 32 | 33 | int ret = MPI_Alltoallv(sendbuf, scount, soff, MPI_BYTE, 34 | recvbuf, rcount, roff, MPI_BYTE, comm); 35 | 36 | myfree(roff); 37 | myfree(soff); 38 | myfree(rcount); 39 | myfree(scount); 40 | 41 | return ret; 42 | } 43 | 44 | 45 | 46 | 47 | void domain_resize_storage(int count_get, int count_get_sph, int option_flag) 48 | { 49 | int max_load, load = NumPart + count_get; 50 | int max_sphload, sphload = NumGas + count_get_sph; 51 | MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 52 | MPI_Allreduce(&sphload, &max_sphload, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 53 | 54 | if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPart) 55 | { 56 | All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE); 57 | 58 | mpi_printf("ALLOCATE: Changing to MaxPart = %d\n", All.MaxPart); 59 | 60 | P = (struct particle_data *) myrealloc_movable(P, All.MaxPart * sizeof(struct particle_data)); 61 | 62 | if(option_flag == 1) 63 | Key = (peanokey *) myrealloc_movable(Key, sizeof(peanokey) * All.MaxPart); 64 | } 65 | } 66 | 67 | 68 | 69 | 70 | void domain_exchange(void) 71 | { 72 | double t0 = second(); 73 | 74 | int count_togo = 0, count_get = 0; 75 | int *count, *offset; 76 | int *count_recv, *offset_recv; 77 | int i, n, no, target; 78 | struct particle_data *partBuf; 79 | 80 | peanokey *keyBuf; 81 | 82 | long long sumtogo = 0; 83 | 84 | for(i = 0; i < NTask; i++) 85 | sumtogo += toGo[i]; 86 | 87 | sumup_longs(1, &sumtogo, &sumtogo); 88 | 89 | mpi_printf("DOMAIN: exchange of %lld particles\n", sumtogo); 90 | 91 | count = (int *) mymalloc_movable(&count, "count", NTask * sizeof(int)); 92 | offset = (int *) mymalloc_movable(&offset, "offset", NTask * sizeof(int)); 93 | count_recv = (int *) mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(int)); 94 | offset_recv = (int *) mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(int)); 95 | 96 | 97 | offset[0] = 0; 98 | for(i = 1; i < NTask; i++) 99 | offset[i] = offset[i - 1] + toGo[i - 1]; 100 | 101 | for(i = 0; i < NTask; i++) 102 | { 103 | count_togo += toGo[i]; 104 | count_get += toGet[i]; 105 | } 106 | 107 | 108 | partBuf = (struct particle_data *) mymalloc_movable(&partBuf, "partBuf", count_togo * sizeof(struct particle_data)); 109 | 110 | keyBuf = (peanokey *) mymalloc_movable(&keyBuf, "keyBuf", count_togo * sizeof(peanokey)); 111 | 112 | 113 | for(i = 0; i < NTask; i++) 114 | count[i] = 0; 115 | 116 | for(n = 0; n < NumPart; n++) 117 | { 118 | no = 0; 119 | 120 | while(topNodes[no].Daughter >= 0) 121 | no = topNodes[no].Daughter + (Key[n] - topNodes[no].StartKey) / (topNodes[no].Size / 8); 122 | 123 | no = topNodes[no].Leaf; 124 | 125 | target = DomainTask[no]; 126 | 127 | if(target != ThisTask) 128 | { 129 | partBuf[offset[target] + count[target]] = P[n]; 130 | keyBuf[offset[target] + count[target]] = Key[n]; 131 | count[target]++; 132 | 133 | P[n] = P[NumPart - 1]; 134 | Key[n] = Key[NumPart - 1]; 135 | NumPart--; 136 | n--; 137 | } 138 | } 139 | 140 | 141 | /**** now resize the storage 
for the P[] and SphP[] arrays if needed ****/ 142 | domain_resize_storage(count_get, 0, 1); 143 | 144 | /***** space has been created, now can do the actual exchange *****/ 145 | 146 | 147 | for(i = 0; i < NTask; i++) 148 | count_recv[i] = toGet[i]; 149 | 150 | offset_recv[0] = NumPart; 151 | 152 | for(i = 1; i < NTask; i++) 153 | offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1]; 154 | 155 | myMPI_Alltoallv(partBuf, count, offset, 156 | P, count_recv, offset_recv, 157 | sizeof(struct particle_data), MPI_COMM_WORLD); 158 | 159 | myMPI_Alltoallv(keyBuf, count, offset, 160 | Key, count_recv, offset_recv, 161 | sizeof(peanokey), MPI_COMM_WORLD); 162 | 163 | 164 | NumPart += count_get; 165 | 166 | 167 | myfree(keyBuf); 168 | myfree(partBuf); 169 | myfree(offset_recv); 170 | myfree(count_recv); 171 | myfree(offset); 172 | myfree(count); 173 | 174 | double t1 = second(); 175 | 176 | mpi_printf("DOMAIN: particle exchange done. (took %g sec)\n", timediff(t0, t1)); 177 | } 178 | -------------------------------------------------------------------------------- /src/domain/domain.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLVARS_H 2 | #include "../allvars.h" 3 | #endif 4 | #ifndef DOMAIN_H 5 | #define DOMAIN_H 6 | 7 | 8 | extern struct local_topnode_data 9 | { 10 | peanokey Size; /*!< number of Peano-Hilbert mesh-cells represented by top-level node */ 11 | peanokey StartKey; /*!< first Peano-Hilbert key in top-level node */ 12 | long long Count; /*!< counts the number of particles in this top-level node */ 13 | int Daughter; /*!< index of first daughter cell (out of 8) of top-level node */ 14 | int Leaf; /*!< if the node is a leaf, this gives its number when all leaves are traversed in Peano-Hilbert order */ 15 | int Parent; 16 | int PIndex; /*!< first particle in node */ 17 | } 18 | *topNodes, *branchNodes; /*!< points to the root node of the top-level tree */ 19 | 20 | struct domain_count_data 21 | { 22 | int task; 23 | int count; 24 | int origintask; 25 | }; 26 | 27 | 28 | 29 | extern struct domain_peano_hilbert_data 30 | { 31 | peanokey key; 32 | int index; 33 | } 34 | *mp; 35 | 36 | 37 | 38 | extern struct trans_data 39 | { 40 | MyIDType ID; 41 | int new_task; 42 | int new_index; 43 | int wrapped; 44 | } 45 | *trans_table; 46 | 47 | extern int N_trans; 48 | 49 | extern int Nbranch; 50 | 51 | extern double fac_load; 52 | 53 | 54 | extern double totpartcount; 55 | 56 | extern struct domain_cost_data 57 | { 58 | int no; 59 | int Count; /*!< a table that gives the total number of particles held by each processor */ 60 | } 61 | *DomainLeaveNode; 62 | 63 | 64 | 65 | /*! 
toGo[partner] gives the number of particles on the current task that have to go to task 'partner' 66 | */ 67 | extern int *toGo; 68 | extern int *toGet; 69 | extern int *list_NumPart; 70 | extern int *list_load; 71 | 72 | 73 | 74 | 75 | int domain_check_for_local_refine_new(int i, MPI_Comm current_comm); 76 | int domain_double_to_int(double d); 77 | double domain_grav_tot_costfactor(int i); 78 | double domain_hydro_tot_costfactor(int i); 79 | void domain_init_sum_cost(void); 80 | void domain_printf(char *buf); 81 | void domain_report_balance(void); 82 | int domain_sort_load(const void *a, const void *b); 83 | int domain_compare_count(const void *a, const void *b); 84 | int domain_sort_task(const void *a, const void *b); 85 | void domain_post_checks(void); 86 | void domain_prechecks(void); 87 | void domain_insertnode(struct local_topnode_data *treeA, struct local_topnode_data *treeB, int noA, int noB); 88 | void domain_add_cost(struct local_topnode_data *treeA, int noA, long long count, double cost, double sphcost); 89 | int domain_compare_count(const void *a, const void *b); 90 | void domain_rearrange_particle_sequence(void); 91 | void domain_combine_topleaves_to_domains(int ncpu, int ndomain); 92 | void domain_findSplit_load_balanced(int ncpu, int ndomain); 93 | int domain_sort_loadorigin(const void *a, const void *b); 94 | int domain_sort_segments(const void *a, const void *b); 95 | void domain_combine_multipledomains(void); 96 | void domain_allocate(void); 97 | void domain_Decomposition(void); 98 | int domain_check_memory_bound(void); 99 | int domain_compare_key(const void *a, const void *b); 100 | int domain_compare_key(const void *a, const void *b); 101 | int domain_compare_toplist(const void *a, const void *b); 102 | double domain_particle_costfactor(int i); 103 | int domain_countToGo(void); 104 | int domain_decompose(void); 105 | int domain_determineTopTree(void); 106 | void domain_exchange(void); 107 | void domain_findExchangeNumbers(int task, int partner, int sphflag, int *send, int *recv); 108 | void domain_findExtent(void); 109 | void domain_findSplit(int cpustart, int ncpu, int first, int last); 110 | void domain_findSplit_balanced(int cpustart, int ncpu, int first, int last); 111 | void domain_free(void); 112 | void domain_shiftSplit(void); 113 | void domain_sumCost(void); 114 | int domain_topsplit(int node, peanokey startkey); 115 | int domain_topsplit_local(int node, peanokey startkey, int mode); 116 | int domain_topsplit_special(void); 117 | int domain_compare_key(const void *a, const void *b); 118 | int domain_check_for_local_refine(int i, MPI_Comm comm, double work); 119 | void domain_free_trick(void); 120 | void domain_allocate_trick(void); 121 | int domain_recursively_combine_topTree(int start, int ncpu); 122 | void domain_walktoptree(int no); 123 | void domain_optimize_domain_to_task_mapping(void); 124 | int domain_compare_count(const void *a, const void *b); 125 | void domain_allocate_lists(void); 126 | void domain_free_lists(void); 127 | void domain_pack_tree_branch(int no, int parent); 128 | int domain_unpack_tree_branch(int no, int parent); 129 | int domain_check_for_local_refine_alt(int i, int *current_taskset); 130 | int domain_reduce_error_flag(int flag, int *current_taskset); 131 | int domain_do_local_refine(int n, int **list); 132 | void domain_preserve_relevant_topnode_data(void); 133 | void domain_find_total_cost(void); 134 | void domain_voronoi_dynamic_update_execute(void); 135 | void domain_prepare_voronoi_dynamic_update(void); 136 | void 
domain_voronoi_dynamic_flag_particles(void); 137 | void domain_mark_in_trans_table(int i, int task); 138 | void domain_exchange_and_update_DC(void); 139 | int domain_compare_connection_ID(const void *a, const void *b); 140 | int domain_compare_local_trans_data_ID(const void *a, const void *b); 141 | int domain_compare_recv_trans_data_ID(const void *a, const void *b); 142 | int domain_compare_recv_trans_data_oldtask(const void *a, const void *b); 143 | 144 | void mysort_domain(void *b, size_t n, size_t s); 145 | 146 | #endif 147 | -------------------------------------------------------------------------------- /src/halo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "allvars.h" 9 | #include "proto.h" 10 | 11 | 12 | /* this file contains auxiliary routines for the description of the halo, 13 | * here modeled as a Hernquist sphere 14 | */ 15 | 16 | /* this function returns a new random coordinate for the halo; the radius is drawn by inverting the Hernquist cumulative mass profile, q = M(<r)/M = (r/(r+a))^2, which gives r = a (q + sqrt(q)) / (1 - q) for uniform q, and the z-coordinate is compressed by HaloStretch */ 17 | void halo_get_fresh_coordinate(double *pos) 18 | { 19 | double r; 20 | 21 | do 22 | { 23 | double q = gsl_rng_uniform(random_generator); 24 | 25 | if(q > 0) 26 | r = All.Halo_A * (q + sqrt(q)) / (1 - q); 27 | else 28 | r = 0; 29 | 30 | double phi = gsl_rng_uniform(random_generator) * M_PI * 2; 31 | double theta = acos(gsl_rng_uniform(random_generator) * 2 - 1); 32 | 33 | pos[0] = r * sin(theta) * cos(phi); 34 | pos[1] = r * sin(theta) * sin(phi); 35 | pos[2] = r * cos(theta) / All.HaloStretch; 36 | 37 | r = sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]); 38 | } 39 | while(r > All.Rmax); 40 | } 41 | 42 | 43 | double halo_get_density(double *pos) 44 | { 45 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + All.HaloStretch * All.HaloStretch * pos[2] * pos[2]); 46 | 47 | double rho = All.HaloStretch * All.Halo_Mass / (2 * M_PI) * All.Halo_A / (r + 1.0e-6 * All.Halo_A) / pow(r + All.Halo_A, 3); 48 | 49 | if(fabs(rho) < MIN_DENSITY) rho = 0; 50 | 51 | return rho; 52 | } 53 | 54 | 55 | /* Note that the other functions below will only be called in a meaningful way for a spherical system */ 56 | 57 | double halo_get_mass_inside_radius(double r) 58 | { 59 | return All.Halo_Mass * pow(r / (r + All.Halo_A), 2); 60 | } 61 | 62 | 63 | double halo_get_potential(double *pos) 64 | { 65 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]); 66 | return halo_get_potential_from_radius(r); 67 | } 68 | 69 | double halo_get_potential_from_radius(double r) 70 | { 71 | double phi = -All.G * All.Halo_Mass / (r + All.Halo_A); 72 | return phi; 73 | } 74 | 75 | /* returns the acceleration at coordinate pos[] */ 76 | void halo_get_acceleration(double *pos, double *acc) 77 | { 78 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]); 79 | double fac = All.G * All.Halo_Mass / ((r + 1.0e-6 * All.Halo_A)* (r + All.Halo_A) * (r + All.Halo_A)); 80 | 81 | acc[0] = -fac * pos[0]; 82 | acc[1] = -fac * pos[1]; 83 | acc[2] = -fac * pos[2]; 84 | } 85 | 86 | double halo_get_escape_speed(double *pos) 87 | { 88 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]); 89 | double phi = -All.G * All.Halo_Mass / (r + All.Halo_A); 90 | double vesc = sqrt(-2.0 * phi); 91 | 92 | return vesc; 93 | } 94 | 95 | double halo_get_sigma2(double *pos) 96 | { 97 | long double r = sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]); 98 | 99 | long double m = All.Halo_Mass; 100 | long double r0 = All.Halo_A; 101 | long double r_over_r0 = r/r0; 102 | 103 | 
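/* What follows is the radial velocity dispersion of the isotropic Hernquist
 * sphere, i.e. the closed-form solution of the spherical Jeans equation
 * (Hernquist 1990):
 *
 *   sigma_r^2(r) = G M / (12 a) * [ 12 r (r+a)^3 / a^4 * ln((r+a)/r)
 *                  - r/(r+a) * (25 + 52 r/a + 42 (r/a)^2 + 12 (r/a)^3) ]
 *
 * with a = Halo_A. The two terms nearly cancel for large r, which is why
 * the evaluation is done in long double and wrapped in fabsl(). */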
long double _sigma2 = 104 | (long double)(All.G*m)/(12.0*r0)* 105 | fabsl( 12*r*powl(r+r0,3)/powl(r0,4)*logl((r+r0)/r) 106 | - 107 | r/(r+r0)*(25 + r_over_r0*(52 + 42*r_over_r0 + 12*(r_over_r0*r_over_r0) ) ) 108 | ); 109 | 110 | // precision breaks down badly beyond this point, so leave it like this for a while 111 | if (65000 v_guess * v_guess * halo_f(rad, v_guess)) 179 | { 180 | v_guess = gsl_rng_uniform(random_generator) * v_max; 181 | x_aux = gsl_rng_uniform(random_generator) * f_max; 182 | } 183 | return v_guess; 184 | } 185 | -------------------------------------------------------------------------------- /src/mpi_utils/mpi_util.c: -------------------------------------------------------------------------------- 1 | /** \file 2 | MPI utility functions. 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | 12 | /** Implements the common idiom of exchanging buffers with every other 13 | MPI task. The numbers of items to send and receive are in the 14 | send_count and recv_count arrays, respectively. The data to 15 | exchange are in send_buf and recv_buf, and the offset to the 16 | location of the data to/from each task is in send_offset and 17 | recv_offset. Since the buffer pointers are void*, the size of the 18 | items to be exchanged is in item_size, and the tag to apply to 19 | the MPI call is in commtag. If include_self is true, the send 20 | data for ThisTask is also copied to the receive buffer. 21 | 22 | All arrays should be allocated with NTask size. */ 23 | void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, 24 | void *recv_buf, int *recv_count, int *recv_offset, int item_size, int commtag, int include_self) 25 | { 26 | int ngrp; 27 | // when include_self is set, the loop starts at ngrp=0; recvTask then 28 | // equals ThisTask and the MPI_Sendrecv below acts as a local self-copy. 29 | for(ngrp = include_self ? 0 : 1; ngrp < (1 << PTask); ngrp++) 30 | { 31 | int recvTask = ThisTask ^ ngrp; 32 | 33 | if(recvTask < NTask) 34 | { 35 | if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) 36 | { 37 | /* exchange data */ 38 | MPI_Sendrecv((char *) send_buf + send_offset[recvTask] * item_size, 39 | send_count[recvTask] * item_size, MPI_BYTE, 40 | recvTask, commtag, 41 | (char *) recv_buf + recv_offset[recvTask] * item_size, 42 | recv_count[recvTask] * item_size, MPI_BYTE, recvTask, commtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 43 | } 44 | } 45 | } 46 | } 47 | 48 | /** Calculates the recv_count, send_offset, and recv_offset arrays 49 | based on the send_count. Returns nimport, the total number of 50 | particles to be received. If an identical set of copies is to be 51 | sent to all tasks, set send_identical=1 and the send_offset will 52 | be zero for all tasks. 53 | 54 | All arrays should be allocated with NTask size. */ 55 | int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical) 56 | { 57 | // Exchange the send/receive counts 58 | MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD); 59 | 60 | int nimport = 0; 61 | recv_offset[0] = 0; 62 | send_offset[0] = 0; 63 | int j; 64 | for(j = 0; j < NTask; j++) 65 | { 66 | nimport += recv_count[j]; 67 | 68 | if(j > 0) 69 | { 70 | send_offset[j] = send_offset[j - 1] + (send_identical ? 0 : send_count[j - 1]); 71 | recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; 72 | } 73 | } 74 | return nimport; 75 | } 76 | 77 | 78 | /** Compare function used to sort an array of int pointers into order 79 | of the pointer targets. 
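The void* signature lets it be passed directly to qsort()-style sorters, such as the mysort() call in sort_based_on_field() below.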
*/ 80 | int intpointer_compare(const void *a, const void *b) 81 | { 82 | if((**(int **) a) < (**(int **) b)) 83 | return -1; 84 | 85 | if((**(int **) a) > (**(int **) b)) 86 | return +1; 87 | 88 | return 0; 89 | } 90 | 91 | 92 | /** Sort an opaque array into increasing order of an int field, given 93 | by the specified offset. (This would typically be field indicating 94 | the task.) Returns a sorted copy of the data array, that needs to 95 | be myfreed. 96 | 97 | We do this by sorting an array of pointers to the task field, and 98 | then using this array to deduce the reordering of the data 99 | array. Unfortunately this means making a copy of the data, but 100 | this just replaces the copy after the mpi_exchange_buffers 101 | anyway. */ 102 | void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size) 103 | { 104 | int i; 105 | char *data2; 106 | int **perm; 107 | data2 = mymalloc("data2", n_items * item_size); 108 | perm = mymalloc("perm", n_items * sizeof(*perm)); 109 | 110 | for(i = 0; i < n_items; ++i) 111 | perm[i] = (int *) ((char *) data + i * item_size + field_offset); 112 | 113 | mysort(perm, n_items, sizeof(*perm), intpointer_compare); 114 | // reorder data into data2 115 | for(i = 0; i < n_items; ++i) 116 | { 117 | size_t orig_pos = ((char *) perm[i] - ((char *) data + field_offset)) / item_size; 118 | myassert(((char *) perm[i] - ((char *) data + field_offset)) % item_size == 0); 119 | memcpy(data2 + item_size * i, (char *) data + item_size * orig_pos, item_size); 120 | } 121 | 122 | myfree(perm); 123 | 124 | return (void *) data2; 125 | } 126 | 127 | /** This function distributes the members in an opaque structure to 128 | the tasks based on a task field given by a specified offset into 129 | the opaque struct. The task field must have int type. n_items is 130 | updated to the new size of data. max_n is the allocated size of 131 | the data array, and is updated if a realloc is necessary. 
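    A minimal usage sketch (the struct and tag are illustrative, not from
    this code base; the buffer must have been allocated with
    mymalloc_movable() so the allocator can update the caller's pointer if
    the array has to grow):

      struct item { int task; double payload; };   /* hypothetical */
      int n = n_local, nmax = n_alloc;
      mpi_distribute_items_to_tasks(items, offsetof(struct item, task),
                                    &n, &nmax, sizeof(struct item), 4711);

    afterwards items[0..n-1] holds the entries received by this task.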
*/ 132 | void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag) 133 | { 134 | int i; 135 | 136 | for(i = 0; i < NTask; i++) 137 | Send_count[i] = 0; 138 | 139 | for(i = 0; i < *n_items; i++) 140 | { 141 | int task = *(int *) ((char *) data + i * item_size + task_offset); 142 | myassert(task >= 0 && task < NTask); 143 | Send_count[task]++; 144 | } 145 | 146 | void *data2 = sort_based_on_field(data, task_offset, 147 | *n_items, item_size); 148 | 149 | int nimport = mpi_calculate_offsets(Send_count, Send_offset, 150 | Recv_count, Recv_offset, 0); 151 | 152 | if(*max_n < nimport) 153 | { 154 | data = myrealloc_movable(data, nimport * item_size); 155 | *max_n = nimport; 156 | } 157 | 158 | mpi_exchange_buffers(data2, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1); 159 | 160 | myfree(data2); 161 | 162 | *n_items = nimport; 163 | } 164 | -------------------------------------------------------------------------------- /src/forcetree/forcetree_walk.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | 12 | int force_treeevaluate(int i, int mode, int thread_id) 13 | { 14 | struct NODE *nop = 0; 15 | int k, target, numnodes, no, task; 16 | double r2, dx, dy, dz, mass, r, u, hmax, h_inv, h3_inv; 17 | double pos_x, pos_y, pos_z; 18 | double fac; 19 | double acc_x = 0; 20 | double acc_y = 0; 21 | double acc_z = 0; 22 | double wp, pot = 0.0; 23 | 24 | int ninteractions = 0; 25 | 26 | hmax = All.ForceSoftening; 27 | 28 | if(mode == 0) 29 | { 30 | target = TargetList[i]; 31 | 32 | if(target < NumPart) 33 | { 34 | pos_x = Tree_Pos_list[3 * target + 0]; 35 | pos_y = Tree_Pos_list[3 * target + 1]; 36 | pos_z = Tree_Pos_list[3 * target + 2]; 37 | } 38 | else 39 | { 40 | terminate("target >= NumPart"); 41 | } 42 | 43 | numnodes = 1; 44 | } 45 | else 46 | { 47 | target = i; 48 | pos_x = GravDataGet[target].Pos[0]; 49 | pos_y = GravDataGet[target].Pos[1]; 50 | pos_z = GravDataGet[target].Pos[2]; 51 | 52 | if(target == Nimport - 1) 53 | numnodes = NimportNodes - GravDataGet[target].Firstnode; 54 | else 55 | numnodes = GravDataGet[target + 1].Firstnode - GravDataGet[target].Firstnode; 56 | } 57 | 58 | for(k = 0; k < numnodes; k++) 59 | { 60 | if(mode == 0) 61 | no = Tree_MaxPart; /* root node */ 62 | else 63 | { 64 | no = NodeDataGet[GravDataGet[target].Firstnode + k]; 65 | no = Nodes[no].u.d.nextnode; /* open it */ 66 | } 67 | 68 | while(no >= 0) 69 | { 70 | if(no < Tree_MaxPart) /* single particle */ 71 | { 72 | dx = Tree_Pos_list[3 * no + 0] - pos_x; 73 | dy = Tree_Pos_list[3 * no + 1] - pos_y; 74 | dz = Tree_Pos_list[3 * no + 2] - pos_z; 75 | 76 | r2 = dx * dx + dy * dy + dz * dz; 77 | 78 | mass = P[no].Mass; 79 | 80 | no = Nextnode[no]; 81 | } 82 | else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ 83 | { 84 | if(mode == 1) 85 | { 86 | if(no < Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ 87 | { 88 | no = -1; 89 | continue; 90 | } 91 | } 92 | 93 | nop = &Nodes[no]; 94 | mass = nop->u.d.mass; 95 | 96 | dx = nop->u.d.s[0] - pos_x; 97 | dy = nop->u.d.s[1] - pos_y; 98 | dz = nop->u.d.s[2] - pos_z; 99 | 100 | r2 = dx * dx + dy * dy + dz * dz; 101 | 102 | /* we have an internal node. 
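Its multipole moment may stand in for the individual interactions of everything it contains, provided the cell subtends a small enough angle at the target: with the geometric criterion used here, the cell is used unopened when len/r < ErrTolTheta, with the comparison done on squares to avoid a sqrt.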
Need to check opening criterion */ 103 | 104 | if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta) 105 | { 106 | /* open cell */ 107 | no = nop->u.d.nextnode; 108 | continue; 109 | } 110 | 111 | /* ok, node can be used */ 112 | 113 | no = nop->u.d.sibling; 114 | } 115 | else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ 116 | { 117 | int n = no - Tree_ImportedNodeOffset; 118 | 119 | dx = Tree_Points[n].Pos[0] - pos_x; 120 | dy = Tree_Points[n].Pos[1] - pos_y; 121 | dz = Tree_Points[n].Pos[2] - pos_z; 122 | 123 | r2 = dx * dx + dy * dy + dz * dz; 124 | 125 | mass = Tree_Points[n].Mass; 126 | 127 | no = Nextnode[no - Tree_MaxNodes]; 128 | } 129 | else /* pseudo particle */ 130 | { 131 | if(mode == 0) 132 | { 133 | task = DomainNewTask[no - (Tree_MaxPart + Tree_MaxNodes)]; 134 | 135 | if(ThreadsExportflag[thread_id][task] != i) 136 | { 137 | ThreadsExportflag[thread_id][task] = i; 138 | int nexp = ThreadsNexport[thread_id]++; 139 | if(nexp >= MaxNexport) 140 | terminate("nexp >= MaxNexport"); 141 | ThreadsPartList[thread_id][nexp].Task = task; 142 | ThreadsPartList[thread_id][nexp].Index = i; 143 | } 144 | 145 | int nexp = ThreadsNexportNodes[thread_id]++; 146 | if(nexp >= MaxNexportNodes) 147 | terminate("nexp >= MaxNexportNodes"); 148 | ThreadsNodeList[thread_id][nexp].Task = task; 149 | ThreadsNodeList[thread_id][nexp].Index = i; 150 | ThreadsNodeList[thread_id][nexp].Node = DomainNodeIndex[no - (Tree_MaxPart + Tree_MaxNodes)]; 151 | } 152 | 153 | no = Nextnode[no - Tree_MaxNodes]; 154 | continue; 155 | } 156 | 157 | /* now evaluate the multipole moment */ 158 | if(mass) 159 | { 160 | r = sqrt(r2); 161 | 162 | if(r >= hmax) 163 | { 164 | fac = mass / (r2 * r); 165 | wp = -mass / r; 166 | } 167 | else 168 | { 169 | h_inv = 1.0 / hmax; 170 | h3_inv = h_inv * h_inv * h_inv; 171 | u = r * h_inv; 172 | 173 | if(u < 0.5) 174 | { 175 | fac = mass * h3_inv * (10.666666666667 + u * u * (32.0 * u - 38.4)); 176 | wp = mass * h_inv * (-2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6))); 177 | } 178 | else 179 | { 180 | fac = mass * h3_inv * (21.333333333333 - 48.0 * u + 38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u)); 181 | wp = mass * h_inv * (-3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)))); 182 | } 183 | } 184 | 185 | acc_x += dx * fac; 186 | acc_y += dy * fac; 187 | acc_z += dz * fac; 188 | pot += wp; 189 | 190 | ninteractions++; 191 | } 192 | } 193 | } 194 | 195 | /* store result at the proper place */ 196 | if(mode == 0) 197 | { 198 | if(target < NumPart) 199 | { 200 | P[target].GravAccel[0] = acc_x; 201 | P[target].GravAccel[1] = acc_y; 202 | P[target].GravAccel[2] = acc_z; 203 | P[target].Potential = pot; 204 | } 205 | else 206 | { 207 | int idx = Tree_ResultIndexList[target - Tree_ImportedNodeOffset]; 208 | Tree_ResultsActiveImported[idx].GravAccel[0] = acc_x; 209 | Tree_ResultsActiveImported[idx].GravAccel[1] = acc_y; 210 | Tree_ResultsActiveImported[idx].GravAccel[2] = acc_z; 211 | Tree_ResultsActiveImported[idx].Potential = pot; 212 | } 213 | } 214 | else 215 | { 216 | GravDataResult[target].Acc[0] = acc_x; 217 | GravDataResult[target].Acc[1] = acc_y; 218 | GravDataResult[target].Acc[2] = acc_z; 219 | GravDataResult[target].Potential = pot; 220 | } 221 | return ninteractions; 222 | } 223 | -------------------------------------------------------------------------------- /src/domain/pqueue.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Volkan Yazıcı 3 | * Copyright 2006-2010 The Apache Software Foundation 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 6 | * use this file except in compliance with the License. You may obtain a copy of 7 | * the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | * License for the specific language governing permissions and limitations under 15 | * the License. 16 | */ 17 | 18 | /* V. Springel modified some of the memory allocation calls to inline it with 19 | * our internal memory handler. 20 | */ 21 | 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "pqueue.h" 28 | #include "../allvars.h" 29 | #include "../proto.h" 30 | 31 | #define left(i) ((i) << 1) 32 | #define right(i) (((i) << 1) + 1) 33 | #define parent(i) ((i) >> 1) 34 | 35 | 36 | pqueue_t *pqueue_init(size_t n, 37 | pqueue_cmp_pri_f cmppri, pqueue_get_pri_f getpri, pqueue_set_pri_f setpri, pqueue_get_pos_f getpos, pqueue_set_pos_f setpos) 38 | { 39 | pqueue_t *q; 40 | 41 | q = mymalloc("q", sizeof(pqueue_t)); 42 | 43 | /* Need to allocate n+1 elements since element 0 isn't used. */ 44 | q->d = mymalloc("q->d", (n + 1) * sizeof(void *)); 45 | 46 | q->size = 1; 47 | q->avail = q->step = (n + 1); /* see comment above about n+1 */ 48 | q->cmppri = cmppri; 49 | q->setpri = setpri; 50 | q->getpri = getpri; 51 | q->getpos = getpos; 52 | q->setpos = setpos; 53 | 54 | return q; 55 | } 56 | 57 | 58 | void pqueue_free(pqueue_t * q) 59 | { 60 | myfree(q->d); 61 | myfree(q); 62 | } 63 | 64 | 65 | size_t pqueue_size(pqueue_t * q) 66 | { 67 | /* queue element 0 exists but doesn't count since it isn't used. 
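Keeping the heap 1-based means the children of node i sit at 2i and 2i+1 and its parent at i/2, which is exactly what the left/right/parent shift macros above compute.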
*/ 68 | return (q->size - 1); 69 | } 70 | 71 | 72 | static void bubble_up(pqueue_t * q, size_t i) 73 | { 74 | size_t parent_node; 75 | void *moving_node = q->d[i]; 76 | pqueue_pri_t moving_pri = q->getpri(moving_node); 77 | 78 | for(parent_node = parent(i); ((i > 1) && q->cmppri(q->getpri(q->d[parent_node]), moving_pri)); i = parent_node, parent_node = parent(i)) 79 | { 80 | q->d[i] = q->d[parent_node]; 81 | q->setpos(q->d[i], i); 82 | } 83 | 84 | q->d[i] = moving_node; 85 | q->setpos(moving_node, i); 86 | } 87 | 88 | 89 | static size_t maxchild(pqueue_t * q, size_t i) 90 | { 91 | size_t child_node = left(i); 92 | 93 | if(child_node >= q->size) 94 | return 0; 95 | 96 | if((child_node + 1) < q->size && q->cmppri(q->getpri(q->d[child_node]), q->getpri(q->d[child_node + 1]))) 97 | child_node++; /* use right child instead of left */ 98 | 99 | return child_node; 100 | } 101 | 102 | 103 | static void percolate_down(pqueue_t * q, size_t i) 104 | { 105 | size_t child_node; 106 | void *moving_node = q->d[i]; 107 | pqueue_pri_t moving_pri = q->getpri(moving_node); 108 | 109 | while((child_node = maxchild(q, i)) && q->cmppri(moving_pri, q->getpri(q->d[child_node]))) 110 | { 111 | q->d[i] = q->d[child_node]; 112 | q->setpos(q->d[i], i); 113 | i = child_node; 114 | } 115 | 116 | q->d[i] = moving_node; 117 | q->setpos(moving_node, i); 118 | } 119 | 120 | 121 | int pqueue_insert(pqueue_t * q, void *d) 122 | { 123 | size_t i; 124 | size_t newsize; 125 | 126 | if(!q) 127 | return 1; 128 | 129 | /* allocate more memory if necessary */ 130 | if(q->size >= q->avail) 131 | { 132 | newsize = q->size + q->step; 133 | q->d = myrealloc(q->d, sizeof(void *) * newsize); 134 | q->avail = newsize; 135 | } 136 | 137 | /* insert item */ 138 | i = q->size++; 139 | q->d[i] = d; 140 | bubble_up(q, i); 141 | 142 | return 0; 143 | } 144 | 145 | 146 | void pqueue_change_priority(pqueue_t * q, pqueue_pri_t new_pri, void *d) 147 | { 148 | size_t posn; 149 | pqueue_pri_t old_pri = q->getpri(d); 150 | 151 | q->setpri(d, new_pri); 152 | posn = q->getpos(d); 153 | if(q->cmppri(old_pri, new_pri)) 154 | bubble_up(q, posn); 155 | else 156 | percolate_down(q, posn); 157 | } 158 | 159 | 160 | int pqueue_remove(pqueue_t * q, void *d) 161 | { 162 | size_t posn = q->getpos(d); 163 | q->d[posn] = q->d[--q->size]; 164 | if(q->cmppri(q->getpri(d), q->getpri(q->d[posn]))) 165 | bubble_up(q, posn); 166 | else 167 | percolate_down(q, posn); 168 | 169 | return 0; 170 | } 171 | 172 | 173 | void *pqueue_pop(pqueue_t * q) 174 | { 175 | void *head; 176 | 177 | if(!q || q->size == 1) 178 | return NULL; 179 | 180 | head = q->d[1]; 181 | q->d[1] = q->d[--q->size]; 182 | percolate_down(q, 1); 183 | 184 | return head; 185 | } 186 | 187 | 188 | void *pqueue_peek(pqueue_t * q) 189 | { 190 | void *d; 191 | if(!q || q->size == 1) 192 | return NULL; 193 | d = q->d[1]; 194 | return d; 195 | } 196 | 197 | 198 | void pqueue_dump(pqueue_t * q, FILE * out, pqueue_print_entry_f print) 199 | { 200 | size_t i; 201 | 202 | fprintf(out, "posn\tleft\tright\tparent\tmaxchild\t...\n"); 203 | for(i = 1; i < q->size; i++) 204 | { 205 | fprintf(out, "%zu\t%zu\t%zu\t%zu\t%u\t", i, left(i), right(i), parent(i), (unsigned int) maxchild(q, i)); 206 | print(out, q->d[i]); 207 | } 208 | } 209 | 210 | 211 | static void set_pos(void *d, size_t val) 212 | { 213 | /* do nothing */ 214 | } 215 | 216 | 217 | static void set_pri(void *d, pqueue_pri_t pri) 218 | { 219 | /* do nothing */ 220 | } 221 | 222 | 223 | void pqueue_print(pqueue_t * q, FILE * out, pqueue_print_entry_f print) 224 | 
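/* prints the entries in priority order; works on a scratch copy of the heap
   so that the repeated pops do not disturb the original queue */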
{ 225 | pqueue_t *dup; 226 | void *e; 227 | 228 | dup = pqueue_init(q->size, q->cmppri, q->getpri, set_pri, q->getpos, set_pos); 229 | dup->size = q->size; 230 | dup->avail = q->avail; 231 | dup->step = q->step; 232 | 233 | memcpy(dup->d, q->d, (q->size * sizeof(void *))); 234 | 235 | while((e = pqueue_pop(dup))) 236 | print(out, e); 237 | 238 | pqueue_free(dup); 239 | } 240 | 241 | 242 | static int subtree_is_valid(pqueue_t * q, int pos) 243 | { 244 | if(left(pos) < q->size) 245 | { 246 | /* has a left child */ 247 | if(q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[left(pos)]))) 248 | return 0; 249 | if(!subtree_is_valid(q, left(pos))) 250 | return 0; 251 | } 252 | if(right(pos) < q->size) 253 | { 254 | /* has a right child */ 255 | if(q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[right(pos)]))) 256 | return 0; 257 | if(!subtree_is_valid(q, right(pos))) 258 | return 0; 259 | } 260 | return 1; 261 | } 262 | 263 | 264 | int pqueue_is_valid(pqueue_t * q) 265 | { 266 | return subtree_is_valid(q, 1); 267 | } 268 | -------------------------------------------------------------------------------- /src/domain/domain.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | #include "domain.h" 11 | 12 | /*! \file domain.c 13 | * \brief code for domain decomposition 14 | * 15 | * This file contains the code for the domain decomposition of the 16 | * simulation volume. The domains are constructed from disjoint subsets 17 | * of the leaves of a fiducial top-level tree that covers the full 18 | * simulation volume. Domain boundaries hence run along tree-node 19 | * divisions of a fiducial global BH tree. As a result of this method, the 20 | * tree force is in principle strictly independent of the way the domains 21 | * are cut. The domain decomposition can be carried out for an arbitrary 22 | * number of CPUs. Individual domains are not cubical, but spatially 23 | * coherent since the leaves are traversed in a Peano-Hilbert order and 24 | * individual domains form segments along this order. This also ensures 25 | * that each domain has a small surface-to-volume ratio, which minimizes 26 | * communication. 27 | */ 28 | 29 | 30 | 31 | /*! This is the main routine for the domain decomposition. It acts as a 32 | * driver routine that allocates various temporary buffers, maps the 33 | * particles back onto the periodic box if needed, and then carries out the 34 | * domain decomposition, followed by a final Peano-Hilbert ordering of all 35 | * particles as a tuning measure. 
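* On completion, the achieved load balance is reported as the maximum
 * per-task particle count divided by the ideal average TotNumPart/NTask.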
36 | */ 37 | void domain_Decomposition(void) 38 | { 39 | mpi_printf("DOMAIN:\n"); 40 | mpi_printf("DOMAIN: Begin domain decomposition (sync-point %d).\n", All.NumCurrentTiStep); 41 | 42 | 43 | 44 | domain_allocate(); 45 | domain_allocate_lists(); 46 | topNodes = (struct local_topnode_data *) mymalloc_movable(&topNodes, "topNodes", (MaxTopNodes * sizeof(struct local_topnode_data))); 47 | 48 | /* find total cost factors */ 49 | domain_find_total_cost(); 50 | 51 | /* determine global dimensions of domain grid */ 52 | domain_findExtent(); 53 | 54 | /* determine top-level tree */ 55 | domain_determineTopTree(); 56 | 57 | /* find the split of the top-level tree */ 58 | domain_combine_topleaves_to_domains(All.MultipleDomains * NTask, NTopleaves); 59 | 60 | /* combine on each MPI task several of the domains (namely the number All.MultipleDomains) */ 61 | domain_combine_multipledomains(); 62 | 63 | /* permute the task assignment such that the smallest number of particles needs to be moved */ 64 | domain_optimize_domain_to_task_mapping(); 65 | 66 | /* determine for each cpu how many particles have to be shifted to other cpus */ 67 | domain_countToGo(); 68 | 69 | /* finally, carry out the actual particle exchange */ 70 | domain_exchange(); 71 | 72 | /* copy what we need for the topnodes */ 73 | domain_preserve_relevant_topnode_data(); 74 | myfree(topNodes); 75 | domain_free_lists(); 76 | 77 | int nummax; 78 | MPI_Allreduce(&NumPart, &nummax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 79 | mpi_printf("\nDOMAIN: ----> Final load balance = %g <------\n\n", nummax / ( ((double)All.TotNumPart) / NTask)); 80 | mpi_printf("DOMAIN: domain decomposition done.\n"); 81 | 82 | peano_hilbert_order(); 83 | myfree(Key); 84 | 85 | TopNodes = (struct topnode_data *) myrealloc_movable(TopNodes, NTopnodes * sizeof(struct topnode_data)); 86 | DomainTask = (int *) myrealloc_movable(DomainTask, NTopleaves * sizeof(int)); 87 | } 88 | 89 | 90 | 91 | void domain_preserve_relevant_topnode_data(void) /* copies the persistent topNodes[] fields into TopNodes[] and tabulates each node's Morton-to-Peano subnode mapping */ 92 | { 93 | int i; 94 | 95 | for(i = 0; i < NTopnodes; i++) 96 | { 97 | TopNodes[i].StartKey = topNodes[i].StartKey; 98 | TopNodes[i].Size = topNodes[i].Size; 99 | TopNodes[i].Daughter = topNodes[i].Daughter; 100 | TopNodes[i].Leaf = topNodes[i].Leaf; 101 | 102 | int j; 103 | int bits = my_ffsll(TopNodes[i].Size); 104 | int blocks = (bits - 1) / 3 - 1; 105 | 106 | for(j = 0; j < 8; j++) 107 | { 108 | int xb, yb, zb; 109 | peano_hilbert_key_inverse(TopNodes[i].StartKey + j * (TopNodes[i].Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb); 110 | xb >>= blocks; 111 | yb >>= blocks; 112 | zb >>= blocks; 113 | int idx = (xb & 1) | ((yb & 1) << 1) | ((zb & 1) << 2); 114 | if(idx < 0 || idx > 7) 115 | { 116 | char buf[1000]; 117 | sprintf(buf, "j=%d idx=%d xb=%d yb=%d zb=%d blocks=%d bits=%d size=%lld\n", j, idx, xb, yb, zb, blocks, bits, TopNodes[i].Size); 118 | terminate(buf); 119 | } 120 | TopNodes[i].MortonToPeanoSubnode[idx] = j; 121 | } 122 | } 123 | } 124 | 125 | 126 | void domain_find_total_cost(void) 127 | { 128 | int i; 129 | long long Ntype[6]; /*!< total number of particles of each type */ 130 | int NtypeLocal[6]; /*!< local number of particles of each type */ 131 | 132 | if(All.MultipleDomains < 1 || All.MultipleDomains > 512) 133 | terminate("All.MultipleDomains < 1 || All.MultipleDomains > 512"); 134 | 135 | for(i = 0; i < 6; i++) 136 | NtypeLocal[i] = 0; 137 | 138 | for(i = 0; i < NumPart; i++) 139 | NtypeLocal[P[i].Type]++; 140 | 141 | /* because Ntype[] is of type `long long', we cannot do a simple 142 | * MPI_Allreduce() to 
sum the total particle numbers 143 | */ 144 | sumup_large_ints(6, NtypeLocal, Ntype); 145 | 146 | for(i = 0, totpartcount = 0; i < 6; i++) 147 | totpartcount += Ntype[i]; 148 | 149 | fac_load = 1.0 / totpartcount; 150 | } 151 | 152 | 153 | 154 | 155 | int domain_double_to_int(double d) /* assumes d has been scaled into [1,2); extracts the leading BITS_PER_DIMENSION mantissa bits as an integer grid coordinate */ 156 | { 157 | union 158 | { 159 | double d; 160 | unsigned long long ull; 161 | } u; 162 | u.d = d; 163 | return (int) ((u.ull & 0xFFFFFFFFFFFFFllu) >> (52 - BITS_PER_DIMENSION)); 164 | } 165 | 166 | 167 | 168 | /*! This function allocates the storage that will be required for the tree construction/walk later on */ 169 | void domain_allocate(void) 170 | { 171 | MaxTopNodes = (int) (All.TopNodeAllocFactor * All.MaxPart + 1); 172 | 173 | if(DomainStartList) 174 | terminate("domain storage already allocated"); 175 | 176 | DomainStartList = (int *) mymalloc_movable(&DomainStartList, "DomainStartList", (NTask * All.MultipleDomains * sizeof(int))); 177 | DomainEndList = (int *) mymalloc_movable(&DomainEndList, "DomainEndList", (NTask * All.MultipleDomains * sizeof(int))); 178 | TopNodes = (struct topnode_data *) mymalloc_movable(&TopNodes, "TopNodes", (MaxTopNodes * sizeof(struct topnode_data))); 179 | DomainTask = (int *) mymalloc_movable(&DomainTask, "DomainTask", (MaxTopNodes * sizeof(int))); 180 | } 181 | 182 | 183 | 184 | void domain_free(void) 185 | { 186 | if(!DomainStartList) 187 | terminate("domain storage not allocated"); 188 | 189 | myfree(DomainTask); 190 | myfree(TopNodes); 191 | myfree(DomainEndList); 192 | myfree(DomainStartList); 193 | 194 | DomainTask = NULL; 195 | TopNodes = NULL; 196 | DomainEndList = NULL; 197 | DomainStartList = NULL; 198 | } 199 | 200 | void domain_printf(char *buf) 201 | { 202 | if(RestartFlag <= 2) 203 | { 204 | printf("%s", buf); 205 | } 206 | } 207 | 208 | 209 | -------------------------------------------------------------------------------- /src/domain/domain_toplevel.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | #include "domain.h" 11 | 12 | /*! This function constructs the global top-level tree that is used 13 | * for the domain decomposition. This is done by considering the string of 14 | * Peano-Hilbert keys for all particles, which is recursively chopped 15 | * into pieces of eight segments until each segment holds at most a certain 16 | * number of particles. 
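* A segment is refined further only while its summed work fraction exceeds
 * 1/(TopNodeFactor * MultipleDomains * NTask), the limit applied by
 * domain_do_local_refine() below.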
17 | */ 18 | int domain_determineTopTree(void) 19 | { 20 | int i, count; 21 | 22 | mp = (struct domain_peano_hilbert_data *) mymalloc_movable(&mp, "mp", sizeof(struct domain_peano_hilbert_data) * NumPart); 23 | 24 | for(i = 0, count = 0; i < NumPart; i++) 25 | { 26 | int xb = domain_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0); 27 | int yb = domain_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0); 28 | int zb = domain_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0); 29 | 30 | mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION); 31 | mp[count].index = i; 32 | count++; 33 | } 34 | 35 | mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data)); 36 | 37 | NTopnodes = 1; 38 | topNodes[0].Daughter = -1; 39 | topNodes[0].Parent = -1; 40 | topNodes[0].Size = PEANOCELLS; 41 | topNodes[0].StartKey = 0; 42 | topNodes[0].PIndex = 0; 43 | topNodes[0].Count = count; 44 | 45 | int list[1] = { 0 }; 46 | int *listp = list; 47 | 48 | domain_do_local_refine(1, &listp); 49 | 50 | myfree(mp); 51 | 52 | /* count the number of top leaves */ 53 | NTopleaves = 0; 54 | domain_walktoptree(0); 55 | mpi_printf("DOMAIN: NTopleaves=%d\n", NTopleaves); 56 | 57 | if(NTopleaves < All.MultipleDomains * NTask) 58 | terminate("NTopleaves = %d < All.MultipleDomains * NTask = %d * %d = %d", NTopleaves, All.MultipleDomains, NTask, All.MultipleDomains * NTask); 59 | 60 | mpi_printf("DOMAIN: determination of top-level tree done\n"); 61 | 62 | domain_sumCost(); 63 | 64 | mpi_printf("DOMAIN: cost summation for top-level tree done\n"); 65 | 66 | return 0; 67 | } 68 | 69 | 70 | 71 | int domain_do_local_refine(int n, int **listp) /* in list[], we store the indices of the nodes that should be refined; n is their number */ 72 | { 73 | static int message_printed = 0; 74 | int i, j, k, l, p, sub, ret, *list; 75 | 76 | list = *listp; 77 | 78 | double limit = 1.0 / (All.TopNodeFactor * All.MultipleDomains * NTask); 79 | 80 | if(list[0] == 0) 81 | message_printed = 0; 82 | 83 | while((NTopnodes + 8 * n) > MaxTopNodes) 84 | { 85 | mpi_printf("DOMAIN: Increasing TopNodeAllocFactor=%g ", All.TopNodeAllocFactor); 86 | All.TopNodeAllocFactor *= 1.3; 87 | mpi_printf("new value=%g\n", All.TopNodeAllocFactor); 88 | if(All.TopNodeAllocFactor > 1000) 89 | terminate("something seems to be going seriously wrong here. 
Stopping.\n"); 90 | 91 | MaxTopNodes = (int) (All.TopNodeAllocFactor * All.MaxPart + 1); 92 | 93 | topNodes = (struct local_topnode_data *) myrealloc_movable(topNodes, (MaxTopNodes * sizeof(struct local_topnode_data))); 94 | TopNodes = (struct topnode_data *) myrealloc_movable(TopNodes, (MaxTopNodes * sizeof(struct topnode_data))); 95 | DomainTask = (int *) myrealloc_movable(DomainTask, (MaxTopNodes * sizeof(int))); 96 | DomainLeaveNode = (struct domain_cost_data *) myrealloc_movable(DomainLeaveNode, (MaxTopNodes * sizeof(struct domain_cost_data))); 97 | 98 | list = *listp; /* update this here because the above reallocations may have moved the pointer to the memory block */ 99 | } 100 | 101 | int *new_list = mymalloc_movable(&new_list, "new_list", 8 * n * sizeof(int)); 102 | double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double)); 103 | double *worklist = mymalloc("worklist", 8 * n * sizeof(double)); 104 | 105 | double non_zero = 0, non_zero_tot; 106 | 107 | /* create the new nodes */ 108 | for(k = 0; k < n; k++) 109 | { 110 | i = list[k]; 111 | topNodes[i].Daughter = NTopnodes; 112 | NTopnodes += 8; 113 | 114 | for(j = 0; j < 8; j++) 115 | { 116 | sub = topNodes[i].Daughter + j; 117 | 118 | topNodes[sub].Daughter = -1; 119 | topNodes[sub].Parent = i; 120 | topNodes[sub].Size = (topNodes[i].Size >> 3); 121 | topNodes[sub].StartKey = topNodes[i].StartKey + j * topNodes[sub].Size; 122 | topNodes[sub].PIndex = topNodes[i].PIndex; 123 | topNodes[sub].Count = 0; 124 | } 125 | 126 | sub = topNodes[i].Daughter; 127 | 128 | for(p = topNodes[i].PIndex, j = 0; p < topNodes[i].PIndex + topNodes[i].Count; p++) 129 | { 130 | if(j < 7) 131 | while(mp[p].key >= topNodes[sub + 1].StartKey) 132 | { 133 | j++; 134 | sub++; 135 | topNodes[sub].PIndex = p; 136 | if(j >= 7) 137 | break; 138 | } 139 | 140 | topNodes[sub].Count++; 141 | } 142 | 143 | for(j = 0; j < 8; j++) 144 | { 145 | sub = topNodes[i].Daughter + j; 146 | worklist[k * 8 + j] = fac_load * topNodes[sub].Count; 147 | 148 | if(worklist[k * 8 + j] != 0) 149 | non_zero++; 150 | } 151 | } 152 | 153 | 154 | MPI_Allreduce(&non_zero, &non_zero_tot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 155 | MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 156 | 157 | int new_n = 0; 158 | for(k = 0, l = 0; k < n; k++) 159 | { 160 | i = list[k]; 161 | 162 | for(j = 0; j < 8; j++, l++) 163 | { 164 | sub = topNodes[i].Daughter + j; 165 | 166 | if(worktotlist[l] > limit) 167 | { 168 | if(topNodes[sub].Size < 8) 169 | { 170 | if(message_printed == 0) 171 | { 172 | mpi_printf("DOMAIN: Note: we would like to refine top-tree, but PEANOGRID is not fine enough\n"); 173 | message_printed = 1; 174 | } 175 | } 176 | else 177 | new_list[new_n++] = sub; 178 | } 179 | } 180 | } 181 | 182 | myfree(worklist); 183 | myfree(worktotlist); 184 | 185 | new_list = myrealloc(new_list, new_n * sizeof(int)); 186 | 187 | if(new_n > 0) 188 | ret = domain_do_local_refine(new_n, &new_list); 189 | else 190 | ret = 0; 191 | 192 | myfree(new_list); 193 | 194 | return ret; 195 | } 196 | 197 | 198 | 199 | /*! This function walks the global top tree in order to establish the 200 | * number of leaves it has, and to assign the leaf numbers along the 201 | * Peano-Hilbert curve. These leaves are later combined into domain pieces, 202 | * which are distributed to different processors. 
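* Since the daughters are visited in their storage order 0..7, the
 * resulting leaf numbering follows the Peano-Hilbert curve.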
203 | */ 204 | void domain_walktoptree(int no) 205 | { 206 | int i; 207 | 208 | if(topNodes[no].Daughter == -1) 209 | { 210 | topNodes[no].Leaf = NTopleaves; 211 | NTopleaves++; 212 | } 213 | else 214 | { 215 | for(i = 0; i < 8; i++) 216 | domain_walktoptree(topNodes[no].Daughter + i); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /Model_M1.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-M1 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.035 % spin parameter 15 | MD 0.035 % disk mass fraction 16 | MB 0.05 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 20 | 21 | JD 0.035 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.1 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | N_HALO 100000 % desired number of particles in dark halo 32 | N_DISK 100000 % desired number of collisionless particles in disk 33 | N_BULGE 100000 % number of bulge particles 34 | 35 | 36 | 37 | %------ Selection of symmetry constraints of velocity structure 38 | 39 | TypeOfHaloVelocityStructure 3 % 0 = spherically symmetric, isotropic 40 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 41 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 42 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 43 | 44 | TypeOfDiskVelocityStructure 2 % 0 = spherically symmetric, isotropic 45 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 46 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 47 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 48 | 49 | TypeOfBulgeVelocityStructure 3 % 0 = spherically symmetric, isotropic 50 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 51 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 52 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 53 | 54 | 55 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 56 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 57 | 58 | 59 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 60 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 61 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 62 | 63 | 64 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 65 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | 
BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | 68 | 69 | %------ Orbit integration accuracy 70 | 71 | TorbitFac 10.0 % regulates the integration time of orbits 72 | % (this is of the order of the typical number of orbits per particle) 73 | TimeStepFactorOrbit 0.01 74 | TimeStepFactorCellCross 0.25 75 | 76 | 77 | %------ Iterative optimization parameters 78 | 79 | FractionToOptimizeIndependendly 0.001 80 | IndepenentOptimizationsPerStep 100 81 | StepsBetweenDump 10 82 | MaximumNumberOfSteps 100 83 | 84 | MinParticlesPerBinForDispersionMeasurement 100 85 | MinParticlesPerBinForDensityMeasurement 50 86 | 87 | 88 | %------ Grid dimension and extenstion/resolution 89 | 90 | DG_MaxLevel 7 91 | EG_MaxLevel 7 92 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 93 | 94 | 95 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 96 | % halo that must be inside the grid (determines grid extension) 97 | 98 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 99 | % bin (regulates size of innermost grid cells) 100 | 101 | 102 | 103 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 104 | 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use to for computing force field with a tree 115 | SampleForceNdisk 100000000 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O paramaters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 3600.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | %------ Factors to tune merit function of time avaraged velocity structure (only ver 1.1) 154 | 155 | HaloValueRsFac 1.0 156 | HaloValueTsFac 1.0 157 | HaloValueQsFac 1.0 158 | HaloValuePsFac 1.0 159 | 160 | DiskValueRsFac 1.0 161 | DiskValueTsFac 1.0 162 | DiskValueQsFac 1.0 163 | DiskValuePsFac 1.0 164 | 165 | BulgeValueRsFac 1.0 166 | BulgeValueTsFac 1.0 167 | BulgeValueQsFac 1.0 168 | BulgeValuePsFac 1.0 169 | -------------------------------------------------------------------------------- /Model_H3.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-H3-256K 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 
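% A typical invocation passes a model file like this one directly to the
% compiled code on the command line, e.g. (binary name and task count are
% illustrative only):
%
%    mpirun -np 8 ./GalIC Model_H3.param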
10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.0 % spin parameter 15 | MD 0.0 % disk mass fraction 16 | MB 0.0 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 20 | 21 | JD 0.00 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.2 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | 32 | N_HALO 100000 % desired number of particles in dark halo 33 | N_DISK 0 % desired number of collisionless particles in disk 34 | N_BULGE 0 % number of bulge particles 35 | 36 | 37 | 38 | %------ Selection of symmetry constraints of velocity structure 39 | 40 | TypeOfHaloVelocityStructure 1 % 0 = spherically symmetric, isotropic 41 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 42 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 43 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 44 | 45 | TypeOfDiskVelocityStructure 0 % 0 = spherically symmetric, isotropic 46 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 47 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 48 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 49 | 50 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 51 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 52 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 53 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 54 | 55 | 56 | HaloBetaParameter -1.0 % only relevant for TypeOfHaloVelocityStructure=1 57 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 58 | 59 | 60 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 61 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 62 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 63 | 64 | 65 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 68 | 69 | 70 | %------ Orbit integration accuracy 71 | 72 | TorbitFac 10.0 % regulates the integration time of orbits 73 | % (this is of the order of the typical number of orbits per particle) 74 | TimeStepFactorOrbit 0.01 75 | TimeStepFactorCellCross 0.25 76 | 77 | 78 | %------ Iterative optimization parameters 79 | 80 | FractionToOptimizeIndependendly 0.001 81 | IndepenentOptimizationsPerStep 100 82 | StepsBetweenDump 10 83 | MaximumNumberOfSteps 200 84 | 85 | MinParticlesPerBinForDispersionMeasurement 100 86 | MinParticlesPerBinForDensityMeasurement 50 87 | 88 | 89 | %------ Grid dimension and 
extenstion/resolution 90 | 91 | DG_MaxLevel 7 92 | EG_MaxLevel 7 93 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 94 | 95 | 96 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 97 | % halo that must be inside the grid (determines grid extension) 98 | 99 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 100 | % bin (regulates size of innermost grid cells) 101 | 102 | 103 | 104 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 105 | 106 | 107 | %------ Construction of target density field 108 | 109 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 110 | SampleParticleCount 100000000 % number of points sampled for target density field 111 | 112 | 113 | %------ Construction of force field 114 | 115 | SampleForceNhalo 0 % number of points to use to for computing force field with a tree 116 | SampleForceNdisk 0 117 | SampleForceNbulge 0 118 | 119 | Softening 0.05 120 | 121 | 122 | %------ Accuracy settings of tree code used in construction of force field 123 | 124 | TypeOfOpeningCriterion 1 125 | ErrTolTheta 0.4 126 | ErrTolForceAcc 0.0025 127 | 128 | %------ Domain decomposition parameters used in parallel tree code 129 | 130 | MultipleDomains 4 131 | TopNodeFactor 4 132 | 133 | 134 | %------ Parallel I/O paramaters, only affects writing of galaxy files 135 | 136 | NumFilesPerSnapshot 1 137 | NumFilesWrittenInParallel 1 138 | 139 | 140 | %------ Memory allocation parameters 141 | 142 | MaxMemSize 2300.0 % in MB 143 | BufferSize 100.0 144 | BufferSizeGravity 100.0 145 | 146 | 147 | %------ Specification of internal system of units 148 | 149 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 150 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 151 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 152 | GravityConstantInternal 0 153 | 154 | 155 | %------ Factors to tune merit function of time avaraged velocity structure (only ver 1.1) 156 | 157 | HaloValueRsFac 1.0 158 | HaloValueTsFac 1.0 159 | HaloValueQsFac 1.0 160 | HaloValuePsFac 1.0 161 | 162 | DiskValueRsFac 1.0 163 | DiskValueTsFac 1.0 164 | DiskValueQsFac 1.0 165 | DiskValuePsFac 1.0 166 | 167 | BulgeValueRsFac 1.0 168 | BulgeValueTsFac 1.0 169 | BulgeValueQsFac 1.0 170 | BulgeValuePsFac 1.0 171 | -------------------------------------------------------------------------------- /Model_D3.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-D3 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.035 % spin parameter 15 | MD 0.035 % disk mass fraction 16 | MB 0.0 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 
20 | 21 | JD 0.035 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.1 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | N_HALO 100000 % desired number of particles in dark halo 32 | N_DISK 100000 % desired number of collisionless particles in disk 33 | N_BULGE 0 % number of bulge particles 34 | 35 | 36 | 37 | %------ Selection of symmetry constraints of velocity structure 38 | 39 | TypeOfHaloVelocityStructure 3 % 0 = spherically symmetric, isotropic 40 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 41 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 42 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 43 | 44 | TypeOfDiskVelocityStructure 3 % 0 = spherically symmetric, isotropic 45 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 46 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 47 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 48 | 49 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 50 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 51 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 52 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 53 | 54 | 55 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 56 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 57 | 58 | 59 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 60 | DiskDispersionRoverZratio 2.0 % only relevant for TypeOfDiskVelocityStructure=3 61 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 62 | 63 | 64 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 65 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | 68 | 69 | %------ Orbit integration accuracy 70 | 71 | TorbitFac 10.0 % regulates the integration time of orbits 72 | % (this is of the order of the typical number of orbits per particle) 73 | TimeStepFactorOrbit 0.01 74 | TimeStepFactorCellCross 0.25 75 | 76 | 77 | %------ Iterative optimization parameters 78 | 79 | FractionToOptimizeIndependendly 0.001 80 | IndepenentOptimizationsPerStep 100 81 | StepsBetweenDump 10 82 | MaximumNumberOfSteps 100 83 | 84 | MinParticlesPerBinForDispersionMeasurement 100 85 | MinParticlesPerBinForDensityMeasurement 50 86 | 87 | 88 | %------ Grid dimension and extenstion/resolution 89 | 90 | DG_MaxLevel 7 91 | EG_MaxLevel 7 92 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 93 | 94 | 95 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 96 | % halo that must be inside the grid (determines grid extension) 97 | 98 | InnermostBinEnclosedMassFraction 
0.0000001 % regulates the fraction of mass enclosed by the innermost 99 | % bin (regulates size of innermost grid cells) 100 | 101 | 102 | 103 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 104 | 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use to for computing force field with a tree 115 | SampleForceNdisk 100000000 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O paramaters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 3600.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | %------ Factors to tune merit function of time avaraged velocity structure (only ver 1.1) 154 | 155 | HaloValueRsFac 1.0 156 | HaloValueTsFac 1.0 157 | HaloValueQsFac 1.0 158 | HaloValuePsFac 1.0 159 | 160 | DiskValueRsFac 1.0 161 | DiskValueTsFac 1.0 162 | DiskValueQsFac 1.0 163 | DiskValuePsFac 1.0 164 | 165 | BulgeValueRsFac 1.0 166 | BulgeValueTsFac 1.0 167 | BulgeValueQsFac 1.0 168 | BulgeValuePsFac 1.0 169 | -------------------------------------------------------------------------------- /Model_D1.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-D1 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.035 % spin parameter 15 | MD 0.035 % disk mass fraction 16 | MB 0.0 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 
20 | 21 | JD 0.035 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.1 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | N_HALO 100000 % desired number of particles in dark halo 32 | N_DISK 100000 % desired number of collisionless particles in disk 33 | N_BULGE 0 % number of bulge particles 34 | 35 | 36 | 37 | %------ Selection of symmetry constraints of velocity structure 38 | 39 | TypeOfHaloVelocityStructure 3 % 0 = spherically symmetric, isotropic 40 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 41 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 42 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 43 | 44 | TypeOfDiskVelocityStructure 2 % 0 = spherically symmetric, isotropic 45 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 46 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 47 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 48 | 49 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 50 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 51 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 52 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 53 | 54 | 55 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 56 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 57 | 58 | 59 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 60 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 61 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 62 | 63 | 64 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 65 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | 68 | 69 | %------ Orbit integration accuracy 70 | 71 | TorbitFac 10.0 % regulates the integration time of orbits 72 | % (this is of the order of the typical number of orbits per particle) 73 | TimeStepFactorOrbit 0.01 74 | TimeStepFactorCellCross 0.25 75 | 76 | 77 | %------ Iterative optimization parameters 78 | 79 | FractionToOptimizeIndependendly 0.001 80 | IndepenentOptimizationsPerStep 100 81 | StepsBetweenDump 10 82 | MaximumNumberOfSteps 100 83 | 84 | MinParticlesPerBinForDispersionMeasurement 100 85 | MinParticlesPerBinForDensityMeasurement 50 86 | 87 | 88 | %------ Grid dimension and extenstion/resolution 89 | 90 | DG_MaxLevel 7 91 | EG_MaxLevel 7 92 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 93 | 94 | 95 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 96 | % halo that must be inside the grid (determines grid extension) 97 | 98 | InnermostBinEnclosedMassFraction 
98 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 99 | % bin (regulates size of innermost grid cells) 100 | 101 | 102 | 103 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 104 | 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use for computing force field with a tree 115 | SampleForceNdisk 100000000 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O parameters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 3600.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | 154 | %------ Factors to tune merit function of time averaged velocity structure (only ver 1.1) 155 | 156 | HaloValueRsFac 1.0 157 | HaloValueTsFac 1.0 158 | HaloValueQsFac 1.0 159 | HaloValuePsFac 1.0 160 | 161 | DiskValueRsFac 1.0 162 | DiskValueTsFac 1.0 163 | DiskValueQsFac 1.0 164 | DiskValuePsFac 1.0 165 | 166 | BulgeValueRsFac 1.0 167 | BulgeValueTsFac 1.0 168 | BulgeValueQsFac 1.0 169 | BulgeValuePsFac 1.0 170 | -------------------------------------------------------------------------------- /Model_H2.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-H2 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | 11 | %------ Basic structural parameters of model 12 | 13 | CC 10.0 % halo concentration 14 | V200 200.0 % circular velocity v_200 (in km/sec) 15 | LAMBDA 0.0 % spin parameter 16 | MD 0.0 % disk mass fraction 17 | MB 0.0 % bulge mass fraction 18 | MBH 0.0 % black hole mass fraction. If zero, no black 19 | % hole is generated, otherwise one at the centre 20 | % is added.
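This model realizes a radially anisotropic, halo-only system via HaloBetaParameter = 0.5 further down. For orientation, a hedged sketch of the Binney anisotropy parameter that the beta parameter prescribes; the disp_* arguments are squared velocity dispersions, and the code's own dispersion machinery (in src/, not reproduced here) is the authority.

/* beta = 1 - (sigma_theta^2 + sigma_phi^2) / (2 sigma_r^2);
   beta = 0 is isotropic, beta -> 1 means purely radial orbits */
double binney_beta(double disp_r, double disp_theta, double disp_phi)
{
  return 1.0 - (disp_theta + disp_phi) / (2.0 * disp_r);
}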
21 | 22 | JD 0.00 % disk spin fraction, typically chosen equal to MD 23 | 24 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 25 | BulgeSize 0.2 % bulge scale length in units of halo scale length 26 | 27 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 28 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 29 | 30 | %------ Particle numbers in target model 31 | 32 | N_HALO 100000 % desired number of particles in dark halo 33 | N_DISK 0 % desired number of collisionless particles in disk 34 | N_BULGE 0 % number of bulge particles 35 | 36 | 37 | 38 | %------ Selection of symmetry constraints of velocity structure 39 | 40 | TypeOfHaloVelocityStructure 1 % 0 = spherically symmetric, isotropic 41 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 42 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 43 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 44 | 45 | TypeOfDiskVelocityStructure 0 % 0 = spherically symmetric, isotropic 46 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 47 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 48 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 49 | 50 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 51 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 52 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 53 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 54 | 55 | 56 | HaloBetaParameter 0.5 % only relevant for TypeOfHaloVelocityStructure=1 57 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 58 | 59 | 60 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 61 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 62 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 63 | 64 | 65 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 68 | 69 | 70 | %------ Orbit integration accuracy 71 | 72 | TorbitFac 10.0 % regulates the integration time of orbits 73 | % (this is of the order of the typical number of orbits per particle) 74 | TimeStepFactorOrbit 0.01 75 | TimeStepFactorCellCross 0.25 76 | 
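A hedged sketch of how the two time-step factors above are commonly combined into an orbit-integration step: the step is the smaller of a fixed fraction of the orbital time and the local cell-crossing time. The combination rule, argument names, and helper below are assumptions for illustration; the real criterion lives elsewhere in src/.

/* Tint: orbital-time estimate of the particle; cellsize/speed: local cell-crossing time;
   fac_orbit and fac_cross correspond to the two parameters above */
double orbit_timestep(double Tint, double cellsize, double speed,
                      double fac_orbit, double fac_cross)
{
  double dt_orbit = fac_orbit * Tint;
  double dt_cross = fac_cross * cellsize / speed;
  return dt_orbit < dt_cross ? dt_orbit : dt_cross;
}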
77 | 78 | %------ Iterative optimization parameters 79 | 80 | FractionToOptimizeIndependendly 0.001 81 | IndepenentOptimizationsPerStep 100 82 | StepsBetweenDump 10 83 | MaximumNumberOfSteps 200 84 | 85 | MinParticlesPerBinForDispersionMeasurement 100 86 | MinParticlesPerBinForDensityMeasurement 50 87 | 88 | 89 | %------ Grid dimension and extension/resolution 90 | 91 | DG_MaxLevel 7 92 | EG_MaxLevel 7 93 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 94 | 95 | 96 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 97 | % halo that must be inside the grid (determines grid extension) 98 | 99 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 100 | % bin (regulates size of innermost grid cells) 101 | 102 | 103 | 104 | MaxVelInUnitsVesc 0.99 % maximum allowed velocity in units of the local escape velocity 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use for computing force field with a tree 115 | SampleForceNdisk 0 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O parameters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 2300.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | 154 | %------ Factors to tune merit function of time averaged velocity structure (only ver 1.1) 155 | 156 | HaloValueRsFac 5.0 157 | HaloValueTsFac 1.0 158 | HaloValueQsFac 1.0 159 | HaloValuePsFac 1.0 160 | 161 | DiskValueRsFac 1.0 162 | DiskValueTsFac 1.0 163 | DiskValueQsFac 1.0 164 | DiskValuePsFac 1.0 165 | 166 | BulgeValueRsFac 1.0 167 | BulgeValueTsFac 1.0 168 | BulgeValueQsFac 1.0 169 | BulgeValuePsFac 1.0 170 | 171 | 172 | -------------------------------------------------------------------------------- /Model_H1.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-H1 5 | 6 | 7 | OutputFile snap % Base filename of generated sequence of files 8 | SnapFormat 1 % File format selection 9 | 10 | 11 | %------ Basic structural parameters of model 12 | 13 | CC 10.0 % halo concentration 14 | V200 200.0 % circular velocity v_200 (in km/sec) 15 | LAMBDA 0.0 % spin parameter 16 | MD 0.0 % disk mass fraction 17 | MB 0.0 % bulge mass fraction 18 | MBH 0.0 % black hole mass fraction. If zero, no black 19 | % hole is generated, otherwise one at the centre 20 | % is added.
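Model_H1 is the simplest case: a single, isotropic Hernquist halo. Its velocity draws are bounded by the MaxVelInUnitsVesc parameter further down; the sketch below mirrors the rejection loop in src/set_particles.c later in this document (halo_generate_v() is declared in src/proto.h; the wrapper function itself is added here for illustration).

/* draw a speed at radius r from the isotropic Hernquist distribution function,
   rejecting draws at or above the allowed fraction of the local escape speed */
double draw_bounded_speed(double r, double vesc)
{
  double v;
  do
    v = halo_generate_v(r);
  while(v >= All.MaxVelInUnitsVesc * vesc);
  return v;
}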
21 | 22 | JD 0.00 % disk spin fraction, typically chosen equal to MD 23 | 24 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 25 | BulgeSize 0.2 % bulge scale length in units of disk scale length 26 | 27 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 28 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 29 | 30 | %------ Particle numbers in target model 31 | 32 | N_HALO 100000 % desired number of particles in dark halo 33 | N_DISK 0 % desired number of collisionless particles in disk 34 | N_BULGE 0 % number of bulge particles 35 | 36 | 37 | 38 | 39 | %------ Selection of symmetry constraints of velocity structure 40 | 41 | TypeOfHaloVelocityStructure 0 % 0 = spherically symmetric, isotropic 42 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 43 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 44 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 45 | 46 | TypeOfDiskVelocityStructure 0 % 0 = spherically symmetric, isotropic 47 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 48 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 49 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 50 | 51 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 52 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 53 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 54 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 55 | 56 | 57 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 58 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 59 | 60 | 61 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 62 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 63 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 64 | 65 | 66 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 68 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 69 | 70 | 71 | %------ Orbit integration accuracy 72 | 73 | TorbitFac 10.0 % regulates the integration time of orbits 74 | % (this is of the order of the typical number of orbits per particle) 75 | TimeStepFactorOrbit 0.01 76 | TimeStepFactorCellCross 0.25 77 | 78 | 79 | %------ Iterative optimization parameters 80 | 81 | FractionToOptimizeIndependendly 0.001 82 | IndepenentOptimizationsPerStep 100 83 | StepsBetweenDump 10 84 | MaximumNumberOfSteps 100 85 | 86 | MinParticlesPerBinForDispersionMeasurement 100 87 | MinParticlesPerBinForDensityMeasurement 50 88 | 89 | 90 | %------ Grid dimension and extension/resolution 91 | 92 | DG_MaxLevel 7 93 | EG_MaxLevel 7 94 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 95 | 
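FG_Nbin sets the resolution of the (R, z) acceleration grid. Since forcegrid_get_cell() in src/proto.h returns cell indices (iR, iz) together with fractional offsets (fR, fz), a bilinear lookup of grid quantities is the natural reading; the row-major layout grid[iR*nbin + iz] below is a hypothetical illustration, not necessarily the code's actual storage scheme.

double bilinear_lookup(const double *grid, int nbin, int iR, int iz, double fR, double fz)
{
  /* weight the four surrounding grid points by the fractional offsets */
  return (1.0 - fR) * (1.0 - fz) * grid[iR * nbin + iz]
       + fR * (1.0 - fz) * grid[(iR + 1) * nbin + iz]
       + (1.0 - fR) * fz * grid[iR * nbin + iz + 1]
       + fR * fz * grid[(iR + 1) * nbin + iz + 1];
}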
96 | 97 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 98 | % halo that must be inside the grid (determines grid extension) 99 | 100 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 101 | % bin (regulates size of innermost grid cells) 102 | 103 | 104 | 105 | MaxVelInUnitsVesc 0.99 % maximum allowed velocity in units of the local escape velocity 106 | 107 | %------ Construction of target density field 108 | 109 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 110 | SampleParticleCount 100000000 % number of points sampled for target density field 111 | 112 | 113 | %------ Construction of force field 114 | 115 | SampleForceNhalo 0 % number of points to use for computing force field with a tree 116 | SampleForceNdisk 0 117 | SampleForceNbulge 0 118 | 119 | Softening 0.05 120 | 121 | 122 | %------ Accuracy settings of tree code used in construction of force field 123 | 124 | TypeOfOpeningCriterion 1 125 | ErrTolTheta 0.4 126 | ErrTolForceAcc 0.0025 127 | 128 | %------ Domain decomposition parameters used in parallel tree code 129 | 130 | MultipleDomains 4 131 | TopNodeFactor 4 132 | 133 | 134 | %------ Parallel I/O parameters, only affects writing of galaxy files 135 | 136 | NumFilesPerSnapshot 1 137 | NumFilesWrittenInParallel 1 138 | 139 | 140 | %------ Memory allocation parameters 141 | 142 | MaxMemSize 2300.0 % in MB 143 | BufferSize 100.0 144 | BufferSizeGravity 100.0 145 | 146 | 147 | %------ Specification of internal system of units 148 | 149 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 150 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 151 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 152 | GravityConstantInternal 0 153 | 154 | 155 | 156 | %------ Factors to tune merit function of time averaged velocity structure (only ver 1.1) 157 | 158 | HaloValueRsFac 1.0 159 | HaloValueTsFac 1.0 160 | HaloValueQsFac 1.0 161 | HaloValuePsFac 1.0 162 | 163 | DiskValueRsFac 1.0 164 | DiskValueTsFac 1.0 165 | DiskValueQsFac 1.0 166 | DiskValuePsFac 1.0 167 | 168 | BulgeValueRsFac 1.0 169 | BulgeValueTsFac 1.0 170 | BulgeValueQsFac 1.0 171 | BulgeValuePsFac 1.0 172 | -------------------------------------------------------------------------------- /src/mpi_utils/checksummed_sendrecv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | #ifdef MPISENDRECV_CHECKSUM 12 | 13 | #undef MPI_Sendrecv 14 | 15 | 16 | int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, 17 | int dest, int sendtag, void *recvbufreal, int recvcount, 18 | MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status * status) 19 | { 20 | int checksumtag = 1000, errtag = 2000; 21 | int i, iter = 0, err_flag, err_flag_imported, size_sendtype, size_recvtype; 22 | long long sendCheckSum, recvCheckSum, importedCheckSum; 23 | unsigned char *p, *buf, *recvbuf; 24 | char msg[500]; 25 | 26 | if(dest != source) 27 | terminate("destination task different from source task"); 28 | 29 | MPI_Type_size(sendtype, &size_sendtype); 30 | MPI_Type_size(recvtype, &size_recvtype); 31 | 32 | if(dest == ThisTask) 33 | { 34 | memcpy(recvbufreal, sendbuf, recvcount * size_recvtype); 35 | return 0; 36 | } 37 | 38 | 39 | if(!(buf = mymalloc(recvcount * size_recvtype + 1024))) 40 | terminate("not enough memory to allocate the buffer buf"); 41 | 42 | for(i = 0, p = buf; i < recvcount * size_recvtype + 1024; i++) 43 | *p++ = 255; 44 | 45 | recvbuf = buf + 512; 46 | 
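  /* Note: the two 512-byte guard zones surrounding recvbuf were just filled
   * with 0xFF along with the rest of buf; after the exchange below, any guard
   * byte that no longer reads 255 means the MPI library wrote outside the
   * posted receive buffer, and the two scan loops that follow abort with a
   * diagnostic message. */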
47 | MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status); 48 | 49 | for(i = 0, p = buf; i < 512; i++, p++) 50 | { 51 | if(*p != 255) 52 | { 53 | sprintf 54 | (msg, "MPI-ERROR: Task=%d/%s: Recv occurred before recv buffer. message-size=%d from %d, i=%d c=%d\n", 55 | ThisTask, getenv("HOST"), recvcount, dest, i, *p); 56 | terminate(msg); 57 | } 58 | } 59 | 60 | for(i = 0, p = recvbuf + recvcount * size_recvtype; i < 512; i++, p++) 61 | { 62 | if(*p != 255) 63 | { 64 | sprintf 65 | (msg, "MPI-ERROR: Task=%d/%s: Recv occurred after recv buffer. message-size=%d from %d, i=%d c=%d\n", 66 | ThisTask, getenv("HOST"), recvcount, dest, i, *p); 67 | terminate(msg); 68 | } 69 | } 70 | 71 | 72 | for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) 73 | sendCheckSum += *p; 74 | 75 | importedCheckSum = 0; 76 | 77 | if(dest > ThisTask) 78 | { 79 | if(sendcount > 0) 80 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 81 | if(recvcount > 0) 82 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 83 | } 84 | else 85 | { 86 | if(recvcount > 0) 87 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 88 | if(sendcount > 0) 89 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 90 | } 91 | 92 | checksumtag++; 93 | 94 | for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++) 95 | recvCheckSum += *p; 96 | 97 | 98 | err_flag = err_flag_imported = 0; 99 | 100 | if(recvCheckSum != importedCheckSum) 101 | { 102 | printf 103 | 
("MPI-ERROR: Receive error on task=%d/%s from task=%d, message size=%d, sendcount=%d checksums= %d %d %d %d. Try to fix it...\n", 104 | ThisTask, getenv("HOST"), source, recvcount, sendcount, (int) (recvCheckSum >> 32), 105 | (int) recvCheckSum, (int) (importedCheckSum >> 32), (int) importedCheckSum); 106 | myflush(stdout); 107 | 108 | err_flag = 1; 109 | } 110 | 111 | if(dest > ThisTask) 112 | { 113 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 114 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 115 | } 116 | else 117 | { 118 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 119 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 120 | } 121 | errtag++; 122 | 123 | if(err_flag > 0 || err_flag_imported > 0) 124 | { 125 | printf("Task=%d is on %s, wants to send %d and has checksum=%d %d of send data\n", 126 | ThisTask, getenv("HOST"), sendcount, (int) (sendCheckSum >> 32), (int) sendCheckSum); 127 | myflush(stdout); 128 | 129 | do 130 | { 131 | sendtag++; 132 | recvtag++; 133 | 134 | for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) 135 | *p = 0; 136 | 137 | if((iter & 1) == 0) 138 | { 139 | if(dest > ThisTask) 140 | { 141 | if(sendcount > 0) 142 | MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); 143 | if(recvcount > 0) 144 | MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); 145 | } 146 | else 147 | { 148 | if(recvcount > 0) 149 | MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); 150 | if(sendcount > 0) 151 | MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); 152 | } 153 | } 154 | else 155 | { 156 | if(iter > 5) 157 | { 158 | printf("we're trying to send each byte now on task=%d (iter=%d)\n", ThisTask, iter); 159 | myflush(stdout); 160 | if(dest > ThisTask) 161 | { 162 | for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++) 163 | MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); 164 | for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) 165 | MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); 166 | } 167 | else 168 | { 169 | for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) 170 | MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); 171 | for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++) 172 | MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); 173 | } 174 | } 175 | else 176 | { 177 | MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status); 178 | } 179 | } 180 | 181 | importedCheckSum = 0; 182 | 183 | for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) 184 | sendCheckSum += *p; 185 | 186 | printf("Task=%d has send_checksum=%d %d\n", ThisTask, (int) (sendCheckSum >> 32), (int) sendCheckSum); 187 | myflush(stdout); 188 | 189 | if(dest > ThisTask) 190 | { 191 | if(sendcount > 0) 192 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 193 | if(recvcount > 0) 194 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 195 | } 196 | else 197 | { 198 | if(recvcount > 0) 199 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 200 | if(sendcount > 0) 201 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 202 | } 203 | 204 | for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++) /* sum over all received bytes, matching the sender's checksum */ 205 | recvCheckSum += *p; 206 | 
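  /* Note on the retry ladder above: even iterations re-issue a rank-ordered
   * Ssend/Recv pair, odd iterations repeat a plain MPI_Sendrecv until iter
   * exceeds 5, after which the payload is transmitted one byte per message;
   * each pass recomputes and re-exchanges checksums, and after 10 failed
   * iterations both buffers are dumped to send_data_*.dat / recv_data_*.dat
   * before the run terminates. */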
207 | err_flag = err_flag_imported = 0; 208 | 209 | if(recvCheckSum != importedCheckSum) 210 | { 211 | printf 212 | ("MPI-ERROR: Again (iter=%d) a receive error on task=%d/%s from task=%d, message size=%d, checksums= %d %d %d %d. Try to fix it...\n", 213 | iter, ThisTask, getenv("HOST"), source, recvcount, (int) (recvCheckSum >> 32), 214 | (int) recvCheckSum, (int) (importedCheckSum >> 32), (int) importedCheckSum); 215 | myflush(stdout); 216 | err_flag = 1; 217 | } 218 | 219 | if(dest > ThisTask) 220 | { 221 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 222 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 223 | } 224 | else 225 | { 226 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 227 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 228 | } 229 | 230 | if(err_flag == 0 && err_flag_imported == 0) 231 | break; 232 | 233 | errtag++; 234 | checksumtag++; 235 | iter++; 236 | } 237 | while(iter < 10); 238 | 239 | if(iter >= 10) 240 | { 241 | char buf[1000]; 242 | int length; 243 | FILE *fd; 244 | 245 | sprintf(buf, "send_data_%d.dat", ThisTask); 246 | fd = fopen(buf, "w"); 247 | length = sendcount * size_sendtype; 248 | fwrite(&length, 1, sizeof(int), fd); 249 | fwrite(sendbuf, sendcount, size_sendtype, fd); 250 | fclose(fd); 251 | 252 | sprintf(buf, "recv_data_%d.dat", ThisTask); 253 | fd = fopen(buf, "w"); 254 | length = recvcount * size_recvtype; 255 | fwrite(&length, 1, sizeof(int), fd); 256 | fwrite(recvbuf, recvcount, size_recvtype, fd); 257 | fclose(fd); 258 | 259 | sprintf(msg, "MPI-ERROR: Even 10 trials proved to be insufficient on task=%d/%s. Stopping\n", ThisTask, getenv("HOST")); 260 | terminate(msg); 261 | } 262 | } 263 | 264 | memcpy(recvbufreal, recvbuf, recvcount * size_recvtype); 265 | 266 | myfree(buf); 267 | 268 | return 0; 269 | } 270 | 271 | #endif 272 | -------------------------------------------------------------------------------- /src/system.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "allvars.h" 17 | #include "proto.h" 18 | 19 | int get_thread_num(void) 20 | { 21 | #if (NUM_THREADS > 1) /* This enables OpenMP */ 22 | return omp_get_thread_num(); 23 | #else 24 | return 0; 25 | #endif 26 | } 27 | 28 | 29 | 30 | double dabs(double a) 31 | { 32 | if(a < 0) 33 | return -a; 34 | else 35 | return a; 36 | } 37 | 38 | double dmax(double a, double b) 39 | { 40 | if(a > b) 41 | return a; 42 | else 43 | return b; 44 | } 45 | 46 | double dmin(double a, double b) 47 | { 48 | if(a < b) 49 | return a; 50 | else 51 | return b; 52 | } 53 | 54 | int imax(int a, int b) 55 | { 56 | if(a > b) 57 | return a; 58 | else 59 | return b; 60 | } 61 | 62 | int imin(int a, int b) 63 | { 64 | if(a < b) 65 | return a; 66 | else 67 | return b; 68 | } 69 | 70 | 71 | #ifdef DEBUG_ENABLE_FPU_EXCEPTIONS 72 | #include 73 | void enable_core_dumps_and_fpu_exceptions(void) 74 | { 75 | /* enable floating point exceptions */ 76 | 77 | extern int feenableexcept(int __excepts); 78 | feenableexcept(FE_DIVBYZERO | FE_INVALID); 79 | 80 | 81 | /* set core-dump size to infinity */ 82 | struct rlimit rlim; 83 | getrlimit(RLIMIT_CORE, &rlim); 84 | rlim.rlim_cur = RLIM_INFINITY; 85 | setrlimit(RLIMIT_CORE, &rlim); 86 | 87 | /* MPICH catches the signals SIGSEGV, SIGBUS, and SIGFPE.... 
88 | * The following statements reset things to the default handlers, 89 | * which will generate a core file. 90 | */ 91 | signal(SIGSEGV, SIG_DFL); 92 | signal(SIGBUS, SIG_DFL); 93 | signal(SIGFPE, SIG_DFL); 94 | signal(SIGINT, SIG_DFL); 95 | } 96 | #endif 97 | 98 | 99 | 100 | 101 | /* returns the number of cpu-ticks in seconds that 102 | * have elapsed. (or the wall-clock time) 103 | */ 104 | double second(void) 105 | { 106 | return MPI_Wtime(); 107 | 108 | /* 109 | * possible alternative: 110 | * 111 | * return ((double) clock()) / CLOCKS_PER_SEC; 112 | * 113 | * but note: on AIX and presumably many other 32bit systems, 114 | * clock() has only a resolution of 10ms=0.01sec 115 | */ 116 | } 117 | 118 | double measure_time(void) /* strategy: call this at end of functions to account for time in this function, and before another (nontrivial) function is called */ 119 | { 120 | double t, dt; 121 | 122 | t = second(); 123 | dt = t - WallclockTime; 124 | WallclockTime = t; 125 | 126 | return dt; 127 | } 128 | 129 | /* returns the time difference between two measurements 130 | * obtained with second(). The routine takes care of the 131 | * possible overflow of the tick counter on 32bit systems. 132 | */ 133 | double timediff(double t0, double t1) 134 | { 135 | double dt; 136 | 137 | dt = t1 - t0; 138 | 139 | if(dt < 0) /* overflow has occured (for systems with 32bit tick counter) */ 140 | { 141 | #ifdef WALLCLOCK 142 | dt = 0; 143 | #else 144 | dt = t1 + pow(2, 32) / CLOCKS_PER_SEC - t0; 145 | #endif 146 | } 147 | 148 | return dt; 149 | } 150 | 151 | 152 | 153 | void minimum_large_ints(int n, long long *src, long long *res) 154 | { 155 | int i, j; 156 | long long *numlist; 157 | 158 | numlist = (long long *) mymalloc("numlist", NTask * n * sizeof(long long)); 159 | MPI_Allgather(src, n * sizeof(long long), MPI_BYTE, numlist, n * sizeof(long long), MPI_BYTE, 160 | MPI_COMM_WORLD); 161 | 162 | for(j = 0; j < n; j++) 163 | res[j] = src[j]; 164 | 165 | for(i = 0; i < NTask; i++) 166 | for(j = 0; j < n; j++) 167 | if(res[j] > numlist[i * n + j]) 168 | res[j] = numlist[i * n + j]; 169 | 170 | myfree(numlist); 171 | } 172 | 173 | 174 | void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm) 175 | { 176 | int i, j, *numlist; 177 | int ntask; 178 | 179 | MPI_Comm_size(comm, &ntask); 180 | 181 | numlist = (int *) mymalloc("numlist", ntask * n * sizeof(int)); 182 | MPI_Allgather(src, n, MPI_INT, numlist, n, MPI_INT, comm); 183 | 184 | for(j = 0; j < n; j++) 185 | res[j] = 0; 186 | 187 | for(i = 0; i < ntask; i++) 188 | for(j = 0; j < n; j++) 189 | res[j] += numlist[i * n + j]; 190 | 191 | myfree(numlist); 192 | } 193 | 194 | 195 | void sumup_large_ints(int n, int *src, long long *res) 196 | { 197 | sumup_large_ints_comm(n, src, res, MPI_COMM_WORLD); 198 | } 199 | 200 | void sumup_longs(int n, long long *src, long long *res) 201 | { 202 | int i, j; 203 | long long *numlist; 204 | 205 | numlist = (long long *) mymalloc("numlist", NTask * n * sizeof(long long)); 206 | MPI_Allgather(src, n * sizeof(long long), MPI_BYTE, numlist, n * sizeof(long long), MPI_BYTE, 207 | MPI_COMM_WORLD); 208 | 209 | for(j = 0; j < n; j++) 210 | res[j] = 0; 211 | 212 | for(i = 0; i < NTask; i++) 213 | for(j = 0; j < n; j++) 214 | res[j] += numlist[i * n + j]; 215 | 216 | myfree(numlist); 217 | } 218 | 219 | 220 | void sumup_floats(int n, float *x, float *res) 221 | { 222 | int i, j, p; 223 | float *numlist; 224 | 225 | double min_FreeBytes_glob, FreeBytes_local = 1.0 * FreeBytes; 226 | 
MPI_Allreduce(&FreeBytes_local, &min_FreeBytes_glob, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 227 | 228 | int sum_chunksize = (int) (min_FreeBytes_glob / sizeof(float) / NTask); 229 | int sum_pieces = n / sum_chunksize; 230 | int sum_restsize = n % sum_chunksize; 231 | 232 | if(sum_chunksize == 0) 233 | terminate("min_FreeBytes_glob too small - not enough memory for sumup_floats.\n"); 234 | 235 | for(j = 0; j < n; j++) 236 | res[j] = 0; 237 | 238 | for(p = 0; p < sum_pieces; p++) 239 | { 240 | numlist = (float *) mymalloc("numlist", NTask * sum_chunksize * sizeof(float)); 241 | MPI_Allgather(x + p * sum_chunksize, sum_chunksize, MPI_FLOAT, numlist, sum_chunksize, MPI_FLOAT, 242 | MPI_COMM_WORLD); 243 | 244 | for(i = 0; i < NTask; i++) 245 | for(j = 0; j < sum_chunksize; j++) 246 | res[p * sum_chunksize + j] += numlist[i * sum_chunksize + j]; 247 | myfree(numlist); 248 | } 249 | 250 | if(sum_restsize > 0) 251 | { 252 | numlist = (float *) mymalloc("numlist", NTask * sum_restsize * sizeof(float)); 253 | MPI_Allgather(x + sum_pieces * sum_chunksize, sum_restsize, MPI_FLOAT, numlist, sum_restsize, MPI_FLOAT, 254 | MPI_COMM_WORLD); 255 | 256 | for(i = 0; i < NTask; i++) 257 | for(j = 0; j < sum_restsize; j++) 258 | res[sum_pieces * sum_chunksize + j] += numlist[i * sum_restsize + j]; 259 | myfree(numlist); 260 | } 261 | } 262 | 263 | void sumup_doubles(int n, double *x, double *res) 264 | { 265 | int i, j, p; 266 | double *numlist; 267 | 268 | double min_FreeBytes_glob, FreeBytes_local = 1.0 * FreeBytes; 269 | MPI_Allreduce(&FreeBytes_local, &min_FreeBytes_glob, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 270 | 271 | int sum_chunksize = (int) (min_FreeBytes_glob / sizeof(double) / NTask); /* sized for doubles: the gather buffer below holds doubles */ 272 | int sum_pieces = n / sum_chunksize; 273 | int sum_restsize = n % sum_chunksize; 274 | 275 | if(sum_chunksize == 0) 276 | terminate("min_FreeBytes_glob too small - not enough memory for sumup_doubles.\n"); 277 | 278 | for(j = 0; j < n; j++) 279 | res[j] = 0; 280 | 281 | for(p = 0; p < sum_pieces; p++) 282 | { 283 | numlist = (double *) mymalloc("numlist", NTask * sum_chunksize * sizeof(double)); 284 | MPI_Allgather(x + p * sum_chunksize, sum_chunksize, MPI_DOUBLE, numlist, sum_chunksize, MPI_DOUBLE, 285 | MPI_COMM_WORLD); 286 | 287 | for(i = 0; i < NTask; i++) 288 | for(j = 0; j < sum_chunksize; j++) 289 | res[p * sum_chunksize + j] += numlist[i * sum_chunksize + j]; 290 | myfree(numlist); 291 | } 292 | 293 | if(sum_restsize > 0) 294 | { 295 | numlist = (double *) mymalloc("numlist", NTask * sum_restsize * sizeof(double)); 296 | MPI_Allgather(x + sum_pieces * sum_chunksize, sum_restsize, MPI_DOUBLE, numlist, sum_restsize, 297 | MPI_DOUBLE, MPI_COMM_WORLD); 298 | 299 | for(i = 0; i < NTask; i++) 300 | for(j = 0; j < sum_restsize; j++) 301 | res[sum_pieces * sum_chunksize + j] += numlist[i * sum_restsize + j]; 302 | myfree(numlist); 303 | } 304 | } 305 | 306 | 307 | size_t sizemax(size_t a, size_t b) 308 | { 309 | if(a < b) 310 | return b; 311 | else 312 | return a; 313 | } 314 | 315 | 316 | /* The following function is part of the GNU C Library. 
317 | Contributed by Torbjorn Granlund (tege@sics.se) 318 | */ 319 | /* Find the first bit set in the argument */ 320 | int my_ffsll(long long int i) 321 | { 322 | unsigned long long int x = i & -i; 323 | if(x <= 0xffffffff) 324 | return ffs(i); 325 | else 326 | return 32 + ffs(i >> 32); 327 | } 328 | 329 | double mysort(void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)) 330 | { 331 | double t0, t1; 332 | 333 | t0 = second(); 334 | 335 | qsort(base, nel, width, compar); 336 | 337 | t1 = second(); 338 | 339 | return timediff(t0, t1); 340 | } 341 | 342 | -------------------------------------------------------------------------------- /src/proto.h: -------------------------------------------------------------------------------- 1 | #ifndef PROTO_H 2 | #define PROTO_H 3 | 4 | #include "allvars.h" 5 | #include "forcetree/forcetree.h" 6 | 7 | #include 8 | #include 9 | 10 | #ifdef HAVE_HDF5 11 | #include 12 | #endif 13 | 14 | int cmp_P_Rnd(const void *a, const void *b); 15 | void shuffle_energies(int iter); 16 | double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); 17 | double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *), MPI_Comm comm); 18 | void smooth_stack(double *data, int maxlevel); 19 | double calc_stack_difference(double *d1, double *d2, int l, int i, int j, int maxlevel, double *ref1, double *ref2, double thresh, double *dist, int flag); 20 | 21 | #ifdef VER_1_1 22 | double calc_stack_difference_mod(double *d1, double *d2, int l, int i, int j, int maxlevel, double *ref1, double *ref2, double thresh, double *dist, int flag); 23 | double calc_stack_sum( double *ref, double *thr, int l, int i, int j, int maxlevel, double thresh, double *dist ); 24 | #endif 25 | 26 | 27 | double calc_stack_difference_used(double *d1, double *d2, int l, int i, int j, int maxlevel, 28 | double *ref1, double *ref2, double *used1, double *used2, 29 | double thresh, int flag); 30 | 31 | double eval_smoothed_stack(double *din, int l, int i, int j, int maxlevel, double *ref, double thresh); 32 | void calc_smoothed_stack(double *din, double *dout, int maxlevel, double *ref, double thresh); 33 | 34 | double integrate_axisymmetric_jeans(double zstart, double zend, double R, int type); 35 | 36 | double h_factor(double R, double z, int type); 37 | double get_beta_of_type(double *pos, int type); 38 | 39 | void free_allocated_memory(void); 40 | void force_test(void); 41 | void forcegrid_get_cell(double *pos, int *iR, int *iz, double *fR, double *fz); 42 | 43 | double halo_get_potential(double *pos); 44 | void halo_get_acceleration(double *pos, double *acc); 45 | void halo_get_fresh_coordinate(double *pos); 46 | double halo_generate_v(double rad); 47 | double halo_get_potential_from_radius(double r); 48 | double halo_get_density(double *pos); 49 | double halo_get_mass_inside_radius(double r); 50 | double halo_get_escape_speed(double *pos); 51 | double halo_get_sigma2(double *pos); 52 | 53 | void disk_get_fresh_coordinate(double *pos); 54 | double disk_get_density(double *pos); 55 | double disk_get_mass_inside_radius(double R); 56 | 57 | double bugle_get_mass_inside_radius(double r); 58 | void bulge_get_fresh_coordinate(double *pos); 59 | double bulge_get_density(double *pos); 60 | double bulge_get_mass_inside_radius(double r); 61 | double bulge_get_escape_speed(double *pos); 62 | double bulge_get_potential(double *pos); 63 | double bulge_get_potential_from_radius(double r); 64 | void 
bulge_get_acceleration(double *pos, double *acc); 65 | double bulge_get_escape_speed(double *pos); 66 | void output_rotcurve(void); 67 | 68 | void densitygrid_sample_targetresponse(void); 69 | void enable_core_dumps_and_fpu_exceptions(void); 70 | 71 | double h_over_R(double R, double z, int type); 72 | 73 | 74 | void line_search(void); 75 | void calc_energy_grid_mass_maps(void); 76 | void energygrid_get_cell(double *pos, int *iR, int *iz, double *fR, double *fz); 77 | void calc_disp_components_for_particle(int n, double *v, double *vr2, double *vt2, double *vp2, double *vq2); 78 | 79 | void structure_determination(void); 80 | double structure_disk_angmomentum(void); 81 | double structure_gc(double c); 82 | 83 | double eval_fit(int n, double *vel, double *newdens, double *olddens); 84 | #ifdef VER_1_1 85 | double eval_fit_mod(int n, double *vel, double *newdens, double *olddens, double *egyROrbit_new, double *egyROrbit_old, 86 | double *egyTOrbit_new, double *egyTOrbit_old, 87 | double *egyQOrbit_new, double *egyQOrbit_old, 88 | double *egyPOrbit_new, double *egyPOrbit_old ); 89 | #endif 90 | 91 | double goldensection_search(int n, double ekin_a, double ekin_b, double ekin_c, double f_a, double f_b, double f_c, double *dir, double *egy, double *fnew, int *count); 92 | double eval_fit_anisotropy(int, double alpha, double v, double *rad, double *perp); 93 | void optimize(int n); 94 | //void optimize_std(int n); 95 | void free_all_response_fields(void); 96 | void calc_all_response_fields(void); 97 | void optimize_some_particles(void); 98 | 99 | void forcegrid_allocate(void); 100 | double forcegrid_get_potential(double *pos); 101 | void forcegrid_get_acceleration(double *pos, double *acc); 102 | double forcegrid_get_escape_speed(double *pos); 103 | 104 | void forcedensitygrid_create(void); 105 | void forcedensitygrid_calculate(void); 106 | 107 | void densitygrid_allocate(void); 108 | void densitygrid_get_cell(double *pos, int *iR, int *iz, double *fR, double *fz); 109 | void forcedensitygrid_load(void); 110 | void forcedensitygrid_save(void); 111 | 112 | void commit_updates(void); 113 | void init_updates(void); 114 | void calc_global_fit(void); 115 | 116 | 117 | void energygrid_allocate(void); 118 | 119 | 120 | void reorient_particle_velocities(int iter); 121 | void update_velocities(int iter); 122 | void initialize_particles(void); 123 | 124 | double get_density_of_type(double *pos, int type); 125 | double get_vstream(double *pos, int type); 126 | double get_z_disp_cylindrical(double *pos, int type); 127 | double get_radial_disp_spherical(double *pos, int type); 128 | void get_disp_rtp(double *pos, int type, double *disp_r, double *disp_t, double *disp_p, double *disp_q); 129 | double get_r_disp_tilted(double *pos, int type); 130 | double get_theta_disp_tilted(double *pos, int type); 131 | double get_phi_disp(double *pos, int type); 132 | 133 | void calculate_dispfield(void); 134 | void calc_all_response_fields_and_gradients(void); 135 | void log_message(int iter); 136 | void calc_response_dispersion(void); 137 | void allocate_memory(void); 138 | void output_toomre_Q(void); 139 | void add_to_energy_grid(double *pos, double mass, double vr2, double vt2, double vp2, double vq2, 140 | double *egyMass, double *egyResponse_r, double *egyResponse_t, double *egyResponse_p, double *egyResponse_q); 141 | 142 | double produce_orbit_response_field(double *pos, double *vel, int id, double *mfield, double mass, double timespan, int *orbitstaken); 143 | #ifdef VER_1_1 144 | double 
produce_orbit_response_field_mod(double *pos, double *vel, int id, double *mfield, double *egyfield_r, double *egyfield_t, double *egyfield_q, double *egyfield_p, double mass, double timespan, int *orbitstaken, int type); 145 | #endif 146 | 147 | void init(void); 148 | void set_units(void); 149 | void endrun(void); 150 | void output_compile_time_options(void); 151 | void set_softenings(void); 152 | 153 | void read_parameter_file(char *fname); 154 | 155 | void mpi_printf(const char *fmt, ...); 156 | size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE * stream); 157 | size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE * stream); 158 | void write_file(char *fname, int writeTask, int lastTask); 159 | void get_dataset_name(enum iofields blocknr, char *buf); 160 | void get_Tab_IO_Label(enum iofields blocknr, char *label); 161 | int blockpresent(enum iofields blocknr, int write); 162 | int get_particles_in_block(enum iofields blocknr, int *typelist); 163 | int get_values_per_blockelement(enum iofields blocknr); 164 | int get_datatype_in_block(enum iofields blocknr); 165 | int get_bytes_per_blockelement(enum iofields blocknr, int mode); 166 | void fill_write_buffer(enum iofields blocknr, int *startindex, int pc, int type); 167 | void output_particles(int iter); 168 | void output_density_field(int iter); 169 | void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last); 170 | 171 | 172 | void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int linenr); 173 | void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, 174 | int line); 175 | 176 | void *myrealloc_fullinfo(void *p, size_t n, const char *func, const char *file, int line); 177 | void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line); 178 | 179 | void myfree_fullinfo(void *p, const char *func, const char *file, int line); 180 | void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line); 181 | int dump_memory_table_buffer(char *p); 182 | void mymalloc_init(void); 183 | 184 | int permutation_compare(const void *a, const void *b); 185 | 186 | double dabs(double a); 187 | double dmax(double a, double b); 188 | double dmin(double a, double b); 189 | int imax(int a, int b); 190 | int imin(int a, int b); 191 | int get_part_count_this_task(int n); 192 | size_t sizemax(size_t a, size_t b); 193 | int my_ffsll(long long int i); 194 | void reorder_particles(int *Id); 195 | void gravity(void); 196 | 197 | double second(void); 198 | void sumup_large_ints(int n, int *src, long long *res); 199 | void sumup_longs(int n, long long *src, long long *res); 200 | double timediff(double t0, double t1); 201 | 202 | int get_thread_num(void); 203 | peanokey peano_hilbert_key(int x, int y, int z, int bits); 204 | void peano_hilbert_order(void); 205 | void peano_hilbert_key_inverse(peanokey key, int bits, int *x, int *y, int *z); 206 | double mysort(void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)); 207 | 208 | #endif 209 | 210 | -------------------------------------------------------------------------------- /src/set_particles.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "allvars.h" 11 | #include "proto.h" 12 | 13 | 14 | 15 | void 
initialize_particles(void) { 16 | 17 | int n, i, k; 18 | double phi, theta, vr; 19 | double vsum2 = 0, rsum2 = 0, vsum2_exact = 0; 20 | int count_r[6], count_t[6], count_p[6], count_q[6]; 21 | int tot_count_r[6], tot_count_t[6], tot_count_p[6], tot_count_q[6]; 22 | 23 | int nhalo = get_part_count_this_task(All.Halo_N); 24 | int ndisk = get_part_count_this_task(All.Disk_N); 25 | int nbulge = get_part_count_this_task(All.Bulge_N); 26 | 27 | NumPart = nhalo + ndisk + nbulge; 28 | 29 | MPI_Allreduce(&NumPart, &All.MaxPart, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 30 | sumup_large_ints(1, &NumPart, &All.TotNumPart); 31 | 32 | P = (struct particle_data *) mymalloc_movable(&P, "P", All.MaxPart * sizeof(struct particle_data)); 33 | memset(P, 0, All.MaxPart * sizeof(struct particle_data)); 34 | 35 | permutation = (struct permutation_data *) mymalloc_movable(&permutation, "permutation", All.MaxPart * sizeof(struct permutation_data)); 36 | 37 | n = 0; 38 | 39 | for(i = 0; i < 6; i++) 40 | count_r[i] = count_t[i] = count_p[i] = count_q[i] = 0; 41 | 42 | for(i = 0; i < nhalo; i++, n++) { 43 | P[n].Type = 1; 44 | P[n].Mass = All.Halo_Mass / All.Halo_N; 45 | } 46 | 47 | for(i = 0; i < ndisk; i++, n++) { 48 | P[n].Type = 2; 49 | P[n].Mass = All.Disk_Mass / All.Disk_N; 50 | } 51 | 52 | for(i = 0; i < nbulge; i++, n++) { 53 | P[n].Type = 3; 54 | P[n].Mass = All.Bulge_Mass / All.Bulge_N; 55 | } 56 | 57 | int *nlist = mymalloc("nlist", NTask * sizeof(int)); 58 | MPI_Allgather(&NumPart, 1, MPI_INT, nlist, 1, MPI_INT, MPI_COMM_WORLD); 59 | int nbefore = 0; 60 | for(i = 0; i < ThisTask; i++) 61 | nbefore += nlist[i]; 62 | myfree(nlist); 63 | 64 | for(n = 0; n < NumPart; n++) 65 | P[n].ID = nbefore + n + 1; 66 | 67 | for(n = 0; n < NumPart; n++) { 68 | 69 | if(P[n].Type == 1) 70 | halo_get_fresh_coordinate(P[n].Pos); // halo particle 71 | else if(P[n].Type == 2) 72 | disk_get_fresh_coordinate(P[n].Pos); // disk particle 73 | else if(P[n].Type == 3) 74 | bulge_get_fresh_coordinate(P[n].Pos); // bulge particle 75 | 76 | 77 | 78 | 79 | P[n].Vesc = forcegrid_get_escape_speed(P[n].Pos); 80 | 81 | double acc[3]; 82 | forcegrid_get_acceleration(P[n].Pos, acc); 83 | 84 | double a = sqrt(acc[0] * acc[0] + acc[1] * acc[1] + acc[2] * acc[2]); 85 | double r = sqrt(P[n].Pos[0] * P[n].Pos[0] + P[n].Pos[1] * P[n].Pos[1] + P[n].Pos[2] * P[n].Pos[2]); 86 | 87 | P[n].Tint = All.TorbitFac * 2 * M_PI * r / sqrt(r * a); 88 | 89 | P[n].RecalcFlag = 1; 90 | 91 | 92 | if(P[n].Type == 1) { 93 | 94 | // generate a realization in VelTheo[] with the exact spherically symmetric, isotropic Hernquist distribution function, for comparison 95 | 96 | do { 97 | 98 | vr = halo_generate_v(r); 99 | 100 | } while(vr >= All.MaxVelInUnitsVesc * P[n].Vesc); 101 | 102 | // isotropic velocity distribution 103 | 104 | phi = gsl_rng_uniform(random_generator) * M_PI * 2; 105 | theta = acos(gsl_rng_uniform(random_generator) * 2 - 1); 106 | 107 | P[n].VelTheo[0] = vr * sin(theta) * cos(phi); 108 | P[n].VelTheo[1] = vr * sin(theta) * sin(phi); 109 | P[n].VelTheo[2] = vr * cos(theta); 110 | 111 | vsum2_exact += vr * vr; 112 | rsum2 += r * r; 113 | } 114 | 115 | 116 | // generate an initial guess for the velocities 117 | // let's pick the Jeans moment for this, and use a Gaussian 118 | 119 | int typeOfVelocityStructure = 0; 120 | 121 | if(P[n].Type == 1) // a halo particle 122 | typeOfVelocityStructure = All.TypeOfHaloVelocityStructure; 123 | else if(P[n].Type == 
2) // disk 124 | typeOfVelocityStructure = All.TypeOfDiskVelocityStructure; 125 | else if(P[n].Type == 3) // bulge 126 | typeOfVelocityStructure = All.TypeOfBulgeVelocityStructure; 127 | else 128 | terminate("unknown type"); 129 | 130 | double disp_r = 0, disp_t = 0, disp_p = 0, disp_q = 0; 131 | 132 | get_disp_rtp(P[n].Pos, P[n].Type, &disp_r, &disp_t, &disp_p, &disp_q); 133 | 134 | //disp_r = disp_t = disp_p = disp_q = halo_get_sigma2(P[n].Pos); 135 | 136 | 137 | if(disp_r <= All.LowerDispLimit) { 138 | count_r[P[n].Type]++; 139 | disp_r = All.LowerDispLimit; 140 | } 141 | 142 | if (disp_t <= All.LowerDispLimit) { 143 | count_t[P[n].Type]++; 144 | disp_t = All.LowerDispLimit; 145 | } 146 | 147 | if (disp_p <= All.LowerDispLimit) { 148 | count_p[P[n].Type]++; 149 | disp_p = All.LowerDispLimit; 150 | } 151 | 152 | if(disp_q <= All.LowerDispLimit) { 153 | count_q[P[n].Type]++; 154 | disp_q = All.LowerDispLimit; 155 | } 156 | 157 | 158 | P[n].vr2_target = disp_r; 159 | P[n].vt2_target = disp_t; 160 | P[n].vp2_target = disp_p; 161 | P[n].vq2_target = disp_q; 162 | 163 | double vstr = get_vstream(P[n].Pos, P[n].Type); 164 | 165 | // spherical case 166 | if(typeOfVelocityStructure == 0 || typeOfVelocityStructure == 1 || typeOfVelocityStructure == 3) { 167 | 168 | double sigmaR = sqrt(disp_r); 169 | double sigmaT = sqrt(disp_t); 170 | double sigmaP = sqrt(disp_p); 171 | double v, vr, vphi, vtheta; 172 | 173 | // draw three Gaussians with the relevant dispersions 174 | do { 175 | 176 | vr = gsl_ran_gaussian(random_generator, sigmaR); 177 | vtheta = gsl_ran_gaussian(random_generator, sigmaT); 178 | vphi = gsl_ran_gaussian(random_generator, sigmaP); 179 | 180 | vphi += vstr; 181 | 182 | v = sqrt(vr * vr + vphi * vphi + vtheta * vtheta); 183 | 184 | } while ( All.MaxVelInUnitsVesc * P[n].Vesc < v ); 185 | 186 | 187 | double phi = atan2(P[n].Pos[1], P[n].Pos[0]); 188 | double theta = acos(P[n].Pos[2] / sqrt(P[n].Pos[0] * P[n].Pos[0] + P[n].Pos[1] * P[n].Pos[1] + P[n].Pos[2] * P[n].Pos[2])); 189 | double er[3], ePhi[3], eTheta[3]; 190 | 191 | er[0] = sin(theta) * cos(phi); 192 | er[1] = sin(theta) * sin(phi); 193 | er[2] = cos(theta); 194 | 195 | ePhi[0] = -sin(phi); 196 | ePhi[1] = cos(phi); 197 | ePhi[2] = 0; 198 | 199 | eTheta[0] = -cos(theta) * cos(phi); 200 | eTheta[1] = -cos(theta) * sin(phi); 201 | eTheta[2] = sin(theta); 202 | 203 | 204 | for(k = 0; k < 3; k++) { 205 | //P[n].Vel[k] = P[n].VelTheo[k]; 206 | 207 | P[n].Vel[k] = vr * er[k] + vphi * ePhi[k] + vtheta * eTheta[k]; 208 | //double vesc = halo_get_escape_speed(P[n].Pos); 209 | //printf("%g %g\n", P[n].Vesc, vesc); 210 | } 211 | 212 | /* 213 | P[n].Vel[0] = vr; 214 | P[n].Vel[1] = vtheta; 215 | P[n].Vel[2] = vphi; 216 | */ 217 | 218 | // axisymmetric case, f(E,Lz), with net rotation 219 | } else if(typeOfVelocityStructure == 2) { 220 | 221 | double sigmaR = sqrt(disp_r); 222 | double sigmaT = sqrt(disp_t); 223 | double sigmaP = sqrt(disp_p); 224 | double v, vR, vphi, vz; 225 | 226 | // draw three Gaussians with the relevant dispersions 227 | do { 228 | 229 | vR = gsl_ran_gaussian(random_generator, sigmaR); 230 | vz = gsl_ran_gaussian(random_generator, sigmaT); 231 | vphi = gsl_ran_gaussian(random_generator, sigmaP); 232 | 233 | vphi += vstr; 234 | 235 | v = sqrt(vR * vR + vphi * vphi + vz * vz); 236 | 237 | } while ( v >= All.MaxVelInUnitsVesc * P[n].Vesc ); 238 | 239 | phi = atan2(P[n].Pos[1], P[n].Pos[0]); 240 | 241 | double eR[3], ePhi[3], eZ[3]; 242 | 243 | eR[0] = cos(phi); 244 | eR[1] = sin(phi); 245 | eR[2] = 0; 246 | 247 | 
ePhi[0] = -sin(phi); 248 | ePhi[1] = cos(phi); 249 | ePhi[2] = 0; 250 | 251 | eZ[0] = 0; 252 | eZ[1] = 0; 253 | eZ[2] = 1; 254 | 255 | for(k = 0; k < 3; k++) 256 | P[n].Vel[k] = vR * eR[k] + vphi * ePhi[k] + vz * eZ[k]; 257 | } 258 | 259 | vsum2 += P[n].Vel[0] * P[n].Vel[0] + P[n].Vel[1] * P[n].Vel[1] + P[n].Vel[2] * P[n].Vel[2]; 260 | } 261 | 262 | MPI_Allreduce(count_r, tot_count_r, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 263 | MPI_Allreduce(count_t, tot_count_t, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 264 | MPI_Allreduce(count_p, tot_count_p, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 265 | MPI_Allreduce(count_q, tot_count_q, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 266 | 267 | int type; 268 | for(type = 1; type <= 3; type++) { 269 | 270 | if(NType[type] == 0) continue; 271 | 272 | double frac_r = ((double)tot_count_r[type]) / NType[type]; 273 | double frac_t = ((double)tot_count_t[type]) / NType[type]; 274 | double frac_p = ((double)tot_count_p[type]) / NType[type]; 275 | double frac_q = ((double)tot_count_q[type]) / NType[type]; 276 | 277 | mpi_printf("Type=%d: fractions of particles with problematic low velocity dispersion: (r/R|t/z|phi/tot_phi) = (%g|%g|%g|%g)\n", type, frac_r, frac_t, frac_p, frac_q); 278 | 279 | if(frac_r > 0.05 || frac_t > 0.05 || frac_p > 0.05 || frac_q > 0.05) { 280 | mpi_printf("\nwe better stop, because there appears to be no valid velocity structure for this configuration.\n\n"); 281 | endrun(); 282 | } 283 | 284 | } 285 | 286 | 287 | if(ThisTask == 0) 288 | for (type = 1; type <= 3; type++) { 289 | 290 | if(NType[type] == 0) continue; 291 | 292 | char buf[2000]; 293 | sprintf(buf, "%s/fit_%d.txt", All.OutputDir, type); 294 | if(!(FdFit[type] = fopen(buf, "w"))) 295 | terminate("can't open file '%s'", buf); 296 | } 297 | 298 | for(n = 0; n < NumPart; n++) { 299 | permutation[n].rnd = gsl_rng_uniform(random_generator); 300 | permutation[n].index = n; 301 | } 302 | 303 | qsort(permutation, NumPart, sizeof(struct permutation_data), permutation_compare); 304 | 305 | //output_toomre_Q(); 306 | //output_rotcurve(); 307 | } 308 | 309 | int permutation_compare(const void *a, const void *b) { 310 | 311 | if(((struct permutation_data *) a)->rnd < (((struct permutation_data *) b)->rnd)) return -1; 312 | 313 | if(((struct permutation_data *) a)->rnd > (((struct permutation_data *) b)->rnd)) return +1; 314 | 315 | return 0; 316 | 317 | } 318 | 319 | 320 | int get_part_count_this_task(int n){ 321 | 322 | int avg = (n - 1) / NTask + 1; 323 | int exc = NTask * avg - n; 324 | int tasklastsection = NTask - exc; 325 | 326 | if(ThisTask < tasklastsection) 327 | return avg; 328 | else 329 | return avg - 1; 330 | } 331 | 332 | 333 | void output_toomre_Q(void) 334 | { 335 | if(ThisTask == 0 && NType[2] > 0) 336 | { 337 | double pos[3], R, acc[3], R2, acc2[3], R1, acc1[3]; 338 | double disp_r, disp_t, disp_p, disp_q; 339 | char buf[1000]; 340 | int j, n = 500; 341 | 342 | double Rmax = 5.0 * All.Disk_H; 343 | 344 | sprintf(buf, "%s/toomreQ.txt", All.OutputDir); 345 | FILE *fd = fopen(buf, "w"); 346 | fprintf(fd, "%d\n", n); 347 | 348 | for(j = 0; j < n; j++) 349 | { 350 | R = (Rmax / n) * (j + 0.5); 351 | 352 | pos[0] = R; 353 | pos[1] = 0; 354 | pos[2] = 0; 355 | forcegrid_get_acceleration(pos, acc); 356 | double dphiDR = -acc[0]; 357 | 358 | R2 = R + 0.05 * R; 359 | R1 = R - 0.05 * R; 360 | 361 | pos[0] = R2; 362 | forcegrid_get_acceleration(pos, acc2); 363 | pos[0] = R1; 364 | forcegrid_get_acceleration(pos, acc1); 365 | 366 | double d2phiDR2 = (-acc2[0] - (-acc1[0])) / (R2 - R1); 367 | 368 
| double kappa2 = d2phiDR2 + 3.0 / R * dphiDR; 369 | 370 | if(kappa2 < 0) 371 | terminate("kappa2 = %g", kappa2); 372 | 373 | double kappa = sqrt(kappa2); 374 | 375 | pos[0] = R; 376 | pos[1] = 0; 377 | pos[2] = 0; 378 | get_disp_rtp(pos, 2, &disp_r, &disp_t, &disp_p, &disp_q); 379 | 380 | double sigmaR = sqrt(disp_r); 381 | 382 | double sigma_star = All.Disk_Mass / (2 * M_PI * All.Disk_H * All.Disk_H) * exp(-R / All.Disk_H); 383 | 384 | double Q = sigmaR * kappa / (3.36 * All.G * sigma_star); 385 | 386 | fprintf(fd, "%g %g\n", R, Q); 387 | } 388 | fclose(fd); 389 | } 390 | } 391 | 392 | 393 | void output_rotcurve(void) 394 | { 395 | if(ThisTask == 0) 396 | { 397 | double pos[3], R, acc[3]; 398 | char buf[1000]; 399 | int j, n = 5000; 400 | 401 | double Rmax = All.R200; 402 | 403 | sprintf(buf, "%s/rotcurve.txt", All.OutputDir); 404 | FILE *fd = fopen(buf, "w"); 405 | fprintf(fd, "%d\n", n); 406 | 407 | double vc2_tot, vc2_dm, vc2_disk, vc2_bulge; 408 | 409 | for(j = 0; j < n; j++) 410 | { 411 | R = (Rmax / n) * (j + 0.5); 412 | 413 | pos[0] = R; 414 | pos[1] = 0; 415 | pos[2] = 0; 416 | forcegrid_get_acceleration(pos, acc); 417 | vc2_tot = fabs(R * acc[0]); 418 | 419 | if(All.Bulge_Mass > 0) 420 | { 421 | bulge_get_acceleration(pos, acc); 422 | vc2_bulge = fabs(R * acc[0]); 423 | } 424 | else 425 | vc2_bulge = 0; 426 | 427 | if(All.Halo_Mass > 0) 428 | { 429 | halo_get_acceleration(pos, acc); 430 | vc2_dm = fabs(R * acc[0]); 431 | } 432 | else 433 | vc2_dm = 0; 434 | 435 | vc2_disk = vc2_tot - vc2_dm - vc2_bulge; 436 | if(vc2_disk < 0) 437 | vc2_disk = 0; 438 | 439 | fprintf(fd, "%g %g %g %g %g\n", R, sqrt(vc2_tot), sqrt(vc2_dm), sqrt(vc2_disk), sqrt(vc2_bulge)); 440 | } 441 | fclose(fd); 442 | } 443 | } 444 | -------------------------------------------------------------------------------- /src/forcetree/forcetree_optimizebalance.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "allvars.h" 9 | #include "proto.h" 10 | #include "domain.h" 11 | #include "pqueue.h" 12 | 13 | 14 | 15 | static struct force_segments_data 16 | { 17 | int start, end, task; 18 | double work, cost, count, normalized_load; 19 | } 20 | *force_domainAssign; 21 | 22 | 23 | int force_sort_load(const void *a, const void *b) 24 | { 25 | if(((struct force_segments_data *) a)->normalized_load > (((struct force_segments_data *) b)->normalized_load)) 26 | return -1; 27 | 28 | if(((struct force_segments_data *) a)->normalized_load < (((struct force_segments_data *) b)->normalized_load)) 29 | return +1; 30 | 31 | return 0; 32 | } 33 | 34 | /* node structure for priority queues */ 35 | typedef struct node_t 36 | { 37 | double pri; 38 | int val; 39 | size_t pos; 40 | } node_t; 41 | 42 | 43 | /* define callback functions for priority queues */ 44 | static int cmp_pri(double next, double curr) 45 | { 46 | return (next > curr); 47 | } 48 | 49 | static double get_pri(void *a) 50 | { 51 | return (double) ((node_t *) a)->pri; 52 | } 53 | 54 | static void set_pri(void *a, double pri) 55 | { 56 | ((node_t *) a)->pri = pri; 57 | } 58 | 59 | static size_t get_pos(void *a) 60 | { 61 | return ((node_t *) a)->pos; 62 | } 63 | 64 | static void set_pos(void *a, size_t pos) 65 | { 66 | ((node_t *) a)->pos = pos; 67 | } 68 | 69 | 70 | static double oldmax, oldsum; 71 | 
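/* Note: with cmp_pri returning (next > curr), the pqueue behaves as a
 * min-heap on the load, so pqueue_peek() in force_optimize_domain_mapping()
 * further down always yields the currently least-loaded task; get_pos/set_pos
 * let the queue record each node's heap slot inside the node itself, which
 * presumably allows entries to be relocated without a search. */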
72 | double force_get_current_balance(double *impact) 73 | { 74 | #ifndef NO_MPI_IN_PLACE 75 | MPI_Allreduce(MPI_IN_PLACE, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 76 | #else 77 | double *inTaskCost = mymalloc("inTaskCost", NTask * sizeof(double)); 78 | memcpy(inTaskCost, TaskCost, NTask * sizeof(double)); 79 | MPI_Allreduce(inTaskCost, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 80 | myfree(inTaskCost); 81 | #endif 82 | 83 | int i; 84 | for(i = 0, oldmax = oldsum = 0; i < NTask; i++) 85 | { 86 | oldsum += TaskCost[i]; 87 | if(oldmax < TaskCost[i]) 88 | oldmax = TaskCost[i]; 89 | } 90 | 91 | *impact = 1.0 + domain_full_weight[All.HighestActiveTimeBin] * (oldmax - oldsum / NTask) / All.TotGravCost; 92 | 93 | return oldmax / (oldsum / NTask); 94 | } 95 | 96 | void force_get_global_cost_for_leavenodes(int nexport) 97 | { 98 | int i, j, n, nimport, idx, task, ngrp; 99 | 100 | struct node_data 101 | { 102 | double domainCost; 103 | int domainCount; 104 | int no; 105 | } 106 | *export_node_data, *import_node_data; 107 | 108 | MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); 109 | 110 | for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) 111 | { 112 | nimport += Recv_count[j]; 113 | if(j > 0) 114 | { 115 | Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; 116 | Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; 117 | } 118 | } 119 | 120 | for(j = 0; j < NTask; j++) 121 | Send_count[j] = 0; 122 | 123 | export_node_data = mymalloc("export_node_data", nexport * sizeof(struct node_data)); 124 | import_node_data = mymalloc("import_node_data", nimport * sizeof(struct node_data)); 125 | 126 | for(i=0; i < nexport; i++) 127 | { 128 | int task = ListNoData[i].task; 129 | int ind = Send_offset[task] + Send_count[task]++; 130 | 131 | export_node_data[ind].domainCost = ListNoData[i].domainCost; 132 | export_node_data[ind].domainCount = ListNoData[i].domainCount; 133 | export_node_data[ind].no = ListNoData[i].no; 134 | } 135 | 136 | for(ngrp = 1; ngrp < (1 << PTask); ngrp++) 137 | { 138 | int recvTask = ThisTask ^ ngrp; 139 | if(recvTask < NTask) 140 | if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) 141 | MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct node_data), MPI_BYTE, 142 | recvTask, TAG_DENS_B, &import_node_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct node_data), 143 | MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 144 | } 145 | 146 | for(i=0; i < nimport; i++) 147 | { 148 | int no = import_node_data[i].no; 149 | DomainCost[no] += import_node_data[i].domainCost; 150 | DomainCount[no] += import_node_data[i].domainCount; 151 | } 152 | 153 | myfree(import_node_data); 154 | myfree(export_node_data); 155 | 156 | 157 | /* now share the cost data across all processors */ 158 | 159 | struct DomainNODE 160 | { 161 | double domainCost; 162 | int domainCount; 163 | } 164 | *DomainMoment, *loc_DomainMoment; 165 | 166 | DomainMoment = (struct DomainNODE *) mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE)); 167 | 168 | /* share the cost data across CPUs */ 169 | int *recvcounts = (int *) mymalloc("recvcounts", sizeof(int) * NTask); 170 | int *recvoffset = (int *) mymalloc("recvoffset", sizeof(int) * NTask); 171 | int *bytecounts = (int *) mymalloc("bytecounts", sizeof(int) * NTask); 172 | int *byteoffset = (int *) mymalloc("byteoffset", sizeof(int) * NTask); 173 | 174 | for(task = 0; task < NTask; task++) 175 | recvcounts[task] = 0; 176 | 177 | for(n = 0; n < NTopleaves; n++) 178 | recvcounts[DomainTask[n]]++; 179 | 180 | for(task = 0; task < NTask; task++) 181 
| bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE); 182 | 183 | for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < NTask; task++) 184 | { 185 | recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1]; 186 | byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; 187 | } 188 | 189 | loc_DomainMoment = (struct DomainNODE *) mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE)); 190 | 191 | for(n = 0, idx = 0; n < NTopleaves; n++) 192 | { 193 | if(DomainTask[n] == ThisTask) 194 | { 195 | loc_DomainMoment[idx].domainCost = DomainCost[n]; 196 | loc_DomainMoment[idx].domainCount = DomainCount[n]; 197 | idx++; 198 | } 199 | } 200 | 201 | MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); 202 | 203 | for(task = 0; task < NTask; task++) 204 | recvcounts[task] = 0; 205 | 206 | for(n = 0; n < NTopleaves; n++) 207 | { 208 | task = DomainTask[n]; 209 | if(task != ThisTask) 210 | { 211 | idx = recvoffset[task] + recvcounts[task]++; 212 | 213 | DomainCost[n] = DomainMoment[idx].domainCost; 214 | DomainCount[n] = DomainMoment[idx].domainCount; 215 | } 216 | } 217 | 218 | myfree(loc_DomainMoment); 219 | myfree(byteoffset); 220 | myfree(bytecounts); 221 | myfree(recvoffset); 222 | myfree(recvcounts); 223 | myfree(DomainMoment); 224 | } 225 | 226 | 227 | 228 | void force_optimize_domain_mapping(void) 229 | { 230 | int i, j; 231 | 232 | double fac_cost = 0.5 / oldsum; 233 | double fac_count = 0.5 / All.TotNumPart; 234 | 235 | 236 | int ncpu = NTask * All.MultipleDomains; 237 | int ndomain = NTopleaves; 238 | double workavg = 1.0 / ncpu; 239 | double workhalfnode = 0.5 / NTopleaves; 240 | double work_before = 0; 241 | double workavg_before = 0; 242 | 243 | int start = 0; 244 | 245 | force_domainAssign = mymalloc("force_domainAssign", ncpu * sizeof(struct force_segments_data)); 246 | 247 | for(i = 0; i < ncpu; i++) 248 | { 249 | double work = 0, cost = 0, count = 0; 250 | int end = start; 251 | 252 | cost += fac_cost * DomainCost[end]; 253 | count += fac_count * DomainCount[end]; 254 | work += fac_cost * DomainCost[end] + fac_count * DomainCount[end]; 255 | 256 | while((work + work_before + (end + 1 < NTopleaves ? 
fac_cost * DomainCost[end + 1] + fac_count * DomainCount[end + 1] : 0) < 257 | workavg + workavg_before + workhalfnode) || (i == ncpu - 1 && end < ndomain - 1)) 258 | { 259 | if((ndomain - end) > (ncpu - i)) 260 | end++; 261 | else 262 | break; 263 | 264 | cost += fac_cost * DomainCost[end]; 265 | count += fac_count * DomainCount[end]; 266 | work += fac_cost * DomainCost[end] + fac_count * DomainCount[end]; 267 | } 268 | 269 | force_domainAssign[i].start = start; 270 | force_domainAssign[i].end = end; 271 | force_domainAssign[i].work = work; 272 | force_domainAssign[i].cost = cost; 273 | force_domainAssign[i].count = count; 274 | 275 | force_domainAssign[i].normalized_load = cost + count; /* note: they are already multiplied by fac_cost/fac_count */ 276 | 277 | work_before += work; 278 | workavg_before += workavg; 279 | start = end + 1; 280 | } 281 | 282 | qsort(force_domainAssign, ncpu, sizeof(struct force_segments_data), force_sort_load); 283 | 284 | 285 | /* create three priority queues, one for the cost load, one for the particle count, and one for the combined cost */ 286 | pqueue_t *queue_cost = pqueue_init(NTask, cmp_pri, get_pri, set_pri, get_pos, set_pos); 287 | node_t *ncost = mymalloc("ncost", NTask * sizeof(node_t)); 288 | pqueue_t *queue_count = pqueue_init(NTask, cmp_pri, get_pri, set_pri, get_pos, set_pos); 289 | node_t *ncount = mymalloc("ncount", NTask * sizeof(node_t)); 290 | pqueue_t *queue_combi = pqueue_init(NTask, cmp_pri, get_pri, set_pri, get_pos, set_pos); 291 | node_t *ncombi = mymalloc("ncombi", NTask * sizeof(node_t)); 292 | 293 | /* fill in all the tasks into the queue. The priority will be the current cost/count, the tag 'val' is used to label the task */ 294 | for(i = 0; i < NTask; i++) 295 | { 296 | ncost[i].pri = 0; 297 | ncost[i].val = i; 298 | pqueue_insert(queue_cost, &ncost[i]); 299 | 300 | ncount[i].pri = 0; 301 | ncount[i].val = i; 302 | pqueue_insert(queue_count, &ncount[i]); 303 | 304 | ncombi[i].pri = 0; 305 | ncombi[i].val = i; 306 | pqueue_insert(queue_combi, &ncombi[i]); 307 | } 308 | 309 | double max_load = 0; 310 | double max_cost = 0; 311 | 312 | for(i = 0; i < ncpu; i++) 313 | { 314 | /* pick the least work-loaded target from the queue, and the least particle-loaded, and then decide which choice 315 | gives the smallest load overall */ 316 | double cost, load; 317 | 318 | node_t *node_cost = pqueue_peek(queue_cost); 319 | node_t *node_count = pqueue_peek(queue_count); 320 | node_t *node_combi = pqueue_peek(queue_combi); 321 | 322 | int targetA = node_cost->val; 323 | int targetB = node_count->val; 324 | int targetC = node_combi->val; 325 | 326 | cost = ncost[targetA].pri + force_domainAssign[i].cost; 327 | load = ncount[targetA].pri + force_domainAssign[i].count; 328 | if(cost < max_cost) 329 | cost = max_cost; 330 | if(load < max_load) 331 | load = max_load; 332 | double workA = cost + load; 333 | 334 | cost = ncost[targetB].pri + force_domainAssign[i].cost; 335 | load = ncount[targetB].pri + force_domainAssign[i].count; 336 | if(cost < max_cost) 337 | cost = max_cost; 338 | if(load < max_load) 339 | load = max_load; 340 | double workB = cost + load; 341 | 342 | cost = ncost[targetC].pri + force_domainAssign[i].cost; 343 | load = ncount[targetC].pri + force_domainAssign[i].count; 344 | if(cost < max_cost) 345 | cost = max_cost; 346 | if(load < max_load) 347 | load = max_load; 348 | double workC = cost + load; 349 | 350 | 351 | int target; 352 | 353 | if(workA < workB && workA < workC) 354 | target = targetA; 355 | else if(workC < workB) 356 
| target = targetC; 357 | else 358 | target = targetB; 359 | 360 | force_domainAssign[i].task = target; 361 | 362 | cost = ncost[target].pri + force_domainAssign[i].cost; 363 | load = ncount[target].pri + force_domainAssign[i].count; 364 | 365 | pqueue_change_priority(queue_cost, cost, &ncost[target]); 366 | pqueue_change_priority(queue_count, load, &ncount[target]); 367 | pqueue_change_priority(queue_combi, cost + load, &ncombi[target]); 368 | 369 | if(max_cost < cost) 370 | max_cost = cost; 371 | 372 | if(max_load < load) 373 | max_load = load; 374 | } 375 | 376 | /* free queues again */ 377 | myfree(ncombi); 378 | pqueue_free(queue_combi); 379 | myfree(ncount); 380 | pqueue_free(queue_count); 381 | myfree(ncost); 382 | pqueue_free(queue_cost); 383 | 384 | for(i = 0; i < ncpu; i++) 385 | for(j = force_domainAssign[i].start; j <= force_domainAssign[i].end; j++) 386 | DomainNewTask[j] = force_domainAssign[i].task; 387 | 388 | 389 | myfree(force_domainAssign); 390 | 391 | for(i = 0; i < NTask; i++) 392 | { 393 | TaskCost[i] = 0; 394 | TaskCount[i] = 0; 395 | } 396 | 397 | for(i = 0; i < NTopleaves; i++) 398 | { 399 | TaskCost[DomainNewTask[i]] += DomainCost[i]; 400 | TaskCount[DomainNewTask[i]] += DomainCount[i]; 401 | } 402 | 403 | double max, sum, maxload, sumload; 404 | for(i = 0, max = sum = 0, maxload = sumload = 0; i < NTask; i++) 405 | { 406 | sum += TaskCost[i]; 407 | if(max < TaskCost[i]) 408 | max = TaskCost[i]; 409 | sumload += TaskCount[i]; 410 | if(maxload < TaskCount[i]) 411 | maxload = TaskCount[i]; 412 | } 413 | 414 | mpi_printf("FORCETREE: Active-TimeBin=%d [unoptimized work-balance=%g] new work-balance=%g, new load-balance=%g\n", 415 | All.HighestActiveTimeBin, oldmax / (oldsum / NTask), max / (sum / NTask), maxload / (sumload / NTask)); 416 | 417 | if((max / (sum / NTask) > oldmax / (oldsum / NTask)) || (maxload > All.MaxPart)) 418 | { 419 | mpi_printf("FORCETREE: The work-load is either worse than before or the memory-balance is not viable. We keep the old distribution.\n"); 420 | memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); 421 | } 422 | } 423 | -------------------------------------------------------------------------------- /src/orbit_response.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | #include "allvars.h" 12 | #include "proto.h" 13 | 14 | 15 | /* returns a suitable integration timestep at coordinate pos[], limited by the local orbital time (V200/|acc|) and the crossing time of grid cell icell */ 16 | double get_timestep(double *pos, double *vel, double *acc, int icell) 17 | { 18 | // double r = sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]); 19 | double v = sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]); 20 | double aa = sqrt(acc[0] * acc[0] + acc[1] * acc[1] + acc[2] * acc[2]); 21 | 22 | double torbit = All.V200 / aa; 23 | double tcross = DG_CellSize[icell] / v; 24 | 25 | return dmin(All.TimeStepFactorOrbit * torbit, All.TimeStepFactorCellCross * tcross); 26 | } 27 | 28 | 29 | 30 | /* calculate the density response of a single particle starting from pos[]/vel[], averaged over time 'timespan'. If timespan=0, the routine 31 | * determines an appropriate time itself.
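 *
 * The orbit is integrated with a kick-drift-kick leapfrog using the step from get_timestep(), and on
 * each half-step the time 0.5*dt spent at the current position is deposited onto the four surrounding
 * (R,z) nodes of the density grid with bilinear cloud-in-cell weights (fR, fz), so that mfield[]
 * accumulates the time-averaged, finally mass-normalized, density response of the orbit.
 * A minimal calling sketch (the numerical values are purely illustrative; the force and density
 * grids must already be set up):
 *
 *   double pos[3] = { 10.0, 0.0, 0.0 }, vel[3] = { 0.0, 150.0, 0.0 };  /* illustrative phase-space point */
 *   double *mfield = mymalloc("mfield", DG_Ngrid * sizeof(double));
 *   int norbit;
 *   double tall = produce_orbit_response_field(pos, vel, 0, mfield, 1.0, 100.0, &norbit);
 *   myfree(mfield);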
32 | */ 33 | double produce_orbit_response_field( double *pos, double *vel, int id, double *mfield, double mass, 34 | double timespan, int *orbitstaken ) { 35 | 36 | int i, norbit, icell, flag = 0, iR, iz; 37 | double x[3], v[3], a[3], dt, tall, radsign_previous = 0, radsign, fR, fz; 38 | 39 | for(i = 0; i < 3; i++) { 40 | x[i] = pos[i]; 41 | v[i] = vel[i]; 42 | } 43 | 44 | for(i = 0; i < DG_Ngrid; i++) 45 | mfield[i] = 0; 46 | 47 | norbit = 0; 48 | tall = 0; 49 | 50 | 51 | forcegrid_get_acceleration(x, a); 52 | 53 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 54 | icell = iz * DG_Nbin + iR; 55 | 56 | int Norbits = 100000000; 57 | 58 | double E0 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 59 | int steps = 0; 60 | 61 | do { 62 | 63 | dt = get_timestep(x, v, a, icell); 64 | 65 | if (0 < timespan) 66 | if (timespan < dt + tall) { 67 | dt = timespan - tall; 68 | flag = 1; 69 | } 70 | 71 | mfield[iz * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (1 - fz); 72 | mfield[iz * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (1 - fz); 73 | mfield[(iz + 1) * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (fz); 74 | mfield[(iz + 1) * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (fz); 75 | 76 | /* 77 | 78 | insertion place 79 | 80 | */ 81 | 82 | for(i = 0; i < 3; i++) 83 | v[i] += 0.5 * dt * a[i]; 84 | 85 | for(i = 0; i < 3; i++) 86 | x[i] += dt * v[i]; 87 | 88 | forcegrid_get_acceleration(x, a); 89 | 90 | for(i = 0; i < 3; i++) 91 | v[i] += 0.5 * dt * a[i]; 92 | 93 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 94 | icell = iz * DG_Nbin + iR; 95 | 96 | mfield[iz * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (1 - fz); 97 | mfield[iz * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (1 - fz); 98 | mfield[(iz + 1) * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (fz); 99 | mfield[(iz + 1) * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (fz); 100 | 101 | 102 | /* 103 | 104 | insertion place 105 | 106 | */ 107 | 108 | tall += dt; 109 | 110 | radsign = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 111 | 112 | if(radsign > 0 && radsign_previous < 0) 113 | norbit++; 114 | 115 | radsign_previous = radsign; 116 | 117 | steps++; 118 | if(steps > 100000000) { 119 | printf("too many steps... 
pos=(%g|%g|%g) vel=(%g|%g|%g) dt=%g\n", 120 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], dt); 121 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 122 | printf("steps=%d: rel error = %g\n", steps, fabs(E1 - E0) / fabs(E0)); 123 | exit(1); 124 | } 125 | 126 | } while ((timespan == 0 && norbit < Norbits) || (timespan != 0 && flag == 0)); 127 | 128 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 129 | 130 | double rel_egy_error = fabs((E1 - E0) / E0); 131 | 132 | if(rel_egy_error > 0.5) { 133 | mpi_printf("relative energy error= %g orbits=%d steps=%d pos=(%g|%g|%g) vel=(%g|%g|%g)\n", rel_egy_error, norbit, steps, 134 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2]); 135 | /* 136 | terminate("error seems large, we better stop: pos=(%g|%g|%g) vel=(%g|%g|%g) id=%d v=%g vesc=%g", 137 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], id, 138 | sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]), 139 | forcegrid_get_escape_speed(pos)); 140 | */ 141 | } 142 | 143 | double fac = mass / tall; 144 | 145 | for(i = 0; i < DG_Ngrid; i++) 146 | mfield[i] *= fac; 147 | 148 | *orbitstaken = norbit; 149 | 150 | return tall; 151 | } 152 | 153 | 154 | #ifdef VER_1_1 155 | double produce_orbit_response_field_mod( double *pos, double *vel, int id, 156 | double *mfield, double *egyfield_r, double *egyfield_t, double *egyfield_q, double *egyfield_p, 157 | double mass, double timespan, int *orbitstaken, int type ) { 158 | 159 | int typeOfVelocityStructure = 0; 160 | 161 | if(type == 1) /* a halo particle */ 162 | typeOfVelocityStructure = All.TypeOfHaloVelocityStructure; 163 | else if(type == 2) /* disk */ 164 | typeOfVelocityStructure = All.TypeOfDiskVelocityStructure; 165 | else if(type == 3) /* bulge */ 166 | typeOfVelocityStructure = All.TypeOfBulgeVelocityStructure; 167 | else 168 | terminate("unknown type"); 169 | 170 | int i, norbit, icell, flag = 0, iR, iz; 171 | double x[3], v[3], a[3], dt, tall, radsign_previous = 0, radsign, fR, fz; 172 | double r2, v_dot_x, vr2; 173 | double Z[] = {0,0,-1}; 174 | double T[3], Q[3]; 175 | double q, q2, vq, vq2, v_dot_Q; 176 | double t2, vt2, v_dot_T; 177 | double vstr, vp2; 178 | int irz[2][2]; 179 | double m[2][2]; 180 | 181 | for(i = 0; i < 3; i++) { 182 | x[i] = pos[i]; 183 | v[i] = vel[i]; 184 | } 185 | 186 | for(i = 0; i < DG_Ngrid; i++) 187 | mfield[i] = 0; 188 | 189 | for(i = 0; i < EG_Ngrid; i++) { 190 | egyfield_r[i] = 0; 191 | egyfield_t[i] = 0; 192 | egyfield_q[i] = 0; 193 | egyfield_p[i] = 0; 194 | } 195 | 196 | 197 | norbit = 0; 198 | tall = 0; 199 | 200 | 201 | forcegrid_get_acceleration(x, a); 202 | 203 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 204 | icell = iz * DG_Nbin + iR; 205 | 206 | int Norbits = 100000000; 207 | 208 | double E0 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 209 | int steps = 0; 210 | 211 | do { 212 | 213 | dt = get_timestep(x, v, a, icell); 214 | 215 | if (0 < timespan) 216 | if (timespan < dt + tall) { 217 | dt = timespan - tall; 218 | flag = 1; 219 | } 220 | 221 | 222 | if(typeOfVelocityStructure == 2) { 223 | 224 | // radial 225 | r2 = x[0]*x[0] + x[1]*x[1]; 226 | v_dot_x = v[0]*x[0] + v[1]*x[1]; 227 | vr2 = v_dot_x * v_dot_x / r2; 228 | 229 | // phi 230 | Q[0] = -x[1]; 231 | Q[1] = x[0]; 232 | q2 = Q[0]*Q[0] + Q[1]*Q[1]; 233 | v_dot_Q = v[0]*Q[0] + v[1]*Q[1]; 234 | q = sqrt(q2); 235 | vq = v_dot_Q / q; 236 | vq2 = vq*vq; 237 | 238 | // phi - vstr 239 | vstr = get_vstream(x, type); 240 | vp2 =
(vq-vstr)*(vq-vstr); 241 | 242 | // theta 243 | vt2 = v[2]*v[2]; 244 | 245 | } else { 246 | 247 | // radial 248 | r2 = x[0] * x[0] + x[1] * x[1] + x[2] * x[2]; 249 | v_dot_x = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 250 | vr2 = v_dot_x * v_dot_x / r2; 251 | 252 | // phi 253 | Q[0] = x[1]*Z[2] - x[2]*Z[1]; 254 | Q[1] = x[2]*Z[0] - x[0]*Z[2]; 255 | Q[2] = x[0]*Z[1] - x[1]*Z[0]; 256 | q2 = Q[0]*Q[0] + Q[1]*Q[1] + Q[2]*Q[2]; 257 | v_dot_Q = v[0]*Q[0] + v[1]*Q[1] + v[2]*Q[2]; 258 | q = sqrt(q2); 259 | vq = v_dot_Q / q; 260 | vq2 = vq*vq; 261 | 262 | // phi - vstr 263 | vstr = get_vstream(x, type); 264 | vp2 = (vq-vstr)*(vq-vstr); 265 | 266 | // theta 267 | T[0] = x[1]*Q[2] - x[2]*Q[1]; 268 | T[1] = x[2]*Q[0] - x[0]*Q[2]; 269 | T[2] = x[0]*Q[1] - x[1]*Q[0]; 270 | t2 = T[0]*T[0] + T[1]*T[1] + T[2]*T[2]; 271 | v_dot_T = v[0]*T[0] + v[1]*T[1] + v[2]*T[2]; 272 | vt2 = v_dot_T * v_dot_T / t2; 273 | } 274 | 275 | 276 | // mass 277 | m[0][0] = 0.5 * dt * (1 - fR) * (1 - fz); 278 | m[1][0] = 0.5 * dt * (fR) * (1 - fz); 279 | m[0][1] = 0.5 * dt * (1 - fR) * (fz); 280 | m[1][1] = 0.5 * dt * (fR) * (fz); 281 | 282 | 283 | irz[0][0] = iz * DG_Nbin + iR; 284 | irz[1][0] = iz * DG_Nbin + (iR + 1); 285 | irz[0][1] = (iz + 1) * DG_Nbin + iR; 286 | irz[1][1] = (iz + 1) * DG_Nbin + (iR + 1); 287 | 288 | 289 | // m 290 | mfield[irz[0][0]] += m[0][0]; 291 | mfield[irz[1][0]] += m[1][0]; 292 | mfield[irz[0][1]] += m[0][1]; 293 | mfield[irz[1][1]] += m[1][1]; 294 | 295 | // mvr2 296 | egyfield_r[irz[0][0]] += m[0][0] * vr2; 297 | egyfield_r[irz[1][0]] += m[1][0] * vr2; 298 | egyfield_r[irz[0][1]] += m[0][1] * vr2; 299 | egyfield_r[irz[1][1]] += m[1][1] * vr2; 300 | 301 | // mvt2 302 | egyfield_t[irz[0][0]] += m[0][0] * vt2; 303 | egyfield_t[irz[1][0]] += m[1][0] * vt2; 304 | egyfield_t[irz[0][1]] += m[0][1] * vt2; 305 | egyfield_t[irz[1][1]] += m[1][1] * vt2; 306 | 307 | 308 | // mvq2 (2nd-moment) 309 | egyfield_q[irz[0][0]] += m[0][0] * vq2; 310 | egyfield_q[irz[1][0]] += m[1][0] * vq2; 311 | egyfield_q[irz[0][1]] += m[0][1] * vq2; 312 | egyfield_q[irz[1][1]] += m[1][1] * vq2; 313 | 314 | 315 | // mvp2 (dispersion) 316 | egyfield_p[irz[0][0]] += m[0][0] * vp2; 317 | egyfield_p[irz[1][0]] += m[1][0] * vp2; 318 | egyfield_p[irz[0][1]] += m[0][1] * vp2; 319 | egyfield_p[irz[1][1]] += m[1][1] * vp2; 320 | 321 | 322 | 323 | for(i = 0; i < 3; i++) 324 | v[i] += 0.5 * dt * a[i]; 325 | 326 | for(i = 0; i < 3; i++) 327 | x[i] += dt * v[i]; 328 | 329 | forcegrid_get_acceleration(x, a); 330 | 331 | for(i = 0; i < 3; i++) 332 | v[i] += 0.5 * dt * a[i]; 333 | 334 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 335 | icell = iz * DG_Nbin + iR; 336 | 337 | 338 | if(typeOfVelocityStructure == 2) { 339 | 340 | // radial 341 | r2 = x[0]*x[0] + x[1]*x[1]; 342 | v_dot_x = v[0]*x[0] + v[1]*x[1]; 343 | vr2 = v_dot_x * v_dot_x / r2; 344 | 345 | // phi 346 | Q[0] = -x[1]; 347 | Q[1] = x[0]; 348 | q2 = Q[0]*Q[0] + Q[1]*Q[1]; 349 | v_dot_Q = v[0]*Q[0] + v[1]*Q[1]; 350 | q = sqrt(q2); 351 | vq = v_dot_Q / q; 352 | vq2 = vq*vq; 353 | 354 | // phi - vstr 355 | vstr = get_vstream(x, type); 356 | vp2 = (vq-vstr)*(vq-vstr); 357 | 358 | // theta 359 | vt2 = v[2]*v[2]; 360 | 361 | } else { 362 | 363 | r2 = x[0] * x[0] + x[1] * x[1] + x[2] * x[2]; 364 | v_dot_x = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 365 | vr2 = v_dot_x * v_dot_x / r2; 366 | 367 | 368 | // phi 369 | Q[0] = x[1]*Z[2] - x[2]*Z[1]; 370 | Q[1] = x[2]*Z[0] - x[0]*Z[2]; 371 | Q[2] = x[0]*Z[1] - x[1]*Z[0]; 372 | q2 = Q[0]*Q[0] + Q[1]*Q[1] + Q[2]*Q[2]; 373 | v_dot_Q = v[0]*Q[0] + 
v[1]*Q[1] + v[2]*Q[2]; 374 | q = sqrt(q2); 375 | vq = v_dot_Q / q; 376 | vq2 = vq*vq; 377 | 378 | // phi - vstr 379 | vstr = get_vstream(x, type); 380 | vp2 = (vq-vstr)*(vq-vstr); 381 | 382 | 383 | // theta 384 | T[0] = x[1]*Q[2] - x[2]*Q[1]; 385 | T[1] = x[2]*Q[0] - x[0]*Q[2]; 386 | T[2] = x[0]*Q[1] - x[1]*Q[0]; 387 | t2 = T[0]*T[0] + T[1]*T[1] + T[2]*T[2]; 388 | v_dot_T = v[0]*T[0] + v[1]*T[1] + v[2]*T[2]; 389 | vt2 = v_dot_T * v_dot_T / t2; 390 | 391 | } 392 | 393 | // mass 394 | m[0][0] = 0.5 * dt * (1 - fR) * (1 - fz); 395 | m[1][0] = 0.5 * dt * (fR) * (1 - fz); 396 | m[0][1] = 0.5 * dt * (1 - fR) * (fz); 397 | m[1][1] = 0.5 * dt * (fR) * (fz); 398 | 399 | 400 | irz[0][0] = iz * DG_Nbin + iR; 401 | irz[1][0] = iz * DG_Nbin + (iR + 1); 402 | irz[0][1] = (iz + 1) * DG_Nbin + iR; 403 | irz[1][1] = (iz + 1) * DG_Nbin + (iR + 1); 404 | 405 | 406 | // m 407 | mfield[irz[0][0]] += m[0][0]; 408 | mfield[irz[1][0]] += m[1][0]; 409 | mfield[irz[0][1]] += m[0][1]; 410 | mfield[irz[1][1]] += m[1][1]; 411 | 412 | // mvr2 413 | egyfield_r[irz[0][0]] += m[0][0] * vr2; 414 | egyfield_r[irz[1][0]] += m[1][0] * vr2; 415 | egyfield_r[irz[0][1]] += m[0][1] * vr2; 416 | egyfield_r[irz[1][1]] += m[1][1] * vr2; 417 | 418 | // mvt2 419 | egyfield_t[irz[0][0]] += m[0][0] * vt2; 420 | egyfield_t[irz[1][0]] += m[1][0] * vt2; 421 | egyfield_t[irz[0][1]] += m[0][1] * vt2; 422 | egyfield_t[irz[1][1]] += m[1][1] * vt2; 423 | 424 | 425 | // mvq2 (2nd-moment) 426 | egyfield_q[irz[0][0]] += m[0][0] * vq2; 427 | egyfield_q[irz[1][0]] += m[1][0] * vq2; 428 | egyfield_q[irz[0][1]] += m[0][1] * vq2; 429 | egyfield_q[irz[1][1]] += m[1][1] * vq2; 430 | 431 | 432 | // mvp2 (sigmap^2) 433 | egyfield_p[irz[0][0]] += m[0][0] * vp2; 434 | egyfield_p[irz[1][0]] += m[1][0] * vp2; 435 | egyfield_p[irz[0][1]] += m[0][1] * vp2; 436 | egyfield_p[irz[1][1]] += m[1][1] * vp2; 437 | 438 | 439 | 440 | tall += dt; 441 | 442 | radsign = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 443 | 444 | if(radsign > 0 && radsign_previous < 0) 445 | norbit++; 446 | 447 | radsign_previous = radsign; 448 | 449 | steps++; 450 | if(steps > 100000000) { 451 | printf("too many steps...
pos=(%g|%g|%g) vel=(%g|%g|%g) dt=%g\n", 452 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], dt); 453 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 454 | printf("steps=%d: rel error = %g\n", steps, fabs(E1 - E0) / fabs(E0)); 455 | exit(1); 456 | } 457 | 458 | } while ((timespan == 0 && norbit < Norbits) || (timespan != 0 && flag == 0)); 459 | 460 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 461 | 462 | double rel_egy_error = fabs((E1 - E0) / E0); 463 | 464 | if(rel_egy_error > 0.5) { 465 | mpi_printf("relative energy error= %g orbits=%d steps=%d pos=(%g|%g|%g) vel=(%g|%g|%g)\n", rel_egy_error, norbit, steps, 466 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2]); 467 | /* 468 | terminate("error seems large, we better stop: pos=(%g|%g|%g) vel=(%g|%g|%g) id=%d v=%g vesc=%g", 469 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], id, 470 | sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]), 471 | forcegrid_get_escape_speed(pos)); 472 | */ 473 | } 474 | 475 | double fac = mass / tall; 476 | 477 | for(i = 0; i < DG_Ngrid; i++) 478 | mfield[i] *= fac; 479 | 480 | for(i = 0; i < EG_Ngrid; i++) { 481 | 482 | egyfield_r[i] *= fac; 483 | egyfield_t[i] *= fac; 484 | egyfield_q[i] *= fac; 485 | egyfield_p[i] *= fac; 486 | } 487 | 488 | *orbitstaken = norbit; 489 | 490 | return tall; 491 | } 492 | 493 | #endif 494 | -------------------------------------------------------------------------------- /src/allvars.c: -------------------------------------------------------------------------------- 1 | 2 | /*! \file allvars.h 3 | * \brief declares global variables. 4 | * 5 | * This file declares all global variables. Further variables should be added here, and declared as 6 | * 'extern'. The actual existence of these variables is provided by the file 'allvars.c'. To produce 7 | * 'allvars.c' from 'allvars.h', do the following: 8 | * 9 | * - Erase all #define statements 10 | * - add #include "allvars.h" 11 | * - delete all keywords 'extern' 12 | * - delete all struct definitions enclosed in {...}, e.g.
13 | * "extern struct global_data_all_processes {....} All;" 14 | * becomes "struct global_data_all_processes All;" 15 | */ 16 | 17 | #include "allvars.h" 18 | 19 | 20 | 21 | #ifdef PERIODIC 22 | MyDouble boxSize, boxHalf; 23 | 24 | #ifdef LONG_X 25 | MyDouble boxSize_X, boxHalf_X; 26 | #else 27 | #endif 28 | #ifdef LONG_Y 29 | MyDouble boxSize_Y, boxHalf_Y; 30 | #else 31 | #endif 32 | #ifdef LONG_Z 33 | MyDouble boxSize_Z, boxHalf_Z; 34 | #else 35 | #endif 36 | #endif 37 | 38 | #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG 39 | MPI_Status mpistat; 40 | #endif 41 | 42 | /*********************************************************/ 43 | /* Global variables */ 44 | /*********************************************************/ 45 | 46 | 47 | int FG_Nbin, FG_Ngrid; 48 | double FG_Rmin, FG_Fac, FG_Rin; 49 | 50 | double *FG_Pot; 51 | double *FG_DPotDR; 52 | double *FG_DPotDz; 53 | double *FG_Pot_exact; 54 | double *FG_DPotDR_exact; 55 | double *FG_DPotDz_exact; 56 | double *FG_Disp_r[6]; 57 | double *FG_DispZ[6]; 58 | double *FG_DispPhi[6]; 59 | double *FG_Vstream[6]; 60 | double *FG_tilted_vz2[6]; 61 | double *FG_tilted_vR2[6]; 62 | double *FG_tilted_vz2_prime[6]; 63 | double *FG_tilted_vR2_prime[6]; 64 | 65 | double *FG_R; 66 | 67 | int EG_MaxLevel, EG_Nstack, EG_Nbin, EG_Ngrid; 68 | double EG_Fac, EG_Rin, EG_Rmin; 69 | 70 | double *EG_R; 71 | double *EGs_EgyResponse_r[6]; 72 | double *EGs_EgyResponse_t[6]; 73 | double *EGs_EgyResponse_p[6]; 74 | double *EGs_EgyResponse_q[6]; 75 | double *EGs_EgyTarget_r[6]; 76 | double *EGs_EgyTarget_t[6]; 77 | double *EGs_EgyTarget_p[6]; 78 | double *EGs_EgyTarget_q[6]; 79 | double *EGs_MassTarget[6]; 80 | double *EGs_MassResponse[6]; 81 | 82 | 83 | double *EG_MassLoc[6]; 84 | double *EG_EgyResponseRLoc[6]; 85 | double *EG_EgyResponseTLoc[6]; 86 | double *EG_EgyResponsePLoc[6]; 87 | double *EG_EgyResponseQLoc[6]; 88 | double *EG_EgyResponseRLoc_delta[6]; 89 | double *EG_EgyResponseTLoc_delta[6]; 90 | double *EG_EgyResponsePLoc_delta[6]; 91 | double *EG_EgyResponseQLoc_delta[6]; 92 | 93 | 94 | 95 | #ifdef VER_1_1 96 | double *EG_MassLocS[6]; 97 | double *EG_EgyResponseRLocS[6]; 98 | double *EG_EgyResponseRLocS_delta[6]; 99 | double *EGs_EgyResponseRS[6]; 100 | 101 | double *EG_EgyResponseTLocS[6]; 102 | double *EG_EgyResponseTLocS_delta[6]; 103 | double *EGs_EgyResponseTS[6]; 104 | 105 | double *EG_EgyResponseQLocS[6]; 106 | double *EG_EgyResponseQLocS_delta[6]; 107 | double *EGs_EgyResponseQS[6]; 108 | 109 | double *EG_EgyResponsePLocS[6]; 110 | double *EG_EgyResponsePLocS_delta[6]; 111 | double *EGs_EgyResponsePS[6]; 112 | 113 | 114 | double fac_value_rs[6]; 115 | double fac_value_ts[6]; 116 | double fac_value_qs[6]; 117 | double fac_value_ps[6]; 118 | #endif 119 | 120 | 121 | 122 | 123 | 124 | int DG_MaxLevel, DG_Nstack, DG_Nbin, DG_Ngrid; 125 | double DG_Rmin, DG_Fac, DG_Rin; 126 | 127 | double *DG_CellVol; 128 | double *DG_CellSize; 129 | 130 | double *DGs_LogR; 131 | double *DGs_LogZ; 132 | double *DGs_Distance; 133 | 134 | double *DGs_MassTarget[6]; 135 | double *DGs_MassResponse[6]; 136 | double *DG_MassLoc[6]; 137 | double *DG_MassLoc_delta[6]; 138 | 139 | double Totorbits[6]; 140 | int Tries[6]; 141 | int Changes[6]; 142 | 143 | double TotDv2Sum[6]; 144 | double Epsilon; 145 | 146 | double Tintegrate; 147 | double S[6]; 148 | double Sdisp_r[6]; 149 | double Sdisp_t[6]; 150 | double Sdisp_p[6]; 151 | double Sdisp_q[6]; 152 | double Srelfac[6]; 153 | double Srelsfac[6]; 154 | 155 | 156 | double Srelfac_count[6]; 157 | double MType[6]; 158 | int NType[6]; 159 
| double SizeType[6]; 160 | int CountLargeChange[6]; 161 | int Noptimized; 162 | FILE *FdFit[6]; 163 | 164 | int ThisTask; /*!< the number of the local processor */ 165 | int NTask; /*!< number of processors */ 166 | int PTask; /*!< note: NTask = 2^PTask */ 167 | 168 | 169 | double CPUThisRun; /*!< Sums CPU time of current process */ 170 | 171 | int NumForceUpdate; /*!< number of active particles on local processor in current timestep */ 172 | long long GlobNumForceUpdate; 173 | int NumSphUpdate; /*!< number of active SPH particles on local processor in current timestep */ 174 | 175 | int MaxTopNodes; /*!< Maximum number of nodes in the top-level tree used for domain decomposition */ 176 | 177 | int RestartFlag; /*!< taken from command line used to start code. 0 is normal start-up from 178 | initial conditions, 1 is resuming a run from a set of restart files, while 2 179 | marks a restart from a snapshot file. */ 180 | int RestartSnapNum; 181 | 182 | int Argc; 183 | char **Argv; 184 | 185 | 186 | int Nforces; 187 | int Ndensities; 188 | int Nhydroforces; 189 | int *TargetList; 190 | int *Threads_P_CostCount[NUM_THREADS]; 191 | int *Threads_TreePoints_CostCount[NUM_THREADS]; 192 | int *Threads_Node_CostCount[NUM_THREADS]; 193 | 194 | int maxThreads = NUM_THREADS; 195 | 196 | #ifdef IMPOSE_PINNING 197 | cpu_set_t cpuset_thread[NUM_THREADS]; 198 | #endif 199 | 200 | 201 | int *Exportflag, *ThreadsExportflag[NUM_THREADS]; /*!< Buffer used for flagging whether a particle needs to be exported to another process */ 202 | int *Exportnodecount; 203 | int *Exportindex; 204 | 205 | int *Send_offset, *Send_count, *Recv_count, *Recv_offset; 206 | int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes; 207 | 208 | int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset; 209 | 210 | int TakeLevel; 211 | int SelRnd; 212 | 213 | FILE *FdMemory; 214 | 215 | unsigned char *ProcessedFlag; 216 | 217 | int TimeBinCount[TIMEBINS]; 218 | int TimeBinCountSph[TIMEBINS]; 219 | int TimeBinCountSphHydro[TIMEBINS]; 220 | int TimeBinActive[TIMEBINS]; 221 | 222 | int NActiveHydro; 223 | int NActiveGravity; 224 | int *ActiveGravityParticles; 225 | int *ActiveHydroParticles; 226 | 227 | long long GlobalNActiveHydro; 228 | long long GlobalNActiveGravity; 229 | 230 | #ifdef USE_SFR 231 | double TimeBinSfr[TIMEBINS]; 232 | #endif 233 | 234 | 235 | 236 | #ifdef SUBFIND 237 | int GrNr; 238 | int NumPartGroup; 239 | #endif 240 | 241 | int FlagNyt = 0; 242 | char DumpFlag = 1; 243 | 244 | size_t AllocatedBytes; 245 | size_t HighMarkBytes; 246 | size_t FreeBytes; 247 | 248 | size_t HighMark_run, HighMark_domain, HighMark_gravtree, HighMark_pmperiodic, 249 | HighMark_pmnonperiodic, HighMark_sphdensity, HighMark_sphhydro, HighMark_subfind_processing, 250 | HighMark_subfind_density; 251 | 252 | 253 | 254 | 255 | double WallclockTime; /*!< This holds the last wallclock time measurement for timings measurements */ 256 | double StartOfRun; /*!< This stores the time of the start of the run for evaluating the elapsed time */ 257 | 258 | double EgyInjection; 259 | 260 | 261 | int NumPart; /*!< number of particles on the LOCAL processor */ 262 | int NumGas; /*!< number of gas particles on the LOCAL processor */ 263 | 264 | gsl_rng *random_generator; /*!< the random number generator used */ 265 | 266 | 267 | #ifdef USE_SFR 268 | int Stars_converted; /*!< current number of star particles in gas particle block */ 269 | #endif 270 | 271 | #ifdef TOLERATE_WRITE_ERROR 272 
| int WriteErrorFlag; 273 | #endif 274 | 275 | double TimeOfLastDomainConstruction; /*!< simulation time at which the domain decomposition was last constructed */ 276 | 277 | int *Ngblist; /*!< Buffer to hold indices of neighbours retrieved by the neighbour search 278 | routines */ 279 | 280 | double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac; 281 | double DomainInverseLen, DomainBigFac; 282 | int *DomainStartList, *DomainEndList; 283 | double *DomainCost, *TaskCost; 284 | int *DomainCount, *TaskCount; 285 | struct no_list_data *ListNoData; 286 | 287 | int domain_bintolevel[TIMEBINS]; 288 | int domain_refbin[TIMEBINS]; 289 | int domain_corr_weight[TIMEBINS]; 290 | int domain_full_weight[TIMEBINS]; 291 | double domain_reffactor[TIMEBINS]; 292 | int domain_to_be_balanced[TIMEBINS]; 293 | 294 | int *DomainTask; 295 | int *DomainNewTask; 296 | int *DomainNodeIndex; 297 | 298 | 299 | peanokey *Key, *KeySorted; 300 | 301 | struct topnode_data *TopNodes; 302 | 303 | int NTopnodes, NTopleaves; 304 | 305 | 306 | 307 | 308 | 309 | /* variables for input/output, usually only used on process 0 */ 310 | 311 | 312 | char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */ 313 | 314 | FILE *FdInfo, /*!< file handle for info.txt log-file. */ 315 | *FdEnergy, /*!< file handle for energy.txt log-file. */ 316 | *FdTimings, /*!< file handle for timings.txt log-file. */ 317 | *FdDomain, /*!< file handle for domain.txt log-file. */ 318 | *FdBalance, /*!< file handle for balance.txt log-file. */ 319 | *FdMemory, *FdTimebin, *FdCPU; /*!< file handle for cpu.txt log-file. */ 320 | 321 | #ifdef OUTPUT_CPU_CSV 322 | FILE *FdCPUCSV; 323 | #endif 324 | 325 | #ifdef USE_SFR 326 | FILE *FdSfr; /*!< file handle for sfr.txt log-file. */ 327 | #endif 328 | 329 | 330 | 331 | 332 | 333 | struct pair_data *Pairlist; 334 | 335 | 336 | #ifdef FORCETEST 337 | FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */ 338 | #endif 339 | 340 | 341 | #ifdef DARKENERGY 342 | FILE *FdDE; /*!< file handle for darkenergy.txt log-file. */ 343 | #endif 344 | 345 | int WriteMiscFiles = 1; 346 | 347 | 348 | void *CommBuffer; /*!< points to communication buffer, which is used at a few places */ 349 | 350 | 351 | /*! This structure contains data which is the SAME for all tasks (mostly code parameters read from the 352 | * parameter file). Holding this data in a structure is convenient for writing/reading the restart file, and 353 | * it allows the introduction of new global variables in a simple way. The only thing to do is to introduce 354 | * them into this structure. 355 | */ 356 | struct global_data_all_processes All; 357 | 358 | 359 | /*! This structure holds all the information that is 360 | * stored for each particle of the simulation. 361 | */ 362 | struct particle_data *P, /*!< holds particle data on local processor */ 363 | *DomainPartBuf; /*!< buffer for particle data used in domain decomposition */ 364 | 365 | struct subfind_data *PS; 366 | 367 | /* the following structure holds data that is stored for each SPH particle in addition to the collisionless 368 | * variables. 369 | */ 370 | struct sph_particle_data *SphP, /*!< holds SPH particle data on local processor */ 371 | *DomainSphBuf; /*!< buffer for SPH particle data in domain decomposition */ 372 | 373 | 374 | #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE 375 | struct special_particle_data *PartSpecialListGlobal; 376 | #endif 377 | 378 | 379 | 380 | peanokey *DomainKeyBuf; 381 | 382 | 383 | /* Various structures for communication during the gravity computation.
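 *
 * The communication pattern used throughout is the one seen in force_get_global_cost_for_leavenodes()
 * in src/forcetree/forcetree_optimizebalance.c above: per-task export counts are accumulated in
 * Send_count[], exchanged with MPI_Alltoall() to obtain Recv_count[], prefix sums of the counts give
 * Send_offset[]/Recv_offset[], and the payload is then exchanged pairwise over a hypercube ordering
 * (recvTask = ThisTask ^ ngrp) with MPI_Sendrecv().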
384 | */ 385 | 386 | struct data_index *DataIndexTable; /*!< the particles to be exported are grouped 387 | by task-number. This table allows the 388 | results to be disentangled again and to be 389 | assigned to the correct particle */ 390 | 391 | struct data_nodelist *DataNodeList; 392 | 393 | struct gravdata_in *GravDataIn, /*!< holds particle data to be exported to other processors */ 394 | *GravDataGet; /*!< holds particle data imported from other processors */ 395 | 396 | 397 | struct gravdata_out *GravDataResult, /*!< holds the partial results computed for imported particles. Note: We use GravDataResult = GravDataGet, such that the result replaces the imported data */ 398 | *GravDataOut; /*!< holds partial results received from other processors. This will overwrite the GravDataIn array */ 399 | 400 | 401 | int ThreadsNexport[NUM_THREADS], ThreadsNexportNodes[NUM_THREADS]; 402 | int *ThreadsNgblist[NUM_THREADS]; 403 | 404 | struct data_partlist *PartList, *ThreadsPartList[NUM_THREADS]; 405 | 406 | struct datanodelist *NodeList, *ThreadsNodeList[NUM_THREADS]; 407 | 408 | int *NodeDataGet, *NodeDataIn; 409 | 410 | 411 | struct potdata_out *PotDataResult, /*!< holds the partial results computed for imported particles in the potential computation; as with GravDataResult, the result replaces the imported data */ 412 | *PotDataOut; /*!< holds partial results received from other processors; as with GravDataOut, this will overwrite the input array */ 413 | 414 | 415 | 416 | 417 | /*! Header for the standard file format. 418 | */ 419 | struct io_header header; /*!< holds header for snapshot files */ 420 | 421 | #ifdef PARAMS_IN_SNAP 422 | char Parameters[MAX_PARAMETERS][MAXLEN_PARAM_TAG]; /*!< holds the tags of the parameters defined in the parameter file */ 423 | char ParameterValues[MAX_PARAMETERS][MAXLEN_PARAM_VALUE]; /*!< holds the values for the parameters defined in the parameter file */ 424 | #endif 425 | 426 | 427 | /* 428 | * Variables for Tree 429 | * ------------------ 430 | */ 431 | int Nexport, Nimport; 432 | int NexportNodes, NimportNodes; 433 | int MaxNexport, MaxNexportNodes; 434 | int BufferFullFlag; 435 | int NextParticle; 436 | int NextJ; 437 | 438 | 439 | struct permutation_data *permutation; 440 | 441 | 442 | 443 | /** Variables for gravitational tree */ 444 | int Tree_MaxPart; 445 | int Tree_NumNodes; 446 | int Tree_MaxNodes; 447 | int Tree_FirstNonTopLevelNode; 448 | int Tree_NumPartImported; 449 | int Tree_NumPartExported; 450 | int Tree_ImportedNodeOffset; 451 | int Tree_NextFreeNode; 452 | MyDouble *Tree_Pos_list; 453 | unsigned long long *Tree_IntPos_list; 454 | int *Tree_Task_list; 455 | int *Tree_ResultIndexList; 456 | 457 | struct treepoint_data *Tree_Points; 458 | struct resultsactiveimported_data *Tree_ResultsActiveImported; 459 | 460 | 461 | 462 | int *Nextnode; /*!< gives next node in tree walk (nodes array) */ 463 | int *Father; /*!< gives parent node in tree (Prenodes array) */ 464 | 465 | struct NODE *Nodes; /*!< points to the actual memory allocated for the nodes */ 466 | /*!< this is a pointer used to access the nodes which is shifted such that Nodes[All.MaxPart] 467 | gives the first allocated node */ 468 | float *Nodes_GravCost; 469 | 470 | /** Variables for neighbor tree */ 471 | int Ngb_MaxPart; 472 | int Ngb_NumNodes; 473 | int Ngb_MaxNodes; 474 | int Ngb_FirstNonTopLevelNode; 475 | int Ngb_NextFreeNode; 476 | 477 | int *Ngb_DomainNodeIndex; 478 | int *Ngb_Nextnode; 479 | 480 | 481 | /** The ngb-tree data structure 482 | */ 483 | struct NgbNODE *Ngb_Nodes; 484 |
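/** Extended data stored for each ngb-tree node */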
struct ExtNgbNODE *ExtNgb_Nodes; 485 | 486 | 487 | 488 | 489 | #ifdef STATICNFW 490 | double Rs, R200; 491 | double Dc; 492 | double RhoCrit, V200; 493 | double fac; 494 | #endif 495 | 496 | 497 | #ifdef NUM_THREADS 498 | int MaxThreads = NUM_THREADS; 499 | #else 500 | int MaxThreads = 1; 501 | #endif 502 | --------------------------------------------------------------------------------