├── config-makefile
├── Config.sh.template
├── Makefile.systype
├── src
│   ├── domain
│   │   ├── domain_rearrange.c
│   │   ├── domain_counttogo.c
│   │   ├── domain_vars.c
│   │   ├── domain_box.c
│   │   ├── domain_sort_kernels.c
│   │   ├── pqueue.h
│   │   ├── domain_exchange.c
│   │   ├── domain.h
│   │   ├── pqueue.c
│   │   ├── domain.c
│   │   └── domain_toplevel.c
│   ├── mpi_utils
│   │   ├── hypercube_allgatherv.c
│   │   ├── sizelimited_sendrecv.c
│   │   ├── mpi_util.c
│   │   └── checksummed_sendrecv.c
│   ├── disk.c
│   ├── bulge.c
│   ├── forcetree
│   │   ├── forcetree.h
│   │   ├── forcetree_walk.c
│   │   └── forcetree_optimizebalance.c
│   ├── init.c
│   ├── allocate.c
│   ├── structure.c
│   ├── halo.c
│   ├── system.c
│   ├── proto.h
│   ├── set_particles.c
│   ├── orbit_response.c
│   └── allvars.c
├── prepare-config.perl
├── README.md
├── Makefile.lib
├── Makefile.template
├── Model_M1.param
├── Model_H3.param
├── Model_D3.param
├── Model_D1.param
├── Model_H2.param
└── Model_H1.param
/config-makefile:
--------------------------------------------------------------------------------
1 | RESULT := $(shell mkdir -p $(BUILD_DIR) )
2 |
3 | all: $(BUILD_DIR)/galicconfig.h
4 |
5 | $(BUILD_DIR)/galicconfig.h: $(CONFIG)
6 | 	$(PERL) prepare-config.perl $(CONFIG) $(BUILD_DIR)
7 |
--------------------------------------------------------------------------------
/Config.sh.template:
--------------------------------------------------------------------------------
1 | #!/bin/bash    # this line is only here to enable syntax highlighting in this file
2 |
3 |
4 | #---------------------------------------- Single/Double Precision
5 | DOUBLEPRECISION=1
6 | #OUTPUT_IN_DOUBLEPRECISION    # snapshot files will be written in double precision
7 |
8 |
9 | #--------------------------------------- Output/Input options
10 | #HAVE_HDF5    # needed when HDF5 I/O support is desired
11 |
12 |
13 | #DEBUG_ENABLE_FPU_EXCEPTIONS    # enables floating point exceptions
14 |
15 | ##---------------------------- Modifications
16 | VER_1_1    # enables version GALIC 1.1 with velocity dispersions patch
17 | #VAR_1_1_KPARAMETER_MOD    # changes vstr = k*vphi (experimental!)
18 | #VER_1_1_GNUPLOT_LOG
19 |
--------------------------------------------------------------------------------
/Makefile.systype:
--------------------------------------------------------------------------------
1 | # Select Target Computer
2 | #
3 | # Please copy this file to Makefile.systype and uncomment your
4 | # system. Don't commit changes to this file unless you add support for
5 | # a new system.
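# The chosen SYSTYPE can also be given on the make command line, e.g.
#
#     make SYSTYPE="Ubuntu"
#
# since command-line variable assignments override the assignment below
# (standard GNU make behavior).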
6 | 7 | SYSTYPE="APHI" 8 | #SYSTYPE="Curie" 9 | #SYSTYPE="Hermite" 10 | #SYSTYPE="Ranger_pgi" 11 | #SYSTYPE="Ranger_intel" 12 | #SYSTYPE="lonestar" 13 | #SYSTYPE="Kraken_pgi" 14 | #SYSTYPE="aurora" 15 | #SYSTYPE="hecate" 16 | #SYSTYPE="Darwin" 17 | #SYSTYPE="Darwin-mpich" 18 | #SYSTYPE="MBM" 19 | #SYSTYPE="Magny" 20 | #SYSTYPE="Magny-Intel" 21 | #SYSTYPE="Nehalem" 22 | #SYSTYPE="OpenSuse" 23 | #SYSTYPE="OpenSuse64" 24 | #SYSTYPE="OpenSuse64-cuda" 25 | #SYSTYPE="Judge" 26 | #SYSTYPE="HLRB2" 27 | #SYSTYPE="OPA-Cluster64-Intel" 28 | #SYSTYPE="OPA-Cluster64-Gnu" 29 | #SYSTYPE="Odin" 30 | #SYSTYPE="OpteronMPA-Gnu" 31 | #SYSTYPE="OpteronMPA-Intel" 32 | #SYSTYPE="MPA" 33 | #SYSTYPE="VIP" 34 | #SYSTYPE="odyssey" 35 | #SYSTYPE="odyssey-intel" 36 | #SYSTYPE="odyssey-opteron" 37 | #SYSTYPE="Ubuntu" 38 | #SYSTYPE="Centos5-intel" 39 | #SYSTYPE="Centos5-Gnu" 40 | -------------------------------------------------------------------------------- /src/domain/domain_rearrange.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | void domain_rearrange_particle_sequence(void) 15 | { 16 | #ifdef USE_SFR 17 | if(Stars_converted) 18 | { 19 | struct particle_data psave; 20 | peanokey key; 21 | 22 | int i; 23 | for(i = 0; i < NumGas; i++) 24 | if((P[i].Type & 15) != 0) /*If not a gas particle, swap to the end of the list */ 25 | { 26 | psave = P[i]; 27 | key = Key[i]; 28 | 29 | P[i] = P[NumGas - 1]; 30 | SphP[i] = SphP[NumGas - 1]; 31 | Key[i] = Key[NumGas - 1]; 32 | 33 | P[NumGas - 1] = psave; 34 | Key[NumGas - 1] = key; 35 | 36 | NumGas--; 37 | i--; 38 | } 39 | /*Now we have rearranged the particles, 40 | *we don't need to do it again unless there are more stars*/ 41 | Stars_converted = 0; 42 | } 43 | #endif 44 | 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/domain/domain_counttogo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | 15 | /*! This function determines how many particles that are currently stored 16 | * on the local CPU have to be moved off according to the domain 17 | * decomposition. 
18 | */ 19 | int domain_countToGo(void) 20 | { 21 | int n; 22 | 23 | for(n = 0; n < NTask; n++) 24 | { 25 | toGo[n] = 0; 26 | } 27 | 28 | 29 | for(n = 0; n < NumPart; n++) 30 | { 31 | int no = 0; 32 | 33 | while(topNodes[no].Daughter >= 0) 34 | no = topNodes[no].Daughter + (Key[n] - topNodes[no].StartKey) / (topNodes[no].Size / 8); 35 | 36 | no = topNodes[no].Leaf; 37 | 38 | if(DomainTask[no] != ThisTask) 39 | { 40 | toGo[DomainTask[no]] += 1; 41 | } 42 | } 43 | 44 | MPI_Alltoall(toGo, 1, MPI_INT, toGet, 1, MPI_INT, MPI_COMM_WORLD); 45 | 46 | return 0; 47 | } 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /prepare-config.perl: -------------------------------------------------------------------------------- 1 | 2 | # This file processes the configurations options in Config.sh, producing 3 | # two files: 4 | # 5 | # galicconfig.h to be included in each source file (via allvars.h) 6 | # compile_time_info.c code to be compiled in, which will print the configuration 7 | # 8 | if( @ARGV != 2) 9 | { 10 | print "usage: perl prepare-config.perl \n"; 11 | exit; 12 | } 13 | 14 | open(FILE, @ARGV[0]); 15 | $path = @ARGV[1]; 16 | 17 | 18 | open(OUTFILE, ">${path}/galicconfig.h"); 19 | open(COUTF, ">${path}/compile_time_info.c"); 20 | 21 | print COUTF "#include \n"; 22 | print COUTF "void output_compile_time_options(void)\n\{\n"; 23 | print COUTF "printf(\n"; 24 | 25 | while($line=) 26 | { 27 | chop $line; 28 | 29 | @fields = split ' ' , $line; 30 | 31 | if(substr($fields[0], 0, 1) ne "#") 32 | { 33 | if(length($fields[0]) > 0) 34 | { 35 | @subfields = split '=', $fields[0]; 36 | 37 | print OUTFILE "#define $subfields[0] $subfields[1]\n"; 38 | print COUTF "\" $fields[0]\\n\"\n"; 39 | } 40 | } 41 | } 42 | 43 | print COUTF "\"\\n\");\n"; 44 | print COUTF "\}\n"; 45 | -------------------------------------------------------------------------------- /src/mpi_utils/hypercube_allgatherv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | #ifdef MPI_HYPERCUBE_ALLGATHERV 12 | 13 | #define TAG 100 14 | 15 | int MPI_hypercube_Allgatherv(void *sendbuf, int sendcount, 16 | MPI_Datatype sendtype, void *recvbuf, int *recvcount, int *displs, MPI_Datatype recvtype, MPI_Comm comm) 17 | { 18 | int ntask, thistask, ptask, ngrp, size_sendtype, size_recvtype; 19 | MPI_Status status; 20 | 21 | MPI_Comm_rank(comm, &thistask); 22 | MPI_Comm_size(comm, &ntask); 23 | 24 | MPI_Type_size(sendtype, &size_sendtype); 25 | MPI_Type_size(recvtype, &size_recvtype); 26 | 27 | for(ptask = 0; ntask > (1 << ptask); ptask++); 28 | 29 | for(ngrp = 1; ngrp < (1 << ptask); ngrp++) 30 | { 31 | int recvtask = thistask ^ ngrp; 32 | 33 | if(recvtask < ntask) 34 | MPI_Sendrecv(sendbuf, sendcount, sendtype, recvtask, TAG, 35 | recvbuf + displs[recvtask] * size_recvtype, recvcount[recvtask], recvtype, recvtask, TAG, comm, &status); 36 | } 37 | 38 | if(sendbuf != recvbuf + displs[thistask] * size_recvtype) 39 | memcpy(recvbuf + displs[thistask] * size_recvtype, sendbuf, sendcount * size_sendtype); 40 | 41 | return 0; 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /src/domain/domain_vars.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | 
#include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | struct domain_peano_hilbert_data *mp; 14 | 15 | struct local_topnode_data *topNodes, *branchNodes; /*!< points to the root node of the top-level tree */ 16 | 17 | 18 | double totpartcount; 19 | 20 | struct domain_cost_data *DomainLeaveNode; 21 | 22 | double fac_load; 23 | 24 | int Nbranch; 25 | 26 | /*! toGo[partner] gives the number of particles on the current task that have to go to task 'partner' 27 | */ 28 | int *toGo; 29 | int *toGet; 30 | int *list_NumPart; 31 | int *list_load; 32 | 33 | 34 | 35 | 36 | void domain_allocate_lists(void) 37 | { 38 | Key = (peanokey *) mymalloc_movable(&Key, "domain_key", (sizeof(peanokey) * All.MaxPart)); 39 | toGo = (int *) mymalloc_movable(&toGo, "toGo", (sizeof(int) * NTask)); 40 | toGet = (int *) mymalloc_movable(&toGet, "toGet", (sizeof(int) * NTask)); 41 | list_NumPart = (int *) mymalloc_movable(&list_NumPart, "list_NumPart", (sizeof(int) * NTask)); 42 | list_load = (int *) mymalloc_movable(&list_load, "list_load", (sizeof(int) * NTask)); 43 | DomainLeaveNode = (struct domain_cost_data *) mymalloc_movable(&DomainLeaveNode, "DomainLeaveNode", (MaxTopNodes * sizeof(struct domain_cost_data))); 44 | } 45 | 46 | void domain_free_lists(void) 47 | { 48 | myfree(DomainLeaveNode); 49 | myfree(list_load); 50 | myfree(list_NumPart); 51 | myfree(toGet); 52 | myfree(toGo); 53 | } 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | GALIC v1.1 - A code for the creation of galaxy inititial conditions 2 | ------------------------------------------------------------------------ 3 | 4 | GALIC v1.1 is an updated version of GALIC code (http://www.h-its.org/tap/galic) 5 | which is implementation of a new iterative method to construct steady state 6 | composite halo-disk-bulge galaxy models with prescribed density distribution 7 | and velocity anisotropy. This update is mainly about the new constraints on the time averaged velocity structure in order to ensure its equality to the target distribution of velocity dispersions. 8 | 9 | The method and the original version of GALIC is described in full in the paper: 10 | Yurin D. & Springel, V. An iterative method for the construction of N-body galaxy models in collisionless equilibrium. MNRAS, 2014. (preprint: http://arxiv.org/abs/1402.1623). Users of the code are kindly asked to cite the paper if they make 11 | use of the code. 12 | 13 | The updated version is not fully tested and released "as is", without any guarantees 14 | or warrantees. To get support, please open a new issue. 15 | 16 | Copyright (c) 2014-2017 by Volker Springel and Denis Yurin 17 | 18 | Known Issues 19 | -------------------------------- 20 | #1. Calculation of target velocity dispersions is faulty beyond 8 Mpc for velocity structure of type 2, so don't use it for now for Halo and Bulge, instead if necessary mimic it with velocity structure of type 3 with dispersion R over Z ratio set to 1. 21 | 22 | #2. Putting the time-averaged velocity dispersions to the target ones is not fully successful in case of the disk. This means that one can still see the rapid change of the velocity structure in the disk which is not related to the disk relaxation. 
23 | -------------------------------------------------------------------------------- /src/domain/domain_box.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | 15 | /*! This routine finds the extent of the global domain grid. 16 | 17 | If periodic is on, the minimum extent is the box size. Otherwise it 18 | looks at the maximum extent of the particles. 19 | */ 20 | void domain_findExtent(void) 21 | { 22 | int i, j; 23 | double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3]; 24 | 25 | /* determine local extension */ 26 | for(j = 0; j < 3; j++) 27 | { 28 | xmin[j] = MAX_REAL_NUMBER; 29 | xmax[j] = -MAX_REAL_NUMBER; 30 | } 31 | 32 | for(i = 0; i < NumPart; i++) 33 | { 34 | for(j = 0; j < 3; j++) 35 | { 36 | if(xmin[j] > P[i].Pos[j]) 37 | xmin[j] = P[i].Pos[j]; 38 | 39 | if(xmax[j] < P[i].Pos[j]) 40 | xmax[j] = P[i].Pos[j]; 41 | } 42 | } 43 | 44 | MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 45 | MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); 46 | 47 | 48 | len = 0; 49 | for(j = 0; j < 3; j++) 50 | if(xmax_glob[j] - xmin_glob[j] > len) 51 | len = xmax_glob[j] - xmin_glob[j]; 52 | 53 | len *= 1.00001; 54 | 55 | for(j = 0; j < 3; j++) 56 | { 57 | DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]); 58 | DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len; 59 | } 60 | 61 | DomainLen = len; 62 | DomainInverseLen = 1.0 / DomainLen; 63 | DomainFac = 1.0 / len * (((peanokey) 1) << (BITS_PER_DIMENSION)); 64 | DomainBigFac = (DomainLen / (((long long) 1) << 52)); 65 | } 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/disk.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "allvars.h" 9 | #include "proto.h" 10 | 11 | 12 | 13 | 14 | 15 | /* this function returns a new random coordinate for the disk */ 16 | 17 | void disk_get_fresh_coordinate(double *pos) 18 | { 19 | double q, f, f_, R, R2, Rold, phi; 20 | 21 | do 22 | { 23 | q = gsl_rng_uniform(random_generator); 24 | 25 | pos[2] = All.Disk_Z0 / 2 * log(q / (1 - q)); 26 | 27 | q = gsl_rng_uniform(random_generator); 28 | 29 | R = 1.0; 30 | do 31 | { 32 | f = (1 + R) * exp(-R) + q - 1; 33 | f_ = -R * exp(-R); 34 | 35 | Rold = R; 36 | R = R - f / f_; 37 | } 38 | while(fabs(R - Rold) / R > 1e-7); 39 | 40 | R *= All.Disk_H; 41 | 42 | phi = gsl_rng_uniform(random_generator) * M_PI * 2; 43 | 44 | pos[0] = R * cos(phi); 45 | pos[1] = R * sin(phi); 46 | 47 | R2 = pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]; 48 | } 49 | while(R2 > All.Rmax * All.Rmax); 50 | 51 | } 52 | 53 | 54 | double disk_get_density(double *pos) 55 | { 56 | if(All.Disk_Mass > 0) 57 | { 58 | double R = sqrt(pos[0] * pos[0] + pos[1] * pos[1]); 59 | double z = pos[2]; 60 | 61 | double rho = All.Disk_Mass / (4 * M_PI * All.Disk_H * All.Disk_H * All.Disk_Z0) * 62 | exp(-R / All.Disk_H) * pow(2 / (exp(z / All.Disk_Z0) + exp(-z / All.Disk_Z0)), 2); 63 | 64 | if ( fabs(rho) < MIN_DENSITY) rho = 0; 65 | 66 | return rho; 67 | } 68 | else 69 | return 0; 70 | } 71 | 72 | 73 | 74 | double disk_get_mass_inside_radius(double R) 75 | { 76 | return All.Disk_Mass * (1 - (1 + R / All.Disk_H) * exp(-R / All.Disk_H)); 77 | } 78 | 
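disk_get_fresh_coordinate() above draws positions by inverse-transform sampling. For the vertical sech^2(z/Z0) profile the cumulative distribution inverts in closed form, q = (1 + tanh(z/Z0))/2, giving z = (Z0/2) ln(q/(1-q)). For the exponential surface density the enclosed-mass fraction q = 1 - (1 + R/H) exp(-R/H) (see disk_get_mass_inside_radius() above) has no closed-form inverse, so the code finds the root of f(x) = (1 + x) exp(-x) + q - 1 by Newton's method, using f'(x) = -x exp(-x). A minimal standalone sketch of the same radial inversion (the helper name is hypothetical; the GSL random generator is used as in the file above):

    /* draw a radius from an exponential disk of scale length H by inverting
     * q = 1 - (1 + x) exp(-x), with x = R/H, via Newton's method, mirroring
     * the loop in disk_get_fresh_coordinate() */
    double sample_disk_radius(gsl_rng *rng, double H)
    {
      double q = gsl_rng_uniform(rng);
      double x = 1.0, xold;

      do
        {
          double f  = (1 + x) * exp(-x) + q - 1;   /* residual */
          double fp = -x * exp(-x);                /* df/dx */

          xold = x;
          x = x - f / fp;
        }
      while(fabs(x - xold) / x > 1e-7);

      return x * H;                                /* apply the scale length */
    }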
-------------------------------------------------------------------------------- /src/mpi_utils/sizelimited_sendrecv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | #ifdef MPISENDRECV_SIZELIMIT 12 | 13 | 14 | #undef MPI_Sendrecv 15 | 16 | 17 | int MPI_Sizelimited_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, 18 | int dest, int sendtag, void *recvbuf, int recvcount, 19 | MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status * status) 20 | { 21 | int iter = 0, size_sendtype, size_recvtype, send_now, recv_now; 22 | int count_limit; 23 | 24 | 25 | if(dest != source) 26 | terminate("dest != source"); 27 | 28 | MPI_Type_size(sendtype, &size_sendtype); 29 | MPI_Type_size(recvtype, &size_recvtype); 30 | 31 | if(dest == ThisTask) 32 | { 33 | memcpy(recvbuf, sendbuf, recvcount * size_recvtype); 34 | return 0; 35 | } 36 | 37 | count_limit = (int) ((((long long) MPISENDRECV_SIZELIMIT) * 1024 * 1024) / size_sendtype); 38 | 39 | while(sendcount > 0 || recvcount > 0) 40 | { 41 | if(sendcount > count_limit) 42 | { 43 | send_now = count_limit; 44 | if(iter == 0) 45 | { 46 | printf("imposing size limit on MPI_Sendrecv() on task=%d (send of size=%d)\n", ThisTask, sendcount * size_sendtype); 47 | myflush(stdout); 48 | } 49 | iter++; 50 | } 51 | else 52 | send_now = sendcount; 53 | 54 | if(recvcount > count_limit) 55 | recv_now = count_limit; 56 | else 57 | recv_now = recvcount; 58 | 59 | MPI_Sendrecv(sendbuf, send_now, sendtype, dest, sendtag, recvbuf, recv_now, recvtype, source, recvtag, comm, status); 60 | 61 | sendcount -= send_now; 62 | recvcount -= recv_now; 63 | 64 | sendbuf += send_now * size_sendtype; 65 | recvbuf += recv_now * size_recvtype; 66 | } 67 | 68 | return 0; 69 | } 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /src/domain/domain_sort_kernels.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | int domain_compare_count(const void *a, const void *b) 15 | { 16 | if(((struct domain_count_data *) a)->count > (((struct domain_count_data *) b)->count)) 17 | return -1; 18 | 19 | if(((struct domain_count_data *) a)->count < (((struct domain_count_data *) b)->count)) 20 | return +1; 21 | 22 | return 0; 23 | } 24 | 25 | int domain_compare_key(const void *a, const void *b) 26 | { 27 | if(((struct domain_peano_hilbert_data *) a)->key < (((struct domain_peano_hilbert_data *) b)->key)) 28 | return -1; 29 | 30 | if(((struct domain_peano_hilbert_data *) a)->key > (((struct domain_peano_hilbert_data *) b)->key)) 31 | return +1; 32 | 33 | return 0; 34 | } 35 | 36 | 37 | static void msort_domain_with_tmp(struct domain_peano_hilbert_data *b, size_t n, struct domain_peano_hilbert_data *t) 38 | { 39 | struct domain_peano_hilbert_data *tmp; 40 | struct domain_peano_hilbert_data *b1, *b2; 41 | size_t n1, n2; 42 | 43 | if(n <= 1) 44 | return; 45 | 46 | n1 = n / 2; 47 | n2 = n - n1; 48 | b1 = b; 49 | b2 = b + n1; 50 | 51 | msort_domain_with_tmp(b1, n1, t); 52 | msort_domain_with_tmp(b2, n2, t); 53 | 54 | tmp = t; 55 | 56 | while(n1 > 0 && n2 > 0) 57 | { 58 | if(b1->key <= b2->key) 59 | { 60 | --n1; 61 | *tmp++ = *b1++; 62 | } 63 | else 64 
| {
65 |       --n2;
66 |       *tmp++ = *b2++;
67 |     }
68 |   }
69 |
70 |   if(n1 > 0)
71 |     memcpy(tmp, b1, n1 * sizeof(struct domain_peano_hilbert_data));
72 |
73 |   memcpy(b, t, (n - n2) * sizeof(struct domain_peano_hilbert_data));
74 | }
75 |
76 | void mysort_domain(void *b, size_t n, size_t s)
77 | {
78 |   /* this function tends to work slightly faster than a call of qsort() for this particular
79 |    * list, at least on most platforms
80 |    */
81 |
82 |   const size_t size = n * s;
83 |   struct domain_peano_hilbert_data *tmp;
84 |
85 |   tmp = (struct domain_peano_hilbert_data *) mymalloc("tmp", size);
86 |
87 |   msort_domain_with_tmp((struct domain_peano_hilbert_data *) b, n, tmp);
88 |
89 |   myfree(tmp);
90 | }
91 |
--------------------------------------------------------------------------------
/src/bulge.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | #include "allvars.h"
9 | #include "proto.h"
10 |
11 |
12 |
13 |
14 | /* this function returns a new random coordinate for the bulge */
15 | void bulge_get_fresh_coordinate(double *pos)
16 | {
17 |   double r;
18 |
19 |   do
20 |     {
21 |       double q = gsl_rng_uniform(random_generator);
22 |
23 |       if(q > 0)
24 |         r = All.Bulge_A * (q + sqrt(q)) / (1 - q);
25 |       else
26 |         r = 0;
27 |     }
28 |   while(r > All.Rmax);
29 |
30 |   double phi = gsl_rng_uniform(random_generator) * M_PI * 2;
31 |   double theta = acos(gsl_rng_uniform(random_generator) * 2 - 1);
32 |
33 |   pos[0] = r * sin(theta) * cos(phi);
34 |   pos[1] = r * sin(theta) * sin(phi);
35 |   pos[2] = r * cos(theta) / All.BulgeStretch;
36 | }
37 |
38 |
39 | double bulge_get_density(double *pos)
40 | {
41 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
42 |
43 |   double rho = All.BulgeStretch * All.Bulge_Mass / (2 * M_PI) * All.Bulge_A / (r + 1.0e-6 * All.Bulge_A) / pow(r + All.Bulge_A, 3);   /* Hernquist profile: rho = M a / (2 pi r (r+a)^3), with a softened center */
44 |
45 |   if(fabs(rho) < MIN_DENSITY) rho = 0;
46 |
47 |   return rho;
48 | }
49 |
50 |
51 | /* Note that the other functions below will only be called in a meaningful way for a spherical system */
52 |
53 |
54 | double bulge_get_mass_inside_radius(double r)
55 | {
56 |   if(All.Bulge_Mass > 0)
57 |     return All.Bulge_Mass * pow(r / (r + All.Bulge_A), 2);
58 |   else
59 |     return 0;
60 | }
61 |
62 |
63 |
64 | double bulge_get_potential(double *pos)
65 | {
66 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
67 |   return bulge_get_potential_from_radius(r);
68 | }
69 |
70 | double bulge_get_potential_from_radius(double r)
71 | {
72 |   double phi = -All.G * All.Bulge_Mass / (r + All.Bulge_A);
73 |   return phi;
74 | }
75 |
76 | /* returns the acceleration at coordinate pos[] */
77 | void bulge_get_acceleration(double *pos, double *acc)
78 | {
79 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
80 |   double fac = All.G * All.Bulge_Mass / ((r + 1.0e-6 * All.Bulge_A) * (r + All.Bulge_A) * (r + All.Bulge_A));
81 |
82 |   acc[0] = -fac * pos[0];
83 |   acc[1] = -fac * pos[1];
84 |   acc[2] = -fac * pos[2];
85 | }
86 |
87 | double bulge_get_escape_speed(double *pos)
88 | {
89 |   double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]);
90 |   double phi = -All.G * All.Bulge_Mass / (r + All.Bulge_A);
91 |   double vesc = sqrt(-2.0 * phi);
92 |
93 |   return vesc;
94 | }
95 |
--------------------------------------------------------------------------------
/src/forcetree/forcetree.h:
--------------------------------------------------------------------------------
1 | #ifndef FORCETREE_H
2 | #define FORCETREE_H
3 |
4 | #ifndef INLINE_FUNC
5 | #ifdef INLINE
6 | #define INLINE_FUNC inline
7 | #else
8 | #define INLINE_FUNC
9 | #endif
10 | #endif
11 |
12 |
13 | /*! length of look-up table for short-range force kernel in TreePM algorithm */
14 | #define NTAB 1000
15 |
16 | #define MAX_TREE_LEVEL 30
17 | #define MAX_TREE_ALLOC_FACTOR 30.0
18 | #define MAX_IMPACT_BEFORE_OPTIMIZATION 1.03
19 |
20 |
21 | #define BITFLAG_TOPLEVEL 0
22 | #define BITFLAG_DEPENDS_ON_LOCAL_MASS 1
23 | #define BITFLAG_DEPENDS_ON_EXTERN_MASS 2
24 | #define BITFLAG_INTERNAL_TOPLEVEL 6
25 | #define BITFLAG_MULTIPLEPARTICLES 7
26 | #define BITFLAG_NODEHASBEENKICKED 8
27 | #define BITFLAG_CONTAINS_GAS 10
28 |
29 |
30 | #define BITFLAG_MASK ((1 << BITFLAG_CONTAINS_GAS) + (1 << BITFLAG_MULTIPLEPARTICLES))
31 |
32 |
33 | static inline unsigned long long force_double_to_int(double d)
34 | {
35 |   union { double d; unsigned long long ull; } u;
36 |   u.d = d;
37 |   return (u.ull & 0xFFFFFFFFFFFFFllu);   /* keep the 52 mantissa bits */
38 | }
39 |
40 | static inline double force_int_to_double(unsigned long long x)
41 | {
42 |   union { double d; unsigned long long ull; } u;
43 |   u.d = 1.0;
44 |   u.ull |= x;
45 |   return u.d;
46 | }
47 |
48 | int force_treebuild(int npart, int optimized_domain_mapping);
49 | int force_treebuild_construct(int npart, int optimized_domain_mapping);
50 | int force_treebuild_insert_single_point(int i, unsigned long long *intpos, int th, unsigned char level);
51 | int force_create_empty_nodes(int no, int topnode, int bits, int x, int y, int z);
52 | void force_insert_pseudo_particles(void);
53 | #ifndef GPU_TREE
54 | void force_update_node_recursive(int no, int sib, int father, int *last);
55 | #else
56 | int force_update_node_recursive(int no, int sib, int father, int *last, int depth);
57 | #endif
58 | void force_exchange_topleafdata(void);
59 | void force_treeupdate_toplevel(int no, int topnode, int bits, int x, int y, int z);
60 | void force_treeallocate(int maxpart, int maxindex);
61 | void force_treefree(void);
62 | void dump_particles(void);
63 | int force_add_empty_nodes(void);
64 | void force_short_range_init(void);
65 | int force_treeevaluate(int target, int mode, int thread_id);
66 | int force_treeevaluate_shortrange(int target, int mode, int thread_id, int measure_cost_flag);
67 | int force_treeevaluate_ewald_correction(int i, int mode, int thread_id);
68 | int force_treeevaluate_direct(int target, int mode);
69 | void force_assign_cost_values(void);
70 | void force_update_node_recursive_sse(int no, int sib, int father, int *last);
71 | void force_optimize_domain_mapping(void);
72 | double force_get_current_balance(double *impact);
73 | void force_get_global_cost_for_leavenodes(int nexport);
74 |
75 |
76 |
77 | #endif
78 |
79 |
80 |
--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "allvars.h"
8 | #include "proto.h"
9 |
10 |
11 | void init(void)
12 | {
13 |   if(ThisTask == 0)
14 |     {
15 |       char buf[2000];
16 |       sprintf(buf, "%s/memory.txt", All.OutputDir);
17 |       if(!(FdMemory = fopen(buf, "w")))
18 |         terminate("can't open file '%s'", buf);
19 |     }
20 |
21 |   mymalloc_init();
22 |
23 |   set_units();
24 |
25 |   random_generator = gsl_rng_alloc(gsl_rng_ranlxd1);
26 |
27 |   gsl_rng_set(random_generator, 42 + ThisTask);   /* start-up seed */
28 |
29 |   set_softenings();
30 |
31 |   All.TopNodeAllocFactor = 0.1;
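      /* head-room factors for the domain decomposition and the gravity tree;
       * they start small on purpose and, as in the GADGET code family from
       * which GalIC derives, are enlarged at run time if the top-level tree
       * or force tree runs out of space */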
32 |   All.TreeAllocFactor = 0.8;
33 |
34 |
35 | #ifdef DEBUG_ENABLE_FPU_EXCEPTIONS
36 |   enable_core_dumps_and_fpu_exceptions();
37 | #endif
38 | }
39 |
40 |
41 | /*! \brief Computes conversion factors between internal code units and the
42 |  *  cgs-system.
43 |  *
44 |  *  In addition, constants like the gravitational constant are set.
45 |  */
46 | void set_units(void)
47 | {
48 |   All.UnitTime_in_s = All.UnitLength_in_cm / All.UnitVelocity_in_cm_per_s;
49 |   All.UnitTime_in_Megayears = All.UnitTime_in_s / SEC_PER_MEGAYEAR;
50 |
51 |   if(All.GravityConstantInternal == 0)
52 |     All.G = GRAVITY / pow(All.UnitLength_in_cm, 3) * All.UnitMass_in_g * pow(All.UnitTime_in_s, 2);
53 |   else
54 |     All.G = All.GravityConstantInternal;
55 |
56 |   All.UnitDensity_in_cgs = All.UnitMass_in_g / pow(All.UnitLength_in_cm, 3);
57 |   All.UnitPressure_in_cgs = All.UnitMass_in_g / All.UnitLength_in_cm / pow(All.UnitTime_in_s, 2);
58 |   All.UnitCoolingRate_in_cgs = All.UnitPressure_in_cgs / All.UnitTime_in_s;
59 |   All.UnitEnergy_in_cgs = All.UnitMass_in_g * pow(All.UnitLength_in_cm, 2) / pow(All.UnitTime_in_s, 2);
60 |
61 |   /* convert some physical input parameters to internal units */
62 |
63 |   All.Hubble = HUBBLE * All.UnitTime_in_s;
64 |
65 |   if(ThisTask == 0)
66 |     {
67 |       printf("\nHubble (internal units) = %g\n", All.Hubble);
68 |       printf("G (internal units) = %g\n", All.G);
69 |       printf("UnitMass_in_g = %g\n", All.UnitMass_in_g);
70 |       printf("UnitTime_in_s = %g\n", All.UnitTime_in_s);
71 |       printf("UnitVelocity_in_cm_per_s = %g\n", All.UnitVelocity_in_cm_per_s);
72 |       printf("UnitDensity_in_cgs = %g\n", All.UnitDensity_in_cgs);
73 |       printf("UnitEnergy_in_cgs = %g\n", All.UnitEnergy_in_cgs);
74 |       printf("\n");
75 |     }
76 | }
77 |
78 | void set_softenings(void)
79 | {
80 |   int i;
81 |
82 |   for(i = 0; i < 6; i++)
83 |     All.ForceSoftening[i] = 2.8 * All.Softening;   /* same spline softening for all six particle types */
84 | }
85 |
86 |
87 | void endrun(void)
88 | {
89 |   mpi_printf("endrun called, calling MPI_Finalize()\nbye!\n\n");
90 |   fflush(stdout);
91 |
92 |   MPI_Finalize();
93 |   exit(0);
94 | }
--------------------------------------------------------------------------------
/src/allocate.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include "allvars.h"
8 | #include "proto.h"
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | /* This routine allocates memory for
17 |  * particle storage, both the collisionless and the SPH particles.
18 |  * The memory for the ordered binary tree of the timeline
19 |  * is also allocated.
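 *  (In GalIC itself there are no SPH particles; what is actually allocated
 *  below is the collisionless particle array P[] plus the export and
 *  communication buffers.)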
20 | */ 21 | void allocate_memory(void) 22 | { 23 | int NTaskTimesThreads; 24 | 25 | NTaskTimesThreads = MaxThreads * NTask; 26 | 27 | Exportflag = (int *) mymalloc("Exportflag", NTaskTimesThreads * sizeof(int)); 28 | Exportindex = (int *) mymalloc("Exportindex", NTaskTimesThreads * sizeof(int)); 29 | Exportnodecount = (int *) mymalloc("Exportnodecount", NTaskTimesThreads * sizeof(int)); 30 | 31 | Send_count = (int *) mymalloc("Send_count", sizeof(int) * NTaskTimesThreads); 32 | Send_offset = (int *) mymalloc("Send_offset", sizeof(int) * NTaskTimesThreads); 33 | Recv_count = (int *) mymalloc("Recv_count", sizeof(int) * NTask); 34 | Recv_offset = (int *) mymalloc("Recv_offset", sizeof(int) * NTask); 35 | 36 | Send_count_nodes = (int *) mymalloc("Send_count_nodes", sizeof(int) * NTask); 37 | Send_offset_nodes = (int *) mymalloc("Send_offset_nodes", sizeof(int) * NTask); 38 | Recv_count_nodes = (int *) mymalloc("Recv_count_nodes", sizeof(int) * NTask); 39 | Recv_offset_nodes = (int *) mymalloc("Recv_offset_nodes", sizeof(int) * NTask); 40 | 41 | Mesh_Send_count = (int *) mymalloc("Mesh_Send_count", sizeof(int) * NTask); 42 | Mesh_Send_offset = (int *) mymalloc("Mesh_Send_offset", sizeof(int) * NTask); 43 | Mesh_Recv_count = (int *) mymalloc("Mesh_Recv_count", sizeof(int) * NTask); 44 | Mesh_Recv_offset = (int *) mymalloc("Mesh_Recv_offset", sizeof(int) * NTask); 45 | 46 | P = (struct particle_data *) mymalloc_movable(&P, "P", All.MaxPart * sizeof(struct particle_data)); 47 | 48 | ActiveGravityParticles = (int *) mymalloc_movable(&ActiveGravityParticles, "ActiveGravityParticle", All.MaxPart * sizeof(int)); 49 | 50 | /* set to zero */ 51 | memset(P, 0, All.MaxPart * sizeof(struct particle_data)); 52 | } 53 | 54 | void free_allocated_memory(void) 55 | { 56 | myfree(ActiveGravityParticles); 57 | myfree(P); 58 | 59 | myfree(Mesh_Recv_offset); 60 | myfree(Mesh_Recv_count); 61 | myfree(Mesh_Send_offset); 62 | myfree(Mesh_Send_count); 63 | 64 | myfree(Recv_offset_nodes); 65 | myfree(Recv_count_nodes); 66 | myfree(Send_offset_nodes); 67 | myfree(Send_count_nodes); 68 | 69 | myfree(Recv_offset); 70 | myfree(Recv_count); 71 | myfree(Send_offset); 72 | myfree(Send_count); 73 | 74 | myfree(Exportnodecount); 75 | myfree(Exportindex); 76 | myfree(Exportflag); 77 | } 78 | 79 | 80 | void reallocate_memory_maxpart(void) 81 | { 82 | mpi_printf("ALLOCATE: Changing to MaxPart = %d\n", All.MaxPart); 83 | 84 | P = (struct particle_data *) myrealloc_movable(P, All.MaxPart * sizeof(struct particle_data)); 85 | ActiveGravityParticles = (int *) myrealloc_movable(ActiveGravityParticles, All.MaxPart * sizeof(int)); 86 | 87 | } 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /Makefile.lib: -------------------------------------------------------------------------------- 1 | 2 | LIB_DIR=libs 3 | 4 | FFTW_VERSION=2.1.5 5 | GSL_VERSION=1.15 6 | GMP_VERSION=5.0.5 7 | HDF5_VERSION=1.8.11 8 | HWLOC_VERSION=1.4.2 9 | 10 | build_libs: gmp gsl fftw-single fftw-double hdf5 hwloc 11 | 12 | 13 | 14 | SHELL=/bin/bash 15 | 16 | fftw-single: $(LIB_DIR)/include/sfftw.h 17 | 18 | fftw-double: $(LIB_DIR)/include/dfftw.h 19 | 20 | $(LIB_DIR): 21 | mkdir $(LIB_DIR) 22 | 23 | $(LIB_DIR)/fftw-$(FFTW_VERSION).tar.gz: | $(LIB_DIR) 24 | cd $(LIB_DIR); wget http://www.fftw.org/fftw-$(FFTW_VERSION).tar.gz 25 | 26 | $(LIB_DIR)/fftw-$(FFTW_VERSION)/: $(LIB_DIR)/fftw-$(FFTW_VERSION).tar.gz 27 | cd $(LIB_DIR); tar -xf fftw-$(FFTW_VERSION).tar.gz 28 | 29 | $(LIB_DIR)/include/sfftw.h: | 
$(LIB_DIR)/fftw-$(FFTW_VERSION)/ 30 | cd $(LIB_DIR)/fftw-$(FFTW_VERSION); export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/ --enable-mpi --enable-float --enable-type-prefix --enable-shared; make; make install 31 | 32 | $(LIB_DIR)/include/dfftw.h: | $(LIB_DIR)/fftw-$(FFTW_VERSION)/ 33 | cd $(LIB_DIR)/fftw-$(FFTW_VERSION); export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/ --enable-mpi --enable-type-prefix --enable-shared; make; make install 34 | 35 | gsl: $(LIB_DIR)/include/gsl/ 36 | 37 | $(LIB_DIR)/gsl-$(GSL_VERSION).tar.gz: | $(LIB_DIR) 38 | cd $(LIB_DIR); wget http://ftpmirror.gnu.org/gsl/gsl-$(GSL_VERSION).tar.gz 39 | 40 | $(LIB_DIR)/gsl-$(GSL_VERSION)/: $(LIB_DIR)/gsl-$(GSL_VERSION).tar.gz 41 | cd $(LIB_DIR); tar -xf gsl-$(GSL_VERSION).tar.gz 42 | 43 | $(LIB_DIR)/include/gsl/: | $(LIB_DIR)/gsl-$(GSL_VERSION)/ 44 | cd $(LIB_DIR)/gsl-$(GSL_VERSION);export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/; make; make install 45 | 46 | 47 | hwloc: $(LIB_DIR)/include/hwloc.h 48 | 49 | $(LIB_DIR)/hwloc-$(HWLOC_VERSION).tar.gz: | $(LIB_DIR) 50 | cd $(LIB_DIR); wget http://www.open-mpi.de/software/hwloc/v1.4/downloads/hwloc-$(HWLOC_VERSION).tar.gz 51 | 52 | $(LIB_DIR)/hwloc-$(HWLOC_VERSION)/: $(LIB_DIR)/hwloc-$(HWLOC_VERSION).tar.gz 53 | cd $(LIB_DIR); tar -xf hwloc-$(HWLOC_VERSION).tar.gz 54 | 55 | $(LIB_DIR)/include/hwloc.h: | $(LIB_DIR)/hwloc-$(HWLOC_VERSION)/ 56 | cd $(LIB_DIR)/hwloc-$(HWLOC_VERSION); ./configure --prefix=$(CURDIR)/$(LIB_DIR)/; make; make install 57 | 58 | gmp: $(LIB_DIR)/include/gmp.h 59 | 60 | $(LIB_DIR)/gmp-$(GMP_VERSION).tar.bz2: | $(LIB_DIR) 61 | cd $(LIB_DIR); wget http://ftpmirror.gnu.org/gmp/gmp-$(GMP_VERSION).tar.bz2 62 | 63 | $(LIB_DIR)/gmp-$(GMP_VERSION)/: $(LIB_DIR)/gmp-$(GMP_VERSION).tar.bz2 64 | cd $(LIB_DIR); tar -xf gmp-$(GMP_VERSION).tar.bz2 65 | 66 | $(LIB_DIR)/include/gmp.h: | $(LIB_DIR)/gmp-$(GMP_VERSION)/ 67 | cd $(LIB_DIR)/gmp-$(GMP_VERSION); ./configure --prefix=$(CURDIR)/$(LIB_DIR)/; make; make install 68 | 69 | 70 | hdf5: $(LIB_DIR)/include/hdf5.h 71 | 72 | $(LIB_DIR)/hdf5-$(HDF5_VERSION).tar.gz: | $(LIB_DIR) 73 | cd $(LIB_DIR); wget ftp://ftp.hdfgroup.org/HDF5/current/src/hdf5-$(HDF5_VERSION).tar.gz 74 | 75 | $(LIB_DIR)/hdf5-$(HDF5_VERSION): $(LIB_DIR)/hdf5-$(HDF5_VERSION).tar.gz 76 | cd $(LIB_DIR); tar -xf hdf5-$(HDF5_VERSION).tar.gz 77 | 78 | $(LIB_DIR)/include/hdf5.h: | $(LIB_DIR)/hdf5-$(HDF5_VERSION) 79 | /bin/bash -c 'cd $(LIB_DIR)/hdf5-$(HDF5_VERSION); export LD_LIBRARY_PATH=$(LIB_DIR)/lib; ./configure --prefix=$(CURDIR)/$(LIB_DIR)/ --enable-parallel; make; make install' 80 | 81 | 82 | 83 | 84 | .PHONY= gmp gsl fftw-single fftw-double hdf5 hwloc 85 | -------------------------------------------------------------------------------- /Makefile.template: -------------------------------------------------------------------------------- 1 | EXEC = GalIC 2 | CONFIG = Config.sh 3 | BUILD_DIR = build 4 | SRC_DIR = src 5 | 6 | #PARAMFILE = Model_D3.param 7 | #N := 16 8 | 9 | 10 | ifdef SYSTYPE 11 | SYSTYPE := "$(SYSTYPE)" 12 | -include Makefile.systype 13 | else 14 | include Makefile.systype 15 | endif 16 | 17 | MAKEFILES = Makefile config-makefile 18 | ifeq ($(wildcard Makefile.systype), Makefile.systype) 19 | MAKEFILES += Makefile.systype 20 | endif 21 | 22 | 23 | 24 | PERL = /usr/bin/perl 25 | RESULT := $(shell CONFIG=$(CONFIG) PERL=$(PERL) BUILD_DIR=$(BUILD_DIR) make -f config-makefile) 26 | CONFIGVARS := $(shell cat $(BUILD_DIR)/galicconfig.h) 27 | 28 | 29 | 30 
| #MPICHLIB = -lmpich 31 | GMPLIB = -lgmp 32 | GSLLIB = -lgsl -lgslcblas 33 | MATHLIB = -lm 34 | 35 | 36 | 37 | ############################### 38 | # Determine your SYSTEM here # 39 | ############################### 40 | 41 | ifeq ($(SYSTYPE),"APHI") 42 | CC = mpicc 43 | CXX = mpicxx 44 | #OPTIMIZE = -g -w -m64 -O3 -msse3 45 | OPTIMIZE = -g -w -m64 -O3 -march=native 46 | ifeq (NUM_THREADS,$(findstring NUM_THREADS,$(CONFIGVARS))) 47 | OPTIMIZE += -fopenmp 48 | else 49 | OPTIMIZE += -Wno-unknown-pragmas 50 | endif 51 | GSL_INCL = 52 | GSL_LIBS = 53 | FFTW_INCL= 54 | FFTW_LIBS= 55 | GMP_INCL = 56 | GMP_LIBS = 57 | MPICHLIB = 58 | HDF5INCL = 59 | HDF5LIB = 60 | #OPT += -DNOCALLSOFSYSTEM 61 | #OPT += -DIMPOSE_PINNING 62 | #OPT += -DUSE_SSE 63 | endif 64 | 65 | 66 | ifndef LINKER 67 | LINKER = $(CC) 68 | endif 69 | 70 | 71 | 72 | ########################################## 73 | #determine the needed object/header files# 74 | ########################################## 75 | 76 | SUBDIRS = . 77 | 78 | OBJS = main.o allocate.o allvars.o disk.o grid.o bulge.o set_particles.o parallel_sort.o \ 79 | halo.o init.o io.o mymalloc.o orbit_response.o parameters.o structure.o system.o disp_fields.o \ 80 | forcetree/gravtree.o forcetree/forcetree.o forcetree/forcetree_walk.o domain/peano.o domain/pqueue.o \ 81 | domain/domain.o domain/domain_balance.o domain/domain_counttogo.o domain/domain_exchange.o \ 82 | domain/domain_rearrange.o domain/domain_sort_kernels.o domain/domain_toplevel.o domain/domain_vars.o domain/domain_box.o 83 | 84 | 85 | INCL += allvars.h proto.h 86 | 87 | SUBDIRS += forcetree domain 88 | 89 | ################################ 90 | #determine the needed libraries# 91 | ################################ 92 | 93 | 94 | ifneq (HAVE_HDF5,$(findstring HAVE_HDF5,$(CONFIGVARS))) 95 | HDF5LIB = 96 | endif 97 | 98 | ifeq (NUM_THREADS,$(findstring NUM_THREADS,$(CONFIGVARS))) 99 | THREAD_LIB = 100 | endif 101 | 102 | 103 | ########################## 104 | #combine compiler options# 105 | ########################## 106 | 107 | CFLAGS = $(OPTIMIZE) $(OPT) $(HDF5INCL) $(GSL_INCL) $(FFTW_INCL) $(ODE_INCL) $(GMP_INCL) $(MKL_INCL) $(CUDA_INCL) -I$(BUILD_DIR) 108 | 109 | LIBS = $(MATHLIB) $(HDF5LIB) $(MPICHLIB) $(GSL_LIBS) $(GSLLIB) $(FFTW_LIB) $(GMP_LIBS) $(GMPLIB) $(ODE_LIB) $(MKL_LIBS) $(THREAD_LIB) $(CUDA_LIBS) 110 | 111 | 112 | SUBDIRS := $(addprefix $(BUILD_DIR)/,$(SUBDIRS)) 113 | OBJS := $(addprefix $(BUILD_DIR)/,$(OBJS)) $(BUILD_DIR)/compile_time_info.o 114 | INCL := $(addprefix $(SRC_DIR)/,$(INCL)) $(BUILD_DIR)/galicconfig.h 115 | 116 | 117 | ################ 118 | #create subdirs# 119 | ################ 120 | RESULT := $(shell mkdir -p $(SUBDIRS) ) 121 | 122 | 123 | 124 | ############# 125 | #build rules# 126 | ############# 127 | 128 | all: $(EXEC) 129 | 130 | $(EXEC): $(OBJS) 131 | $(LINKER) $(OPTIMIZE) $(OBJS) $(LIBS) -o $(EXEC) 132 | # mpirun -n $(N) -f hostfile ./$(EXEC) $(PARAMFILE) 133 | 134 | #bg: $(OBJS) 135 | # $(LINKER) $(OPTIMIZE) $(OBJS) $(LIBS) -o $(EXEC) 136 | # mpirun -n $(N) -f hostfile ./$(EXEC) $(PARAMFILE) 1> log.out.txt 2> log.err.txt & 137 | 138 | clean: 139 | rm -f $(OBJS) $(EXEC) lib$(LIBRARY).a 140 | rm -f $(BUILD_DIR)/compile_time_info.c $(BUILD_DIR)/galicconfig.h 141 | 142 | $(BUILD_DIR)/%.o: $(SRC_DIR)/%.c $(INCL) $(MAKEFILES) 143 | $(CC) $(CFLAGS) -c $< -o $@ 144 | 145 | $(BUILD_DIR)/compile_time_info.o: $(BUILD_DIR)/compile_time_info.c $(MAKEFILES) 146 | $(CC) $(CFLAGS) -c $< -o $@ 147 | 148 | 
-------------------------------------------------------------------------------- /src/domain/pqueue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Volkan Yazıcı 3 | * Copyright 2006-2010 The Apache Software Foundation 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 6 | * use this file except in compliance with the License. You may obtain a copy of 7 | * the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | * License for the specific language governing permissions and limitations under 15 | * the License. 16 | */ 17 | 18 | 19 | /** 20 | * @file pqueue.h 21 | * @brief Priority Queue function declarations 22 | * 23 | * @{ 24 | */ 25 | 26 | 27 | #ifndef PQUEUE_H 28 | #define PQUEUE_H 29 | 30 | /** priority data type */ 31 | typedef double pqueue_pri_t; 32 | 33 | /** callback functions to get/set/compare the priority of an element */ 34 | typedef pqueue_pri_t (*pqueue_get_pri_f)(void *a); 35 | typedef void (*pqueue_set_pri_f)(void *a, pqueue_pri_t pri); 36 | typedef int (*pqueue_cmp_pri_f)(pqueue_pri_t next, pqueue_pri_t curr); 37 | 38 | 39 | /** callback functions to get/set the position of an element */ 40 | typedef size_t (*pqueue_get_pos_f)(void *a); 41 | typedef void (*pqueue_set_pos_f)(void *a, size_t pos); 42 | 43 | 44 | /** debug callback function to print a entry */ 45 | typedef void (*pqueue_print_entry_f)(FILE *out, void *a); 46 | 47 | 48 | /** the priority queue handle */ 49 | typedef struct pqueue_t 50 | { 51 | size_t size; 52 | size_t avail; 53 | size_t step; 54 | pqueue_cmp_pri_f cmppri; 55 | pqueue_get_pri_f getpri; 56 | pqueue_set_pri_f setpri; 57 | pqueue_get_pos_f getpos; 58 | pqueue_set_pos_f setpos; 59 | void **d; 60 | } pqueue_t; 61 | 62 | 63 | /** 64 | * initialize the queue 65 | * 66 | * @param n the initial estimate of the number of queue items for which memory 67 | * should be preallocated 68 | * @param pri the callback function to run to assign a score to a element 69 | * @param get the callback function to get the current element's position 70 | * @param set the callback function to set the current element's position 71 | * 72 | * @Return the handle or NULL for insufficent memory 73 | */ 74 | pqueue_t * 75 | pqueue_init(size_t n, 76 | pqueue_cmp_pri_f cmppri, 77 | pqueue_get_pri_f getpri, 78 | pqueue_set_pri_f setpri, 79 | pqueue_get_pos_f getpos, 80 | pqueue_set_pos_f setpos); 81 | 82 | 83 | /** 84 | * free all memory used by the queue 85 | * @param q the queue 86 | */ 87 | void pqueue_free(pqueue_t *q); 88 | 89 | 90 | /** 91 | * return the size of the queue. 92 | * @param q the queue 93 | */ 94 | size_t pqueue_size(pqueue_t *q); 95 | 96 | 97 | /** 98 | * insert an item into the queue. 99 | * @param q the queue 100 | * @param d the item 101 | * @return 0 on success 102 | */ 103 | int pqueue_insert(pqueue_t *q, void *d); 104 | 105 | 106 | /** 107 | * move an existing entry to a different priority 108 | * @param q the queue 109 | * @param old the old priority 110 | * @param d the entry 111 | */ 112 | void 113 | pqueue_change_priority(pqueue_t *q, 114 | pqueue_pri_t new_pri, 115 | void *d); 116 | 117 | 118 | /** 119 | * pop the highest-ranking item from the queue. 
120 | * @param p the queue 121 | * @param d where to copy the entry to 122 | * @return NULL on error, otherwise the entry 123 | */ 124 | void *pqueue_pop(pqueue_t *q); 125 | 126 | 127 | /** 128 | * remove an item from the queue. 129 | * @param p the queue 130 | * @param d the entry 131 | * @return 0 on success 132 | */ 133 | int pqueue_remove(pqueue_t *q, void *d); 134 | 135 | 136 | /** 137 | * access highest-ranking item without removing it. 138 | * @param q the queue 139 | * @param d the entry 140 | * @return NULL on error, otherwise the entry 141 | */ 142 | void *pqueue_peek(pqueue_t *q); 143 | 144 | 145 | /** 146 | * print the queue 147 | * @internal 148 | * DEBUG function only 149 | * @param q the queue 150 | * @param out the output handle 151 | * @param the callback function to print the entry 152 | */ 153 | void 154 | pqueue_print(pqueue_t *q, 155 | FILE *out, 156 | pqueue_print_entry_f print); 157 | 158 | 159 | /** 160 | * dump the queue and it's internal structure 161 | * @internal 162 | * debug function only 163 | * @param q the queue 164 | * @param out the output handle 165 | * @param the callback function to print the entry 166 | */ 167 | void 168 | pqueueu_dump(pqueue_t *q, 169 | FILE *out, 170 | pqueue_print_entry_f print); 171 | 172 | 173 | /** 174 | * checks that the pq is in the right order, etc 175 | * @internal 176 | * debug function only 177 | * @param q the queue 178 | */ 179 | int pqueue_is_valid(pqueue_t *q); 180 | 181 | 182 | #endif /* PQUEUE_H */ 183 | /** @} */ 184 | -------------------------------------------------------------------------------- /src/structure.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "allvars.h" 11 | #include "proto.h" 12 | 13 | 14 | 15 | static double fc(double c) 16 | { 17 | return c * (0.5 - 0.5 / pow(1 + c, 2) - log(1 + c) / (1 + c)) / pow(log(1 + c) - c / (1 + c), 2); 18 | } 19 | 20 | static double jdisk_int(double x, void *param) 21 | { 22 | double vc2, Sigma0, vc, y; 23 | 24 | if(x > 1.0e-10 * All.Halo_A) 25 | vc2 = All.G * (halo_get_mass_inside_radius(x) + bulge_get_mass_inside_radius(x)) / x; 26 | else 27 | vc2 = 0; 28 | 29 | if(vc2 < 0) 30 | terminate("vc2 < 0"); 31 | 32 | Sigma0 = All.Disk_Mass / (2 * M_PI * All.Disk_H * All.Disk_H); 33 | y = x / (2 * All.Disk_H); 34 | 35 | if(y > 1e-4) 36 | vc2 += 37 | x * 2 * M_PI * All.G * Sigma0 * y * (gsl_sf_bessel_I0(y) * gsl_sf_bessel_K0(y) - 38 | gsl_sf_bessel_I1(y) * gsl_sf_bessel_K1(y)); 39 | 40 | vc = sqrt(vc2); 41 | 42 | return pow(x / All.Disk_H, 2) * vc * exp(-x / All.Disk_H); 43 | } 44 | 45 | 46 | static double gc_int(double x, void *param) 47 | { 48 | return pow(log(1 + x) - x / (1 + x), 0.5) * pow(x, 1.5) / pow(1 + x, 2); 49 | } 50 | 51 | 52 | 53 | 54 | void structure_determination(void) 55 | { 56 | double jhalo, jdisk, jd; 57 | double hnew, dh; 58 | 59 | /* total galaxy mass */ 60 | All.M200 = pow(All.V200, 3) / (10 * All.G * All.Hubble); 61 | 62 | /* virial radius of galaxy */ 63 | All.R200 = All.V200 / (10 * All.Hubble); 64 | 65 | All.LowerDispLimit = pow(0.01 * All.V200, 2); 66 | 67 | /* halo scale radius */ 68 | All.Halo_Rs = All.R200 / All.Halo_C; 69 | 70 | /* determine the masses of all components */ 71 | All.Disk_Mass = All.MD * All.M200; 72 | All.Bulge_Mass = All.MB * All.M200; 73 | 74 | All.BH_Mass = All.MBH * All.M200; 75 | if(All.MBH > 0) 76 | All.BH_N = 1; 77 | else 78 | All.BH_N = 0; 79 | 80 | All.Halo_Mass = 
All.M200 - All.Disk_Mass - All.Bulge_Mass - All.BH_Mass; 81 | 82 | /* set the scale factor of the hernquist halo */ 83 | All.Halo_A = All.Halo_Rs * sqrt(2 * (log(1 + All.Halo_C) - All.Halo_C / (1 + All.Halo_C))); 84 | 85 | 86 | jhalo = All.Lambda * sqrt(All.G) * pow(All.M200, 1.5) * sqrt(2 * All.R200 / fc(All.Halo_C)); 87 | jdisk = All.JD * jhalo; 88 | 89 | double halo_spinfactor = 90 | 1.5 * All.Lambda * sqrt(2 * All.Halo_C / fc(All.Halo_C)) * pow(log(1 + All.Halo_C) - 91 | All.Halo_C / (1 + All.Halo_C), 92 | 1.5) / structure_gc(All.Halo_C); 93 | 94 | mpi_printf("\nStructural parameters:\n"); 95 | mpi_printf("R200 = %g\n", All.R200); 96 | mpi_printf("M200 = %g (this is the total mass)\n", All.M200); 97 | mpi_printf("A (halo) = %g\n", All.Halo_A); 98 | mpi_printf("halo_spinfactor = %g\n", halo_spinfactor); 99 | 100 | /* first guess for disk scale length */ 101 | All.Disk_H = sqrt(2.0) / 2.0 * All.Lambda / fc(All.Halo_C) * All.R200; 102 | All.Disk_Z0 = All.DiskHeight * All.Disk_H; /* sets disk thickness */ 103 | 104 | All.Bulge_A = All.BulgeSize * All.Halo_A; /* this will be used if no disk is present */ 105 | 106 | MType[1] = All.Halo_Mass; 107 | MType[2] = All.Disk_Mass; 108 | MType[3] = All.Bulge_Mass; 109 | 110 | NType[1] = All.Halo_N; 111 | NType[2] = All.Disk_N; 112 | NType[3] = All.Bulge_N; 113 | 114 | 115 | if(All.Disk_Mass > 0) 116 | { 117 | do 118 | { 119 | jd = structure_disk_angmomentum(); /* computes disk momentum */ 120 | 121 | hnew = jdisk / jd * All.Disk_H; 122 | 123 | dh = hnew - All.Disk_H; 124 | 125 | if(fabs(dh) > 0.5 * All.Disk_H) 126 | dh = 0.5 * All.Disk_H * dh / fabs(dh); 127 | else 128 | dh = dh * 0.1; 129 | 130 | All.Disk_H = All.Disk_H + dh; 131 | 132 | /* mpi_printf("Jd/J=%g hnew: %g \n", jd / jhalo, All.Disk_H); 133 | */ 134 | 135 | All.Disk_Z0 = All.DiskHeight * All.Disk_H; /* sets disk thickness */ 136 | } 137 | while(fabs(dh) / All.Disk_H > 1e-5); 138 | } 139 | 140 | mpi_printf("H (disk) = %g\n", All.Disk_H); 141 | mpi_printf("Z0 (disk) = %g\n", All.Disk_Z0); 142 | mpi_printf("A (bulge) = %g\n", All.Bulge_A); 143 | } 144 | 145 | 146 | double structure_disk_angmomentum(void) 147 | { 148 | gsl_function F; 149 | gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(WORKSIZE); 150 | F.function = &jdisk_int; 151 | 152 | double result, abserr; 153 | 154 | gsl_integration_qag(&F, 0, dmin(30 * All.Disk_H, All.R200), 155 | 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr); 156 | 157 | result *= All.Disk_Mass; 158 | 159 | gsl_integration_workspace_free(workspace); 160 | 161 | return result; 162 | } 163 | 164 | 165 | double structure_gc(double c) 166 | { 167 | gsl_function F; 168 | gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(WORKSIZE); 169 | F.function = &gc_int; 170 | 171 | double result, abserr; 172 | 173 | gsl_integration_qag(&F, 0, c, 0, 1.0e-8, WORKSIZE, GSL_INTEG_GAUSS41, workspace, &result, &abserr); 174 | 175 | gsl_integration_workspace_free(workspace); 176 | 177 | return result; 178 | } 179 | -------------------------------------------------------------------------------- /src/domain/domain_exchange.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "../allvars.h" 10 | #include "../proto.h" 11 | #include "domain.h" 12 | 13 | 14 | 15 | int myMPI_Alltoallv(void *sendbuf, int *sendcounts, int *sdispls, void *recvbuf, int *recvcounts, int *rdispls, int len, MPI_Comm comm) 
16 | { 17 | int i, ntask; 18 | MPI_Comm_size(comm, &ntask); 19 | 20 | int *scount = mymalloc("scount", ntask * sizeof(int)); 21 | int *rcount = mymalloc("rcount", ntask * sizeof(int)); 22 | int *soff = mymalloc("soff", ntask * sizeof(int)); 23 | int *roff = mymalloc("roff", ntask * sizeof(int)); 24 | 25 | for(i=0; i < ntask; i++) 26 | { 27 | scount[i] = sendcounts[i] * len; 28 | rcount[i] = recvcounts[i] * len; 29 | soff[i] = sdispls[i] * len; 30 | roff[i] = rdispls[i] * len; 31 | } 32 | 33 | int ret = MPI_Alltoallv(sendbuf, scount, soff, MPI_BYTE, 34 | recvbuf, rcount, roff, MPI_BYTE, comm); 35 | 36 | myfree(roff); 37 | myfree(soff); 38 | myfree(rcount); 39 | myfree(scount); 40 | 41 | return ret; 42 | } 43 | 44 | 45 | 46 | 47 | void domain_resize_storage(int count_get, int count_get_sph, int option_flag) 48 | { 49 | int max_load, load = NumPart + count_get; 50 | int max_sphload, sphload = NumGas + count_get_sph; 51 | MPI_Allreduce(&load, &max_load, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 52 | MPI_Allreduce(&sphload, &max_sphload, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 53 | 54 | if(max_load > (1.0 - ALLOC_TOLERANCE) * All.MaxPart || max_load < (1.0 - 3 * ALLOC_TOLERANCE) * All.MaxPart) 55 | { 56 | All.MaxPart = max_load / (1.0 - 2 * ALLOC_TOLERANCE); 57 | 58 | mpi_printf("ALLOCATE: Changing to MaxPart = %d\n", All.MaxPart); 59 | 60 | P = (struct particle_data *) myrealloc_movable(P, All.MaxPart * sizeof(struct particle_data)); 61 | 62 | if(option_flag == 1) 63 | Key = (peanokey *) myrealloc_movable(Key, sizeof(peanokey) * All.MaxPart); 64 | } 65 | } 66 | 67 | 68 | 69 | 70 | void domain_exchange(void) 71 | { 72 | double t0 = second(); 73 | 74 | int count_togo = 0, count_get = 0; 75 | int *count, *offset; 76 | int *count_recv, *offset_recv; 77 | int i, n, no, target; 78 | struct particle_data *partBuf; 79 | 80 | peanokey *keyBuf; 81 | 82 | long long sumtogo = 0; 83 | 84 | for(i = 0; i < NTask; i++) 85 | sumtogo += toGo[i]; 86 | 87 | sumup_longs(1, &sumtogo, &sumtogo); 88 | 89 | mpi_printf("DOMAIN: exchange of %lld particles\n", sumtogo); 90 | 91 | count = (int *) mymalloc_movable(&count, "count", NTask * sizeof(int)); 92 | offset = (int *) mymalloc_movable(&offset, "offset", NTask * sizeof(int)); 93 | count_recv = (int *) mymalloc_movable(&count_recv, "count_recv", NTask * sizeof(int)); 94 | offset_recv = (int *) mymalloc_movable(&offset_recv, "offset_recv", NTask * sizeof(int)); 95 | 96 | 97 | offset[0] = 0; 98 | for(i = 1; i < NTask; i++) 99 | offset[i] = offset[i - 1] + toGo[i - 1]; 100 | 101 | for(i = 0; i < NTask; i++) 102 | { 103 | count_togo += toGo[i]; 104 | count_get += toGet[i]; 105 | } 106 | 107 | 108 | partBuf = (struct particle_data *) mymalloc_movable(&partBuf, "partBuf", count_togo * sizeof(struct particle_data)); 109 | 110 | keyBuf = (peanokey *) mymalloc_movable(&keyBuf, "keyBuf", count_togo * sizeof(peanokey)); 111 | 112 | 113 | for(i = 0; i < NTask; i++) 114 | count[i] = 0; 115 | 116 | for(n = 0; n < NumPart; n++) 117 | { 118 | no = 0; 119 | 120 | while(topNodes[no].Daughter >= 0) 121 | no = topNodes[no].Daughter + (Key[n] - topNodes[no].StartKey) / (topNodes[no].Size / 8); 122 | 123 | no = topNodes[no].Leaf; 124 | 125 | target = DomainTask[no]; 126 | 127 | if(target != ThisTask) 128 | { 129 | partBuf[offset[target] + count[target]] = P[n]; 130 | keyBuf[offset[target] + count[target]] = Key[n]; 131 | count[target]++; 132 | 133 | P[n] = P[NumPart - 1]; 134 | Key[n] = Key[NumPart - 1]; 135 | NumPart--; 136 | n--; 137 | } 138 | } 139 | 140 | 141 | /**** now resize the storage 
for the P[] and SphP[] arrays if needed ****/ 142 | domain_resize_storage(count_get, 0, 1); 143 | 144 | /***** space has been created, now can do the actual exchange *****/ 145 | 146 | 147 | for(i = 0; i < NTask; i++) 148 | count_recv[i] = toGet[i]; 149 | 150 | offset_recv[0] = NumPart; 151 | 152 | for(i = 1; i < NTask; i++) 153 | offset_recv[i] = offset_recv[i - 1] + count_recv[i - 1]; 154 | 155 | myMPI_Alltoallv(partBuf, count, offset, 156 | P, count_recv, offset_recv, 157 | sizeof(struct particle_data), MPI_COMM_WORLD); 158 | 159 | myMPI_Alltoallv(keyBuf, count, offset, 160 | Key, count_recv, offset_recv, 161 | sizeof(peanokey), MPI_COMM_WORLD); 162 | 163 | 164 | NumPart += count_get; 165 | 166 | 167 | myfree(keyBuf); 168 | myfree(partBuf); 169 | myfree(offset_recv); 170 | myfree(count_recv); 171 | myfree(offset); 172 | myfree(count); 173 | 174 | double t1 = second(); 175 | 176 | mpi_printf("DOMAIN: particle exchange done. (took %g sec)\n", timediff(t0, t1)); 177 | } 178 | -------------------------------------------------------------------------------- /src/domain/domain.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLVARS_H 2 | #include "../allvars.h" 3 | #endif 4 | #ifndef DOMAIN_H 5 | #define DOMAIN_H 6 | 7 | 8 | extern struct local_topnode_data 9 | { 10 | peanokey Size; /*!< number of Peano-Hilbert mesh-cells represented by top-level node */ 11 | peanokey StartKey; /*!< first Peano-Hilbert key in top-level node */ 12 | long long Count; /*!< counts the number of particles in this top-level node */ 13 | int Daughter; /*!< index of first daughter cell (out of 8) of top-level node */ 14 | int Leaf; /*!< if the node is a leaf, this gives its number when all leaves are traversed in Peano-Hilbert order */ 15 | int Parent; 16 | int PIndex; /*!< first particle in node */ 17 | } 18 | *topNodes, *branchNodes; /*!< points to the root node of the top-level tree */ 19 | 20 | struct domain_count_data 21 | { 22 | int task; 23 | int count; 24 | int origintask; 25 | }; 26 | 27 | 28 | 29 | extern struct domain_peano_hilbert_data 30 | { 31 | peanokey key; 32 | int index; 33 | } 34 | *mp; 35 | 36 | 37 | 38 | extern struct trans_data 39 | { 40 | MyIDType ID; 41 | int new_task; 42 | int new_index; 43 | int wrapped; 44 | } 45 | *trans_table; 46 | 47 | extern int N_trans; 48 | 49 | extern int Nbranch; 50 | 51 | extern double fac_load; 52 | 53 | 54 | extern double totpartcount; 55 | 56 | extern struct domain_cost_data 57 | { 58 | int no; 59 | int Count; /*!< a table that gives the total number of particles held by each processor */ 60 | } 61 | *DomainLeaveNode; 62 | 63 | 64 | 65 | /*! 
toGo[partner] gives the number of particles on the current task that have to go to task 'partner' 66 | */ 67 | extern int *toGo; 68 | extern int *toGet; 69 | extern int *list_NumPart; 70 | extern int *list_load; 71 | 72 | 73 | 74 | 75 | int domain_check_for_local_refine_new(int i, MPI_Comm current_comm); 76 | int domain_double_to_int(double d); 77 | double domain_grav_tot_costfactor(int i); 78 | double domain_hydro_tot_costfactor(int i); 79 | void domain_init_sum_cost(void); 80 | void domain_printf(char *buf); 81 | void domain_report_balance(void); 82 | int domain_sort_load(const void *a, const void *b); 83 | int domain_compare_count(const void *a, const void *b); 84 | int domain_sort_task(const void *a, const void *b); 85 | void domain_post_checks(void); 86 | void domain_prechecks(void); 87 | void domain_insertnode(struct local_topnode_data *treeA, struct local_topnode_data *treeB, int noA, int noB); 88 | void domain_add_cost(struct local_topnode_data *treeA, int noA, long long count, double cost, double sphcost); 89 | int domain_compare_count(const void *a, const void *b); 90 | void domain_rearrange_particle_sequence(void); 91 | void domain_combine_topleaves_to_domains(int ncpu, int ndomain); 92 | void domain_findSplit_load_balanced(int ncpu, int ndomain); 93 | int domain_sort_loadorigin(const void *a, const void *b); 94 | int domain_sort_segments(const void *a, const void *b); 95 | void domain_combine_multipledomains(void); 96 | void domain_allocate(void); 97 | void domain_Decomposition(void); 98 | int domain_check_memory_bound(void); 99 | int domain_compare_key(const void *a, const void *b); 100 | int domain_compare_key(const void *a, const void *b); 101 | int domain_compare_toplist(const void *a, const void *b); 102 | double domain_particle_costfactor(int i); 103 | int domain_countToGo(void); 104 | int domain_decompose(void); 105 | int domain_determineTopTree(void); 106 | void domain_exchange(void); 107 | void domain_findExchangeNumbers(int task, int partner, int sphflag, int *send, int *recv); 108 | void domain_findExtent(void); 109 | void domain_findSplit(int cpustart, int ncpu, int first, int last); 110 | void domain_findSplit_balanced(int cpustart, int ncpu, int first, int last); 111 | void domain_free(void); 112 | void domain_shiftSplit(void); 113 | void domain_sumCost(void); 114 | int domain_topsplit(int node, peanokey startkey); 115 | int domain_topsplit_local(int node, peanokey startkey, int mode); 116 | int domain_topsplit_special(void); 117 | int domain_compare_key(const void *a, const void *b); 118 | int domain_check_for_local_refine(int i, MPI_Comm comm, double work); 119 | void domain_free_trick(void); 120 | void domain_allocate_trick(void); 121 | int domain_recursively_combine_topTree(int start, int ncpu); 122 | void domain_walktoptree(int no); 123 | void domain_optimize_domain_to_task_mapping(void); 124 | int domain_compare_count(const void *a, const void *b); 125 | void domain_allocate_lists(void); 126 | void domain_free_lists(void); 127 | void domain_pack_tree_branch(int no, int parent); 128 | int domain_unpack_tree_branch(int no, int parent); 129 | int domain_check_for_local_refine_alt(int i, int *current_taskset); 130 | int domain_reduce_error_flag(int flag, int *current_taskset); 131 | int domain_do_local_refine(int n, int **list); 132 | void domain_preserve_relevant_topnode_data(void); 133 | void domain_find_total_cost(void); 134 | void domain_voronoi_dynamic_update_execute(void); 135 | void domain_prepare_voronoi_dynamic_update(void); 136 | void 
domain_voronoi_dynamic_flag_particles(void); 137 | void domain_mark_in_trans_table(int i, int task); 138 | void domain_exchange_and_update_DC(void); 139 | int domain_compare_connection_ID(const void *a, const void *b); 140 | int domain_compare_local_trans_data_ID(const void *a, const void *b); 141 | int domain_compare_recv_trans_data_ID(const void *a, const void *b); 142 | int domain_compare_recv_trans_data_oldtask(const void *a, const void *b); 143 | 144 | void mysort_domain(void *b, size_t n, size_t s); 145 | 146 | #endif 147 | -------------------------------------------------------------------------------- /src/halo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "allvars.h" 9 | #include "proto.h" 10 | 11 | 12 | /* this file contains auxiliary routines for the description of the halo, 13 | * here modeled as a Hernquist sphere 14 | */ 15 | 16 | /* this function returns a new random coordinate for the halo; the radius is drawn by inverting the Hernquist cumulative mass profile, q = M(<r)/M = (r/(r+a))^2, which gives r = a (q + sqrt(q)) / (1 - q) for uniform q, and the z-coordinate is compressed by HaloStretch */ 17 | void halo_get_fresh_coordinate(double *pos) 18 | { 19 | double r; 20 | 21 | do 22 | { 23 | double q = gsl_rng_uniform(random_generator); 24 | 25 | if(q > 0) 26 | r = All.Halo_A * (q + sqrt(q)) / (1 - q); 27 | else 28 | r = 0; 29 | 30 | double phi = gsl_rng_uniform(random_generator) * M_PI * 2; 31 | double theta = acos(gsl_rng_uniform(random_generator) * 2 - 1); 32 | 33 | pos[0] = r * sin(theta) * cos(phi); 34 | pos[1] = r * sin(theta) * sin(phi); 35 | pos[2] = r * cos(theta) / All.HaloStretch; 36 | 37 | r = sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]); 38 | } 39 | while(r > All.Rmax); 40 | } 41 | 42 | 43 | double halo_get_density(double *pos) 44 | { 45 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + All.HaloStretch * All.HaloStretch * pos[2] * pos[2]); 46 | 47 | double rho = All.HaloStretch * All.Halo_Mass / (2 * M_PI) * All.Halo_A / (r + 1.0e-6 * All.Halo_A) / pow(r + All.Halo_A, 3); 48 | 49 | if(fabs(rho) < MIN_DENSITY) rho = 0; 50 | 51 | return rho; 52 | } 53 | 54 | 55 | /* Note that the other functions below will only be called in a meaningful way for a spherical system */ 56 | 57 | double halo_get_mass_inside_radius(double r) 58 | { 59 | return All.Halo_Mass * pow(r / (r + All.Halo_A), 2); 60 | } 61 | 62 | 63 | double halo_get_potential(double *pos) 64 | { 65 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]); 66 | return halo_get_potential_from_radius(r); 67 | } 68 | 69 | double halo_get_potential_from_radius(double r) 70 | { 71 | double phi = -All.G * All.Halo_Mass / (r + All.Halo_A); 72 | return phi; 73 | } 74 | 75 | /* returns the acceleration at coordinate pos[] */ 76 | void halo_get_acceleration(double *pos, double *acc) 77 | { 78 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]); 79 | double fac = All.G * All.Halo_Mass / ((r + 1.0e-6 * All.Halo_A)* (r + All.Halo_A) * (r + All.Halo_A)); 80 | 81 | acc[0] = -fac * pos[0]; 82 | acc[1] = -fac * pos[1]; 83 | acc[2] = -fac * pos[2]; 84 | } 85 | 86 | double halo_get_escape_speed(double *pos) 87 | { 88 | double r = sqrt(pos[0] * pos[0] + pos[1] * pos[1] + pos[2] * pos[2]); 89 | double phi = -All.G * All.Halo_Mass / (r + All.Halo_A); 90 | double vesc = sqrt(-2.0 * phi); 91 | 92 | return vesc; 93 | } 94 | 95 | double halo_get_sigma2(double *pos) 96 | { 97 | long double r = sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]); 98 | 99 | long double m = All.Halo_Mass; 100 | long double r0 = All.Halo_A; 101 | long double r_over_r0 = r/r0; 102 | 103 | 
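/* What follows is the radial velocity dispersion of the isotropic Hernquist
 * sphere, i.e. the closed-form solution of the spherical Jeans equation
 * (Hernquist 1990):
 *
 *   sigma_r^2(r) = G M / (12 a) * [ 12 r (r+a)^3 / a^4 * ln((r+a)/r)
 *                  - r/(r+a) * (25 + 52 r/a + 42 (r/a)^2 + 12 (r/a)^3) ]
 *
 * with a = Halo_A. The two terms nearly cancel for large r, which is why
 * the evaluation is done in long double and wrapped in fabsl(). */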
long double _sigma2 = 104 | (long double)(All.G*m)/(12.0*r0)* 105 | fabsl( 12*r*powl(r+r0,3)/powl(r0,4)*logl((r+r0)/r) 106 | - 107 | r/(r+r0)*(25 + r_over_r0*(52 + 42*r_over_r0 + 12*(r_over_r0*r_over_r0) ) ) 108 | ); 109 | 110 | // precision breaks down badly beyond this point, so leave it like this for a while 111 | if (65000 v_guess * v_guess * halo_f(rad, v_guess)) 179 | { 180 | v_guess = gsl_rng_uniform(random_generator) * v_max; 181 | x_aux = gsl_rng_uniform(random_generator) * f_max; 182 | } 183 | return v_guess; 184 | } 185 | -------------------------------------------------------------------------------- /src/mpi_utils/mpi_util.c: -------------------------------------------------------------------------------- 1 | /** \file 2 | MPI utility functions. 3 | */ 4 | 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | 12 | /** Implements the common idiom of exchanging buffers with every other 13 | MPI task. The numbers of items to send and receive are in the 14 | send_count and recv_count arrays, respectively. The data to 15 | exchange are in send_buf and recv_buf, and the offset to the 16 | location of the data to/from each task is in send_offset and 17 | recv_offset. Since the buffer pointers are void*, the size of the 18 | items to be exchanged is in item_size, and the tag to apply to 19 | the MPI call is in commtag. If include_self is true, the send 20 | data for ThisTask is also copied to the receive buffer. 21 | 22 | All arrays should be allocated with NTask size. */ 23 | void mpi_exchange_buffers(void *send_buf, int *send_count, int *send_offset, 24 | void *recv_buf, int *recv_count, int *recv_offset, int item_size, int commtag, int include_self) 25 | { 26 | int ngrp; 27 | // when include_self is set, the loop starts at ngrp=0; recvTask then 28 | // equals ThisTask and the MPI_Sendrecv below acts as a local self-copy. 29 | for(ngrp = include_self ? 0 : 1; ngrp < (1 << PTask); ngrp++) 30 | { 31 | int recvTask = ThisTask ^ ngrp; 32 | 33 | if(recvTask < NTask) 34 | { 35 | if(send_count[recvTask] > 0 || recv_count[recvTask] > 0) 36 | { 37 | /* exchange data */ 38 | MPI_Sendrecv((char *) send_buf + send_offset[recvTask] * item_size, 39 | send_count[recvTask] * item_size, MPI_BYTE, 40 | recvTask, commtag, 41 | (char *) recv_buf + recv_offset[recvTask] * item_size, 42 | recv_count[recvTask] * item_size, MPI_BYTE, recvTask, commtag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 43 | } 44 | } 45 | } 46 | } 47 | 48 | /** Calculates the recv_count, send_offset, and recv_offset arrays 49 | based on the send_count. Returns nimport, the total number of 50 | particles to be received. If an identical set of copies is to be 51 | sent to all tasks, set send_identical=1 and the send_offset will 52 | be zero for all tasks. 53 | 54 | All arrays should be allocated with NTask size. */ 55 | int mpi_calculate_offsets(int *send_count, int *send_offset, int *recv_count, int *recv_offset, int send_identical) 56 | { 57 | // Exchange the send/receive counts 58 | MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, MPI_COMM_WORLD); 59 | 60 | int nimport = 0; 61 | recv_offset[0] = 0; 62 | send_offset[0] = 0; 63 | int j; 64 | for(j = 0; j < NTask; j++) 65 | { 66 | nimport += recv_count[j]; 67 | 68 | if(j > 0) 69 | { 70 | send_offset[j] = send_offset[j - 1] + (send_identical ? 0 : send_count[j - 1]); 71 | recv_offset[j] = recv_offset[j - 1] + recv_count[j - 1]; 72 | } 73 | } 74 | return nimport; 75 | } 76 | 77 | 78 | /** Compare function used to sort an array of int pointers into order 79 | of the pointer targets. 
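The void* signature lets it be passed directly to qsort()-style sorters, such as the mysort() call in sort_based_on_field() below.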
*/ 80 | int intpointer_compare(const void *a, const void *b) 81 | { 82 | if((**(int **) a) < (**(int **) b)) 83 | return -1; 84 | 85 | if((**(int **) a) > (**(int **) b)) 86 | return +1; 87 | 88 | return 0; 89 | } 90 | 91 | 92 | /** Sort an opaque array into increasing order of an int field, given 93 | by the specified offset. (This would typically be field indicating 94 | the task.) Returns a sorted copy of the data array, that needs to 95 | be myfreed. 96 | 97 | We do this by sorting an array of pointers to the task field, and 98 | then using this array to deduce the reordering of the data 99 | array. Unfortunately this means making a copy of the data, but 100 | this just replaces the copy after the mpi_exchange_buffers 101 | anyway. */ 102 | void *sort_based_on_field(void *data, int field_offset, int n_items, int item_size) 103 | { 104 | int i; 105 | char *data2; 106 | int **perm; 107 | data2 = mymalloc("data2", n_items * item_size); 108 | perm = mymalloc("perm", n_items * sizeof(*perm)); 109 | 110 | for(i = 0; i < n_items; ++i) 111 | perm[i] = (int *) ((char *) data + i * item_size + field_offset); 112 | 113 | mysort(perm, n_items, sizeof(*perm), intpointer_compare); 114 | // reorder data into data2 115 | for(i = 0; i < n_items; ++i) 116 | { 117 | size_t orig_pos = ((char *) perm[i] - ((char *) data + field_offset)) / item_size; 118 | myassert(((char *) perm[i] - ((char *) data + field_offset)) % item_size == 0); 119 | memcpy(data2 + item_size * i, (char *) data + item_size * orig_pos, item_size); 120 | } 121 | 122 | myfree(perm); 123 | 124 | return (void *) data2; 125 | } 126 | 127 | /** This function distributes the members in an opaque structure to 128 | the tasks based on a task field given by a specified offset into 129 | the opaque struct. The task field must have int type. n_items is 130 | updated to the new size of data. max_n is the allocated size of 131 | the data array, and is updated if a realloc is necessary. 
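    A minimal usage sketch (the struct and tag are illustrative, not from
    this code base; the buffer must have been allocated with
    mymalloc_movable() so the allocator can update the caller's pointer if
    the array has to grow):

      struct item { int task; double payload; };   /* hypothetical */
      int n = n_local, nmax = n_alloc;
      mpi_distribute_items_to_tasks(items, offsetof(struct item, task),
                                    &n, &nmax, sizeof(struct item), 4711);

    afterwards items[0..n-1] holds the entries received by this task.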
*/ 132 | void mpi_distribute_items_to_tasks(void *data, int task_offset, int *n_items, int *max_n, int item_size, int commtag) 133 | { 134 | int i; 135 | 136 | for(i = 0; i < NTask; i++) 137 | Send_count[i] = 0; 138 | 139 | for(i = 0; i < *n_items; i++) 140 | { 141 | int task = *(int *) ((char *) data + i * item_size + task_offset); 142 | myassert(task >= 0 && task < NTask); 143 | Send_count[task]++; 144 | } 145 | 146 | void *data2 = sort_based_on_field(data, task_offset, 147 | *n_items, item_size); 148 | 149 | int nimport = mpi_calculate_offsets(Send_count, Send_offset, 150 | Recv_count, Recv_offset, 0); 151 | 152 | if(*max_n < nimport) 153 | { 154 | data = myrealloc_movable(data, nimport * item_size); 155 | *max_n = nimport; 156 | } 157 | 158 | mpi_exchange_buffers(data2, Send_count, Send_offset, data, Recv_count, Recv_offset, item_size, commtag, 1); 159 | 160 | myfree(data2); 161 | 162 | *n_items = nimport; 163 | } 164 | -------------------------------------------------------------------------------- /src/forcetree/forcetree_walk.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | 12 | int force_treeevaluate(int i, int mode, int thread_id) 13 | { 14 | struct NODE *nop = 0; 15 | int k, target, numnodes, no, task; 16 | double r2, dx, dy, dz, mass, r, u, hmax, h_inv, h3_inv; 17 | double pos_x, pos_y, pos_z; 18 | double fac; 19 | double acc_x = 0; 20 | double acc_y = 0; 21 | double acc_z = 0; 22 | double wp, pot = 0.0; 23 | 24 | int ninteractions = 0; 25 | 26 | hmax = All.ForceSoftening; 27 | 28 | if(mode == 0) 29 | { 30 | target = TargetList[i]; 31 | 32 | if(target < NumPart) 33 | { 34 | pos_x = Tree_Pos_list[3 * target + 0]; 35 | pos_y = Tree_Pos_list[3 * target + 1]; 36 | pos_z = Tree_Pos_list[3 * target + 2]; 37 | } 38 | else 39 | { 40 | terminate("target >= NumPart"); 41 | } 42 | 43 | numnodes = 1; 44 | } 45 | else 46 | { 47 | target = i; 48 | pos_x = GravDataGet[target].Pos[0]; 49 | pos_y = GravDataGet[target].Pos[1]; 50 | pos_z = GravDataGet[target].Pos[2]; 51 | 52 | if(target == Nimport - 1) 53 | numnodes = NimportNodes - GravDataGet[target].Firstnode; 54 | else 55 | numnodes = GravDataGet[target + 1].Firstnode - GravDataGet[target].Firstnode; 56 | } 57 | 58 | for(k = 0; k < numnodes; k++) 59 | { 60 | if(mode == 0) 61 | no = Tree_MaxPart; /* root node */ 62 | else 63 | { 64 | no = NodeDataGet[GravDataGet[target].Firstnode + k]; 65 | no = Nodes[no].u.d.nextnode; /* open it */ 66 | } 67 | 68 | while(no >= 0) 69 | { 70 | if(no < Tree_MaxPart) /* single particle */ 71 | { 72 | dx = Tree_Pos_list[3 * no + 0] - pos_x; 73 | dy = Tree_Pos_list[3 * no + 1] - pos_y; 74 | dz = Tree_Pos_list[3 * no + 2] - pos_z; 75 | 76 | r2 = dx * dx + dy * dy + dz * dz; 77 | 78 | mass = P[no].Mass; 79 | 80 | no = Nextnode[no]; 81 | } 82 | else if(no < Tree_MaxPart + Tree_MaxNodes) /* internal node */ 83 | { 84 | if(mode == 1) 85 | { 86 | if(no < Tree_FirstNonTopLevelNode) /* we reached a top-level node again, which means that we are done with the branch */ 87 | { 88 | no = -1; 89 | continue; 90 | } 91 | } 92 | 93 | nop = &Nodes[no]; 94 | mass = nop->u.d.mass; 95 | 96 | dx = nop->u.d.s[0] - pos_x; 97 | dy = nop->u.d.s[1] - pos_y; 98 | dz = nop->u.d.s[2] - pos_z; 99 | 100 | r2 = dx * dx + dy * dy + dz * dz; 101 | 102 | /* we have an internal node. 
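Its multipole moment may stand in for the individual interactions of everything it contains, provided the cell subtends a small enough angle at the target: with the geometric criterion used here, the cell is used unopened when len/r < ErrTolTheta, with the comparison done on squares to avoid a sqrt.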
Need to check opening criterion */ 103 | 104 | if(nop->len * nop->len > r2 * All.ErrTolTheta * All.ErrTolTheta) 105 | { 106 | /* open cell */ 107 | no = nop->u.d.nextnode; 108 | continue; 109 | } 110 | 111 | /* ok, node can be used */ 112 | 113 | no = nop->u.d.sibling; 114 | } 115 | else if(no >= Tree_ImportedNodeOffset) /* point from imported nodelist */ 116 | { 117 | int n = no - Tree_ImportedNodeOffset; 118 | 119 | dx = Tree_Points[n].Pos[0] - pos_x; 120 | dy = Tree_Points[n].Pos[1] - pos_y; 121 | dz = Tree_Points[n].Pos[2] - pos_z; 122 | 123 | r2 = dx * dx + dy * dy + dz * dz; 124 | 125 | mass = Tree_Points[n].Mass; 126 | 127 | no = Nextnode[no - Tree_MaxNodes]; 128 | } 129 | else /* pseudo particle */ 130 | { 131 | if(mode == 0) 132 | { 133 | task = DomainNewTask[no - (Tree_MaxPart + Tree_MaxNodes)]; 134 | 135 | if(ThreadsExportflag[thread_id][task] != i) 136 | { 137 | ThreadsExportflag[thread_id][task] = i; 138 | int nexp = ThreadsNexport[thread_id]++; 139 | if(nexp >= MaxNexport) 140 | terminate("nexp >= MaxNexport"); 141 | ThreadsPartList[thread_id][nexp].Task = task; 142 | ThreadsPartList[thread_id][nexp].Index = i; 143 | } 144 | 145 | int nexp = ThreadsNexportNodes[thread_id]++; 146 | if(nexp >= MaxNexportNodes) 147 | terminate("nexp >= MaxNexportNodes"); 148 | ThreadsNodeList[thread_id][nexp].Task = task; 149 | ThreadsNodeList[thread_id][nexp].Index = i; 150 | ThreadsNodeList[thread_id][nexp].Node = DomainNodeIndex[no - (Tree_MaxPart + Tree_MaxNodes)]; 151 | } 152 | 153 | no = Nextnode[no - Tree_MaxNodes]; 154 | continue; 155 | } 156 | 157 | /* now evaluate the multipole moment */ 158 | if(mass) 159 | { 160 | r = sqrt(r2); 161 | 162 | if(r >= hmax) 163 | { 164 | fac = mass / (r2 * r); 165 | wp = -mass / r; 166 | } 167 | else 168 | { 169 | h_inv = 1.0 / hmax; 170 | h3_inv = h_inv * h_inv * h_inv; 171 | u = r * h_inv; 172 | 173 | if(u < 0.5) 174 | { 175 | fac = mass * h3_inv * (10.666666666667 + u * u * (32.0 * u - 38.4)); 176 | wp = mass * h_inv * (-2.8 + u * u * (5.333333333333 + u * u * (6.4 * u - 9.6))); 177 | } 178 | else 179 | { 180 | fac = mass * h3_inv * (21.333333333333 - 48.0 * u + 38.4 * u * u - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u)); 181 | wp = mass * h_inv * (-3.2 + 0.066666666667 / u + u * u * (10.666666666667 + u * (-16.0 + u * (9.6 - 2.133333333333 * u)))); 182 | } 183 | } 184 | 185 | acc_x += dx * fac; 186 | acc_y += dy * fac; 187 | acc_z += dz * fac; 188 | pot += wp; 189 | 190 | ninteractions++; 191 | } 192 | } 193 | } 194 | 195 | /* store result at the proper place */ 196 | if(mode == 0) 197 | { 198 | if(target < NumPart) 199 | { 200 | P[target].GravAccel[0] = acc_x; 201 | P[target].GravAccel[1] = acc_y; 202 | P[target].GravAccel[2] = acc_z; 203 | P[target].Potential = pot; 204 | } 205 | else 206 | { 207 | int idx = Tree_ResultIndexList[target - Tree_ImportedNodeOffset]; 208 | Tree_ResultsActiveImported[idx].GravAccel[0] = acc_x; 209 | Tree_ResultsActiveImported[idx].GravAccel[1] = acc_y; 210 | Tree_ResultsActiveImported[idx].GravAccel[2] = acc_z; 211 | Tree_ResultsActiveImported[idx].Potential = pot; 212 | } 213 | } 214 | else 215 | { 216 | GravDataResult[target].Acc[0] = acc_x; 217 | GravDataResult[target].Acc[1] = acc_y; 218 | GravDataResult[target].Acc[2] = acc_z; 219 | GravDataResult[target].Potential = pot; 220 | } 221 | return ninteractions; 222 | } 223 | -------------------------------------------------------------------------------- /src/domain/pqueue.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Volkan Yazıcı 3 | * Copyright 2006-2010 The Apache Software Foundation 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 6 | * use this file except in compliance with the License. You may obtain a copy of 7 | * the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | * License for the specific language governing permissions and limitations under 15 | * the License. 16 | */ 17 | 18 | /* V. Springel modified some of the memory allocation calls to inline it with 19 | * our internal memory handler. 20 | */ 21 | 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "pqueue.h" 28 | #include "../allvars.h" 29 | #include "../proto.h" 30 | 31 | #define left(i) ((i) << 1) 32 | #define right(i) (((i) << 1) + 1) 33 | #define parent(i) ((i) >> 1) 34 | 35 | 36 | pqueue_t *pqueue_init(size_t n, 37 | pqueue_cmp_pri_f cmppri, pqueue_get_pri_f getpri, pqueue_set_pri_f setpri, pqueue_get_pos_f getpos, pqueue_set_pos_f setpos) 38 | { 39 | pqueue_t *q; 40 | 41 | q = mymalloc("q", sizeof(pqueue_t)); 42 | 43 | /* Need to allocate n+1 elements since element 0 isn't used. */ 44 | q->d = mymalloc("q->d", (n + 1) * sizeof(void *)); 45 | 46 | q->size = 1; 47 | q->avail = q->step = (n + 1); /* see comment above about n+1 */ 48 | q->cmppri = cmppri; 49 | q->setpri = setpri; 50 | q->getpri = getpri; 51 | q->getpos = getpos; 52 | q->setpos = setpos; 53 | 54 | return q; 55 | } 56 | 57 | 58 | void pqueue_free(pqueue_t * q) 59 | { 60 | myfree(q->d); 61 | myfree(q); 62 | } 63 | 64 | 65 | size_t pqueue_size(pqueue_t * q) 66 | { 67 | /* queue element 0 exists but doesn't count since it isn't used. 
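Keeping the heap 1-based means the children of node i sit at 2i and 2i+1 and its parent at i/2, which is exactly what the left/right/parent shift macros above compute.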
*/ 68 | return (q->size - 1); 69 | } 70 | 71 | 72 | static void bubble_up(pqueue_t * q, size_t i) 73 | { 74 | size_t parent_node; 75 | void *moving_node = q->d[i]; 76 | pqueue_pri_t moving_pri = q->getpri(moving_node); 77 | 78 | for(parent_node = parent(i); ((i > 1) && q->cmppri(q->getpri(q->d[parent_node]), moving_pri)); i = parent_node, parent_node = parent(i)) 79 | { 80 | q->d[i] = q->d[parent_node]; 81 | q->setpos(q->d[i], i); 82 | } 83 | 84 | q->d[i] = moving_node; 85 | q->setpos(moving_node, i); 86 | } 87 | 88 | 89 | static size_t maxchild(pqueue_t * q, size_t i) 90 | { 91 | size_t child_node = left(i); 92 | 93 | if(child_node >= q->size) 94 | return 0; 95 | 96 | if((child_node + 1) < q->size && q->cmppri(q->getpri(q->d[child_node]), q->getpri(q->d[child_node + 1]))) 97 | child_node++; /* use right child instead of left */ 98 | 99 | return child_node; 100 | } 101 | 102 | 103 | static void percolate_down(pqueue_t * q, size_t i) 104 | { 105 | size_t child_node; 106 | void *moving_node = q->d[i]; 107 | pqueue_pri_t moving_pri = q->getpri(moving_node); 108 | 109 | while((child_node = maxchild(q, i)) && q->cmppri(moving_pri, q->getpri(q->d[child_node]))) 110 | { 111 | q->d[i] = q->d[child_node]; 112 | q->setpos(q->d[i], i); 113 | i = child_node; 114 | } 115 | 116 | q->d[i] = moving_node; 117 | q->setpos(moving_node, i); 118 | } 119 | 120 | 121 | int pqueue_insert(pqueue_t * q, void *d) 122 | { 123 | size_t i; 124 | size_t newsize; 125 | 126 | if(!q) 127 | return 1; 128 | 129 | /* allocate more memory if necessary */ 130 | if(q->size >= q->avail) 131 | { 132 | newsize = q->size + q->step; 133 | q->d = myrealloc(q->d, sizeof(void *) * newsize); 134 | q->avail = newsize; 135 | } 136 | 137 | /* insert item */ 138 | i = q->size++; 139 | q->d[i] = d; 140 | bubble_up(q, i); 141 | 142 | return 0; 143 | } 144 | 145 | 146 | void pqueue_change_priority(pqueue_t * q, pqueue_pri_t new_pri, void *d) 147 | { 148 | size_t posn; 149 | pqueue_pri_t old_pri = q->getpri(d); 150 | 151 | q->setpri(d, new_pri); 152 | posn = q->getpos(d); 153 | if(q->cmppri(old_pri, new_pri)) 154 | bubble_up(q, posn); 155 | else 156 | percolate_down(q, posn); 157 | } 158 | 159 | 160 | int pqueue_remove(pqueue_t * q, void *d) 161 | { 162 | size_t posn = q->getpos(d); 163 | q->d[posn] = q->d[--q->size]; 164 | if(q->cmppri(q->getpri(d), q->getpri(q->d[posn]))) 165 | bubble_up(q, posn); 166 | else 167 | percolate_down(q, posn); 168 | 169 | return 0; 170 | } 171 | 172 | 173 | void *pqueue_pop(pqueue_t * q) 174 | { 175 | void *head; 176 | 177 | if(!q || q->size == 1) 178 | return NULL; 179 | 180 | head = q->d[1]; 181 | q->d[1] = q->d[--q->size]; 182 | percolate_down(q, 1); 183 | 184 | return head; 185 | } 186 | 187 | 188 | void *pqueue_peek(pqueue_t * q) 189 | { 190 | void *d; 191 | if(!q || q->size == 1) 192 | return NULL; 193 | d = q->d[1]; 194 | return d; 195 | } 196 | 197 | 198 | void pqueue_dump(pqueue_t * q, FILE * out, pqueue_print_entry_f print) 199 | { 200 | size_t i; 201 | 202 | fprintf(out, "posn\tleft\tright\tparent\tmaxchild\t...\n"); 203 | for(i = 1; i < q->size; i++) 204 | { 205 | fprintf(out, "%zu\t%zu\t%zu\t%zu\t%u\t", i, left(i), right(i), parent(i), (unsigned int) maxchild(q, i)); 206 | print(out, q->d[i]); 207 | } 208 | } 209 | 210 | 211 | static void set_pos(void *d, size_t val) 212 | { 213 | /* do nothing */ 214 | } 215 | 216 | 217 | static void set_pri(void *d, pqueue_pri_t pri) 218 | { 219 | /* do nothing */ 220 | } 221 | 222 | 223 | void pqueue_print(pqueue_t * q, FILE * out, pqueue_print_entry_f print) 224 | 
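/* prints the entries in priority order; works on a scratch copy of the heap
   so that the repeated pops do not disturb the original queue */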
{ 225 | pqueue_t *dup; 226 | void *e; 227 | 228 | dup = pqueue_init(q->size, q->cmppri, q->getpri, set_pri, q->getpos, set_pos); 229 | dup->size = q->size; 230 | dup->avail = q->avail; 231 | dup->step = q->step; 232 | 233 | memcpy(dup->d, q->d, (q->size * sizeof(void *))); 234 | 235 | while((e = pqueue_pop(dup))) 236 | print(out, e); 237 | 238 | pqueue_free(dup); 239 | } 240 | 241 | 242 | static int subtree_is_valid(pqueue_t * q, int pos) 243 | { 244 | if(left(pos) < q->size) 245 | { 246 | /* has a left child */ 247 | if(q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[left(pos)]))) 248 | return 0; 249 | if(!subtree_is_valid(q, left(pos))) 250 | return 0; 251 | } 252 | if(right(pos) < q->size) 253 | { 254 | /* has a right child */ 255 | if(q->cmppri(q->getpri(q->d[pos]), q->getpri(q->d[right(pos)]))) 256 | return 0; 257 | if(!subtree_is_valid(q, right(pos))) 258 | return 0; 259 | } 260 | return 1; 261 | } 262 | 263 | 264 | int pqueue_is_valid(pqueue_t * q) 265 | { 266 | return subtree_is_valid(q, 1); 267 | } 268 | -------------------------------------------------------------------------------- /src/domain/domain.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | #include "domain.h" 11 | 12 | /*! \file domain.c 13 | * \brief code for domain decomposition 14 | * 15 | * This file contains the code for the domain decomposition of the 16 | * simulation volume. The domains are constructed from disjoint subsets 17 | * of the leaves of a fiducial top-level tree that covers the full 18 | * simulation volume. Domain boundaries hence run along tree-node 19 | * divisions of a fiducial global BH tree. As a result of this method, the 20 | * tree force is in principle strictly independent of the way the domains 21 | * are cut. The domain decomposition can be carried out for an arbitrary 22 | * number of CPUs. Individual domains are not cubical, but spatially 23 | * coherent since the leaves are traversed in a Peano-Hilbert order and 24 | * individual domains form segments along this order. This also ensures 25 | * that each domain has a small surface-to-volume ratio, which minimizes 26 | * communication. 27 | */ 28 | 29 | 30 | 31 | /*! This is the main routine for the domain decomposition. It acts as a 32 | * driver routine that allocates various temporary buffers, maps the 33 | * particles back onto the periodic box if needed, and then carries out the 34 | * domain decomposition, followed by a final Peano-Hilbert ordering of all 35 | * particles as a tuning measure. 
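* On completion, the achieved load balance is reported as the maximum
 * per-task particle count divided by the ideal average TotNumPart/NTask.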
36 | */ 37 | void domain_Decomposition(void) 38 | { 39 | mpi_printf("DOMAIN:\n"); 40 | mpi_printf("DOMAIN: Begin domain decomposition (sync-point %d).\n", All.NumCurrentTiStep); 41 | 42 | 43 | 44 | domain_allocate(); 45 | domain_allocate_lists(); 46 | topNodes = (struct local_topnode_data *) mymalloc_movable(&topNodes, "topNodes", (MaxTopNodes * sizeof(struct local_topnode_data))); 47 | 48 | /* find total cost factors */ 49 | domain_find_total_cost(); 50 | 51 | /* determine global dimensions of domain grid */ 52 | domain_findExtent(); 53 | 54 | /* determine top-level tree */ 55 | domain_determineTopTree(); 56 | 57 | /* find the split of the top-level tree */ 58 | domain_combine_topleaves_to_domains(All.MultipleDomains * NTask, NTopleaves); 59 | 60 | /* combine on each MPI task several of the domains (namely the number All.MultipleDomains) */ 61 | domain_combine_multipledomains(); 62 | 63 | /* permute the task assignment such that the smallest number of particles needs to be moved */ 64 | domain_optimize_domain_to_task_mapping(); 65 | 66 | /* determine for each cpu how many particles have to be shifted to other cpus */ 67 | domain_countToGo(); 68 | 69 | /* finally, carry out the actual particle exchange */ 70 | domain_exchange(); 71 | 72 | /* copy what we need for the topnodes */ 73 | domain_preserve_relevant_topnode_data(); 74 | myfree(topNodes); 75 | domain_free_lists(); 76 | 77 | int nummax; 78 | MPI_Allreduce(&NumPart, &nummax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 79 | mpi_printf("\nDOMAIN: ----> Final load balance = %g <------\n\n", nummax / ( ((double)All.TotNumPart) / NTask)); 80 | mpi_printf("DOMAIN: domain decomposition done.\n"); 81 | 82 | peano_hilbert_order(); 83 | myfree(Key); 84 | 85 | TopNodes = (struct topnode_data *) myrealloc_movable(TopNodes, NTopnodes * sizeof(struct topnode_data)); 86 | DomainTask = (int *) myrealloc_movable(DomainTask, NTopleaves * sizeof(int)); 87 | } 88 | 89 | 90 | 91 | void domain_preserve_relevant_topnode_data(void) /* copies the persistent topNodes[] fields into TopNodes[] and tabulates each node's Morton-to-Peano subnode mapping */ 92 | { 93 | int i; 94 | 95 | for(i = 0; i < NTopnodes; i++) 96 | { 97 | TopNodes[i].StartKey = topNodes[i].StartKey; 98 | TopNodes[i].Size = topNodes[i].Size; 99 | TopNodes[i].Daughter = topNodes[i].Daughter; 100 | TopNodes[i].Leaf = topNodes[i].Leaf; 101 | 102 | int j; 103 | int bits = my_ffsll(TopNodes[i].Size); 104 | int blocks = (bits - 1) / 3 - 1; 105 | 106 | for(j = 0; j < 8; j++) 107 | { 108 | int xb, yb, zb; 109 | peano_hilbert_key_inverse(TopNodes[i].StartKey + j * (TopNodes[i].Size >> 3), BITS_PER_DIMENSION, &xb, &yb, &zb); 110 | xb >>= blocks; 111 | yb >>= blocks; 112 | zb >>= blocks; 113 | int idx = (xb & 1) | ((yb & 1) << 1) | ((zb & 1) << 2); 114 | if(idx < 0 || idx > 7) 115 | { 116 | char buf[1000]; 117 | sprintf(buf, "j=%d idx=%d xb=%d yb=%d zb=%d blocks=%d bits=%d size=%lld\n", j, idx, xb, yb, zb, blocks, bits, TopNodes[i].Size); 118 | terminate(buf); 119 | } 120 | TopNodes[i].MortonToPeanoSubnode[idx] = j; 121 | } 122 | } 123 | } 124 | 125 | 126 | void domain_find_total_cost(void) 127 | { 128 | int i; 129 | long long Ntype[6]; /*!< total number of particles of each type */ 130 | int NtypeLocal[6]; /*!< local number of particles of each type */ 131 | 132 | if(All.MultipleDomains < 1 || All.MultipleDomains > 512) 133 | terminate("All.MultipleDomains < 1 || All.MultipleDomains > 512"); 134 | 135 | for(i = 0; i < 6; i++) 136 | NtypeLocal[i] = 0; 137 | 138 | for(i = 0; i < NumPart; i++) 139 | NtypeLocal[P[i].Type]++; 140 | 141 | /* because Ntype[] is of type `long long', we cannot do a simple 142 | * MPI_Allreduce() to 
sum the total particle numbers 143 | */ 144 | sumup_large_ints(6, NtypeLocal, Ntype); 145 | 146 | for(i = 0, totpartcount = 0; i < 6; i++) 147 | totpartcount += Ntype[i]; 148 | 149 | fac_load = 1.0 / totpartcount; 150 | } 151 | 152 | 153 | 154 | 155 | int domain_double_to_int(double d) /* assumes d has been scaled into [1,2); extracts the leading BITS_PER_DIMENSION mantissa bits as an integer grid coordinate */ 156 | { 157 | union 158 | { 159 | double d; 160 | unsigned long long ull; 161 | } u; 162 | u.d = d; 163 | return (int) ((u.ull & 0xFFFFFFFFFFFFFllu) >> (52 - BITS_PER_DIMENSION)); 164 | } 165 | 166 | 167 | 168 | /*! This function allocates the storage that will be required for the tree construction/walk later on */ 169 | void domain_allocate(void) 170 | { 171 | MaxTopNodes = (int) (All.TopNodeAllocFactor * All.MaxPart + 1); 172 | 173 | if(DomainStartList) 174 | terminate("domain storage already allocated"); 175 | 176 | DomainStartList = (int *) mymalloc_movable(&DomainStartList, "DomainStartList", (NTask * All.MultipleDomains * sizeof(int))); 177 | DomainEndList = (int *) mymalloc_movable(&DomainEndList, "DomainEndList", (NTask * All.MultipleDomains * sizeof(int))); 178 | TopNodes = (struct topnode_data *) mymalloc_movable(&TopNodes, "TopNodes", (MaxTopNodes * sizeof(struct topnode_data))); 179 | DomainTask = (int *) mymalloc_movable(&DomainTask, "DomainTask", (MaxTopNodes * sizeof(int))); 180 | } 181 | 182 | 183 | 184 | void domain_free(void) 185 | { 186 | if(!DomainStartList) 187 | terminate("domain storage not allocated"); 188 | 189 | myfree(DomainTask); 190 | myfree(TopNodes); 191 | myfree(DomainEndList); 192 | myfree(DomainStartList); 193 | 194 | DomainTask = NULL; 195 | TopNodes = NULL; 196 | DomainEndList = NULL; 197 | DomainStartList = NULL; 198 | } 199 | 200 | void domain_printf(char *buf) 201 | { 202 | if(RestartFlag <= 2) 203 | { 204 | printf("%s", buf); 205 | } 206 | } 207 | 208 | 209 | -------------------------------------------------------------------------------- /src/domain/domain_toplevel.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | #include "domain.h" 11 | 12 | /*! This function constructs the global top-level tree that is used 13 | * for the domain decomposition. This is done by considering the string of 14 | * Peano-Hilbert keys for all particles, which is recursively chopped 15 | * into pieces of eight segments until each segment holds at most a certain 16 | * number of particles. 
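* A segment is refined further only while its summed work fraction exceeds
 * 1/(TopNodeFactor * MultipleDomains * NTask), the limit applied by
 * domain_do_local_refine() below.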
17 | */ 18 | int domain_determineTopTree(void) 19 | { 20 | int i, count; 21 | 22 | mp = (struct domain_peano_hilbert_data *) mymalloc_movable(&mp, "mp", sizeof(struct domain_peano_hilbert_data) * NumPart); 23 | 24 | for(i = 0, count = 0; i < NumPart; i++) 25 | { 26 | int xb = domain_double_to_int(((P[i].Pos[0] - DomainCorner[0]) * DomainInverseLen) + 1.0); 27 | int yb = domain_double_to_int(((P[i].Pos[1] - DomainCorner[1]) * DomainInverseLen) + 1.0); 28 | int zb = domain_double_to_int(((P[i].Pos[2] - DomainCorner[2]) * DomainInverseLen) + 1.0); 29 | 30 | mp[count].key = Key[i] = peano_hilbert_key(xb, yb, zb, BITS_PER_DIMENSION); 31 | mp[count].index = i; 32 | count++; 33 | } 34 | 35 | mysort_domain(mp, count, sizeof(struct domain_peano_hilbert_data)); 36 | 37 | NTopnodes = 1; 38 | topNodes[0].Daughter = -1; 39 | topNodes[0].Parent = -1; 40 | topNodes[0].Size = PEANOCELLS; 41 | topNodes[0].StartKey = 0; 42 | topNodes[0].PIndex = 0; 43 | topNodes[0].Count = count; 44 | 45 | int list[1] = { 0 }; 46 | int *listp = list; 47 | 48 | domain_do_local_refine(1, &listp); 49 | 50 | myfree(mp); 51 | 52 | /* count the number of top leaves */ 53 | NTopleaves = 0; 54 | domain_walktoptree(0); 55 | mpi_printf("DOMAIN: NTopleaves=%d\n", NTopleaves); 56 | 57 | if(NTopleaves < All.MultipleDomains * NTask) 58 | terminate("NTopleaves = %d < All.MultipleDomains * NTask = %d * %d = %d", NTopleaves, All.MultipleDomains, NTask, All.MultipleDomains * NTask); 59 | 60 | mpi_printf("DOMAIN: determination of top-level tree done\n"); 61 | 62 | domain_sumCost(); 63 | 64 | mpi_printf("DOMAIN: cost summation for top-level tree done\n"); 65 | 66 | return 0; 67 | } 68 | 69 | 70 | 71 | int domain_do_local_refine(int n, int **listp) /* in list[], we store the indices of the nodes that should be refined; n is their number */ 72 | { 73 | static int message_printed = 0; 74 | int i, j, k, l, p, sub, ret, *list; 75 | 76 | list = *listp; 77 | 78 | double limit = 1.0 / (All.TopNodeFactor * All.MultipleDomains * NTask); 79 | 80 | if(list[0] == 0) 81 | message_printed = 0; 82 | 83 | while((NTopnodes + 8 * n) > MaxTopNodes) 84 | { 85 | mpi_printf("DOMAIN: Increasing TopNodeAllocFactor=%g ", All.TopNodeAllocFactor); 86 | All.TopNodeAllocFactor *= 1.3; 87 | mpi_printf("new value=%g\n", All.TopNodeAllocFactor); 88 | if(All.TopNodeAllocFactor > 1000) 89 | terminate("something seems to be going seriously wrong here. 
Stopping.\n"); 90 | 91 | MaxTopNodes = (int) (All.TopNodeAllocFactor * All.MaxPart + 1); 92 | 93 | topNodes = (struct local_topnode_data *) myrealloc_movable(topNodes, (MaxTopNodes * sizeof(struct local_topnode_data))); 94 | TopNodes = (struct topnode_data *) myrealloc_movable(TopNodes, (MaxTopNodes * sizeof(struct topnode_data))); 95 | DomainTask = (int *) myrealloc_movable(DomainTask, (MaxTopNodes * sizeof(int))); 96 | DomainLeaveNode = (struct domain_cost_data *) myrealloc_movable(DomainLeaveNode, (MaxTopNodes * sizeof(struct domain_cost_data))); 97 | 98 | list = *listp; /* update this here because the above reallocations may have moved the pointer to the memory block */ 99 | } 100 | 101 | int *new_list = mymalloc_movable(&new_list, "new_list", 8 * n * sizeof(int)); 102 | double *worktotlist = mymalloc("worktotlist", 8 * n * sizeof(double)); 103 | double *worklist = mymalloc("worklist", 8 * n * sizeof(double)); 104 | 105 | double non_zero = 0, non_zero_tot; 106 | 107 | /* create the new nodes */ 108 | for(k = 0; k < n; k++) 109 | { 110 | i = list[k]; 111 | topNodes[i].Daughter = NTopnodes; 112 | NTopnodes += 8; 113 | 114 | for(j = 0; j < 8; j++) 115 | { 116 | sub = topNodes[i].Daughter + j; 117 | 118 | topNodes[sub].Daughter = -1; 119 | topNodes[sub].Parent = i; 120 | topNodes[sub].Size = (topNodes[i].Size >> 3); 121 | topNodes[sub].StartKey = topNodes[i].StartKey + j * topNodes[sub].Size; 122 | topNodes[sub].PIndex = topNodes[i].PIndex; 123 | topNodes[sub].Count = 0; 124 | } 125 | 126 | sub = topNodes[i].Daughter; 127 | 128 | for(p = topNodes[i].PIndex, j = 0; p < topNodes[i].PIndex + topNodes[i].Count; p++) 129 | { 130 | if(j < 7) 131 | while(mp[p].key >= topNodes[sub + 1].StartKey) 132 | { 133 | j++; 134 | sub++; 135 | topNodes[sub].PIndex = p; 136 | if(j >= 7) 137 | break; 138 | } 139 | 140 | topNodes[sub].Count++; 141 | } 142 | 143 | for(j = 0; j < 8; j++) 144 | { 145 | sub = topNodes[i].Daughter + j; 146 | worklist[k * 8 + j] = fac_load * topNodes[sub].Count; 147 | 148 | if(worklist[k * 8 + j] != 0) 149 | non_zero++; 150 | } 151 | } 152 | 153 | 154 | MPI_Allreduce(&non_zero, &non_zero_tot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 155 | MPI_Allreduce(worklist, worktotlist, 8 * n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 156 | 157 | int new_n = 0; 158 | for(k = 0, l = 0; k < n; k++) 159 | { 160 | i = list[k]; 161 | 162 | for(j = 0; j < 8; j++, l++) 163 | { 164 | sub = topNodes[i].Daughter + j; 165 | 166 | if(worktotlist[l] > limit) 167 | { 168 | if(topNodes[sub].Size < 8) 169 | { 170 | if(message_printed == 0) 171 | { 172 | mpi_printf("DOMAIN: Note: we would like to refine top-tree, but PEANOGRID is not fine enough\n"); 173 | message_printed = 1; 174 | } 175 | } 176 | else 177 | new_list[new_n++] = sub; 178 | } 179 | } 180 | } 181 | 182 | myfree(worklist); 183 | myfree(worktotlist); 184 | 185 | new_list = myrealloc(new_list, new_n * sizeof(int)); 186 | 187 | if(new_n > 0) 188 | ret = domain_do_local_refine(new_n, &new_list); 189 | else 190 | ret = 0; 191 | 192 | myfree(new_list); 193 | 194 | return ret; 195 | } 196 | 197 | 198 | 199 | /*! This function walks the global top tree in order to establish the 200 | * number of leaves it has, and to assign the leaf numbers along the 201 | * Peano-Hilbert curve. These leaves are later combined into domain pieces, 202 | * which are distributed to different processors. 
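* Since the daughters are visited in their storage order 0..7, the
 * resulting leaf numbering follows the Peano-Hilbert curve.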
203 | */ 204 | void domain_walktoptree(int no) 205 | { 206 | int i; 207 | 208 | if(topNodes[no].Daughter == -1) 209 | { 210 | topNodes[no].Leaf = NTopleaves; 211 | NTopleaves++; 212 | } 213 | else 214 | { 215 | for(i = 0; i < 8; i++) 216 | domain_walktoptree(topNodes[no].Daughter + i); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /Model_M1.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-M1 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.035 % spin parameter 15 | MD 0.035 % disk mass fraction 16 | MB 0.05 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 20 | 21 | JD 0.035 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.1 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | N_HALO 100000 % desired number of particles in dark halo 32 | N_DISK 100000 % desired number of collisionless particles in disk 33 | N_BULGE 100000 % number of bulge particles 34 | 35 | 36 | 37 | %------ Selection of symmetry constraints of velocity structure 38 | 39 | TypeOfHaloVelocityStructure 3 % 0 = spherically symmetric, isotropic 40 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 41 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 42 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 43 | 44 | TypeOfDiskVelocityStructure 2 % 0 = spherically symmetric, isotropic 45 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 46 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 47 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 48 | 49 | TypeOfBulgeVelocityStructure 3 % 0 = spherically symmetric, isotropic 50 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 51 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 52 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 53 | 54 | 55 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 56 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 57 | 58 | 59 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 60 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 61 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 62 | 63 | 64 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 65 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | 
BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | 68 | 69 | %------ Orbit integration accuracy 70 | 71 | TorbitFac 10.0 % regulates the integration time of orbits 72 | % (this is of the order of the typical number of orbits per particle) 73 | TimeStepFactorOrbit 0.01 74 | TimeStepFactorCellCross 0.25 75 | 76 | 77 | %------ Iterative optimization parameters 78 | 79 | FractionToOptimizeIndependendly 0.001 80 | IndepenentOptimizationsPerStep 100 81 | StepsBetweenDump 10 82 | MaximumNumberOfSteps 100 83 | 84 | MinParticlesPerBinForDispersionMeasurement 100 85 | MinParticlesPerBinForDensityMeasurement 50 86 | 87 | 88 | %------ Grid dimension and extenstion/resolution 89 | 90 | DG_MaxLevel 7 91 | EG_MaxLevel 7 92 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 93 | 94 | 95 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 96 | % halo that must be inside the grid (determines grid extension) 97 | 98 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 99 | % bin (regulates size of innermost grid cells) 100 | 101 | 102 | 103 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 104 | 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use to for computing force field with a tree 115 | SampleForceNdisk 100000000 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O paramaters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 3600.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | %------ Factors to tune merit function of time avaraged velocity structure (only ver 1.1) 154 | 155 | HaloValueRsFac 1.0 156 | HaloValueTsFac 1.0 157 | HaloValueQsFac 1.0 158 | HaloValuePsFac 1.0 159 | 160 | DiskValueRsFac 1.0 161 | DiskValueTsFac 1.0 162 | DiskValueQsFac 1.0 163 | DiskValuePsFac 1.0 164 | 165 | BulgeValueRsFac 1.0 166 | BulgeValueTsFac 1.0 167 | BulgeValueQsFac 1.0 168 | BulgeValuePsFac 1.0 169 | -------------------------------------------------------------------------------- /Model_H3.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-H3-256K 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 
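% A typical invocation passes a model file like this one directly to the
% compiled code on the command line, e.g. (binary name and task count are
% illustrative only):
%
%    mpirun -np 8 ./GalIC Model_H3.param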
10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.0 % spin parameter 15 | MD 0.0 % disk mass fraction 16 | MB 0.0 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 20 | 21 | JD 0.00 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.2 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | 32 | N_HALO 100000 % desired number of particles in dark halo 33 | N_DISK 0 % desired number of collisionless particles in disk 34 | N_BULGE 0 % number of bulge particles 35 | 36 | 37 | 38 | %------ Selection of symmetry constraints of velocity structure 39 | 40 | TypeOfHaloVelocityStructure 1 % 0 = spherically symmetric, isotropic 41 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 42 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 43 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 44 | 45 | TypeOfDiskVelocityStructure 0 % 0 = spherically symmetric, isotropic 46 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 47 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 48 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 49 | 50 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 51 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 52 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 53 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 54 | 55 | 56 | HaloBetaParameter -1.0 % only relevant for TypeOfHaloVelocityStructure=1 57 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 58 | 59 | 60 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 61 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 62 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 63 | 64 | 65 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 68 | 69 | 70 | %------ Orbit integration accuracy 71 | 72 | TorbitFac 10.0 % regulates the integration time of orbits 73 | % (this is of the order of the typical number of orbits per particle) 74 | TimeStepFactorOrbit 0.01 75 | TimeStepFactorCellCross 0.25 76 | 77 | 78 | %------ Iterative optimization parameters 79 | 80 | FractionToOptimizeIndependendly 0.001 81 | IndepenentOptimizationsPerStep 100 82 | StepsBetweenDump 10 83 | MaximumNumberOfSteps 200 84 | 85 | MinParticlesPerBinForDispersionMeasurement 100 86 | MinParticlesPerBinForDensityMeasurement 50 87 | 88 | 89 | %------ Grid dimension and 
extenstion/resolution 90 | 91 | DG_MaxLevel 7 92 | EG_MaxLevel 7 93 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 94 | 95 | 96 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 97 | % halo that must be inside the grid (determines grid extension) 98 | 99 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 100 | % bin (regulates size of innermost grid cells) 101 | 102 | 103 | 104 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 105 | 106 | 107 | %------ Construction of target density field 108 | 109 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 110 | SampleParticleCount 100000000 % number of points sampled for target density field 111 | 112 | 113 | %------ Construction of force field 114 | 115 | SampleForceNhalo 0 % number of points to use to for computing force field with a tree 116 | SampleForceNdisk 0 117 | SampleForceNbulge 0 118 | 119 | Softening 0.05 120 | 121 | 122 | %------ Accuracy settings of tree code used in construction of force field 123 | 124 | TypeOfOpeningCriterion 1 125 | ErrTolTheta 0.4 126 | ErrTolForceAcc 0.0025 127 | 128 | %------ Domain decomposition parameters used in parallel tree code 129 | 130 | MultipleDomains 4 131 | TopNodeFactor 4 132 | 133 | 134 | %------ Parallel I/O paramaters, only affects writing of galaxy files 135 | 136 | NumFilesPerSnapshot 1 137 | NumFilesWrittenInParallel 1 138 | 139 | 140 | %------ Memory allocation parameters 141 | 142 | MaxMemSize 2300.0 % in MB 143 | BufferSize 100.0 144 | BufferSizeGravity 100.0 145 | 146 | 147 | %------ Specification of internal system of units 148 | 149 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 150 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 151 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 152 | GravityConstantInternal 0 153 | 154 | 155 | %------ Factors to tune merit function of time avaraged velocity structure (only ver 1.1) 156 | 157 | HaloValueRsFac 1.0 158 | HaloValueTsFac 1.0 159 | HaloValueQsFac 1.0 160 | HaloValuePsFac 1.0 161 | 162 | DiskValueRsFac 1.0 163 | DiskValueTsFac 1.0 164 | DiskValueQsFac 1.0 165 | DiskValuePsFac 1.0 166 | 167 | BulgeValueRsFac 1.0 168 | BulgeValueTsFac 1.0 169 | BulgeValueQsFac 1.0 170 | BulgeValuePsFac 1.0 171 | -------------------------------------------------------------------------------- /Model_D3.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-D3 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.035 % spin parameter 15 | MD 0.035 % disk mass fraction 16 | MB 0.0 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 
20 | 21 | JD 0.035 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.1 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | N_HALO 100000 % desired number of particles in dark halo 32 | N_DISK 100000 % desired number of collisionless particles in disk 33 | N_BULGE 0 % number of bulge particles 34 | 35 | 36 | 37 | %------ Selection of symmetry constraints of velocity structure 38 | 39 | TypeOfHaloVelocityStructure 3 % 0 = spherically symmetric, isotropic 40 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 41 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 42 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 43 | 44 | TypeOfDiskVelocityStructure 3 % 0 = spherically symmetric, isotropic 45 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 46 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 47 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 48 | 49 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 50 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 51 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 52 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 53 | 54 | 55 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 56 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 57 | 58 | 59 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 60 | DiskDispersionRoverZratio 2.0 % only relevant for TypeOfDiskVelocityStructure=3 61 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 62 | 63 | 64 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 65 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | 68 | 69 | %------ Orbit integration accuracy 70 | 71 | TorbitFac 10.0 % regulates the integration time of orbits 72 | % (this is of the order of the typical number of orbits per particle) 73 | TimeStepFactorOrbit 0.01 74 | TimeStepFactorCellCross 0.25 75 | 76 | 77 | %------ Iterative optimization parameters 78 | 79 | FractionToOptimizeIndependendly 0.001 80 | IndepenentOptimizationsPerStep 100 81 | StepsBetweenDump 10 82 | MaximumNumberOfSteps 100 83 | 84 | MinParticlesPerBinForDispersionMeasurement 100 85 | MinParticlesPerBinForDensityMeasurement 50 86 | 87 | 88 | %------ Grid dimension and extenstion/resolution 89 | 90 | DG_MaxLevel 7 91 | EG_MaxLevel 7 92 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 93 | 94 | 95 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 96 | % halo that must be inside the grid (determines grid extension) 97 | 98 | InnermostBinEnclosedMassFraction 
0.0000001 % regulates the fraction of mass enclosed by the innermost 99 | % bin (regulates size of innermost grid cells) 100 | 101 | 102 | 103 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 104 | 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use to for computing force field with a tree 115 | SampleForceNdisk 100000000 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O paramaters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 3600.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | %------ Factors to tune merit function of time avaraged velocity structure (only ver 1.1) 154 | 155 | HaloValueRsFac 1.0 156 | HaloValueTsFac 1.0 157 | HaloValueQsFac 1.0 158 | HaloValuePsFac 1.0 159 | 160 | DiskValueRsFac 1.0 161 | DiskValueTsFac 1.0 162 | DiskValueQsFac 1.0 163 | DiskValuePsFac 1.0 164 | 165 | BulgeValueRsFac 1.0 166 | BulgeValueTsFac 1.0 167 | BulgeValueQsFac 1.0 168 | BulgeValuePsFac 1.0 169 | -------------------------------------------------------------------------------- /Model_D1.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-D1 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | %------ Basic structural parameters of model 11 | 12 | CC 10.0 % halo concentration 13 | V200 200.0 % circular velocity v_200 (in km/sec) 14 | LAMBDA 0.035 % spin parameter 15 | MD 0.035 % disk mass fraction 16 | MB 0.0 % bulge mass fraction 17 | MBH 0.0 % black hole mass fraction. If zero, no black 18 | % hole is generated, otherwise one at the centre 19 | % is added. 
20 | 21 | JD 0.035 % disk spin fraction, typically chosen equal to MD 22 | 23 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 24 | BulgeSize 0.1 % bulge scale length in units of halo scale length 25 | 26 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 27 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 28 | 29 | %------ Particle numbers in target model 30 | 31 | N_HALO 100000 % desired number of particles in dark halo 32 | N_DISK 100000 % desired number of collisionless particles in disk 33 | N_BULGE 0 % number of bulge particles 34 | 35 | 36 | 37 | %------ Selection of symmetry constraints of velocity structure 38 | 39 | TypeOfHaloVelocityStructure 3 % 0 = spherically symmetric, isotropic 40 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 41 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 42 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 43 | 44 | TypeOfDiskVelocityStructure 2 % 0 = spherically symmetric, isotropic 45 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 46 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 47 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 48 | 49 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 50 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 51 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 52 | % 3 = axisymmetric, f(E, Lz, I_3), with / specified and net rotation specified 53 | 54 | 55 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 56 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 57 | 58 | 59 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 60 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 61 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 62 | 63 | 64 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 65 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | 68 | 69 | %------ Orbit integration accuracy 70 | 71 | TorbitFac 10.0 % regulates the integration time of orbits 72 | % (this is of the order of the typical number of orbits per particle) 73 | TimeStepFactorOrbit 0.01 74 | TimeStepFactorCellCross 0.25 75 | 76 | 77 | %------ Iterative optimization parameters 78 | 79 | FractionToOptimizeIndependendly 0.001 80 | IndepenentOptimizationsPerStep 100 81 | StepsBetweenDump 10 82 | MaximumNumberOfSteps 100 83 | 84 | MinParticlesPerBinForDispersionMeasurement 100 85 | MinParticlesPerBinForDensityMeasurement 50 86 | 87 | 88 | %------ Grid dimension and extenstion/resolution 89 | 90 | DG_MaxLevel 7 91 | EG_MaxLevel 7 92 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 93 | 94 | 95 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 96 | % halo that must be inside the grid (determines grid extension) 97 | 98 | InnermostBinEnclosedMassFraction 
98 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 99 | % bin (regulates size of innermost grid cells) 100 | 101 | 102 | 103 | MaxVelInUnitsVesc 0.9999 % maximum allowed velocity in units of the local escape velocity 104 | 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use for computing force field with a tree 115 | SampleForceNdisk 100000000 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O parameters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 3600.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | 154 | %------ Factors to tune merit function of time averaged velocity structure (only ver 1.1) 155 | 156 | HaloValueRsFac 1.0 157 | HaloValueTsFac 1.0 158 | HaloValueQsFac 1.0 159 | HaloValuePsFac 1.0 160 | 161 | DiskValueRsFac 1.0 162 | DiskValueTsFac 1.0 163 | DiskValueQsFac 1.0 164 | DiskValuePsFac 1.0 165 | 166 | BulgeValueRsFac 1.0 167 | BulgeValueTsFac 1.0 168 | BulgeValueQsFac 1.0 169 | BulgeValuePsFac 1.0 170 | -------------------------------------------------------------------------------- /Model_H2.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-H2 5 | 6 | OutputFile snap % Base filename of generated sequence of files 7 | SnapFormat 1 % File format selection 8 | 9 | 10 | 11 | %------ Basic structural parameters of model 12 | 13 | CC 10.0 % halo concentration 14 | V200 200.0 % circular velocity v_200 (in km/sec) 15 | LAMBDA 0.0 % spin parameter 16 | MD 0.0 % disk mass fraction 17 | MB 0.0 % bulge mass fraction 18 | MBH 0.0 % black hole mass fraction. If zero, no black 19 | % hole is generated, otherwise one at the centre 20 | % is added.
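This model realizes a radially anisotropic, halo-only system via HaloBetaParameter = 0.5 further down. For orientation, a hedged sketch of the Binney anisotropy parameter that the beta parameter prescribes; the disp_* arguments are squared velocity dispersions, and the code's own dispersion machinery (in src/, not reproduced here) is the authority.

/* beta = 1 - (sigma_theta^2 + sigma_phi^2) / (2 sigma_r^2);
   beta = 0 is isotropic, beta -> 1 means purely radial orbits */
double binney_beta(double disp_r, double disp_theta, double disp_phi)
{
  return 1.0 - (disp_theta + disp_phi) / (2.0 * disp_r);
}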
21 | 22 | JD 0.00 % disk spin fraction, typically chosen equal to MD 23 | 24 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 25 | BulgeSize 0.2 % bulge scale length in units of halo scale length 26 | 27 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 28 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 29 | 30 | %------ Particle numbers in target model 31 | 32 | N_HALO 100000 % desired number of particles in dark halo 33 | N_DISK 0 % desired number of collisionless particles in disk 34 | N_BULGE 0 % number of bulge particles 35 | 36 | 37 | 38 | %------ Selection of symmetry constraints of velocity structure 39 | 40 | TypeOfHaloVelocityStructure 1 % 0 = spherically symmetric, isotropic 41 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 42 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 43 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 44 | 45 | TypeOfDiskVelocityStructure 0 % 0 = spherically symmetric, isotropic 46 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 47 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 48 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 49 | 50 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 51 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 52 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 53 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 54 | 55 | 56 | HaloBetaParameter 0.5 % only relevant for TypeOfHaloVelocityStructure=1 57 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 58 | 59 | 60 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 61 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 62 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 63 | 64 | 65 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 66 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 68 | 69 | 70 | %------ Orbit integration accuracy 71 | 72 | TorbitFac 10.0 % regulates the integration time of orbits 73 | % (this is of the order of the typical number of orbits per particle) 74 | TimeStepFactorOrbit 0.01 75 | TimeStepFactorCellCross 0.25 76 | 
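A hedged sketch of how the two time-step factors above are commonly combined into an orbit-integration step: the step is the smaller of a fixed fraction of the orbital time and the local cell-crossing time. The combination rule, argument names, and helper below are assumptions for illustration; the real criterion lives elsewhere in src/.

/* Tint: orbital-time estimate of the particle; cellsize/speed: local cell-crossing time;
   fac_orbit and fac_cross correspond to the two parameters above */
double orbit_timestep(double Tint, double cellsize, double speed,
                      double fac_orbit, double fac_cross)
{
  double dt_orbit = fac_orbit * Tint;
  double dt_cross = fac_cross * cellsize / speed;
  return dt_orbit < dt_cross ? dt_orbit : dt_cross;
}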
77 | 78 | %------ Iterative optimization parameters 79 | 80 | FractionToOptimizeIndependendly 0.001 81 | IndepenentOptimizationsPerStep 100 82 | StepsBetweenDump 10 83 | MaximumNumberOfSteps 200 84 | 85 | MinParticlesPerBinForDispersionMeasurement 100 86 | MinParticlesPerBinForDensityMeasurement 50 87 | 88 | 89 | %------ Grid dimension and extension/resolution 90 | 91 | DG_MaxLevel 7 92 | EG_MaxLevel 7 93 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 94 | 95 | 96 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 97 | % halo that must be inside the grid (determines grid extension) 98 | 99 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 100 | % bin (regulates size of innermost grid cells) 101 | 102 | 103 | 104 | MaxVelInUnitsVesc 0.99 % maximum allowed velocity in units of the local escape velocity 105 | 106 | %------ Construction of target density field 107 | 108 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 109 | SampleParticleCount 100000000 % number of points sampled for target density field 110 | 111 | 112 | %------ Construction of force field 113 | 114 | SampleForceNhalo 0 % number of points to use for computing force field with a tree 115 | SampleForceNdisk 0 116 | SampleForceNbulge 0 117 | 118 | Softening 0.05 119 | 120 | 121 | %------ Accuracy settings of tree code used in construction of force field 122 | 123 | TypeOfOpeningCriterion 1 124 | ErrTolTheta 0.4 125 | ErrTolForceAcc 0.0025 126 | 127 | %------ Domain decomposition parameters used in parallel tree code 128 | 129 | MultipleDomains 4 130 | TopNodeFactor 4 131 | 132 | 133 | %------ Parallel I/O parameters, only affects writing of galaxy files 134 | 135 | NumFilesPerSnapshot 1 136 | NumFilesWrittenInParallel 1 137 | 138 | 139 | %------ Memory allocation parameters 140 | 141 | MaxMemSize 2300.0 % in MB 142 | BufferSize 100.0 143 | BufferSizeGravity 100.0 144 | 145 | 146 | %------ Specification of internal system of units 147 | 148 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 149 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 150 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 151 | GravityConstantInternal 0 152 | 153 | 154 | %------ Factors to tune merit function of time averaged velocity structure (only ver 1.1) 155 | 156 | HaloValueRsFac 5.0 157 | HaloValueTsFac 1.0 158 | HaloValueQsFac 1.0 159 | HaloValuePsFac 1.0 160 | 161 | DiskValueRsFac 1.0 162 | DiskValueTsFac 1.0 163 | DiskValueQsFac 1.0 164 | DiskValuePsFac 1.0 165 | 166 | BulgeValueRsFac 1.0 167 | BulgeValueTsFac 1.0 168 | BulgeValueQsFac 1.0 169 | BulgeValuePsFac 1.0 170 | 171 | 172 | -------------------------------------------------------------------------------- /Model_H1.param: -------------------------------------------------------------------------------- 1 | 2 | %------ File and path names, as well as output file format 3 | 4 | OutputDir ./Model-H1 5 | 6 | 7 | OutputFile snap % Base filename of generated sequence of files 8 | SnapFormat 1 % File format selection 9 | 10 | 11 | %------ Basic structural parameters of model 12 | 13 | CC 10.0 % halo concentration 14 | V200 200.0 % circular velocity v_200 (in km/sec) 15 | LAMBDA 0.0 % spin parameter 16 | MD 0.0 % disk mass fraction 17 | MB 0.0 % bulge mass fraction 18 | MBH 0.0 % black hole mass fraction. If zero, no black 19 | % hole is generated, otherwise one at the centre 20 | % is added.
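Model_H1 is the simplest case: a single, isotropic Hernquist halo. Its velocity draws are bounded by the MaxVelInUnitsVesc parameter further down; the sketch below mirrors the rejection loop in src/set_particles.c later in this document (halo_generate_v() is declared in src/proto.h; the wrapper function itself is added here for illustration).

/* draw a speed at radius r from the isotropic Hernquist distribution function,
   rejecting draws at or above the allowed fraction of the local escape speed */
double draw_bounded_speed(double r, double vesc)
{
  double v;
  do
    v = halo_generate_v(r);
  while(v >= All.MaxVelInUnitsVesc * vesc);
  return v;
}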
21 | 22 | JD 0.00 % disk spin fraction, typically chosen equal to MD 23 | 24 | DiskHeight 0.2 % thickness of stellar disk in units of radial scale length 25 | BulgeSize 0.2 % bulge scale length in units of disk scale length 26 | 27 | HaloStretch 1.0 % should be one for a spherical halo, smaller than one corresponds to prolate distortion, otherwise oblate 28 | BulgeStretch 1.0 % should be one for a spherical bulge, smaller than one corresponds to prolate distortion, otherwise oblate 29 | 30 | %------ Particle numbers in target model 31 | 32 | N_HALO 100000 % desired number of particles in dark halo 33 | N_DISK 0 % desired number of collisionless particles in disk 34 | N_BULGE 0 % number of bulge particles 35 | 36 | 37 | 38 | 39 | %------ Selection of symmetry constraints of velocity structure 40 | 41 | TypeOfHaloVelocityStructure 0 % 0 = spherically symmetric, isotropic 42 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 43 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 44 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 45 | 46 | TypeOfDiskVelocityStructure 0 % 0 = spherically symmetric, isotropic 47 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 48 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 49 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 50 | 51 | TypeOfBulgeVelocityStructure 0 % 0 = spherically symmetric, isotropic 52 | % 1 = spherically symmetric, anisotropic (with beta parameter specified) 53 | % 2 = axisymmetric, f(E, Lz), with specified net rotation 54 | % 3 = axisymmetric, f(E, Lz, I_3), with sigma_R^2/sigma_z^2 specified and net rotation specified 55 | 56 | 57 | HaloBetaParameter 0 % only relevant for TypeOfHaloVelocityStructure=1 58 | BulgeBetaParameter 0 % only relevant for TypeOfBulgeVelocityStructure=1 59 | 60 | 61 | HaloDispersionRoverZratio 1.0 % only relevant for TypeOfHaloVelocityStructure=3 62 | DiskDispersionRoverZratio 1.0 % only relevant for TypeOfDiskVelocityStructure=3 63 | BulgeDispersionRoverZratio 1.0 % only relevant for TypeOfBulgeVelocityStructure=3 64 | 65 | 66 | HaloStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 67 | DiskStreamingVelocityParameter 1.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 68 | BulgeStreamingVelocityParameter 0.0 % gives the azimuthal streaming velocity in the TypeOf*VelocityStructure=2/3 cases ('k parameter') 69 | 70 | 71 | %------ Orbit integration accuracy 72 | 73 | TorbitFac 10.0 % regulates the integration time of orbits 74 | % (this is of the order of the typical number of orbits per particle) 75 | TimeStepFactorOrbit 0.01 76 | TimeStepFactorCellCross 0.25 77 | 78 | 79 | %------ Iterative optimization parameters 80 | 81 | FractionToOptimizeIndependendly 0.001 82 | IndepenentOptimizationsPerStep 100 83 | StepsBetweenDump 10 84 | MaximumNumberOfSteps 100 85 | 86 | MinParticlesPerBinForDispersionMeasurement 100 87 | MinParticlesPerBinForDensityMeasurement 50 88 | 89 | 90 | %------ Grid dimension and extension/resolution 91 | 92 | DG_MaxLevel 7 93 | EG_MaxLevel 7 94 | FG_Nbin 256 % number of bins for the acceleration grid in the R- and z-directions 95 | 
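FG_Nbin sets the resolution of the (R, z) acceleration grid. Since forcegrid_get_cell() in src/proto.h returns cell indices (iR, iz) together with fractional offsets (fR, fz), a bilinear lookup of grid quantities is the natural reading; the row-major layout grid[iR*nbin + iz] below is a hypothetical illustration, not necessarily the code's actual storage scheme.

double bilinear_lookup(const double *grid, int nbin, int iR, int iz, double fR, double fz)
{
  /* weight the four surrounding grid points by the fractional offsets */
  return (1.0 - fR) * (1.0 - fz) * grid[iR * nbin + iz]
       + fR * (1.0 - fz) * grid[(iR + 1) * nbin + iz]
       + (1.0 - fR) * fz * grid[iR * nbin + iz + 1]
       + fR * fz * grid[(iR + 1) * nbin + iz + 1];
}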
96 | 97 | OutermostBinEnclosedMassFraction 0.999 % regulates the fraction of mass of the Hernquist 98 | % halo that must be inside the grid (determines grid extension) 99 | 100 | InnermostBinEnclosedMassFraction 0.0000001 % regulates the fraction of mass enclosed by the innermost 101 | % bin (regulates size of innermost grid cells) 102 | 103 | 104 | 105 | MaxVelInUnitsVesc 0.99 % maximum allowed velocity in units of the local escape velocity 106 | 107 | %------ Construction of target density field 108 | 109 | SampleDensityFieldForTargetResponse 1 % if set to 1, the code will randomly sample points to construct the density field 110 | SampleParticleCount 100000000 % number of points sampled for target density field 111 | 112 | 113 | %------ Construction of force field 114 | 115 | SampleForceNhalo 0 % number of points to use for computing force field with a tree 116 | SampleForceNdisk 0 117 | SampleForceNbulge 0 118 | 119 | Softening 0.05 120 | 121 | 122 | %------ Accuracy settings of tree code used in construction of force field 123 | 124 | TypeOfOpeningCriterion 1 125 | ErrTolTheta 0.4 126 | ErrTolForceAcc 0.0025 127 | 128 | %------ Domain decomposition parameters used in parallel tree code 129 | 130 | MultipleDomains 4 131 | TopNodeFactor 4 132 | 133 | 134 | %------ Parallel I/O parameters, only affects writing of galaxy files 135 | 136 | NumFilesPerSnapshot 1 137 | NumFilesWrittenInParallel 1 138 | 139 | 140 | %------ Memory allocation parameters 141 | 142 | MaxMemSize 2300.0 % in MB 143 | BufferSize 100.0 144 | BufferSizeGravity 100.0 145 | 146 | 147 | %------ Specification of internal system of units 148 | 149 | UnitLength_in_cm 3.085678e21 % 1.0 kpc 150 | UnitMass_in_g 1.989e43 % 1.0e10 solar masses 151 | UnitVelocity_in_cm_per_s 1e5 % 1 km/sec 152 | GravityConstantInternal 0 153 | 154 | 155 | 156 | %------ Factors to tune merit function of time averaged velocity structure (only ver 1.1) 157 | 158 | HaloValueRsFac 1.0 159 | HaloValueTsFac 1.0 160 | HaloValueQsFac 1.0 161 | HaloValuePsFac 1.0 162 | 163 | DiskValueRsFac 1.0 164 | DiskValueTsFac 1.0 165 | DiskValueQsFac 1.0 166 | DiskValuePsFac 1.0 167 | 168 | BulgeValueRsFac 1.0 169 | BulgeValueTsFac 1.0 170 | BulgeValueQsFac 1.0 171 | BulgeValuePsFac 1.0 172 | -------------------------------------------------------------------------------- /src/mpi_utils/checksummed_sendrecv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../allvars.h" 9 | #include "../proto.h" 10 | 11 | #ifdef MPISENDRECV_CHECKSUM 12 | 13 | #undef MPI_Sendrecv 14 | 15 | 16 | int MPI_Check_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype, 17 | int dest, int sendtag, void *recvbufreal, int recvcount, 18 | MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status * status) 19 | { 20 | int checksumtag = 1000, errtag = 2000; 21 | int i, iter = 0, err_flag, err_flag_imported, size_sendtype, size_recvtype; 22 | long long sendCheckSum, recvCheckSum, importedCheckSum; 23 | unsigned char *p, *buf, *recvbuf; 24 | char msg[500]; 25 | 26 | if(dest != source) 27 | terminate("destination task different from source task"); 28 | 29 | MPI_Type_size(sendtype, &size_sendtype); 30 | MPI_Type_size(recvtype, &size_recvtype); 31 | 32 | if(dest == ThisTask) 33 | { 34 | memcpy(recvbufreal, sendbuf, recvcount * size_recvtype); 35 | return 0; 36 | } 37 | 38 | 39 | if(!(buf = mymalloc(recvcount * size_recvtype + 1024))) 40 | terminate("not enough memory to allocate the buffer buf"); 41 | 42 | for(i = 0, p = buf; i < recvcount * size_recvtype + 1024; i++) 43 | *p++ = 255; 44 | 45 | recvbuf = buf + 512; 46 | 
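  /* Note: the two 512-byte guard zones surrounding recvbuf were just filled
   * with 0xFF along with the rest of buf; after the exchange below, any guard
   * byte that no longer reads 255 means the MPI library wrote outside the
   * posted receive buffer, and the two scan loops that follow abort with a
   * diagnostic message. */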
47 | MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status); 48 | 49 | for(i = 0, p = buf; i < 512; i++, p++) 50 | { 51 | if(*p != 255) 52 | { 53 | sprintf 54 | (msg, "MPI-ERROR: Task=%d/%s: Recv occurred before recv buffer. message-size=%d from %d, i=%d c=%d\n", 55 | ThisTask, getenv("HOST"), recvcount, dest, i, *p); 56 | terminate(msg); 57 | } 58 | } 59 | 60 | for(i = 0, p = recvbuf + recvcount * size_recvtype; i < 512; i++, p++) 61 | { 62 | if(*p != 255) 63 | { 64 | sprintf 65 | (msg, "MPI-ERROR: Task=%d/%s: Recv occurred after recv buffer. message-size=%d from %d, i=%d c=%d\n", 66 | ThisTask, getenv("HOST"), recvcount, dest, i, *p); 67 | terminate(msg); 68 | } 69 | } 70 | 71 | 72 | for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) 73 | sendCheckSum += *p; 74 | 75 | importedCheckSum = 0; 76 | 77 | if(dest > ThisTask) 78 | { 79 | if(sendcount > 0) 80 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 81 | if(recvcount > 0) 82 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 83 | } 84 | else 85 | { 86 | if(recvcount > 0) 87 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 88 | if(sendcount > 0) 89 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 90 | } 91 | 92 | checksumtag++; 93 | 94 | for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++) 95 | recvCheckSum += *p; 96 | 97 | 98 | err_flag = err_flag_imported = 0; 99 | 100 | if(recvCheckSum != importedCheckSum) 101 | { 102 | printf 103 | 
("MPI-ERROR: Receive error on task=%d/%s from task=%d, message size=%d, sendcount=%d checksums= %d %d %d %d. Try to fix it...\n", 104 | ThisTask, getenv("HOST"), source, recvcount, sendcount, (int) (recvCheckSum >> 32), 105 | (int) recvCheckSum, (int) (importedCheckSum >> 32), (int) importedCheckSum); 106 | myflush(stdout); 107 | 108 | err_flag = 1; 109 | } 110 | 111 | if(dest > ThisTask) 112 | { 113 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 114 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 115 | } 116 | else 117 | { 118 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 119 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 120 | } 121 | errtag++; 122 | 123 | if(err_flag > 0 || err_flag_imported > 0) 124 | { 125 | printf("Task=%d is on %s, wants to send %d and has checksum=%d %d of send data\n", 126 | ThisTask, getenv("HOST"), sendcount, (int) (sendCheckSum >> 32), (int) sendCheckSum); 127 | myflush(stdout); 128 | 129 | do 130 | { 131 | sendtag++; 132 | recvtag++; 133 | 134 | for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) 135 | *p = 0; 136 | 137 | if((iter & 1) == 0) 138 | { 139 | if(dest > ThisTask) 140 | { 141 | if(sendcount > 0) 142 | MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); 143 | if(recvcount > 0) 144 | MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); 145 | } 146 | else 147 | { 148 | if(recvcount > 0) 149 | MPI_Recv(recvbuf, recvcount, recvtype, dest, recvtag, MPI_COMM_WORLD, status); 150 | if(sendcount > 0) 151 | MPI_Ssend(sendbuf, sendcount, sendtype, dest, sendtag, MPI_COMM_WORLD); 152 | } 153 | } 154 | else 155 | { 156 | if(iter > 5) 157 | { 158 | printf("we're trying to send each byte now on task=%d (iter=%d)\n", ThisTask, iter); 159 | myflush(stdout); 160 | if(dest > ThisTask) 161 | { 162 | for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++) 163 | MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); 164 | for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) 165 | MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); 166 | } 167 | else 168 | { 169 | for(i = 0, p = recvbuf; i < recvcount * size_recvtype; i++, p++) 170 | MPI_Recv(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD, status); 171 | for(i = 0, p = sendbuf; i < sendcount * size_sendtype; i++, p++) 172 | MPI_Ssend(p, 1, MPI_BYTE, dest, i, MPI_COMM_WORLD); 173 | } 174 | } 175 | else 176 | { 177 | MPI_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status); 178 | } 179 | } 180 | 181 | importedCheckSum = 0; 182 | 183 | for(i = 0, p = sendbuf, sendCheckSum = 0; i < sendcount * size_sendtype; i++, p++) 184 | sendCheckSum += *p; 185 | 186 | printf("Task=%d has send_checksum=%d %d\n", ThisTask, (int) (sendCheckSum >> 32), (int) sendCheckSum); 187 | myflush(stdout); 188 | 189 | if(dest > ThisTask) 190 | { 191 | if(sendcount > 0) 192 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 193 | if(recvcount > 0) 194 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 195 | } 196 | else 197 | { 198 | if(recvcount > 0) 199 | MPI_Recv(&importedCheckSum, sizeof(importedCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD, status); 200 | if(sendcount > 0) 201 | MPI_Ssend(&sendCheckSum, sizeof(sendCheckSum), MPI_BYTE, dest, checksumtag, MPI_COMM_WORLD); 202 | } 203 | 204 | for(i = 0, p = recvbuf, recvCheckSum = 0; i < recvcount * size_recvtype; i++, p++) /* sum over all received bytes, matching the sender's checksum */ 205 | recvCheckSum += *p; 206 | 
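  /* Note on the retry ladder above: even iterations re-issue a rank-ordered
   * Ssend/Recv pair, odd iterations repeat a plain MPI_Sendrecv until iter
   * exceeds 5, after which the payload is transmitted one byte per message;
   * each pass recomputes and re-exchanges checksums, and after 10 failed
   * iterations both buffers are dumped to send_data_*.dat / recv_data_*.dat
   * before the run terminates. */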
207 | err_flag = err_flag_imported = 0; 208 | 209 | if(recvCheckSum != importedCheckSum) 210 | { 211 | printf 212 | ("MPI-ERROR: Again (iter=%d) a receive error on task=%d/%s from task=%d, message size=%d, checksums= %d %d %d %d. Try to fix it...\n", 213 | iter, ThisTask, getenv("HOST"), source, recvcount, (int) (recvCheckSum >> 32), 214 | (int) recvCheckSum, (int) (importedCheckSum >> 32), (int) importedCheckSum); 215 | myflush(stdout); 216 | err_flag = 1; 217 | } 218 | 219 | if(dest > ThisTask) 220 | { 221 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 222 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 223 | } 224 | else 225 | { 226 | MPI_Recv(&err_flag_imported, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD, status); 227 | MPI_Ssend(&err_flag, 1, MPI_INT, dest, errtag, MPI_COMM_WORLD); 228 | } 229 | 230 | if(err_flag == 0 && err_flag_imported == 0) 231 | break; 232 | 233 | errtag++; 234 | checksumtag++; 235 | iter++; 236 | } 237 | while(iter < 10); 238 | 239 | if(iter >= 10) 240 | { 241 | char buf[1000]; 242 | int length; 243 | FILE *fd; 244 | 245 | sprintf(buf, "send_data_%d.dat", ThisTask); 246 | fd = fopen(buf, "w"); 247 | length = sendcount * size_sendtype; 248 | fwrite(&length, 1, sizeof(int), fd); 249 | fwrite(sendbuf, sendcount, size_sendtype, fd); 250 | fclose(fd); 251 | 252 | sprintf(buf, "recv_data_%d.dat", ThisTask); 253 | fd = fopen(buf, "w"); 254 | length = recvcount * size_recvtype; 255 | fwrite(&length, 1, sizeof(int), fd); 256 | fwrite(recvbuf, recvcount, size_recvtype, fd); 257 | fclose(fd); 258 | 259 | sprintf(msg, "MPI-ERROR: Even 10 trials proved to be insufficient on task=%d/%s. Stopping\n", ThisTask, getenv("HOST")); 260 | terminate(msg); 261 | } 262 | } 263 | 264 | memcpy(recvbufreal, recvbuf, recvcount * size_recvtype); 265 | 266 | myfree(buf); 267 | 268 | return 0; 269 | } 270 | 271 | #endif 272 | -------------------------------------------------------------------------------- /src/system.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "allvars.h" 17 | #include "proto.h" 18 | 19 | int get_thread_num(void) 20 | { 21 | #if (NUM_THREADS > 1) /* This enables OpenMP */ 22 | return omp_get_thread_num(); 23 | #else 24 | return 0; 25 | #endif 26 | } 27 | 28 | 29 | 30 | double dabs(double a) 31 | { 32 | if(a < 0) 33 | return -a; 34 | else 35 | return a; 36 | } 37 | 38 | double dmax(double a, double b) 39 | { 40 | if(a > b) 41 | return a; 42 | else 43 | return b; 44 | } 45 | 46 | double dmin(double a, double b) 47 | { 48 | if(a < b) 49 | return a; 50 | else 51 | return b; 52 | } 53 | 54 | int imax(int a, int b) 55 | { 56 | if(a > b) 57 | return a; 58 | else 59 | return b; 60 | } 61 | 62 | int imin(int a, int b) 63 | { 64 | if(a < b) 65 | return a; 66 | else 67 | return b; 68 | } 69 | 70 | 71 | #ifdef DEBUG_ENABLE_FPU_EXCEPTIONS 72 | #include 73 | void enable_core_dumps_and_fpu_exceptions(void) 74 | { 75 | /* enable floating point exceptions */ 76 | 77 | extern int feenableexcept(int __excepts); 78 | feenableexcept(FE_DIVBYZERO | FE_INVALID); 79 | 80 | 81 | /* set core-dump size to infinity */ 82 | struct rlimit rlim; 83 | getrlimit(RLIMIT_CORE, &rlim); 84 | rlim.rlim_cur = RLIM_INFINITY; 85 | setrlimit(RLIMIT_CORE, &rlim); 86 | 87 | /* MPICH catches the signals SIGSEGV, SIGBUS, and SIGFPE.... 
88 | * The following statements reset things to the default handlers, 89 | * which will generate a core file. 90 | */ 91 | signal(SIGSEGV, SIG_DFL); 92 | signal(SIGBUS, SIG_DFL); 93 | signal(SIGFPE, SIG_DFL); 94 | signal(SIGINT, SIG_DFL); 95 | } 96 | #endif 97 | 98 | 99 | 100 | 101 | /* returns the number of cpu-ticks in seconds that 102 | * have elapsed. (or the wall-clock time) 103 | */ 104 | double second(void) 105 | { 106 | return MPI_Wtime(); 107 | 108 | /* 109 | * possible alternative: 110 | * 111 | * return ((double) clock()) / CLOCKS_PER_SEC; 112 | * 113 | * but note: on AIX and presumably many other 32bit systems, 114 | * clock() has only a resolution of 10ms=0.01sec 115 | */ 116 | } 117 | 118 | double measure_time(void) /* strategy: call this at end of functions to account for time in this function, and before another (nontrivial) function is called */ 119 | { 120 | double t, dt; 121 | 122 | t = second(); 123 | dt = t - WallclockTime; 124 | WallclockTime = t; 125 | 126 | return dt; 127 | } 128 | 129 | /* returns the time difference between two measurements 130 | * obtained with second(). The routine takes care of the 131 | * possible overflow of the tick counter on 32bit systems. 132 | */ 133 | double timediff(double t0, double t1) 134 | { 135 | double dt; 136 | 137 | dt = t1 - t0; 138 | 139 | if(dt < 0) /* overflow has occured (for systems with 32bit tick counter) */ 140 | { 141 | #ifdef WALLCLOCK 142 | dt = 0; 143 | #else 144 | dt = t1 + pow(2, 32) / CLOCKS_PER_SEC - t0; 145 | #endif 146 | } 147 | 148 | return dt; 149 | } 150 | 151 | 152 | 153 | void minimum_large_ints(int n, long long *src, long long *res) 154 | { 155 | int i, j; 156 | long long *numlist; 157 | 158 | numlist = (long long *) mymalloc("numlist", NTask * n * sizeof(long long)); 159 | MPI_Allgather(src, n * sizeof(long long), MPI_BYTE, numlist, n * sizeof(long long), MPI_BYTE, 160 | MPI_COMM_WORLD); 161 | 162 | for(j = 0; j < n; j++) 163 | res[j] = src[j]; 164 | 165 | for(i = 0; i < NTask; i++) 166 | for(j = 0; j < n; j++) 167 | if(res[j] > numlist[i * n + j]) 168 | res[j] = numlist[i * n + j]; 169 | 170 | myfree(numlist); 171 | } 172 | 173 | 174 | void sumup_large_ints_comm(int n, int *src, long long *res, MPI_Comm comm) 175 | { 176 | int i, j, *numlist; 177 | int ntask; 178 | 179 | MPI_Comm_size(comm, &ntask); 180 | 181 | numlist = (int *) mymalloc("numlist", ntask * n * sizeof(int)); 182 | MPI_Allgather(src, n, MPI_INT, numlist, n, MPI_INT, comm); 183 | 184 | for(j = 0; j < n; j++) 185 | res[j] = 0; 186 | 187 | for(i = 0; i < ntask; i++) 188 | for(j = 0; j < n; j++) 189 | res[j] += numlist[i * n + j]; 190 | 191 | myfree(numlist); 192 | } 193 | 194 | 195 | void sumup_large_ints(int n, int *src, long long *res) 196 | { 197 | sumup_large_ints_comm(n, src, res, MPI_COMM_WORLD); 198 | } 199 | 200 | void sumup_longs(int n, long long *src, long long *res) 201 | { 202 | int i, j; 203 | long long *numlist; 204 | 205 | numlist = (long long *) mymalloc("numlist", NTask * n * sizeof(long long)); 206 | MPI_Allgather(src, n * sizeof(long long), MPI_BYTE, numlist, n * sizeof(long long), MPI_BYTE, 207 | MPI_COMM_WORLD); 208 | 209 | for(j = 0; j < n; j++) 210 | res[j] = 0; 211 | 212 | for(i = 0; i < NTask; i++) 213 | for(j = 0; j < n; j++) 214 | res[j] += numlist[i * n + j]; 215 | 216 | myfree(numlist); 217 | } 218 | 219 | 220 | void sumup_floats(int n, float *x, float *res) 221 | { 222 | int i, j, p; 223 | float *numlist; 224 | 225 | double min_FreeBytes_glob, FreeBytes_local = 1.0 * FreeBytes; 226 | 
MPI_Allreduce(&FreeBytes_local, &min_FreeBytes_glob, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 227 | 228 | int sum_chunksize = (int) (min_FreeBytes_glob / sizeof(float) / NTask); 229 | int sum_pieces = n / sum_chunksize; 230 | int sum_restsize = n % sum_chunksize; 231 | 232 | if(sum_chunksize == 0) 233 | terminate("min_FreeBytes_glob too small - not enough memory for sumup_floats.\n"); 234 | 235 | for(j = 0; j < n; j++) 236 | res[j] = 0; 237 | 238 | for(p = 0; p < sum_pieces; p++) 239 | { 240 | numlist = (float *) mymalloc("numlist", NTask * sum_chunksize * sizeof(float)); 241 | MPI_Allgather(x + p * sum_chunksize, sum_chunksize, MPI_FLOAT, numlist, sum_chunksize, MPI_FLOAT, 242 | MPI_COMM_WORLD); 243 | 244 | for(i = 0; i < NTask; i++) 245 | for(j = 0; j < sum_chunksize; j++) 246 | res[p * sum_chunksize + j] += numlist[i * sum_chunksize + j]; 247 | myfree(numlist); 248 | } 249 | 250 | if(sum_restsize > 0) 251 | { 252 | numlist = (float *) mymalloc("numlist", NTask * sum_restsize * sizeof(float)); 253 | MPI_Allgather(x + sum_pieces * sum_chunksize, sum_restsize, MPI_FLOAT, numlist, sum_restsize, MPI_FLOAT, 254 | MPI_COMM_WORLD); 255 | 256 | for(i = 0; i < NTask; i++) 257 | for(j = 0; j < sum_restsize; j++) 258 | res[sum_pieces * sum_chunksize + j] += numlist[i * sum_restsize + j]; 259 | myfree(numlist); 260 | } 261 | } 262 | 263 | void sumup_doubles(int n, double *x, double *res) 264 | { 265 | int i, j, p; 266 | double *numlist; 267 | 268 | double min_FreeBytes_glob, FreeBytes_local = 1.0 * FreeBytes; 269 | MPI_Allreduce(&FreeBytes_local, &min_FreeBytes_glob, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); 270 | 271 | int sum_chunksize = (int) (min_FreeBytes_glob / sizeof(double) / NTask); /* sized for doubles: the gather buffer below holds doubles */ 272 | int sum_pieces = n / sum_chunksize; 273 | int sum_restsize = n % sum_chunksize; 274 | 275 | if(sum_chunksize == 0) 276 | terminate("min_FreeBytes_glob too small - not enough memory for sumup_doubles.\n"); 277 | 278 | for(j = 0; j < n; j++) 279 | res[j] = 0; 280 | 281 | for(p = 0; p < sum_pieces; p++) 282 | { 283 | numlist = (double *) mymalloc("numlist", NTask * sum_chunksize * sizeof(double)); 284 | MPI_Allgather(x + p * sum_chunksize, sum_chunksize, MPI_DOUBLE, numlist, sum_chunksize, MPI_DOUBLE, 285 | MPI_COMM_WORLD); 286 | 287 | for(i = 0; i < NTask; i++) 288 | for(j = 0; j < sum_chunksize; j++) 289 | res[p * sum_chunksize + j] += numlist[i * sum_chunksize + j]; 290 | myfree(numlist); 291 | } 292 | 293 | if(sum_restsize > 0) 294 | { 295 | numlist = (double *) mymalloc("numlist", NTask * sum_restsize * sizeof(double)); 296 | MPI_Allgather(x + sum_pieces * sum_chunksize, sum_restsize, MPI_DOUBLE, numlist, sum_restsize, 297 | MPI_DOUBLE, MPI_COMM_WORLD); 298 | 299 | for(i = 0; i < NTask; i++) 300 | for(j = 0; j < sum_restsize; j++) 301 | res[sum_pieces * sum_chunksize + j] += numlist[i * sum_restsize + j]; 302 | myfree(numlist); 303 | } 304 | } 305 | 306 | 307 | size_t sizemax(size_t a, size_t b) 308 | { 309 | if(a < b) 310 | return b; 311 | else 312 | return a; 313 | } 314 | 315 | 316 | /* The following function is part of the GNU C Library. 
317 | Contributed by Torbjorn Granlund (tege@sics.se) 318 | */ 319 | /* Find the first bit set in the argument */ 320 | int my_ffsll(long long int i) 321 | { 322 | unsigned long long int x = i & -i; 323 | if(x <= 0xffffffff) 324 | return ffs(i); 325 | else 326 | return 32 + ffs(i >> 32); 327 | } 328 | 329 | double mysort(void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)) 330 | { 331 | double t0, t1; 332 | 333 | t0 = second(); 334 | 335 | qsort(base, nel, width, compar); 336 | 337 | t1 = second(); 338 | 339 | return timediff(t0, t1); 340 | } 341 | 342 | -------------------------------------------------------------------------------- /src/proto.h: -------------------------------------------------------------------------------- 1 | #ifndef PROTO_H 2 | #define PROTO_H 3 | 4 | #include "allvars.h" 5 | #include "forcetree/forcetree.h" 6 | 7 | #include 8 | #include 9 | 10 | #ifdef HAVE_HDF5 11 | #include 12 | #endif 13 | 14 | int cmp_P_Rnd(const void *a, const void *b); 15 | void shuffle_energies(int iter); 16 | double parallel_sort(void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *)); 17 | double parallel_sort_comm(void *base, size_t nmemb, size_t size, int (*compar) (const void *, const void *), MPI_Comm comm); 18 | void smooth_stack(double *data, int maxlevel); 19 | double calc_stack_difference(double *d1, double *d2, int l, int i, int j, int maxlevel, double *ref1, double *ref2, double thresh, double *dist, int flag); 20 | 21 | #ifdef VER_1_1 22 | double calc_stack_difference_mod(double *d1, double *d2, int l, int i, int j, int maxlevel, double *ref1, double *ref2, double thresh, double *dist, int flag); 23 | double calc_stack_sum( double *ref, double *thr, int l, int i, int j, int maxlevel, double thresh, double *dist ); 24 | #endif 25 | 26 | 27 | double calc_stack_difference_used(double *d1, double *d2, int l, int i, int j, int maxlevel, 28 | double *ref1, double *ref2, double *used1, double *used2, 29 | double thresh, int flag); 30 | 31 | double eval_smoothed_stack(double *din, int l, int i, int j, int maxlevel, double *ref, double thresh); 32 | void calc_smoothed_stack(double *din, double *dout, int maxlevel, double *ref, double thresh); 33 | 34 | double integrate_axisymmetric_jeans(double zstart, double zend, double R, int type); 35 | 36 | double h_factor(double R, double z, int type); 37 | double get_beta_of_type(double *pos, int type); 38 | 39 | void free_allocated_memory(void); 40 | void force_test(void); 41 | void forcegrid_get_cell(double *pos, int *iR, int *iz, double *fR, double *fz); 42 | 43 | double halo_get_potential(double *pos); 44 | void halo_get_acceleration(double *pos, double *acc); 45 | void halo_get_fresh_coordinate(double *pos); 46 | double halo_generate_v(double rad); 47 | double halo_get_potential_from_radius(double r); 48 | double halo_get_density(double *pos); 49 | double halo_get_mass_inside_radius(double r); 50 | double halo_get_escape_speed(double *pos); 51 | double halo_get_sigma2(double *pos); 52 | 53 | void disk_get_fresh_coordinate(double *pos); 54 | double disk_get_density(double *pos); 55 | double disk_get_mass_inside_radius(double R); 56 | 57 | double bugle_get_mass_inside_radius(double r); 58 | void bulge_get_fresh_coordinate(double *pos); 59 | double bulge_get_density(double *pos); 60 | double bulge_get_mass_inside_radius(double r); 61 | double bulge_get_escape_speed(double *pos); 62 | double bulge_get_potential(double *pos); 63 | double bulge_get_potential_from_radius(double r); 64 | void 
bulge_get_acceleration(double *pos, double *acc); 65 | double bulge_get_escape_speed(double *pos); 66 | void output_rotcurve(void); 67 | 68 | void densitygrid_sample_targetresponse(void); 69 | void enable_core_dumps_and_fpu_exceptions(void); 70 | 71 | double h_over_R(double R, double z, int type); 72 | 73 | 74 | void line_search(void); 75 | void calc_energy_grid_mass_maps(void); 76 | void energygrid_get_cell(double *pos, int *iR, int *iz, double *fR, double *fz); 77 | void calc_disp_components_for_particle(int n, double *v, double *vr2, double *vt2, double *vp2, double *vq2); 78 | 79 | void structure_determination(void); 80 | double structure_disk_angmomentum(void); 81 | double structure_gc(double c); 82 | 83 | double eval_fit(int n, double *vel, double *newdens, double *olddens); 84 | #ifdef VER_1_1 85 | double eval_fit_mod(int n, double *vel, double *newdens, double *olddens, double *egyROrbit_new, double *egyROrbit_old, 86 | double *egyTOrbit_new, double *egyTOrbit_old, 87 | double *egyQOrbit_new, double *egyQOrbit_old, 88 | double *egyPOrbit_new, double *egyPOrbit_old ); 89 | #endif 90 | 91 | double goldensection_search(int n, double ekin_a, double ekin_b, double ekin_c, double f_a, double f_b, double f_c, double *dir, double *egy, double *fnew, int *count); 92 | double eval_fit_anisotropy(int, double alpha, double v, double *rad, double *perp); 93 | void optimize(int n); 94 | //void optimize_std(int n); 95 | void free_all_response_fields(void); 96 | void calc_all_response_fields(void); 97 | void optimize_some_particles(void); 98 | 99 | void forcegrid_allocate(void); 100 | double forcegrid_get_potential(double *pos); 101 | void forcegrid_get_acceleration(double *pos, double *acc); 102 | double forcegrid_get_escape_speed(double *pos); 103 | 104 | void forcedensitygrid_create(void); 105 | void forcedensitygrid_calculate(void); 106 | 107 | void densitygrid_allocate(void); 108 | void densitygrid_get_cell(double *pos, int *iR, int *iz, double *fR, double *fz); 109 | void forcedensitygrid_load(void); 110 | void forcedensitygrid_save(void); 111 | 112 | void commit_updates(void); 113 | void init_updates(void); 114 | void calc_global_fit(void); 115 | 116 | 117 | void energygrid_allocate(void); 118 | 119 | 120 | void reorient_particle_velocities(int iter); 121 | void update_velocities(int iter); 122 | void initialize_particles(void); 123 | 124 | double get_density_of_type(double *pos, int type); 125 | double get_vstream(double *pos, int type); 126 | double get_z_disp_cylindrical(double *pos, int type); 127 | double get_radial_disp_spherical(double *pos, int type); 128 | void get_disp_rtp(double *pos, int type, double *disp_r, double *disp_t, double *disp_p, double *disp_q); 129 | double get_r_disp_tilted(double *pos, int type); 130 | double get_theta_disp_tilted(double *pos, int type); 131 | double get_phi_disp(double *pos, int type); 132 | 133 | void calculate_dispfield(void); 134 | void calc_all_response_fields_and_gradients(void); 135 | void log_message(int iter); 136 | void calc_response_dispersion(void); 137 | void allocate_memory(void); 138 | void output_toomre_Q(void); 139 | void add_to_energy_grid(double *pos, double mass, double vr2, double vt2, double vp2, double vq2, 140 | double *egyMass, double *egyResponse_r, double *egyResponse_t, double *egyResponse_p, double *egyResponse_q); 141 | 142 | double produce_orbit_response_field(double *pos, double *vel, int id, double *mfield, double mass, double timespan, int *orbitstaken); 143 | #ifdef VER_1_1 144 | double 
produce_orbit_response_field_mod(double *pos, double *vel, int id, double *mfield, double *egyfield_r, double *egyfield_t, double *egyfield_q, double *egyfield_p, double mass, double timespan, int *orbitstaken, int type); 145 | #endif 146 | 147 | void init(void); 148 | void set_units(void); 149 | void endrun(void); 150 | void output_compile_time_options(void); 151 | void set_softenings(void); 152 | 153 | void read_parameter_file(char *fname); 154 | 155 | void mpi_printf(const char *fmt, ...); 156 | size_t my_fread(void *ptr, size_t size, size_t nmemb, FILE * stream); 157 | size_t my_fwrite(void *ptr, size_t size, size_t nmemb, FILE * stream); 158 | void write_file(char *fname, int writeTask, int lastTask); 159 | void get_dataset_name(enum iofields blocknr, char *buf); 160 | void get_Tab_IO_Label(enum iofields blocknr, char *label); 161 | int blockpresent(enum iofields blocknr, int write); 162 | int get_particles_in_block(enum iofields blocknr, int *typelist); 163 | int get_values_per_blockelement(enum iofields blocknr); 164 | int get_datatype_in_block(enum iofields blocknr); 165 | int get_bytes_per_blockelement(enum iofields blocknr, int mode); 166 | void fill_write_buffer(enum iofields blocknr, int *startindex, int pc, int type); 167 | void output_particles(int iter); 168 | void output_density_field(int iter); 169 | void distribute_file(int nfiles, int firstfile, int firsttask, int lasttask, int *filenr, int *master, int *last); 170 | 171 | 172 | void *mymalloc_fullinfo(const char *varname, size_t n, const char *func, const char *file, int linenr); 173 | void *mymalloc_movable_fullinfo(void *ptr, const char *varname, size_t n, const char *func, const char *file, 174 | int line); 175 | 176 | void *myrealloc_fullinfo(void *p, size_t n, const char *func, const char *file, int line); 177 | void *myrealloc_movable_fullinfo(void *p, size_t n, const char *func, const char *file, int line); 178 | 179 | void myfree_fullinfo(void *p, const char *func, const char *file, int line); 180 | void myfree_movable_fullinfo(void *p, const char *func, const char *file, int line); 181 | int dump_memory_table_buffer(char *p); 182 | void mymalloc_init(void); 183 | 184 | int permutation_compare(const void *a, const void *b); 185 | 186 | double dabs(double a); 187 | double dmax(double a, double b); 188 | double dmin(double a, double b); 189 | int imax(int a, int b); 190 | int imin(int a, int b); 191 | int get_part_count_this_task(int n); 192 | size_t sizemax(size_t a, size_t b); 193 | int my_ffsll(long long int i); 194 | void reorder_particles(int *Id); 195 | void gravity(void); 196 | 197 | double second(void); 198 | void sumup_large_ints(int n, int *src, long long *res); 199 | void sumup_longs(int n, long long *src, long long *res); 200 | double timediff(double t0, double t1); 201 | 202 | int get_thread_num(void); 203 | peanokey peano_hilbert_key(int x, int y, int z, int bits); 204 | void peano_hilbert_order(void); 205 | void peano_hilbert_key_inverse(peanokey key, int bits, int *x, int *y, int *z); 206 | double mysort(void *base, size_t nel, size_t width, int (*compar) (const void *, const void *)); 207 | 208 | #endif 209 | 210 | -------------------------------------------------------------------------------- /src/set_particles.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "allvars.h" 11 | #include "proto.h" 12 | 13 | 14 | 15 | void 
initialize_particles(void) { 16 | 17 | int n, i, k; 18 | double phi, theta, vr; 19 | double vsum2 = 0, rsum2 = 0, vsum2_exact = 0; 20 | int count_r[6], count_t[6], count_p[6], count_q[6]; 21 | int tot_count_r[6], tot_count_t[6], tot_count_p[6], tot_count_q[6]; 22 | 23 | int nhalo = get_part_count_this_task(All.Halo_N); 24 | int ndisk = get_part_count_this_task(All.Disk_N); 25 | int nbulge = get_part_count_this_task(All.Bulge_N); 26 | 27 | NumPart = nhalo + ndisk + nbulge; 28 | 29 | MPI_Allreduce(&NumPart, &All.MaxPart, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); 30 | sumup_large_ints(1, &NumPart, &All.TotNumPart); 31 | 32 | P = (struct particle_data *) mymalloc_movable(&P, "P", All.MaxPart * sizeof(struct particle_data)); 33 | memset(P, 0, All.MaxPart * sizeof(struct particle_data)); 34 | 35 | permutation = (struct permutation_data *) mymalloc_movable(&permutation, "permutation", All.MaxPart * sizeof(struct permutation_data)); 36 | 37 | n = 0; 38 | 39 | for(i = 0; i < 6; i++) 40 | count_r[i] = count_t[i] = count_p[i] = count_q[i] = 0; 41 | 42 | for(i = 0; i < nhalo; i++, n++) { 43 | P[n].Type = 1; 44 | P[n].Mass = All.Halo_Mass / All.Halo_N; 45 | } 46 | 47 | for(i = 0; i < ndisk; i++, n++) { 48 | P[n].Type = 2; 49 | P[n].Mass = All.Disk_Mass / All.Disk_N; 50 | } 51 | 52 | for(i = 0; i < nbulge; i++, n++) { 53 | P[n].Type = 3; 54 | P[n].Mass = All.Bulge_Mass / All.Bulge_N; 55 | } 56 | 57 | int *nlist = mymalloc("nlist", NTask * sizeof(int)); 58 | MPI_Allgather(&NumPart, 1, MPI_INT, nlist, 1, MPI_INT, MPI_COMM_WORLD); 59 | int nbefore = 0; 60 | for(i = 0; i < ThisTask; i++) 61 | nbefore += nlist[i]; 62 | myfree(nlist); 63 | 64 | for(n = 0; n < NumPart; n++) 65 | P[n].ID = nbefore + n + 1; 66 | 67 | for(n = 0; n < NumPart; n++) { 68 | 69 | if(P[n].Type == 1) 70 | halo_get_fresh_coordinate(P[n].Pos); // halo particle 71 | else if(P[n].Type == 2) 72 | disk_get_fresh_coordinate(P[n].Pos); // disk particle 73 | else if(P[n].Type == 3) 74 | bulge_get_fresh_coordinate(P[n].Pos); // bulge particle 75 | 76 | 77 | 78 | 79 | P[n].Vesc = forcegrid_get_escape_speed(P[n].Pos); 80 | 81 | double acc[3]; 82 | forcegrid_get_acceleration(P[n].Pos, acc); 83 | 84 | double a = sqrt(acc[0] * acc[0] + acc[1] * acc[1] + acc[2] * acc[2]); 85 | double r = sqrt(P[n].Pos[0] * P[n].Pos[0] + P[n].Pos[1] * P[n].Pos[1] + P[n].Pos[2] * P[n].Pos[2]); 86 | 87 | P[n].Tint = All.TorbitFac * 2 * M_PI * r / sqrt(r * a); 88 | 89 | P[n].RecalcFlag = 1; 90 | 91 | 92 | if(P[n].Type == 1) { 93 | 94 | // generate a realization in VelTheo[] with the exact spherically symmetric, isotropic Hernquist distribution function, for comparison 95 | 96 | do { 97 | 98 | vr = halo_generate_v(r); 99 | 100 | } while(vr >= All.MaxVelInUnitsVesc * P[n].Vesc); 101 | 102 | // isotropic velocity distribution 103 | 104 | phi = gsl_rng_uniform(random_generator) * M_PI * 2; 105 | theta = acos(gsl_rng_uniform(random_generator) * 2 - 1); 106 | 107 | P[n].VelTheo[0] = vr * sin(theta) * cos(phi); 108 | P[n].VelTheo[1] = vr * sin(theta) * sin(phi); 109 | P[n].VelTheo[2] = vr * cos(theta); 110 | 111 | vsum2_exact += vr * vr; 112 | rsum2 += r * r; 113 | } 114 | 115 | 116 | // generate an initial guess for the velocities 117 | // let's pick the Jeans moment for this, and use a Gaussian 118 | 119 | int typeOfVelocityStructure = 0; 120 | 121 | if(P[n].Type == 1) // a halo particle 122 | typeOfVelocityStructure = All.TypeOfHaloVelocityStructure; 123 | else if(P[n].Type == 
2) // disk 124 | typeOfVelocityStructure = All.TypeOfDiskVelocityStructure; 125 | else if(P[n].Type == 3) // bulge 126 | typeOfVelocityStructure = All.TypeOfBulgeVelocityStructure; 127 | else 128 | terminate("unknown type"); 129 | 130 | double disp_r = 0, disp_t = 0, disp_p = 0, disp_q = 0; 131 | 132 | get_disp_rtp(P[n].Pos, P[n].Type, &disp_r, &disp_t, &disp_p, &disp_q); 133 | 134 | //disp_r = disp_t = disp_p = disp_q = halo_get_sigma2(P[n].Pos); 135 | 136 | 137 | if(disp_r <= All.LowerDispLimit) { 138 | count_r[P[n].Type]++; 139 | disp_r = All.LowerDispLimit; 140 | } 141 | 142 | if (disp_t <= All.LowerDispLimit) { 143 | count_t[P[n].Type]++; 144 | disp_t = All.LowerDispLimit; 145 | } 146 | 147 | if (disp_p <= All.LowerDispLimit) { 148 | count_p[P[n].Type]++; 149 | disp_p = All.LowerDispLimit; 150 | } 151 | 152 | if(disp_q <= All.LowerDispLimit) { 153 | count_q[P[n].Type]++; 154 | disp_q = All.LowerDispLimit; 155 | } 156 | 157 | 158 | P[n].vr2_target = disp_r; 159 | P[n].vt2_target = disp_t; 160 | P[n].vp2_target = disp_p; 161 | P[n].vq2_target = disp_q; 162 | 163 | double vstr = get_vstream(P[n].Pos, P[n].Type); 164 | 165 | // spherical case 166 | if(typeOfVelocityStructure == 0 || typeOfVelocityStructure == 1 || typeOfVelocityStructure == 3) { 167 | 168 | double sigmaR = sqrt(disp_r); 169 | double sigmaT = sqrt(disp_t); 170 | double sigmaP = sqrt(disp_p); 171 | double v, vr, vphi, vtheta; 172 | 173 | // draw three Gaussians with the relevant dispersions 174 | do { 175 | 176 | vr = gsl_ran_gaussian(random_generator, sigmaR); 177 | vtheta = gsl_ran_gaussian(random_generator, sigmaT); 178 | vphi = gsl_ran_gaussian(random_generator, sigmaP); 179 | 180 | vphi += vstr; 181 | 182 | v = sqrt(vr * vr + vphi * vphi + vtheta * vtheta); 183 | 184 | } while ( All.MaxVelInUnitsVesc * P[n].Vesc < v ); 185 | 186 | 187 | double phi = atan2(P[n].Pos[1], P[n].Pos[0]); 188 | double theta = acos(P[n].Pos[2] / sqrt(P[n].Pos[0] * P[n].Pos[0] + P[n].Pos[1] * P[n].Pos[1] + P[n].Pos[2] * P[n].Pos[2])); 189 | double er[3], ePhi[3], eTheta[3]; 190 | 191 | er[0] = sin(theta) * cos(phi); 192 | er[1] = sin(theta) * sin(phi); 193 | er[2] = cos(theta); 194 | 195 | ePhi[0] = -sin(phi); 196 | ePhi[1] = cos(phi); 197 | ePhi[2] = 0; 198 | 199 | eTheta[0] = -cos(theta) * cos(phi); 200 | eTheta[1] = -cos(theta) * sin(phi); 201 | eTheta[2] = sin(theta); 202 | 203 | 204 | for(k = 0; k < 3; k++) { 205 | //P[n].Vel[k] = P[n].VelTheo[k]; 206 | 207 | P[n].Vel[k] = vr * er[k] + vphi * ePhi[k] + vtheta * eTheta[k]; 208 | //double vesc = halo_get_escape_speed(P[n].Pos); 209 | //printf("%g %g\n", P[n].Vesc, vesc); 210 | } 211 | 212 | /* 213 | P[n].Vel[0] = vr; 214 | P[n].Vel[1] = vtheta; 215 | P[n].Vel[2] = vphi; 216 | */ 217 | 218 | // axisymmetric case, f(E,Lz), with net rotation 219 | } else if(typeOfVelocityStructure == 2) { 220 | 221 | double sigmaR = sqrt(disp_r); 222 | double sigmaT = sqrt(disp_t); 223 | double sigmaP = sqrt(disp_p); 224 | double v, vR, vphi, vz; 225 | 226 | // draw three Gaussians with the relevant dispersions 227 | do { 228 | 229 | vR = gsl_ran_gaussian(random_generator, sigmaR); 230 | vz = gsl_ran_gaussian(random_generator, sigmaT); 231 | vphi = gsl_ran_gaussian(random_generator, sigmaP); 232 | 233 | vphi += vstr; 234 | 235 | v = sqrt(vR * vR + vphi * vphi + vz * vz); 236 | 237 | } while ( v >= All.MaxVelInUnitsVesc * P[n].Vesc ); 238 | 239 | phi = atan2(P[n].Pos[1], P[n].Pos[0]); 240 | 241 | double eR[3], ePhi[3], eZ[3]; 242 | 243 | eR[0] = cos(phi); 244 | eR[1] = sin(phi); 245 | eR[2] = 0; 246 | 247 | 
ePhi[0] = -sin(phi); 248 | ePhi[1] = cos(phi); 249 | ePhi[2] = 0; 250 | 251 | eZ[0] = 0; 252 | eZ[1] = 0; 253 | eZ[2] = 1; 254 | 255 | for(k = 0; k < 3; k++) 256 | P[n].Vel[k] = vR * eR[k] + vphi * ePhi[k] + vz * eZ[k]; 257 | } 258 | 259 | vsum2 += P[n].Vel[0] * P[n].Vel[0] + P[n].Vel[1] * P[n].Vel[1] + P[n].Vel[2] * P[n].Vel[2]; 260 | } 261 | 262 | MPI_Allreduce(count_r, tot_count_r, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 263 | MPI_Allreduce(count_t, tot_count_t, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 264 | MPI_Allreduce(count_p, tot_count_p, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 265 | MPI_Allreduce(count_q, tot_count_q, 6, MPI_INT, MPI_SUM, MPI_COMM_WORLD); 266 | 267 | int type; 268 | for(type = 1; type <= 3; type++) { 269 | 270 | if(NType[type] == 0) continue; 271 | 272 | double frac_r = ((double)tot_count_r[type]) / NType[type]; 273 | double frac_t = ((double)tot_count_t[type]) / NType[type]; 274 | double frac_p = ((double)tot_count_p[type]) / NType[type]; 275 | double frac_q = ((double)tot_count_q[type]) / NType[type]; 276 | 277 | mpi_printf("Type=%d: fractions of particles with problematic low velocity dispersion: (r/R|t/z|phi/tot_phi) = (%g|%g|%g|%g)\n", type, frac_r, frac_t, frac_p, frac_q); 278 | 279 | if(frac_r > 0.05 || frac_t > 0.05 || frac_p > 0.05 || frac_q > 0.05) { 280 | mpi_printf("\nwe better stop, because there appears to be no valid velocity structure for this configuration.\n\n"); 281 | endrun(); 282 | } 283 | 284 | } 285 | 286 | 287 | if(ThisTask == 0) 288 | for (type = 1; type <= 3; type++) { 289 | 290 | if(NType[type] == 0) continue; 291 | 292 | char buf[2000]; 293 | sprintf(buf, "%s/fit_%d.txt", All.OutputDir, type); 294 | if(!(FdFit[type] = fopen(buf, "w"))) 295 | terminate("can't open file '%s'", buf); 296 | } 297 | 298 | for(n = 0; n < NumPart; n++) { 299 | permutation[n].rnd = gsl_rng_uniform(random_generator); 300 | permutation[n].index = n; 301 | } 302 | 303 | qsort(permutation, NumPart, sizeof(struct permutation_data), permutation_compare); 304 | 305 | //output_toomre_Q(); 306 | //output_rotcurve(); 307 | } 308 | 309 | int permutation_compare(const void *a, const void *b) { 310 | 311 | if(((struct permutation_data *) a)->rnd < (((struct permutation_data *) b)->rnd)) return -1; 312 | 313 | if(((struct permutation_data *) a)->rnd > (((struct permutation_data *) b)->rnd)) return +1; 314 | 315 | return 0; 316 | 317 | } 318 | 319 | 320 | int get_part_count_this_task(int n){ 321 | 322 | int avg = (n - 1) / NTask + 1; 323 | int exc = NTask * avg - n; 324 | int tasklastsection = NTask - exc; 325 | 326 | if(ThisTask < tasklastsection) 327 | return avg; 328 | else 329 | return avg - 1; 330 | } 331 | 332 | 333 | void output_toomre_Q(void) 334 | { 335 | if(ThisTask == 0 && NType[2] > 0) 336 | { 337 | double pos[3], R, acc[3], R2, acc2[3], R1, acc1[3]; 338 | double disp_r, disp_t, disp_p, disp_q; 339 | char buf[1000]; 340 | int j, n = 500; 341 | 342 | double Rmax = 5.0 * All.Disk_H; 343 | 344 | sprintf(buf, "%s/toomreQ.txt", All.OutputDir); 345 | FILE *fd = fopen(buf, "w"); 346 | fprintf(fd, "%d\n", n); 347 | 348 | for(j = 0; j < n; j++) 349 | { 350 | R = (Rmax / n) * (j + 0.5); 351 | 352 | pos[0] = R; 353 | pos[1] = 0; 354 | pos[2] = 0; 355 | forcegrid_get_acceleration(pos, acc); 356 | double dphiDR = -acc[0]; 357 | 358 | R2 = R + 0.05 * R; 359 | R1 = R - 0.05 * R; 360 | 361 | pos[0] = R2; 362 | forcegrid_get_acceleration(pos, acc2); 363 | pos[0] = R1; 364 | forcegrid_get_acceleration(pos, acc1); 365 | 366 | double d2phiDR2 = (-acc2[0] - (-acc1[0])) / (R2 - R1); 367 | 368 
| double kappa2 = d2phiDR2 + 3.0 / R * dphiDR; 369 | 370 | if(kappa2 < 0) 371 | terminate("kappa2 = %g", kappa2); 372 | 373 | double kappa = sqrt(kappa2); 374 | 375 | pos[0] = R; 376 | pos[1] = 0; 377 | pos[2] = 0; 378 | get_disp_rtp(pos, 2, &disp_r, &disp_t, &disp_p, &disp_q); 379 | 380 | double sigmaR = sqrt(disp_r); 381 | 382 | double sigma_star = All.Disk_Mass / (2 * M_PI * All.Disk_H * All.Disk_H) * exp(-R / All.Disk_H); 383 | 384 | double Q = sigmaR * kappa / (3.36 * All.G * sigma_star); 385 | 386 | fprintf(fd, "%g %g\n", R, Q); 387 | } 388 | fclose(fd); 389 | } 390 | } 391 | 392 | 393 | void output_rotcurve(void) 394 | { 395 | if(ThisTask == 0) 396 | { 397 | double pos[3], R, acc[3]; 398 | char buf[1000]; 399 | int j, n = 5000; 400 | 401 | double Rmax = All.R200; 402 | 403 | sprintf(buf, "%s/rotcurve.txt", All.OutputDir); 404 | FILE *fd = fopen(buf, "w"); 405 | fprintf(fd, "%d\n", n); 406 | 407 | double vc2_tot, vc2_dm, vc2_disk, vc2_bulge; 408 | 409 | for(j = 0; j < n; j++) 410 | { 411 | R = (Rmax / n) * (j + 0.5); 412 | 413 | pos[0] = R; 414 | pos[1] = 0; 415 | pos[2] = 0; 416 | forcegrid_get_acceleration(pos, acc); 417 | vc2_tot = fabs(R * acc[0]); 418 | 419 | if(All.Bulge_Mass > 0) 420 | { 421 | bulge_get_acceleration(pos, acc); 422 | vc2_bulge = fabs(R * acc[0]); 423 | } 424 | else 425 | vc2_bulge = 0; 426 | 427 | if(All.Halo_Mass > 0) 428 | { 429 | halo_get_acceleration(pos, acc); 430 | vc2_dm = fabs(R * acc[0]); 431 | } 432 | else 433 | vc2_dm = 0; 434 | 435 | vc2_disk = vc2_tot - vc2_dm - vc2_bulge; 436 | if(vc2_disk < 0) 437 | vc2_disk = 0; 438 | 439 | fprintf(fd, "%g %g %g %g %g\n", R, sqrt(vc2_tot), sqrt(vc2_dm), sqrt(vc2_disk), sqrt(vc2_bulge)); 440 | } 441 | fclose(fd); 442 | } 443 | } 444 | -------------------------------------------------------------------------------- /src/forcetree/forcetree_optimizebalance.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "allvars.h" 9 | #include "proto.h" 10 | #include "domain.h" 11 | #include "pqueue.h" 12 | 13 | 14 | 15 | static struct force_segments_data 16 | { 17 | int start, end, task; 18 | double work, cost, count, normalized_load; 19 | } 20 | *force_domainAssign; 21 | 22 | 23 | int force_sort_load(const void *a, const void *b) 24 | { 25 | if(((struct force_segments_data *) a)->normalized_load > (((struct force_segments_data *) b)->normalized_load)) 26 | return -1; 27 | 28 | if(((struct force_segments_data *) a)->normalized_load < (((struct force_segments_data *) b)->normalized_load)) 29 | return +1; 30 | 31 | return 0; 32 | } 33 | 34 | /* node structure for priority queues */ 35 | typedef struct node_t 36 | { 37 | double pri; 38 | int val; 39 | size_t pos; 40 | } node_t; 41 | 42 | 43 | /* define callback functions for priority queues */ 44 | static int cmp_pri(double next, double curr) 45 | { 46 | return (next > curr); 47 | } 48 | 49 | static double get_pri(void *a) 50 | { 51 | return (double) ((node_t *) a)->pri; 52 | } 53 | 54 | static void set_pri(void *a, double pri) 55 | { 56 | ((node_t *) a)->pri = pri; 57 | } 58 | 59 | static size_t get_pos(void *a) 60 | { 61 | return ((node_t *) a)->pos; 62 | } 63 | 64 | static void set_pos(void *a, size_t pos) 65 | { 66 | ((node_t *) a)->pos = pos; 67 | } 68 | 69 | 70 | static double oldmax, oldsum; 71 | 
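/* Note: with cmp_pri returning (next > curr), the pqueue behaves as a
 * min-heap on the load, so pqueue_peek() in force_optimize_domain_mapping()
 * further down always yields the currently least-loaded task; get_pos/set_pos
 * let the queue record each node's heap slot inside the node itself, which
 * presumably allows entries to be relocated without a search. */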
72 | double force_get_current_balance(double *impact) 73 | { 74 | #ifndef NO_MPI_IN_PLACE 75 | MPI_Allreduce(MPI_IN_PLACE, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 76 | #else 77 | double *inTaskCost = mymalloc("inTaskCost", NTask * sizeof(double)); 78 | memcpy(inTaskCost, TaskCost, NTask * sizeof(double)); 79 | MPI_Allreduce(inTaskCost, TaskCost, NTask, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); 80 | myfree(inTaskCost); 81 | #endif 82 | 83 | int i; 84 | for(i = 0, oldmax = oldsum = 0; i < NTask; i++) 85 | { 86 | oldsum += TaskCost[i]; 87 | if(oldmax < TaskCost[i]) 88 | oldmax = TaskCost[i]; 89 | } 90 | 91 | *impact = 1.0 + domain_full_weight[All.HighestActiveTimeBin] * (oldmax - oldsum / NTask) / All.TotGravCost; 92 | 93 | return oldmax / (oldsum / NTask); 94 | } 95 | 96 | void force_get_global_cost_for_leavenodes(int nexport) 97 | { 98 | int i, j, n, nimport, idx, task, ngrp; 99 | 100 | struct node_data 101 | { 102 | double domainCost; 103 | int domainCount; 104 | int no; 105 | } 106 | *export_node_data, *import_node_data; 107 | 108 | MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); 109 | 110 | for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) 111 | { 112 | nimport += Recv_count[j]; 113 | if(j > 0) 114 | { 115 | Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; 116 | Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; 117 | } 118 | } 119 | 120 | for(j = 0; j < NTask; j++) 121 | Send_count[j] = 0; 122 | 123 | export_node_data = mymalloc("export_node_data", nexport * sizeof(struct node_data)); 124 | import_node_data = mymalloc("import_node_data", nimport * sizeof(struct node_data)); 125 | 126 | for(i=0; i < nexport; i++) 127 | { 128 | int task = ListNoData[i].task; 129 | int ind = Send_offset[task] + Send_count[task]++; 130 | 131 | export_node_data[ind].domainCost = ListNoData[i].domainCost; 132 | export_node_data[ind].domainCount = ListNoData[i].domainCount; 133 | export_node_data[ind].no = ListNoData[i].no; 134 | } 135 | 136 | for(ngrp = 1; ngrp < (1 << PTask); ngrp++) 137 | { 138 | int recvTask = ThisTask ^ ngrp; 139 | if(recvTask < NTask) 140 | if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) 141 | MPI_Sendrecv(&export_node_data[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct node_data), MPI_BYTE, 142 | recvTask, TAG_DENS_B, &import_node_data[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct node_data), 143 | MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); 144 | } 145 | 146 | for(i=0; i < nimport; i++) 147 | { 148 | int no = import_node_data[i].no; 149 | DomainCost[no] += import_node_data[i].domainCost; 150 | DomainCount[no] += import_node_data[i].domainCount; 151 | } 152 | 153 | myfree(import_node_data); 154 | myfree(export_node_data); 155 | 156 | 157 | /* now share the cost data across all processors */ 158 | 159 | struct DomainNODE 160 | { 161 | double domainCost; 162 | int domainCount; 163 | } 164 | *DomainMoment, *loc_DomainMoment; 165 | 166 | DomainMoment = (struct DomainNODE *) mymalloc("DomainMoment", NTopleaves * sizeof(struct DomainNODE)); 167 | 168 | /* share the cost data across CPUs */ 169 | int *recvcounts = (int *) mymalloc("recvcounts", sizeof(int) * NTask); 170 | int *recvoffset = (int *) mymalloc("recvoffset", sizeof(int) * NTask); 171 | int *bytecounts = (int *) mymalloc("bytecounts", sizeof(int) * NTask); 172 | int *byteoffset = (int *) mymalloc("byteoffset", sizeof(int) * NTask); 173 | 174 | for(task = 0; task < NTask; task++) 175 | recvcounts[task] = 0; 176 | 177 | for(n = 0; n < NTopleaves; n++) 178 | recvcounts[DomainTask[n]]++; 179 | 180 | for(task = 0; task < NTask; task++) 181 
| bytecounts[task] = recvcounts[task] * sizeof(struct DomainNODE); 182 | 183 | for(task = 1, recvoffset[0] = 0, byteoffset[0] = 0; task < NTask; task++) 184 | { 185 | recvoffset[task] = recvoffset[task - 1] + recvcounts[task - 1]; 186 | byteoffset[task] = byteoffset[task - 1] + bytecounts[task - 1]; 187 | } 188 | 189 | loc_DomainMoment = (struct DomainNODE *) mymalloc("loc_DomainMoment", recvcounts[ThisTask] * sizeof(struct DomainNODE)); 190 | 191 | for(n = 0, idx = 0; n < NTopleaves; n++) 192 | { 193 | if(DomainTask[n] == ThisTask) 194 | { 195 | loc_DomainMoment[idx].domainCost = DomainCost[n]; 196 | loc_DomainMoment[idx].domainCount = DomainCount[n]; 197 | idx++; 198 | } 199 | } 200 | 201 | MPI_Allgatherv(loc_DomainMoment, bytecounts[ThisTask], MPI_BYTE, DomainMoment, bytecounts, byteoffset, MPI_BYTE, MPI_COMM_WORLD); 202 | 203 | for(task = 0; task < NTask; task++) 204 | recvcounts[task] = 0; 205 | 206 | for(n = 0; n < NTopleaves; n++) 207 | { 208 | task = DomainTask[n]; 209 | if(task != ThisTask) 210 | { 211 | idx = recvoffset[task] + recvcounts[task]++; 212 | 213 | DomainCost[n] = DomainMoment[idx].domainCost; 214 | DomainCount[n] = DomainMoment[idx].domainCount; 215 | } 216 | } 217 | 218 | myfree(loc_DomainMoment); 219 | myfree(byteoffset); 220 | myfree(bytecounts); 221 | myfree(recvoffset); 222 | myfree(recvcounts); 223 | myfree(DomainMoment); 224 | } 225 | 226 | 227 | 228 | void force_optimize_domain_mapping(void) 229 | { 230 | int i, j; 231 | 232 | double fac_cost = 0.5 / oldsum; 233 | double fac_count = 0.5 / All.TotNumPart; 234 | 235 | 236 | int ncpu = NTask * All.MultipleDomains; 237 | int ndomain = NTopleaves; 238 | double workavg = 1.0 / ncpu; 239 | double workhalfnode = 0.5 / NTopleaves; 240 | double work_before = 0; 241 | double workavg_before = 0; 242 | 243 | int start = 0; 244 | 245 | force_domainAssign = mymalloc("force_domainAssign", ncpu * sizeof(struct force_segments_data)); 246 | 247 | for(i = 0; i < ncpu; i++) 248 | { 249 | double work = 0, cost = 0, count = 0; 250 | int end = start; 251 | 252 | cost += fac_cost * DomainCost[end]; 253 | count += fac_count * DomainCount[end]; 254 | work += fac_cost * DomainCost[end] + fac_count * DomainCount[end]; 255 | 256 | while((work + work_before + (end + 1 < NTopleaves ? 
fac_cost * DomainCost[end + 1] + fac_count * DomainCount[end + 1] : 0) < 257 | workavg + workavg_before + workhalfnode) || (i == ncpu - 1 && end < ndomain - 1)) 258 | { 259 | if((ndomain - end) > (ncpu - i)) 260 | end++; 261 | else 262 | break; 263 | 264 | cost += fac_cost * DomainCost[end]; 265 | count += fac_count * DomainCount[end]; 266 | work += fac_cost * DomainCost[end] + fac_count * DomainCount[end]; 267 | } 268 | 269 | force_domainAssign[i].start = start; 270 | force_domainAssign[i].end = end; 271 | force_domainAssign[i].work = work; 272 | force_domainAssign[i].cost = cost; 273 | force_domainAssign[i].count = count; 274 | 275 | force_domainAssign[i].normalized_load = cost + count; /* note: they are already multiplied by fac_cost/fac_count */ 276 | 277 | work_before += work; 278 | workavg_before += workavg; 279 | start = end + 1; 280 | } 281 | 282 | qsort(force_domainAssign, ncpu, sizeof(struct force_segments_data), force_sort_load); 283 | 284 | 285 | /* create three priority queues, one for the cost load, one for the particle count, and one for the combined cost */ 286 | pqueue_t *queue_cost = pqueue_init(NTask, cmp_pri, get_pri, set_pri, get_pos, set_pos); 287 | node_t *ncost = mymalloc("ncost", NTask * sizeof(node_t)); 288 | pqueue_t *queue_count = pqueue_init(NTask, cmp_pri, get_pri, set_pri, get_pos, set_pos); 289 | node_t *ncount = mymalloc("ncount", NTask * sizeof(node_t)); 290 | pqueue_t *queue_combi = pqueue_init(NTask, cmp_pri, get_pri, set_pri, get_pos, set_pos); 291 | node_t *ncombi = mymalloc("ncombi", NTask * sizeof(node_t)); 292 | 293 | /* fill in all the tasks into the queue. The priority will be the current cost/count, the tag 'val' is used to label the task */ 294 | for(i = 0; i < NTask; i++) 295 | { 296 | ncost[i].pri = 0; 297 | ncost[i].val = i; 298 | pqueue_insert(queue_cost, &ncost[i]); 299 | 300 | ncount[i].pri = 0; 301 | ncount[i].val = i; 302 | pqueue_insert(queue_count, &ncount[i]); 303 | 304 | ncombi[i].pri = 0; 305 | ncombi[i].val = i; 306 | pqueue_insert(queue_combi, &ncombi[i]); 307 | } 308 | 309 | double max_load = 0; 310 | double max_cost = 0; 311 | 312 | for(i = 0; i < ncpu; i++) 313 | { 314 | /* pick the least work-loaded target from the queue, and the least particle-loaded, and then decide which choice 315 | gives the smallest load overall */ 316 | double cost, load; 317 | 318 | node_t *node_cost = pqueue_peek(queue_cost); 319 | node_t *node_count = pqueue_peek(queue_count); 320 | node_t *node_combi = pqueue_peek(queue_combi); 321 | 322 | int targetA = node_cost->val; 323 | int targetB = node_count->val; 324 | int targetC = node_combi->val; 325 | 326 | cost = ncost[targetA].pri + force_domainAssign[i].cost; 327 | load = ncount[targetA].pri + force_domainAssign[i].count; 328 | if(cost < max_cost) 329 | cost = max_cost; 330 | if(load < max_load) 331 | load = max_load; 332 | double workA = cost + load; 333 | 334 | cost = ncost[targetB].pri + force_domainAssign[i].cost; 335 | load = ncount[targetB].pri + force_domainAssign[i].count; 336 | if(cost < max_cost) 337 | cost = max_cost; 338 | if(load < max_load) 339 | load = max_load; 340 | double workB = cost + load; 341 | 342 | cost = ncost[targetC].pri + force_domainAssign[i].cost; 343 | load = ncount[targetC].pri + force_domainAssign[i].count; 344 | if(cost < max_cost) 345 | cost = max_cost; 346 | if(load < max_load) 347 | load = max_load; 348 | double workC = cost + load; 349 | 350 | 351 | int target; 352 | 353 | if(workA < workB && workA < workC) 354 | target = targetA; 355 | else if(workC < workB) 356 
| target = targetC; 357 | else 358 | target = targetB; 359 | 360 | force_domainAssign[i].task = target; 361 | 362 | cost = ncost[target].pri + force_domainAssign[i].cost; 363 | load = ncount[target].pri + force_domainAssign[i].count; 364 | 365 | pqueue_change_priority(queue_cost, cost, &ncost[target]); 366 | pqueue_change_priority(queue_count, load, &ncount[target]); 367 | pqueue_change_priority(queue_combi, cost + load, &ncombi[target]); 368 | 369 | if(max_cost < cost) 370 | max_cost = cost; 371 | 372 | if(max_load < load) 373 | max_load = load; 374 | } 375 | 376 | /* free queues again */ 377 | myfree(ncombi); 378 | pqueue_free(queue_combi); 379 | myfree(ncount); 380 | pqueue_free(queue_count); 381 | myfree(ncost); 382 | pqueue_free(queue_cost); 383 | 384 | for(i = 0; i < ncpu; i++) 385 | for(j = force_domainAssign[i].start; j <= force_domainAssign[i].end; j++) 386 | DomainNewTask[j] = force_domainAssign[i].task; 387 | 388 | 389 | myfree(force_domainAssign); 390 | 391 | for(i = 0; i < NTask; i++) 392 | { 393 | TaskCost[i] = 0; 394 | TaskCount[i] = 0; 395 | } 396 | 397 | for(i = 0; i < NTopleaves; i++) 398 | { 399 | TaskCost[DomainNewTask[i]] += DomainCost[i]; 400 | TaskCount[DomainNewTask[i]] += DomainCount[i]; 401 | } 402 | 403 | double max, sum, maxload, sumload; 404 | for(i = 0, max = sum = 0, maxload = sumload = 0; i < NTask; i++) 405 | { 406 | sum += TaskCost[i]; 407 | if(max < TaskCost[i]) 408 | max = TaskCost[i]; 409 | sumload += TaskCount[i]; 410 | if(maxload < TaskCount[i]) 411 | maxload = TaskCount[i]; 412 | } 413 | 414 | mpi_printf("FORCETREE: Active-TimeBin=%d [unoptimized work-balance=%g] new work-balance=%g, new load-balance=%g\n", 415 | All.HighestActiveTimeBin, oldmax / (oldsum / NTask), max / (sum / NTask), maxload / (sumload / NTask)); 416 | 417 | if((max / (sum / NTask) > oldmax / (oldsum / NTask)) || (maxload > All.MaxPart)) 418 | { 419 | mpi_printf("FORCETREE: The work-load is either worse than before or the memory-balance is not viable. We keep the old distribution.\n"); 420 | memcpy(DomainNewTask, DomainTask, NTopleaves * sizeof(int)); 421 | } 422 | } 423 | -------------------------------------------------------------------------------- /src/orbit_response.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | #include "allvars.h" 12 | #include "proto.h" 13 | 14 | 15 | /* returns a suitable integration timestep at coordinate pos[], limited by the local orbital time (V200/|acc|) and the crossing time of grid cell icell */ 16 | double get_timestep(double *pos, double *vel, double *acc, int icell) 17 | { 18 | // double r = sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]); 19 | double v = sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]); 20 | double aa = sqrt(acc[0] * acc[0] + acc[1] * acc[1] + acc[2] * acc[2]); 21 | 22 | double torbit = All.V200 / aa; 23 | double tcross = DG_CellSize[icell] / v; 24 | 25 | return dmin(All.TimeStepFactorOrbit * torbit, All.TimeStepFactorCellCross * tcross); 26 | } 27 | 28 | 29 | 30 | /* calculate the density response of a single particle starting from pos[]/vel[], averaged over time 'timespan'. If timespan=0, the routine 31 | * determines an appropriate time itself.
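 *
 * The orbit is integrated with a kick-drift-kick leapfrog using the step from get_timestep(), and on
 * each half-step the time 0.5*dt spent at the current position is deposited onto the four surrounding
 * (R,z) nodes of the density grid with bilinear cloud-in-cell weights (fR, fz), so that mfield[]
 * accumulates the time-averaged, finally mass-normalized, density response of the orbit.
 * A minimal calling sketch (the numerical values are purely illustrative; the force and density
 * grids must already be set up):
 *
 *   double pos[3] = { 10.0, 0.0, 0.0 }, vel[3] = { 0.0, 150.0, 0.0 };  /* illustrative phase-space point */
 *   double *mfield = mymalloc("mfield", DG_Ngrid * sizeof(double));
 *   int norbit;
 *   double tall = produce_orbit_response_field(pos, vel, 0, mfield, 1.0, 100.0, &norbit);
 *   myfree(mfield);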
32 | */ 33 | double produce_orbit_response_field( double *pos, double *vel, int id, double *mfield, double mass, 34 | double timespan, int *orbitstaken ) { 35 | 36 | int i, norbit, icell, flag = 0, iR, iz; 37 | double x[3], v[3], a[3], dt, tall, radsign_previous = 0, radsign, fR, fz; 38 | 39 | for(i = 0; i < 3; i++) { 40 | x[i] = pos[i]; 41 | v[i] = vel[i]; 42 | } 43 | 44 | for(i = 0; i < DG_Ngrid; i++) 45 | mfield[i] = 0; 46 | 47 | norbit = 0; 48 | tall = 0; 49 | 50 | 51 | forcegrid_get_acceleration(x, a); 52 | 53 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 54 | icell = iz * DG_Nbin + iR; 55 | 56 | int Norbits = 100000000; 57 | 58 | double E0 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 59 | int steps = 0; 60 | 61 | do { 62 | 63 | dt = get_timestep(x, v, a, icell); 64 | 65 | if (0 < timespan) 66 | if (timespan < dt + tall) { 67 | dt = timespan - tall; 68 | flag = 1; 69 | } 70 | 71 | mfield[iz * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (1 - fz); 72 | mfield[iz * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (1 - fz); 73 | mfield[(iz + 1) * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (fz); 74 | mfield[(iz + 1) * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (fz); 75 | 76 | /* 77 | 78 | insertion place 79 | 80 | */ 81 | 82 | for(i = 0; i < 3; i++) 83 | v[i] += 0.5 * dt * a[i]; 84 | 85 | for(i = 0; i < 3; i++) 86 | x[i] += dt * v[i]; 87 | 88 | forcegrid_get_acceleration(x, a); 89 | 90 | for(i = 0; i < 3; i++) 91 | v[i] += 0.5 * dt * a[i]; 92 | 93 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 94 | icell = iz * DG_Nbin + iR; 95 | 96 | mfield[iz * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (1 - fz); 97 | mfield[iz * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (1 - fz); 98 | mfield[(iz + 1) * DG_Nbin + iR] += 0.5 * dt * (1 - fR) * (fz); 99 | mfield[(iz + 1) * DG_Nbin + (iR + 1)] += 0.5 * dt * (fR) * (fz); 100 | 101 | 102 | /* 103 | 104 | insertion place 105 | 106 | */ 107 | 108 | tall += dt; 109 | 110 | radsign = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 111 | 112 | if(radsign > 0 && radsign_previous < 0) 113 | norbit++; 114 | 115 | radsign_previous = radsign; 116 | 117 | steps++; 118 | if(steps > 100000000) { 119 | printf("too many steps... 
pos=(%g|%g|%g) vel=(%g|%g|%g) dt=%g\n", 120 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], dt); 121 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 122 | printf("steps=%d: rel error = %g\n", steps, fabs(E1 - E0) / fabs(E0)); 123 | exit(1); 124 | } 125 | 126 | } while ((timespan == 0 && norbit < Norbits) || (timespan != 0 && flag == 0)); 127 | 128 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 129 | 130 | double rel_egy_error = fabs((E1 - E0) / E0); 131 | 132 | if(rel_egy_error > 0.5) { 133 | mpi_printf("relative energy error= %g orbits=%d steps=%d pos=(%g|%g|%g) vel=(%g|%g|%g)\n", rel_egy_error, norbit, steps, 134 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2]); 135 | /* 136 | terminate("error seems large, we better stop: pos=(%g|%g|%g) vel=(%g|%g|%g) id=%d v=%g vesc=%g", 137 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], id, 138 | sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]), 139 | forcegrid_get_escape_speed(pos)); 140 | */ 141 | } 142 | 143 | double fac = mass / tall; 144 | 145 | for(i = 0; i < DG_Ngrid; i++) 146 | mfield[i] *= fac; 147 | 148 | *orbitstaken = norbit; 149 | 150 | return tall; 151 | } 152 | 153 | 154 | #ifdef VER_1_1 155 | double produce_orbit_response_field_mod( double *pos, double *vel, int id, 156 | double *mfield, double *egyfield_r, double *egyfield_t, double *egyfield_q, double *egyfield_p, 157 | double mass, double timespan, int *orbitstaken, int type ) { 158 | 159 | int typeOfVelocityStructure = 0; 160 | 161 | if(type == 1) /* a halo particle */ 162 | typeOfVelocityStructure = All.TypeOfHaloVelocityStructure; 163 | else if(type == 2) /* disk */ 164 | typeOfVelocityStructure = All.TypeOfDiskVelocityStructure; 165 | else if(type == 3) /* bulge */ 166 | typeOfVelocityStructure = All.TypeOfBulgeVelocityStructure; 167 | else 168 | terminate("unknown type"); 169 | 170 | int i, norbit, icell, flag = 0, iR, iz; 171 | double x[3], v[3], a[3], dt, tall, radsign_previous = 0, radsign, fR, fz; 172 | double r2, v_dot_x, vr2; 173 | double Z[] = {0,0,-1}; 174 | double T[3], Q[3]; 175 | double q, q2, vq, vq2, v_dot_Q; 176 | double t2, vt2, v_dot_T; 177 | double vstr, vp2; 178 | int irz[2][2]; 179 | double m[2][2]; 180 | 181 | for(i = 0; i < 3; i++) { 182 | x[i] = pos[i]; 183 | v[i] = vel[i]; 184 | } 185 | 186 | for(i = 0; i < DG_Ngrid; i++) 187 | mfield[i] = 0; 188 | 189 | for(i = 0; i < EG_Ngrid; i++) { 190 | egyfield_r[i] = 0; 191 | egyfield_t[i] = 0; 192 | egyfield_q[i] = 0; 193 | egyfield_p[i] = 0; 194 | } 195 | 196 | 197 | norbit = 0; 198 | tall = 0; 199 | 200 | 201 | forcegrid_get_acceleration(x, a); 202 | 203 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 204 | icell = iz * DG_Nbin + iR; 205 | 206 | int Norbits = 100000000; 207 | 208 | double E0 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 209 | int steps = 0; 210 | 211 | do { 212 | 213 | dt = get_timestep(x, v, a, icell); 214 | 215 | if (0 < timespan) 216 | if (timespan < dt + tall) { 217 | dt = timespan - tall; 218 | flag = 1; 219 | } 220 | 221 | 222 | if(typeOfVelocityStructure == 2) { 223 | 224 | // radial 225 | r2 = x[0]*x[0] + x[1]*x[1]; 226 | v_dot_x = v[0]*x[0] + v[1]*x[1]; 227 | vr2 = v_dot_x * v_dot_x / r2; 228 | 229 | // phi 230 | Q[0] = -x[1]; 231 | Q[1] = x[0]; 232 | q2 = Q[0]*Q[0] + Q[1]*Q[1]; 233 | v_dot_Q = v[0]*Q[0] + v[1]*Q[1]; 234 | q = sqrt(q2); 235 | vq = v_dot_Q / q; 236 | vq2 = vq*vq; 237 | 238 | // phi - vstr 239 | vstr = get_vstream(x, type); 240 | vp2 =
(vq-vstr)*(vq-vstr); 241 | 242 | // theta 243 | vt2 = v[2]*v[2]; 244 | 245 | } else { 246 | 247 | // radial 248 | r2 = x[0] * x[0] + x[1] * x[1] + x[2] * x[2]; 249 | v_dot_x = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 250 | vr2 = v_dot_x * v_dot_x / r2; 251 | 252 | // phi 253 | Q[0] = x[1]*Z[2] - x[2]*Z[1]; 254 | Q[1] = x[2]*Z[0] - x[0]*Z[2]; 255 | Q[2] = x[0]*Z[1] - x[1]*Z[0]; 256 | q2 = Q[0]*Q[0] + Q[1]*Q[1] + Q[2]*Q[2]; 257 | v_dot_Q = v[0]*Q[0] + v[1]*Q[1] + v[2]*Q[2]; 258 | q = sqrt(q2); 259 | vq = v_dot_Q / q; 260 | vq2 = vq*vq; 261 | 262 | // phi - vstr 263 | vstr = get_vstream(x, type); 264 | vp2 = (vq-vstr)*(vq-vstr); 265 | 266 | // theta 267 | T[0] = x[1]*Q[2] - x[2]*Q[1]; 268 | T[1] = x[2]*Q[0] - x[0]*Q[2]; 269 | T[2] = x[0]*Q[1] - x[1]*Q[0]; 270 | t2 = T[0]*T[0] + T[1]*T[1] + T[2]*T[2]; 271 | v_dot_T = v[0]*T[0] + v[1]*T[1] + v[2]*T[2]; 272 | vt2 = v_dot_T * v_dot_T / t2; 273 | } 274 | 275 | 276 | // mass 277 | m[0][0] = 0.5 * dt * (1 - fR) * (1 - fz); 278 | m[1][0] = 0.5 * dt * (fR) * (1 - fz); 279 | m[0][1] = 0.5 * dt * (1 - fR) * (fz); 280 | m[1][1] = 0.5 * dt * (fR) * (fz); 281 | 282 | 283 | irz[0][0] = iz * DG_Nbin + iR; 284 | irz[1][0] = iz * DG_Nbin + (iR + 1); 285 | irz[0][1] = (iz + 1) * DG_Nbin + iR; 286 | irz[1][1] = (iz + 1) * DG_Nbin + (iR + 1); 287 | 288 | 289 | // m 290 | mfield[irz[0][0]] += m[0][0]; 291 | mfield[irz[1][0]] += m[1][0]; 292 | mfield[irz[0][1]] += m[0][1]; 293 | mfield[irz[1][1]] += m[1][1]; 294 | 295 | // mvr2 296 | egyfield_r[irz[0][0]] += m[0][0] * vr2; 297 | egyfield_r[irz[1][0]] += m[1][0] * vr2; 298 | egyfield_r[irz[0][1]] += m[0][1] * vr2; 299 | egyfield_r[irz[1][1]] += m[1][1] * vr2; 300 | 301 | // mvt2 302 | egyfield_t[irz[0][0]] += m[0][0] * vt2; 303 | egyfield_t[irz[1][0]] += m[1][0] * vt2; 304 | egyfield_t[irz[0][1]] += m[0][1] * vt2; 305 | egyfield_t[irz[1][1]] += m[1][1] * vt2; 306 | 307 | 308 | // mvq2 (2nd-moment) 309 | egyfield_q[irz[0][0]] += m[0][0] * vq2; 310 | egyfield_q[irz[1][0]] += m[1][0] * vq2; 311 | egyfield_q[irz[0][1]] += m[0][1] * vq2; 312 | egyfield_q[irz[1][1]] += m[1][1] * vq2; 313 | 314 | 315 | // mvp2 (dispersion) 316 | egyfield_p[irz[0][0]] += m[0][0] * vp2; 317 | egyfield_p[irz[1][0]] += m[1][0] * vp2; 318 | egyfield_p[irz[0][1]] += m[0][1] * vp2; 319 | egyfield_p[irz[1][1]] += m[1][1] * vp2; 320 | 321 | 322 | 323 | for(i = 0; i < 3; i++) 324 | v[i] += 0.5 * dt * a[i]; 325 | 326 | for(i = 0; i < 3; i++) 327 | x[i] += dt * v[i]; 328 | 329 | forcegrid_get_acceleration(x, a); 330 | 331 | for(i = 0; i < 3; i++) 332 | v[i] += 0.5 * dt * a[i]; 333 | 334 | densitygrid_get_cell(x, &iR, &iz, &fR, &fz); 335 | icell = iz * DG_Nbin + iR; 336 | 337 | 338 | if(typeOfVelocityStructure == 2) { 339 | 340 | // radial 341 | r2 = x[0]*x[0] + x[1]*x[1]; 342 | v_dot_x = v[0]*x[0] + v[1]*x[1]; 343 | vr2 = v_dot_x * v_dot_x / r2; 344 | 345 | // phi 346 | Q[0] = -x[1]; 347 | Q[1] = x[0]; 348 | q2 = Q[0]*Q[0] + Q[1]*Q[1]; 349 | v_dot_Q = v[0]*Q[0] + v[1]*Q[1]; 350 | q = sqrt(q2); 351 | vq = v_dot_Q / q; 352 | vq2 = vq*vq; 353 | 354 | // phi - vstr 355 | vstr = get_vstream(x, type); 356 | vp2 = (vq-vstr)*(vq-vstr); 357 | 358 | // theta 359 | vt2 = v[2]*v[2]; 360 | 361 | } else { 362 | 363 | r2 = x[0] * x[0] + x[1] * x[1] + x[2] * x[2]; 364 | v_dot_x = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 365 | vr2 = v_dot_x * v_dot_x / r2; 366 | 367 | 368 | // phi 369 | Q[0] = x[1]*Z[2] - x[2]*Z[1]; 370 | Q[1] = x[2]*Z[0] - x[0]*Z[2]; 371 | Q[2] = x[0]*Z[1] - x[1]*Z[0]; 372 | q2 = Q[0]*Q[0] + Q[1]*Q[1] + Q[2]*Q[2]; 373 | v_dot_Q = v[0]*Q[0] + 
v[1]*Q[1] + v[2]*Q[2]; 374 | q = sqrt(q2); 375 | vq = v_dot_Q / q; 376 | vq2 = vq*vq; 377 | 378 | // phi - vstr 379 | vstr = get_vstream(x, type); 380 | vp2 = (vq-vstr)*(vq-vstr); 381 | 382 | 383 | // theta 384 | T[0] = x[1]*Q[2] - x[2]*Q[1]; 385 | T[1] = x[2]*Q[0] - x[0]*Q[2]; 386 | T[2] = x[0]*Q[1] - x[1]*Q[0]; 387 | t2 = T[0]*T[0] + T[1]*T[1] + T[2]*T[2]; 388 | v_dot_T = v[0]*T[0] + v[1]*T[1] + v[2]*T[2]; 389 | vt2 = v_dot_T * v_dot_T / t2; 390 | 391 | } 392 | 393 | // mass 394 | m[0][0] = 0.5 * dt * (1 - fR) * (1 - fz); 395 | m[1][0] = 0.5 * dt * (fR) * (1 - fz); 396 | m[0][1] = 0.5 * dt * (1 - fR) * (fz); 397 | m[1][1] = 0.5 * dt * (fR) * (fz); 398 | 399 | 400 | irz[0][0] = iz * DG_Nbin + iR; 401 | irz[1][0] = iz * DG_Nbin + (iR + 1); 402 | irz[0][1] = (iz + 1) * DG_Nbin + iR; 403 | irz[1][1] = (iz + 1) * DG_Nbin + (iR + 1); 404 | 405 | 406 | // m 407 | mfield[irz[0][0]] += m[0][0]; 408 | mfield[irz[1][0]] += m[1][0]; 409 | mfield[irz[0][1]] += m[0][1]; 410 | mfield[irz[1][1]] += m[1][1]; 411 | 412 | // mvr2 413 | egyfield_r[irz[0][0]] += m[0][0] * vr2; 414 | egyfield_r[irz[1][0]] += m[1][0] * vr2; 415 | egyfield_r[irz[0][1]] += m[0][1] * vr2; 416 | egyfield_r[irz[1][1]] += m[1][1] * vr2; 417 | 418 | // mvt2 419 | egyfield_t[irz[0][0]] += m[0][0] * vt2; 420 | egyfield_t[irz[1][0]] += m[1][0] * vt2; 421 | egyfield_t[irz[0][1]] += m[0][1] * vt2; 422 | egyfield_t[irz[1][1]] += m[1][1] * vt2; 423 | 424 | 425 | // mvq2 (2nd-moment) 426 | egyfield_q[irz[0][0]] += m[0][0] * vq2; 427 | egyfield_q[irz[1][0]] += m[1][0] * vq2; 428 | egyfield_q[irz[0][1]] += m[0][1] * vq2; 429 | egyfield_q[irz[1][1]] += m[1][1] * vq2; 430 | 431 | 432 | // mvp2 (sigmap^2) 433 | egyfield_p[irz[0][0]] += m[0][0] * vp2; 434 | egyfield_p[irz[1][0]] += m[1][0] * vp2; 435 | egyfield_p[irz[0][1]] += m[0][1] * vp2; 436 | egyfield_p[irz[1][1]] += m[1][1] * vp2; 437 | 438 | 439 | 440 | tall += dt; 441 | 442 | radsign = v[0] * x[0] + v[1] * x[1] + v[2] * x[2]; 443 | 444 | if(radsign > 0 && radsign_previous < 0) 445 | norbit++; 446 | 447 | radsign_previous = radsign; 448 | 449 | steps++; 450 | if(steps > 100000000) { 451 | printf("too many steps...
pos=(%g|%g|%g) vel=(%g|%g|%g) dt=%g\n", 452 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], dt); 453 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 454 | printf("steps=%d: rel error = %g\n", steps, fabs(E1 - E0) / fabs(E0)); 455 | exit(1); 456 | } 457 | 458 | } while ((timespan == 0 && norbit < Norbits) || (timespan != 0 && flag == 0)); 459 | 460 | double E1 = 0.5 * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]) + forcegrid_get_potential(x); 461 | 462 | double rel_egy_error = fabs((E1 - E0) / E0); 463 | 464 | if(rel_egy_error > 0.5) { 465 | mpi_printf("relative energy error= %g orbits=%d steps=%d pos=(%g|%g|%g) vel=(%g|%g|%g)\n", rel_egy_error, norbit, steps, 466 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2]); 467 | /* 468 | terminate("error seems large, we better stop: pos=(%g|%g|%g) vel=(%g|%g|%g) id=%d v=%g vesc=%g", 469 | pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], id, 470 | sqrt(vel[0] * vel[0] + vel[1] * vel[1] + vel[2] * vel[2]), 471 | forcegrid_get_escape_speed(pos)); 472 | */ 473 | } 474 | 475 | double fac = mass / tall; 476 | 477 | for(i = 0; i < DG_Ngrid; i++) 478 | mfield[i] *= fac; 479 | 480 | for(i = 0; i < EG_Ngrid; i++) { 481 | 482 | egyfield_r[i] *= fac; 483 | egyfield_t[i] *= fac; 484 | egyfield_q[i] *= fac; 485 | egyfield_p[i] *= fac; 486 | } 487 | 488 | *orbitstaken = norbit; 489 | 490 | return tall; 491 | } 492 | 493 | #endif 494 | -------------------------------------------------------------------------------- /src/allvars.c: -------------------------------------------------------------------------------- 1 | 2 | /*! \file allvars.h 3 | * \brief declares global variables. 4 | * 5 | * This file declares all global variables. Further variables should be added here, and declared as 6 | * 'extern'. The actual existence of these variables is provided by the file 'allvars.c'. To produce 7 | * 'allvars.c' from 'allvars.h', do the following: 8 | * 9 | * - Erase all #define statements 10 | * - add #include "allvars.h" 11 | * - delete all keywords 'extern' 12 | * - delete all struct definitions enclosed in {...}, e.g.
13 | * "extern struct global_data_all_processes {....} All;" 14 | * becomes "struct global_data_all_processes All;" 15 | */ 16 | 17 | #include "allvars.h" 18 | 19 | 20 | 21 | #ifdef PERIODIC 22 | MyDouble boxSize, boxHalf; 23 | 24 | #ifdef LONG_X 25 | MyDouble boxSize_X, boxHalf_X; 26 | #else 27 | #endif 28 | #ifdef LONG_Y 29 | MyDouble boxSize_Y, boxHalf_Y; 30 | #else 31 | #endif 32 | #ifdef LONG_Z 33 | MyDouble boxSize_Z, boxHalf_Z; 34 | #else 35 | #endif 36 | #endif 37 | 38 | #ifdef FIX_PATHSCALE_MPI_STATUS_IGNORE_BUG 39 | MPI_Status mpistat; 40 | #endif 41 | 42 | /*********************************************************/ 43 | /* Global variables */ 44 | /*********************************************************/ 45 | 46 | 47 | int FG_Nbin, FG_Ngrid; 48 | double FG_Rmin, FG_Fac, FG_Rin; 49 | 50 | double *FG_Pot; 51 | double *FG_DPotDR; 52 | double *FG_DPotDz; 53 | double *FG_Pot_exact; 54 | double *FG_DPotDR_exact; 55 | double *FG_DPotDz_exact; 56 | double *FG_Disp_r[6]; 57 | double *FG_DispZ[6]; 58 | double *FG_DispPhi[6]; 59 | double *FG_Vstream[6]; 60 | double *FG_tilted_vz2[6]; 61 | double *FG_tilted_vR2[6]; 62 | double *FG_tilted_vz2_prime[6]; 63 | double *FG_tilted_vR2_prime[6]; 64 | 65 | double *FG_R; 66 | 67 | int EG_MaxLevel, EG_Nstack, EG_Nbin, EG_Ngrid; 68 | double EG_Fac, EG_Rin, EG_Rmin; 69 | 70 | double *EG_R; 71 | double *EGs_EgyResponse_r[6]; 72 | double *EGs_EgyResponse_t[6]; 73 | double *EGs_EgyResponse_p[6]; 74 | double *EGs_EgyResponse_q[6]; 75 | double *EGs_EgyTarget_r[6]; 76 | double *EGs_EgyTarget_t[6]; 77 | double *EGs_EgyTarget_p[6]; 78 | double *EGs_EgyTarget_q[6]; 79 | double *EGs_MassTarget[6]; 80 | double *EGs_MassResponse[6]; 81 | 82 | 83 | double *EG_MassLoc[6]; 84 | double *EG_EgyResponseRLoc[6]; 85 | double *EG_EgyResponseTLoc[6]; 86 | double *EG_EgyResponsePLoc[6]; 87 | double *EG_EgyResponseQLoc[6]; 88 | double *EG_EgyResponseRLoc_delta[6]; 89 | double *EG_EgyResponseTLoc_delta[6]; 90 | double *EG_EgyResponsePLoc_delta[6]; 91 | double *EG_EgyResponseQLoc_delta[6]; 92 | 93 | 94 | 95 | #ifdef VER_1_1 96 | double *EG_MassLocS[6]; 97 | double *EG_EgyResponseRLocS[6]; 98 | double *EG_EgyResponseRLocS_delta[6]; 99 | double *EGs_EgyResponseRS[6]; 100 | 101 | double *EG_EgyResponseTLocS[6]; 102 | double *EG_EgyResponseTLocS_delta[6]; 103 | double *EGs_EgyResponseTS[6]; 104 | 105 | double *EG_EgyResponseQLocS[6]; 106 | double *EG_EgyResponseQLocS_delta[6]; 107 | double *EGs_EgyResponseQS[6]; 108 | 109 | double *EG_EgyResponsePLocS[6]; 110 | double *EG_EgyResponsePLocS_delta[6]; 111 | double *EGs_EgyResponsePS[6]; 112 | 113 | 114 | double fac_value_rs[6]; 115 | double fac_value_ts[6]; 116 | double fac_value_qs[6]; 117 | double fac_value_ps[6]; 118 | #endif 119 | 120 | 121 | 122 | 123 | 124 | int DG_MaxLevel, DG_Nstack, DG_Nbin, DG_Ngrid; 125 | double DG_Rmin, DG_Fac, DG_Rin; 126 | 127 | double *DG_CellVol; 128 | double *DG_CellSize; 129 | 130 | double *DGs_LogR; 131 | double *DGs_LogZ; 132 | double *DGs_Distance; 133 | 134 | double *DGs_MassTarget[6]; 135 | double *DGs_MassResponse[6]; 136 | double *DG_MassLoc[6]; 137 | double *DG_MassLoc_delta[6]; 138 | 139 | double Totorbits[6]; 140 | int Tries[6]; 141 | int Changes[6]; 142 | 143 | double TotDv2Sum[6]; 144 | double Epsilon; 145 | 146 | double Tintegrate; 147 | double S[6]; 148 | double Sdisp_r[6]; 149 | double Sdisp_t[6]; 150 | double Sdisp_p[6]; 151 | double Sdisp_q[6]; 152 | double Srelfac[6]; 153 | double Srelsfac[6]; 154 | 155 | 156 | double Srelfac_count[6]; 157 | double MType[6]; 158 | int NType[6]; 159 
| double SizeType[6]; 160 | int CountLargeChange[6]; 161 | int Noptimized; 162 | FILE *FdFit[6]; 163 | 164 | int ThisTask; /*!< the number of the local processor */ 165 | int NTask; /*!< number of processors */ 166 | int PTask; /*!< note: NTask = 2^PTask */ 167 | 168 | 169 | double CPUThisRun; /*!< Sums CPU time of current process */ 170 | 171 | int NumForceUpdate; /*!< number of active particles on local processor in current timestep */ 172 | long long GlobNumForceUpdate; 173 | int NumSphUpdate; /*!< number of active SPH particles on local processor in current timestep */ 174 | 175 | int MaxTopNodes; /*!< Maximum number of nodes in the top-level tree used for domain decomposition */ 176 | 177 | int RestartFlag; /*!< taken from command line used to start code. 0 is normal start-up from 178 | initial conditions, 1 is resuming a run from a set of restart files, while 2 179 | marks a restart from a snapshot file. */ 180 | int RestartSnapNum; 181 | 182 | int Argc; 183 | char **Argv; 184 | 185 | 186 | int Nforces; 187 | int Ndensities; 188 | int Nhydroforces; 189 | int *TargetList; 190 | int *Threads_P_CostCount[NUM_THREADS]; 191 | int *Threads_TreePoints_CostCount[NUM_THREADS]; 192 | int *Threads_Node_CostCount[NUM_THREADS]; 193 | 194 | int maxThreads = NUM_THREADS; 195 | 196 | #ifdef IMPOSE_PINNING 197 | cpu_set_t cpuset_thread[NUM_THREADS]; 198 | #endif 199 | 200 | 201 | int *Exportflag, *ThreadsExportflag[NUM_THREADS]; /*!< Buffer used for flagging whether a particle needs to be exported to another process */ 202 | int *Exportnodecount; 203 | int *Exportindex; 204 | 205 | int *Send_offset, *Send_count, *Recv_count, *Recv_offset; 206 | int *Send_offset_nodes, *Send_count_nodes, *Recv_count_nodes, *Recv_offset_nodes; 207 | 208 | int Mesh_nimport, Mesh_nexport, *Mesh_Send_offset, *Mesh_Send_count, *Mesh_Recv_count, *Mesh_Recv_offset; 209 | 210 | int TakeLevel; 211 | int SelRnd; 212 | 213 | FILE *FdMemory; 214 | 215 | unsigned char *ProcessedFlag; 216 | 217 | int TimeBinCount[TIMEBINS]; 218 | int TimeBinCountSph[TIMEBINS]; 219 | int TimeBinCountSphHydro[TIMEBINS]; 220 | int TimeBinActive[TIMEBINS]; 221 | 222 | int NActiveHydro; 223 | int NActiveGravity; 224 | int *ActiveGravityParticles; 225 | int *ActiveHydroParticles; 226 | 227 | long long GlobalNActiveHydro; 228 | long long GlobalNActiveGravity; 229 | 230 | #ifdef USE_SFR 231 | double TimeBinSfr[TIMEBINS]; 232 | #endif 233 | 234 | 235 | 236 | #ifdef SUBFIND 237 | int GrNr; 238 | int NumPartGroup; 239 | #endif 240 | 241 | int FlagNyt = 0; 242 | char DumpFlag = 1; 243 | 244 | size_t AllocatedBytes; 245 | size_t HighMarkBytes; 246 | size_t FreeBytes; 247 | 248 | size_t HighMark_run, HighMark_domain, HighMark_gravtree, HighMark_pmperiodic, 249 | HighMark_pmnonperiodic, HighMark_sphdensity, HighMark_sphhydro, HighMark_subfind_processing, 250 | HighMark_subfind_density; 251 | 252 | 253 | 254 | 255 | double WallclockTime; /*!< This holds the last wallclock time measurement for timings measurements */ 256 | double StartOfRun; /*!< This stores the time of the start of the run for evaluating the elapsed time */ 257 | 258 | double EgyInjection; 259 | 260 | 261 | int NumPart; /*!< number of particles on the LOCAL processor */ 262 | int NumGas; /*!< number of gas particles on the LOCAL processor */ 263 | 264 | gsl_rng *random_generator; /*!< the random number generator used */ 265 | 266 | 267 | #ifdef USE_SFR 268 | int Stars_converted; /*!< current number of star particles in gas particle block */ 269 | #endif 270 | 271 | #ifdef TOLERATE_WRITE_ERROR 272 
| int WriteErrorFlag; 273 | #endif 274 | 275 | double TimeOfLastDomainConstruction; /*!< simulation time at which the domain decomposition was last constructed */ 276 | 277 | int *Ngblist; /*!< Buffer to hold indices of neighbours retrieved by the neighbour search 278 | routines */ 279 | 280 | double DomainCorner[3], DomainCenter[3], DomainLen, DomainFac; 281 | double DomainInverseLen, DomainBigFac; 282 | int *DomainStartList, *DomainEndList; 283 | double *DomainCost, *TaskCost; 284 | int *DomainCount, *TaskCount; 285 | struct no_list_data *ListNoData; 286 | 287 | int domain_bintolevel[TIMEBINS]; 288 | int domain_refbin[TIMEBINS]; 289 | int domain_corr_weight[TIMEBINS]; 290 | int domain_full_weight[TIMEBINS]; 291 | double domain_reffactor[TIMEBINS]; 292 | int domain_to_be_balanced[TIMEBINS]; 293 | 294 | int *DomainTask; 295 | int *DomainNewTask; 296 | int *DomainNodeIndex; 297 | 298 | 299 | peanokey *Key, *KeySorted; 300 | 301 | struct topnode_data *TopNodes; 302 | 303 | int NTopnodes, NTopleaves; 304 | 305 | 306 | 307 | 308 | 309 | /* variables for input/output, usually only used on process 0 */ 310 | 311 | 312 | char ParameterFile[MAXLEN_PATH]; /*!< file name of parameterfile used for starting the simulation */ 313 | 314 | FILE *FdInfo, /*!< file handle for info.txt log-file. */ 315 | *FdEnergy, /*!< file handle for energy.txt log-file. */ 316 | *FdTimings, /*!< file handle for timings.txt log-file. */ 317 | *FdDomain, /*!< file handle for domain.txt log-file. */ 318 | *FdBalance, /*!< file handle for balance.txt log-file. */ 319 | *FdMemory, *FdTimebin, *FdCPU; /*!< file handle for cpu.txt log-file. */ 320 | 321 | #ifdef OUTPUT_CPU_CSV 322 | FILE *FdCPUCSV; 323 | #endif 324 | 325 | #ifdef USE_SFR 326 | FILE *FdSfr; /*!< file handle for sfr.txt log-file. */ 327 | #endif 328 | 329 | 330 | 331 | 332 | 333 | struct pair_data *Pairlist; 334 | 335 | 336 | #ifdef FORCETEST 337 | FILE *FdForceTest; /*!< file handle for forcetest.txt log-file. */ 338 | #endif 339 | 340 | 341 | #ifdef DARKENERGY 342 | FILE *FdDE; /*!< file handle for darkenergy.txt log-file. */ 343 | #endif 344 | 345 | int WriteMiscFiles = 1; 346 | 347 | 348 | void *CommBuffer; /*!< points to communication buffer, which is used at a few places */ 349 | 350 | 351 | /*! This structure contains data which is the SAME for all tasks (mostly code parameters read from the 352 | * parameter file). Holding this data in a structure is convenient for writing/reading the restart file, and 353 | * it allows the introduction of new global variables in a simple way. The only thing to do is to introduce 354 | * them into this structure. 355 | */ 356 | struct global_data_all_processes All; 357 | 358 | 359 | /*! This structure holds all the information that is 360 | * stored for each particle of the simulation. 361 | */ 362 | struct particle_data *P, /*!< holds particle data on local processor */ 363 | *DomainPartBuf; /*!< buffer for particle data used in domain decomposition */ 364 | 365 | struct subfind_data *PS; 366 | 367 | /* the following structure holds data that is stored for each SPH particle in addition to the collisionless 368 | * variables. 369 | */ 370 | struct sph_particle_data *SphP, /*!< holds SPH particle data on local processor */ 371 | *DomainSphBuf; /*!< buffer for SPH particle data in domain decomposition */ 372 | 373 | 374 | #ifdef EXACT_GRAVITY_FOR_PARTICLE_TYPE 375 | struct special_particle_data *PartSpecialListGlobal; 376 | #endif 377 | 378 | 379 | 380 | peanokey *DomainKeyBuf; 381 | 382 | 383 | /* Various structures for communication during the gravity computation.
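 *
 * The communication pattern used throughout is the one seen in force_get_global_cost_for_leavenodes()
 * in src/forcetree/forcetree_optimizebalance.c above: per-task export counts are accumulated in
 * Send_count[], exchanged with MPI_Alltoall() to obtain Recv_count[], prefix sums of the counts give
 * Send_offset[]/Recv_offset[], and the payload is then exchanged pairwise over a hypercube ordering
 * (recvTask = ThisTask ^ ngrp) with MPI_Sendrecv().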
384 | */ 385 | 386 | struct data_index *DataIndexTable; /*!< the particles to be exported are grouped 387 | by task-number. This table allows the 388 | results to be disentangled again and to be 389 | assigned to the correct particle */ 390 | 391 | struct data_nodelist *DataNodeList; 392 | 393 | struct gravdata_in *GravDataIn, /*!< holds particle data to be exported to other processors */ 394 | *GravDataGet; /*!< holds particle data imported from other processors */ 395 | 396 | 397 | struct gravdata_out *GravDataResult, /*!< holds the partial results computed for imported particles. Note: We use GravDataResult = GravDataGet, such that the result replaces the imported data */ 398 | *GravDataOut; /*!< holds partial results received from other processors. This will overwrite the GravDataIn array */ 399 | 400 | 401 | int ThreadsNexport[NUM_THREADS], ThreadsNexportNodes[NUM_THREADS]; 402 | int *ThreadsNgblist[NUM_THREADS]; 403 | 404 | struct data_partlist *PartList, *ThreadsPartList[NUM_THREADS]; 405 | 406 | struct datanodelist *NodeList, *ThreadsNodeList[NUM_THREADS]; 407 | 408 | int *NodeDataGet, *NodeDataIn; 409 | 410 | 411 | struct potdata_out *PotDataResult, /*!< holds the partial results computed for imported particles in the potential computation; as with GravDataResult, the result replaces the imported data */ 412 | *PotDataOut; /*!< holds partial results received from other processors; as with GravDataOut, this will overwrite the input array */ 413 | 414 | 415 | 416 | 417 | /*! Header for the standard file format. 418 | */ 419 | struct io_header header; /*!< holds header for snapshot files */ 420 | 421 | #ifdef PARAMS_IN_SNAP 422 | char Parameters[MAX_PARAMETERS][MAXLEN_PARAM_TAG]; /*!< holds the tags of the parameters defined in the parameter file */ 423 | char ParameterValues[MAX_PARAMETERS][MAXLEN_PARAM_VALUE]; /*!< holds the values for the parameters defined in the parameter file */ 424 | #endif 425 | 426 | 427 | /* 428 | * Variables for Tree 429 | * ------------------ 430 | */ 431 | int Nexport, Nimport; 432 | int NexportNodes, NimportNodes; 433 | int MaxNexport, MaxNexportNodes; 434 | int BufferFullFlag; 435 | int NextParticle; 436 | int NextJ; 437 | 438 | 439 | struct permutation_data *permutation; 440 | 441 | 442 | 443 | /** Variables for gravitational tree */ 444 | int Tree_MaxPart; 445 | int Tree_NumNodes; 446 | int Tree_MaxNodes; 447 | int Tree_FirstNonTopLevelNode; 448 | int Tree_NumPartImported; 449 | int Tree_NumPartExported; 450 | int Tree_ImportedNodeOffset; 451 | int Tree_NextFreeNode; 452 | MyDouble *Tree_Pos_list; 453 | unsigned long long *Tree_IntPos_list; 454 | int *Tree_Task_list; 455 | int *Tree_ResultIndexList; 456 | 457 | struct treepoint_data *Tree_Points; 458 | struct resultsactiveimported_data *Tree_ResultsActiveImported; 459 | 460 | 461 | 462 | int *Nextnode; /*!< gives next node in tree walk (nodes array) */ 463 | int *Father; /*!< gives parent node in tree (Prenodes array) */ 464 | 465 | struct NODE *Nodes; /*!< points to the actual memory allocated for the nodes */ 466 | /*!< this is a pointer used to access the nodes which is shifted such that Nodes[All.MaxPart] 467 | gives the first allocated node */ 468 | float *Nodes_GravCost; 469 | 470 | /** Variables for neighbor tree */ 471 | int Ngb_MaxPart; 472 | int Ngb_NumNodes; 473 | int Ngb_MaxNodes; 474 | int Ngb_FirstNonTopLevelNode; 475 | int Ngb_NextFreeNode; 476 | 477 | int *Ngb_DomainNodeIndex; 478 | int *Ngb_Nextnode; 479 | 480 | 481 | /** The ngb-tree data structure 482 | */ 483 | struct NgbNODE *Ngb_Nodes; 484 |
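/** Extended data stored for each ngb-tree node */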
struct ExtNgbNODE *ExtNgb_Nodes; 485 | 486 | 487 | 488 | 489 | #ifdef STATICNFW 490 | double Rs, R200; 491 | double Dc; 492 | double RhoCrit, V200; 493 | double fac; 494 | #endif 495 | 496 | 497 | #ifdef NUM_THREADS 498 | int MaxThreads = NUM_THREADS; 499 | #else 500 | int MaxThreads = 1; 501 | #endif 502 | --------------------------------------------------------------------------------