├── .gitignore
├── Examples
│   ├── AllAbsorb
│   │   └── allAbsorb.inp
│   ├── AllEscape
│   │   └── allEscape.inp
│   ├── AllScattering
│   │   └── scatteringOnly.inp
│   ├── CORAL2_Benchmark
│   │   ├── Problem1
│   │   │   ├── 00_README.TXT
│   │   │   ├── Coral2_P1.inp
│   │   │   ├── Coral2_P1_1.inp
│   │   │   ├── Coral2_P1_4096.inp
│   │   │   ├── P1_04t.sh
│   │   │   ├── P1_16t.sh
│   │   │   └── P1_64t.sh
│   │   └── Problem2
│   │       ├── 00_README.TXT
│   │       ├── Coral2_P2.inp
│   │       ├── Coral2_P2_1.inp
│   │       ├── Coral2_P2_4096.inp
│   │       └── P2_64t.sh
│   ├── CTS2_Benchmark
│   │   ├── 00_README.TXT
│   │   ├── CTS2.inp
│   │   ├── CTS2_1.inp
│   │   ├── CTS2_36.inp
│   │   └── CTS2_scaling.sh
│   ├── Homogeneous
│   │   ├── homogeneousProblem.inp
│   │   ├── homogeneousProblem_v3.inp
│   │   ├── homogeneousProblem_v3_wq.inp
│   │   ├── homogeneousProblem_v4_tm.inp
│   │   ├── homogeneousProblem_v4_ts.inp
│   │   ├── homogeneousProblem_v5_ts.inp
│   │   ├── homogeneousProblem_v7_ts.inp
│   │   ├── quicksilver_aprun_trinity_01.sh
│   │   ├── quicksilver_aprun_trinity_02.sh
│   │   ├── quicksilver_aprun_trinity_04.sh
│   │   ├── quicksilver_slurm_rzalast_01.sh
│   │   ├── quicksilver_slurm_rzgenie_01.sh
│   │   └── run.homogeneousProblem_v4.rzoz7.x
│   ├── NoCollisions
│   │   └── no.collisions.inp
│   ├── NoFission
│   │   └── noFission.inp
│   └── NonFlatXC
│       └── NonFlatXC.inp
├── LICENSE.md
├── README.md
└── src
    ├── BulkStorage.hh
    ├── CollisionEvent.cc
    ├── CollisionEvent.hh
    ├── CommObject.hh
    ├── CoralBenchmark.cc
    ├── CoralBenchmark.hh
    ├── CycleTracking.cc
    ├── CycleTracking.hh
    ├── DeclareMacro.hh
    ├── DecompositionObject.cc
    ├── DecompositionObject.hh
    ├── DirectionCosine.cc
    ├── DirectionCosine.hh
    ├── Doxyfile
    ├── EnergySpectrum.cc
    ├── EnergySpectrum.hh
    ├── FacetPair.hh
    ├── GlobalFccGrid.cc
    ├── GlobalFccGrid.hh
    ├── Globals.hh
    ├── GridAssignmentObject.cc
    ├── GridAssignmentObject.hh
    ├── IndexToTuple.hh
    ├── IndexToTuple4.hh
    ├── InputBlock.cc
    ├── InputBlock.hh
    ├── Long64.hh
    ├── MCT.cc
    ├── MCT.hh
    ├── MC_Adjacent_Facet.cc
    ├── MC_Base_Particle.cc
    ├── MC_Base_Particle.hh
    ├── MC_Cell_State.hh
    ├── MC_Distance_To_Facet.hh
    ├── MC_Domain.cc
    ├── MC_Domain.hh
    ├── MC_Facet_Adjacency.hh
    ├── MC_Facet_Crossing_Event.cc
    ├── MC_Facet_Crossing_Event.hh
    ├── MC_Facet_Geometry.hh
    ├── MC_Fast_Timer.cc
    ├── MC_Fast_Timer.hh
    ├── MC_Load_Particle.cc
    ├── MC_Location.cc
    ├── MC_Location.hh
    ├── MC_Nearest_Facet.hh
    ├── MC_Particle.hh
    ├── MC_Particle_Buffer.cc
    ├── MC_Particle_Buffer.hh
    ├── MC_Processor_Info.hh
    ├── MC_RNG_State.cc
    ├── MC_RNG_State.hh
    ├── MC_Segment_Outcome.cc
    ├── MC_Segment_Outcome.hh
    ├── MC_SourceNow.cc
    ├── MC_SourceNow.hh
    ├── MC_Time_Info.hh
    ├── MC_Vector.hh
    ├── MacroscopicCrossSection.cc
    ├── MacroscopicCrossSection.hh
    ├── Makefile
    ├── MaterialDatabase.hh
    ├── MemoryControl.hh
    ├── MeshPartition.cc
    ├── MeshPartition.hh
    ├── MonteCarlo.cc
    ├── MonteCarlo.hh
    ├── MpiCommObject.cc
    ├── MpiCommObject.hh
    ├── NVTX_Range.hh
    ├── NuclearData.cc
    ├── NuclearData.hh
    ├── Parameters.cc
    ├── Parameters.hh
    ├── ParticleVault.cc
    ├── ParticleVault.hh
    ├── ParticleVaultContainer.cc
    ├── ParticleVaultContainer.hh
    ├── PhysicalConstants.cc
    ├── PhysicalConstants.hh
    ├── PopulationControl.cc
    ├── PopulationControl.hh
    ├── QS_Vector.hh
    ├── QS_atomics.hh
    ├── READ.ME.HOW.TO.RUN
    ├── SendQueue.cc
    ├── SendQueue.hh
    ├── SharedMemoryCommObject.cc
    ├── SharedMemoryCommObject.hh
    ├── Tallies.cc
    ├── Tallies.hh
    ├── Tuple.hh
    ├── Tuple4.hh
    ├── Tuple4ToIndex.hh
    ├── TupleToIndex.hh
    ├── cmdLineParser.cc
    ├── cmdLineParser.hh
    ├── cudaFunctions.cc
    ├── cudaFunctions.hh
    ├── cudaUtils.hh
    ├── gpuPortability.hh
    ├── initMC.cc
    ├── initMC.hh
    ├── macros.hh
    ├── main.cc
    ├── mc_omp_critical.hh
    ├── mc_omp_parallel_for_schedule_static.hh
    ├── mc_omp_parallel_for_schedule_static_if.hh
    ├── mc_omp_parallel_for_schedule_static_num_physical_cores.hh
    ├── memUtils.hh
    ├── mpi_stubs.hh
    ├── mpi_stubs_internal.hh
    ├── parseUtils.cc
    ├── parseUtils.hh
    ├── portability.hh
    ├── qs_assert.hh
    ├── utils.cc
    ├── utils.hh
    ├── utilsMpi.cc
    └── utilsMpi.hh
/.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # Project-specific 35 | .depend 36 | .depend.bak 37 | git_hash.hh 38 | git_vers.hh 39 | qs 40 | -------------------------------------------------------------------------------- /Examples/AllAbsorb/allAbsorb.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e6 3 | fMax: 0.1 4 | inputFile: 5 | loadBalance: 1 6 | lx: 10 7 | ly: 10 8 | lz: 10 9 | nParticles: 9999 10 | nSteps: 20 11 | nx: 10 12 | ny: 10 13 | nz: 10 14 | seed: 1029384761 15 | xDom: 0 16 | yDom: 0 17 | zDom: 0 18 | eMax: 1 19 | eMin: 0.99999 20 | nGroups: 1 21 | 22 | Geometry: 23 | material: boxMaterial 24 | shape: brick 25 | xMax: 10 26 | xMin: 0 27 | yMax: 10 28 | yMin: 0 29 | zMax: 10 30 | zMin: 0 31 | 32 | Geometry: 33 | material: sourceMaterial 34 | shape: brick 35 | xMax: 1 36 | xMin: 0 37 | yMax: 1 38 | yMin: 0 39 | zMax: 1 40 | zMin: 0 41 | 42 | Material: 43 | name: boxMaterial 44 | nIsotopes: 10 45 | nReactions: 9 46 | sourceRate: 0 47 | totalCrossSection: 1e10 48 | absorptionCrossSection: flat 49 | fissionCrossSection: flat 50 | scatteringCrossSection: flat 51 | absorptionCrossSectionRatio: 1 52 | fissionCrossSectionRatio: 0 53 | scatteringCrossSectionRatio: 0 54 | 55 | Material: 56 | name: sourceMaterial 57 | nIsotopes: 10 58 | nReactions: 9 59 | sourceRate: 1e-2 60 | totalCrossSection: 1e10 61 | absorptionCrossSection: flat 62 | fissionCrossSection: flat 63 | scatteringCrossSection: flat 64 | absorptionCrossSectionRatio: 1 65 | fissionCrossSectionRatio: 0 66 | scatteringCrossSectionRatio: 0 67 | 68 | CrossSection: 69 | name: flat 70 | A: 0 71 | B: 0 72 | C: 0 73 | D: 0 74 | E: 1 75 | nuBar: 2.4 76 | 77 | 78 | -------------------------------------------------------------------------------- /Examples/AllEscape/allEscape.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e6 3 | boundaryCondition: escape 4 | fMax: 0.1 5 | inputFile: 6 | loadBalance: 1 7 | lx: 10 8 | ly: 10 9 | lz: 10 10 | nParticles: 9999 11 | nSteps: 20 12 | nx: 10 13 | ny: 10 14 | nz: 10 15 | seed: 1029384761 16 | xDom: 0 17 | yDom: 0 18 | zDom: 0 19 | eMax: 1 20 | eMin: 0.99999 21 | nGroups: 1 22 | 23 | Geometry: 24 | material: boxMaterial 25 | shape: brick 26 | xMax: 10 27 | xMin: 0 28 | yMax: 10 29 | yMin: 0 30 | zMax: 10 31 | zMin: 0 32 | 33 | Geometry: 34 | material: sourceMaterial 35 | shape: brick 36 | xMax: 1 37 | xMin: 0 38 | yMax: 1 39 | yMin: 0 40 | zMax: 1 41 | zMin: 0 42 | 43 | Material: 44 | name: boxMaterial 45 | nIsotopes: 10 46 | nReactions: 9 47 | sourceRate: 0 48 | totalCrossSection: 1e-20 49 | absorptionCrossSection: flat 50 | fissionCrossSection: flat 51 | scatteringCrossSection: flat 52 | absorptionCrossSectionRatio: 0 53 | fissionCrossSectionRatio: 0 54 |
scatteringCrossSectionRatio: 1 55 | 56 | Material: 57 | name: sourceMaterial 58 | nIsotopes: 10 59 | nReactions: 9 60 | sourceRate: 1e-2 61 | totalCrossSection: 1e-20 62 | absorptionCrossSection: flat 63 | fissionCrossSection: flat 64 | scatteringCrossSection: flat 65 | absorptionCrossSectionRatio: 0 66 | fissionCrossSectionRatio: 0 67 | scatteringCrossSectionRatio: 1 68 | 69 | CrossSection: 70 | name: flat 71 | A: 0 72 | B: 0 73 | C: 0 74 | D: 0 75 | E: 1 76 | nuBar: 2.4 77 | 78 | 79 | -------------------------------------------------------------------------------- /Examples/AllScattering/scatteringOnly.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: streamingProblem.inp 5 | boundaryCondition: octant 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | lx: 100 10 | ly: 100 11 | lz: 100 12 | nParticles: 10000000 13 | nSteps: 10 14 | nx: 10 15 | ny: 10 16 | nz: 10 17 | seed: 1029384756 18 | xDom: 0 19 | yDom: 0 20 | zDom: 0 21 | eMax: 20 22 | eMin: 1e-9 23 | nGroups: 230 24 | 25 | Geometry: 26 | material: sourceMaterial 27 | shape: brick 28 | xMax: 100 29 | xMin: 0 30 | yMax: 100 31 | yMin: 0 32 | zMax: 100 33 | zMin: 0 34 | 35 | Material: 36 | name: sourceMaterial 37 | nIsotopes: 10 38 | nReactions: 9 39 | sourceRate: 1e+10 40 | totalCrossSection: 0.1 41 | absorptionCrossSection: flat 42 | fissionCrossSection: flat 43 | scatteringCrossSection: flat 44 | absorptionCrossSectionRatio: 0 45 | fissionCrossSectionRatio: 0 46 | scatteringCrossSectionRatio: 1 47 | 48 | CrossSection: 49 | name: flat 50 | A: 0 51 | B: 0 52 | C: 0 53 | D: 0 54 | E: 1 55 | nuBar: 2.4 56 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/00_README.TXT: -------------------------------------------------------------------------------- 1 | This is Quicksilver Problem #1 for the CORAL2 Benchmark. 2 | 3 | This problem is required. 4 | 5 | The input files in this directory are configured to support a scaling 6 | study and collection of the Figure of Merit on Vulcan (BG/Q) at LLNL. 7 | 8 | The essential physics of the problem are defined in the input file 9 | Coral2_P1.inp. The parameters in this file should not be changed. 10 | Parameters to set the size of the problem (number of particles, number 11 | of mesh elements, size of domain, and MPI decomposition) can all be 12 | specified on the command line (for example see P1_64t.sh). Alternatively, 13 | you can copy Coral2_P1.inp to a new file and add the necessary 14 | parameters (see Coral2_P1_1.inp). 15 | 16 | Note that parameters in the input deck override corresponding command 17 | line arguments. 18 | 19 | For the scaling study here, we have chosen 4096 mesh elements per node. 20 | This allows a uniform spatial decomposition of mesh elements for both 1 21 | rank per node and 64 ranks per node (and any power of two in between). 22 | We also choose 40 particles per mesh element. This is divisible by 10 23 | (so we get an integer number of particles sourced in) and gives a 24 | reasonable cycle time of 2-4 seconds. 25 | 26 | 27 | MANIFEST: 28 | 29 | 00_README.TXT This file 30 | Coral2_P1.inp Input without problem size specification. 31 | Useful to build scaling study with command line 32 | arguments.
33 | Coral2_P1_1.inp Input file for a single MPI rank 34 | Coral2_P1_4096.inp Input file for 4096 MPI ranks 35 | P1_04t.sh Example scaling study for BG/Q with 4 threads per rank 36 | P1_16t.sh Example scaling study for BG/Q with 16 threads per rank 37 | P1_64t.sh Example scaling study for BG/Q with 64 threads per rank 38 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/Coral2_P1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 2e-09 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | mpiThreadMultiple: 0 9 | nSteps: 100 10 | seed: 1029384756 11 | eMax: 20 12 | eMin: 1e-09 13 | nGroups: 230 14 | lowWeightCutoff: 0.001 15 | coralBenchmark: 1 16 | 17 | Geometry: 18 | material: sourceMaterial 19 | shape: brick 20 | xMax: 10000 21 | xMin: 0 22 | yMax: 10000 23 | yMin: 0 24 | zMax: 10000 25 | zMin: 0 26 | 27 | Material: 28 | name: sourceMaterial 29 | mass: 12.011 30 | nIsotopes: 20 31 | nReactions: 9 32 | sourceRate: 1e+10 33 | totalCrossSection: 1.5 34 | absorptionCrossSection: flat 35 | fissionCrossSection: flat 36 | scatteringCrossSection: flat 37 | absorptionCrossSectionRatio: 0.04 38 | fissionCrossSectionRatio: 0.05 39 | scatteringCrossSectionRatio: 1 40 | 41 | CrossSection: 42 | name: flat 43 | A: 0 44 | B: 0 45 | C: 0 46 | D: 0 47 | E: 1 48 | nuBar: 1.6 49 | 50 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 2e-09 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | mpiThreadMultiple: 0 9 | lx: 16 10 | ly: 16 11 | lz: 16 12 | nParticles: 163840 13 | nSteps: 100 14 | nx: 16 15 | ny: 16 16 | nz: 16 17 | xDom: 1 18 | yDom: 1 19 | zDom: 1 20 | seed: 1029384756 21 | eMax: 20 22 | eMin: 1e-09 23 | nGroups: 230 24 | lowWeightCutoff: 0.001 25 | coralBenchmark: 1 26 | 27 | Geometry: 28 | material: sourceMaterial 29 | shape: brick 30 | xMax: 10000 31 | xMin: 0 32 | yMax: 10000 33 | yMin: 0 34 | zMax: 10000 35 | zMin: 0 36 | 37 | Material: 38 | name: sourceMaterial 39 | mass: 12.011 40 | nIsotopes: 20 41 | nReactions: 9 42 | sourceRate: 1e+10 43 | totalCrossSection: 1.5 44 | absorptionCrossSection: flat 45 | fissionCrossSection: flat 46 | scatteringCrossSection: flat 47 | absorptionCrossSectionRatio: 0.04 48 | fissionCrossSectionRatio: 0.05 49 | scatteringCrossSectionRatio: 1 50 | 51 | CrossSection: 52 | name: flat 53 | A: 0 54 | B: 0 55 | C: 0 56 | D: 0 57 | E: 1 58 | nuBar: 1.6 59 | 60 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 2e-09 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | mpiThreadMultiple: 0 9 | lx: 256 10 | ly: 256 11 | lz: 256 12 | nParticles: 671088640 13 | nSteps: 100 14 | nx: 256 15 | ny: 256 16 | nz: 256 17 | xDom: 16 18 | yDom: 16 19 | zDom: 16 20 | seed: 1029384756 21 | eMax: 20 22 | eMin: 1e-09 23 | nGroups: 230 24 | lowWeightCutoff: 0.001 25 | coralBenchmark: 1 26 | 27 | Geometry: 28 | material: sourceMaterial 29 | shape: brick 30 | xMax: 10000 31 | xMin: 0 32 | yMax: 10000 33 | yMin: 0 34 | zMax: 
10000 35 | zMin: 0 36 | 37 | Material: 38 | name: sourceMaterial 39 | mass: 12.011 40 | nIsotopes: 20 41 | nReactions: 9 42 | sourceRate: 1e+10 43 | totalCrossSection: 1.5 44 | absorptionCrossSection: flat 45 | fissionCrossSection: flat 46 | scatteringCrossSection: flat 47 | absorptionCrossSectionRatio: 0.04 48 | fissionCrossSectionRatio: 0.05 49 | scatteringCrossSectionRatio: 1 50 | 51 | CrossSection: 52 | name: flat 53 | A: 0 54 | B: 0 55 | C: 0 56 | D: 0 57 | E: 1 58 | nuBar: 1.6 59 | 60 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/P1_04t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 1: 4 | 5 | # 16 ranks per node 6 | # 4 threads per rank 7 | # 4096 mesh elements per node 8 | # 40 particles per mesh element -> 163840 particles per node 9 | 10 | export OMP_NUM_THREADS=4 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N24576 -n393216 $QS -i Coral2_P1.inp -X 768 -Y 512 -Z 256 -x 768 -y 512 -z 256 -I 96 -J 64 -K 64 -n 4026531840 > p1n24576t04 15 | srun -N16384 -n262144 $QS -i Coral2_P1.inp -X 512 -Y 512 -Z 256 -x 512 -y 512 -z 256 -I 64 -J 64 -K 64 -n 2684354560 > p1n16384t04 16 | srun -N8192 -n131072 $QS -i Coral2_P1.inp -X 512 -Y 256 -Z 256 -x 512 -y 256 -z 256 -I 64 -J 64 -K 32 -n 1342177280 > p1n08192t04 17 | srun -N4096 -n65536 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 256 -x 256 -y 256 -z 256 -I 64 -J 32 -K 32 -n 671088640 > p1n04096t04 18 | srun -N2048 -n32768 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 128 -x 256 -y 256 -z 128 -I 32 -J 32 -K 32 -n 335544320 > p1n02048t04 19 | srun -N1024 -n16384 $QS -i Coral2_P1.inp -X 256 -Y 128 -Z 128 -x 256 -y 128 -z 128 -I 32 -J 32 -K 16 -n 167772160 > p1n01024t04 20 | srun -N512 -n8192 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 128 -x 128 -y 128 -z 128 -I 32 -J 16 -K 16 -n 83886080 > p1n00512t04 21 | srun -N256 -n4096 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 64 -x 128 -y 128 -z 64 -I 16 -J 16 -K 16 -n 41943040 > p1n00256t04 22 | srun -N128 -n2048 $QS -i Coral2_P1.inp -X 128 -Y 64 -Z 64 -x 128 -y 64 -z 64 -I 16 -J 16 -K 8 -n 20971520 > p1n00128t04 23 | srun -N64 -n1024 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 64 -x 64 -y 64 -z 64 -I 16 -J 8 -K 8 -n 10485760 > p1n00064t04 24 | srun -N32 -n512 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 8 -J 8 -K 8 -n 5242880 > p1n00032t04 25 | srun -N16 -n256 $QS -i Coral2_P1.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 8 -J 8 -K 4 -n 2621440 > p1n00016t04 26 | srun -N8 -n128 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 8 -J 4 -K 4 -n 1310720 > p1n00008t04 27 | srun -N4 -n64 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 16 -x 32 -y 32 -z 16 -I 4 -J 4 -K 4 -n 655360 > p1n00004t04 28 | srun -N2 -n32 $QS -i Coral2_P1.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 4 -J 4 -K 2 -n 327680 > p1n00002t04 29 | srun -N1 -n16 $QS -i Coral2_P1.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 4 -J 2 -K 2 -n 163840 > p1n00001t04 30 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/P1_16t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 1: 4 | 5 | # 4 ranks per node 6 | # 16 threads per rank 7 | # 4096 mesh elements per node 8 | # 40 particles per mesh element -> 163840 particles per node 9 | 10 | export OMP_NUM_THREADS=16 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N24576 -n98304 $QS -i Coral2_P1.inp -X 768 -Y 512 -Z 256 -x 768 -y 512 -z 256 -I 96 -J 32 -K 32 -n
4026531840 > p1n24576t16 15 | srun -N16384 -n65536 $QS -i Coral2_P1.inp -X 512 -Y 512 -Z 256 -x 512 -y 512 -z 256 -I 64 -J 32 -K 32 -n 2684354560 > p1n16384t16 16 | srun -N8192 -n32768 $QS -i Coral2_P1.inp -X 512 -Y 256 -Z 256 -x 512 -y 256 -z 256 -I 32 -J 32 -K 32 -n 1342177280 > p1n08192t16 17 | srun -N4096 -n16384 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 256 -x 256 -y 256 -z 256 -I 32 -J 32 -K 16 -n 671088640 > p1n04096t16 18 | srun -N2048 -n8192 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 128 -x 256 -y 256 -z 128 -I 32 -J 16 -K 16 -n 335544320 > p1n02048t16 19 | srun -N1024 -n4096 $QS -i Coral2_P1.inp -X 256 -Y 128 -Z 128 -x 256 -y 128 -z 128 -I 16 -J 16 -K 16 -n 167772160 > p1n01024t16 20 | srun -N512 -n2048 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 128 -x 128 -y 128 -z 128 -I 16 -J 16 -K 8 -n 83886080 > p1n00512t16 21 | srun -N256 -n1024 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 64 -x 128 -y 128 -z 64 -I 16 -J 8 -K 8 -n 41943040 > p1n00256t16 22 | srun -N128 -n512 $QS -i Coral2_P1.inp -X 128 -Y 64 -Z 64 -x 128 -y 64 -z 64 -I 8 -J 8 -K 8 -n 20971520 > p1n00128t16 23 | srun -N64 -n256 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 64 -x 64 -y 64 -z 64 -I 8 -J 8 -K 4 -n 10485760 > p1n00064t16 24 | srun -N32 -n128 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 8 -J 4 -K 4 -n 5242880 > p1n00032t16 25 | srun -N16 -n64 $QS -i Coral2_P1.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 4 -J 4 -K 4 -n 2621440 > p1n00016t16 26 | srun -N8 -n32 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 4 -J 4 -K 2 -n 1310720 > p1n00008t16 27 | srun -N4 -n16 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 16 -x 32 -y 32 -z 16 -I 4 -J 2 -K 2 -n 655360 > p1n00004t16 28 | srun -N2 -n8 $QS -i Coral2_P1.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 2 -J 2 -K 2 -n 327680 > p1n00002t16 29 | srun -N1 -n4 $QS -i Coral2_P1.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 2 -J 2 -K 1 -n 163840 > p1n00001t16 30 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/P1_64t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 1: 4 | 5 | # 1 rank per node 6 | # 64 threads per rank 7 | # 4096 mesh elements per node 8 | # 40 particles per mesh element -> 163840 particles per node 9 | 10 | export OMP_NUM_THREADS=64 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N24576 -n24576 $QS -i Coral2_P1.inp -X 768 -Y 512 -Z 256 -x 768 -y 512 -z 256 -I 48 -J 32 -K 16 -n 4026531840 > p1n24576t64 15 | srun -N16384 -n16384 $QS -i Coral2_P1.inp -X 512 -Y 512 -Z 256 -x 512 -y 512 -z 256 -I 32 -J 32 -K 16 -n 2684354560 > p1n16384t64 16 | srun -N8192 -n8192 $QS -i Coral2_P1.inp -X 512 -Y 256 -Z 256 -x 512 -y 256 -z 256 -I 32 -J 16 -K 16 -n 1342177280 > p1n08192t64 17 | srun -N4096 -n4096 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 256 -x 256 -y 256 -z 256 -I 16 -J 16 -K 16 -n 671088640 > p1n04096t64 18 | srun -N2048 -n2048 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 128 -x 256 -y 256 -z 128 -I 16 -J 16 -K 8 -n 335544320 > p1n02048t64 19 | srun -N1024 -n1024 $QS -i Coral2_P1.inp -X 256 -Y 128 -Z 128 -x 256 -y 128 -z 128 -I 16 -J 8 -K 8 -n 167772160 > p1n01024t64 20 | srun -N512 -n512 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 128 -x 128 -y 128 -z 128 -I 8 -J 8 -K 8 -n 83886080 > p1n00512t64 21 | srun -N256 -n256 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 64 -x 128 -y 128 -z 64 -I 8 -J 8 -K 4 -n 41943040 > p1n00256t64 22 | srun -N128 -n128 $QS -i Coral2_P1.inp -X 128 -Y 64 -Z 64 -x 128 -y 64 -z 64 -I 8 -J 4 -K 4 -n 20971520 > p1n00128t64 23 | srun -N64 -n64 $QS -i
Coral2_P1.inp -X 64 -Y 64 -Z 64 -x 64 -y 64 -z 64 -I 4 -J 4 -K 4 -n 10485760 > p1n00064t64 24 | srun -N32 -n32 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 4 -J 4 -K 2 -n 5242880 > p1n00032t64 25 | srun -N16 -n16 $QS -i Coral2_P1.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 4 -J 2 -K 2 -n 2621440 > p1n00016t64 26 | srun -N8 -n8 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 2 -J 2 -K 2 -n 1310720 > p1n00008t64 27 | srun -N4 -n4 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 16 -x 32 -y 32 -z 16 -I 2 -J 2 -K 1 -n 655360 > p1n00004t64 28 | srun -N2 -n2 $QS -i Coral2_P1.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 2 -J 1 -K 1 -n 327680 > p1n00002t64 29 | srun -N1 -n1 $QS -i Coral2_P1.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 1 -J 1 -K 1 -n 163840 > p1n00001t64 30 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/00_README.TXT: -------------------------------------------------------------------------------- 1 | This is Quicksilver Problem #2 for the CORAL2 Benchmark. 2 | 3 | This problem is *not* required. 4 | 5 | Cross sections in this problem are tailored to give a broader energy 6 | spectrum for the particles and a different reaction mix compared to 7 | problem 1. 8 | 9 | The essential physics of the problem are defined in the input file 10 | Coral2_P2.inp. The parameters in this file should not be changed. 11 | Parameters to set the size of the problem (number of particles, number 12 | of mesh elements, size of domain, and MPI decomposition) can all be 13 | specified on the command line (for example see P2_64t.sh). Alternatively, 14 | you can copy Coral2_P2.inp to a new file and add the necessary 15 | parameters (see Coral2_P2_1.inp). 16 | 17 | Note that parameters in the input deck override corresponding command 18 | line arguments. 19 | 20 | For the scaling study here, we have chosen 11^3 = 1331 mesh elements per 21 | node. This makes it difficult to uniformly decompose the mesh 22 | elements for anything other than 1 rank per node. We also choose 40 23 | particles per mesh element. This is divisible by 10 (so we get an 24 | integer number of particles sourced in) and gives a reasonable cycle time 25 | of 2-4 seconds. 26 | 27 | MANIFEST: 28 | 29 | 00_README.TXT This file 30 | Coral2_P2.inp Input without problem size specification. 31 | Useful to build scaling study with command line 32 | arguments.
33 | Coral2_P2_1.inp Input file for a single MPI rank 34 | Coral2_P2_4096.inp Input file for 4096 MPI ranks 35 | P2_64t.sh Example scaling study for BG/Q with 64 threads per rank 36 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/Coral2_P2.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | nSteps: 100 9 | seed: 1029384756 10 | eMax: 20 11 | eMin: 1e-08 12 | nGroups: 230 13 | lowWeightCutoff: 0.001 14 | bTally: 1 15 | fTally: 1 16 | cTally: 1 17 | coralBenchmark: 2 18 | 19 | Geometry: 20 | material: sourceMaterial 21 | shape: brick 22 | xMax: 10000 23 | xMin: 0 24 | yMax: 10000 25 | yMin: 0 26 | zMax: 10000 27 | zMin: 0 28 | 29 | Material: 30 | name: sourceMaterial 31 | mass: 1.5 32 | nIsotopes: 10 33 | nReactions: 3 34 | sourceRate: 1e+10 35 | totalCrossSection: 16.75 36 | absorptionCrossSection: absorb 37 | fissionCrossSection: fission 38 | scatteringCrossSection: scatter 39 | absorptionCrossSectionRatio: 10 40 | fissionCrossSectionRatio: 8 41 | scatteringCrossSectionRatio: 82 42 | absorptionCrossSection: absorb 43 | fissionCrossSection: fission 44 | scatteringCrossSection: scatter 45 | 46 | CrossSection: 47 | name: absorb 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: -0.2 52 | E: 2 53 | 54 | CrossSection: 55 | name: fission 56 | A: 0 57 | B: 0 58 | C: 0 59 | D: -0.2 60 | E: 2 61 | nuBar: 2 62 | 63 | CrossSection: 64 | name: scatter 65 | A: 0 66 | B: 0 67 | C: 0 68 | D: 0 69 | E: 97 70 | 71 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 1 9 | ly: 1 10 | lz: 1 11 | nParticles: 53240 12 | nSteps: 100 13 | nx: 11 14 | ny: 11 15 | nz: 11 16 | seed: 1029384756 17 | xDom: 1 18 | yDom: 1 19 | zDom: 1 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 10 43 | nReactions: 3 44 | sourceRate: 1e+10 45 | totalCrossSection: 16.75 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 |
loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 16 9 | ly: 16 10 | lz: 16 11 | nParticles: 436142080 12 | nSteps: 100 13 | nx: 176 14 | ny: 176 15 | nz: 176 16 | seed: 1029384756 17 | xDom: 16 18 | yDom: 16 19 | zDom: 16 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 10 43 | nReactions: 3 44 | sourceRate: 1e+10 45 | totalCrossSection: 16.75 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/P2_64t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 2: 4 | 5 | # 1 rank per node 6 | # 64 threads per rank 7 | # 1331 mesh elements per node (11^3) 8 | # 40 particles per mesh element -> 53240 particles per node 9 | 10 | export OMP_NUM_THREADS=64 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N1 -n1 $QS -i Coral2_P2.inp -X 1 -Y 1 -Z 1 -x 11 -y 11 -z 11 -I 1 -J 1 -K 1 -n 53240 > p2n00001t64 15 | srun -N2 -n2 $QS -i Coral2_P2.inp -X 2 -Y 1 -Z 1 -x 22 -y 11 -z 11 -I 2 -J 1 -K 1 -n 106480 > p2n00002t64 16 | srun -N4 -n4 $QS -i Coral2_P2.inp -X 2 -Y 2 -Z 1 -x 22 -y 22 -z 11 -I 2 -J 2 -K 1 -n 212960 > p2n00004t64 17 | srun -N8 -n8 $QS -i Coral2_P2.inp -X 2 -Y 2 -Z 2 -x 22 -y 22 -z 22 -I 2 -J 2 -K 2 -n 425920 > p2n00008t64 18 | srun -N16 -n16 $QS -i Coral2_P2.inp -X 4 -Y 2 -Z 2 -x 44 -y 22 -z 22 -I 4 -J 2 -K 2 -n 851840 > p2n00016t64 19 | srun -N32 -n32 $QS -i Coral2_P2.inp -X 4 -Y 4 -Z 2 -x 44 -y 44 -z 22 -I 4 -J 4 -K 2 -n 1703680 > p2n00032t64 20 | srun -N64 -n64 $QS -i Coral2_P2.inp -X 4 -Y 4 -Z 4 -x 44 -y 44 -z 44 -I 4 -J 4 -K 4 -n 3407360 > p2n00064t64 21 | srun -N128 -n128 $QS -i Coral2_P2.inp -X 8 -Y 4 -Z 4 -x 88 -y 44 -z 44 -I 8 -J 4 -K 4 -n 6814720 > p2n00128t64 22 | srun -N256 -n256 $QS -i Coral2_P2.inp -X 8 -Y 8 -Z 4 -x 88 -y 88 -z 44 -I 8 -J 8 -K 4 -n 13629440 > p2n00256t64 23 | srun -N512 -n512 $QS -i Coral2_P2.inp -X 8 -Y 8 -Z 8 -x 88 -y 88 -z 88 -I 8 -J 8 -K 8 -n 27258880 > p2n00512t64 24 | srun -N1024 -n1024 $QS -i Coral2_P2.inp -X 16 -Y 8 -Z 8 -x 176 -y 88 -z 88 -I 16 -J 8 -K 8 -n 54517760 > p2n01024t64 25 | srun -N2048 -n2048 $QS -i Coral2_P2.inp -X 16 -Y 16 -Z 8 -x 176 -y 176 -z 88 -I 16 -J 16 -K 8 -n 109035520 > p2n02048t64 26 | srun -N4096 -n4096 $QS -i Coral2_P2.inp -X 16 -Y 16 -Z 16 -x 176 -y 176 -z 176 -I 16 -J 16 -K 16 -n 218071040 > p2n04096t64 27 | srun -N8192 -n8192 $QS -i Coral2_P2.inp -X 32 -Y 16 -Z 16 -x 352 -y 176 -z 176 -I 32 -J 16 -K 16 -n 436142080 > p2n08192t64 28 | srun -N16384 -n16384 $QS -i Coral2_P2.inp -X 32 -Y 32 -Z 16 -x 352 -y 352 -z 176 -I 32 -J 32 -K 16 -n 872284160 >
p2n16384t64 29 | srun -N24576 -n24576 $QS -i Coral2_P2.inp -X 48 -Y 32 -Z 16 -x 528 -y 352 -z 176 -I 48 -J 32 -K 16 -n 1308426240 > p2n24576t64 30 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/00_README.TXT: -------------------------------------------------------------------------------- 1 | This is the Quicksilver CTS2 Benchmark Problem. 2 | 3 | This problem is based on the optional Problem #2 from the CORAL2 4 | Benchmark, but it has been changed slightly. 5 | 6 | The essential physics of the problem are defined in the input file 7 | CTS2.inp. The parameters in this file should not be changed. 8 | Parameters to set the size of the problem (number of particles, number 9 | of mesh elements, size of domain, and MPI decomposition) can all be 10 | specified on the command line (for example see CTS2_scaling.sh). 11 | Alternatively, you can copy CTS2.inp to a new file and add the 12 | necessary parameters (see CTS2_1.inp). 13 | 14 | Note that parameters in the input deck override corresponding command 15 | line arguments. 16 | 17 | This problem should be run on a single node with weak scaling 18 | over the number of cores on a node. You are required to run this 19 | problem with 20 | * 1 rank per core 21 | * 16^3 = 4096 mesh elements per rank 22 | * 40960 particles per rank 23 | 24 | 25 | MANIFEST: 26 | 27 | 00_README.TXT This file 28 | CTS2.inp Input without problem size specification. 29 | Useful to build scaling study with command line 30 | arguments. 31 | CTS2_1.inp Input file for a single MPI rank 32 | CTS2_36.inp Input file for 36 MPI ranks 33 | CTS2_scaling.sh Example scaling study for 36 cores per node 34 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1.1e-07 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | nSteps: 100 9 | seed: 1029384756 10 | eMax: 20 11 | eMin: 1e-08 12 | nGroups: 230 13 | lowWeightCutoff: 0.001 14 | bTally: 1 15 | fTally: 1 16 | cTally: 1 17 | coralBenchmark: 2 18 | 19 | Geometry: 20 | material: sourceMaterial 21 | shape: brick 22 | xMax: 10000 23 | xMin: 0 24 | yMax: 10000 25 | yMin: 0 26 | zMax: 10000 27 | zMin: 0 28 | 29 | Material: 30 | name: sourceMaterial 31 | mass: 1.5 32 | nIsotopes: 20 33 | nReactions: 9 34 | sourceRate: 1e+10 35 | totalCrossSection: 1.5227 36 | absorptionCrossSection: absorb 37 | fissionCrossSection: fission 38 | scatteringCrossSection: scatter 39 | absorptionCrossSectionRatio: 10 40 | fissionCrossSectionRatio: 8 41 | scatteringCrossSectionRatio: 82 42 | absorptionCrossSection: absorb 43 | fissionCrossSection: fission 44 | scatteringCrossSection: scatter 45 | 46 | CrossSection: 47 | name: absorb 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: -0.2 52 | E: 2 53 | 54 | CrossSection: 55 | name: fission 56 | A: 0 57 | B: 0 58 | C: 0 59 | D: -0.2 60 | E: 2 61 | nuBar: 2 62 | 63 | CrossSection: 64 | name: scatter 65 | A: 0 66 | B: 0 67 | C: 0 68 | D: 0 69 | E: 97 70 | 71 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2_1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1.1e-07 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 16 9 | ly: 16 10 | lz: 16 11 | nParticles: 40960 12 |
nSteps: 100 13 | nx: 16 14 | ny: 16 15 | nz: 16 16 | seed: 1029384756 17 | xDom: 1 18 | yDom: 1 19 | zDom: 1 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 20 43 | nReactions: 9 44 | sourceRate: 1e+10 45 | totalCrossSection: 1.5227 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2_36.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1.1e-07 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 48 9 | ly: 48 10 | lz: 64 11 | nParticles: 1474560 12 | nSteps: 100 13 | nx: 48 14 | ny: 48 15 | nz: 64 16 | seed: 1029384756 17 | xDom: 3 18 | yDom: 3 19 | zDom: 4 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 20 43 | nReactions: 9 44 | sourceRate: 1e+10 45 | totalCrossSection: 1.5227 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2_scaling.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Quicksilver CTS Benchmark 4 | # weak scaling on a single node: 5 | 6 | # 1 rank per core 7 | # 4096 mesh elements per rank (16^3) 8 | # 10 particles per mesh element -> 40960 particles per rank 9 | 10 | export OMP_NUM_THREADS=1 11 | 12 | QS=../../src/qs 13 | 14 | srun -N1 -n1 $QS -i CTS2.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 1 -J 1 -K 1 -n 40960 > CTS2_01.out 15 | srun -N1 -n2 $QS -i CTS2.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 2 -J 1 -K 1 -n 81920 > CTS2_02.out 16 | srun -N1 -n4 $QS -i CTS2.inp -X 32 -Y 32 -Z 16 -x 32 -y 
32 -z 16 -I 2 -J 2 -K 1 -n 163840 > CTS2_04.out 17 | srun -N1 -n8 $QS -i CTS2.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 2 -J 2 -K 2 -n 327680 > CTS2_08.out 18 | srun -N1 -n16 $QS -i CTS2.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 4 -J 2 -K 2 -n 655360 > CTS2_16.out 19 | srun -N1 -n32 $QS -i CTS2.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 4 -J 4 -K 2 -n 1310720 > CTS2_32.out 20 | srun -N1 -n36 $QS -i CTS2.inp -X 48 -Y 48 -Z 64 -x 48 -y 48 -z 64 -I 3 -J 3 -K 4 -n 1474560 > CTS2_36.out 21 | 22 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | lx: 100 10 | ly: 100 11 | lz: 100 12 | nParticles: 100000000 13 | nSteps: 10 14 | nx: 10 15 | ny: 10 16 | nz: 10 17 | seed: 1029384756 18 | xDom: 0 19 | yDom: 0 20 | zDom: 0 21 | eMax: 20 22 | eMin: 1e-09 23 | nGroups: 230 24 | 25 | Geometry: 26 | material: sourceMaterial 27 | shape: brick 28 | xMax: 100 29 | xMin: 0 30 | yMax: 100 31 | yMin: 0 32 | zMax: 100 33 | zMin: 0 34 | 35 | Material: 36 | name: sourceMaterial 37 | nIsotopes: 10 38 | nReactions: 9 39 | sourceRate: 1e+10 40 | totalCrossSection: 1 41 | absorptionCrossSection: flat 42 | fissionCrossSection: flat 43 | scatteringCrossSection: flat 44 | absorptionCrossSectionRatio: 1 45 | fissionCrossSectionRatio: 0.1 46 | scatteringCrossSectionRatio: 1 47 | 48 | CrossSection: 49 | name: flat 50 | A: 0 51 | B: 0 52 | C: 0 53 | D: 0 54 | E: 1 55 | nuBar: 2.4 56 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v3.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 1 10 | lx: 100 11 | ly: 100 12 | lz: 100 13 | nParticles: 20000000 14 | nSteps: 10 15 | nx: 12 16 | ny: 12 17 | nz: 12 18 | seed: 1029384756 19 | eMax: 20 20 | eMin: 1e-09 21 | nGroups: 230 22 | 23 | Geometry: 24 | material: sourceMaterial 25 | shape: brick 26 | xMax: 100 27 | xMin: 0 28 | yMax: 100 29 | yMin: 0 30 | zMax: 100 31 | zMin: 0 32 | 33 | Material: 34 | name: sourceMaterial 35 | nIsotopes: 10 36 | nReactions: 9 37 | sourceRate: 1e+10 38 | totalCrossSection: 1 39 | absorptionCrossSection: flat 40 | fissionCrossSection: flat 41 | scatteringCrossSection: flat 42 | absorptionCrossSectionRatio: 1 43 | fissionCrossSectionRatio: 0.1 44 | scatteringCrossSectionRatio: 1 45 | 46 | CrossSection: 47 | name: flat 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: 0 52 | E: 1 53 | nuBar: 2.4 54 | 55 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v3_wq.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 0 10 | lx: 100 11 | ly: 100 12 | lz: 100 13 | nParticles: 20000000 14 | nSteps: 10 15 | nx: 12 16 | ny: 12 17 | nz: 12 18 | seed: 1029384756 19 | eMax: 20 20 | eMin: 1e-09 21 | nGroups: 230 22 | 23 | Geometry: 24 | material: sourceMaterial 25 | shape: 
brick 26 | xMax: 100 27 | xMin: 0 28 | yMax: 100 29 | yMin: 0 30 | zMax: 100 31 | zMin: 0 32 | 33 | Material: 34 | name: sourceMaterial 35 | nIsotopes: 10 36 | nReactions: 9 37 | sourceRate: 1e+10 38 | totalCrossSection: 1 39 | absorptionCrossSection: flat 40 | fissionCrossSection: flat 41 | scatteringCrossSection: flat 42 | absorptionCrossSectionRatio: 1 43 | fissionCrossSectionRatio: 0.1 44 | scatteringCrossSectionRatio: 1 45 | 46 | CrossSection: 47 | name: flat 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: 0 52 | E: 1 53 | nuBar: 2.4 54 | 55 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v4_tm.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 1 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | mpiThreadMultiple: 1 16 | 17 | Geometry: 18 | material: sourceMaterial 19 | shape: brick 20 | xMax: 1000 21 | xMin: 0 22 | yMax: 1000 23 | yMin: 0 24 | zMax: 1000 25 | zMin: 0 26 | 27 | Material: 28 | name: sourceMaterial 29 | nIsotopes: 10 30 | nReactions: 9 31 | sourceRate: 1e+10 32 | totalCrossSection: 1 33 | absorptionCrossSection: flat 34 | fissionCrossSection: flat 35 | scatteringCrossSection: flat 36 | absorptionCrossSectionRatio: 1 37 | fissionCrossSectionRatio: 0.1 38 | scatteringCrossSectionRatio: 1 39 | 40 | CrossSection: 41 | name: flat 42 | A: 0 43 | B: 0 44 | C: 0 45 | D: 0 46 | E: 1 47 | nuBar: 2.4 48 | 49 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v4_ts.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 1 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | mpiThreadMultiple: 0 16 | 17 | 18 | Geometry: 19 | material: sourceMaterial 20 | shape: brick 21 | xMax: 1000 22 | xMin: 0 23 | yMax: 1000 24 | yMin: 0 25 | zMax: 1000 26 | zMin: 0 27 | 28 | Material: 29 | name: sourceMaterial 30 | nIsotopes: 10 31 | nReactions: 9 32 | sourceRate: 1e+10 33 | totalCrossSection: 1 34 | absorptionCrossSection: flat 35 | fissionCrossSection: flat 36 | scatteringCrossSection: flat 37 | absorptionCrossSectionRatio: 1 38 | fissionCrossSectionRatio: 0.1 39 | scatteringCrossSectionRatio: 1 40 | 41 | CrossSection: 42 | name: flat 43 | A: 0 44 | B: 0 45 | C: 0 46 | D: 0 47 | E: 1 48 | nuBar: 2.4 49 | 50 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v5_ts.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v5_ts.inp 5 | boundaryCondition: reflect 6 | loadBalance: 0 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 0 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | lowWeightCutoff: 0.001 16 | 17 | Geometry: 18 | material: sourceMaterial 19 | shape: brick 20 | xMax: 1000 21 | xMin: 0 22 | yMax: 1000 23 | yMin: 0 24 | zMax: 1000 25 | zMin: 0 26 | 27 | Material: 28 | name: sourceMaterial 
29 | nIsotopes: 10 30 | nReactions: 9 31 | sourceRate: 1e+10 32 | totalCrossSection: 10 33 | absorptionCrossSection: flat 34 | fissionCrossSection: flat 35 | scatteringCrossSection: flat 36 | absorptionCrossSectionRatio: 0.04 37 | fissionCrossSectionRatio: 0.05 38 | scatteringCrossSectionRatio: 1 39 | 40 | CrossSection: 41 | name: flat 42 | A: 0 43 | B: 0 44 | C: 0 45 | D: 0 46 | E: 1 47 | nuBar: 1.0 48 | 49 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v7_ts.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-06 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v7_ts.inp 5 | boundaryCondition: reflect 6 | loadBalance: 0 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 0 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | lowWeightCutoff: 0.001 16 | 17 | 18 | Geometry: 19 | material: sourceMaterial 20 | shape: brick 21 | xMax: 1000 22 | xMin: 0 23 | yMax: 1000 24 | yMin: 0 25 | zMax: 1000 26 | zMin: 0 27 | 28 | Material: 29 | name: sourceMaterial 30 | mass: 12.011 31 | nIsotopes: 10 32 | nReactions: 9 33 | sourceRate: 1e+10 34 | totalCrossSection: 0.1 35 | absorptionCrossSection: flat 36 | fissionCrossSection: flat 37 | scatteringCrossSection: flat 38 | absorptionCrossSectionRatio: 0.1086 39 | fissionCrossSectionRatio: 0.0969 40 | scatteringCrossSectionRatio: 0.7946 41 | 42 | CrossSection: 43 | name: flat 44 | A: 0 45 | B: 0 46 | C: 0 47 | D: 0 48 | E: 1 49 | nuBar: 1.0 50 | 51 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_aprun_trinity_01.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MSUB -lnodes=1:knl,os=CLE_quad_cache 3 | #MSUB -l walltime=2:00:00 4 | #MSUB -A tos2-8 5 | 6 | # 7 | # 8 | # To run interactively, grab a node like so: 9 | # 10 | # msub -I -lnodes=1:knl,os=CLE_quad_cache 11 | # 12 | # This relies on the bash shell for the 2>&1 | tee to work.
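#
# A quick pre-flight check (an editor's sketch, not part of the original
# study): the runs in this script always choose xDom*yDom*zDom equal to
# the MPI rank count, and mesh dimensions that divide evenly across the
# domains. With the 64-rank layout used below:
xDom=4; yDom=4; zDom=4; nx=20; ny=20; nz=20; ranks=64
test $((xDom * yDom * zDom)) -eq $ranks || echo "WARNING: ranks != xDom*yDom*zDom"
test $(((nx % xDom) + (ny % yDom) + (nz % zDom))) -eq 0 || echo "WARNING: mesh does not divide evenly"
#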
13 | # 14 | # To get average and max cycleTracking times: 15 | # grep "cycleTracking 10" *out | awk -F " " '{print $1 " " $4 " " $5}' 16 | # 17 | 18 | # #################### 19 | # Thread Funneled Runs - No Hyper Threads 20 | # #################### 21 | 22 | # Set this to where you have the code built on lustre 23 | cd /users/sdawson/Quicksilver-2017-Apr-19-12-45-27 24 | 25 | export MPICH_MAX_THREAD_SAFETY=funneled 26 | export OMP_PLACES=cores 27 | 28 | # (Per Node) 64 MPI x 1 Threads - Thread Funneled 29 | #export OMP_NUM_THREADS=1 30 | #time aprun -r 4 -n 64 -d 1 -j 1 -cc depth ./qs \ 31 | # --lx=400 --ly=400 --lz=400 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=2000000 \ 32 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0064.d001.j01-ts.out 33 | 34 | # (Per Node) 32 MPI x 2 Threads - Thread Funneled 35 | #export OMP_NUM_THREADS=2 36 | #time aprun -r 4 -n 32 -d 2 -j 1 -cc depth ./qs \ 37 | # --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=2000000 \ 38 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0032.d002.j01-ts.out 39 | 40 | # (Per Node) 16 MPI x 4 Threads - Thread Funneled 41 | #export OMP_NUM_THREADS=4 42 | #time aprun -r 4 -n 16 -d 4 -j 1 -cc depth ./qs \ 43 | # --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 44 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0016.d004.j01-ts.out 45 | 46 | # (Per Node) 8 MPI x 8 Threads - Thread Funneled 47 | #export OMP_NUM_THREADS=8 48 | #time aprun -r 4 -n 8 -d 8 -j 1 -cc depth ./qs \ 49 | # --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 50 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0008.d008.j01-ts.out 51 | 52 | # (Per Node) 4 MPI x 16 Threads - Thread Funneled 53 | #export OMP_NUM_THREADS=16 54 | #time aprun -r 4 -n 4 -d 16 -j 1 -cc depth ./qs \ 55 | # --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 56 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0004.d016.j01-ts.out 57 | 58 | # (Per Node) 2 MPI x 32 Threads - Thread Funneled 59 | #export OMP_NUM_THREADS=32 60 | #time aprun -r 4 -n 2 -d 32 -j 1 -cc depth ./qs \ 61 | # --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 62 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0002.d032.j01-ts.out 63 | 64 | # #################### 65 | # Thread Funneled Runs - 2 Hyper Threads 66 | # 67 | # As we add hyper threads, we do not change the problem size, ideally this will decrease time 68 | # spent in the threaded tracking though. 69 | # 70 | # Prior experience shows that while 4 hyper threads pays off on small node count, it is a wash 71 | # at higher node count, so let's stop at 2 hyper threads. 
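# In the hyper-threaded runs that follow, every rank/thread pairing
# multiplies out to the same 128 hardware threads (64 cores x 2 hyper
# threads per node). A small loop makes the pattern behind the six
# explicit run blocks below visible (illustrative sketch only):
for ranks in 64 32 16 8 4 2; do
  echo "plan: ${ranks} ranks x $((128 / ranks)) OMP threads = 128 hw threads"
done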
72 | # #################### 73 | 74 | export MPICH_MAX_THREAD_SAFETY=funneled 75 | export OMP_PLACES=threads 76 | 77 | export OMP_NUM_THREADS=2 78 | time aprun -r 4 -n 64 -d 2 -j 2 -cc depth ./qs \ 79 | --lx=400 --ly=400 --lz=400 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=2000000 \ 80 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0064.d002.j02-ts.out 81 | 82 | export OMP_NUM_THREADS=4 83 | time aprun -r 4 -n 32 -d 4 -j 2 -cc depth ./qs \ 84 | --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=2000000 \ 85 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0032.d004.j02-ts.out 86 | 87 | export OMP_NUM_THREADS=8 88 | time aprun -r 4 -n 16 -d 8 -j 2 -cc depth ./qs \ 89 | --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 90 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0016.d008.j02-ts.out 91 | 92 | export OMP_NUM_THREADS=16 93 | time aprun -r 4 -n 8 -d 16 -j 2 -cc depth ./qs \ 94 | --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 95 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0008.d016.j02-ts.out 96 | 97 | export OMP_NUM_THREADS=32 98 | time aprun -r 4 -n 4 -d 32 -j 2 -cc depth ./qs \ 99 | --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 100 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0004.d032.j02-ts.out 101 | 102 | export OMP_NUM_THREADS=64 103 | time aprun -r 4 -n 2 -d 64 -j 2 -cc depth ./qs \ 104 | --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 105 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0002.d064.j02-ts.out 106 | 107 | # 108 | # end of file 109 | # 110 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_aprun_trinity_02.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MSUB -lnodes=2:knl,os=CLE_quad_cache 3 | #MSUB -l walltime=2:00:00 4 | #MSUB -A tos2-8 5 | 6 | # 7 | # 8 | # To run interactively, grab a node like so: 9 | # 10 | # msub -I -lnodes=1:knl,os=CLE_quad_cache 11 | # 12 | # This relies on the bash shell for the 2>&1 | tee to work.
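#
# Weak-scaling bookkeeping for this 2-node script (an editor's sketch):
# the box, the mesh, and the particle count all double relative to the
# 1-node script, while the per-node work stays fixed at 2000000 particles:
nodes=2; particles_per_node=2000000
echo "expected --nParticles on ${nodes} nodes: $((nodes * particles_per_node))"
#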
13 | # 14 | # To get average and max cycleTracking times: 15 | # grep "cycleTracking 10" *out | awk -F " " '{print $1 " " $4 " " $5}' 16 | # 17 | 18 | # #################### 19 | # Thread Funneled Runs - No Hyper Threads 20 | # #################### 21 | 22 | # Set this to where you have the code built on lustre 23 | cd /users/sdawson/Quicksilver-2017-Apr-19-12-45-27 24 | 25 | export MPICH_MAX_THREAD_SAFETY=funneled 26 | export OMP_PLACES=cores 27 | 28 | # (Per Node) 64 MPI x 1 Threads - Thread Funneled 29 | export OMP_NUM_THREADS=1 30 | time aprun -r 4 -n 128 -d 1 -j 1 -cc depth ./qs \ 31 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=4000000 \ 32 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0128.d001.j01-ts.out 33 | 34 | # (Per Node) 32 MPI x 2 Threads - Thread Funneled 35 | export OMP_NUM_THREADS=2 36 | time aprun -r 4 -n 64 -d 2 -j 1 -cc depth ./qs \ 37 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=4000000 \ 38 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0064.d002.j01-ts.out 39 | 40 | # (Per Node) 16 MPI x 4 Threads - Thread Funneled 41 | export OMP_NUM_THREADS=4 42 | time aprun -r 4 -n 32 -d 4 -j 1 -cc depth ./qs \ 43 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 44 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0032.d004.j01-ts.out 45 | 46 | # (Per Node) 8 MPI x 8 Threads - Thread Funneled 47 | export OMP_NUM_THREADS=8 48 | time aprun -r 4 -n 16 -d 8 -j 1 -cc depth ./qs \ 49 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 50 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0016.d008.j01-ts.out 51 | 52 | # (Per Node) 4 MPI x 16 Threads - Thread Funneled 53 | export OMP_NUM_THREADS=16 54 | time aprun -r 4 -n 8 -d 16 -j 1 -cc depth ./qs \ 55 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 56 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0008.d016.j01-ts.out 57 | 58 | # (Per Node) 2 MPI x 32 Threads - Thread Funneled 59 | export OMP_NUM_THREADS=32 60 | time aprun -r 4 -n 4 -d 32 -j 1 -cc depth ./qs \ 61 | --lx=200 --ly=200 --lz=100 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 62 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0004.d032.j01-ts.out 63 | 64 | # #################### 65 | # Thread Funneled Runs - 2 Hyper Threads 66 | # 67 | # As we add hyper threads, we do not change the problem size, ideally this will decrease time 68 | # spent in the threaded tracking though. 69 | # 70 | # Prior experience shows that while 4 hyper threads pays off on small node count, it is a wash 71 | # at higher node count, so let's stop at 2 hyper threads. 
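# To boil the outputs of the runs in this script down to one line per
# run, the grep from the header can be extended with awk. This assumes,
# as the header comment states, that fields 4 and 5 of the matched line
# hold the average and max cycleTracking times (sketch only):
grep "cycleTracking 10" *out | awk '{ sum += $4; if ($5 > mx) mx = $5; n++ }
  END { if (n) printf "runs=%d mean(avg)=%g worst(max)=%g\n", n, sum / n, mx }'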
72 | # #################### 73 | 74 | export MPICH_MAX_THREAD_SAFETY=funneled 75 | export OMP_PLACES=threads 76 | 77 | export OMP_NUM_THREADS=2 78 | time aprun -r 4 -n 128 -d 2 -j 2 -cc depth ./qs \ 79 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=4000000 \ 80 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0128.d002.j02-ts.out 81 | 82 | export OMP_NUM_THREADS=4 83 | time aprun -r 4 -n 64 -d 4 -j 2 -cc depth ./qs \ 84 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=4000000 \ 85 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0064.d004.j02-ts.out 86 | 87 | export OMP_NUM_THREADS=8 88 | time aprun -r 4 -n 32 -d 8 -j 2 -cc depth ./qs \ 89 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 90 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0032.d008.j02-ts.out 91 | 92 | export OMP_NUM_THREADS=16 93 | time aprun -r 4 -n 16 -d 16 -j 2 -cc depth ./qs \ 94 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 95 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0016.d016.j02-ts.out 96 | 97 | export OMP_NUM_THREADS=32 98 | time aprun -r 4 -n 8 -d 32 -j 2 -cc depth ./qs \ 99 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 100 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0008.d032.j02-ts.out 101 | 102 | export OMP_NUM_THREADS=64 103 | time aprun -r 4 -n 4 -d 64 -j 2 -cc depth ./qs \ 104 | --lx=200 --ly=200 --lz=100 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 105 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0004.d064.j02-ts.out 106 | 107 | # 108 | # end of file 109 | # 110 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_aprun_trinity_04.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MSUB -lnodes=4:knl,os=CLE_quad_cache 3 | #MSUB -l walltime=2:00:00 4 | #MSUB -A tos2-8 5 | 6 | # 7 | # 8 | # To run interactively, grab a node like so: 9 | # 10 | # msub -I -lnodes=1:knl,os=CLE_quad_cache 11 | # 12 | # This relies on the bash shell for the 2>&1 | tee to work.
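#
# Rough load check for the 4-node runs below (an editor's sketch): every
# run uses a 40x40x20 = 32000 cell mesh and sources 8000000 particles,
# i.e. 250 particles per cell, the same loading as the 1- and 2-node
# scripts:
echo "particles per cell: $((8000000 / (40 * 40 * 20)))"
#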
13 | # 14 | # To get average and max cycleTracking times: 15 | # grep "cycleTracking 10" *out | awk -F " " '{print $1 " " $4 " " $5}' 16 | # 17 | 18 | # #################### 19 | # Thread Funneled Runs - No Hyper Threads 20 | # #################### 21 | 22 | # Set this to where you have the code built on Lustre 23 | cd /users/sdawson/Quicksilver-2017-Apr-19-12-45-27 24 | 25 | export MPICH_MAX_THREAD_SAFETY=funneled 26 | export OMP_PLACES=cores 27 | 28 | # (Per Node) 64 MPI x 1 Thread - Thread Funneled 29 | export OMP_NUM_THREADS=1 30 | time aprun -r 4 -n 256 -d 1 -j 1 -cc depth ./qs \ 31 | --lx=800 --ly=800 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=8 --zDom=4 --nParticles=8000000 \ 32 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0256.d001.j01-ts.out 33 | 34 | # (Per Node) 32 MPI x 2 Threads - Thread Funneled 35 | export OMP_NUM_THREADS=2 36 | time aprun -r 4 -n 128 -d 2 -j 1 -cc depth ./qs \ 37 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=8000000 \ 38 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0128.d002.j01-ts.out 39 | 40 | # (Per Node) 16 MPI x 4 Threads - Thread Funneled 41 | export OMP_NUM_THREADS=4 42 | time aprun -r 4 -n 64 -d 4 -j 1 -cc depth ./qs \ 43 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=8000000 \ 44 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0064.d004.j01-ts.out 45 | 46 | # (Per Node) 8 MPI x 8 Threads - Thread Funneled 47 | export OMP_NUM_THREADS=8 48 | time aprun -r 4 -n 32 -d 8 -j 1 -cc depth ./qs \ 49 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=8000000 \ 50 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0032.d008.j01-ts.out 51 | 52 | # (Per Node) 4 MPI x 16 Threads - Thread Funneled 53 | export OMP_NUM_THREADS=16 54 | time aprun -r 4 -n 16 -d 16 -j 1 -cc depth ./qs \ 55 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=8000000 \ 56 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0016.d016.j01-ts.out 57 | 58 | # (Per Node) 2 MPI x 32 Threads - Thread Funneled 59 | export OMP_NUM_THREADS=32 60 | time aprun -r 4 -n 8 -d 32 -j 1 -cc depth ./qs \ 61 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=8000000 \ 62 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0008.d032.j01-ts.out 63 | 64 | # #################### 65 | # Thread Funneled Runs - 2 Hyper Threads 66 | # 67 | # As we add hyper threads we do not change the problem size; ideally this will still decrease the time 68 | # spent in the threaded tracking. 69 | # 70 | # Prior experience shows that while 4 hyper threads pay off at small node counts, it is a wash 71 | # at higher node counts, so we stop at 2 hyper threads.
72 | # #################### 73 | 74 | export MPICH_MAX_THREAD_SAFETY=funneled 75 | export OMP_PLACES=threads 76 | 77 | export OMP_NUM_THREADS=2 78 | time aprun -r 4 -n 256 -d 2 -j 2 -cc depth ./qs \ 79 | --lx=800 --ly=800 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=8 --zDom=4 --nParticles=8000000 \ 80 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0256.d002.j02-ts.out 81 | 82 | export OMP_NUM_THREADS=4 83 | time aprun -r 4 -n 128 -d 4 -j 2 -cc depth ./qs \ 84 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=8000000 \ 85 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0128.d004.j02-ts.out 86 | 87 | export OMP_NUM_THREADS=8 88 | time aprun -r 4 -n 64 -d 8 -j 2 -cc depth ./qs \ 89 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=8000000 \ 90 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0064.d008.j02-ts.out 91 | 92 | export OMP_NUM_THREADS=16 93 | time aprun -r 4 -n 32 -d 16 -j 2 -cc depth ./qs \ 94 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=8000000 \ 95 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0032.d016.j02-ts.out 96 | 97 | export OMP_NUM_THREADS=32 98 | time aprun -r 4 -n 16 -d 32 -j 2 -cc depth ./qs \ 99 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=8000000 \ 100 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0016.d032.j02-ts.out 101 | 102 | export OMP_NUM_THREADS=64 103 | time aprun -r 4 -n 8 -d 64 -j 2 -cc depth ./qs \ 104 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=8000000 \ 105 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0008.d064.j02-ts.out 106 | 107 | # 108 | # end of file 109 | # 110 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_slurm_rzalast_01.sh: -------------------------------------------------------------------------------- 1 | # 2 | # salloc 1 node exclusively, then run these tests.
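# (One way to get such an allocation, assuming a typical Slurm setup:
#    salloc -N 1 --exclusive )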
3 | # Or put them in a batch script 4 | # 5 | 6 | export -n KMP_AFFINITY 7 | export OMP_PROC_BIND=FALSE 8 | 9 | # #################### 10 | # Thread Funneled Runs 11 | # #################### 12 | 13 | # (Per Node) 16 MPI x 1 Thread - Thread Funneled 14 | export OMP_NUM_THREADS=1; 15 | srun -n16 --distribution=cyclic ./qs \ 16 | --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 17 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0016.t0001-ts.out 18 | 19 | # (Per Node) 8 MPI x 2 Threads - Thread Funneled 20 | export OMP_NUM_THREADS=2; 21 | srun -n8 --distribution=cyclic ./qs \ 22 | --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 23 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0008.t0002-ts.out 24 | 25 | # (Per Node) 4 MPI x 4 Threads - Thread Funneled 26 | export OMP_NUM_THREADS=4; 27 | srun -n4 --distribution=cyclic ./qs \ 28 | --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 29 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0004.t0004-ts.out 30 | 31 | # (Per Node) 2 MPI x 8 Threads - Thread Funneled 32 | export OMP_NUM_THREADS=8; 33 | srun -n2 --distribution=cyclic ./qs \ 34 | --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 35 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0002.t0008-ts.out 36 | 37 | # #################### 38 | # Thread Multiple Runs 39 | # For testing; does not show improvement on Xeon 40 | # #################### 41 | 42 | # (Per Node) 16 MPI x 1 Thread - Thread Multiple 43 | #export OMP_NUM_THREADS=1; 44 | #srun -n16 --distribution=cyclic ./qs --mpiThreadMultiple \ 45 | # --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 46 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0016.t0001-tm.out 47 | 48 | # (Per Node) 8 MPI x 2 Threads - Thread Multiple 49 | #export OMP_NUM_THREADS=2; 50 | #srun -n8 --distribution=cyclic ./qs --mpiThreadMultiple \ 51 | # --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 52 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0008.t0002-tm.out 53 | 54 | # (Per Node) 4 MPI x 4 Threads - Thread Multiple 55 | #export OMP_NUM_THREADS=4; 56 | #srun -n4 --distribution=cyclic ./qs --mpiThreadMultiple \ 57 | # --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 58 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0004.t0004-tm.out 59 | 60 | # (Per Node) 2 MPI x 8 Threads - Thread Multiple 61 | #export OMP_NUM_THREADS=8; 62 | #srun -n2 --distribution=cyclic ./qs --mpiThreadMultiple \ 63 | # --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 64 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0002.t0008-tm.out 65 | 66 | # 67 | # end of file 68 | # 69 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_slurm_rzgenie_01.sh: -------------------------------------------------------------------------------- 1 | # 2 | # salloc 1 node exclusively, then run these tests.
3 | # Or put them in a batch script 4 | # 5 | 6 | export -n KMP_AFFINITY 7 | export OMP_PROC_BIND=FALSE 8 | 9 | # #################### 10 | # Thread Funneled Runs 11 | # #################### 12 | 13 | # 32 MPI x 1 Thread - Thread Funneled 14 | export OMP_NUM_THREADS=1 15 | srun -n 32 --distribution=cyclic --mpibind ./qs \ 16 | --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 17 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0032.t0001-ts.out 18 | 19 | # 16 MPI x 2 Threads - Thread Funneled 20 | export OMP_NUM_THREADS=2 21 | srun -n 16 --distribution=cyclic --mpibind ./qs \ 22 | --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 23 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0016.t0002-ts.out 24 | 25 | # 8 MPI x 4 Threads - Thread Funneled 26 | export OMP_NUM_THREADS=4 27 | srun -n 8 --distribution=cyclic ./qs \ 28 | --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 29 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0008.t0004-ts.out 30 | 31 | # 4 MPI x 8 Threads - Thread Funneled 32 | export OMP_NUM_THREADS=8 33 | srun -n 4 --distribution=cyclic ./qs \ 34 | --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 35 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0004.t0008-ts.out 36 | 37 | # 2 MPI x 16 Threads - Thread Funneled 38 | export OMP_NUM_THREADS=16 39 | srun -n 2 --distribution=cyclic ./qs \ 40 | --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=4000000 \ 41 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0002.t0016-ts.out 42 | 43 | # #################### 44 | # Thread Multiple Runs 45 | # For testing; does not show improvement on Xeon 46 | # #################### 47 | 48 | # 32 MPI x 1 Thread - Thread Multiple 49 | #export OMP_NUM_THREADS=1 50 | #srun -n 32 --distribution=cyclic ./qs --mpiThreadMultiple \ 51 | # --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 52 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0032.t0001-tm.out 53 | 54 | # 16 MPI x 2 Threads - Thread Multiple 55 | #export OMP_NUM_THREADS=2 56 | #srun -n 16 --distribution=cyclic ./qs --mpiThreadMultiple \ 57 | # --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 58 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0016.t0002-tm.out 59 | 60 | # 8 MPI x 4 Threads - Thread Multiple 61 | #export OMP_NUM_THREADS=4 62 | #srun -n 8 --distribution=cyclic ./qs --mpiThreadMultiple \ 63 | # --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 64 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0008.t0004-tm.out 65 | 66 | # 4 MPI x 8 Threads - Thread Multiple 67 | #export OMP_NUM_THREADS=8 68 | #srun -n 4 --distribution=cyclic ./qs --mpiThreadMultiple \ 69 | # --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 70 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0004.t0008-tm.out 71 | 72 | # 2 MPI x 16 Threads - Thread Multiple 73 | #export OMP_NUM_THREADS=16 74 | #srun -n 2 --distribution=cyclic ./qs --mpiThreadMultiple \ 75 | # --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20
--xDom=2 --yDom=1 --zDom=1 --nParticles=4000000 \ 76 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0002.t0016-tm.out 77 | 78 | # 79 | # end of file 80 | # 81 | -------------------------------------------------------------------------------- /Examples/Homogeneous/run.homogeneousProblem_v4.rzoz7.x: -------------------------------------------------------------------------------- 1 | # 2 | # 2016-Oct-06 Note by S. Dawson 3 | # 4 | # Note on running thread multiple vs thread single. 5 | # 6 | # It's a bit clunky as one has to set up a separate test deck for thread single vs thread multiple, AS WELL AS 7 | # specify the correct command line argument. 8 | # 9 | # This has to do with the desire to fire up MPI before processing the command line arguments, yet still 10 | # have the input deck reflect how the deck is run. 11 | # 12 | # Also, despite what the command line help says, one cannot specify a flag to --mpiThreadMultiple. If one 13 | # says --mpiThreadMultiple=1 or --mpiThreadMultiple=0 the code complains; it is just --mpiThreadMultiple 14 | # to turn it on, and the default is thread-single mode. 15 | # 16 | 17 | export -n KMP_CPUINFO_FILE 18 | export KMP_CPUINFO_FILE=/home/dawson/cpuinfo_sad; 19 | export I_MPI_PIN_DOMAIN=64:compact 20 | export KMP_AFFINITY="granularity=fine,scatter" 21 | export KMP_HW_SUBSET=1T 22 | export KMP_BLOCKTIME=0 23 | export OMP_NUM_THREADS=16 24 | #export OMP_PLACES=cores 25 | 26 | export MPICH_MAX_THREAD_SAFETY=multiple 27 | time mpirun -np 4 ./qs --mpiThreadMultiple --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_tm.inp | tee rzoz18.N01.n04.t016.tm.out 28 | export MPICH_MAX_THREAD_SAFETY=funneled 29 | time mpirun -np 4 ./qs --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_ts.inp | tee rzoz18.N01.n04.t016.ts.out 30 | 31 | export KMP_HW_SUBSET=2T; 32 | export OMP_NUM_THREADS=32; 33 | export MPICH_MAX_THREAD_SAFETY=multiple 34 | time mpirun -np 4 ./qs --mpiThreadMultiple --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_tm.inp | tee rzoz18.N01.n04.t032.tm.out 35 | export MPICH_MAX_THREAD_SAFETY=funneled 36 | time mpirun -np 4 ./qs --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_ts.inp | tee rzoz18.N01.n04.t032.ts.out 37 | 38 | export KMP_HW_SUBSET=4T; 39 | export OMP_NUM_THREADS=64; 40 | export MPICH_MAX_THREAD_SAFETY=multiple 41 | time mpirun -np 4 ./qs --mpiThreadMultiple --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_tm.inp | tee rzoz18.N01.n04.t064.tm.out 42 | export MPICH_MAX_THREAD_SAFETY=funneled 43 | time mpirun -np 4 ./qs --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_ts.inp | tee rzoz18.N01.n04.t064.ts.out 44 | 45 | 46 | -------------------------------------------------------------------------------- /Examples/NoCollisions/no.collisions.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: 5 | loadBalance: 1 6 | lx: 100 7 | ly: 100 8 | lz: 100 9 | nParticles: 1000000 10 | nSteps: 10 11 | nx: 10 12 | ny: 10 13 | nz: 10 14 | seed: 1029384756 15 | xDom: 0 16 | yDom: 0 17 | zDom: 0 18
| eMax: 1.000001 19 | eMin: 1.000000 20 | nGroups: 230 21 | 22 | Geometry: 23 | material: boxMaterial 24 | shape: brick 25 | xMax: 100 26 | xMin: 0 27 | yMax: 100 28 | yMin: 0 29 | zMax: 100 30 | zMin: 0 31 | 32 | Geometry: 33 | material: sourceMaterial 34 | shape: brick 35 | xMax: 10 36 | xMin: 0 37 | yMax: 10 38 | yMin: 0 39 | zMax: 10 40 | zMin: 0 41 | 42 | Material: 43 | name: boxMaterial 44 | nIsotopes: 10 45 | nReactions: 9 46 | sourceRate: 0 47 | totalCrossSection: 1e-80 48 | absorptionCrossSection: flat 49 | fissionCrossSection: flat 50 | scatteringCrossSection: flat 51 | absorptionCrossSectionRatio: 1 52 | fissionCrossSectionRatio: 0 53 | scatteringCrossSectionRatio: 1 54 | 55 | Material: 56 | name: sourceMaterial 57 | nIsotopes: 10 58 | nReactions: 9 59 | sourceRate: 1e+10 60 | totalCrossSection: 1e-80 61 | absorptionCrossSection: flat 62 | fissionCrossSection: flat 63 | scatteringCrossSection: flat 64 | absorptionCrossSectionRatio: 1 65 | fissionCrossSectionRatio: 1 66 | scatteringCrossSectionRatio: 1 67 | 68 | CrossSection: 69 | name: flat 70 | A: 0 71 | B: 0 72 | C: 0 73 | D: 0 74 | E: 1 75 | nuBar: 2.4 76 | 77 | 78 | -------------------------------------------------------------------------------- /Examples/NoFission/noFission.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: streamingProblem.inp 5 | boundaryCondition: octant 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | lx: 100 10 | ly: 100 11 | lz: 100 12 | nParticles: 10000000 13 | nSteps: 10 14 | nx: 10 15 | ny: 10 16 | nz: 10 17 | seed: 1029384756 18 | xDom: 0 19 | yDom: 0 20 | zDom: 0 21 | eMax: 20 22 | eMin: 1e-9 23 | nGroups: 230 24 | 25 | Geometry: 26 | material: sourceMaterial 27 | shape: brick 28 | xMax: 100 29 | xMin: 0 30 | yMax: 100 31 | yMin: 0 32 | zMax: 100 33 | zMin: 0 34 | 35 | Material: 36 | name: sourceMaterial 37 | nIsotopes: 10 38 | nReactions: 9 39 | sourceRate: 1e+10 40 | totalCrossSection: 0.198 41 | absorptionCrossSection: flat 42 | fissionCrossSection: flat 43 | scatteringCrossSection: flat 44 | absorptionCrossSectionRatio: 0.494949495 45 | fissionCrossSectionRatio: 0 46 | scatteringCrossSectionRatio: 0.505050505 47 | 48 | CrossSection: 49 | name: flat 50 | A: 0 51 | B: 0 52 | C: 0 53 | D: 0 54 | E: 1 55 | nuBar: 2.4 56 | -------------------------------------------------------------------------------- /Examples/NonFlatXC/NonFlatXC.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 100 9 | ly: 100 10 | lz: 100 11 | nParticles: 1000000 12 | batchSize: 0 13 | nBatches: 10 14 | nSteps: 10 15 | nx: 10 16 | ny: 10 17 | nz: 10 18 | seed: 1029384756 19 | xDom: 0 20 | yDom: 0 21 | zDom: 0 22 | eMax: 20 23 | eMin: 1e-08 24 | nGroups: 230 25 | lowWeightCutoff: 0.001 26 | bTally: 1 27 | fTally: 1 28 | cTally: 1 29 | coralBenchmark: 0 30 | 31 | Geometry: 32 | material: sourceMaterial 33 | shape: brick 34 | xMax: 100 35 | xMin: 0 36 | yMax: 100 37 | yMin: 0 38 | zMax: 100 39 | zMin: 0 40 | 41 | Material: 42 | name: sourceMaterial 43 | mass: 1000.0 44 | nIsotopes: 10 45 | nReactions: 9 46 | sourceRate: 1e+10 47 | totalCrossSection: 6 48 | absorptionCrossSection: absorb 49 | fissionCrossSection: fission 50 | scatteringCrossSection: scatter 51 | absorptionCrossSectionRatio: 6e-3 52 | fissionCrossSectionRatio: 1 53 | 
scatteringCrossSectionRatio: 5 54 | 55 | Material: 56 | name: flatMaterial 57 | nIsotopes: 20 58 | nReactions: 9 59 | sourceRate: 1e+10 60 | totalCrossSection: 1 61 | absorptionCrossSection: flat 62 | fissionCrossSection: flat 63 | scatteringCrossSection: flat 64 | absorptionCrossSectionRatio: 1 65 | fissionCrossSectionRatio: 1 66 | scatteringCrossSectionRatio: 1 67 | 68 | CrossSection: 69 | name: flat 70 | A: 0 71 | B: 0 72 | C: 0 73 | D: 0 74 | E: 1 75 | nuBar: 2.4 76 | 77 | CrossSection: 78 | name: absorb 79 | A: 0 80 | B: 0 81 | C: 0 82 | D: -0.8446 83 | D: -0.5243 84 | E: -2.22 85 | 86 | CrossSection: 87 | name: fission 88 | A: 0 89 | B: 0 90 | C: 0 91 | D: -0.342 92 | E: 0 93 | nuBar: 2.4 94 | 95 | CrossSection: 96 | name: scatter 97 | A: 0 98 | B: 0 99 | C: 0 100 | D: 0 101 | E: 0.7 102 | 103 | 104 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | LICENSE 2 | ======= 3 | 4 | Copyright (c) 2017, Lawrence Livermore National Security, LLC. 5 | 6 | Produced at the Lawrence Livermore National Laboratory 7 | 8 | Written by David Richards [richards12@llnl.gov], Ryan Bleile, 9 | Patrick Brantley, Shawn Dawson, Scott McKinley, Matt O'Brien 10 | 11 | LLNL-CODE-684037. 12 | 13 | All rights reserved. 14 | 15 | This file is part of Quicksilver. For details, see 16 | http://www.github.com/LLNL/Quicksilver. Please also read 17 | the Additional BSD Notice below. 18 | 19 | Redistribution and use in source and binary forms, with or 20 | without modification, are permitted provided that the following 21 | conditions are met: 22 | 23 | * Redistributions of source code must retain the above copyright 24 | notice, this list of conditions and the disclaimer below. 25 | 26 | * Redistributions in binary form must reproduce the above copyright 27 | notice, this list of conditions and the disclaimer (as noted below) 28 | in the documentation and/or other materials provided with the 29 | distribution. 30 | 31 | * Neither the name of the LLNS/LLNL nor the names of its contributors 32 | may be used to endorse or promote products derived from this 33 | software without specific prior written permission. 34 | 35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 36 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 37 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 38 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 39 | DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL 40 | SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE 41 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 42 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 43 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 45 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 47 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 48 | THE POSSIBILITY OF SUCH DAMAGE. 49 | 50 | 51 | Additional BSD Notice 52 | --------------------- 53 | 54 | 1. This notice is required to be provided under our contract with the 55 | U.S. Department of Energy (DOE). This work was produced at Lawrence 56 | Livermore National Laboratory under Contract No. DE-AC52-07NA27344 57 | with the DOE. 58 | 59 | 2. 
Neither the United States Government nor Lawrence Livermore 60 | National Security, LLC nor any of their employees, makes any 61 | warranty, express or implied, or assumes any liability or 62 | responsibility for the accuracy, completeness, or usefulness of any 63 | information, apparatus, product, or process disclosed, or 64 | represents that its use would not infringe privately-owned rights. 65 | 66 | 3. Also, reference herein to any specific commercial products, 67 | process, or services by trade name, trademark, manufacturer or 68 | otherwise does not necessarily constitute or imply its endorsement, 69 | recommendation, or favoring by the United States Government or 70 | Lawrence Livermore National Security, LLC. The views and opinions 71 | of authors expressed herein do not necessarily state or reflect 72 | those of the United States Government or Lawrence Livermore 73 | National Security, LLC, and shall not be used for advertising or 74 | product endorsement purposes. 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Quicksilver 2 | =========== 3 | 4 | Introduction 5 | ------------ 6 | 7 | Quicksilver is a proxy application that represents some elements of 8 | the Mercury workload by solving a simplified dynamic Monte Carlo 9 | particle transport problem. Quicksilver attempts to replicate the 10 | memory access patterns, communication patterns, and the branching or 11 | divergence of Mercury for problems using multigroup cross sections. 12 | OpenMP and MPI are used for parallelization. A GPU version is 13 | available. Unified memory is assumed. 14 | 15 | Performance of Quicksilver is likely to be dominated by latency-bound 16 | table look-ups, a highly branchy/divergent code path, and poor 17 | vectorization potential. 18 | 19 | For more information, visit the 20 | [LLNL co-design pages.](https://codesign.llnl.gov/quicksilver.php) 21 | 22 | 23 | Building Quicksilver 24 | -------------------- 25 | 26 | Instructions to build Quicksilver can be found in the 27 | Makefile. Quicksilver is relatively easy to build, with no 28 | external dependencies (except MPI and OpenMP). You should be able to 29 | build Quicksilver on nearly any system by customizing the values of 30 | only four variables in the Makefile: 31 | 32 | * CXX The name of the C++ compiler (with path if necessary). 33 | Quicksilver uses C++11 features, so a C++11-compliant compiler 34 | should be used. 35 | 36 | * CXXFLAGS Command line switches to pass to the C++ compiler when 37 | compiling objects *and* when linking the executable. 38 | 39 | * CPPFLAGS Command line switches to pass to the compiler *only* when 40 | compiling objects. 41 | 42 | * LDFLAGS Command line switches to pass to the compiler *only* 43 | when linking the executable. 44 | 45 | Sample definitions for a number of common systems are provided. 46 | 47 | Quicksilver recognizes a number of pre-processor macros that enable or 48 | disable various code features such as MPI, OpenMP, etc. These are 49 | described in the Makefile. 50 | 51 | 52 | Running Quicksilver 53 | ------------------- 54 | 55 | Quicksilver’s behavior is controlled by a combination of command line 56 | options and an input file. All of the parameters that can be set on 57 | the command line can also be set in the input file. The input file 58 | values will override the command line. Run `$ qs -h` to see
Documentation 60 | of the input file parameters is in preparation. 61 | 62 | Quicksilver also has the property that the output of every run is a 63 | valid input file. Hence you can repeat any run for which you have the 64 | output file by using that output as an input file. 65 | 66 | 67 | License and Distribution Information 68 | ------------------------------------ 69 | 70 | Quicksilver is available [on GitHub](https://github.com/LLNL/Quicksilver) 71 | 72 | 73 | Quicksilver is open source software with a BSD license. See 74 | [LICENSE.md](https://github.com/LLNL/Quicksilver/blob/master/LICENSE.md) 75 | 76 | This work was performed under the auspices of the U.S. Department of 77 | Energy by Lawrence Livermore National Laboratory under Contract 78 | DE-AC52-07NA27344. 79 | 80 | LLNL-CODE-684037 81 | -------------------------------------------------------------------------------- /src/BulkStorage.hh: -------------------------------------------------------------------------------- 1 | #ifndef BULK_STORAGE_HH 2 | #define BULK_STORAGE_HH 3 | 4 | #include "MemoryControl.hh" 5 | 6 | template <typename T> 7 | class BulkStorage 8 | { 9 | public: 10 | BulkStorage() 11 | : _bulkStorage(0), 12 | _refCount(0), 13 | _size(0), 14 | _capacity(0), 15 | _memPolicy(MemoryControl::AllocationPolicy::UNDEFINED_POLICY) 16 | { 17 | _refCount = new int; 18 | *_refCount = 1; 19 | } 20 | 21 | BulkStorage(const BulkStorage& aa) 22 | : _bulkStorage(aa._bulkStorage), _refCount(aa._refCount), _size(aa._size), _capacity(aa._capacity), _memPolicy(aa._memPolicy) 23 | { 24 | ++(*_refCount); 25 | } 26 | 27 | ~BulkStorage() 28 | { 29 | --(*_refCount); 30 | if (*_refCount > 0) 31 | return; 32 | 33 | delete _refCount; 34 | 35 | // Catch the case that the storage was never allocated. This 36 | // happens when setCapacity is never called on this instance. 37 | if (_bulkStorage != 0) 38 | MemoryControl::deallocate(_bulkStorage, _capacity, _memPolicy); 39 | } 40 | 41 | /// Needed for copy-swap idiom 42 | void swap(BulkStorage& other) 43 | { 44 | std::swap(_bulkStorage, other._bulkStorage); 45 | std::swap(_refCount, other._refCount); 46 | std::swap(_size, other._size); 47 | std::swap(_capacity, other._capacity); 48 | std::swap(_memPolicy, other._memPolicy); 49 | } 50 | 51 | /// Implement assignment using copy-swap idiom 52 | BulkStorage& operator=(const BulkStorage& aa) 53 | { 54 | if (&aa != this) 55 | { 56 | BulkStorage temp(aa); 57 | this->swap(temp); 58 | } 59 | return *this; 60 | } 61 | 62 | void setCapacity(int capacity, MemoryControl::AllocationPolicy policy) 63 | { 64 | qs_assert(_bulkStorage == 0); 65 | _bulkStorage = MemoryControl::allocate<T>(capacity, policy); 66 | _capacity = capacity; 67 | _memPolicy = policy; 68 | } 69 | 70 | T* getBlock(int nItems) 71 | { 72 | T* blockStart = _bulkStorage + _size; 73 | _size += nItems; 74 | qs_assert(_size <= _capacity); 75 | return blockStart; 76 | } 77 | 78 | 79 | private: 80 | 81 | // This class doesn't have well defined copy semantics. However, 82 | // just disabling copy operations breaks the build since we haven't 83 | // been consistent about dealing with copy semantics in classes like 84 | // MC_Mesh_Domain.
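   // For reference, the conventional way to disable copying would be a
   // sketch along the lines of
   //    BulkStorage(const BulkStorage&) = delete;
   //    BulkStorage& operator=(const BulkStorage&) = delete;
   // which is exactly the change that currently breaks the build.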
85 | 86 | 87 | 88 | T* _bulkStorage; 89 | int* _refCount; 90 | int _size; 91 | int _capacity; 92 | MemoryControl::AllocationPolicy _memPolicy; 93 | 94 | }; 95 | 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /src/CollisionEvent.hh: -------------------------------------------------------------------------------- 1 | #ifndef COLLISION_EVENT_HH 2 | #define COLLISION_EVENT_HH 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | class MonteCarlo; 7 | class MC_Particle; 8 | 9 | HOST_DEVICE 10 | bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index ); 11 | HOST_DEVICE_END 12 | 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /src/CommObject.hh: -------------------------------------------------------------------------------- 1 | #ifndef COMM_OBJECT_HH 2 | #define COMM_OBJECT_HH 3 | 4 | #include 5 | 6 | #include 7 | #include "MeshPartition.hh" 8 | #include "Long64.hh" 9 | #include "FacetPair.hh" 10 | 11 | class CommObject 12 | { 13 | public: 14 | virtual ~CommObject(){}; 15 | virtual void exchange(MeshPartition::MapType& cellInfo, 16 | const std::vector& nbrDomain, 17 | std::vector > sendSet, 18 | std::vector > recvSet) = 0; 19 | virtual void exchange(std::vector sendBuf, 20 | std::vector& recvBuf) = 0; 21 | 22 | }; 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/CoralBenchmark.hh: -------------------------------------------------------------------------------- 1 | #ifndef CORALBENCHMARK_HH 2 | #define CORALBENCHMARK_HH 3 | 4 | class MonteCarlo; 5 | class Parameters; 6 | 7 | void coralBenchmarkCorrectness( MonteCarlo* monteCarlo, Parameters ¶ms ); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /src/CycleTracking.cc: -------------------------------------------------------------------------------- 1 | #include "CycleTracking.hh" 2 | #include "MonteCarlo.hh" 3 | #include "ParticleVaultContainer.hh" 4 | #include "ParticleVault.hh" 5 | #include "MC_Segment_Outcome.hh" 6 | #include "CollisionEvent.hh" 7 | #include "MC_Facet_Crossing_Event.hh" 8 | #include "MCT.hh" 9 | #include "DeclareMacro.hh" 10 | #include "QS_atomics.hh" 11 | #include "macros.hh" 12 | #include "qs_assert.hh" 13 | 14 | HOST_DEVICE 15 | void CycleTrackingGuts( MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault ) 16 | { 17 | MC_Particle mc_particle; 18 | 19 | // Copy a single particle from the particle vault into mc_particle 20 | MC_Load_Particle(monteCarlo, mc_particle, processingVault, particle_index); 21 | 22 | // set the particle.task to the index of the processed vault the particle will census into. 
23 | mc_particle.task = 0;//processed_vault; 24 | 25 | // loop over this particle until we cannot do anything more with it on this processor 26 | CycleTrackingFunction( monteCarlo, mc_particle, particle_index, processingVault, processedVault ); 27 | 28 | //Make sure this particle is marked as completed 29 | processingVault->invalidateParticle( particle_index ); 30 | } 31 | HOST_DEVICE_END 32 | 33 | HOST_DEVICE 34 | void CycleTrackingFunction( MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault* processingVault, ParticleVault* processedVault) 35 | { 36 | bool keepTrackingThisParticle = false; 37 | unsigned int tally_index = (particle_index) % monteCarlo->_tallies->GetNumBalanceReplications(); 38 | unsigned int flux_tally_index = (particle_index) % monteCarlo->_tallies->GetNumFluxReplications(); 39 | unsigned int cell_tally_index = (particle_index) % monteCarlo->_tallies->GetNumCellTallyReplications(); 40 | do 41 | { 42 | // Determine the outcome of a particle at the end of this segment such as: 43 | // 44 | // (0) Undergo a collision within the current cell, 45 | // (1) Cross a facet of the current cell, 46 | // (2) Reach the end of the time step and enter census, 47 | // 48 | #ifdef EXPONENTIAL_TALLY 49 | monteCarlo->_tallies->TallyCellValue( exp(rngSample(&mc_particle.random_number_seed)) , mc_particle.domain, cell_tally_index, mc_particle.cell); 50 | #endif 51 | MC_Segment_Outcome_type::Enum segment_outcome = MC_Segment_Outcome(monteCarlo, mc_particle, flux_tally_index); 52 | 53 | QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._numSegments); 54 | 55 | mc_particle.num_segments += 1.; /* Track the number of segments this particle has 56 | undergone this cycle on all processes. */ 57 | switch (segment_outcome) { 58 | case MC_Segment_Outcome_type::Collision: 59 | { 60 | // The particle undergoes a collision event producing: 61 | // (0) Other-than-one same-species secondary particle, or 62 | // (1) Exactly one same-species secondary particle. 63 | if (CollisionEvent(monteCarlo, mc_particle, tally_index ) == MC_Collision_Event_Return::Continue_Tracking) 64 | { 65 | keepTrackingThisParticle = true; 66 | } 67 | else 68 | { 69 | keepTrackingThisParticle = false; 70 | } 71 | } 72 | break; 73 | 74 | case MC_Segment_Outcome_type::Facet_Crossing: 75 | { 76 | // The particle has reached a cell facet. 77 | MC_Tally_Event::Enum facet_crossing_type = MC_Facet_Crossing_Event(mc_particle, monteCarlo, particle_index, processingVault); 78 | 79 | if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Transit_Exit) 80 | { 81 | keepTrackingThisParticle = true; // Transit Event 82 | } 83 | else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Escape) 84 | { 85 | QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._escape); 86 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; 87 | mc_particle.species = -1; 88 | keepTrackingThisParticle = false; 89 | } 90 | else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Reflection) 91 | { 92 | MCT_Reflect_Particle(monteCarlo, mc_particle); 93 | keepTrackingThisParticle = true; 94 | } 95 | else 96 | { 97 | // Enters an adjacent cell in an off-processor domain. 98 | //mc_particle.species = -1; 99 | keepTrackingThisParticle = false; 100 | } 101 | } 102 | break; 103 | 104 | case MC_Segment_Outcome_type::Census: 105 | { 106 | // The particle has reached the end of the time step. 
107 | processedVault->pushParticle(mc_particle); 108 | QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._census); 109 | keepTrackingThisParticle = false; 110 | break; 111 | } 112 | 113 | default: 114 | qs_assert(false); 115 | break; // should this be an error 116 | } 117 | 118 | } while ( keepTrackingThisParticle ); 119 | } 120 | HOST_DEVICE_END 121 | 122 | -------------------------------------------------------------------------------- /src/CycleTracking.hh: -------------------------------------------------------------------------------- 1 | #include "DeclareMacro.hh" 2 | 3 | // Forward Declaration 4 | class ParticleVault; 5 | class MonteCarlo; 6 | class MC_Particle; 7 | 8 | HOST_DEVICE 9 | void CycleTrackingGuts( MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault ); 10 | HOST_DEVICE_END 11 | 12 | HOST_DEVICE 13 | void CycleTrackingFunction( MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault* processingVault, ParticleVault* processedVault); 14 | HOST_DEVICE_END 15 | -------------------------------------------------------------------------------- /src/DeclareMacro.hh: -------------------------------------------------------------------------------- 1 | #ifndef DECLAREMACRO_HH 2 | #define DECLAREMACRO_HH 3 | 4 | #if defined HAVE_CUDA || defined HAVE_HIP 5 | #define HOST_DEVICE __host__ __device__ 6 | #define HOST_DEVICE_CUDA __host__ __device__ 7 | #define HOST_DEVICE_CLASS 8 | #define HOST_DEVICE_END 9 | #define DEVICE __device__ 10 | #define DEVICE_END 11 | //#define HOST __host__ 12 | #define HOST_END 13 | #define GLOBAL __global__ 14 | #elif HAVE_OPENMP_TARGET 15 | #define HOST_DEVICE _Pragma( "omp declare target" ) 16 | #define HOST_DEVICE_CUDA 17 | #define HOST_DEVICE_CLASS _Pragma( "omp declare target" ) 18 | #define HOST_DEVICE_END _Pragma("omp end declare target") 19 | //#define HOST_DEVICE #pragma omp declare target 20 | //#define HOST_DEVICE_END #pragma omp end declare target 21 | //#define DEVICE #pragma omp declare target 22 | //#define DEVICE_END #pragma omp end declare target 23 | //#define HOST 24 | #define HOST_END 25 | #define GLOBAL 26 | #else 27 | #define HOST_DEVICE 28 | #define HOST_DEVICE_CUDA 29 | #define HOST_DEVICE_CLASS 30 | #define HOST_DEVICE_END 31 | #define DEVICE 32 | #define DEVICE_END 33 | //#define HOST 34 | #define HOST_END 35 | #define GLOBAL 36 | #endif 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/DecompositionObject.cc: -------------------------------------------------------------------------------- 1 | #include "DecompositionObject.hh" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "qs_assert.hh" 7 | 8 | using std::vector; 9 | using std::find; 10 | using std::swap; 11 | using std::pair; 12 | using std::set; 13 | using std::make_pair; 14 | namespace 15 | { 16 | void fisherYates(vector& vv) 17 | { 18 | int nItems = vv.size(); 19 | for (unsigned ii=0; ii localGid; 68 | 69 | for (unsigned jGid=0; jGid > tmp; 90 | for (unsigned ii=0; ii 5 | 6 | class DecompositionObject 7 | { 8 | public: 9 | DecompositionObject(int myRank, int nRanks, int nDomainsPerRank, int mode); 10 | 11 | int getRank(int domainGid) const {return _rank[domainGid];} 12 | int getIndex(int domainGid) const {return _index[domainGid];} 13 | const std::vector& getAssignedDomainGids() const {return _assignedGids;} 14 | 15 | private: 16 | std::vector _assignedGids; 17 | std::vector _rank; // rank for given gid 
18 | std::vector<int> _index; // index for given gid 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/DirectionCosine.cc: -------------------------------------------------------------------------------- 1 | #include "DirectionCosine.hh" 2 | #include "MC_RNG_State.hh" 3 | #include "PhysicalConstants.hh" 4 | 5 | void DirectionCosine::Sample_Isotropic(uint64_t *seed) 6 | { 7 | this->gamma = 1.0 - 2.0*rngSample(seed); 8 | double sine_gamma = sqrt((1.0 - (gamma*gamma))); 9 | double phi = PhysicalConstants::_pi*(2.0*rngSample(seed) - 1.0); 10 | 11 | this->alpha = sine_gamma * cos(phi); 12 | this->beta = sine_gamma * sin(phi); 13 | } 14 | -------------------------------------------------------------------------------- /src/EnergySpectrum.cc: -------------------------------------------------------------------------------- 1 | #include "EnergySpectrum.hh" 2 | #include "MonteCarlo.hh" 3 | #include "ParticleVault.hh" 4 | #include "ParticleVaultContainer.hh" 5 | #include "utilsMpi.hh" 6 | #include "MC_Processor_Info.hh" 7 | #include "Parameters.hh" 8 | #include <cinttypes> 9 | 10 | using std::string; 11 | 12 | void EnergySpectrum::UpdateSpectrum(MonteCarlo* monteCarlo) 13 | { 14 | if( _fileName == "" ) return; 15 | 16 | for( uint64_t ii = 0; ii < monteCarlo->_particleVaultContainer->processingSize(); ii++) 17 | { 18 | ParticleVault* processing = monteCarlo->_particleVaultContainer->getTaskProcessingVault( ii ); 19 | for( uint64_t jj = 0; jj < processing->size(); jj++ ) 20 | { 21 | MC_Particle mc_particle; 22 | MC_Load_Particle(monteCarlo, mc_particle, processing, jj); 23 | _censusEnergySpectrum[mc_particle.energy_group]++; 24 | } 25 | } 26 | for( uint64_t ii = 0; ii < monteCarlo->_particleVaultContainer->processedSize(); ii++) 27 | { 28 | ParticleVault* processed = monteCarlo->_particleVaultContainer->getTaskProcessedVault( ii ); 29 | for( uint64_t jj = 0; jj < processed->size(); jj++ ) 30 | { 31 | MC_Particle mc_particle; 32 | MC_Load_Particle(monteCarlo, mc_particle, processed, jj); 33 | _censusEnergySpectrum[mc_particle.energy_group]++; 34 | } 35 | } 36 | } 37 | 38 | void EnergySpectrum::PrintSpectrum(MonteCarlo* monteCarlo) 39 | { 40 | if( _fileName == "" ) return; 41 | 42 | const int count = monteCarlo->_nuclearData->_energies.size(); 43 | uint64_t *sumHist = new uint64_t[ count ](); 44 | 45 | mpiAllreduce( _censusEnergySpectrum.data(), sumHist, count, MPI_INT64_T, MPI_SUM, monteCarlo->processor_info->comm_mc_world ); 46 | 47 | if( monteCarlo->processor_info->rank == 0 ) 48 | { 49 | _fileName += ".dat"; 50 | FILE* spectrumFile; 51 | spectrumFile = fopen( _fileName.c_str(), "w" ); 52 | 53 | for( int ii = 0; ii < count; ii++ ) 54 | { 55 | fprintf( spectrumFile, "%d\t%g\t%" PRIu64 "\n", ii, monteCarlo->_nuclearData->_energies[ii], sumHist[ii] ); 56 | } 57 | 58 | fclose( spectrumFile ); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/EnergySpectrum.hh: -------------------------------------------------------------------------------- 1 | #ifndef ENERGYSPECTRUM_HH 2 | #define ENERGYSPECTRUM_HH 3 | #include <string> 4 | #include <vector> 5 | 6 | class MonteCarlo; 7 | 8 | class EnergySpectrum 9 | { 10 | public: 11 | EnergySpectrum(std::string name, uint64_t size) : _fileName(name), _censusEnergySpectrum(size,0) {}; 12 | void UpdateSpectrum(MonteCarlo* monteCarlo); 13 | void PrintSpectrum(MonteCarlo* monteCarlo); 14 | 15 | private: 16 | std::string _fileName; 17 | std::vector<uint64_t> _censusEnergySpectrum; 18 | }; 19 | 20 |
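// Usage sketch (illustrative; monteCarlo and nGroups stand in for whatever
// the caller has in scope): size the histogram to the number of energy
// groups, fold in the vaults each cycle, then write the reduced spectrum:
//
//    EnergySpectrum spectrum("spectrum", nGroups);
//    spectrum.UpdateSpectrum(monteCarlo);   // once per cycle
//    spectrum.PrintSpectrum(monteCarlo);    // writes "spectrum.dat" on rank 0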
#endif 21 | 22 | -------------------------------------------------------------------------------- /src/FacetPair.hh: -------------------------------------------------------------------------------- 1 | #ifndef FACET_PAIR_HH 2 | #define FACET_PAIR_HH 3 | 4 | #include "MC_Location.hh" 5 | 6 | class FacetPair 7 | { 8 | public: 9 | FacetPair(){}; 10 | FacetPair(int domainGid1, const MC_Location& location1, 11 | int domainGid2, const MC_Location& location2) 12 | : _domainGid1(domainGid1), 13 | _domainIndex1(location1.domain), 14 | _cellIndex1(location1.cell), 15 | _facetIndex1(location1.facet), 16 | _domainGid2(domainGid2), 17 | _domainIndex2(location2.domain), 18 | _cellIndex2(location2.cell), 19 | _facetIndex2(location2.facet) 20 | { 21 | } 22 | 23 | int _domainGid1; 24 | int _domainIndex1; 25 | int _facetIndex1; 26 | int _cellIndex1; 27 | int _domainGid2; 28 | int _domainIndex2; 29 | int _facetIndex2; 30 | int _cellIndex2; 31 | }; 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/GlobalFccGrid.cc: -------------------------------------------------------------------------------- 1 | #include "GlobalFccGrid.hh" 2 | #include 3 | #include 4 | #include "MC_Vector.hh" 5 | #include "Tuple.hh" 6 | 7 | using std::vector; 8 | using std::min; 9 | using std::max; 10 | 11 | namespace 12 | { 13 | const vector& getFaceTupleOffset(); 14 | } 15 | 16 | 17 | GlobalFccGrid::GlobalFccGrid(int nx, int ny, int nz, 18 | double lx, double ly, double lz) 19 | : _nx(nx), _ny(ny), _nz(nz), 20 | _lx(lx), _ly(ly), _lz(lz), 21 | _cellTupleToIndex(nx, ny, nz), 22 | _cellIndexToTuple(nx, ny, nz), 23 | _nodeTupleToIndex(nx+1, ny+1, nz+1, 4), 24 | _nodeIndexToTuple(nx+1, ny+1, nz+1, 4) 25 | { 26 | _dx = _lx/_nx; 27 | _dy = _ly/_ny; 28 | _dz = _lz/_nz; 29 | } 30 | 31 | Long64 GlobalFccGrid::whichCell(const MC_Vector& r) const 32 | { 33 | int ix = r.x/_dx; 34 | int iy = r.y/_dy; 35 | int iz = r.z/_dz; 36 | return _cellTupleToIndex(ix, iy, iz); 37 | } 38 | 39 | 40 | MC_Vector GlobalFccGrid::cellCenter(Long64 iCell) const 41 | { 42 | Tuple tt = _cellIndexToTuple(iCell); 43 | MC_Vector r = nodeCoord(Tuple4(tt.x(), tt.y(), tt.z(), 0) ); 44 | r += MC_Vector(_dx/2., _dy/2., _dz/2.); 45 | return r; 46 | } 47 | 48 | const vector& GlobalFccGrid::cornerTupleOffsets() const 49 | { 50 | static vector offset; 51 | if (offset.size() == 0) 52 | { 53 | offset.reserve(14); 54 | offset.push_back(Tuple4(0, 0, 0, 0)); // 0 55 | offset.push_back(Tuple4(1, 0, 0, 0)); // 1 56 | offset.push_back(Tuple4(0, 1, 0, 0)); // 2 57 | offset.push_back(Tuple4(1, 1, 0, 0)); // 3 58 | offset.push_back(Tuple4(0, 0, 1, 0)); // 4 59 | offset.push_back(Tuple4(1, 0, 1, 0)); // 5 60 | offset.push_back(Tuple4(0, 1, 1, 0)); // 6 61 | offset.push_back(Tuple4(1, 1, 1, 0)); // 7 62 | offset.push_back(Tuple4(1, 0, 0, 1)); // 8 63 | offset.push_back(Tuple4(0, 0, 0, 1)); // 9 64 | offset.push_back(Tuple4(0, 1, 0, 2)); // 10 65 | offset.push_back(Tuple4(0, 0, 0, 2)); // 11 66 | offset.push_back(Tuple4(0, 0, 1, 3)); // 12 67 | offset.push_back(Tuple4(0, 0, 0, 3)); // 13 68 | } 69 | return offset; 70 | } 71 | 72 | void GlobalFccGrid::getNodeGids(Long64 cellGid, vector& nodeGid) const 73 | { 74 | if( nodeGid.size() == 0 ) 75 | { 76 | nodeGid.resize(14); 77 | } 78 | 79 | Tuple tt = _cellIndexToTuple(cellGid); 80 | Tuple4 baseNodeTuple = Tuple4(tt.x(), tt.y(), tt.z(), 0); 81 | const vector& cornerTupleOffset = cornerTupleOffsets(); 82 | for (unsigned ii=0; ii<14; ++ii) 83 | nodeGid[ii] = _nodeTupleToIndex(baseNodeTuple + 
cornerTupleOffset[ii]); 84 | } 85 | 86 | // for faces on the outer surface of the global grid, the returned cell 87 | // gid will be the same as the input cellGid 88 | void GlobalFccGrid::getFaceNbrGids(Long64 cellGid, vector& nbrCellGid) const 89 | { 90 | if( nbrCellGid.size() == 0 ) 91 | { 92 | nbrCellGid.resize(6); 93 | } 94 | 95 | Tuple cellTuple = _cellIndexToTuple(cellGid); 96 | const vector& faceTupleOffset = getFaceTupleOffset(); 97 | 98 | for (unsigned ii=0; ii<6; ++ii) 99 | { 100 | Tuple faceNbr = cellTuple + faceTupleOffset[ii]; 101 | snapTuple(faceNbr); 102 | nbrCellGid[ii] = _cellTupleToIndex(faceNbr); 103 | } 104 | } 105 | 106 | 107 | MC_Vector GlobalFccGrid::nodeCoord(Long64 index) const 108 | { 109 | return nodeCoord(_nodeIndexToTuple(index)); 110 | } 111 | 112 | MC_Vector GlobalFccGrid::nodeCoord(const Tuple4& tt) const 113 | { 114 | vector basisOffset; 115 | basisOffset.reserve(4); 116 | if (basisOffset.size() == 0) 117 | { 118 | basisOffset.push_back(MC_Vector(0., 0., 0. )); 119 | basisOffset.push_back(MC_Vector(0., _dy/2.0, _dz/2.0)); 120 | basisOffset.push_back(MC_Vector(_dx/2.0, 0., _dz/2.0)); 121 | basisOffset.push_back(MC_Vector(_dx/2.0, _dy/2.0, 0. )); 122 | } 123 | 124 | double rx = tt.x()*_dx; 125 | double ry = tt.y()*_dy; 126 | double rz = tt.z()*_dz; 127 | 128 | MC_Vector rr = MC_Vector(rx, ry, rz) + basisOffset[tt.b()]; 129 | 130 | return rr; 131 | } 132 | 133 | void GlobalFccGrid::snapTuple(Tuple& tt) const 134 | { 135 | tt.x() = min(max(0, tt.x()), _nx-1); 136 | tt.y() = min(max(0, tt.y()), _ny-1); 137 | tt.z() = min(max(0, tt.z()), _nz-1); 138 | } 139 | 140 | namespace 141 | { 142 | const vector& getFaceTupleOffset() 143 | { 144 | static vector faceTupleOffset; 145 | 146 | if (faceTupleOffset.size() == 0) 147 | { 148 | faceTupleOffset.reserve(6); 149 | faceTupleOffset.push_back( Tuple( 1, 0, 0) ); 150 | faceTupleOffset.push_back( Tuple(-1, 0, 0) ); 151 | faceTupleOffset.push_back( Tuple( 0, 1, 0) ); 152 | faceTupleOffset.push_back( Tuple( 0, -1, 0) ); 153 | faceTupleOffset.push_back( Tuple( 0, 0, 1) ); 154 | faceTupleOffset.push_back( Tuple( 0, 0, -1) ); 155 | } 156 | 157 | return faceTupleOffset; 158 | } 159 | } 160 | 161 | -------------------------------------------------------------------------------- /src/GlobalFccGrid.hh: -------------------------------------------------------------------------------- 1 | #ifndef GLOBAL_FCC_GRID_HH 2 | #define GLOBAL_FCC_GRID_HH 3 | 4 | #include 5 | #include "TupleToIndex.hh" 6 | #include "IndexToTuple.hh" 7 | #include "Tuple4ToIndex.hh" 8 | #include "IndexToTuple4.hh" 9 | 10 | 11 | class MC_Vector; 12 | 13 | class GlobalFccGrid 14 | { 15 | public: 16 | GlobalFccGrid(int nx, int ny, int nz, 17 | double lx, double ly, double lz); 18 | 19 | double lx() const {return _lx;} 20 | double ly() const {return _ly;} 21 | double lz() const {return _lz;} 22 | double nx() const {return _nx;} 23 | double ny() const {return _ny;} 24 | double nz() const {return _nz;} 25 | 26 | Long64 whichCell(const MC_Vector& r) const; 27 | 28 | MC_Vector cellCenter(Long64 iCell) const; 29 | Tuple cellIndexToTuple(Long64 iCell) const {return _cellIndexToTuple(iCell);} 30 | Long64 cellTupleToIndex(const Tuple& tt) const {return _cellTupleToIndex(tt);} 31 | 32 | Long64 nodeIndex(const Tuple4& tt) const {return _nodeTupleToIndex(tt);} 33 | 34 | const std::vector& cornerTupleOffsets() const; 35 | void getNodeGids(Long64 cellGid, std::vector& nodeGid) const; 36 | void getFaceNbrGids(Long64 cellGid, std::vector& nbrCellGid) const; 37 | 38 | MC_Vector 
nodeCoord(Long64 index) const; 39 | MC_Vector nodeCoord(const Tuple4& tt) const; 40 | 41 | // We should get rid of snap tuple and provide a way to get the 42 | // indices of face nbrs. 43 | void snapTuple(Tuple& tt) const; 44 | 45 | private: 46 | int _nx, _ny, _nz; // number of cells (i.e., elements) 47 | double _lx, _ly, _lz; // size of problem space (in cm) 48 | double _dx, _dy, _dz; // size of a mesh cell (in cm) 49 | 50 | TupleToIndex _cellTupleToIndex; 51 | IndexToTuple _cellIndexToTuple; 52 | Tuple4ToIndex _nodeTupleToIndex; 53 | IndexToTuple4 _nodeIndexToTuple; 54 | }; 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /src/Globals.hh: -------------------------------------------------------------------------------- 1 | #ifndef GLOBALS_HH 2 | #define GLOBALS_HH 3 | 4 | class MonteCarlo; 5 | extern MonteCarlo* mcco; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/GridAssignmentObject.hh: -------------------------------------------------------------------------------- 1 | #ifndef GRID_ASSIGNMENT_OBJECT_HH 2 | #define GRID_ASSIGNMENT_OBJECT_HH 3 | 4 | #include 5 | #include 6 | #include "MC_Vector.hh" 7 | #include "Tuple.hh" 8 | 9 | /** The GRID_ASSIGNMENT_OBJECT computes the closest center to a given 10 | * particle coordinate using a grid/flood approach. The intent of this 11 | * code is to provide an initial assignment method that scales only as 12 | * the number of particles to assign. (I.e., it is independent of the 13 | * number of centers). 14 | * 15 | * To vastly simplify the code we completely ignore periodic boundary 16 | * conditions. We can get away with this because the initial assignment 17 | * doesn't have to be perfect, it only needs to be close. If we can get 18 | * a particle into a domain that is close to its correct Voronoi domain 19 | * then the regular assignment will do the right thing. 
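 *
 * Usage sketch (illustrative only): build the object once from the list of
 * centers, then query the nearest center for each coordinate:
 *
 *    GridAssignmentObject assigner(centers);
 *    int iCenter = assigner.nearestCenter(coordinate);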
*/ 20 | 21 | class GridAssignmentObject 22 | { 23 | public: 24 | 25 | GridAssignmentObject(const std::vector& centers); 26 | 27 | int nearestCenter(const MC_Vector rr); 28 | 29 | private: 30 | 31 | struct GridCell 32 | { 33 | GridCell() : _burned(false) {}; 34 | 35 | bool _burned; 36 | std::vector _myCenters; 37 | }; 38 | 39 | Tuple whichCellTuple(const MC_Vector r) const; 40 | int whichCell(const MC_Vector r) const; 41 | int tupleToIndex(Tuple tuple) const; 42 | Tuple indexToTuple(int index) const; 43 | double minDist2(const MC_Vector r, int iCell) const; 44 | void addTupleToQueue(Tuple iTuple); 45 | void addNbrsToQueue(int iCell); 46 | 47 | int _nx, _ny, _nz; 48 | double _dx, _dy, _dz; 49 | MC_Vector _corner; 50 | const std::vector& _centers; 51 | 52 | std::vector _grid; 53 | std::queue _floodQueue; 54 | std::queue _wetList; 55 | }; 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /src/IndexToTuple.hh: -------------------------------------------------------------------------------- 1 | #ifndef INDEX_TO_TUPLE_HH 2 | #define INDEX_TO_TUPLE_HH 3 | 4 | #include "Tuple.hh" 5 | #include "Long64.hh" 6 | 7 | class IndexToTuple 8 | { 9 | public: 10 | IndexToTuple(int nx, int ny, int nz) 11 | : nx_(nx), ny_(ny), nz_(nz) 12 | {}; 13 | 14 | Tuple operator()(Long64 index) const 15 | { 16 | int x = index % nx_; 17 | index /= nx_; 18 | int y = index % ny_; 19 | int z = index / ny_; 20 | 21 | return Tuple(x, y, z); 22 | } 23 | 24 | private: 25 | int nx_; 26 | int ny_; 27 | int nz_; 28 | }; 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/IndexToTuple4.hh: -------------------------------------------------------------------------------- 1 | #ifndef INDEX_TO_TUPLE4_HH 2 | #define INDEX_TO_TUPLE4_HH 3 | 4 | #include "Tuple4.hh" 5 | #include "Long64.hh" 6 | 7 | class IndexToTuple4 8 | { 9 | public: 10 | IndexToTuple4(int nx, int ny, int nz, int nb) 11 | : nx_(nx), ny_(ny), nz_(nz), nb_(nb) 12 | {}; 13 | 14 | Tuple4 operator()(Long64 index) const 15 | { 16 | int x = index % nx_; 17 | index /= nx_; 18 | int y = index % ny_; 19 | index /= ny_; 20 | int z = index % nz_; 21 | int b = index / nz_; 22 | 23 | return Tuple4(x, y, z, b); 24 | } 25 | 26 | private: 27 | int nx_; 28 | int ny_; 29 | int nz_; 30 | int nb_; 31 | }; 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/InputBlock.cc: -------------------------------------------------------------------------------- 1 | #include "InputBlock.hh" 2 | #include 3 | #include 4 | 5 | using std::map; 6 | using std::ostringstream; 7 | using std::string; 8 | 9 | 10 | InputBlock::InputBlock(const string& blockName) 11 | :_blockName(blockName) 12 | {} 13 | 14 | void InputBlock::addPair(const string& keyword, const string& value) 15 | { 16 | _kvPair[keyword] = value; 17 | } 18 | 19 | void InputBlock::serialize(std::vector& buf) const 20 | { 21 | ostringstream out; 22 | out << _blockName << '\0'; 23 | for (auto iter=_kvPair.begin(); iter!=_kvPair.end(); ++iter) 24 | out << iter->first << '\0' << iter->second <<'\0'; 25 | string tmp = out.str(); 26 | buf.clear(); 27 | buf.insert(buf.begin(), tmp.begin(), tmp.end()); 28 | } 29 | 30 | void InputBlock::deserialize(const std::vector& buf) 31 | { 32 | const char* tmp = &buf[0]; 33 | const char* end = tmp + buf.size(); 34 | 35 | _blockName = tmp; 36 | tmp += strlen(tmp) +1; 37 | 38 | while (tmp < end) 39 | { 40 | const char* keyword = tmp; 41 | tmp += strlen(tmp) +1; 
42 | const char* value = tmp; 43 | tmp += strlen(tmp) +1; 44 | _kvPair[keyword] = value; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/InputBlock.hh: -------------------------------------------------------------------------------- 1 | #ifndef INPUT_BLOCK_HH 2 | #define INPUT_BLOCK_HH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "qs_assert.hh" 9 | 10 | 11 | class InputBlock 12 | { 13 | public: 14 | 15 | InputBlock(const std::string& blockName); 16 | void addPair(const std::string& keyword, const std::string& value); 17 | 18 | template 19 | void getValue(const std::string& keyword, T& value) const; 20 | 21 | const std::string& name() const {return _blockName;} 22 | unsigned nPairs() const {return _kvPair.size();} 23 | 24 | void serialize(std::vector& buf) const; 25 | void deserialize(const std::vector& buf); 26 | 27 | private: 28 | void parseError(const std::string& keyword) const; 29 | 30 | std::string _blockName; 31 | std::map _kvPair; 32 | }; 33 | 34 | // If the keyword isn't found, value is unchanged. 35 | template 36 | void InputBlock::getValue(const std::string& keyword, T& value) const 37 | { 38 | auto here = _kvPair.find(keyword); 39 | if (here == _kvPair.end()) 40 | return; 41 | 42 | std::istringstream tmp(here->second); 43 | tmp >> value; 44 | 45 | if (!tmp) 46 | parseError(keyword); 47 | } 48 | 49 | inline void InputBlock::parseError(const std::string& keyword) const 50 | { 51 | qs_assert(false); 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /src/Long64.hh: -------------------------------------------------------------------------------- 1 | #ifndef LONG64_H 2 | #define LONG64_H 3 | 4 | #include "portability.hh" 5 | typedef uint64_t Long64; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/MCT.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_HH 2 | #define MCT_HH 3 | 4 | #include "portability.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | class MC_Particle; 8 | class MC_Domain; 9 | class MC_Location; 10 | class MC_Vector; 11 | class DirectionCosine; 12 | class MC_Nearest_Facet; 13 | class Subfacet_Adjacency; 14 | class MonteCarlo; 15 | 16 | 17 | HOST_DEVICE 18 | MC_Nearest_Facet MCT_Nearest_Facet( 19 | MC_Particle *mc_particle, 20 | MC_Location &location, 21 | MC_Vector &coordinate, 22 | const DirectionCosine *direction_cosine, 23 | double distance_threshold, 24 | double current_best_distance, 25 | bool new_segment, 26 | MonteCarlo* monteCarlo); 27 | HOST_DEVICE_END 28 | 29 | 30 | HOST_DEVICE 31 | void MCT_Generate_Coordinate_3D_G( 32 | uint64_t *random_number_seed, 33 | int domain_num, 34 | int cell, 35 | MC_Vector &coordinate, 36 | MonteCarlo* monteCarlo); 37 | HOST_DEVICE_END 38 | 39 | HOST_DEVICE 40 | MC_Vector MCT_Cell_Position_3D_G( 41 | const MC_Domain &domain, 42 | int cell_index); 43 | HOST_DEVICE_END 44 | 45 | HOST_DEVICE 46 | Subfacet_Adjacency &MCT_Adjacent_Facet(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo* monteCarlo); 47 | HOST_DEVICE_END 48 | 49 | HOST_DEVICE 50 | void MCT_Reflect_Particle(MonteCarlo *mcco, MC_Particle &particle); 51 | HOST_DEVICE_END 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/MC_Adjacent_Facet.cc: -------------------------------------------------------------------------------- 1 | #include "MCT.hh" 2 | #include 
"MC_Domain.hh" 3 | #include "Globals.hh" 4 | #include "MonteCarlo.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | class MC_Particle; 8 | 9 | HOST_DEVICE 10 | 11 | Subfacet_Adjacency &MCT_Adjacent_Facet(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo* monteCarlo) 12 | 13 | { 14 | MC_Domain &domain = monteCarlo->domain[location.domain]; 15 | 16 | Subfacet_Adjacency &adjacency =domain.mesh._cellConnectivity[location.cell]._facet[location.facet].subfacet; 17 | 18 | return adjacency; 19 | } 20 | 21 | HOST_DEVICE_END 22 | -------------------------------------------------------------------------------- /src/MC_Base_Particle.cc: -------------------------------------------------------------------------------- 1 | #include "MC_Base_Particle.hh" 2 | 3 | #define MCP_DATA_MEMBER_OLD(member, buffer, index, mode) \ 4 | { if ( mode == MC_Data_Member_Operation::Count ) { (index)++; } \ 5 | else if ( mode == MC_Data_Member_Operation::Pack ) { buffer[ (index)++ ] = (member); } \ 6 | else if ( mode == MC_Data_Member_Operation::Unpack ) { member = buffer[ (index)++ ]; } \ 7 | else if ( mode == MC_Data_Member_Operation::Reset ) { (index)++; member = 0; } } 8 | 9 | #define MCP_DATA_MEMBER_CAST_OLD(member, buffer, index, mode, someType) \ 10 | { if ( mode == MC_Data_Member_Operation::Count ) { (index)++; } \ 11 | else if ( mode == MC_Data_Member_Operation::Pack ) { buffer[ (index)++ ] = (member); } \ 12 | else if ( mode == MC_Data_Member_Operation::Unpack ) { member = (someType) buffer[ (index)++ ]; } \ 13 | else if ( mode == MC_Data_Member_Operation::Reset ) { (index)++; member = (someType) 0; } } 14 | 15 | #define MCP_DATA_MEMBER_LONG_TO_CHAR8(member, buffer, index, mode) \ 16 | { if ( mode == MC_Data_Member_Operation::Count ) { (index) += 8; } \ 17 | else if ( mode == MC_Data_Member_Operation::Pack ) { MC_Long_To_Char8(&member, &buffer[(index)]); (index) += 8; } \ 18 | else if ( mode == MC_Data_Member_Operation::Unpack ) { MC_Char8_To_Long(&member, &buffer[(index)]); (index) += 8; } \ 19 | else if ( mode == MC_Data_Member_Operation::Reset ) { (index) += 8; member = 0; }} 20 | 21 | void MC_Char8_To_Long(uint64_t *long_out, char char_in[8]) 22 | { 23 | *long_out = 0 ; 24 | 25 | for (int char_index = 0; char_index < 8; char_index++) 26 | { 27 | *long_out = *long_out | (unsigned char) char_in[char_index]; // OR in next byte 28 | if (char_index < 7) 29 | { 30 | *long_out = *long_out << 8; // Shift Left one byte 31 | } 32 | } 33 | 34 | } 35 | 36 | void MC_Long_To_Char8(const uint64_t *long_in, 37 | char char_out[8]) 38 | { 39 | uint64_t long_tmp; 40 | uint64_t mask = 0xffff; 41 | 42 | // Examine long_in from Right > Left, byte by byte. 43 | long_tmp = *long_in; 44 | for (int char_index = 7; char_index >= 0; char_index--) 45 | { 46 | char_out[char_index] = (char)(long_tmp & mask); // Get right-most byte 47 | long_tmp = long_tmp >> 8; // Shift Right one byte 48 | } 49 | 50 | } 51 | 52 | 53 | //---------------------------------------------------------------------------------------------------------------------- 54 | // Count, pack or unpack a single base particle. This routine operates in 3 55 | // different modes. This is so that the exact same code does the counting, packing and 56 | // unpacking so they will always stay synchronized and the communication will happen correctly. 57 | // Also, when the data structure changes, you only have to change this one place. 
58 | // 59 | //---------------------------------------------------------------------------------------------------------------------- 60 | void MC_Base_Particle::Serialize(int *int_data, double *float_data, char *char_data, int &int_index, int &float_index, 61 | int &char_index, MC_Data_Member_Operation::Enum mode) 62 | { 63 | MCP_DATA_MEMBER_OLD(coordinate.x, float_data, float_index, mode); 64 | MCP_DATA_MEMBER_OLD(coordinate.y, float_data, float_index, mode); 65 | MCP_DATA_MEMBER_OLD(coordinate.z, float_data, float_index, mode); 66 | MCP_DATA_MEMBER_OLD(velocity.x, float_data, float_index, mode); 67 | MCP_DATA_MEMBER_OLD(velocity.y, float_data, float_index, mode); 68 | MCP_DATA_MEMBER_OLD(velocity.z, float_data, float_index, mode); 69 | MCP_DATA_MEMBER_OLD(kinetic_energy, float_data, float_index, mode); 70 | MCP_DATA_MEMBER_OLD(weight, float_data, float_index, mode); 71 | MCP_DATA_MEMBER_OLD(time_to_census, float_data, float_index, mode); 72 | MCP_DATA_MEMBER_OLD(age, float_data, float_index, mode); 73 | MCP_DATA_MEMBER_OLD(num_mean_free_paths, float_data, float_index, mode); 74 | MCP_DATA_MEMBER_OLD(num_segments, float_data, float_index, mode); 75 | 76 | MCP_DATA_MEMBER_LONG_TO_CHAR8(random_number_seed, char_data, char_index, mode); 77 | MCP_DATA_MEMBER_LONG_TO_CHAR8(identifier, char_data, char_index, mode); 78 | 79 | MCP_DATA_MEMBER_CAST_OLD(last_event, int_data, int_index, mode, MC_Tally_Event::Enum); 80 | MCP_DATA_MEMBER_OLD(num_collisions, int_data, int_index, mode); 81 | MCP_DATA_MEMBER_OLD(breed, int_data, int_index, mode); 82 | MCP_DATA_MEMBER_OLD(species, int_data, int_index, mode); 83 | MCP_DATA_MEMBER_OLD(domain, int_data, int_index, mode); 84 | MCP_DATA_MEMBER_OLD(cell, int_data, int_index, mode); 85 | } 86 | 87 | 88 | 89 | 90 | 91 | 92 | int MC_Base_Particle::num_base_ints = 0; 93 | int MC_Base_Particle::num_base_floats = 0; 94 | int MC_Base_Particle::num_base_chars = 0; 95 | 96 | 97 | //---------------------------------------------------------------------------------------------------------------------- 98 | // Updates the num base counts by creating an instance and callingthe broadcast routine. 
99 | // 100 | //---------------------------------------------------------------------------------------------------------------------- 101 | void MC_Base_Particle::Update_Counts() 102 | { 103 | MC_Base_Particle base_particle; 104 | num_base_ints = 0; 105 | num_base_floats = 0; 106 | num_base_chars = 0; 107 | base_particle.Serialize(NULL, NULL, NULL, num_base_ints, num_base_floats, 108 | num_base_chars, MC_Data_Member_Operation::Count); 109 | } 110 | 111 | -------------------------------------------------------------------------------- /src/MC_Cell_State.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_CELL_STATE_INCLUDE 2 | #define MC_CELL_STATE_INCLUDE 3 | 4 | #include 5 | #include "QS_Vector.hh" 6 | #include "macros.hh" 7 | 8 | 9 | // this stores all the material information on a cell 10 | class MC_Cell_State 11 | { 12 | public: 13 | 14 | int _material; // gid of material 15 | 16 | // pre-computed cross-sections for material 17 | double* _total; // [energy groups] 18 | 19 | double _volume; // cell volume 20 | double _cellNumberDensity; // number density of ions in cel 21 | 22 | uint64_t _id; 23 | uint64_t _sourceTally; 24 | 25 | MC_Cell_State(); 26 | 27 | private: 28 | }; 29 | 30 | inline MC_Cell_State::MC_Cell_State() 31 | : _material(0), 32 | _total(), 33 | _volume(0.0), 34 | _cellNumberDensity(0.0), 35 | _sourceTally(0) 36 | { 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/MC_Distance_To_Facet.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_DISTANCE_INCLUDE 2 | #define MCT_DISTANCE_INCLUDE 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | HOST_DEVICE_CLASS 7 | class MC_Distance_To_Facet 8 | { 9 | public: 10 | double distance; 11 | int facet; 12 | int subfacet; 13 | HOST_DEVICE_CUDA 14 | MC_Distance_To_Facet(): distance(0.0), facet(0), subfacet(0) {} 15 | private: 16 | MC_Distance_To_Facet( const MC_Distance_To_Facet& ); // disable copy constructor 17 | MC_Distance_To_Facet& operator=( const MC_Distance_To_Facet& tmp ); // disable assignment operator 18 | 19 | }; 20 | HOST_DEVICE_END 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/MC_Domain.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_DOMAIN_INCLUDE 2 | #define MC_DOMAIN_INCLUDE 3 | 4 | 5 | #include "QS_Vector.hh" 6 | #include "MC_Facet_Adjacency.hh" 7 | #include "MC_Vector.hh" 8 | #include "MC_Cell_State.hh" 9 | #include "MC_Facet_Geometry.hh" 10 | #include "BulkStorage.hh" 11 | 12 | class Parameters; 13 | class MeshPartition; 14 | class GlobalFccGrid; 15 | class DecompositionObject; 16 | class MaterialDatabase; 17 | 18 | 19 | //---------------------------------------------------------------------------------------------------------------------- 20 | // class that manages data set on a mesh like geometry 21 | //---------------------------------------------------------------------------------------------------------------------- 22 | 23 | class MC_Mesh_Domain 24 | { 25 | public: 26 | 27 | int _domainGid; //dfr: Might be able to delete this later. 
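// Naming convention used here and throughout the mesh classes: a "gid" is
// a global identifier, unique across all MPI ranks, while an "index" is
// the position of the same object in this rank's local arrays.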
28 | 29 | qs_vector<int> _nbrDomainGid; 30 | qs_vector<int> _nbrRank; 31 | 32 | qs_vector<MC_Vector> _node; 33 | qs_vector<MC_Facet_Adjacency_Cell> _cellConnectivity; 34 | 35 | qs_vector<MC_Facet_Geometry_Cell> _cellGeometry; 36 | 37 | 38 | 39 | BulkStorage<MC_Facet_Adjacency> _connectivityFacetStorage; 40 | BulkStorage<int> _connectivityPointStorage; 41 | BulkStorage<MC_General_Plane> _geomFacetStorage; 42 | 43 | // -------------------------- public interface 44 | MC_Mesh_Domain(){}; 45 | MC_Mesh_Domain(const MeshPartition& meshPartition, 46 | const GlobalFccGrid& grid, 47 | const DecompositionObject& ddc, 48 | const qs_vector<MC_Subfacet_Adjacency_Event::Enum>& boundaryCondition); 49 | 50 | }; 51 | 52 | 53 | //---------------------------------------------------------------------------------------------------------------------- 54 | // class that manages a region on a domain. 55 | //---------------------------------------------------------------------------------------------------------------------- 56 | 57 | class MC_Domain 58 | { 59 | public: 60 | int domainIndex; // This appears to be unused. 61 | int global_domain; 62 | 63 | qs_vector<MC_Cell_State> cell_state; 64 | 65 | BulkStorage<double> _cachedCrossSectionStorage; 66 | 67 | // hold mesh information 68 | MC_Mesh_Domain mesh; 69 | 70 | // -------------------------- public interface 71 | MC_Domain(){}; 72 | MC_Domain(const MeshPartition& meshPartition, const GlobalFccGrid& grid, 73 | const DecompositionObject& ddc, const Parameters& params, 74 | const MaterialDatabase& materialDatabase, int numEnergyGroups); 75 | 76 | 77 | void clearCrossSectionCache(int numEnergyGroups); 78 | }; 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /src/MC_Facet_Adjacency.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_FACET_ADJACENCY_INCLUDE 2 | #define MCT_FACET_ADJACENCY_INCLUDE 3 | 4 | 5 | #include <cstdlib> 6 | #include "MC_Location.hh" 7 | #include "macros.hh" 8 | 9 | struct MC_Subfacet_Adjacency_Event 10 | { 11 | public: 12 | enum Enum 13 | { 14 | Adjacency_Undefined = 0, 15 | Boundary_Escape, 16 | Boundary_Reflection, 17 | Transit_On_Processor, 18 | Transit_Off_Processor 19 | }; 20 | }; 21 | 22 | class Subfacet_Adjacency 23 | { 24 | public: 25 | MC_Subfacet_Adjacency_Event::Enum event; 26 | MC_Location current; 27 | MC_Location adjacent; 28 | int neighbor_index; 29 | int neighbor_global_domain; 30 | int neighbor_foreman; 31 | 32 | 33 | Subfacet_Adjacency() 34 | : event(MC_Subfacet_Adjacency_Event::Adjacency_Undefined), 35 | current(), 36 | adjacent(), 37 | neighbor_index(-1), 38 | neighbor_global_domain(-1), 39 | neighbor_foreman(-1) 40 | {} 41 | }; 42 | 43 | class MC_Facet_Adjacency 44 | { 45 | public: 46 | Subfacet_Adjacency subfacet; 47 | int num_points; // the number of points defining that facet, for polyhedra 48 | int point[3]; // the points defining that facet, for polyhedra 49 | 50 | MC_Facet_Adjacency() : subfacet(), num_points(3) {point[0] = point[1] = point[2] = -1;} 51 | }; 52 | 53 | class MC_Facet_Adjacency_Cell 54 | { 55 | public: 56 | int num_facets; // 6 quad faces, each quad has 3 triangles = 24 faces 57 | MC_Facet_Adjacency* _facet; 58 | int num_points; // 8 hex corners + 6 face centers = 14 points 59 | int* _point; 60 | 61 | MC_Facet_Adjacency_Cell() : num_facets(24), _facet(0), num_points(14), _point(0) {} 62 | private: 63 | 64 | }; 65 | 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /src/MC_Facet_Crossing_Event.cc: -------------------------------------------------------------------------------- 1 | #include "MC_Facet_Crossing_Event.hh" 2 |
#include "ParticleVaultContainer.hh" 3 | #include "ParticleVault.hh" 4 | #include "MC_Domain.hh" 5 | #include "Tallies.hh" 6 | #include "MC_Particle.hh" 7 | #include "MC_Facet_Adjacency.hh" 8 | #include "Globals.hh" 9 | #include "MCT.hh" 10 | #include "MC_Particle_Buffer.hh" 11 | #include "DeclareMacro.hh" 12 | #include "macros.hh" 13 | #include "SendQueue.hh" 14 | 15 | //---------------------------------------------------------------------------------------------------------------------- 16 | // Determines whether the particle has been tracked to a facet such that it: 17 | // (i) enters into an adjacent cell 18 | // (ii) escapes across the system boundary (Vacuum BC), or 19 | // (iii) reflects off of the system boundary (Reflection BC). 20 | // 21 | //---------------------------------------------------------------------------------------------------------------------- 22 | 23 | HOST_DEVICE 24 | 25 | MC_Tally_Event::Enum MC_Facet_Crossing_Event(MC_Particle &mc_particle, MonteCarlo* monteCarlo, int particle_index, ParticleVault* processingVault) 26 | { 27 | MC_Location location = mc_particle.Get_Location(); 28 | 29 | Subfacet_Adjacency &facet_adjacency = MCT_Adjacent_Facet(location, mc_particle, monteCarlo); 30 | 31 | if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_On_Processor ) 32 | { 33 | // The particle will enter into an adjacent cell. 34 | mc_particle.domain = facet_adjacency.adjacent.domain; 35 | mc_particle.cell = facet_adjacency.adjacent.cell; 36 | mc_particle.facet = facet_adjacency.adjacent.facet; 37 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Transit_Exit; 38 | } 39 | else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Escape ) 40 | { 41 | // The particle will escape across the system boundary. 42 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; 43 | } 44 | else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection ) 45 | { 46 | // The particle will reflect off of the system boundary. 47 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Reflection; 48 | } 49 | else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_Off_Processor ) 50 | { 51 | // The particle will enter into an adjacent cell on a spatial neighbor. 52 | // The neighboring domain is on another processor. 
Set the particle's domain, cell, and facet to their local values on the neighbor processor. 53 | 54 | mc_particle.domain = facet_adjacency.adjacent.domain; 55 | mc_particle.cell = facet_adjacency.adjacent.cell; 56 | mc_particle.facet = facet_adjacency.adjacent.facet; 57 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Communication; 58 | 59 | // Select particle buffer 60 | int neighbor_rank = monteCarlo->domain[facet_adjacency.current.domain].mesh._nbrRank[facet_adjacency.neighbor_index]; 61 | 62 | processingVault->putParticle( mc_particle, particle_index ); 63 | 64 | //Push neighbor rank and mc_particle onto the send queue 65 | monteCarlo->_particleVaultContainer->getSendQueue()->push( neighbor_rank, particle_index ); 66 | 67 | } 68 | 69 | return mc_particle.last_event; 70 | } 71 | 72 | HOST_DEVICE_END 73 | -------------------------------------------------------------------------------- /src/MC_Facet_Crossing_Event.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_FACET_CROSSING_EVENT_HH 2 | #define MC_FACET_CROSSING_EVENT_HH 3 | 4 | #include "Tallies.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | class ParticleVault; 8 | class MC_Particle; 9 | 10 | HOST_DEVICE 11 | MC_Tally_Event::Enum MC_Facet_Crossing_Event(MC_Particle &mc_particle, MonteCarlo* monteCarlo, int particle_index, ParticleVault* processingVault); 12 | HOST_DEVICE_END 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /src/MC_Facet_Geometry.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_FACET_GEOMETRY_3D_INCLUDE 2 | #define MCT_FACET_GEOMETRY_3D_INCLUDE 3 | 4 | #include "macros.hh" 5 | #include <cstdlib> // NULL 6 | 7 | // A x + B y + C z + D = 0, (A,B,C) is the plane normal and is normalized. 8 | class MC_General_Plane 9 | { 10 | public: 11 | double A; 12 | double B; 13 | double C; 14 | double D; 15 | 16 | // Code to compute coefficients stolen from MCT_Facet_Adjacency_3D_G 17 | MC_General_Plane(){}; 18 | MC_General_Plane(const MC_Vector& r0, const MC_Vector& r1, const MC_Vector& r2) 19 | { 20 | A = ((r1.y - r0.y)*(r2.z - r0.z)) - ((r1.z - r0.z)*(r2.y - r0.y)); 21 | B = ((r1.z - r0.z)*(r2.x - r0.x)) - ((r1.x - r0.x)*(r2.z - r0.z)); 22 | C = ((r1.x - r0.x)*(r2.y - r0.y)) - ((r1.y - r0.y)*(r2.x - r0.x)); 23 | D = -1.0*(A*r0.x + B*r0.y + C*r0.z); 24 | 25 | double magnitude = sqrt(A * A + B * B + C * C); 26 | 27 | if ( magnitude == 0.0 ) 28 | { 29 | A = 1.0; 30 | magnitude = 1.0; 31 | } 32 | // Normalize the planar-facet geometric coefficients.
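// Once (A,B,C) has unit length, evaluating A*p.x + B*p.y + C*p.z + D for a
// point p gives the signed distance from p to the facet's plane, which is
// the quantity the distance-to-facet tracking calculations consume.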
33 | double inv_denominator = 1.0 / magnitude; 34 | 35 | A *= inv_denominator; 36 | B *= inv_denominator; 37 | C *= inv_denominator; 38 | D *= inv_denominator; 39 | } 40 | 41 | }; 42 | 43 | 44 | class MC_Facet_Geometry_Cell 45 | { 46 | public: 47 | MC_General_Plane* _facet; 48 | int _size; 49 | }; 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/MC_Fast_Timer.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_FAST_TIMER_INCLUDE 2 | #define MC_FAST_TIMER_INCLUDE 3 | 4 | #include 5 | #ifndef CHRONO_MISSING 6 | #include 7 | #endif 8 | 9 | #include "portability.hh" // needed for uint64_t in this file 10 | #include "utilsMpi.hh" // needed for MPI_Comm type in this file 11 | 12 | class MC_Fast_Timer 13 | { 14 | public: 15 | uint64_t numCalls; 16 | #ifdef CHRONO_MISSING 17 | double startClock; // from MPI 18 | double stopClock; 19 | #else 20 | std::chrono::high_resolution_clock::time_point startClock; // from c++11 high resolution timer calls 21 | std::chrono::high_resolution_clock::time_point stopClock; 22 | #endif 23 | uint64_t lastCycleClock; // in microseconds 24 | uint64_t cumulativeClock; // in microseconds 25 | 26 | 27 | MC_Fast_Timer() : numCalls(0), startClock(), stopClock(), lastCycleClock(0), cumulativeClock(0) {} ; // consturctor 28 | 29 | 30 | // 1 enumerated type for each timed section, this is hardcoded for efficiency. 31 | enum Enum 32 | { 33 | main = 0, 34 | cycleInit, 35 | cycleTracking, 36 | cycleTracking_Kernel, 37 | cycleTracking_MPI, 38 | cycleTracking_Test_Done, 39 | cycleFinalize, 40 | Num_Timers 41 | }; 42 | }; 43 | 44 | class MC_Fast_Timer_Container 45 | { 46 | public: 47 | MC_Fast_Timer_Container() {} ; // constructor 48 | void Cumulative_Report(int mpi_rank, int num_ranks, MPI_Comm comm_world, uint64_t numSegments); 49 | void Last_Cycle_Report(int report_time, int mpi_rank, int num_ranks, MPI_Comm comm_world); 50 | void Clear_Last_Cycle_Timers(); 51 | MC_Fast_Timer timers[MC_Fast_Timer::Num_Timers]; // timers for various routines 52 | 53 | private: 54 | void Print_Cumulative_Heading(int mpi_rank); 55 | void Print_Last_Cycle_Heading(int mpi_rank); 56 | }; 57 | 58 | 59 | extern const int mc_fast_timer_enums[MC_Fast_Timer::Num_Timers]; 60 | extern const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers]; 61 | 62 | #ifdef DISABLE_TIMERS // Disable timers with empty macros -- do not make timer calls 63 | 64 | #define MC_FASTTIMER_START(timerIndex) 65 | #define MC_FASTTIMER_STOP(timerIndex) 66 | #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) 0.0 67 | 68 | #else // DISABLE_TIMERS not defined. 
Set up timers 69 | 70 | #ifdef CHRONO_MISSING // compiler does not support high resolution timer, use MPI timer instead 71 | 72 | #define MC_FASTTIMER_START(timerIndex) \ 73 | if (omp_get_thread_num() == 0) { \ 74 | mcco->fast_timer->timers[timerIndex].startClock = mpiWtime(); \ 75 | } 76 | 77 | #define MC_FASTTIMER_STOP(timerIndex) \ 78 | if ( omp_get_thread_num() == 0 ) { \ 79 | mcco->fast_timer->timers[timerIndex].stopClock = mpiWtime(); \ 80 | mcco->fast_timer->timers[timerIndex].lastCycleClock += \ 81 | (long unsigned) ((mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock) * 1000000.0); \ 82 | mcco->fast_timer->timers[timerIndex].cumulativeClock += \ 83 | (long unsigned) ((mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock) * 1000000.0); \ 84 | mcco->fast_timer->timers[timerIndex].numCalls++; \ 85 | } 86 | 87 | #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) (float)mcco->fast_timer->timers[timerIndex].lastCycleClock / 1000000. 88 | 89 | #else // else CHRONO_MISSING is not defined, so high resolution clock is available 90 | 91 | #define MC_FASTTIMER_START(timerIndex) \ 92 | if (omp_get_thread_num() == 0) { \ 93 | mcco->fast_timer->timers[timerIndex].startClock = std::chrono::high_resolution_clock::now(); \ 94 | } 95 | 96 | #define MC_FASTTIMER_STOP(timerIndex) \ 97 | if ( omp_get_thread_num() == 0 ) { \ 98 | mcco->fast_timer->timers[timerIndex].stopClock = std::chrono::high_resolution_clock::now(); \ 99 | mcco->fast_timer->timers[timerIndex].lastCycleClock += \ 100 | std::chrono::duration_cast \ 101 | (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ 102 | mcco->fast_timer->timers[timerIndex].cumulativeClock += \ 103 | std::chrono::duration_cast \ 104 | (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ 105 | mcco->fast_timer->timers[timerIndex].numCalls++; \ 106 | } 107 | 108 | #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) (float)mcco->fast_timer->timers[timerIndex].lastCycleClock / 1000000. 109 | 110 | 111 | #endif // end ifdef CHRONO_MISSING else section 112 | #endif // end if DISABLE_TIMERS 113 | 114 | #endif // end ifdef MC_FAST_TIMER_INCLUDE 115 | -------------------------------------------------------------------------------- /src/MC_Load_Particle.cc: -------------------------------------------------------------------------------- 1 | #include "ParticleVault.hh" 2 | #include "MC_Particle.hh" 3 | #include "MC_Time_Info.hh" 4 | #include "DeclareMacro.hh" 5 | 6 | //---------------------------------------------------------------------------------------------------------------------- 7 | // Copies a single particle from the particle-vault data structure into the active-particle data structure. 
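// Loading refreshes three pieces of per-particle state: a non-positive
// time_to_census is advanced by one time step (the particle has just come
// out of census), a negative age is clamped to zero, and the energy group
// is re-derived from the particle's current kinetic energy.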
8 | //---------------------------------------------------------------------------------------------------------------------- 9 | 10 | HOST_DEVICE 11 | void MC_Load_Particle(MonteCarlo *monteCarlo, MC_Particle &mc_particle, ParticleVault *particleVault, int particle_index) 12 | { 13 | //particleVault.popParticle(mc_particle); 14 | particleVault->getParticle(mc_particle, particle_index); 15 | 16 | // Time to Census 17 | if ( mc_particle.time_to_census <= 0.0 ) 18 | { 19 | mc_particle.time_to_census += monteCarlo->time_info->time_step; 20 | } 21 | 22 | // Age 23 | if (mc_particle.age < 0.0) { mc_particle.age = 0.0; } 24 | 25 | // Energy Group 26 | mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy); 27 | // printf("file=%s line=%d\n",__FILE__,__LINE__); 28 | 29 | } 30 | HOST_DEVICE_END 31 | 32 | -------------------------------------------------------------------------------- /src/MC_Location.cc: -------------------------------------------------------------------------------- 1 | #include "MC_Location.hh" 2 | #include "MonteCarlo.hh" 3 | #include "MC_Domain.hh" 4 | #include "DeclareMacro.hh" 5 | 6 | // Return a reference to the domain for this location. 7 | 8 | HOST_DEVICE 9 | const MC_Domain &MC_Location::get_domain(MonteCarlo *mcco) const 10 | { 11 | return mcco->domain[domain]; 12 | } 13 | 14 | HOST_DEVICE_END 15 | -------------------------------------------------------------------------------- /src/MC_Location.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_LOCATION_INCLUDE 2 | #define MC_LOCATION_INCLUDE 3 | 4 | 5 | // ToDo: How much chaos would be caused by removing the default constructor? 6 | 7 | #include 8 | #include "DeclareMacro.hh" 9 | 10 | class MC_Domain; 11 | class MC_Cell_State; 12 | class MonteCarlo; 13 | 14 | HOST_DEVICE_CLASS 15 | class MC_Location 16 | { 17 | public: 18 | int domain; 19 | int cell; 20 | int facet; 21 | 22 | HOST_DEVICE_CUDA 23 | MC_Location(int adomain, int acell, int afacet) 24 | : domain(adomain), 25 | cell(acell), 26 | facet(afacet) 27 | {} 28 | 29 | HOST_DEVICE_CUDA 30 | MC_Location() 31 | : domain(-1), 32 | cell(-1), 33 | facet(-1) 34 | {} 35 | 36 | HOST_DEVICE_CUDA 37 | const MC_Domain& get_domain(MonteCarlo *mcco) const; 38 | }; 39 | HOST_DEVICE_END 40 | 41 | HOST_DEVICE_CUDA 42 | inline bool operator==(const MC_Location& a, const MC_Location b) 43 | { 44 | return 45 | a.domain == b.domain && 46 | a.cell == b.cell && 47 | a.facet == b.facet; 48 | } 49 | 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/MC_Nearest_Facet.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_NEAREST_FACET_INCLUDE 2 | #define MCT_NEAREST_FACET_INCLUDE 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | class MC_Nearest_Facet 7 | { 8 | public: 9 | 10 | int facet; 11 | double distance_to_facet; 12 | double dot_product; 13 | 14 | HOST_DEVICE 15 | MC_Nearest_Facet() 16 | : facet(0), 17 | distance_to_facet(1e80), 18 | dot_product(0.0) 19 | {} 20 | 21 | HOST_DEVICE_CUDA 22 | MC_Nearest_Facet& operator=( const MC_Nearest_Facet& nf ) 23 | { 24 | this->facet = nf.facet; 25 | this->distance_to_facet = nf.distance_to_facet; 26 | this->dot_product = nf.dot_product; 27 | return *this; 28 | } 29 | HOST_DEVICE_END 30 | 31 | }; 32 | #endif 33 | -------------------------------------------------------------------------------- /src/MC_Processor_Info.hh: 
-------------------------------------------------------------------------------- 1 | #ifndef MC_PROCESSOR_INFO_HH 2 | #define MC_PROCESSOR_INFO_HH 3 | 4 | #include "utilsMpi.hh" 5 | 6 | class MC_Processor_Info 7 | { 8 | public: 9 | 10 | int rank; 11 | int num_processors; 12 | int use_gpu; 13 | int gpu_id; 14 | 15 | MPI_Comm comm_mc_world; 16 | 17 | MC_Processor_Info() 18 | : comm_mc_world(MPI_COMM_WORLD), 19 | use_gpu(0), 20 | gpu_id(0) 21 | { 22 | mpiComm_rank(comm_mc_world, &rank); 23 | mpiComm_size(comm_mc_world, &num_processors); 24 | } 25 | 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/MC_RNG_State.cc: -------------------------------------------------------------------------------- 1 | #include "MC_RNG_State.hh" 2 | #include "DeclareMacro.hh" 3 | 4 | //---------------------------------------------------------------------------// 5 | 6 | namespace 7 | { 8 | HOST_DEVICE 9 | // Break a 64 bit state into 2 32 bit ints. 10 | void breakup_uint64( uint64_t uint64_in, 11 | uint32_t& front_bits, uint32_t& back_bits ) 12 | { 13 | front_bits = static_cast<uint32_t>( uint64_in >> 32 ); 14 | back_bits = static_cast<uint32_t>( uint64_in & 0xffffffff ); 15 | } 16 | HOST_DEVICE_END 17 | } 18 | 19 | //---------------------------------------------------------------------------// 20 | 21 | namespace 22 | { 23 | // Function used to hash a 64 bit int into another, unrelated one. It 24 | // does this in two 32 bit chunks. This function uses the algorithm 25 | // from Numerical Recipes in C, 2nd edition: psdes, p. 302. This is 26 | // used to make 64 bit numbers for use as initial states for the 64 27 | // bit lcg random number generator. 28 | HOST_DEVICE 29 | void pseudo_des( uint32_t& lword, uint32_t& irword ) 30 | { 31 | // This random number generator assumes that type uint32_t is a 32 bit int 32 | // = 1/2 of a 64 bit int. (The sizeof operator returns the size in bytes.) 33 | 34 | const int NITER = 2; 35 | const uint32_t c1[] = { 0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L }; 36 | const uint32_t c2[] = { 0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L}; 37 | 38 | uint32_t ia,ib,iswap,itmph=0,itmpl=0; 39 | 40 | for( int i = 0; i < NITER; i++) 41 | { 42 | ia = ( iswap = irword ) ^ c1[i]; 43 | itmpl = ia & 0xffff; 44 | itmph = ia >> 16; 45 | ib = itmpl*itmpl+ ~(itmph*itmph); 46 | 47 | irword = lword ^ (((ia = (ib >> 16) | 48 | ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph); 49 | 50 | lword=iswap; 51 | } 52 | } 53 | HOST_DEVICE_END 54 | } 55 | 56 | //---------------------------------------------------------------------------// 57 | 58 | namespace 59 | { 60 | 61 | HOST_DEVICE 62 | // Function used to reconstruct a 64 bit int from 2 32 bit ints. 63 | uint64_t reconstruct_uint64( uint32_t front_bits, uint32_t back_bits ) 64 | { 65 | uint64_t reconstructed, temp; 66 | reconstructed = static_cast<uint64_t>( front_bits ); 67 | temp = static_cast<uint64_t>( back_bits ); 68 | 69 | // shift first bits 32 bits to left 70 | reconstructed = reconstructed << 32; 71 | 72 | // temp must be masked to kill leading 1's. Then 'or' with reconstructed 73 | // to get the last bits in 74 | reconstructed |= (temp & 0x00000000ffffffff); 75 | 76 | return reconstructed; 77 | } 78 | HOST_DEVICE_END 79 | } 80 | 81 | //---------------------------------------------------------------------------// 82 | 83 | namespace 84 | { 85 | HOST_DEVICE 86 | // Function used to hash a 64 bit int to get an initial state.
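//
// The three helpers above chain into a pipeline: breakup_uint64 splits a
// 64 bit seed into two 32 bit halves, pseudo_des scrambles the halves, and
// reconstruct_uint64 reassembles them.  hash_state() below is exactly that
// pipeline; it is what lets rngSpawn_Random_Number_Seed derive a
// well-separated child seed from a parent seed without any shared state.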
87 | uint64_t hash_state( uint64_t initial_number ) 88 | { 89 | // break initial number apart into 2 32 bit ints 90 | uint32_t front_bits, back_bits; 91 | breakup_uint64( initial_number, front_bits, back_bits ); 92 | 93 | // hash the bits 94 | pseudo_des( front_bits, back_bits ); 95 | 96 | // put the hashed parts together into 1 64 bit int 97 | return reconstruct_uint64( front_bits, back_bits ); 98 | } 99 | HOST_DEVICE_END 100 | } 101 | 102 | //---------------------------------------------------------------------------------------------------------------------- 103 | // This routine spawns a "child" random number seed from a "parent" random number seed. 104 | //---------------------------------------------------------------------------------------------------------------------- 105 | 106 | HOST_DEVICE 107 | uint64_t rngSpawn_Random_Number_Seed(uint64_t *parent_seed) 108 | { 109 | uint64_t spawned_seed = hash_state(*parent_seed); 110 | // Bump the parent seed as that is what is expected from the interface. 111 | rngSample(parent_seed); 112 | return spawned_seed; 113 | } 114 | 115 | HOST_DEVICE_END 116 | -------------------------------------------------------------------------------- /src/MC_RNG_State.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_RNG_STATE_INCLUDE 2 | #define MC_RNG_STATE_INCLUDE 3 | 4 | #include "portability.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | //---------------------------------------------------------------------------------------------------------------------- 8 | // A random number generator that implements a 64 bit linear congruential generator (lcg). 9 | // 10 | // This implementation is based on the rng class from Nick Gentile. 11 | //---------------------------------------------------------------------------------------------------------------------- 12 | 13 | // Generate a new random number seed 14 | HOST_DEVICE 15 | uint64_t rngSpawn_Random_Number_Seed(uint64_t *parent_seed); 16 | HOST_DEVICE_END 17 | 18 | //---------------------------------------------------------------------------------------------------------------------- 19 | // Sample returns the pseudo-random number produced by a call to a random 20 | // number generator. 21 | //---------------------------------------------------------------------------------------------------------------------- 22 | HOST_DEVICE 23 | inline double rngSample(uint64_t *seed) 24 | { 25 | // Reset the state from the previous value. 26 | *seed = 2862933555777941757ULL*(*seed) + 3037000493ULL; 27 | 28 | // Map the int state in (0,2**64) to double (0,1) 29 | // by multiplying by 30 | // 1/(2**64 - 1) = 1/18446744073709551615. 
31 | return 5.4210108624275222e-20*(*seed); 32 | } 33 | HOST_DEVICE_END 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/MC_Segment_Outcome.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_SEGMENT_OUTCOME_INCLUDE 2 | #define MC_SEGMENT_OUTCOME_INCLUDE 3 | 4 | class MC_Particle; 5 | class MC_Vector; 6 | class MonteCarlo; 7 | 8 | 9 | struct MC_Segment_Outcome_type 10 | { 11 | public: 12 | enum Enum 13 | { 14 | Initialize = -1, 15 | Collision = 0, 16 | Facet_Crossing = 1, 17 | Census = 2, 18 | Max_Number = 3 19 | }; 20 | }; 21 | 22 | 23 | struct MC_Collision_Event_Return 24 | { 25 | public: 26 | enum Enum 27 | { 28 | Stop_Tracking = 0, 29 | Continue_Tracking = 1, 30 | Continue_Collision = 2 31 | }; 32 | }; 33 | 34 | #include "DeclareMacro.hh" 35 | HOST_DEVICE 36 | MC_Segment_Outcome_type::Enum MC_Segment_Outcome(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int &flux_tally_index); 37 | HOST_DEVICE_END 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/MC_SourceNow.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_SOURCE_NOW_HH 2 | #define MC_SOURCE_NOW_HH 3 | 4 | class MonteCarlo; 5 | 6 | void MC_SourceNow(MonteCarlo *mcco); 7 | 8 | #endif 9 | 10 | -------------------------------------------------------------------------------- /src/MC_Time_Info.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_TIME_INFO_INCLUDE 2 | #define MC_TIME_INFO_INCLUDE 3 | 4 | 5 | class MC_Time_Info 6 | { 7 | public: 8 | int cycle; 9 | double initial_time; 10 | double final_time; 11 | double time; 12 | double time_step; 13 | 14 | MC_Time_Info() : cycle(0), initial_time(0.0), final_time(), time(0.0), time_step(1.0) {} 15 | 16 | }; 17 | 18 | 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/MC_Vector.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_VECTOR_INCLUDE 2 | #define MC_VECTOR_INCLUDE 3 | 4 | #include 5 | #include "DeclareMacro.hh" 6 | 7 | HOST_DEVICE_CLASS 8 | class MC_Vector 9 | { 10 | public: 11 | double x; 12 | double y; 13 | double z; 14 | 15 | HOST_DEVICE_CUDA 16 | MC_Vector() : x(0), y(0), z(0) {} 17 | HOST_DEVICE_CUDA 18 | MC_Vector(double a, double b, double c) : x(a), y(b), z(c) {} 19 | 20 | HOST_DEVICE_CUDA 21 | MC_Vector& operator=( const MC_Vector&tmp ) 22 | { 23 | if ( this == &tmp ) { return *this; } 24 | 25 | x = tmp.x; 26 | y = tmp.y; 27 | z = tmp.z; 28 | 29 | return *this; 30 | } 31 | 32 | HOST_DEVICE_CUDA 33 | bool operator==( const MC_Vector& tmp ) 34 | { 35 | return tmp.x == x && tmp.y == y && tmp.z == z; 36 | } 37 | 38 | HOST_DEVICE_CUDA 39 | MC_Vector& operator+=( const MC_Vector &tmp ) 40 | { 41 | x += tmp.x; 42 | y += tmp.y; 43 | z += tmp.z; 44 | return *this; 45 | } 46 | 47 | HOST_DEVICE_CUDA 48 | MC_Vector& operator-=( const MC_Vector &tmp ) 49 | { 50 | x -= tmp.x; 51 | y -= tmp.y; 52 | z -= tmp.z; 53 | return *this; 54 | } 55 | 56 | HOST_DEVICE_CUDA 57 | MC_Vector& operator*=(const double scalar) 58 | { 59 | x *= scalar; 60 | y *= scalar; 61 | z *= scalar; 62 | return *this; 63 | } 64 | 65 | HOST_DEVICE_CUDA 66 | MC_Vector& operator/=(const double scalar) 67 | { 68 | x /= scalar; 69 | y /= scalar; 70 | z /= scalar; 71 | return *this; 72 | } 73 | 74 | HOST_DEVICE_CUDA 75 | const MC_Vector operator+( 
const MC_Vector &tmp ) const 76 | { 77 | return MC_Vector(x + tmp.x, y + tmp.y, z + tmp.z); 78 | } 79 | 80 | HOST_DEVICE_CUDA 81 | const MC_Vector operator-( const MC_Vector &tmp ) const 82 | { 83 | return MC_Vector(x - tmp.x, y - tmp.y, z - tmp.z); 84 | } 85 | 86 | HOST_DEVICE_CUDA 87 | const MC_Vector operator*(const double scalar) const 88 | { 89 | return MC_Vector(scalar*x, scalar*y, scalar*z); 90 | } 91 | 92 | HOST_DEVICE_CUDA 93 | inline double Length() const { return std::sqrt(x*x + y*y + z*z); } 94 | 95 | // Distance from this vector to another point. 96 | HOST_DEVICE_CUDA 97 | inline double Distance(const MC_Vector& vv) const 98 | { return std::sqrt((x - vv.x)*(x - vv.x) + (y - vv.y)*(y - vv.y)+ (z - vv.z)*(z - vv.z)); } 99 | 100 | HOST_DEVICE_CUDA 101 | inline double Dot(const MC_Vector &tmp) const 102 | { 103 | return this->x*tmp.x + this->y*tmp.y + this->z*tmp.z; 104 | } 105 | 106 | HOST_DEVICE_CUDA 107 | inline MC_Vector Cross(const MC_Vector &v) const 108 | { 109 | return MC_Vector(y * v.z - z * v.y, 110 | z * v.x - x * v.z, 111 | x * v.y - y * v.x); 112 | } 113 | 114 | }; 115 | HOST_DEVICE_END 116 | 117 | 118 | #endif 119 | -------------------------------------------------------------------------------- /src/MacroscopicCrossSection.cc: -------------------------------------------------------------------------------- 1 | #include "MacroscopicCrossSection.hh" 2 | #include "MonteCarlo.hh" 3 | #include "MaterialDatabase.hh" 4 | #include "NuclearData.hh" 5 | #include "MC_Cell_State.hh" 6 | #include "DeclareMacro.hh" 7 | 8 | //---------------------------------------------------------------------------------------------------------------------- 9 | // Routine MacroscopicCrossSection calculates the number-density-weighted macroscopic cross 10 | // section of a cell. 11 | // 12 | // A reactionIndex of -1 means total cross section. 13 | //---------------------------------------------------------------------------------------------------------------------- 14 | HOST_DEVICE 15 | double macroscopicCrossSection(MonteCarlo* monteCarlo, int reactionIndex, int domainIndex, int cellIndex, 16 | int isoIndex, int energyGroup) 17 | { 18 | // Initialize various data items. 19 | int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material; 20 | 21 | double atomFraction = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._atomFraction; 22 | 23 | double microscopicCrossSection = 0.0; 24 | // The cell number density is the fraction of the atoms in cell 25 | // volume of this isotope. We set this (elsewhere) to 1/nIsotopes. 
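// (Concretely, the value returned below is
//    Sigma = atomFraction * cellNumberDensity * microscopicCrossSection,
// the standard number-density-weighted macroscopic cross section, in units
// of inverse length.)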
26 | // This is a statement that we treat materials as if all of their 27 | // isotopes are present in equal amounts 28 | double cellNumberDensity = monteCarlo->domain[domainIndex].cell_state[cellIndex]._cellNumberDensity; 29 | 30 | int isotopeGid = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid; 31 | if ( atomFraction == 0.0 || cellNumberDensity == 0.0) { return 1e-20; } 32 | 33 | if (reactionIndex < 0) 34 | { 35 | // Return total cross section 36 | microscopicCrossSection = monteCarlo->_nuclearData->getTotalCrossSection(isotopeGid, energyGroup); 37 | } 38 | else 39 | { 40 | // Return the reaction cross section 41 | microscopicCrossSection = monteCarlo->_nuclearData->getReactionCrossSection((unsigned int)reactionIndex, 42 | isotopeGid, energyGroup); 43 | } 44 | 45 | return atomFraction * cellNumberDensity * microscopicCrossSection; 46 | 47 | } 48 | HOST_DEVICE_END 49 | 50 | 51 | //---------------------------------------------------------------------------------------------------------------------- 52 | // Routine weightedMacroscopicCrossSection calculates the number-density-weighted 53 | // macroscopic cross section of the collection of isotopes in a cell. 54 | //dfr Weighted is a bit of a misnomer here, since there is no weighting 55 | //applied by this routine. In Mercury we would weight for multiple 56 | //materials in a cell. 57 | //---------------------------------------------------------------------------------------------------------------------- 58 | HOST_DEVICE 59 | double weightedMacroscopicCrossSection(MonteCarlo* monteCarlo, int taskIndex, int domainIndex, 60 | int cellIndex, int energyGroup) 61 | { 62 | double* precomputedCrossSection = 63 | &monteCarlo->domain[domainIndex].cell_state[cellIndex]._total[energyGroup]; 64 | qs_assert (precomputedCrossSection != NULL); 65 | if (*precomputedCrossSection > 0.0) 66 | return *precomputedCrossSection; 67 | 68 | int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material; 69 | int nIsotopes = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size(); 70 | double sum = 0.0; 71 | for (int isoIndex = 0; isoIndex < nIsotopes; isoIndex++) 72 | { 73 | sum += macroscopicCrossSection(monteCarlo, -1, domainIndex, cellIndex, 74 | isoIndex, energyGroup); 75 | } 76 | 77 | QS::atomicWrite( *precomputedCrossSection, sum ); 78 | 79 | return sum; 80 | } 81 | HOST_DEVICE_END 82 | -------------------------------------------------------------------------------- /src/MacroscopicCrossSection.hh: -------------------------------------------------------------------------------- 1 | #ifndef MACROSCOPIC_CROSS_SECTION_HH 2 | #define MACROSCOPIC_CROSS_SECTION_HH 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | class MonteCarlo; 7 | 8 | HOST_DEVICE 9 | double macroscopicCrossSection(MonteCarlo* monteCarlo, int reactionIndex, int domainIndex, int cellIndex, 10 | int isoIndex, int energyGroup); 11 | HOST_DEVICE_END 12 | 13 | HOST_DEVICE 14 | double weightedMacroscopicCrossSection(MonteCarlo* monteCarlo, int taskIndex, int domainIndex, 15 | int cellIndex, int energyGroup); 16 | HOST_DEVICE_END 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/MaterialDatabase.hh: -------------------------------------------------------------------------------- 1 | #ifndef MATERIALDATABASE_HH 2 | #define MATERIALDATABASE_HH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "qs_assert.hh" 9 | 10 | // For this material, store the global id in NuclearData of 
the isotope. 11 | class Isotope 12 | { 13 | public: 14 | Isotope() 15 | : _gid(0), _atomFraction(0) { } 16 | 17 | Isotope(int isotopeGid, double atomFraction) 18 | : _gid(isotopeGid), _atomFraction(atomFraction) { } 19 | 20 | ~Isotope() {} 21 | 22 | int _gid; //!< index into NuclearData 23 | double _atomFraction; 24 | 25 | }; 26 | 27 | // Material information 28 | class Material 29 | { 30 | public: 31 | std::string _name; 32 | double _mass; 33 | qs_vector<Isotope> _iso; 34 | 35 | Material() 36 | : _name("0"), _mass(1000.0) {} 37 | 38 | Material(const std::string &name) 39 | : _name(name), _mass(1000.0){} 40 | 41 | Material(const std::string &name, double mass) 42 | : _name(name), _mass(mass){} 43 | 44 | ~Material() {} 45 | 46 | void addIsotope(const Isotope& isotope) 47 | { 48 | _iso.Open(); 49 | _iso.push_back(isotope); 50 | _iso.Close(); 51 | } 52 | 53 | }; 54 | 55 | 56 | // Top level class to store material information 57 | class MaterialDatabase 58 | { 59 | public: 60 | 61 | void addMaterial(const Material& material) 62 | { 63 | _mat.Open(); 64 | _mat.push_back(material); 65 | _mat.Close(); 66 | } 67 | 68 | int findMaterial(const std::string& name) const 69 | { 70 | for (int matIndex = 0; matIndex < _mat.size(); matIndex++) 71 | { 72 | if (_mat[matIndex]._name == name) { return matIndex; } 73 | } 74 | qs_assert(false); 75 | return -1; 76 | } 77 | 78 | // The list of materials in the problem. 79 | qs_vector<Material> _mat; 80 | 81 | }; 82 | 83 | #endif 84 | 85 | // The input for the nuclear data comes from the material section 86 | // The input may look like 87 | // 88 | // material NAME 89 | // nIsotope=XXX 90 | // nReactions=XXX 91 | // fissionCrossSection="XXX" 92 | // scatterCrossSection="XXX" 93 | // absorptionCrossSection="XXX" 94 | // nuBar=XXX 95 | // totalCrossSection=XXX 96 | // fissionWeight=XXX 97 | // scatterWeight=XXX 98 | // absorptionWeight=XXX 99 | // 100 | // Material NAME2 101 | // ... 102 | // 103 | // table NAME 104 | // a=XXX 105 | // b=XXX 106 | // c=XXX 107 | // d=XXX 108 | // e=XXX 109 | // 110 | // table NAME2 111 | // 112 | // Each isotope inside a material will have identical cross sections. 113 | // However, it will be treated as unique in the nuclear data.
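//
// As a sketch of how the classes above fit together (illustrative only;
// the material name and numbers are hypothetical, not from this repo):
//
//    MaterialDatabase db;
//    Material steel("steel");
//    steel.addIsotope(Isotope(/*isotopeGid*/ 0, /*atomFraction*/ 0.5));
//    steel.addIsotope(Isotope(1, 0.5));
//    db.addMaterial(steel);
//
//    int matIndex = db.findMaterial("steel");   // qs_assert fires if absent
//    double f = db._mat[matIndex]._iso[0]._atomFraction;   // 0.5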
114 | -------------------------------------------------------------------------------- /src/MemoryControl.hh: -------------------------------------------------------------------------------- 1 | #ifndef MEMORY_CONTROL_HH 2 | #define MEMORY_CONTROL_HH 3 | 4 | #include "gpuPortability.hh" 5 | #include "qs_assert.hh" 6 | 7 | namespace MemoryControl 8 | { 9 | enum AllocationPolicy {HOST_MEM, UVM_MEM, UNDEFINED_POLICY}; 10 | 11 | template 12 | T* allocate(const int size, const AllocationPolicy policy) 13 | { 14 | if (size == 0) { return NULL;} 15 | T* tmp = NULL; 16 | 17 | switch (policy) 18 | { 19 | case AllocationPolicy::HOST_MEM: 20 | tmp = new T [size]; 21 | break; 22 | #ifdef HAVE_UVM 23 | case AllocationPolicy::UVM_MEM: 24 | void *ptr; 25 | gpuMallocManaged(&ptr, size*sizeof(T)); 26 | tmp = new(ptr) T[size]; 27 | break; 28 | #endif 29 | default: 30 | qs_assert(false); 31 | break; 32 | } 33 | return tmp; 34 | } 35 | 36 | template 37 | void deallocate(T* data, const int size, const AllocationPolicy policy) 38 | { 39 | switch (policy) 40 | { 41 | case AllocationPolicy::HOST_MEM: 42 | delete[] data; 43 | break; 44 | #ifdef HAVE_UVM 45 | case AllocationPolicy::UVM_MEM: 46 | for (int i=0; i < size; ++i) 47 | data[i].~T(); 48 | gpuFree(data); 49 | break; 50 | #endif 51 | default: 52 | qs_assert(false); 53 | break; 54 | } 55 | } 56 | } 57 | 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /src/MeshPartition.hh: -------------------------------------------------------------------------------- 1 | #ifndef MESH_PARTITION_HH 2 | #define MESH_PARTITION_HH 3 | 4 | #include 5 | #include 6 | #include "Long64.hh" 7 | 8 | class MC_Vector; 9 | class GlobalFccGrid; 10 | class CommObject; 11 | 12 | struct CellInfo 13 | { 14 | CellInfo() 15 | : _domainGid(-1), _foreman(-1), _domainIndex(-1), _cellIndex(-1){} 16 | CellInfo(int domainGid, int foreman, int domainIndex, int cellIndex) 17 | :_domainGid(domainGid), _foreman(foreman), _domainIndex(domainIndex), _cellIndex(cellIndex){} 18 | 19 | int _domainGid; 20 | int _foreman; 21 | int _domainIndex; 22 | int _cellIndex; 23 | }; 24 | 25 | 26 | class MeshPartition 27 | { 28 | public: 29 | 30 | typedef std::map MapType; 31 | 32 | MeshPartition(){}; 33 | MeshPartition(int domainGid, int domainIndex, int foreman); 34 | 35 | const int& domainGid() const {return _domainGid;} 36 | const int& domainIndex() const {return _domainIndex;} 37 | const int& foreman() const {return _foreman;} 38 | const std::vector& nbrDomains() const {return _nbrDomains;} 39 | 40 | const CellInfo& getCell(Long64 cellGid){return _cellInfoMap[cellGid];} 41 | MapType::const_iterator findCell(Long64 cellGid) const 42 | {return _cellInfoMap.find(cellGid);} 43 | 44 | MapType::const_iterator begin() const {return _cellInfoMap.begin();} 45 | MapType::const_iterator end() const {return _cellInfoMap.end();} 46 | int size() const { return _cellInfoMap.size(); } 47 | 48 | 49 | void addCell(Long64 cellGid, const CellInfo& cellInfo){_cellInfoMap[cellGid] = cellInfo;} 50 | 51 | // Warning: parition will contain some remote cells with invalid 52 | // domainIndex and cellIndex. These cells are not connected by a 53 | // face to any local cell so they are harmless. We could write code 54 | // to delete them if having them around is a problem. 
55 | void buildMeshPartition(const GlobalFccGrid& grid, 56 | const std::vector centers, 57 | CommObject* comm); 58 | 59 | private: 60 | int _domainGid; //!< gid of this domain 61 | int _domainIndex; //!< local index of this domain 62 | int _foreman; 63 | MapType _cellInfoMap; 64 | std::vector _nbrDomains; // domain; 30 | 31 | Parameters _params; 32 | NuclearData* _nuclearData; 33 | ParticleVaultContainer* _particleVaultContainer; 34 | MaterialDatabase* _materialDatabase; 35 | Tallies *_tallies; 36 | MC_Time_Info *time_info; 37 | MC_Fast_Timer_Container *fast_timer; 38 | MC_Processor_Info *processor_info; 39 | MC_Particle_Buffer *particle_buffer; 40 | 41 | double source_particle_weight; 42 | 43 | private: 44 | // Disable copy constructor and assignment operator 45 | MonteCarlo(const MonteCarlo&); 46 | MonteCarlo& operator=(const MonteCarlo&); 47 | }; 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/MpiCommObject.hh: -------------------------------------------------------------------------------- 1 | #ifndef MPI_COMM_OBJECT_HH 2 | #define MPI_COMM_OBJECT_HH 3 | 4 | #include "CommObject.hh" 5 | 6 | #include 7 | #include 8 | #include "utilsMpi.hh" 9 | 10 | #include "MeshPartition.hh" 11 | #include "Long64.hh" 12 | #include "DecompositionObject.hh" 13 | 14 | class MpiCommObject : public CommObject 15 | { 16 | public: 17 | MpiCommObject(const MPI_Comm& comm, const DecompositionObject& ddc); 18 | 19 | void exchange(MeshPartition::MapType& cellInfo, 20 | const std::vector& nbrDomain, 21 | std::vector > sendSet, 22 | std::vector > recvSet); 23 | 24 | void exchange(std::vector sendBuf, 25 | std::vector& recvBuf); 26 | private: 27 | MPI_Comm _comm; 28 | DecompositionObject _ddc; 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/NVTX_Range.hh: -------------------------------------------------------------------------------- 1 | /// \file 2 | /// Manage NVTX ranges. These are used to provide extra information 3 | /// to NVProf. They also create regions that can be visualized in 4 | /// NVVP. 5 | 6 | /// The easiest way to use a range is to create a NVTX_Range instance 7 | /// at the start of a scope (such as a function). The range will be 8 | /// automatically ended by the destructor when the instance goes out 9 | /// of scope. The endRange() method exists for situations where it 10 | /// would be awkward or impossible to take advantage of scope to end 11 | /// the range. 
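///
/// A short usage sketch (the function name is hypothetical):
///
///    void trackingSection()
///    {
///       NVTX_Range range("trackingSection"); // range opens here
///       // ... work attributed to this range ...
///    } // destructor ends the range
///
/// When the region of interest does not line up with a scope, call
/// endRange() explicitly instead.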
12 | 13 | #ifndef NVTX_RANGE_HH 14 | #define NVTX_RANGE_HH 15 | 16 | #include 17 | 18 | #ifdef USE_NVTX 19 | #include "nvToolsExt.h" 20 | #endif 21 | 22 | 23 | class NVTX_Range 24 | { 25 | public: 26 | 27 | NVTX_Range(const std::string& rangeName) 28 | { 29 | #ifdef USE_NVTX 30 | char *result = strdup(rangeName.c_str()); 31 | _rangeId = nvtxRangeStartA(result); 32 | _isOpen = true; 33 | #endif 34 | } 35 | 36 | ~NVTX_Range() 37 | { 38 | #ifdef USE_NVTX 39 | if (_isOpen) 40 | nvtxRangeEnd(_rangeId); 41 | #endif 42 | } 43 | 44 | void endRange() 45 | { 46 | #ifdef USE_NVTX 47 | nvtxRangeEnd(_rangeId); 48 | _isOpen = false; 49 | #endif 50 | } 51 | 52 | private: 53 | #ifdef USE_NVTX 54 | nvtxRangeId_t _rangeId; 55 | bool _isOpen; 56 | #endif 57 | }; 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /src/NuclearData.hh: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEAR_DATA_HH 2 | #define NUCLEAR_DATA_HH 3 | 4 | #include 5 | #include 6 | #include "QS_Vector.hh" 7 | #include 8 | #include 9 | #include 10 | #include "qs_assert.hh" 11 | #include "DeclareMacro.hh" 12 | 13 | class Polynomial 14 | { 15 | public: 16 | Polynomial(double aa, double bb, double cc, double dd, double ee) 17 | : 18 | _aa(aa), _bb(bb), _cc(cc), _dd(dd), _ee(ee){} 19 | 20 | double operator()(double xx) const 21 | { 22 | return _ee + xx * (_dd + xx * (_cc + xx * (_bb + xx * (_aa)))); 23 | } 24 | 25 | private: 26 | double _aa, _bb, _cc, _dd, _ee; 27 | }; 28 | 29 | // Lowest level class at the reaction level 30 | class NuclearDataReaction 31 | { 32 | public: 33 | // The types of reactions 34 | enum Enum 35 | { 36 | Undefined = 0, 37 | Scatter, 38 | Absorption, 39 | Fission 40 | }; 41 | 42 | NuclearDataReaction(){}; 43 | 44 | NuclearDataReaction(Enum reactionType, double nuBar, const qs_vector& energies, 45 | const Polynomial& polynomial, double reationCrossSection); 46 | 47 | 48 | HOST_DEVICE_CUDA 49 | double getCrossSection(unsigned int group); 50 | HOST_DEVICE_CUDA 51 | void sampleCollision(double incidentEnergy, double material_mass, double* energyOut, 52 | double* angleOut, int &nOut, uint64_t* seed, int max_production_size); 53 | 54 | 55 | qs_vector _crossSection; //!< tabular data for microscopic cross section 56 | Enum _reactionType; //!< What type of reaction is this 57 | double _nuBar; //!< If this is a fission, specify the nu bar 58 | 59 | }; 60 | 61 | // This class holds an array of reactions for neutrons 62 | class NuclearDataSpecies 63 | { 64 | public: 65 | 66 | void addReaction(NuclearDataReaction::Enum type, double nuBar, qs_vector& energies, 67 | const Polynomial& polynomial, double reactionCrossSection); 68 | 69 | qs_vector _reactions; 70 | }; 71 | 72 | // For this isotope, store the cross sections. In this case the species is just neutron. 
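//
// Aside on the Polynomial class above: operator() evaluates a quartic in
// Horner form,
//
//    p(x) = ee + x*(dd + x*(cc + x*(bb + x*aa)))
//         = aa*x^4 + bb*x^3 + cc*x^2 + dd*x + ee,
//
// so a cross-section table with aa = bb = cc = dd = 0 and ee = 1 is a flat,
// energy-independent cross section.
//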
73 | class NuclearDataIsotope 74 | { 75 | public: 76 | 77 | NuclearDataIsotope() 78 | : _species(1,VAR_MEM){} 79 | 80 | qs_vector _species; 81 | 82 | }; 83 | 84 | // Top level class to handle all things related to nuclear data 85 | class NuclearData 86 | { 87 | public: 88 | 89 | NuclearData(int numGroups, double energyLow, double energyHigh); 90 | 91 | int addIsotope(int nReactions, 92 | const Polynomial& fissionFunction, 93 | const Polynomial& scatterFunction, 94 | const Polynomial& absorptionFunction, 95 | double nuBar, 96 | double totalCrossSection, 97 | double fissionWeight, double scatterWeight, double absorptionWeight); 98 | 99 | HOST_DEVICE_CUDA 100 | int getEnergyGroup(double energy); 101 | HOST_DEVICE_CUDA 102 | int getNumberReactions(unsigned int isotopeIndex); 103 | HOST_DEVICE_CUDA 104 | double getTotalCrossSection(unsigned int isotopeIndex, unsigned int group); 105 | HOST_DEVICE_CUDA 106 | double getReactionCrossSection(unsigned int reactIndex, unsigned int isotopeIndex, unsigned int group); 107 | 108 | int _numEnergyGroups; 109 | // Store the cross sections and reactions by isotope, which stores 110 | // it by species 111 | qs_vector _isotopes; 112 | // This is the overall energy layout. If we had more than just 113 | // neutrons, this array would be a vector of vectors. 114 | qs_vector _energies; 115 | 116 | }; 117 | 118 | #endif 119 | 120 | // The input for the nuclear data comes from the material section 121 | // The input looks may like 122 | // 123 | // material NAME 124 | // nIsotope=XXX 125 | // nReactions=XXX 126 | // fissionCrossSection="XXX" 127 | // scatterCrossSection="XXX" 128 | // absorptionCrossSection="XXX" 129 | // nuBar=XXX 130 | // totalCrossSection=XXX 131 | // fissionWeight=XXX 132 | // scatterWeight=XXX 133 | // absorptionWeight=XXX 134 | // 135 | // Material NAME2 136 | // ... 137 | // 138 | // table NAME 139 | // a=XXX 140 | // b=XXX 141 | // c=XXX 142 | // d=XXX 143 | // e=XXX 144 | // 145 | // table NAME2 146 | // 147 | // Each isotope inside a material will have identical cross sections. 148 | // However, it will be treated as unique in the nuclear data. 149 | // Cross sectionsare strings that refer to tables 150 | -------------------------------------------------------------------------------- /src/ParticleVault.cc: -------------------------------------------------------------------------------- 1 | #include "ParticleVault.hh" 2 | #include "MC_Processor_Info.hh" 3 | #include "Globals.hh" 4 | 5 | #if 0 6 | void ParticleVault:: 7 | cleanVault( int end_index ) 8 | { 9 | int s1 = end_index; 10 | int s2 = _particles.size(); 11 | 12 | int starting_point = s2 - ( ( s1<(s2-s1)) ? 
s1 : (s2-s1)); 13 | 14 | #if defined HAVE_OPENMP_TARGET 15 | #pragma omp target teams distribute parallel for thread_limit(64) 16 | #endif 17 | for( int ii = starting_point; ii < s2; ii++ ) 18 | { 19 | qs_assert( _particles[ii-starting_point].species == -1 ); 20 | _particles[ii-starting_point] = _particles[ii]; 21 | _particles[ii].species = -1; 22 | } 23 | 24 | _particles.eraseEnd( _particles.size() - end_index ); 25 | } 26 | #endif 27 | 28 | void ParticleVault:: 29 | collapse( size_t fill_size, ParticleVault* vault2 ) 30 | { 31 | //The entirety of vault 2 fits in the space available in this vault 32 | if( vault2->size() < fill_size ) 33 | { 34 | this->append( *vault2 ); 35 | vault2->clear(); 36 | } 37 | else //Fill in what we can until either vault2 is empty or we have filled this vault 38 | { 39 | bool notEmpty = false; 40 | uint64_t fill = 0; 41 | do 42 | { 43 | MC_Base_Particle base_particle; 44 | notEmpty = vault2->popBaseParticle( base_particle ); 45 | if( notEmpty ) 46 | { 47 | this->pushBaseParticle( base_particle ); 48 | fill++; 49 | } 50 | }while( notEmpty && fill < fill_size); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/ParticleVaultContainer.hh: -------------------------------------------------------------------------------- 1 | #ifndef PARTICLEVAULTCONTAINER_HH 2 | #define PARTICLEVAULTCONTAINER_HH 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | #include "portability.hh" 7 | #include "QS_Vector.hh" 8 | #include <vector> 9 | 10 | //--------------------------------------------------------------- 11 | // ParticleVaultContainer is a container of ParticleVaults. 12 | // These Vaults are broken down into user defined chunks that can 13 | // be used to overlap asynchronous MPI with the tracking kernel. 14 | // 15 | // Facilities for storing Processing, Processed, and Extra vaults 16 | // are controlled by the ParticleVaultContainer, as well as the
20 | 
21 | class MC_Base_Particle;
22 | class MC_Particle;
23 | class ParticleVault;
24 | class SendQueue;
25 | 
26 | //typedef unsigned long long int uint64_cu;
27 | 
28 | class ParticleVaultContainer
29 | {
30 |   public:
31 | 
32 |     //Constructor
33 |     ParticleVaultContainer( uint64_t vault_size,
34 |         uint64_t num_vaults, uint64_t num_extra_vaults );
35 | 
36 |     //Destructor
37 |     ~ParticleVaultContainer();
38 | 
39 |     //Basic Getters
40 |     uint64_t getVaultSize(){ return _vaultSize; }
41 |     uint64_t getNumExtraVaults(){ return _numExtraVaults; }
42 | 
43 |     uint64_t processingSize(){ return _processingVault.size(); }
44 |     uint64_t processedSize(){ return _processedVault.size(); }
45 | 
46 |     //Returns the ParticleVault currently pointed to by the
47 |     //given index
48 |     ParticleVault* getTaskProcessingVault(uint64_t vaultIndex);
49 |     ParticleVault* getTaskProcessedVault( uint64_t vaultIndex);
50 | 
51 |     //Returns the index to the first empty Processed Vault
52 |     uint64_t getFirstEmptyProcessedVault();
53 | 
54 |     //Returns a pointer to the Send Queue
55 |     HOST_DEVICE
56 |     SendQueue* getSendQueue();
57 |     HOST_DEVICE_END
58 | 
59 |     //Counts Particles in all vaults
60 |     uint64_t sizeProcessing();
61 |     uint64_t sizeProcessed();
62 |     uint64_t sizeExtra();
63 | 
64 |     //Collapses Particles down into the fewest vaults needed to
65 |     //hold them; removes all but the last partially
66 |     //filled vault
67 |     void collapseProcessing();
68 |     void collapseProcessed();
69 | 
70 |     //Swaps the particles in Processed for the empty vaults in
71 |     //Processing
72 |     void swapProcessingProcessedVaults();
73 | 
74 |     //Adds a particle to the processing particle vault
75 |     void addProcessingParticle( MC_Base_Particle &particle, uint64_t &fill_vault_index );
76 |     //Adds a particle to the extra particle vault
77 |     HOST_DEVICE
78 |     void addExtraParticle( MC_Particle &particle );
79 |     HOST_DEVICE_END
80 | 
81 |     //Pushes particles from Extra Vaults onto the Processing
82 |     //Vault list
83 |     void cleanExtraVaults();
84 | 
85 |   private:
86 | 
87 |     //The size of the ParticleVaults (fixed at runtime for
88 |     //each run)
89 |     uint64_t _vaultSize;
90 | 
91 |     //The number of Extra Vaults needed, based on heuristics
92 |     //(fixed at runtime for each run)
93 |     uint64_t _numExtraVaults;
94 | 
95 |     //A running index for the number of particles in the extra
96 |     //particle vaults
97 |     uint64_t _extraVaultIndex;
98 | 
99 |     //The send queue - stores particle index and neighbor index
100 |     //for any particles that hit (TRANSIT_OFF_PROCESSOR)
101 |     SendQueue *_sendQueue;
102 | 
103 |     //The list of active particle vaults (size - grow-able)
104 |     std::vector<ParticleVault*> _processingVault;
105 | 
106 |     //The list of censused particle vaults (size - grow-able)
107 |     std::vector<ParticleVault*> _processedVault;
108 | 
109 |     //The list of extra particle vaults (size - fixed)
110 |     qs_vector<ParticleVault*> _extraVault;
111 | 
112 | };
113 | 
114 | #endif
115 | 
--------------------------------------------------------------------------------
/src/PhysicalConstants.cc:
--------------------------------------------------------------------------------
1 | #include "PhysicalConstants.hh"
2 | 
3 | // The values of all physical constants are taken from:
4 | // 2006 CODATA which is located on the web at
5 | // http://physics.nist.gov/cuu/Constants/codata.pdf
6 | 
7 | // The units of physical quantities used by the code are:
8 | //    Mass        - gram (g)
9 | //    Length      - centimeter (cm)
10 | //    Time        - second (s)
11 | //    Energy      - million electron-volts (MeV) : of a particle
12 | //    Energy      - erg (g cm^2/s^2): in some background calculation
13 | //    Temperature - thousand electron-volts (keV)
14 | 
15 | const double PhysicalConstants::_neutronRestMassEnergy = 9.395656981095e+2; /* MeV */
16 | const double PhysicalConstants::_pi = 3.1415926535897932;
17 | const double PhysicalConstants::_speedOfLight = 2.99792458e+10; // cm / s
18 | 
19 | // Constants used in math for computer science, roundoff, and other reasons
20 | const double PhysicalConstants::_tinyDouble  = 1.0e-13;
21 | const double PhysicalConstants::_smallDouble = 1.0e-10;
22 | const double PhysicalConstants::_hugeDouble  = 1.0e+75;
23 | 
--------------------------------------------------------------------------------
/src/PhysicalConstants.hh:
--------------------------------------------------------------------------------
1 | #ifndef PHYSICAL_CONSTANTS_HH
2 | #define PHYSICAL_CONSTANTS_HH
3 | 
4 | #include "DeclareMacro.hh"
5 | HOST_DEVICE_CLASS
6 | namespace PhysicalConstants
7 | {
8 | 
9 |    const double _neutronRestMassEnergy = 9.395656981095e+2; /* MeV */
10 |    const double _pi = 3.1415926535897932;
11 |    const double _speedOfLight = 2.99792458e+10; // cm / s
12 | 
13 |    // Constants used in math for computer science, roundoff, and other reasons
14 |    const double _tinyDouble  = 1.0e-13;
15 |    const double _smallDouble = 1.0e-10;
16 |    const double _hugeDouble  = 1.0e+75;
17 |    //
18 | }
19 | HOST_DEVICE_END
20 | 
21 | 
22 | #endif
23 | 
--------------------------------------------------------------------------------
/src/PopulationControl.hh:
--------------------------------------------------------------------------------
1 | #ifndef POPULATION_CONTROL_HH
2 | #define POPULATION_CONTROL_HH
3 | 
4 | class MonteCarlo;
5 | 
6 | void PopulationControl(MonteCarlo* monteCarlo, bool loadBalance);
7 | 
8 | void RouletteLowWeightParticles(MonteCarlo* monteCarlo);
9 | 
10 | #endif
11 | 
12 | 
--------------------------------------------------------------------------------
/src/QS_Vector.hh:
--------------------------------------------------------------------------------
1 | #ifndef QS_VECTOR_HH
2 | #define QS_VECTOR_HH
3 | 
4 | #include "DeclareMacro.hh"
5 | #include "QS_atomics.hh"
6 | #include "qs_assert.hh"
7 | #include "MemoryControl.hh"
8 | 
9 | #include <algorithm>
10 | 
11 | template <class T>
12 | class qs_vector
13 | {
14 |  public:
15 | 
16 |    qs_vector() : _data(0), _capacity(0), _size(0), _memPolicy(MemoryControl::AllocationPolicy::HOST_MEM), _isOpen(0) {};
17 | 
18 |    qs_vector(int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
19 |    : _data(0), _capacity(size), _size(size), _memPolicy(memPolicy), _isOpen(0)
20 |    {
21 |       _data = MemoryControl::allocate<T>(size, memPolicy);
22 |    }
23 | 
24 | 
25 |    qs_vector( int size, const T& value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
26 |    : _data(0), _capacity(size), _size(size), _memPolicy(memPolicy), _isOpen(0)
27 |    {
28 |       _data = MemoryControl::allocate<T>(size, memPolicy);
29 | 
30 |       for (int ii = 0; ii < _capacity; ++ii)
31 |          _data[ii] = value;
32 |    }
33 | 
34 |    qs_vector(const qs_vector& aa )
35 |    : _data(0), _capacity(aa._capacity), _size(aa._size), _memPolicy(aa._memPolicy), _isOpen(aa._isOpen)
36 |    {
37 |       _data = MemoryControl::allocate<T>(_capacity, _memPolicy);
38 | 
39 |       for (int ii=0; ii<_size; ++ii)
40 |          _data[ii] = aa._data[ii];
41 |    }
42 | 
43 |    ~qs_vector()
44 |    {
45 |       MemoryControl::deallocate(_data, _size, _memPolicy);
46 |    }
47 | 
48 |    /// Needed for copy-swap idiom
49 |    void swap(qs_vector& other)
50 |    {
51 |       std::swap(_data,      other._data);
52 |       std::swap(_capacity,  other._capacity);
53 |       std::swap(_size,      other._size);
54 |       std::swap(_memPolicy, other._memPolicy);
55 |       std::swap(_isOpen,    other._isOpen);
56 |    }
57 | 
58 |    /// Implement assignment using copy-swap idiom
59 |    qs_vector& operator=(const qs_vector& aa)
60 |    {
61 |       if (&aa != this)
62 |       {
63 |          qs_vector temp(aa);
64 |          this->swap(temp);
65 |       }
66 |       return *this;
67 |    }
68 | 
69 |    HOST_DEVICE_CUDA
70 |    int get_memPolicy()
71 |    {
72 |       return _memPolicy;
73 |    }
74 | 
75 |    void push_back( const T& dataElem )
76 |    {
77 |       qs_assert( _isOpen );
78 |       _data[_size] = dataElem;
79 |       _size++;
80 |    }
81 | 
82 |    void Open() { _isOpen = true; }
83 |    void Close(){ _isOpen = false; }
84 | 
85 |    HOST_DEVICE_CUDA
86 |    const T& operator[]( int index ) const
87 |    {
88 |       return _data[index];
89 |    }
90 | 
91 |    HOST_DEVICE_CUDA
92 |    T& operator[]( int index )
93 |    {
94 |       return _data[index];
95 |    }
96 | 
97 |    HOST_DEVICE_CUDA
98 |    int capacity() const
99 |    {
100 |       return _capacity;
101 |    }
102 | 
103 |    HOST_DEVICE_CUDA
104 |    int size() const
105 |    {
106 |       return _size;
107 |    }
108 | 
109 |    T& back()
110 |    {
111 |       return _data[_size-1];
112 |    }
113 | 
114 |    void reserve( int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
115 |    {
116 |       qs_assert( _capacity == 0 );
117 |       _capacity = size;
118 |       _memPolicy = memPolicy;
119 |       _data = MemoryControl::allocate<T>(size, memPolicy);
120 |    }
121 | 
122 |    void resize( int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
123 |    {
124 |       qs_assert( _capacity == 0 );
125 |       _capacity = size;
126 |       _size = size;
127 |       _memPolicy = memPolicy;
128 |       _data = MemoryControl::allocate<T>(size, memPolicy);
129 |    }
130 | 
131 |    void resize( int size, const T& value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
132 |    {
133 |       qs_assert( _capacity == 0 );
134 |       _capacity = size;
135 |       _size = size;
136 |       _memPolicy = memPolicy;
137 |       _data = MemoryControl::allocate<T>(size, memPolicy);
138 | 
139 |       for (int ii = 0; ii < _capacity; ++ii)
140 |          _data[ii] = value;
141 |    }
142 | 
143 |    bool empty() const
144 |    {
145 |       return ( _size == 0 );
146 |    }
147 | 
148 |    void eraseEnd( int NewEnd )
149 |    {
150 |       _size = NewEnd;
151 |    }
152 | 
153 |    void pop_back()
154 |    {
155 |       _size--;
156 |    }
157 | 
158 |    void clear()
159 |    {
160 |       _size = 0;
161 |    }
162 | 
163 |    void appendList( int listSize, T* list )
164 |    {
165 |       qs_assert( this->_size + listSize < this->_capacity );
166 | 
167 |       int size = _size;
168 |       this->_size += listSize;
169 | 
170 |       for( int i = size; i < _size; i++ )
171 |       {
172 |          _data[i] = list[ i-size ];
173 |       }
174 | 
175 |    }
176 | 
177 |    //Atomically retrieve an available index, then increment that index by some amount
178 |    HOST_DEVICE_CUDA
179 |    int atomic_Index_Inc( int inc )
180 |    {
181 |       int pos;
182 | 
183 |       QS::atomicCaptureAdd( _size, inc, pos );
184 | 
185 |       return pos;
186 |    }
187 | 
188 |  private:
189 |    T* _data;
190 |    int _capacity;
191 |    int _size;
192 |    bool _isOpen;
193 |    MemoryControl::AllocationPolicy _memPolicy;
194 | 
195 | };
196 | 
197 | 
198 | #endif
199 | 
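Since qs_vector deliberately never reallocates (reserve and resize assert that the vector has not been sized before), filling it follows a fixed pattern. A minimal usage sketch, hypothetical rather than taken from the repository, showing both the serial Open/push_back/Close protocol and the atomic_Index_Inc slot claiming intended for threaded or device code:

    #include "QS_Vector.hh"

    void fillExample()
    {
       qs_vector<int> v;
       v.reserve(1000);        // one-shot allocation; capacity is fixed from here on

       v.Open();               // push_back asserts unless the vector is "open"
       v.push_back(42);
       v.Close();

       // Concurrent filling: each thread atomically claims a slot index.
       #pragma omp parallel for
       for (int ii = 0; ii < 100; ++ii)
       {
          int slot = v.atomic_Index_Inc(1);   // returns the old size; size grows by 1
          v[slot] = ii;
       }
    }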
--------------------------------------------------------------------------------
/src/QS_atomics.hh:
--------------------------------------------------------------------------------
1 | #ifndef QS_ATOMICS_HH
2 | #define QS_ATOMICS_HH
3 | 
4 | #include "gpuPortability.hh"
5 | 
6 | // Provides the following atomic functions:
7 | //  * QS::atomicWrite(a,b)        a=b
8 | //  * QS::atomicAdd(a,b)          a+=b
9 | //  * QS::atomicIncrement(a)      a++
10 | //  * QS::atomicCaptureAdd(a,b,c) c=a; a+=b
11 | // These all function correctly on hip(AMD), cuda, openMP, and openMP offload.
12 | //
13 | // There is one significant complication that we need to worry about
14 | // when trying to provide device native implementations of atomics on
15 | // hip and cuda.  Cuda doesn't allow function overloading based on
16 | // __host__ or __device__ attributes.  If you have two functions with
17 | // the same signature, one with __host__ (or undecorated, since
18 | // functions are __host__ by default) and another with __device__, nvcc
19 | // will produce an error that the function is multiply defined.  The
20 | // solution to this problem is to wrap the overloaded functions in a
21 | // check for the __CUDA_ARCH__ macro, which is defined only when
22 | // compiling for the device.  See
23 | // https://forums.developer.nvidia.com/t/overloading-host-and-device-function/29601
24 | //
25 | // On the other hand, hip seems to have no such problem managing
26 | // functions that are overloaded on __host__ or __device__ attributes.
27 | // Hence, we don't have to worry about checking for the device pass on
28 | // a hip build.
29 | 
30 | 
31 | 
32 | 
33 | // First, we need to provide some "built-in" atomic signatures that
34 | // the CUDA API doesn't provide.  These should only be available in
35 | // the device pass of a CUDA build.  HIP provides these signatures.
36 | #if defined HAVE_CUDA && defined __CUDA_ARCH__
37 | 
38 | // atomicAdd for uint64_t:
39 | // It is common that unsigned long and unsigned long long are both
40 | // 64-bit integers.  In such cases, uint64_t may be defined as
41 | // unsigned long.  Unfortunately, nvidia doesn't supply a version of
42 | // atomicAdd that takes unsigned long arguments.  As long as unsigned
43 | // long and unsigned long long are the same size, we can get away with
44 | // this kind of nonsense.
45 | static inline __device__ uint64_t atomicAdd(uint64_t* address, uint64_t val)
46 | {
47 |    static_assert(sizeof(uint64_t) == sizeof(unsigned long long),
48 |                  "type size mismatch");
49 |    return ::atomicAdd(reinterpret_cast<unsigned long long*>(address), val);
50 | }
51 | 
52 | // atomicExch for double:
53 | // nvidia doesn't supply a version of atomicExch that takes doubles.
54 | // So, we will roll our own with this somewhat evil hack.
55 | static inline __device__ double atomicExch(double* address, double val)
56 | {
57 |    static_assert(sizeof(double) == sizeof(unsigned long long),
58 |                  "type size mismatch");
59 |    return __longlong_as_double
60 |       (
61 |          ::atomicExch(reinterpret_cast<unsigned long long int*>(address),
62 |                       __double_as_longlong(val))
63 |       );
64 | }
65 | 
66 | #endif //#if defined HAVE_CUDA && defined __CUDA_ARCH__
67 | 
68 | 
69 | namespace QS
70 | {
71 |    // First, the versions defined in terms of the native atomic
72 |    // functions provided by CUDA and HIP.
73 | 
74 |    // These get built when building for HIP (which QS assumes means AMD),
75 |    // or the device pass of a CUDA build
76 | #if defined HAVE_HIP || (defined HAVE_CUDA && defined __CUDA_ARCH__)
77 | 
78 |    template <typename T> static inline __device__
79 |    void atomicWrite(T& aa, T bb)
80 |    {
81 |       atomicExch(&aa, bb);
82 |    }
83 | 
84 |    template <typename T> static inline __device__
85 |    void atomicAdd(T& aa, T bb)
86 |    {
87 |       ::atomicAdd(&aa, bb);
88 |    }
89 | 
90 |    template <typename T> static inline __device__
91 |    void atomicIncrement(T& aa)
92 |    {
93 |       ::atomicAdd(&aa, 1);
94 |    }
95 | 
96 |    template <typename T> static inline __device__
97 |    void atomicCaptureAdd(T& aa, T bb, T& cc)
98 |    {
99 |       cc = ::atomicAdd(&aa, bb);
100 |    }
101 | 
102 | #endif // #if defined HAVE_HIP || (defined HAVE_CUDA && defined __CUDA_ARCH__)
103 | 
104 | 
105 |    // Now the versions defined in terms of omp atomic directives.  Note
106 |    // that these apply to both CPU and GPU (i.e., target) code.  These
107 |    // also supply implementations for CPU builds without openMP.
108 |    // Obviously, these functions aren't actually atomic without
109 |    // openMP.  That's OK since without openMP quicksilver can't need
110 |    // atomics on the CPU since it has no way to run multiple threads in
111 |    // the same address space.
112 | 
113 |    // These get built for everything *except* the device pass of a CUDA
114 |    // build.
115 | #if ! (defined HAVE_CUDA && defined __CUDA_ARCH__)
116 | 
117 |    template <typename T> static inline
118 |    void atomicWrite(T& aa, T bb)
119 |    {
120 |       #pragma omp atomic write
121 |       aa = bb;
122 |    }
123 | 
124 |    template <typename T> static inline
125 |    void atomicAdd(T& aa, T bb)
126 |    {
127 |       #pragma omp atomic
128 |       aa += bb;
129 |    }
130 | 
131 |    template <typename T> static inline
132 |    void atomicIncrement(T& aa)
133 |    {
134 |       #pragma omp atomic update
135 |       aa++;
136 |    }
137 | 
138 |    template <typename T> static inline
139 |    void atomicCaptureAdd(T& aa, T bb, T& cc)
140 |    {
141 |       #pragma omp atomic capture
142 |       {cc = aa; aa += bb;}
143 |    }
144 | 
145 | #endif // #if ! (defined HAVE_CUDA && defined __CUDA_ARCH__)
146 | 
147 | } // namespace QS
148 | 
149 | #endif // #ifndef QS_ATOMICS_HH
150 | 
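Because both branches expose the same signatures, one call site works on every backend. A hypothetical tally routine (not from the repository) showing the two most common patterns, an accumulate and a capture-add that hands each thread a unique slot:

    #include "QS_atomics.hh"

    void tally(double* scalarFlux, int cell, double contribution, int& cursor)
    {
       // scalarFlux[cell] += contribution, safely from many threads
       QS::atomicAdd(scalarFlux[cell], contribution);

       // myIndex = cursor; cursor += 1 -- a thread-unique slot for a later write
       int myIndex;
       QS::atomicCaptureAdd(cursor, 1, myIndex);
       (void)myIndex;
    }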
--------------------------------------------------------------------------------
/src/SendQueue.cc:
--------------------------------------------------------------------------------
1 | #include "SendQueue.hh"
2 | #include "QS_Vector.hh"
3 | 
4 | SendQueue::SendQueue()
5 | {}
6 | 
7 | SendQueue::SendQueue( size_t size )
8 | : _data( size, VAR_MEM )
9 | {}
10 | 
11 | 
12 | // -----------------------------------------------------------------------
13 | size_t SendQueue::
14 | size()
15 | {
16 |    return _data.size();
17 | }
18 | 
19 | // -----------------------------------------------------------------------
20 | size_t SendQueue::
21 | neighbor_size( int neighbor_ )
22 | {
23 |    size_t sum_n=0;
24 |    for( size_t i = 0; i < _data.size(); i++ )
25 |    {
26 |       if( neighbor_ == _data[i]._neighbor )
27 |          sum_n++;
28 |    }
29 |    return sum_n;
30 | }
31 | 
32 | // -----------------------------------------------------------------------
33 | HOST_DEVICE
34 | void SendQueue::
35 | push( int neighbor_, int vault_index_ )
36 | {
37 |    size_t indx = _data.atomic_Index_Inc(1);
38 | 
39 |    _data[indx]._neighbor = neighbor_;
40 |    _data[indx]._particleIndex = vault_index_;
41 | }
42 | HOST_DEVICE_END
43 | 
44 | // -----------------------------------------------------------------------
45 | void SendQueue::
46 | clear()
47 | {
48 |    _data.clear();
49 | }
50 | 
51 | // -----------------------------------------------------------------------
52 | sendQueueTuple& SendQueue::
53 | getTuple( int index_ )
54 | {
55 |    qs_assert( index_ >= 0 );
56 |    qs_assert( index_ < _data.size() );
57 |    return _data[index_];
58 | }
59 | 
60 | 
--------------------------------------------------------------------------------
/src/SendQueue.hh:
--------------------------------------------------------------------------------
1 | #ifndef SENDQUEUE_HH
2 | #define SENDQUEUE_HH
3 | 
4 | #include "QS_Vector.hh"
5 | #include "DeclareMacro.hh"
6 | 
7 | //Tuple to record which particles need to be sent to which neighbor process during tracking
8 | struct sendQueueTuple
9 | {
10 |    int _neighbor;
11 |    int _particleIndex;
12 | };
13 | 
14 | class SendQueue
15 | {
16 |  public:
17 | 
18 |    SendQueue();
19 |    SendQueue( size_t size );
20 | 
21 |    //Get the total size of the send queue
22 |    size_t size();
23 | 
24 |    void reserve( size_t size ){ _data.reserve(size, VAR_MEM); }
25 | 
26 |    //Get the number of items in the send queue going to a specific neighbor
27 |    size_t neighbor_size( int neighbor_ );
28 | 
29 |    sendQueueTuple& getTuple( int index_ );
30 | 
31 |    //Add items to the send queue in a kernel
32 |    HOST_DEVICE_CUDA
33 |    void push( int neighbor_, int vault_index_ );
34 | 
35 |    //Clear the send queue after use
36 |    void clear();
37 | 
38 |  private:
39 | 
40 |    //The send queue - stores particle index and neighbor index for any particles that hit (TRANSIT_OFF_PROCESSOR)
41 |    qs_vector<sendQueueTuple> _data;
42 | 
43 | };
44 | 
45 | #endif
46 | 
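A hypothetical end-to-end use of the queue (the surrounding tracking structures are omitted): a facet crossing that leaves the rank records the (neighbor, particle) pair during the kernel, and the communication code then sizes its per-neighbor MPI messages without walking the particle data:

    #include "SendQueue.hh"

    void sendQueueExample()
    {
       SendQueue queue;
       queue.reserve(1000);                               // sized once, like the vaults

       queue.push(/*neighbor_*/ 3, /*vault_index_*/ 17);  // particle 17 leaves toward neighbor 3

       size_t nToNeighbor3 = queue.neighbor_size(3);      // == 1; sizes the MPI send
       sendQueueTuple& t = queue.getTuple(0);             // t._neighbor == 3, t._particleIndex == 17
       (void)t; (void)nToNeighbor3;

       queue.clear();                                     // reset for the next tracking pass
    }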
--------------------------------------------------------------------------------
/src/SharedMemoryCommObject.cc:
--------------------------------------------------------------------------------
1 | #include "SharedMemoryCommObject.hh"
2 | #include "qs_assert.hh"
3 | #include "MeshPartition.hh"
4 | 
5 | using std::set;
6 | using std::vector;
7 | 
8 | 
9 | SharedMemoryCommObject::SharedMemoryCommObject(vector<MeshPartition>& meshPartition)
10 | :_partitions(meshPartition)
11 | {
12 |    _gidToIndex.resize(_partitions.size());
13 |    for (unsigned ii=0; ii<_partitions.size(); ++ii)
14 |    {
15 |       int gid = _partitions[ii].domainGid();
16 |       qs_assert(gid < _partitions.size());
17 |       _gidToIndex[gid] = ii;
18 |    }
19 | 
20 | }
21 | 
22 | void SharedMemoryCommObject::exchange(MeshPartition::MapType& cellInfoMap,
23 |                                       const vector<int>& nbrDomain,
24 |                                       vector<set<Long64> > sendSet,
25 |                                       vector<set<Long64> > recvSet)
26 | 
27 | {
28 |    for (unsigned ii=0; ii<sendSet.size(); ++ii)
29 |    {
30 |       MeshPartition& targetPartition = _partitions[_gidToIndex[nbrDomain[ii]]];
31 |       for (auto iter=sendSet[ii].begin(); iter!=sendSet[ii].end(); ++iter)
32 |       {
33 |          const CellInfo& cellToSend = cellInfoMap[*iter];
34 |          qs_assert(cellToSend._domainIndex >= 0);
35 |          qs_assert(cellToSend._cellIndex >= 0);
36 |          targetPartition.addCell(*iter, cellToSend);
37 |       }
38 |    }
39 | }
40 | 
41 | void SharedMemoryCommObject::exchange(vector sendBuf,
42 |                                       vector& recvBuf)
43 | {
44 |    // This type of exchange should never occur in SharedMemory spaces.
45 |    qs_assert(false);
46 | }
47 | 
48 | 
--------------------------------------------------------------------------------
/src/SharedMemoryCommObject.hh:
--------------------------------------------------------------------------------
1 | #ifndef SHARED_MEMORY_COMM_OBJECT_HH
2 | #define SHARED_MEMORY_COMM_OBJECT_HH
3 | 
4 | #include "CommObject.hh"
5 | 
6 | #include <vector>
7 | 
8 | #include <set>
9 | #include "MeshPartition.hh"
10 | #include "Long64.hh"
11 | 
12 | class SharedMemoryCommObject : public CommObject
13 | {
14 |  public:
15 |    SharedMemoryCommObject(std::vector<MeshPartition>& meshPartition);
16 | 
17 |    void exchange(MeshPartition::MapType& cellInfo,
18 |                  const std::vector<int>& nbrDomain,
19 |                  std::vector<std::set<Long64> > sendSet,
20 |                  std::vector<std::set<Long64> > recvSet);
21 | 
22 |    void exchange(std::vector sendBuf,
23 |                  std::vector& recvBuf);
24 | 
25 | 
26 |  private:
27 |    std::vector<MeshPartition>& _partitions;
28 |    std::vector<int> _gidToIndex;
29 | };
30 | 
31 | #endif
32 | 
--------------------------------------------------------------------------------
/src/Tuple.hh:
--------------------------------------------------------------------------------
1 | #ifndef TUPLE_HH
2 | #define TUPLE_HH
3 | 
4 | class Tuple
5 | {
6 |  public:
7 |    Tuple(){};
8 |    Tuple(int ix, int iy, int iz) : ix_(ix), iy_(iy), iz_(iz){}
9 | 
10 |    const int& x() const {return ix_;}
11 |    const int& y() const {return iy_;}
12 |    const int& z() const {return iz_;}
13 | 
14 |    int& x() {return ix_;}
15 |    int& y() {return iy_;}
16 |    int& z() {return iz_;}
17 | 
18 |    Tuple& operator-=(const Tuple& a);
19 |    Tuple& operator+=(const Tuple& a);
20 |    bool operator<(const Tuple& b) const;
21 | 
22 |  private:
23 |    int ix_;
24 |    int iy_;
25 |    int iz_;
26 | };
27 | 
28 | inline Tuple& Tuple::operator-=(const Tuple& a)
29 | {
30 |    ix_ -= a.ix_;
31 |    iy_ -= a.iy_;
32 |    iz_ -= a.iz_;
33 |    return *this;
34 | }
35 | 
36 | inline Tuple& Tuple::operator+=(const Tuple& a)
37 | {
38 |    ix_ += a.ix_;
39 |    iy_ += a.iy_;
40 |    iz_ += a.iz_;
41 |    return *this;
42 | }
43 | 
44 | inline bool Tuple::operator<(const Tuple& b) const
45 | {
46 |    return
47 |       ix_ < b.ix_ ||
48 |       (ix_ == b.ix_ && (iy_ < b.iy_ ||
49 |                         (iy_ == b.iy_ && iz_ < b.iz_)));
50 | }
51 | 
52 | #endif
--------------------------------------------------------------------------------
/src/cudaFunctions.cc:
--------------------------------------------------------------------------------
1 | #include "cudaFunctions.hh"
5 | 
6 | namespace
7 | {
8 | #if defined GPU_NATIVE
9 |    __global__ void trivialKernel()
10 |    {
11 |       int global_index = getGlobalThreadID();
12 |       if( global_index == 0)
13 |       {
14 |       }
15 |    }
16 | #endif
17 | }
18 | 
19 | #if defined GPU_NATIVE
20 | void warmup_kernel()
21 | {
22 |    trivialKernel<<<1, 1>>>();
23 |    gpuDeviceSynchronize();
24 | }
25 | #endif
26 | 
27 | #if defined GPU_NATIVE
28 | int ThreadBlockLayout( dim3 &grid, dim3 &block, int num_particles )
29 | {
30 |    int run_kernel = 1;
31 |    const uint64_t max_block_size = 65535;
32 |    const uint64_t threads_per_block = 128;
33 | 
34 |    block.x = threads_per_block;
35 |    block.y = 1;
36 |    block.z = 1;
37 | 
38 |    uint64_t num_blocks = num_particles / threads_per_block + ( ( num_particles%threads_per_block == 0 ) ? 0 : 1 );
39 | 
40 |    if( num_blocks == 0 )
41 |    {
42 |       run_kernel = 0;
43 |    }
44 |    else if( num_blocks <= max_block_size )
45 |    {
46 |       grid.x = num_blocks;
47 |       grid.y = 1;
48 |       grid.z = 1;
49 |    }
50 |    else if( num_blocks <= max_block_size*max_block_size )
51 |    {
52 |       grid.x = max_block_size;
53 |       grid.y = 1 + (num_blocks / max_block_size );
54 |       grid.z = 1;
55 |    }
56 |    else if( num_blocks <= max_block_size*max_block_size*max_block_size )
57 |    {
58 |       grid.x = max_block_size;
59 |       grid.y = max_block_size;
60 |       grid.z = 1 + (num_blocks / (max_block_size*max_block_size));
61 |    }
62 |    else
63 |    {
64 |       printf("Error: num_blocks exceeds maximum block specifications. Cannot handle this case yet\n");
65 |       run_kernel = 0;
66 |    }
67 | 
68 |    return run_kernel;
69 | }
70 | #endif
71 | 
72 | #if defined GPU_NATIVE
73 | DEVICE
74 | int getGlobalThreadID()
75 | {
76 |    int blockID  = blockIdx.x +
77 |                   blockIdx.y * gridDim.x +
78 |                   blockIdx.z * gridDim.x * gridDim.y;
79 | 
80 |    int threadID = blockID * (blockDim.x * blockDim.y * blockDim.z) +
81 |                   threadIdx.z * ( blockDim.x * blockDim.y ) +
82 |                   threadIdx.y * blockDim.x +
83 |                   threadIdx.x;
84 |    return threadID;
85 | }
86 | #endif
87 | 
--------------------------------------------------------------------------------
/src/cudaFunctions.hh:
--------------------------------------------------------------------------------
1 | #ifndef CUDAFUNCTIONS_HH
2 | #define CUDAFUNCTIONS_HH
3 | 
4 | #include "cudaUtils.hh"
5 | #include "DeclareMacro.hh"
6 | 
7 | #if defined GPU_NATIVE
8 | void warmup_kernel();
9 | int ThreadBlockLayout( dim3 &grid, dim3 &block, int num_particles );
10 | DEVICE
11 | int getGlobalThreadID();
12 | #endif
13 | 
14 | #endif
15 | 
--------------------------------------------------------------------------------
/src/cudaUtils.hh:
--------------------------------------------------------------------------------
1 | #ifndef CUDAUTILS_HH
2 | #define CUDAUTILS_HH
3 | 
4 | #if defined(HAVE_CUDA)
5 | #include 
6 | #include 
7 | #include 
8 | #endif
9 | 
10 | enum ExecutionPolicy{ cpu, gpuNative, gpuWithOpenMP };
11 | 
12 | inline ExecutionPolicy getExecutionPolicy( int useGPU )
13 | {
14 |    ExecutionPolicy execPolicy = ExecutionPolicy::cpu;
15 | 
16 |    if( useGPU )
17 |    {
18 |       #if defined HAVE_CUDA || defined HAVE_HIP
19 |          execPolicy = ExecutionPolicy::gpuNative;
20 |       #elif defined (HAVE_OPENMP_TARGET)
21 |          execPolicy = ExecutionPolicy::gpuWithOpenMP;
22 |       #endif
23 |    }
24 |    return execPolicy;
25 | }
26 | #endif
27 | 
--------------------------------------------------------------------------------
/src/gpuPortability.hh:
--------------------------------------------------------------------------------
1 | #ifndef GPUPORTABILITY_HH
2 | #define GPUPORTABILITY_HH
3 | 
4 | #if defined __CUDACC__ || defined TARGET_NVIDIA
5 | #define __DO_CUDA
6 | #define __PREFIX cuda
7 | #define HAVE_UVM
8 | #include 
9 | #include 
10 | #include 
11 | #elif defined __HIPCC__ || defined TARGET_AMD
12 | #define __DO_HIP
13 | #define __PREFIX hip
14 | #define HAVE_UVM
15 | #define __HIP_PLATFORM_AMD__
16 | #include <hip/hip_runtime.h>
17 | #else
18 | #define __PREFIX invalid
19 | #endif
20 | 
21 | #if defined HAVE_CUDA || defined HAVE_HIP
22 | #define GPU_NATIVE
23 | #endif
24 | 
25 | 
26 | #ifdef __DO_CUDA
27 | #endif
28 | 
29 | #ifdef __DO_HIP
30 | #endif
31 | 
32 | #if defined HAVE_UVM
33 | #define VAR_MEM MemoryControl::AllocationPolicy::UVM_MEM
34 | #else
35 | #define VAR_MEM MemoryControl::AllocationPolicy::HOST_MEM
36 | #endif
37 | 
38 | #define CONCAT_(A, B) A ## B
39 | #define CONCAT(A1, B1) CONCAT_(A1, B1)
40 | 
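// Illustration (added commentary, not part of the original header): __PREFIX
// is token-pasted onto each runtime entry-point name, so the gpu* aliases
// below bind to whichever backend the build selected.  With __PREFIX == cuda,
//
//    gpuMallocManaged(&ptr, nBytes);   // expands to cudaMallocManaged(&ptr, nBytes)
//    gpuDeviceSynchronize();           // expands to cudaDeviceSynchronize()
//
// while a HIP build gets hipMallocManaged / hipDeviceSynchronize.  A build
// with neither backend maps them to invalidMallocManaged and so on, so any
// accidental use of these aliases fails to compile.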
41 | #define gpuMallocManaged     CONCAT(__PREFIX, MallocManaged)
42 | #define gpuFree              CONCAT(__PREFIX, Free)
43 | #define gpuDeviceSynchronize CONCAT(__PREFIX, DeviceSynchronize)
44 | #define gpuGetDeviceCount    CONCAT(__PREFIX, GetDeviceCount)
45 | #define gpuSetDevice         CONCAT(__PREFIX, SetDevice)
46 | #define gpuPeekAtLastError   CONCAT(__PREFIX, PeekAtLastError)
47 | 
48 | 
49 | #undef __DO_CUDA
50 | #undef __DO_HIP
51 | 
52 | #endif // #ifndef GPUPORTABILITY_HH
53 | 
--------------------------------------------------------------------------------
/src/initMC.hh:
--------------------------------------------------------------------------------
1 | #ifndef INIT_MC_HH
2 | #define INIT_MC_HH
3 | 
4 | class Parameters;
5 | class MonteCarlo;
6 | 
7 | MonteCarlo* initMC(const Parameters& params);
8 | 
9 | #endif
10 | 
--------------------------------------------------------------------------------
/src/macros.hh:
--------------------------------------------------------------------------------
1 | #ifndef MACROS_HH
2 | #define MACROS_HH
3 | 
4 | #include "qs_assert.hh"
5 | #include <stdlib.h>   // malloc/calloc/free used by the MC_* macros below
6 | 
7 | #define MC_CALLOC(A, N1, TYPE) if ( N1 ) { A = (TYPE*) calloc((N1), sizeof(TYPE)); } else { A = NULL; }
8 | #define MC_MALLOC(A, N1, TYPE) if ( N1 ) { A = (TYPE*) malloc((N1)*sizeof(TYPE)); } else { A = NULL; }
9 | #define MC_NEW_ARRAY(A,N1,TYPE) if ( N1 ) { A = new TYPE[N1]; } else { A = NULL; }
10 | #define MC_REALLOC(a, b, c) {qs_assert(false); }
11 | #define MC_FREE(A) if (A != NULL) { free(A) ; A = NULL ; }
12 | #define MC_DELETE(A) if (A != NULL) { delete A ; A = NULL ; }
13 | #define MC_DELETE_ARRAY(A) if (A != NULL) { delete [] A ; A = NULL ; }
14 | #define MC_MEMCPY(a, b, c) {qs_assert(false); }
15 | #define MC_FABS(x) ( (x) < 0 ? -(x) : (x) )
16 | 
17 | 
18 | #define MC_Fatal_Jump(...) {qs_assert(false); }
19 | 
20 | //#define MC_MIN(a, b) {std::min(a,b)}
21 | #define MC_MIN(a, b) ( ((a) < (b)) ? (a) : (b) )
22 | 
23 | // If not compiled with OpenMP, define stub OpenMP
24 | // functions that will work for the code.
25 | #ifdef HAVE_OPENMP
26 | #include <omp.h>
27 | #else
28 | #include 
29 | #include 
30 | #define omp_get_thread_num() 0
31 | #define omp_get_max_threads() 1
32 | #define omp_get_num_procs() 1
33 | #endif
36 | 
37 | #if defined(HAVE_OPENMP) && defined(HAVE_DEBUG)
38 | #define MC_VERIFY_THREAD_ZERO MC_Verify_Thread_Zero(__FILE__, __LINE__);
39 | #else
40 | #define MC_VERIFY_THREAD_ZERO
41 | #endif
42 | 
43 | #ifdef USE_PRINT_DEBUG
44 | #define PRINT_DEBUG printf("FILE: %s\tLINE: %d\n", __FILE__, __LINE__ )
45 | #else
46 | #define PRINT_DEBUG
47 | #endif
48 | #endif // #ifndef MACROS_HH
--------------------------------------------------------------------------------
/src/mc_omp_critical.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | #pragma omp critical
3 | #endif
4 | 
--------------------------------------------------------------------------------
/src/mc_omp_parallel_for_schedule_static.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | if ( (mcco->processor_info->rank == 0) && (mcco->_params.simulationParams.debugThreads >= 2))
3 |    { printf("OpenMP Looping over %d threads\n",omp_get_max_threads()); }
4 | #pragma omp parallel for schedule (static)
5 | #endif
6 | 
--------------------------------------------------------------------------------
/src/mc_omp_parallel_for_schedule_static_if.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | #pragma omp parallel for schedule (static) MC_OMP_PARALLEL_FOR_IF_CONDITION
3 | #endif
4 | 
5 | 
--------------------------------------------------------------------------------
/src/mc_omp_parallel_for_schedule_static_num_physical_cores.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | int num_physical_cores = mc_get_num_physical_procs();
3 | if ((mcco->processor_info->rank == 0) && (mcco->_params.simulationParams.debugThreads >= 2))
4 |    { printf("OpenMP Looping over %d cores\n",num_physical_cores); }
5 | #pragma omp parallel for schedule (static) num_threads(num_physical_cores)
6 | #endif
7 | 
--------------------------------------------------------------------------------
/src/memUtils.hh:
--------------------------------------------------------------------------------
1 | /// \file
2 | /// Wrappers for memory allocation.
3 | 
4 | #ifndef MEMUTILS_HH
5 | #define MEMUTILS_HH
6 | 
7 | #include <cstdlib>
8 | 
9 | static void* qsMalloc(size_t iSize)
10 | {
11 |    return std::malloc(iSize);
12 | }
13 | 
14 | static void* qsCalloc(size_t num, size_t iSize)
15 | {
16 |    return std::calloc(num, iSize);
17 | }
18 | 
19 | static void* qsRealloc(void* ptr, size_t iSize)
20 | {
21 |    return std::realloc(ptr, iSize);
22 | }
23 | 
24 | static void qsFree(void* ptr)
25 | {
26 |    std::free(ptr);
27 | }
28 | #endif
29 | 
--------------------------------------------------------------------------------
/src/mpi_stubs_internal.hh:
--------------------------------------------------------------------------------
1 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
2 | //
3 | //  Copyright (c) 2012
4 | //  Lawrence Livermore National Security, LLC
5 | //  All Rights Reserved
6 | //
7 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
8 | 
9 | #ifndef MPI_STUBS_INTERNAL_H
10 | #define MPI_STUBS_INTERNAL_H
11 | 
12 | #include "mpi_stubs.hh"
13 | 
14 | //----------------------------------------------------------------------------------------------------------------------
15 | // MPI stubs structures to implement mpi calls
16 | //----------------------------------------------------------------------------------------------------------------------
17 | 
18 | typedef struct _List     *pList;     // forward declaration for prototypes.
19 | typedef struct _Listitem *pListitem;
20 | 
21 | typedef uint64_t MPI_Aint;
22 | 
23 | typedef struct _List
24 | {
25 |    pListitem head;
26 |    pListitem tail;
27 |    int count;
28 | } List;
29 | 
30 | 
31 | typedef struct _Listitem
32 | {
33 |    void      *data;
34 |    pListitem prev;
35 |    pListitem next;
36 | 
37 | #ifdef MPI_STUBS_DEBUG_DATA
38 |    pList list;
39 | #endif
40 | 
41 | } Listitem;
42 | 
43 | typedef struct
44 | {
45 |    pList sendlist;
46 |    pList recvlist;
47 | 
48 |    int  num;
49 |    char *name;
50 | 
51 | } Comm;
52 | 
53 | typedef struct
54 | {
55 |    pListitem listitem;  // to allow Req to be removed from list
56 | 
57 |    int *buf;
58 |    int tag;
59 |    int complete;
60 | 
61 | } Req;
62 | 
63 | 
64 | typedef struct _Handleitem
65 | {
66 |    int handle;
67 |    struct _Handleitem *next;
68 | 
69 |    union
70 |    {
71 |       void *anything;  // At least size of void *
72 |       Comm comm;
73 |       Req  req;
74 | 
75 |    } data;
76 | 
77 | 
78 | } Handleitem;
79 | 
80 | typedef struct MPI_Stubs_Data_struct {
81 | 
82 |    MPI_Errhandler errhandler;
83 |    int headcount;
84 |    int itemcount;
85 |    int initialized;
86 | 
87 |    //
88 |    // The first block of handle items will be statically allocated.
89 |    // Subsequent ones will be added if necessary.
90 |    // blocks[0..nblocks-1] are allocated at any given time.
91 |    //
92 |    // Increase MPI_STUBS_MAX_BLOCKS if you *really* need more active requests
93 |    // (Although probably something is wrong if you need more than 256k !!!)
94 |    //
95 |    Handleitem block0[MPI_STUBS_BLOCK_ITEMS];
96 |    Handleitem *(blocks[MPI_STUBS_MAX_BLOCKS]);
97 |    int nblocks;
98 | 
99 |    int need_to_init;
100 |    Handleitem *nextfree;
101 | 
102 |    MPI_Stubs_Data_struct()
103 |    {
104 |       this->errhandler = MPI_ERRORS_ARE_FATAL;
105 |       this->headcount = 0;
106 |       this->itemcount = 0;
107 |       this->initialized = 0;
108 |       this->nblocks = 0;
109 |       this->need_to_init = 1;
110 |       this->nextfree = NULL;
111 |       for (int index=0; index<MPI_STUBS_MAX_BLOCKS; index++) { this->blocks[index] = NULL; }
112 |    }
113 | 
114 |    ~MPI_Stubs_Data_struct() {};
115 | 
116 | } MPI_Stubs_Data_type;
117 | 
118 | 
119 | #endif // ifndef MPI_STUBS_INTERNAL_H
120 | 
--------------------------------------------------------------------------------
/src/parseUtils.cc:
--------------------------------------------------------------------------------
1 | #include "parseUtils.hh"
2 | #include <cctype>
3 | #include "InputBlock.hh"
4 | 
5 | using std::string;
6 | using std::istream;
7 | 
8 | namespace
9 | {
10 |    string whitespace(" \t\f\v\n\r");
11 |    bool isComment(std::string line);
12 |    bool split(string line, string& keyword, string& value, int& indent);
13 |    bool validKeyword(const string& word);
14 |    void chop(string& line);
15 | }
16 | 
17 | bool blockStart(const string& line, string& blockName)
18 | {
19 |    string keyword;
20 |    string value;
21 |    int indent;
22 |    bool valid = split(line, keyword, value, indent);
23 |    if (valid && indent == 0 && value.size() == 0)
24 |    {
25 |       blockName = keyword;
26 |       return true;
27 |    }
28 |    return false;
29 | }
30 | 
31 | string readBlock(InputBlock& block, istream& in)
32 | {
33 |    string line;
34 |    while (!in.eof())
35 |    {
36 |       getline(in, line);
37 |       if (isComment(line))
38 |          continue;
39 |       string keyword;
40 |       string value;
41 |       int indent;
42 |       bool valid = split(line, keyword, value, indent);
43 |       if (!valid || indent == 0)
44 |          break;
45 |       block.addPair(keyword, value);
46 |    }
47 | 
48 |    return line;
49 | }
50 | 
51 | namespace
52 | {
53 |    /// Returns true if line contains nothing but whitespace and
54 |    /// comments.  False otherwise.
55 |    bool isComment(string line)
56 |    {
57 |       size_t here = line.find("//");
58 |       if (here != string::npos)
59 |          line.erase(here, string::npos);
60 |       return (line.find_last_not_of(whitespace) == string::npos);
61 |    }
62 | }
63 | 
64 | namespace
65 | {
66 |    bool split(string line, string& keyword, string& value, int& indent)
67 |    {
68 |       indent = 0;
69 |       while (indent < line.size() && isspace(line[indent]))
70 |          ++indent;
71 | 
72 |       size_t delimPos = line.find_first_of(":=", indent);
73 |       if (delimPos == string::npos)
74 |          return false;
75 |       keyword = line.substr(indent, delimPos-indent);
76 |       chop(keyword);
77 |       if (!validKeyword(keyword))
78 |          return false;
79 |       value.clear();
80 |       if (delimPos + 1 < line.size())
81 |       {
82 |          value = line.substr(delimPos+1, string::npos);
83 |          chop(value);
84 |       }
85 |       return true;
86 |    }
87 | }
88 | 
89 | namespace
90 | {
91 |    bool validKeyword(const string& word)
92 |    {
93 |       return true;
94 |    }
95 | }
96 | 
97 | namespace
98 | {
99 |    void chop(string& line)
100 |    {
101 |       size_t here = line.size();
102 |       while (here > 0 && isspace(line[here-1]))
103 |          --here;
104 |       if (here < line.size())
105 |          line.erase(here, string::npos);
106 |       size_t nSpace = 0;
107 |       while (nSpace < line.size() && isspace(line[nSpace]))
108 |          ++nSpace;
109 |       line.erase(0, nSpace);
110 |    }
111 | }
112 | 
--------------------------------------------------------------------------------
/src/parseUtils.hh:
--------------------------------------------------------------------------------
1 | #ifndef PARSE_UTILS_HH
2 | #define PARSE_UTILS_HH
3 | 
4 | #include <string>
5 | #include <iosfwd>
6 | 
7 | class InputBlock;
8 | 
9 | bool blockStart(const std::string& line, std::string& blockName);
10 | std::string readBlock(InputBlock& block, std::istream& in);
11 | 
12 | #endif
13 | 
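Taken together, blockStart and readBlock are enough to walk a whole input deck of the form shown in the Examples. A minimal driver sketch, assuming only that InputBlock is default-constructible and collects the key/value pairs; the function name scanDeck is hypothetical:

    #include <fstream>
    #include <string>
    #include "InputBlock.hh"
    #include "parseUtils.hh"

    void scanDeck(const char* path)
    {
       std::ifstream in(path);
       std::string line, blockName;
       std::getline(in, line);
       while (!in.eof())
       {
          if (blockStart(line, blockName))      // e.g. "Simulation:" at zero indent
          {
             InputBlock block;                  // assumed default-constructible
             line = readBlock(block, in);       // returns the line that ended the block
          }
          else
             std::getline(in, line);            // skip comments and stray lines
       }
    }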
--------------------------------------------------------------------------------
/src/portability.hh:
--------------------------------------------------------------------------------
1 | #ifndef PORTABILITY_HH
2 | #define PORTABILITY_HH
3 | 
4 | #ifdef CSTDINT_MISSING
5 | #include <stdint.h>
6 | #else
7 | #include <cstdint>
8 | #endif
9 | 
10 | #endif
11 | 
--------------------------------------------------------------------------------
/src/qs_assert.hh:
--------------------------------------------------------------------------------
1 | #include <cstdio>
2 | 
3 | #if defined HAVE_HIP
4 | #define __HIP_PLATFORM_AMD__
5 | #include 
6 | #include 
7 | #endif
8 | 
9 | #if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__
10 | #define qs_assert( cond)                        \
11 |    do                                           \
12 |    {                                            \
13 |       if (!(cond))                              \
14 |       {                                         \
15 |          printf("ERROR\n");                     \
16 |       }                                         \
17 |    } while(0)
18 | #else
19 | #define qs_assert( cond)                                        \
20 |    do                                                           \
21 |    {                                                            \
22 |       if (!(cond))                                              \
23 |       {                                                         \
24 |          printf("file=%s: line=%d ERROR\n",__FILE__,__LINE__);  \
25 |       }                                                         \
26 |    } while(0)
27 | #endif
28 | 
--------------------------------------------------------------------------------
/src/utils.cc:
--------------------------------------------------------------------------------
1 | #include "utils.hh"
2 | #include 
3 | #include "qs_assert.hh"
4 | #include "utilsMpi.hh"
5 | #include "macros.hh"
6 | #include 
7 | #include 
8 | #include 
9 | #include "MonteCarlo.hh"
10 | #include "Globals.hh"
11 | #include "MC_Processor_Info.hh"
12 | 
13 | 
14 | // Returns the number of physical cores.  Relies on the env var
15 | // KMP_PLACE_THREADS being set to something like 60c2t.
16 | // Otherwise, returns omp_get_num_procs()
17 | int mc_get_num_physical_procs(void)
18 | {
19 |    int num_physical_cores = omp_get_num_procs();
20 | #if defined(HAVE_OPENMP) && defined(HAVE_KNL)
21 |    int num_threads_per_core = 0;
22 |    char *env_str = getenv("KMP_PLACE_THREADS");
23 |    if (env_str)
24 |    {
25 |       char *ptr = strchr(env_str, (int)'t');
26 |       if (ptr)
27 |       {
28 |          num_threads_per_core = 1;
29 |          ptr--;
30 |          while ((ptr > env_str) && isdigit(*ptr) )
31 |             { num_threads_per_core = atoi(ptr); ptr--; }
32 |          if (num_threads_per_core > 0)
33 |             { num_physical_cores = omp_get_num_procs() / num_threads_per_core; }
34 |       }
35 |    }
36 | #endif
37 |    return num_physical_cores;
38 | }
39 | 
40 | 
41 | void MC_Verify_Thread_Zero(char const * const file, int line)
42 | {
43 | #ifdef HAVE_OPENMP
44 |    int thread_id = omp_get_thread_num();
45 |    if (thread_id != 0)
46 |    {
47 |       int mpi_rank = -1;
48 |       mpiComm_rank(mcco->processor_info->comm_mc_world, &mpi_rank);
49 |       fprintf(stderr,"Fatal Error: %s:%d MPI Routine called by thread other than zero."
50 |               "\n\tMPI Process %d, Thread %d", file, line, mpi_rank, thread_id);
51 |       mpiAbort(MPI_COMM_WORLD, -1); abort();
52 |    }
53 | #endif
54 |    return;
55 | }
56 | 
57 | void printBanner(const char *git_version, const char *git_hash)
58 | {
59 |    int rank = -1, size=-1, mpi_major=0, mpi_minor=0;
60 |    mpiComm_rank(MPI_COMM_WORLD, &rank);
61 |    mpiComm_size(MPI_COMM_WORLD, &size);
62 |    mpiGet_version(&mpi_major, &mpi_minor);
63 | 
64 |    if (rank == 0)
65 |    {
66 |       printf("Copyright (c) 2016\n");
67 |       printf("Lawrence Livermore National Security, LLC\n");
68 |       printf("All Rights Reserved\n");
69 | 
70 |       printf("Quicksilver Version     : %s\n",git_version);
71 |       printf("Quicksilver Git Hash    : %s\n",git_hash);
72 |       printf("MPI Version             : %d.%d\n",mpi_major,mpi_minor);
73 |       printf("Number of MPI ranks     : %d\n",size);
74 |       printf("Number of OpenMP Threads: %d\n",(int)omp_get_max_threads());
75 |       printf("Number of OpenMP CPUs   : %d\n\n",(int)omp_get_num_procs());
76 |    }
77 | }
78 | 
79 | void Print0(const char *format, ...)
80 | {
81 |    int rank = -1;
82 |    mpiComm_rank(MPI_COMM_WORLD, &rank);
83 | 
84 | #if 0
85 |    printf("rank %i: ", rank);
86 | #else
87 |    if ( rank != 0 ) { return; }
88 | #endif
89 | 
90 |    va_list args;
91 |    va_start( args, format );
92 |    vprintf(format, args);
93 |    va_end( args );
94 | }
95 | 
96 | //----------------------------------------------------------------------------------------------------------------------
97 | // Converts a format string into a c++ string.  Parameters are the same as printf.
98 | //----------------------------------------------------------------------------------------------------------------------
99 | std::string MC_String(const char fmt[], ...)
100 | {
101 |    va_list args;
102 |    va_start(args, fmt);
103 |    int chars_needed = vsnprintf(NULL, 0, fmt, args);
104 |    va_end(args);
105 | 
106 |    if (chars_needed < 0)
107 |    {
108 |       MC_Fatal_Jump( "Output error from vsnprintf: %d", chars_needed );
109 |    }
110 | 
111 |    // Increase by one for the null terminator.
112 |    chars_needed++;
113 | 
114 |    // Bump up chars_needed (if necessary) so that we allocate according to our byte alignment.
115 |    // This is currently 16 bytes, so allocate 16, 32, 48, etc. bytes at a time.
116 |    #define MC_BYTE_ALIGNMENT 16
117 | 
118 |    int remainder = chars_needed % MC_BYTE_ALIGNMENT;
119 |    chars_needed += remainder > 0 ? MC_BYTE_ALIGNMENT - remainder : 0;
120 | 
121 |    std::vector<char> buffer(chars_needed);
122 |    va_start(args, fmt);
123 |    vsnprintf(&buffer[0], chars_needed, fmt, args);
124 |    va_end(args);
125 | 
126 |    return std::string(&buffer[0]);
127 | }
128 | 
129 | 
--------------------------------------------------------------------------------
/src/utils.hh:
--------------------------------------------------------------------------------
1 | #ifndef UTILS_HH
2 | #define UTILS_HH
3 | 
4 | #include <string>
5 | 
6 | int mc_get_num_physical_procs(void);
7 | 
8 | void MC_Verify_Thread_Zero(char const * const file, int line);
9 | 
10 | void printBanner(const char *git_version, const char *git_hash);
11 | 
12 | #define MC_Warning printf
13 | 
14 | void Print0(const char *format, ...);
15 | 
16 | std::string MC_String(const char fmt[], ...);
17 | 
18 | #endif
19 | 
--------------------------------------------------------------------------------
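A usage note for MC_String above: it is printf into a std::string, with the buffer rounded up to the next multiple of MC_BYTE_ALIGNMENT; for example, a 24-byte requirement (23 characters plus the terminator) allocates a 32-byte buffer. A hypothetical call:

    std::string label = MC_String("Cycle %3d of %3d (%.1f%%)", 7, 100, 7.0);
    // label == "Cycle   7 of 100 (7.0%)"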