├── .gitignore
├── Examples
│   ├── AllAbsorb
│   │   └── allAbsorb.inp
│   ├── AllEscape
│   │   └── allEscape.inp
│   ├── AllScattering
│   │   └── scatteringOnly.inp
│   ├── CORAL2_Benchmark
│   │   ├── Problem1
│   │   │   ├── 00_README.TXT
│   │   │   ├── Coral2_P1.inp
│   │   │   ├── Coral2_P1_1.inp
│   │   │   ├── Coral2_P1_4096.inp
│   │   │   ├── P1_04t.sh
│   │   │   ├── P1_16t.sh
│   │   │   └── P1_64t.sh
│   │   └── Problem2
│   │       ├── 00_README.TXT
│   │       ├── Coral2_P2.inp
│   │       ├── Coral2_P2_1.inp
│   │       ├── Coral2_P2_4096.inp
│   │       └── P2_64t.sh
│   ├── CTS2_Benchmark
│   │   ├── 00_README.TXT
│   │   ├── CTS2.inp
│   │   ├── CTS2_1.inp
│   │   ├── CTS2_36.inp
│   │   └── CTS2_scaling.sh
│   ├── Homogeneous
│   │   ├── homogeneousProblem.inp
│   │   ├── homogeneousProblem_v3.inp
│   │   ├── homogeneousProblem_v3_wq.inp
│   │   ├── homogeneousProblem_v4_tm.inp
│   │   ├── homogeneousProblem_v4_ts.inp
│   │   ├── homogeneousProblem_v5_ts.inp
│   │   ├── homogeneousProblem_v7_ts.inp
│   │   ├── quicksilver_aprun_trinity_01.sh
│   │   ├── quicksilver_aprun_trinity_02.sh
│   │   ├── quicksilver_aprun_trinity_04.sh
│   │   ├── quicksilver_slurm_rzalast_01.sh
│   │   ├── quicksilver_slurm_rzgenie_01.sh
│   │   └── run.homogeneousProblem_v4.rzoz7.x
│   ├── NoCollisions
│   │   └── no.collisions.inp
│   ├── NoFission
│   │   └── noFission.inp
│   └── NonFlatXC
│       └── NonFlatXC.inp
├── LICENSE.md
├── README.md
└── src
    ├── BulkStorage.hh
    ├── CollisionEvent.cc
    ├── CollisionEvent.hh
    ├── CommObject.hh
    ├── CoralBenchmark.cc
    ├── CoralBenchmark.hh
    ├── CycleTracking.cc
    ├── CycleTracking.hh
    ├── DeclareMacro.hh
    ├── DecompositionObject.cc
    ├── DecompositionObject.hh
    ├── DirectionCosine.cc
    ├── DirectionCosine.hh
    ├── Doxyfile
    ├── EnergySpectrum.cc
    ├── EnergySpectrum.hh
    ├── FacetPair.hh
    ├── GlobalFccGrid.cc
    ├── GlobalFccGrid.hh
    ├── Globals.hh
    ├── GridAssignmentObject.cc
    ├── GridAssignmentObject.hh
    ├── IndexToTuple.hh
    ├── IndexToTuple4.hh
    ├── InputBlock.cc
    ├── InputBlock.hh
    ├── Long64.hh
    ├── MCT.cc
    ├── MCT.hh
    ├── MC_Adjacent_Facet.cc
    ├── MC_Base_Particle.cc
    ├── MC_Base_Particle.hh
    ├── MC_Cell_State.hh
    ├── MC_Distance_To_Facet.hh
    ├── MC_Domain.cc
    ├── MC_Domain.hh
    ├── MC_Facet_Adjacency.hh
    ├── MC_Facet_Crossing_Event.cc
    ├── MC_Facet_Crossing_Event.hh
    ├── MC_Facet_Geometry.hh
    ├── MC_Fast_Timer.cc
    ├── MC_Fast_Timer.hh
    ├── MC_Load_Particle.cc
    ├── MC_Location.cc
    ├── MC_Location.hh
    ├── MC_Nearest_Facet.hh
    ├── MC_Particle.hh
    ├── MC_Particle_Buffer.cc
    ├── MC_Particle_Buffer.hh
    ├── MC_Processor_Info.hh
    ├── MC_RNG_State.cc
    ├── MC_RNG_State.hh
    ├── MC_Segment_Outcome.cc
    ├── MC_Segment_Outcome.hh
    ├── MC_SourceNow.cc
    ├── MC_SourceNow.hh
    ├── MC_Time_Info.hh
    ├── MC_Vector.hh
    ├── MacroscopicCrossSection.cc
    ├── MacroscopicCrossSection.hh
    ├── Makefile
    ├── MaterialDatabase.hh
    ├── MemoryControl.hh
    ├── MeshPartition.cc
    ├── MeshPartition.hh
    ├── MonteCarlo.cc
    ├── MonteCarlo.hh
    ├── MpiCommObject.cc
    ├── MpiCommObject.hh
    ├── NVTX_Range.hh
    ├── NuclearData.cc
    ├── NuclearData.hh
    ├── Parameters.cc
    ├── Parameters.hh
    ├── ParticleVault.cc
    ├── ParticleVault.hh
    ├── ParticleVaultContainer.cc
    ├── ParticleVaultContainer.hh
    ├── PhysicalConstants.cc
    ├── PhysicalConstants.hh
    ├── PopulationControl.cc
    ├── PopulationControl.hh
    ├── QS_Vector.hh
    ├── QS_atomics.hh
    ├── READ.ME.HOW.TO.RUN
    ├── SendQueue.cc
    ├── SendQueue.hh
    ├── SharedMemoryCommObject.cc
    ├── SharedMemoryCommObject.hh
    ├── Tallies.cc
    ├── Tallies.hh
    ├── Tuple.hh
    ├── Tuple4.hh
    ├── Tuple4ToIndex.hh
    ├── TupleToIndex.hh
    ├── cmdLineParser.cc
    ├── cmdLineParser.hh
    ├── cudaFunctions.cc
    ├── cudaFunctions.hh
    ├── cudaUtils.hh
    ├── gpuPortability.hh
    ├── initMC.cc
    ├── initMC.hh
    ├── macros.hh
    ├── main.cc
    ├── mc_omp_critical.hh
    ├── mc_omp_parallel_for_schedule_static.hh
    ├── mc_omp_parallel_for_schedule_static_if.hh
    ├── mc_omp_parallel_for_schedule_static_num_physical_cores.hh
    ├── memUtils.hh
    ├── mpi_stubs.hh
    ├── mpi_stubs_internal.hh
    ├── parseUtils.cc
    ├── parseUtils.hh
    ├── portability.hh
    ├── qs_assert.hh
    ├── utils.cc
    ├── utils.hh
    ├── utilsMpi.cc
    └── utilsMpi.hh
/.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # Project-specific 35 | .depend 36 | .depend.bak 37 | git_hash.hh 38 | git_vers.hh 39 | qs 40 | -------------------------------------------------------------------------------- /Examples/AllAbsorb/allAbsorb.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e6 3 | fMax: 0.1 4 | inputFile: 5 | loadBalance: 1 6 | lx: 10 7 | ly: 10 8 | lz: 10 9 | nParticles: 9999 10 | nSteps: 20 11 | nx: 10 12 | ny: 10 13 | nz: 10 14 | seed: 1029384761 15 | xDom: 0 16 | yDom: 0 17 | zDom: 0 18 | eMax: 1 19 | eMin: 0.99999 20 | nGroups: 1 21 | 22 | Geometry: 23 | material: boxMaterial 24 | shape: brick 25 | xMax: 10 26 | xMin: 0 27 | yMax: 10 28 | yMin: 0 29 | zMax: 10 30 | zMin: 0 31 | 32 | Geometry: 33 | material: sourceMaterial 34 | shape: brick 35 | xMax: 1 36 | xMin: 0 37 | yMax: 1 38 | yMin: 0 39 | zMax: 1 40 | zMin: 0 41 | 42 | Material: 43 | name: boxMaterial 44 | nIsotopes: 10 45 | nReactions: 9 46 | sourceRate: 0 47 | totalCrossSection: 1e10 48 | absorptionCrossSection: flat 49 | fissionCrossSection: flat 50 | scatteringCrossSection: flat 51 | absorptionCrossSectionRatio: 1 52 | fissionCrossSectionRatio: 0 53 | scatteringCrossSectionRatio: 0 54 | 55 | Material: 56 | name: sourceMaterial 57 | nIsotopes: 10 58 | nReactions: 9 59 | sourceRate: 1e-2 60 | totalCrossSection: 1e10 61 | absorptionCrossSection: flat 62 | fissionCrossSection: flat 63 | scatteringCrossSection: flat 64 | absorptionCrossSectionRatio: 1 65 | fissionCrossSectionRatio: 0 66 | scatteringCrossSectionRatio: 0 67 | 68 | CrossSection: 69 | name: flat 70 | A: 0 71 | B: 0 72 | C: 0 73 | D: 0 74 | E: 1 75 | nuBar: 2.4 76 | 77 | 78 | -------------------------------------------------------------------------------- /Examples/AllEscape/allEscape.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e6 3 | boundaryCondition: escape 4 | fMax: 0.1 5 | inputFile: 6 | loadBalance: 1 7 | lx: 10 8 | ly: 10 9 | lz: 10 10 | nParticles: 9999 11 | nSteps: 20 12 | nx: 10 13 | ny: 10 14 | nz: 10 15 | seed: 1029384761 16 | xDom: 0 17 | yDom: 0 18 | zDom: 0 19 | eMax: 1 20 | eMin: 0.99999 21 | nGroups: 1 22 | 23 | Geometry: 24 | material: boxMaterial 25 | shape: brick 26 | xMax: 10 27 | xMin: 0 28 | yMax: 10 29 | yMin: 0 30 | zMax: 10 31 | zMin: 0 32 | 33 | Geometry: 34 | material: sourceMaterial 35 | shape: brick 36 | xMax: 1 37 | xMin: 0 38 | yMax: 1 39 | yMin: 0 40 | zMax: 1 41 | zMin: 0 42 | 43 | Material: 44 | name: boxMaterial 45 | nIsotopes: 10 46 | nReactions: 9 47 | sourceRate: 0 48 | totalCrossSection: 1e-20 49 | absorptionCrossSection: flat 50 | fissionCrossSection: flat 51 | scatteringCrossSection: flat 52 | absorptionCrossSectionRatio: 0 53 | fissionCrossSectionRatio: 0 54 |
scatteringCrossSectionRatio: 1 55 | 56 | Material: 57 | name: sourceMaterial 58 | nIsotopes: 10 59 | nReactions: 9 60 | sourceRate: 1e-2 61 | totalCrossSection: 1e-20 62 | absorptionCrossSection: flat 63 | fissionCrossSection: flat 64 | scatteringCrossSection: flat 65 | absorptionCrossSectionRatio: 0 66 | fissionCrossSectionRatio: 0 67 | scatteringCrossSectionRatio: 1 68 | 69 | CrossSection: 70 | name: flat 71 | A: 0 72 | B: 0 73 | C: 0 74 | D: 0 75 | E: 1 76 | nuBar: 2.4 77 | 78 | 79 | -------------------------------------------------------------------------------- /Examples/AllScattering/scatteringOnly.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: streamingProblem.inp 5 | boundaryCondition: octant 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | lx: 100 10 | ly: 100 11 | lz: 100 12 | nParticles: 10000000 13 | nSteps: 10 14 | nx: 10 15 | ny: 10 16 | nz: 10 17 | seed: 1029384756 18 | xDom: 0 19 | yDom: 0 20 | zDom: 0 21 | eMax: 20 22 | eMin: 1e-9 23 | nGroups: 230 24 | 25 | Geometry: 26 | material: sourceMaterial 27 | shape: brick 28 | xMax: 100 29 | xMin: 0 30 | yMax: 100 31 | yMin: 0 32 | zMax: 100 33 | zMin: 0 34 | 35 | Material: 36 | name: sourceMaterial 37 | nIsotopes: 10 38 | nReactions: 9 39 | sourceRate: 1e+10 40 | totalCrossSection: 0.1 41 | absorptionCrossSection: flat 42 | fissionCrossSection: flat 43 | scatteringCrossSection: flat 44 | absorptionCrossSectionRatio: 0 45 | fissionCrossSectionRatio: 0 46 | scatteringCrossSectionRatio: 1 47 | 48 | CrossSection: 49 | name: flat 50 | A: 0 51 | B: 0 52 | C: 0 53 | D: 0 54 | E: 1 55 | nuBar: 2.4 56 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/00_README.TXT: -------------------------------------------------------------------------------- 1 | This is Quicksilver Problem #1 for the CORAL2 Benchmark. 2 | 3 | This problem is required. 4 | 5 | The input files in this directory are configured to support a scaling 6 | study and collection of the Figure of Merit on Vulcan (BG/Q) at LLNL. 7 | 8 | The essential physics of the problem are defined in the input file 9 | Coral2_P1.inp. The parameters in this file should not be changed. 10 | Parameters to set the size of the problem (number of particles, number 11 | of mesh elements, size of domain, and MPI decomposition) can all be 12 | specified on the command line (for example see P1_64t.sh). Alternatively, 13 | you can copy Coral2_P1.inp to a new file and add the necessary 14 | parameters (see Coral2_P1_1.inp). 15 | 16 | Note that parameters in the input deck override corresponding command 17 | line arguments. 18 | 19 | For the scaling study here, we have chosen 4096 mesh elements per node. 20 | This allows a uniform spatial decomposition of mesh elements for both 1 21 | rank per node and 64 ranks per node (and any power of two in between). 22 | We also choose 40 particles per mesh element. This is divisible by 10 23 | (so we get an integer number of particles sourced in) and gives a 24 | reasonable cycle time of 2-4 seconds. 25 | 26 | 27 | MANIFEST: 28 | 29 | 00_README.TXT This file 30 | Coral2_P1.inp Input without problem size specification. 31 | Useful to build scaling study with command line 32 | arguments.
33 | Coral2_P1_1.inp Input file for a single MPI rank 34 | Coral2_P1_4096.inp Input file for 4096 MPI ranks 35 | P1_04t.sh Example scaling study for BG/Q with 4 threads per rank 36 | P1_16t.sh Example scaling study for BG/Q with 16 threads per rank 37 | P1_64t.sh Example scaling study for BG/Q with 64 threads per rank 38 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/Coral2_P1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 2e-09 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | mpiThreadMultiple: 0 9 | nSteps: 100 10 | seed: 1029384756 11 | eMax: 20 12 | eMin: 1e-09 13 | nGroups: 230 14 | lowWeightCutoff: 0.001 15 | coralBenchmark: 1 16 | 17 | Geometry: 18 | material: sourceMaterial 19 | shape: brick 20 | xMax: 10000 21 | xMin: 0 22 | yMax: 10000 23 | yMin: 0 24 | zMax: 10000 25 | zMin: 0 26 | 27 | Material: 28 | name: sourceMaterial 29 | mass: 12.011 30 | nIsotopes: 20 31 | nReactions: 9 32 | sourceRate: 1e+10 33 | totalCrossSection: 1.5 34 | absorptionCrossSection: flat 35 | fissionCrossSection: flat 36 | scatteringCrossSection: flat 37 | absorptionCrossSectionRatio: 0.04 38 | fissionCrossSectionRatio: 0.05 39 | scatteringCrossSectionRatio: 1 40 | 41 | CrossSection: 42 | name: flat 43 | A: 0 44 | B: 0 45 | C: 0 46 | D: 0 47 | E: 1 48 | nuBar: 1.6 49 | 50 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 2e-09 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | mpiThreadMultiple: 0 9 | lx: 16 10 | ly: 16 11 | lz: 16 12 | nParticles: 163840 13 | nSteps: 100 14 | nx: 16 15 | ny: 16 16 | nz: 16 17 | xDom: 1 18 | yDom: 1 19 | zDom: 1 20 | seed: 1029384756 21 | eMax: 20 22 | eMin: 1e-09 23 | nGroups: 230 24 | lowWeightCutoff: 0.001 25 | coralBenchmark: 1 26 | 27 | Geometry: 28 | material: sourceMaterial 29 | shape: brick 30 | xMax: 10000 31 | xMin: 0 32 | yMax: 10000 33 | yMin: 0 34 | zMax: 10000 35 | zMin: 0 36 | 37 | Material: 38 | name: sourceMaterial 39 | mass: 12.011 40 | nIsotopes: 20 41 | nReactions: 9 42 | sourceRate: 1e+10 43 | totalCrossSection: 1.5 44 | absorptionCrossSection: flat 45 | fissionCrossSection: flat 46 | scatteringCrossSection: flat 47 | absorptionCrossSectionRatio: 0.04 48 | fissionCrossSectionRatio: 0.05 49 | scatteringCrossSectionRatio: 1 50 | 51 | CrossSection: 52 | name: flat 53 | A: 0 54 | B: 0 55 | C: 0 56 | D: 0 57 | E: 1 58 | nuBar: 1.6 59 | 60 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 2e-09 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | mpiThreadMultiple: 0 9 | lx: 256 10 | ly: 256 11 | lz: 256 12 | nParticles: 671088640 13 | nSteps: 100 14 | nx: 256 15 | ny: 256 16 | nz: 256 17 | xDom: 16 18 | yDom: 16 19 | zDom: 16 20 | seed: 1029384756 21 | eMax: 20 22 | eMin: 1e-09 23 | nGroups: 230 24 | lowWeightCutoff: 0.001 25 | coralBenchmark: 1 26 | 27 | Geometry: 28 | material: sourceMaterial 29 | shape: brick 30 | xMax: 10000 31 | xMin: 0 32 | yMax: 10000 33 | yMin: 0 34 | zMax: 
10000 35 | zMin: 0 36 | 37 | Material: 38 | name: sourceMaterial 39 | mass: 12.011 40 | nIsotopes: 20 41 | nReactions: 9 42 | sourceRate: 1e+10 43 | totalCrossSection: 1.5 44 | absorptionCrossSection: flat 45 | fissionCrossSection: flat 46 | scatteringCrossSection: flat 47 | absorptionCrossSectionRatio: 0.04 48 | fissionCrossSectionRatio: 0.05 49 | scatteringCrossSectionRatio: 1 50 | 51 | CrossSection: 52 | name: flat 53 | A: 0 54 | B: 0 55 | C: 0 56 | D: 0 57 | E: 1 58 | nuBar: 1.6 59 | 60 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/P1_04t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 1: 4 | 5 | # 16 ranks per node 6 | # 4 threads per rank 7 | # 4096 mesh elements per node 8 | # 40 particles per mesh element -> 163840 particles per node 9 | 10 | export OMP_NUM_THREADS=4 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N24576 -n393216 $QS -i Coral2_P1.inp -X 768 -Y 512 -Z 256 -x 768 -y 512 -z 256 -I 96 -J 64 -K 64 -n 4026531840 > p1n24576t04 15 | srun -N16384 -n262144 $QS -i Coral2_P1.inp -X 512 -Y 512 -Z 256 -x 512 -y 512 -z 256 -I 64 -J 64 -K 64 -n 2684354560 > p1n16384t04 16 | srun -N8192 -n131072 $QS -i Coral2_P1.inp -X 512 -Y 256 -Z 256 -x 512 -y 256 -z 256 -I 64 -J 64 -K 32 -n 1342177280 > p1n08192t04 17 | srun -N4096 -n65536 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 256 -x 256 -y 256 -z 256 -I 64 -J 32 -K 32 -n 671088640 > p1n04096t04 18 | srun -N2048 -n32768 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 128 -x 256 -y 256 -z 128 -I 32 -J 32 -K 32 -n 335544320 > p1n02048t04 19 | srun -N1024 -n16384 $QS -i Coral2_P1.inp -X 256 -Y 128 -Z 128 -x 256 -y 128 -z 128 -I 32 -J 32 -K 16 -n 167772160 > p1n01024t04 20 | srun -N512 -n8192 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 128 -x 128 -y 128 -z 128 -I 32 -J 16 -K 16 -n 83886080 > p1n00512t04 21 | srun -N256 -n4096 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 64 -x 128 -y 128 -z 64 -I 16 -J 16 -K 16 -n 41943040 > p1n00256t04 22 | srun -N128 -n2048 $QS -i Coral2_P1.inp -X 128 -Y 64 -Z 64 -x 128 -y 64 -z 64 -I 16 -J 16 -K 8 -n 20971520 > p1n00128t04 23 | srun -N64 -n1024 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 64 -x 64 -y 64 -z 64 -I 16 -J 8 -K 8 -n 10485760 > p1n00064t04 24 | srun -N32 -n512 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 8 -J 8 -K 8 -n 5242880 > p1n00032t04 25 | srun -N16 -n256 $QS -i Coral2_P1.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 8 -J 8 -K 4 -n 2621440 > p1n00016t04 26 | srun -N8 -n128 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 8 -J 4 -K 4 -n 1310720 > p1n00008t04 27 | srun -N4 -n64 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 16 -x 32 -y 32 -z 16 -I 4 -J 4 -K 4 -n 655360 > p1n00004t04 28 | srun -N2 -n32 $QS -i Coral2_P1.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 4 -J 4 -K 2 -n 327680 > p1n00002t04 29 | srun -N1 -n16 $QS -i Coral2_P1.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 4 -J 2 -K 2 -n 163840 > p1n00001t04 30 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/P1_16t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 1: 4 | 5 | # 4 ranks per node 6 | # 16 threads per rank 7 | # 4096 mesh elements per node 8 | # 40 particles per mesh element -> 163840 particles per node 9 | 10 | export OMP_NUM_THREADS=16 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N24576 -n98304 $QS -i Coral2_P1.inp -X 768 -Y 512 -Z 256 -x 768 -y 512 -z 256 -I 96 -J 32 -K 32 -n
4026531840 > p1n24576t16 15 | srun -N16384 -n65536 $QS -i Coral2_P1.inp -X 512 -Y 512 -Z 256 -x 512 -y 512 -z 256 -I 64 -J 32 -K 32 -n 2684354560 > p1n16384t16 16 | srun -N8192 -n32768 $QS -i Coral2_P1.inp -X 512 -Y 256 -Z 256 -x 512 -y 256 -z 256 -I 32 -J 32 -K 32 -n 1342177280 > p1n08192t16 17 | srun -N4096 -n16384 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 256 -x 256 -y 256 -z 256 -I 32 -J 32 -K 16 -n 671088640 > p1n04096t16 18 | srun -N2048 -n8192 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 128 -x 256 -y 256 -z 128 -I 32 -J 16 -K 16 -n 335544320 > p1n02048t16 19 | srun -N1024 -n4096 $QS -i Coral2_P1.inp -X 256 -Y 128 -Z 128 -x 256 -y 128 -z 128 -I 16 -J 16 -K 16 -n 167772160 > p1n01024t16 20 | srun -N512 -n2048 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 128 -x 128 -y 128 -z 128 -I 16 -J 16 -K 8 -n 83886080 > p1n00512t16 21 | srun -N256 -n1024 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 64 -x 128 -y 128 -z 64 -I 16 -J 8 -K 8 -n 41943040 > p1n00256t16 22 | srun -N128 -n512 $QS -i Coral2_P1.inp -X 128 -Y 64 -Z 64 -x 128 -y 64 -z 64 -I 8 -J 8 -K 8 -n 20971520 > p1n00128t16 23 | srun -N64 -n256 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 64 -x 64 -y 64 -z 64 -I 8 -J 8 -K 4 -n 10485760 > p1n00064t16 24 | srun -N32 -n128 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 8 -J 4 -K 4 -n 5242880 > p1n00032t16 25 | srun -N16 -n64 $QS -i Coral2_P1.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 4 -J 4 -K 4 -n 2621440 > p1n00016t16 26 | srun -N8 -n32 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 4 -J 4 -K 2 -n 1310720 > p1n00008t16 27 | srun -N4 -n16 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 16 -x 32 -y 32 -z 16 -I 4 -J 2 -K 2 -n 655360 > p1n00004t16 28 | srun -N2 -n8 $QS -i Coral2_P1.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 2 -J 2 -K 2 -n 327680 > p1n00002t16 29 | srun -N1 -n4 $QS -i Coral2_P1.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 2 -J 2 -K 1 -n 163840 > p1n00001t16 30 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem1/P1_64t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 1: 4 | 5 | # 1 rank per node 6 | # 64 threads per rank 7 | # 4096 mesh elements per node 8 | # 40 particles per mesh element -> 163840 particles per node 9 | 10 | export OMP_NUM_THREADS=64 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N24576 -n24576 $QS -i Coral2_P1.inp -X 768 -Y 512 -Z 256 -x 768 -y 512 -z 256 -I 48 -J 32 -K 16 -n 4026531840 > p1n24576t64 15 | srun -N16384 -n16384 $QS -i Coral2_P1.inp -X 512 -Y 512 -Z 256 -x 512 -y 512 -z 256 -I 32 -J 32 -K 16 -n 2684354560 > p1n16384t64 16 | srun -N8192 -n8192 $QS -i Coral2_P1.inp -X 512 -Y 256 -Z 256 -x 512 -y 256 -z 256 -I 32 -J 16 -K 16 -n 1342177280 > p1n08192t64 17 | srun -N4096 -n4096 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 256 -x 256 -y 256 -z 256 -I 16 -J 16 -K 16 -n 671088640 > p1n04096t64 18 | srun -N2048 -n2048 $QS -i Coral2_P1.inp -X 256 -Y 256 -Z 128 -x 256 -y 256 -z 128 -I 16 -J 16 -K 8 -n 335544320 > p1n02048t64 19 | srun -N1024 -n1024 $QS -i Coral2_P1.inp -X 256 -Y 128 -Z 128 -x 256 -y 128 -z 128 -I 16 -J 8 -K 8 -n 167772160 > p1n01024t64 20 | srun -N512 -n512 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 128 -x 128 -y 128 -z 128 -I 8 -J 8 -K 8 -n 83886080 > p1n00512t64 21 | srun -N256 -n256 $QS -i Coral2_P1.inp -X 128 -Y 128 -Z 64 -x 128 -y 128 -z 64 -I 8 -J 8 -K 4 -n 41943040 > p1n00256t64 22 | srun -N128 -n128 $QS -i Coral2_P1.inp -X 128 -Y 64 -Z 64 -x 128 -y 64 -z 64 -I 8 -J 4 -K 4 -n 20971520 > p1n00128t64 23 | srun -N64 -n64 $QS -i
Coral2_P1.inp -X 64 -Y 64 -Z 64 -x 64 -y 64 -z 64 -I 4 -J 4 -K 4 -n 10485760 > p1n00064t64 24 | srun -N32 -n32 $QS -i Coral2_P1.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 4 -J 4 -K 2 -n 5242880 > p1n00032t64 25 | srun -N16 -n16 $QS -i Coral2_P1.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 4 -J 2 -K 2 -n 2621440 > p1n00016t64 26 | srun -N8 -n8 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 2 -J 2 -K 2 -n 1310720 > p1n00008t64 27 | srun -N4 -n4 $QS -i Coral2_P1.inp -X 32 -Y 32 -Z 16 -x 32 -y 32 -z 16 -I 2 -J 2 -K 1 -n 655360 > p1n00004t64 28 | srun -N2 -n2 $QS -i Coral2_P1.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 2 -J 1 -K 1 -n 327680 > p1n00002t64 29 | srun -N1 -n1 $QS -i Coral2_P1.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 1 -J 1 -K 1 -n 163840 > p1n00001t64 30 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/00_README.TXT: -------------------------------------------------------------------------------- 1 | This is Quicksilver Problem #2 for the CORAL2 Benchmark. 2 | 3 | This problem is *not* required. 4 | 5 | Cross sections in this problem are tailored to give a broader energy 6 | spectrum for the particles and a different reaction mix compared to 7 | problem 1. 8 | 9 | The essential physics of the problem are defined in the input file 10 | Coral2_P2.inp. The parameters in this file should not be changed. 11 | Parameters to set the size of the problem (number of particles, number 12 | of mesh elements, size of domain, and MPI decomposition) can all be 13 | specified on the command line (for example see P2_64t.sh). Alternatively, 14 | you can copy Coral2_P2.inp to a new file and add the necessary 15 | parameters (see Coral2_P2_1.inp). 16 | 17 | Note that parameters in the input deck override corresponding command 18 | line arguments. 19 | 20 | For the scaling study here, we have chosen 11^3 = 1331 mesh elements per 21 | node. This makes it difficult to uniformly decompose the mesh 22 | elements for anything other than 1 rank per node. We also choose 40 23 | particles per mesh element. This is divisible by 10 (so we get an 24 | integer number of particles sourced in) and gives a reasonable cycle time 25 | of 2-4 seconds. 26 | 27 | MANIFEST: 28 | 29 | 00_README.TXT This file 30 | Coral2_P2.inp Input without problem size specification. 31 | Useful to build scaling study with command line 32 | arguments.
33 | Coral2_P2_1.inp Input file for a single MPI rank 34 | Coral2_P2_4096.inp Input file for 4096 MPI ranks 35 | P2_64t.sh Example scaling study for BG/Q with 64 threads per rank 36 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/Coral2_P2.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | nSteps: 100 9 | seed: 1029384756 10 | eMax: 20 11 | eMin: 1e-08 12 | nGroups: 230 13 | lowWeightCutoff: 0.001 14 | bTally: 1 15 | fTally: 1 16 | cTally: 1 17 | coralBenchmark: 2 18 | 19 | Geometry: 20 | material: sourceMaterial 21 | shape: brick 22 | xMax: 10000 23 | xMin: 0 24 | yMax: 10000 25 | yMin: 0 26 | zMax: 10000 27 | zMin: 0 28 | 29 | Material: 30 | name: sourceMaterial 31 | mass: 1.5 32 | nIsotopes: 10 33 | nReactions: 3 34 | sourceRate: 1e+10 35 | totalCrossSection: 16.75 36 | absorptionCrossSection: absorb 37 | fissionCrossSection: fission 38 | scatteringCrossSection: scatter 39 | absorptionCrossSectionRatio: 10 40 | fissionCrossSectionRatio: 8 41 | scatteringCrossSectionRatio: 82 42 | absorptionCrossSection: absorb 43 | fissionCrossSection: fission 44 | scatteringCrossSection: scatter 45 | 46 | CrossSection: 47 | name: absorb 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: -0.2 52 | E: 2 53 | 54 | CrossSection: 55 | name: fission 56 | A: 0 57 | B: 0 58 | C: 0 59 | D: -0.2 60 | E: 2 61 | nuBar: 2 62 | 63 | CrossSection: 64 | name: scatter 65 | A: 0 66 | B: 0 67 | C: 0 68 | D: 0 69 | E: 97 70 | 71 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 1 9 | ly: 1 10 | lz: 1 11 | nParticles: 53240 12 | nSteps: 100 13 | nx: 11 14 | ny: 11 15 | nz: 11 16 | seed: 1029384756 17 | xDom: 1 18 | yDom: 1 19 | zDom: 1 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 10 43 | nReactions: 3 44 | sourceRate: 1e+10 45 | totalCrossSection: 16.75 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 |
loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 16 9 | ly: 16 10 | lz: 16 11 | nParticles: 436142080 12 | nSteps: 100 13 | nx: 176 14 | ny: 176 15 | nz: 176 16 | seed: 1029384756 17 | xDom: 16 18 | yDom: 16 19 | zDom: 16 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 10 43 | nReactions: 3 44 | sourceRate: 1e+10 45 | totalCrossSection: 16.75 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CORAL2_Benchmark/Problem2/P2_64t.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Problem 2: 4 | 5 | # 1 rank per node 6 | # 64 threads per rank 7 | # 1331 mesh elements per node (11^3) 8 | # 40 particles per mesh element -> 53240 particles per node 9 | 10 | export OMP_NUM_THREADS=64 11 | 12 | QS=../../../src/qs 13 | 14 | srun -N1 -n1 $QS -i Coral2_P2.inp -X 1 -Y 1 -Z 1 -x 11 -y 11 -z 11 -I 1 -J 1 -K 1 -n 53240 > p2n00001t64 15 | srun -N2 -n2 $QS -i Coral2_P2.inp -X 2 -Y 1 -Z 1 -x 22 -y 11 -z 11 -I 2 -J 1 -K 1 -n 106480 > p2n00002t64 16 | srun -N4 -n4 $QS -i Coral2_P2.inp -X 2 -Y 2 -Z 1 -x 22 -y 22 -z 11 -I 2 -J 2 -K 1 -n 212960 > p2n00004t64 17 | srun -N8 -n8 $QS -i Coral2_P2.inp -X 2 -Y 2 -Z 2 -x 22 -y 22 -z 22 -I 2 -J 2 -K 2 -n 425920 > p2n00008t64 18 | srun -N16 -n16 $QS -i Coral2_P2.inp -X 4 -Y 2 -Z 2 -x 44 -y 22 -z 22 -I 4 -J 2 -K 2 -n 851840 > p2n00016t64 19 | srun -N32 -n32 $QS -i Coral2_P2.inp -X 4 -Y 4 -Z 2 -x 44 -y 44 -z 22 -I 4 -J 4 -K 2 -n 1703680 > p2n00032t64 20 | srun -N64 -n64 $QS -i Coral2_P2.inp -X 4 -Y 4 -Z 4 -x 44 -y 44 -z 44 -I 4 -J 4 -K 4 -n 3407360 > p2n00064t64 21 | srun -N128 -n128 $QS -i Coral2_P2.inp -X 8 -Y 4 -Z 4 -x 88 -y 44 -z 44 -I 8 -J 4 -K 4 -n 6814720 > p2n00128t64 22 | srun -N256 -n256 $QS -i Coral2_P2.inp -X 8 -Y 8 -Z 4 -x 88 -y 88 -z 44 -I 8 -J 8 -K 4 -n 13629440 > p2n00256t64 23 | srun -N512 -n512 $QS -i Coral2_P2.inp -X 8 -Y 8 -Z 8 -x 88 -y 88 -z 88 -I 8 -J 8 -K 8 -n 27258880 > p2n00512t64 24 | srun -N1024 -n1024 $QS -i Coral2_P2.inp -X 16 -Y 8 -Z 8 -x 176 -y 88 -z 88 -I 16 -J 8 -K 8 -n 54517760 > p2n01024t64 25 | srun -N2048 -n2048 $QS -i Coral2_P2.inp -X 16 -Y 16 -Z 8 -x 176 -y 176 -z 88 -I 16 -J 16 -K 8 -n 109035520 > p2n02048t64 26 | srun -N4096 -n4096 $QS -i Coral2_P2.inp -X 16 -Y 16 -Z 16 -x 176 -y 176 -z 176 -I 16 -J 16 -K 16 -n 218071040 > p2n04096t64 27 | srun -N8192 -n8192 $QS -i Coral2_P2.inp -X 32 -Y 16 -Z 16 -x 352 -y 176 -z 176 -I 32 -J 16 -K 16 -n 436142080 > p2n08192t64 28 | srun -N16384 -n16384 $QS -i Coral2_P2.inp -X 32 -Y 32 -Z 16 -x 352 -y 352 -z 176 -I 32 -J 32 -K 16 -n 872284160 >
p2n16384t64 29 | srun -N24576 -n24576 $QS -i Coral2_P2.inp -X 48 -Y 32 -Z 16 -x 528 -y 352 -z 176 -I 48 -J 32 -K 16 -n 1308426240 > p2n24576t64 30 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/00_README.TXT: -------------------------------------------------------------------------------- 1 | This is the Quicksilver CTS2 Benchmark Problem. 2 | 3 | This problem is based on the optional Problem #2 from the CORAL2 4 | Benchmark, but it has been changed slightly. 5 | 6 | The essential physics of the problem are defined in the input file 7 | CTS2.inp. The parameters in this file should not be changed. 8 | Parameters to set the size of the problem (number of particles, number 9 | of mesh elements, size of domain, and MPI decomposition) can all be 10 | specified on the command line (for example see CTS2_scaling.sh). 11 | Alternatively, you can copy CTS2.inp to a new file and add the 12 | necessary parameters (see CTS2_1.inp). 13 | 14 | Note that parameters in the input deck override corresponding command 15 | line arguments. 16 | 17 | This problem should be run on a single node with weak scaling 18 | over the number of cores on a node. You are required to run this 19 | problem with 20 | * 1 rank per core 21 | * 16^3 = 4096 mesh elements per rank 22 | * 40960 particles per rank 23 | 24 | 25 | MANIFEST: 26 | 27 | 00_README.TXT This file 28 | CTS2.inp Input without problem size specification. 29 | Useful to build scaling study with command line 30 | arguments. 31 | CTS2_1.inp Input file for a single MPI rank 32 | CTS2_36.inp Input file for 36 MPI ranks 33 | CTS2_scaling.sh Example scaling study for 36 cores per node 34 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1.1e-07 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | nSteps: 100 9 | seed: 1029384756 10 | eMax: 20 11 | eMin: 1e-08 12 | nGroups: 230 13 | lowWeightCutoff: 0.001 14 | bTally: 1 15 | fTally: 1 16 | cTally: 1 17 | coralBenchmark: 2 18 | 19 | Geometry: 20 | material: sourceMaterial 21 | shape: brick 22 | xMax: 10000 23 | xMin: 0 24 | yMax: 10000 25 | yMin: 0 26 | zMax: 10000 27 | zMin: 0 28 | 29 | Material: 30 | name: sourceMaterial 31 | mass: 1.5 32 | nIsotopes: 20 33 | nReactions: 9 34 | sourceRate: 1e+10 35 | totalCrossSection: 1.5227 36 | absorptionCrossSection: absorb 37 | fissionCrossSection: fission 38 | scatteringCrossSection: scatter 39 | absorptionCrossSectionRatio: 10 40 | fissionCrossSectionRatio: 8 41 | scatteringCrossSectionRatio: 82 42 | absorptionCrossSection: absorb 43 | fissionCrossSection: fission 44 | scatteringCrossSection: scatter 45 | 46 | CrossSection: 47 | name: absorb 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: -0.2 52 | E: 2 53 | 54 | CrossSection: 55 | name: fission 56 | A: 0 57 | B: 0 58 | C: 0 59 | D: -0.2 60 | E: 2 61 | nuBar: 2 62 | 63 | CrossSection: 64 | name: scatter 65 | A: 0 66 | B: 0 67 | C: 0 68 | D: 0 69 | E: 97 70 | 71 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2_1.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1.1e-07 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 16 9 | ly: 16 10 | lz: 16 11 | nParticles: 40960 12 |
nSteps: 100 13 | nx: 16 14 | ny: 16 15 | nz: 16 16 | seed: 1029384756 17 | xDom: 1 18 | yDom: 1 19 | zDom: 1 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 20 43 | nReactions: 9 44 | sourceRate: 1e+10 45 | totalCrossSection: 1.5227 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2_36.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1.1e-07 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 48 9 | ly: 48 10 | lz: 64 11 | nParticles: 1474560 12 | nSteps: 100 13 | nx: 48 14 | ny: 48 15 | nz: 64 16 | seed: 1029384756 17 | xDom: 3 18 | yDom: 3 19 | zDom: 4 20 | eMax: 20 21 | eMin: 1e-08 22 | nGroups: 230 23 | lowWeightCutoff: 0.001 24 | bTally: 1 25 | fTally: 1 26 | cTally: 1 27 | coralBenchmark: 2 28 | 29 | Geometry: 30 | material: sourceMaterial 31 | shape: brick 32 | xMax: 10000 33 | xMin: 0 34 | yMax: 10000 35 | yMin: 0 36 | zMax: 10000 37 | zMin: 0 38 | 39 | Material: 40 | name: sourceMaterial 41 | mass: 1.5 42 | nIsotopes: 20 43 | nReactions: 9 44 | sourceRate: 1e+10 45 | totalCrossSection: 1.5227 46 | absorptionCrossSection: absorb 47 | fissionCrossSection: fission 48 | scatteringCrossSection: scatter 49 | absorptionCrossSectionRatio: 10 50 | fissionCrossSectionRatio: 8 51 | scatteringCrossSectionRatio: 82 52 | absorptionCrossSection: absorb 53 | fissionCrossSection: fission 54 | scatteringCrossSection: scatter 55 | 56 | CrossSection: 57 | name: absorb 58 | A: 0 59 | B: 0 60 | C: 0 61 | D: -0.2 62 | E: 2 63 | 64 | CrossSection: 65 | name: fission 66 | A: 0 67 | B: 0 68 | C: 0 69 | D: -0.2 70 | E: 2 71 | nuBar: 2 72 | 73 | CrossSection: 74 | name: scatter 75 | A: 0 76 | B: 0 77 | C: 0 78 | D: 0 79 | E: 97 80 | 81 | -------------------------------------------------------------------------------- /Examples/CTS2_Benchmark/CTS2_scaling.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Quicksilver CTS Benchmark 4 | # weak scaling on a single node: 5 | 6 | # 1 rank per core 7 | # 4096 mesh elements per rank (16^3) 8 | # 10 particles per mesh element -> 40960 particles per rank 9 | 10 | export OMP_NUM_THREADS=1 11 | 12 | QS=../../src/qs 13 | 14 | srun -N1 -n1 $QS -i CTS2.inp -X 16 -Y 16 -Z 16 -x 16 -y 16 -z 16 -I 1 -J 1 -K 1 -n 40960 > CTS2_01.out 15 | srun -N1 -n2 $QS -i CTS2.inp -X 32 -Y 16 -Z 16 -x 32 -y 16 -z 16 -I 2 -J 1 -K 1 -n 81920 > CTS2_02.out 16 | srun -N1 -n4 $QS -i CTS2.inp -X 32 -Y 32 -Z 16 -x 32 -y 
32 -z 16 -I 2 -J 2 -K 1 -n 163840 > CTS2_04.out 17 | srun -N1 -n8 $QS -i CTS2.inp -X 32 -Y 32 -Z 32 -x 32 -y 32 -z 32 -I 2 -J 2 -K 2 -n 327680 > CTS2_08.out 18 | srun -N1 -n16 $QS -i CTS2.inp -X 64 -Y 32 -Z 32 -x 64 -y 32 -z 32 -I 4 -J 2 -K 2 -n 655360 > CTS2_16.out 19 | srun -N1 -n32 $QS -i CTS2.inp -X 64 -Y 64 -Z 32 -x 64 -y 64 -z 32 -I 4 -J 4 -K 2 -n 1310720 > CTS2_32.out 20 | srun -N1 -n36 $QS -i CTS2.inp -X 48 -Y 48 -Z 64 -x 48 -y 48 -z 64 -I 3 -J 3 -K 4 -n 1474560 > CTS2_36.out 21 | 22 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | lx: 100 10 | ly: 100 11 | lz: 100 12 | nParticles: 100000000 13 | nSteps: 10 14 | nx: 10 15 | ny: 10 16 | nz: 10 17 | seed: 1029384756 18 | xDom: 0 19 | yDom: 0 20 | zDom: 0 21 | eMax: 20 22 | eMin: 1e-09 23 | nGroups: 230 24 | 25 | Geometry: 26 | material: sourceMaterial 27 | shape: brick 28 | xMax: 100 29 | xMin: 0 30 | yMax: 100 31 | yMin: 0 32 | zMax: 100 33 | zMin: 0 34 | 35 | Material: 36 | name: sourceMaterial 37 | nIsotopes: 10 38 | nReactions: 9 39 | sourceRate: 1e+10 40 | totalCrossSection: 1 41 | absorptionCrossSection: flat 42 | fissionCrossSection: flat 43 | scatteringCrossSection: flat 44 | absorptionCrossSectionRatio: 1 45 | fissionCrossSectionRatio: 0.1 46 | scatteringCrossSectionRatio: 1 47 | 48 | CrossSection: 49 | name: flat 50 | A: 0 51 | B: 0 52 | C: 0 53 | D: 0 54 | E: 1 55 | nuBar: 2.4 56 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v3.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 1 10 | lx: 100 11 | ly: 100 12 | lz: 100 13 | nParticles: 20000000 14 | nSteps: 10 15 | nx: 12 16 | ny: 12 17 | nz: 12 18 | seed: 1029384756 19 | eMax: 20 20 | eMin: 1e-09 21 | nGroups: 230 22 | 23 | Geometry: 24 | material: sourceMaterial 25 | shape: brick 26 | xMax: 100 27 | xMin: 0 28 | yMax: 100 29 | yMin: 0 30 | zMax: 100 31 | zMin: 0 32 | 33 | Material: 34 | name: sourceMaterial 35 | nIsotopes: 10 36 | nReactions: 9 37 | sourceRate: 1e+10 38 | totalCrossSection: 1 39 | absorptionCrossSection: flat 40 | fissionCrossSection: flat 41 | scatteringCrossSection: flat 42 | absorptionCrossSectionRatio: 1 43 | fissionCrossSectionRatio: 0.1 44 | scatteringCrossSectionRatio: 1 45 | 46 | CrossSection: 47 | name: flat 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: 0 52 | E: 1 53 | nuBar: 2.4 54 | 55 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v3_wq.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 0 10 | lx: 100 11 | ly: 100 12 | lz: 100 13 | nParticles: 20000000 14 | nSteps: 10 15 | nx: 12 16 | ny: 12 17 | nz: 12 18 | seed: 1029384756 19 | eMax: 20 20 | eMin: 1e-09 21 | nGroups: 230 22 | 23 | Geometry: 24 | material: sourceMaterial 25 | shape: 
brick 26 | xMax: 100 27 | xMin: 0 28 | yMax: 100 29 | yMin: 0 30 | zMax: 100 31 | zMin: 0 32 | 33 | Material: 34 | name: sourceMaterial 35 | nIsotopes: 10 36 | nReactions: 9 37 | sourceRate: 1e+10 38 | totalCrossSection: 1 39 | absorptionCrossSection: flat 40 | fissionCrossSection: flat 41 | scatteringCrossSection: flat 42 | absorptionCrossSectionRatio: 1 43 | fissionCrossSectionRatio: 0.1 44 | scatteringCrossSectionRatio: 1 45 | 46 | CrossSection: 47 | name: flat 48 | A: 0 49 | B: 0 50 | C: 0 51 | D: 0 52 | E: 1 53 | nuBar: 2.4 54 | 55 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v4_tm.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 1 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | mpiThreadMultiple: 1 16 | 17 | Geometry: 18 | material: sourceMaterial 19 | shape: brick 20 | xMax: 1000 21 | xMin: 0 22 | yMax: 1000 23 | yMin: 0 24 | zMax: 1000 25 | zMin: 0 26 | 27 | Material: 28 | name: sourceMaterial 29 | nIsotopes: 10 30 | nReactions: 9 31 | sourceRate: 1e+10 32 | totalCrossSection: 1 33 | absorptionCrossSection: flat 34 | fissionCrossSection: flat 35 | scatteringCrossSection: flat 36 | absorptionCrossSectionRatio: 1 37 | fissionCrossSectionRatio: 0.1 38 | scatteringCrossSectionRatio: 1 39 | 40 | CrossSection: 41 | name: flat 42 | A: 0 43 | B: 0 44 | C: 0 45 | D: 0 46 | E: 1 47 | nuBar: 2.4 48 | 49 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v4_ts.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v3.inp 5 | boundaryCondition: reflect 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 1 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | mpiThreadMultiple: 0 16 | 17 | 18 | Geometry: 19 | material: sourceMaterial 20 | shape: brick 21 | xMax: 1000 22 | xMin: 0 23 | yMax: 1000 24 | yMin: 0 25 | zMax: 1000 26 | zMin: 0 27 | 28 | Material: 29 | name: sourceMaterial 30 | nIsotopes: 10 31 | nReactions: 9 32 | sourceRate: 1e+10 33 | totalCrossSection: 1 34 | absorptionCrossSection: flat 35 | fissionCrossSection: flat 36 | scatteringCrossSection: flat 37 | absorptionCrossSectionRatio: 1 38 | fissionCrossSectionRatio: 0.1 39 | scatteringCrossSectionRatio: 1 40 | 41 | CrossSection: 42 | name: flat 43 | A: 0 44 | B: 0 45 | C: 0 46 | D: 0 47 | E: 1 48 | nuBar: 2.4 49 | 50 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v5_ts.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v5_ts.inp 5 | boundaryCondition: reflect 6 | loadBalance: 0 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 0 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | lowWeightCutoff: 0.001 16 | 17 | Geometry: 18 | material: sourceMaterial 19 | shape: brick 20 | xMax: 1000 21 | xMin: 0 22 | yMax: 1000 23 | yMin: 0 24 | zMax: 1000 25 | zMin: 0 26 | 27 | Material: 28 | name: sourceMaterial 
29 | nIsotopes: 10 30 | nReactions: 9 31 | sourceRate: 1e+10 32 | totalCrossSection: 10 33 | absorptionCrossSection: flat 34 | fissionCrossSection: flat 35 | scatteringCrossSection: flat 36 | absorptionCrossSectionRatio: 0.04 37 | fissionCrossSectionRatio: 0.05 38 | scatteringCrossSectionRatio: 1 39 | 40 | CrossSection: 41 | name: flat 42 | A: 0 43 | B: 0 44 | C: 0 45 | D: 0 46 | E: 1 47 | nuBar: 1.0 48 | 49 | -------------------------------------------------------------------------------- /Examples/Homogeneous/homogeneousProblem_v7_ts.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-06 3 | fMax: 0.1 4 | inputFile: homogeneousProblem_v7_ts.inp 5 | boundaryCondition: reflect 6 | loadBalance: 0 7 | cycleTimers: 0 8 | debugThreads: 0 9 | mpiThreadMultiple: 0 10 | nSteps: 10 11 | seed: 1029384756 12 | eMax: 20 13 | eMin: 1e-09 14 | nGroups: 230 15 | lowWeightCutoff: 0.001 16 | 17 | 18 | Geometry: 19 | material: sourceMaterial 20 | shape: brick 21 | xMax: 1000 22 | xMin: 0 23 | yMax: 1000 24 | yMin: 0 25 | zMax: 1000 26 | zMin: 0 27 | 28 | Material: 29 | name: sourceMaterial 30 | mass: 12.011 31 | nIsotopes: 10 32 | nReactions: 9 33 | sourceRate: 1e+10 34 | totalCrossSection: 0.1 35 | absorptionCrossSection: flat 36 | fissionCrossSection: flat 37 | scatteringCrossSection: flat 38 | absorptionCrossSectionRatio: 0.1086 39 | fissionCrossSectionRatio: 0.0969 40 | scatteringCrossSectionRatio: 0.7946 41 | 42 | CrossSection: 43 | name: flat 44 | A: 0 45 | B: 0 46 | C: 0 47 | D: 0 48 | E: 1 49 | nuBar: 1.0 50 | 51 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_aprun_trinity_01.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MSUB -lnodes=1:knl,os=CLE_quad_cache 3 | #MSUB -l walltime=2:00:00 4 | #MSUB -A tos2-8 5 | 6 | # 7 | # 8 | # To run interactively, grab a node like so: 9 | # 10 | # msub -I -lnodes=1:knl,os=CLE_quad_cache 11 | # 12 | # This relies on the bash shell for the 2>&1 | tee to work.
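#
# A quick pre-flight check (an editor's sketch, not part of the original
# study): the runs in this script always choose xDom*yDom*zDom equal to
# the MPI rank count, and mesh dimensions that divide evenly across the
# domains. With the 64-rank layout used below:
xDom=4; yDom=4; zDom=4; nx=20; ny=20; nz=20; ranks=64
test $((xDom * yDom * zDom)) -eq $ranks || echo "WARNING: ranks != xDom*yDom*zDom"
test $(((nx % xDom) + (ny % yDom) + (nz % zDom))) -eq 0 || echo "WARNING: mesh does not divide evenly"
#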
13 | # 14 | # To get average and max cycleTracking times: 15 | # grep "cycleTracking 10" *out | awk -F " " '{print $1 " " $4 " " $5}' 16 | # 17 | 18 | # #################### 19 | # Thread Funneled Runs - No Hyper Threads 20 | # #################### 21 | 22 | # Set this to where you have the code built on lustre 23 | cd /users/sdawson/Quicksilver-2017-Apr-19-12-45-27 24 | 25 | export MPICH_MAX_THREAD_SAFETY=funneled 26 | export OMP_PLACES=cores 27 | 28 | # (Per Node) 64 MPI x 1 Threads - Thread Funneled 29 | #export OMP_NUM_THREADS=1 30 | #time aprun -r 4 -n 64 -d 1 -j 1 -cc depth ./qs \ 31 | # --lx=400 --ly=400 --lz=400 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=2000000 \ 32 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0064.d001.j01-ts.out 33 | 34 | # (Per Node) 32 MPI x 2 Threads - Thread Funneled 35 | #export OMP_NUM_THREADS=2 36 | #time aprun -r 4 -n 32 -d 2 -j 1 -cc depth ./qs \ 37 | # --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=2000000 \ 38 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0032.d002.j01-ts.out 39 | 40 | # (Per Node) 16 MPI x 4 Threads - Thread Funneled 41 | #export OMP_NUM_THREADS=4 42 | #time aprun -r 4 -n 16 -d 4 -j 1 -cc depth ./qs \ 43 | # --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 44 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0016.d004.j01-ts.out 45 | 46 | # (Per Node) 8 MPI x 8 Threads - Thread Funneled 47 | #export OMP_NUM_THREADS=8 48 | #time aprun -r 4 -n 8 -d 8 -j 1 -cc depth ./qs \ 49 | # --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 50 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0008.d008.j01-ts.out 51 | 52 | # (Per Node) 4 MPI x 16 Threads - Thread Funneled 53 | #export OMP_NUM_THREADS=16 54 | #time aprun -r 4 -n 4 -d 16 -j 1 -cc depth ./qs \ 55 | # --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 56 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0004.d016.j01-ts.out 57 | 58 | # (Per Node) 2 MPI x 32 Threads - Thread Funneled 59 | #export OMP_NUM_THREADS=32 60 | #time aprun -r 4 -n 2 -d 32 -j 1 -cc depth ./qs \ 61 | # --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 62 | # -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0002.d032.j01-ts.out 63 | 64 | # #################### 65 | # Thread Funneled Runs - 2 Hyper Threads 66 | # 67 | # As we add hyper threads, we do not change the problem size, ideally this will decrease time 68 | # spent in the threaded tracking though. 69 | # 70 | # Prior experience shows that while 4 hyper threads pays off on small node count, it is a wash 71 | # at higher node count, so let's stop at 2 hyper threads. 
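# In the hyper-threaded runs that follow, every rank/thread pairing
# multiplies out to the same 128 hardware threads (64 cores x 2 hyper
# threads per node). A small loop makes the pattern behind the six
# explicit run blocks below visible (illustrative sketch only):
for ranks in 64 32 16 8 4 2; do
  echo "plan: ${ranks} ranks x $((128 / ranks)) OMP threads = 128 hw threads"
done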
72 | # #################### 73 | 74 | export MPICH_MAX_THREAD_SAFETY=funneled 75 | export OMP_PLACES=threads 76 | 77 | export OMP_NUM_THREADS=2 78 | time aprun -r 4 -n 64 -d 2 -j 2 -cc depth ./qs \ 79 | --lx=400 --ly=400 --lz=400 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=2000000 \ 80 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0064.d002.j02-ts.out 81 | 82 | export OMP_NUM_THREADS=4 83 | time aprun -r 4 -n 32 -d 4 -j 2 -cc depth ./qs \ 84 | --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=2000000 \ 85 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0032.d004.j02-ts.out 86 | 87 | export OMP_NUM_THREADS=8 88 | time aprun -r 4 -n 16 -d 8 -j 2 -cc depth ./qs \ 89 | --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 90 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0016.d008.j02-ts.out 91 | 92 | export OMP_NUM_THREADS=16 93 | time aprun -r 4 -n 8 -d 16 -j 2 -cc depth ./qs \ 94 | --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 95 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0008.d016.j02-ts.out 96 | 97 | export OMP_NUM_THREADS=32 98 | time aprun -r 4 -n 4 -d 32 -j 2 -cc depth ./qs \ 99 | --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 100 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0004.d032.j02-ts.out 101 | 102 | export OMP_NUM_THREADS=64 103 | time aprun -r 4 -n 2 -d 64 -j 2 -cc depth ./qs \ 104 | --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 105 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0001.n0002.d064.j02-ts.out 106 | 107 | # 108 | # end of file 109 | # 110 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_aprun_trinity_02.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MSUB -lnodes=2:knl,os=CLE_quad_cache 3 | #MSUB -l walltime=2:00:00 4 | #MSUB -A tos2-8 5 | 6 | # 7 | # 8 | # To run interactively, grab a node like so: 9 | # 10 | # msub -I -lnodes=1:knl,os=CLE_quad_cache 11 | # 12 | # This relies on the bash shell for the 2>&1 | tee to work.
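#
# Weak-scaling bookkeeping for this 2-node script (an editor's sketch):
# the box, the mesh, and the particle count all double relative to the
# 1-node script, while the per-node work stays fixed at 2000000 particles:
nodes=2; particles_per_node=2000000
echo "expected --nParticles on ${nodes} nodes: $((nodes * particles_per_node))"
#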
13 | # 14 | # To get average and max cycleTracking times: 15 | # grep "cycleTracking 10" *out | awk -F " " '{print $1 " " $4 " " $5}' 16 | # 17 | 18 | # #################### 19 | # Thread Funneled Runs - No Hyper Threads 20 | # #################### 21 | 22 | # Set this to where you have the code built on lustre 23 | cd /users/sdawson/Quicksilver-2017-Apr-19-12-45-27 24 | 25 | export MPICH_MAX_THREAD_SAFETY=funneled 26 | export OMP_PLACES=cores 27 | 28 | # (Per Node) 64 MPI x 1 Threads - Thread Funneled 29 | export OMP_NUM_THREADS=1 30 | time aprun -r 4 -n 128 -d 1 -j 1 -cc depth ./qs \ 31 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=4000000 \ 32 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0128.d001.j01-ts.out 33 | 34 | # (Per Node) 32 MPI x 2 Threads - Thread Funneled 35 | export OMP_NUM_THREADS=2 36 | time aprun -r 4 -n 64 -d 2 -j 1 -cc depth ./qs \ 37 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=4000000 \ 38 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0064.d002.j01-ts.out 39 | 40 | # (Per Node) 16 MPI x 4 Threads - Thread Funneled 41 | export OMP_NUM_THREADS=4 42 | time aprun -r 4 -n 32 -d 4 -j 1 -cc depth ./qs \ 43 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 44 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0032.d004.j01-ts.out 45 | 46 | # (Per Node) 8 MPI x 8 Threads - Thread Funneled 47 | export OMP_NUM_THREADS=8 48 | time aprun -r 4 -n 16 -d 8 -j 1 -cc depth ./qs \ 49 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 50 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0016.d008.j01-ts.out 51 | 52 | # (Per Node) 4 MPI x 16 Threads - Thread Funneled 53 | export OMP_NUM_THREADS=16 54 | time aprun -r 4 -n 8 -d 16 -j 1 -cc depth ./qs \ 55 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 56 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0008.d016.j01-ts.out 57 | 58 | # (Per Node) 2 MPI x 32 Threads - Thread Funneled 59 | export OMP_NUM_THREADS=32 60 | time aprun -r 4 -n 4 -d 32 -j 1 -cc depth ./qs \ 61 | --lx=200 --ly=200 --lz=100 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 62 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0004.d032.j01-ts.out 63 | 64 | # #################### 65 | # Thread Funneled Runs - 2 Hyper Threads 66 | # 67 | # As we add hyper threads, we do not change the problem size, ideally this will decrease time 68 | # spent in the threaded tracking though. 69 | # 70 | # Prior experience shows that while 4 hyper threads pays off on small node count, it is a wash 71 | # at higher node count, so let's stop at 2 hyper threads. 
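# To boil the outputs of the runs in this script down to one line per
# run, the grep from the header can be extended with awk. This assumes,
# as the header comment states, that fields 4 and 5 of the matched line
# hold the average and max cycleTracking times (sketch only):
grep "cycleTracking 10" *out | awk '{ sum += $4; if ($5 > mx) mx = $5; n++ }
  END { if (n) printf "runs=%d mean(avg)=%g worst(max)=%g\n", n, sum / n, mx }'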
72 | # #################### 73 | 74 | export MPICH_MAX_THREAD_SAFETY=funneled 75 | export OMP_PLACES=threads 76 | 77 | export OMP_NUM_THREADS=2 78 | time aprun -r 4 -n 128 -d 2 -j 2 -cc depth ./qs \ 79 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=4000000 \ 80 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0128.d002.j02-ts.out 81 | 82 | export OMP_NUM_THREADS=4 83 | time aprun -r 4 -n 64 -d 4 -j 2 -cc depth ./qs \ 84 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=4000000 \ 85 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0064.d004.j02-ts.out 86 | 87 | export OMP_NUM_THREADS=8 88 | time aprun -r 4 -n 32 -d 8 -j 2 -cc depth ./qs \ 89 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 90 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0032.d008.j02-ts.out 91 | 92 | export OMP_NUM_THREADS=16 93 | time aprun -r 4 -n 16 -d 16 -j 2 -cc depth ./qs \ 94 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 95 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0016.d016.j02-ts.out 96 | 97 | export OMP_NUM_THREADS=32 98 | time aprun -r 4 -n 8 -d 32 -j 2 -cc depth ./qs \ 99 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 100 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0008.d032.j02-ts.out 101 | 102 | export OMP_NUM_THREADS=64 103 | time aprun -r 4 -n 4 -d 64 -j 2 -cc depth ./qs \ 104 | --lx=200 --ly=200 --lz=100 --nx=40 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 105 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0002.n0004.d064.j02-ts.out 106 | 107 | # 108 | # end of file 109 | # 110 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_aprun_trinity_04.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #MSUB -lnodes=4:knl,os=CLE_quad_cache 3 | #MSUB -l walltime=2:00:00 4 | #MSUB -A tos2-8 5 | 6 | # 7 | # 8 | # To run interactively, grab a node like so: 9 | # 10 | # msub -I -lnodes=1:knl,os=CLE_quad_cache 11 | # 12 | # This relies on the bash shell for the 2>&1 | tee to work.
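#
# Rough load check for the 4-node runs below (an editor's sketch): every
# run uses a 40x40x20 = 32000 cell mesh and sources 8000000 particles,
# i.e. 250 particles per cell, the same loading as the 1- and 2-node
# scripts:
echo "particles per cell: $((8000000 / (40 * 40 * 20)))"
#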
13 | # 14 | # To get average and max cycleTracking times: 15 | # grep "cycleTracking 10" *out | awk -F " " '{print $1 " " $4 " " $5}' 16 | # 17 | 18 | # #################### 19 | # Thread Funneled Runs - No Hyper Threads 20 | # #################### 21 | 22 | # Set this to where you have the code built on Lustre 23 | cd /users/sdawson/Quicksilver-2017-Apr-19-12-45-27 24 | 25 | export MPICH_MAX_THREAD_SAFETY=funneled 26 | export OMP_PLACES=cores 27 | 28 | # (Per Node) 64 MPI x 1 Thread - Thread Funneled 29 | export OMP_NUM_THREADS=1 30 | time aprun -r 4 -n 256 -d 1 -j 1 -cc depth ./qs \ 31 | --lx=800 --ly=800 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=8 --zDom=4 --nParticles=8000000 \ 32 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0256.d001.j01-ts.out 33 | 34 | # (Per Node) 32 MPI x 2 Threads - Thread Funneled 35 | export OMP_NUM_THREADS=2 36 | time aprun -r 4 -n 128 -d 2 -j 1 -cc depth ./qs \ 37 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=8000000 \ 38 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0128.d002.j01-ts.out 39 | 40 | # (Per Node) 16 MPI x 4 Threads - Thread Funneled 41 | export OMP_NUM_THREADS=4 42 | time aprun -r 4 -n 64 -d 4 -j 1 -cc depth ./qs \ 43 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=8000000 \ 44 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0064.d004.j01-ts.out 45 | 46 | # (Per Node) 8 MPI x 8 Threads - Thread Funneled 47 | export OMP_NUM_THREADS=8 48 | time aprun -r 4 -n 32 -d 8 -j 1 -cc depth ./qs \ 49 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=8000000 \ 50 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0032.d008.j01-ts.out 51 | 52 | # (Per Node) 4 MPI x 16 Threads - Thread Funneled 53 | export OMP_NUM_THREADS=16 54 | time aprun -r 4 -n 16 -d 16 -j 1 -cc depth ./qs \ 55 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=8000000 \ 56 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0016.d016.j01-ts.out 57 | 58 | # (Per Node) 2 MPI x 32 Threads - Thread Funneled 59 | export OMP_NUM_THREADS=32 60 | time aprun -r 4 -n 8 -d 32 -j 1 -cc depth ./qs \ 61 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=8000000 \ 62 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0008.d032.j01-ts.out 63 | 64 | # #################### 65 | # Thread Funneled Runs - 2 Hyper Threads 66 | # 67 | # As we add hyper threads we do not change the problem size; ideally this will still decrease the time 68 | # spent in the threaded tracking. 69 | # 70 | # Prior experience shows that while 4 hyper threads pay off at small node counts, it is a wash 71 | # at higher node counts, so we stop at 2 hyper threads.
72 | # #################### 73 | 74 | export MPICH_MAX_THREAD_SAFETY=funneled 75 | export OMP_PLACES=threads 76 | 77 | export OMP_NUM_THREADS=2 78 | time aprun -r 4 -n 256 -d 2 -j 2 -cc depth ./qs \ 79 | --lx=800 --ly=800 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=8 --zDom=4 --nParticles=8000000 \ 80 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0256.d002.j02-ts.out 81 | 82 | export OMP_NUM_THREADS=4 83 | time aprun -r 4 -n 128 -d 4 -j 2 -cc depth ./qs \ 84 | --lx=800 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=8 --yDom=4 --zDom=4 --nParticles=8000000 \ 85 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0128.d004.j02-ts.out 86 | 87 | export OMP_NUM_THREADS=8 88 | time aprun -r 4 -n 64 -d 8 -j 2 -cc depth ./qs \ 89 | --lx=400 --ly=400 --lz=400 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=4 --nParticles=8000000 \ 90 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0064.d008.j02-ts.out 91 | 92 | export OMP_NUM_THREADS=16 93 | time aprun -r 4 -n 32 -d 16 -j 2 -cc depth ./qs \ 94 | --lx=400 --ly=400 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=8000000 \ 95 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0032.d016.j02-ts.out 96 | 97 | export OMP_NUM_THREADS=32 98 | time aprun -r 4 -n 16 -d 32 -j 2 -cc depth ./qs \ 99 | --lx=400 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=8000000 \ 100 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0016.d032.j02-ts.out 101 | 102 | export OMP_NUM_THREADS=64 103 | time aprun -r 4 -n 8 -d 64 -j 2 -cc depth ./qs \ 104 | --lx=200 --ly=200 --lz=200 --nx=40 --ny=40 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=8000000 \ 105 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.trinity.Node0004.n0008.d064.j02-ts.out 106 | 107 | # 108 | # end of file 109 | # 110 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_slurm_rzalast_01.sh: -------------------------------------------------------------------------------- 1 | # 2 | # salloc 1 node exclusively, then run these tests.
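# (One way to get such an allocation, assuming a typical Slurm setup:
#    salloc -N 1 --exclusive )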
3 | # Or put them in a batch script 4 | # 5 | 6 | export -n KMP_AFFINITY 7 | export OMP_PROC_BIND=FALSE 8 | 9 | # #################### 10 | # Thread Funneled Runs 11 | # #################### 12 | 13 | # (Per Node) 16 MPI x 1 Thread - Thread Funneled 14 | export OMP_NUM_THREADS=1; 15 | srun -n16 --distribution=cyclic ./qs \ 16 | --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 17 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0016.t0001-ts.out 18 | 19 | # (Per Node) 8 MPI x 2 Threads - Thread Funneled 20 | export OMP_NUM_THREADS=2; 21 | srun -n8 --distribution=cyclic ./qs \ 22 | --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 23 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0008.t0002-ts.out 24 | 25 | # (Per Node) 4 MPI x 4 Threads - Thread Funneled 26 | export OMP_NUM_THREADS=4; 27 | srun -n4 --distribution=cyclic ./qs \ 28 | --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 29 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0004.t0004-ts.out 30 | 31 | # (Per Node) 2 MPI x 8 Threads - Thread Funneled 32 | export OMP_NUM_THREADS=8; 33 | srun -n2 --distribution=cyclic ./qs \ 34 | --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 35 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzalast.Node0001.n0002.t0008-ts.out 36 | 37 | # #################### 38 | # Thread Multiple Runs 39 | # For testing; does not show improvement on Xeon 40 | # #################### 41 | 42 | # (Per Node) 16 MPI x 1 Thread - Thread Multiple 43 | #export OMP_NUM_THREADS=1; 44 | #srun -n16 --distribution=cyclic ./qs --mpiThreadMultiple \ 45 | # --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=2000000 \ 46 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0016.t0001-tm.out 47 | 48 | # (Per Node) 8 MPI x 2 Threads - Thread Multiple 49 | #export OMP_NUM_THREADS=2; 50 | #srun -n8 --distribution=cyclic ./qs --mpiThreadMultiple \ 51 | # --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=2000000 \ 52 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0008.t0002-tm.out 53 | 54 | # (Per Node) 4 MPI x 4 Threads - Thread Multiple 55 | #export OMP_NUM_THREADS=4; 56 | #srun -n4 --distribution=cyclic ./qs --mpiThreadMultiple \ 57 | # --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=2000000 \ 58 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0004.t0004-tm.out 59 | 60 | # (Per Node) 2 MPI x 8 Threads - Thread Multiple 61 | #export OMP_NUM_THREADS=8; 62 | #srun -n2 --distribution=cyclic ./qs --mpiThreadMultiple \ 63 | # --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=2000000 \ 64 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzalast.Node0001.n0002.t0008-tm.out 65 | 66 | # 67 | # end of file 68 | # 69 | -------------------------------------------------------------------------------- /Examples/Homogeneous/quicksilver_slurm_rzgenie_01.sh: -------------------------------------------------------------------------------- 1 | # 2 | # salloc 1 node exclusively, then run these tests.
3 | # Or put them in a batch script 4 | # 5 | 6 | export -n KMP_AFFINITY 7 | export OMP_PROC_BIND=FALSE 8 | 9 | # #################### 10 | # Thread Funneled Runs 11 | # #################### 12 | 13 | # 32 MPI x 1 Thread - Thread Funneled 14 | export OMP_NUM_THREADS=1 15 | srun -n 32 --distribution=cyclic --mpibind ./qs \ 16 | --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 17 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0032.t0001-ts.out 18 | 19 | # 16 MPI x 2 Threads - Thread Funneled 20 | export OMP_NUM_THREADS=2 21 | srun -n 16 --distribution=cyclic --mpibind ./qs \ 22 | --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 23 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0016.t0002-ts.out 24 | 25 | # 8 MPI x 4 Threads - Thread Funneled 26 | export OMP_NUM_THREADS=4 27 | srun -n 8 --distribution=cyclic ./qs \ 28 | --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 29 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0008.t0004-ts.out 30 | 31 | # 4 MPI x 8 Threads - Thread Funneled 32 | export OMP_NUM_THREADS=8 33 | srun -n 4 --distribution=cyclic ./qs \ 34 | --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 35 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0004.t0008-ts.out 36 | 37 | # 2 MPI x 16 Threads - Thread Funneled 38 | export OMP_NUM_THREADS=16 39 | srun -n 2 --distribution=cyclic ./qs \ 40 | --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=1 --zDom=1 --nParticles=4000000 \ 41 | -i Input/homogeneousProblem_v5_ts.inp 2>&1 | tee qs.rzgenie.Node0001.n0002.t0016-ts.out 42 | 43 | # #################### 44 | # Thread Multiple Runs 45 | # For testing; does not show improvement on Xeon 46 | # #################### 47 | 48 | # 32 MPI x 1 Thread - Thread Multiple 49 | #export OMP_NUM_THREADS=1 50 | #srun -n 32 --distribution=cyclic ./qs --mpiThreadMultiple \ 51 | # --lx=400 --ly=400 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=4 --zDom=2 --nParticles=4000000 \ 52 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0032.t0001-tm.out 53 | 54 | # 16 MPI x 2 Threads - Thread Multiple 55 | #export OMP_NUM_THREADS=2 56 | #srun -n 16 --distribution=cyclic ./qs --mpiThreadMultiple \ 57 | # --lx=400 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=4 --yDom=2 --zDom=2 --nParticles=4000000 \ 58 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0016.t0002-tm.out 59 | 60 | # 8 MPI x 4 Threads - Thread Multiple 61 | #export OMP_NUM_THREADS=4 62 | #srun -n 8 --distribution=cyclic ./qs --mpiThreadMultiple \ 63 | # --lx=200 --ly=200 --lz=200 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=2 --nParticles=4000000 \ 64 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0008.t0004-tm.out 65 | 66 | # 4 MPI x 8 Threads - Thread Multiple 67 | #export OMP_NUM_THREADS=8 68 | #srun -n 4 --distribution=cyclic ./qs --mpiThreadMultiple \ 69 | # --lx=200 --ly=200 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=4000000 \ 70 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0004.t0008-tm.out 71 | 72 | # 2 MPI x 16 Threads - Thread Multiple 73 | #export OMP_NUM_THREADS=16 74 | #srun -n 2 --distribution=cyclic ./qs --mpiThreadMultiple \ 75 | # --lx=200 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20
--xDom=2 --yDom=1 --zDom=1 --nParticles=4000000 \ 76 | # -i Input/homogeneousProblem_v5_tm.inp 2>&1 | tee qs.rzgenie.Node0001.n0002.t0016-tm.out 77 | 78 | # 79 | # end of file 80 | # 81 | -------------------------------------------------------------------------------- /Examples/Homogeneous/run.homogeneousProblem_v4.rzoz7.x: -------------------------------------------------------------------------------- 1 | # 2 | # 2016-Oct-06 Note by S. Dawson 3 | # 4 | # Note on running thread multiple vs thread single. 5 | # 6 | # It's a bit clunky as one has to set up a separate test deck for thread single vs thread multiple, AS WELL AS 7 | # specify the correct command line argument. 8 | # 9 | # This has to do with the desire to fire up MPI before processing the command line arguments, yet still 10 | # have the input deck reflect how the deck is run. 11 | # 12 | # Also, despite what the command line help says, one cannot specify a flag to --mpiThreadMultiple. If one 13 | # says --mpiThreadMultiple=1 or --mpiThreadMultiple=0 the code complains; it is just --mpiThreadMultiple 14 | # to turn it on, and the default is thread-single mode. 15 | # 16 | 17 | export -n KMP_CPUINFO_FILE 18 | export KMP_CPUINFO_FILE=/home/dawson/cpuinfo_sad; 19 | export I_MPI_PIN_DOMAIN=64:compact 20 | export KMP_AFFINITY="granularity=fine,scatter" 21 | export KMP_HW_SUBSET=1T 22 | export KMP_BLOCKTIME=0 23 | export OMP_NUM_THREADS=16 24 | #export OMP_PLACES=cores 25 | 26 | export MPICH_MAX_THREAD_SAFETY=multiple 27 | time mpirun -np 4 ./qs --mpiThreadMultiple --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_tm.inp | tee rzoz18.N01.n04.t016.tm.out 28 | export MPICH_MAX_THREAD_SAFETY=funneled 29 | time mpirun -np 4 ./qs --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_ts.inp | tee rzoz18.N01.n04.t016.ts.out 30 | 31 | export KMP_HW_SUBSET=2T; 32 | export OMP_NUM_THREADS=32; 33 | export MPICH_MAX_THREAD_SAFETY=multiple 34 | time mpirun -np 4 ./qs --mpiThreadMultiple --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_tm.inp | tee rzoz18.N01.n04.t032.tm.out 35 | export MPICH_MAX_THREAD_SAFETY=funneled 36 | time mpirun -np 4 ./qs --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_ts.inp | tee rzoz18.N01.n04.t032.ts.out 37 | 38 | export KMP_HW_SUBSET=4T; 39 | export OMP_NUM_THREADS=64; 40 | export MPICH_MAX_THREAD_SAFETY=multiple 41 | time mpirun -np 4 ./qs --mpiThreadMultiple --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_tm.inp | tee rzoz18.N01.n04.t064.tm.out 42 | export MPICH_MAX_THREAD_SAFETY=funneled 43 | time mpirun -np 4 ./qs --lx=100 --ly=100 --lz=100 --nx=20 --ny=20 --nz=20 --xDom=2 --yDom=2 --zDom=1 --nParticles=20000000 -i homogeneousProblem_v4_ts.inp | tee rzoz18.N01.n04.t064.ts.out 44 | 45 | 46 | -------------------------------------------------------------------------------- /Examples/NoCollisions/no.collisions.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: 5 | loadBalance: 1 6 | lx: 100 7 | ly: 100 8 | lz: 100 9 | nParticles: 1000000 10 | nSteps: 10 11 | nx: 10 12 | ny: 10 13 | nz: 10 14 | seed: 1029384756 15 | xDom: 0 16 | yDom: 0 17 | zDom: 0 18
| eMax: 1.000001 19 | eMin: 1.000000 20 | nGroups: 230 21 | 22 | Geometry: 23 | material: boxMaterial 24 | shape: brick 25 | xMax: 100 26 | xMin: 0 27 | yMax: 100 28 | yMin: 0 29 | zMax: 100 30 | zMin: 0 31 | 32 | Geometry: 33 | material: sourceMaterial 34 | shape: brick 35 | xMax: 10 36 | xMin: 0 37 | yMax: 10 38 | yMin: 0 39 | zMax: 10 40 | zMin: 0 41 | 42 | Material: 43 | name: boxMaterial 44 | nIsotopes: 10 45 | nReactions: 9 46 | sourceRate: 0 47 | totalCrossSection: 1e-80 48 | absorptionCrossSection: flat 49 | fissionCrossSection: flat 50 | scatteringCrossSection: flat 51 | absorptionCrossSectionRatio: 1 52 | fissionCrossSectionRatio: 0 53 | scatteringCrossSectionRatio: 1 54 | 55 | Material: 56 | name: sourceMaterial 57 | nIsotopes: 10 58 | nReactions: 9 59 | sourceRate: 1e+10 60 | totalCrossSection: 1e-80 61 | absorptionCrossSection: flat 62 | fissionCrossSection: flat 63 | scatteringCrossSection: flat 64 | absorptionCrossSectionRatio: 1 65 | fissionCrossSectionRatio: 1 66 | scatteringCrossSectionRatio: 1 67 | 68 | CrossSection: 69 | name: flat 70 | A: 0 71 | B: 0 72 | C: 0 73 | D: 0 74 | E: 1 75 | nuBar: 2.4 76 | 77 | 78 | -------------------------------------------------------------------------------- /Examples/NoFission/noFission.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | inputFile: streamingProblem.inp 5 | boundaryCondition: octant 6 | loadBalance: 1 7 | cycleTimers: 0 8 | debugThreads: 0 9 | lx: 100 10 | ly: 100 11 | lz: 100 12 | nParticles: 10000000 13 | nSteps: 10 14 | nx: 10 15 | ny: 10 16 | nz: 10 17 | seed: 1029384756 18 | xDom: 0 19 | yDom: 0 20 | zDom: 0 21 | eMax: 20 22 | eMin: 1e-9 23 | nGroups: 230 24 | 25 | Geometry: 26 | material: sourceMaterial 27 | shape: brick 28 | xMax: 100 29 | xMin: 0 30 | yMax: 100 31 | yMin: 0 32 | zMax: 100 33 | zMin: 0 34 | 35 | Material: 36 | name: sourceMaterial 37 | nIsotopes: 10 38 | nReactions: 9 39 | sourceRate: 1e+10 40 | totalCrossSection: 0.198 41 | absorptionCrossSection: flat 42 | fissionCrossSection: flat 43 | scatteringCrossSection: flat 44 | absorptionCrossSectionRatio: 0.494949495 45 | fissionCrossSectionRatio: 0 46 | scatteringCrossSectionRatio: 0.505050505 47 | 48 | CrossSection: 49 | name: flat 50 | A: 0 51 | B: 0 52 | C: 0 53 | D: 0 54 | E: 1 55 | nuBar: 2.4 56 | -------------------------------------------------------------------------------- /Examples/NonFlatXC/NonFlatXC.inp: -------------------------------------------------------------------------------- 1 | Simulation: 2 | dt: 1e-08 3 | fMax: 0.1 4 | boundaryCondition: reflect 5 | loadBalance: 0 6 | cycleTimers: 0 7 | debugThreads: 0 8 | lx: 100 9 | ly: 100 10 | lz: 100 11 | nParticles: 1000000 12 | batchSize: 0 13 | nBatches: 10 14 | nSteps: 10 15 | nx: 10 16 | ny: 10 17 | nz: 10 18 | seed: 1029384756 19 | xDom: 0 20 | yDom: 0 21 | zDom: 0 22 | eMax: 20 23 | eMin: 1e-08 24 | nGroups: 230 25 | lowWeightCutoff: 0.001 26 | bTally: 1 27 | fTally: 1 28 | cTally: 1 29 | coralBenchmark: 0 30 | 31 | Geometry: 32 | material: sourceMaterial 33 | shape: brick 34 | xMax: 100 35 | xMin: 0 36 | yMax: 100 37 | yMin: 0 38 | zMax: 100 39 | zMin: 0 40 | 41 | Material: 42 | name: sourceMaterial 43 | mass: 1000.0 44 | nIsotopes: 10 45 | nReactions: 9 46 | sourceRate: 1e+10 47 | totalCrossSection: 6 48 | absorptionCrossSection: absorb 49 | fissionCrossSection: fission 50 | scatteringCrossSection: scatter 51 | absorptionCrossSectionRatio: 6e-3 52 | fissionCrossSectionRatio: 1 53 | 
scatteringCrossSectionRatio: 5 54 | 55 | Material: 56 | name: flatMaterial 57 | nIsotopes: 20 58 | nReactions: 9 59 | sourceRate: 1e+10 60 | totalCrossSection: 1 61 | absorptionCrossSection: flat 62 | fissionCrossSection: flat 63 | scatteringCrossSection: flat 64 | absorptionCrossSectionRatio: 1 65 | fissionCrossSectionRatio: 1 66 | scatteringCrossSectionRatio: 1 67 | 68 | CrossSection: 69 | name: flat 70 | A: 0 71 | B: 0 72 | C: 0 73 | D: 0 74 | E: 1 75 | nuBar: 2.4 76 | 77 | CrossSection: 78 | name: absorb 79 | A: 0 80 | B: 0 81 | C: 0 82 | D: -0.8446 83 | D: -0.5243 84 | E: -2.22 85 | 86 | CrossSection: 87 | name: fission 88 | A: 0 89 | B: 0 90 | C: 0 91 | D: -0.342 92 | E: 0 93 | nuBar: 2.4 94 | 95 | CrossSection: 96 | name: scatter 97 | A: 0 98 | B: 0 99 | C: 0 100 | D: 0 101 | E: 0.7 102 | 103 | 104 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | LICENSE 2 | ======= 3 | 4 | Copyright (c) 2017, Lawrence Livermore National Security, LLC. 5 | 6 | Produced at the Lawrence Livermore National Laboratory 7 | 8 | Written by David Richards [richards12@llnl.gov], Ryan Bleile, 9 | Patrick Brantley, Shawn Dawson, Scott McKinley, Matt O'Brien 10 | 11 | LLNL-CODE-684037. 12 | 13 | All rights reserved. 14 | 15 | This file is part of Quicksilver. For details, see 16 | http://www.github.com/LLNL/Quicksilver. Please also read 17 | the Additional BSD Notice below. 18 | 19 | Redistribution and use in source and binary forms, with or 20 | without modification, are permitted provided that the following 21 | conditions are met: 22 | 23 | * Redistributions of source code must retain the above copyright 24 | notice, this list of conditions and the disclaimer below. 25 | 26 | * Redistributions in binary form must reproduce the above copyright 27 | notice, this list of conditions and the disclaimer (as noted below) 28 | in the documentation and/or other materials provided with the 29 | distribution. 30 | 31 | * Neither the name of the LLNS/LLNL nor the names of its contributors 32 | may be used to endorse or promote products derived from this 33 | software without specific prior written permission. 34 | 35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 36 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 37 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 38 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 39 | DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL 40 | SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE 41 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 42 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 43 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 44 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 45 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 47 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 48 | THE POSSIBILITY OF SUCH DAMAGE. 49 | 50 | 51 | Additional BSD Notice 52 | --------------------- 53 | 54 | 1. This notice is required to be provided under our contract with the 55 | U.S. Department of Energy (DOE). This work was produced at Lawrence 56 | Livermore National Laboratory under Contract No. DE-AC52-07NA27344 57 | with the DOE. 58 | 59 | 2. 
Neither the United States Government nor Lawrence Livermore 60 | National Security, LLC nor any of their employees, makes any 61 | warranty, express or implied, or assumes any liability or 62 | responsibility for the accuracy, completeness, or usefulness of any 63 | information, apparatus, product, or process disclosed, or 64 | represents that its use would not infringe privately-owned rights. 65 | 66 | 3. Also, reference herein to any specific commercial products, 67 | process, or services by trade name, trademark, manufacturer or 68 | otherwise does not necessarily constitute or imply its endorsement, 69 | recommendation, or favoring by the United States Government or 70 | Lawrence Livermore National Security, LLC. The views and opinions 71 | of authors expressed herein do not necessarily state or reflect 72 | those of the United States Government or Lawrence Livermore 73 | National Security, LLC, and shall not be used for advertising or 74 | product endorsement purposes. 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Quicksilver 2 | =========== 3 | 4 | Introduction 5 | ------------ 6 | 7 | Quicksilver is a proxy application that represents some elements of 8 | the Mercury workload by solving a simplified dynamic Monte Carlo 9 | particle transport problem. Quicksilver attempts to replicate the 10 | memory access patterns, communication patterns, and the branching or 11 | divergence of Mercury for problems using multigroup cross sections. 12 | OpenMP and MPI are used for parallelization. A GPU version is 13 | available. Unified memory is assumed. 14 | 15 | Performance of Quicksilver is likely to be dominated by latency-bound 16 | table look-ups, a highly branchy/divergent code path, and poor 17 | vectorization potential. 18 | 19 | For more information, visit the 20 | [LLNL co-design pages.](https://codesign.llnl.gov/quicksilver.php) 21 | 22 | 23 | Building Quicksilver 24 | -------------------- 25 | 26 | Instructions to build Quicksilver can be found in the 27 | Makefile. Quicksilver is relatively easy to build, with no 28 | external dependencies (except MPI and OpenMP). You should be able to 29 | build Quicksilver on nearly any system by customizing the values of 30 | only four variables in the Makefile: 31 | 32 | * CXX The name of the C++ compiler (with path if necessary). 33 | Quicksilver uses C++11 features, so a C++11-compliant compiler 34 | should be used. 35 | 36 | * CXXFLAGS Command line switches to pass to the C++ compiler when 37 | compiling objects *and* when linking the executable. 38 | 39 | * CPPFLAGS Command line switches to pass to the compiler *only* when 40 | compiling objects. 41 | 42 | * LDFLAGS Command line switches to pass to the compiler *only* 43 | when linking the executable. 44 | 45 | Sample definitions for a number of common systems are provided. 46 | 47 | Quicksilver recognizes a number of pre-processor macros that enable or 48 | disable various code features such as MPI, OpenMP, etc. These are 49 | described in the Makefile. 50 | 51 | 52 | Running Quicksilver 53 | ------------------- 54 | 55 | Quicksilver’s behavior is controlled by a combination of command line 56 | options and an input file. All of the parameters that can be set on 57 | the command line can also be set in the input file. The input file 58 | values will override the command line. Run `$ qs -h` to see
Documentation 60 | of the input file parameters is in preparation. 61 | 62 | Quicksilver also has the property that the output of every run is a 63 | valid input file. Hence you can repeat any run for which you have the 64 | output file by using that output as an input file. 65 | 66 | 67 | License and Distribution Information 68 | ------------------------------------ 69 | 70 | Quicksilver is available [on GitHub](https://github.com/LLNL/Quicksilver) 71 | 72 | 73 | Quicksilver is open source software with a BSD license. See 74 | [LICENSE.md](https://github.com/LLNL/Quicksilver/blob/master/LICENSE.md) 75 | 76 | This work was performed under the auspices of the U.S. Department of 77 | Energy by Lawrence Livermore National Laboratory under Contract 78 | DE-AC52-07NA27344. 79 | 80 | LLNL-CODE-684037 81 | -------------------------------------------------------------------------------- /src/BulkStorage.hh: -------------------------------------------------------------------------------- 1 | #ifndef BULK_STORAGE_HH 2 | #define BULK_STORAGE_HH 3 | 4 | #include "MemoryControl.hh" 5 | 6 | template <typename T> 7 | class BulkStorage 8 | { 9 | public: 10 | BulkStorage() 11 | : _bulkStorage(0), 12 | _refCount(0), 13 | _size(0), 14 | _capacity(0), 15 | _memPolicy(MemoryControl::AllocationPolicy::UNDEFINED_POLICY) 16 | { 17 | _refCount = new int; 18 | *_refCount = 1; 19 | } 20 | 21 | BulkStorage(const BulkStorage& aa) 22 | : _bulkStorage(aa._bulkStorage), _refCount(aa._refCount), _size(aa._size), _capacity(aa._capacity), _memPolicy(aa._memPolicy) 23 | { 24 | ++(*_refCount); 25 | } 26 | 27 | ~BulkStorage() 28 | { 29 | --(*_refCount); 30 | if (*_refCount > 0) 31 | return; 32 | 33 | delete _refCount; 34 | 35 | // Catch the case that the storage was never allocated. This 36 | // happens when setCapacity is never called on this instance. 37 | if (_bulkStorage != 0) 38 | MemoryControl::deallocate(_bulkStorage, _capacity, _memPolicy); 39 | } 40 | 41 | /// Needed for copy-swap idiom 42 | void swap(BulkStorage& other) 43 | { 44 | std::swap(_bulkStorage, other._bulkStorage); 45 | std::swap(_refCount, other._refCount); 46 | std::swap(_size, other._size); 47 | std::swap(_capacity, other._capacity); 48 | std::swap(_memPolicy, other._memPolicy); 49 | } 50 | 51 | /// Implement assignment using copy-swap idiom 52 | BulkStorage& operator=(const BulkStorage& aa) 53 | { 54 | if (&aa != this) 55 | { 56 | BulkStorage temp(aa); 57 | this->swap(temp); 58 | } 59 | return *this; 60 | } 61 | 62 | void setCapacity(int capacity, MemoryControl::AllocationPolicy policy) 63 | { 64 | qs_assert(_bulkStorage == 0); 65 | _bulkStorage = MemoryControl::allocate<T>(capacity, policy); 66 | _capacity = capacity; 67 | _memPolicy = policy; 68 | } 69 | 70 | T* getBlock(int nItems) 71 | { 72 | T* blockStart = _bulkStorage + _size; 73 | _size += nItems; 74 | qs_assert(_size <= _capacity); 75 | return blockStart; 76 | } 77 | 78 | 79 | private: 80 | 81 | // This class doesn't have well defined copy semantics. However, 82 | // just disabling copy operations breaks the build since we haven't 83 | // been consistent about dealing with copy semantics in classes like 84 | // MC_Mesh_Domain.
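   // For reference, the conventional way to disable copying would be a
   // sketch along the lines of
   //    BulkStorage(const BulkStorage&) = delete;
   //    BulkStorage& operator=(const BulkStorage&) = delete;
   // which is exactly the change that currently breaks the build.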
85 | 86 | 87 | 88 | T* _bulkStorage; 89 | int* _refCount; 90 | int _size; 91 | int _capacity; 92 | MemoryControl::AllocationPolicy _memPolicy; 93 | 94 | }; 95 | 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /src/CollisionEvent.hh: -------------------------------------------------------------------------------- 1 | #ifndef COLLISION_EVENT_HH 2 | #define COLLISION_EVENT_HH 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | class MonteCarlo; 7 | class MC_Particle; 8 | 9 | HOST_DEVICE 10 | bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index ); 11 | HOST_DEVICE_END 12 | 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /src/CommObject.hh: -------------------------------------------------------------------------------- 1 | #ifndef COMM_OBJECT_HH 2 | #define COMM_OBJECT_HH 3 | 4 | #include 5 | 6 | #include 7 | #include "MeshPartition.hh" 8 | #include "Long64.hh" 9 | #include "FacetPair.hh" 10 | 11 | class CommObject 12 | { 13 | public: 14 | virtual ~CommObject(){}; 15 | virtual void exchange(MeshPartition::MapType& cellInfo, 16 | const std::vector& nbrDomain, 17 | std::vector > sendSet, 18 | std::vector > recvSet) = 0; 19 | virtual void exchange(std::vector sendBuf, 20 | std::vector& recvBuf) = 0; 21 | 22 | }; 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/CoralBenchmark.hh: -------------------------------------------------------------------------------- 1 | #ifndef CORALBENCHMARK_HH 2 | #define CORALBENCHMARK_HH 3 | 4 | class MonteCarlo; 5 | class Parameters; 6 | 7 | void coralBenchmarkCorrectness( MonteCarlo* monteCarlo, Parameters ¶ms ); 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /src/CycleTracking.cc: -------------------------------------------------------------------------------- 1 | #include "CycleTracking.hh" 2 | #include "MonteCarlo.hh" 3 | #include "ParticleVaultContainer.hh" 4 | #include "ParticleVault.hh" 5 | #include "MC_Segment_Outcome.hh" 6 | #include "CollisionEvent.hh" 7 | #include "MC_Facet_Crossing_Event.hh" 8 | #include "MCT.hh" 9 | #include "DeclareMacro.hh" 10 | #include "QS_atomics.hh" 11 | #include "macros.hh" 12 | #include "qs_assert.hh" 13 | 14 | HOST_DEVICE 15 | void CycleTrackingGuts( MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault ) 16 | { 17 | MC_Particle mc_particle; 18 | 19 | // Copy a single particle from the particle vault into mc_particle 20 | MC_Load_Particle(monteCarlo, mc_particle, processingVault, particle_index); 21 | 22 | // set the particle.task to the index of the processed vault the particle will census into. 
23 | mc_particle.task = 0;//processed_vault; 24 | 25 | // loop over this particle until we cannot do anything more with it on this processor 26 | CycleTrackingFunction( monteCarlo, mc_particle, particle_index, processingVault, processedVault ); 27 | 28 | //Make sure this particle is marked as completed 29 | processingVault->invalidateParticle( particle_index ); 30 | } 31 | HOST_DEVICE_END 32 | 33 | HOST_DEVICE 34 | void CycleTrackingFunction( MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault* processingVault, ParticleVault* processedVault) 35 | { 36 | bool keepTrackingThisParticle = false; 37 | unsigned int tally_index = (particle_index) % monteCarlo->_tallies->GetNumBalanceReplications(); 38 | unsigned int flux_tally_index = (particle_index) % monteCarlo->_tallies->GetNumFluxReplications(); 39 | unsigned int cell_tally_index = (particle_index) % monteCarlo->_tallies->GetNumCellTallyReplications(); 40 | do 41 | { 42 | // Determine the outcome of a particle at the end of this segment such as: 43 | // 44 | // (0) Undergo a collision within the current cell, 45 | // (1) Cross a facet of the current cell, 46 | // (2) Reach the end of the time step and enter census, 47 | // 48 | #ifdef EXPONENTIAL_TALLY 49 | monteCarlo->_tallies->TallyCellValue( exp(rngSample(&mc_particle.random_number_seed)) , mc_particle.domain, cell_tally_index, mc_particle.cell); 50 | #endif 51 | MC_Segment_Outcome_type::Enum segment_outcome = MC_Segment_Outcome(monteCarlo, mc_particle, flux_tally_index); 52 | 53 | QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._numSegments); 54 | 55 | mc_particle.num_segments += 1.; /* Track the number of segments this particle has 56 | undergone this cycle on all processes. */ 57 | switch (segment_outcome) { 58 | case MC_Segment_Outcome_type::Collision: 59 | { 60 | // The particle undergoes a collision event producing: 61 | // (0) Other-than-one same-species secondary particle, or 62 | // (1) Exactly one same-species secondary particle. 63 | if (CollisionEvent(monteCarlo, mc_particle, tally_index ) == MC_Collision_Event_Return::Continue_Tracking) 64 | { 65 | keepTrackingThisParticle = true; 66 | } 67 | else 68 | { 69 | keepTrackingThisParticle = false; 70 | } 71 | } 72 | break; 73 | 74 | case MC_Segment_Outcome_type::Facet_Crossing: 75 | { 76 | // The particle has reached a cell facet. 77 | MC_Tally_Event::Enum facet_crossing_type = MC_Facet_Crossing_Event(mc_particle, monteCarlo, particle_index, processingVault); 78 | 79 | if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Transit_Exit) 80 | { 81 | keepTrackingThisParticle = true; // Transit Event 82 | } 83 | else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Escape) 84 | { 85 | QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._escape); 86 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; 87 | mc_particle.species = -1; 88 | keepTrackingThisParticle = false; 89 | } 90 | else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Reflection) 91 | { 92 | MCT_Reflect_Particle(monteCarlo, mc_particle); 93 | keepTrackingThisParticle = true; 94 | } 95 | else 96 | { 97 | // Enters an adjacent cell in an off-processor domain. 98 | //mc_particle.species = -1; 99 | keepTrackingThisParticle = false; 100 | } 101 | } 102 | break; 103 | 104 | case MC_Segment_Outcome_type::Census: 105 | { 106 | // The particle has reached the end of the time step. 
107 | processedVault->pushParticle(mc_particle); 108 | QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._census); 109 | keepTrackingThisParticle = false; 110 | break; 111 | } 112 | 113 | default: 114 | qs_assert(false); 115 | break; // should this be an error 116 | } 117 | 118 | } while ( keepTrackingThisParticle ); 119 | } 120 | HOST_DEVICE_END 121 | 122 | -------------------------------------------------------------------------------- /src/CycleTracking.hh: -------------------------------------------------------------------------------- 1 | #include "DeclareMacro.hh" 2 | 3 | // Forward Declaration 4 | class ParticleVault; 5 | class MonteCarlo; 6 | class MC_Particle; 7 | 8 | HOST_DEVICE 9 | void CycleTrackingGuts( MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault ); 10 | HOST_DEVICE_END 11 | 12 | HOST_DEVICE 13 | void CycleTrackingFunction( MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault* processingVault, ParticleVault* processedVault); 14 | HOST_DEVICE_END 15 | -------------------------------------------------------------------------------- /src/DeclareMacro.hh: -------------------------------------------------------------------------------- 1 | #ifndef DECLAREMACRO_HH 2 | #define DECLAREMACRO_HH 3 | 4 | #if defined HAVE_CUDA || defined HAVE_HIP 5 | #define HOST_DEVICE __host__ __device__ 6 | #define HOST_DEVICE_CUDA __host__ __device__ 7 | #define HOST_DEVICE_CLASS 8 | #define HOST_DEVICE_END 9 | #define DEVICE __device__ 10 | #define DEVICE_END 11 | //#define HOST __host__ 12 | #define HOST_END 13 | #define GLOBAL __global__ 14 | #elif HAVE_OPENMP_TARGET 15 | #define HOST_DEVICE _Pragma( "omp declare target" ) 16 | #define HOST_DEVICE_CUDA 17 | #define HOST_DEVICE_CLASS _Pragma( "omp declare target" ) 18 | #define HOST_DEVICE_END _Pragma("omp end declare target") 19 | //#define HOST_DEVICE #pragma omp declare target 20 | //#define HOST_DEVICE_END #pragma omp end declare target 21 | //#define DEVICE #pragma omp declare target 22 | //#define DEVICE_END #pragma omp end declare target 23 | //#define HOST 24 | #define HOST_END 25 | #define GLOBAL 26 | #else 27 | #define HOST_DEVICE 28 | #define HOST_DEVICE_CUDA 29 | #define HOST_DEVICE_CLASS 30 | #define HOST_DEVICE_END 31 | #define DEVICE 32 | #define DEVICE_END 33 | //#define HOST 34 | #define HOST_END 35 | #define GLOBAL 36 | #endif 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/DecompositionObject.cc: -------------------------------------------------------------------------------- 1 | #include "DecompositionObject.hh" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "qs_assert.hh" 7 | 8 | using std::vector; 9 | using std::find; 10 | using std::swap; 11 | using std::pair; 12 | using std::set; 13 | using std::make_pair; 14 | namespace 15 | { 16 | void fisherYates(vector& vv) 17 | { 18 | int nItems = vv.size(); 19 | for (unsigned ii=0; ii localGid; 68 | 69 | for (unsigned jGid=0; jGid > tmp; 90 | for (unsigned ii=0; ii 5 | 6 | class DecompositionObject 7 | { 8 | public: 9 | DecompositionObject(int myRank, int nRanks, int nDomainsPerRank, int mode); 10 | 11 | int getRank(int domainGid) const {return _rank[domainGid];} 12 | int getIndex(int domainGid) const {return _index[domainGid];} 13 | const std::vector& getAssignedDomainGids() const {return _assignedGids;} 14 | 15 | private: 16 | std::vector _assignedGids; 17 | std::vector _rank; // rank for given gid 
18 | std::vector<int> _index; // index for given gid 19 | }; 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/DirectionCosine.cc: -------------------------------------------------------------------------------- 1 | #include "DirectionCosine.hh" 2 | #include "MC_RNG_State.hh" 3 | #include "PhysicalConstants.hh" 4 | 5 | void DirectionCosine::Sample_Isotropic(uint64_t *seed) 6 | { 7 | this->gamma = 1.0 - 2.0*rngSample(seed); 8 | double sine_gamma = sqrt((1.0 - (gamma*gamma))); 9 | double phi = PhysicalConstants::_pi*(2.0*rngSample(seed) - 1.0); 10 | 11 | this->alpha = sine_gamma * cos(phi); 12 | this->beta = sine_gamma * sin(phi); 13 | } 14 | -------------------------------------------------------------------------------- /src/EnergySpectrum.cc: -------------------------------------------------------------------------------- 1 | #include "EnergySpectrum.hh" 2 | #include "MonteCarlo.hh" 3 | #include "ParticleVault.hh" 4 | #include "ParticleVaultContainer.hh" 5 | #include "utilsMpi.hh" 6 | #include "MC_Processor_Info.hh" 7 | #include "Parameters.hh" 8 | #include <cinttypes> 9 | 10 | using std::string; 11 | 12 | void EnergySpectrum::UpdateSpectrum(MonteCarlo* monteCarlo) 13 | { 14 | if( _fileName == "" ) return; 15 | 16 | for( uint64_t ii = 0; ii < monteCarlo->_particleVaultContainer->processingSize(); ii++) 17 | { 18 | ParticleVault* processing = monteCarlo->_particleVaultContainer->getTaskProcessingVault( ii ); 19 | for( uint64_t jj = 0; jj < processing->size(); jj++ ) 20 | { 21 | MC_Particle mc_particle; 22 | MC_Load_Particle(monteCarlo, mc_particle, processing, jj); 23 | _censusEnergySpectrum[mc_particle.energy_group]++; 24 | } 25 | } 26 | for( uint64_t ii = 0; ii < monteCarlo->_particleVaultContainer->processedSize(); ii++) 27 | { 28 | ParticleVault* processed = monteCarlo->_particleVaultContainer->getTaskProcessedVault( ii ); 29 | for( uint64_t jj = 0; jj < processed->size(); jj++ ) 30 | { 31 | MC_Particle mc_particle; 32 | MC_Load_Particle(monteCarlo, mc_particle, processed, jj); 33 | _censusEnergySpectrum[mc_particle.energy_group]++; 34 | } 35 | } 36 | } 37 | 38 | void EnergySpectrum::PrintSpectrum(MonteCarlo* monteCarlo) 39 | { 40 | if( _fileName == "" ) return; 41 | 42 | const int count = monteCarlo->_nuclearData->_energies.size(); 43 | uint64_t *sumHist = new uint64_t[ count ](); 44 | 45 | mpiAllreduce( _censusEnergySpectrum.data(), sumHist, count, MPI_INT64_T, MPI_SUM, monteCarlo->processor_info->comm_mc_world ); 46 | 47 | if( monteCarlo->processor_info->rank == 0 ) 48 | { 49 | _fileName += ".dat"; 50 | FILE* spectrumFile; 51 | spectrumFile = fopen( _fileName.c_str(), "w" ); 52 | 53 | for( int ii = 0; ii < count; ii++ ) 54 | { 55 | fprintf( spectrumFile, "%d\t%g\t%" PRIu64 "\n", ii, monteCarlo->_nuclearData->_energies[ii], sumHist[ii] ); 56 | } 57 | 58 | fclose( spectrumFile ); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/EnergySpectrum.hh: -------------------------------------------------------------------------------- 1 | #ifndef ENERGYSPECTRUM_HH 2 | #define ENERGYSPECTRUM_HH 3 | #include <string> 4 | #include <vector> 5 | 6 | class MonteCarlo; 7 | 8 | class EnergySpectrum 9 | { 10 | public: 11 | EnergySpectrum(std::string name, uint64_t size) : _fileName(name), _censusEnergySpectrum(size,0) {}; 12 | void UpdateSpectrum(MonteCarlo* monteCarlo); 13 | void PrintSpectrum(MonteCarlo* monteCarlo); 14 | 15 | private: 16 | std::string _fileName; 17 | std::vector<uint64_t> _censusEnergySpectrum; 18 | }; 19 | 20 |
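// Usage sketch (illustrative; monteCarlo and nGroups stand in for whatever
// the caller has in scope): size the histogram to the number of energy
// groups, fold in the vaults each cycle, then write the reduced spectrum:
//
//    EnergySpectrum spectrum("spectrum", nGroups);
//    spectrum.UpdateSpectrum(monteCarlo);   // once per cycle
//    spectrum.PrintSpectrum(monteCarlo);    // writes "spectrum.dat" on rank 0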
#endif 21 | 22 | -------------------------------------------------------------------------------- /src/FacetPair.hh: -------------------------------------------------------------------------------- 1 | #ifndef FACET_PAIR_HH 2 | #define FACET_PAIR_HH 3 | 4 | #include "MC_Location.hh" 5 | 6 | class FacetPair 7 | { 8 | public: 9 | FacetPair(){}; 10 | FacetPair(int domainGid1, const MC_Location& location1, 11 | int domainGid2, const MC_Location& location2) 12 | : _domainGid1(domainGid1), 13 | _domainIndex1(location1.domain), 14 | _cellIndex1(location1.cell), 15 | _facetIndex1(location1.facet), 16 | _domainGid2(domainGid2), 17 | _domainIndex2(location2.domain), 18 | _cellIndex2(location2.cell), 19 | _facetIndex2(location2.facet) 20 | { 21 | } 22 | 23 | int _domainGid1; 24 | int _domainIndex1; 25 | int _facetIndex1; 26 | int _cellIndex1; 27 | int _domainGid2; 28 | int _domainIndex2; 29 | int _facetIndex2; 30 | int _cellIndex2; 31 | }; 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/GlobalFccGrid.cc: -------------------------------------------------------------------------------- 1 | #include "GlobalFccGrid.hh" 2 | #include 3 | #include 4 | #include "MC_Vector.hh" 5 | #include "Tuple.hh" 6 | 7 | using std::vector; 8 | using std::min; 9 | using std::max; 10 | 11 | namespace 12 | { 13 | const vector& getFaceTupleOffset(); 14 | } 15 | 16 | 17 | GlobalFccGrid::GlobalFccGrid(int nx, int ny, int nz, 18 | double lx, double ly, double lz) 19 | : _nx(nx), _ny(ny), _nz(nz), 20 | _lx(lx), _ly(ly), _lz(lz), 21 | _cellTupleToIndex(nx, ny, nz), 22 | _cellIndexToTuple(nx, ny, nz), 23 | _nodeTupleToIndex(nx+1, ny+1, nz+1, 4), 24 | _nodeIndexToTuple(nx+1, ny+1, nz+1, 4) 25 | { 26 | _dx = _lx/_nx; 27 | _dy = _ly/_ny; 28 | _dz = _lz/_nz; 29 | } 30 | 31 | Long64 GlobalFccGrid::whichCell(const MC_Vector& r) const 32 | { 33 | int ix = r.x/_dx; 34 | int iy = r.y/_dy; 35 | int iz = r.z/_dz; 36 | return _cellTupleToIndex(ix, iy, iz); 37 | } 38 | 39 | 40 | MC_Vector GlobalFccGrid::cellCenter(Long64 iCell) const 41 | { 42 | Tuple tt = _cellIndexToTuple(iCell); 43 | MC_Vector r = nodeCoord(Tuple4(tt.x(), tt.y(), tt.z(), 0) ); 44 | r += MC_Vector(_dx/2., _dy/2., _dz/2.); 45 | return r; 46 | } 47 | 48 | const vector& GlobalFccGrid::cornerTupleOffsets() const 49 | { 50 | static vector offset; 51 | if (offset.size() == 0) 52 | { 53 | offset.reserve(14); 54 | offset.push_back(Tuple4(0, 0, 0, 0)); // 0 55 | offset.push_back(Tuple4(1, 0, 0, 0)); // 1 56 | offset.push_back(Tuple4(0, 1, 0, 0)); // 2 57 | offset.push_back(Tuple4(1, 1, 0, 0)); // 3 58 | offset.push_back(Tuple4(0, 0, 1, 0)); // 4 59 | offset.push_back(Tuple4(1, 0, 1, 0)); // 5 60 | offset.push_back(Tuple4(0, 1, 1, 0)); // 6 61 | offset.push_back(Tuple4(1, 1, 1, 0)); // 7 62 | offset.push_back(Tuple4(1, 0, 0, 1)); // 8 63 | offset.push_back(Tuple4(0, 0, 0, 1)); // 9 64 | offset.push_back(Tuple4(0, 1, 0, 2)); // 10 65 | offset.push_back(Tuple4(0, 0, 0, 2)); // 11 66 | offset.push_back(Tuple4(0, 0, 1, 3)); // 12 67 | offset.push_back(Tuple4(0, 0, 0, 3)); // 13 68 | } 69 | return offset; 70 | } 71 | 72 | void GlobalFccGrid::getNodeGids(Long64 cellGid, vector& nodeGid) const 73 | { 74 | if( nodeGid.size() == 0 ) 75 | { 76 | nodeGid.resize(14); 77 | } 78 | 79 | Tuple tt = _cellIndexToTuple(cellGid); 80 | Tuple4 baseNodeTuple = Tuple4(tt.x(), tt.y(), tt.z(), 0); 81 | const vector& cornerTupleOffset = cornerTupleOffsets(); 82 | for (unsigned ii=0; ii<14; ++ii) 83 | nodeGid[ii] = _nodeTupleToIndex(baseNodeTuple + 
cornerTupleOffset[ii]); 84 | } 85 | 86 | // for faces on the outer surface of the global grid, the returned cell 87 | // gid will be the same as the input cellGid 88 | void GlobalFccGrid::getFaceNbrGids(Long64 cellGid, vector& nbrCellGid) const 89 | { 90 | if( nbrCellGid.size() == 0 ) 91 | { 92 | nbrCellGid.resize(6); 93 | } 94 | 95 | Tuple cellTuple = _cellIndexToTuple(cellGid); 96 | const vector& faceTupleOffset = getFaceTupleOffset(); 97 | 98 | for (unsigned ii=0; ii<6; ++ii) 99 | { 100 | Tuple faceNbr = cellTuple + faceTupleOffset[ii]; 101 | snapTuple(faceNbr); 102 | nbrCellGid[ii] = _cellTupleToIndex(faceNbr); 103 | } 104 | } 105 | 106 | 107 | MC_Vector GlobalFccGrid::nodeCoord(Long64 index) const 108 | { 109 | return nodeCoord(_nodeIndexToTuple(index)); 110 | } 111 | 112 | MC_Vector GlobalFccGrid::nodeCoord(const Tuple4& tt) const 113 | { 114 | vector basisOffset; 115 | basisOffset.reserve(4); 116 | if (basisOffset.size() == 0) 117 | { 118 | basisOffset.push_back(MC_Vector(0., 0., 0. )); 119 | basisOffset.push_back(MC_Vector(0., _dy/2.0, _dz/2.0)); 120 | basisOffset.push_back(MC_Vector(_dx/2.0, 0., _dz/2.0)); 121 | basisOffset.push_back(MC_Vector(_dx/2.0, _dy/2.0, 0. )); 122 | } 123 | 124 | double rx = tt.x()*_dx; 125 | double ry = tt.y()*_dy; 126 | double rz = tt.z()*_dz; 127 | 128 | MC_Vector rr = MC_Vector(rx, ry, rz) + basisOffset[tt.b()]; 129 | 130 | return rr; 131 | } 132 | 133 | void GlobalFccGrid::snapTuple(Tuple& tt) const 134 | { 135 | tt.x() = min(max(0, tt.x()), _nx-1); 136 | tt.y() = min(max(0, tt.y()), _ny-1); 137 | tt.z() = min(max(0, tt.z()), _nz-1); 138 | } 139 | 140 | namespace 141 | { 142 | const vector& getFaceTupleOffset() 143 | { 144 | static vector faceTupleOffset; 145 | 146 | if (faceTupleOffset.size() == 0) 147 | { 148 | faceTupleOffset.reserve(6); 149 | faceTupleOffset.push_back( Tuple( 1, 0, 0) ); 150 | faceTupleOffset.push_back( Tuple(-1, 0, 0) ); 151 | faceTupleOffset.push_back( Tuple( 0, 1, 0) ); 152 | faceTupleOffset.push_back( Tuple( 0, -1, 0) ); 153 | faceTupleOffset.push_back( Tuple( 0, 0, 1) ); 154 | faceTupleOffset.push_back( Tuple( 0, 0, -1) ); 155 | } 156 | 157 | return faceTupleOffset; 158 | } 159 | } 160 | 161 | -------------------------------------------------------------------------------- /src/GlobalFccGrid.hh: -------------------------------------------------------------------------------- 1 | #ifndef GLOBAL_FCC_GRID_HH 2 | #define GLOBAL_FCC_GRID_HH 3 | 4 | #include 5 | #include "TupleToIndex.hh" 6 | #include "IndexToTuple.hh" 7 | #include "Tuple4ToIndex.hh" 8 | #include "IndexToTuple4.hh" 9 | 10 | 11 | class MC_Vector; 12 | 13 | class GlobalFccGrid 14 | { 15 | public: 16 | GlobalFccGrid(int nx, int ny, int nz, 17 | double lx, double ly, double lz); 18 | 19 | double lx() const {return _lx;} 20 | double ly() const {return _ly;} 21 | double lz() const {return _lz;} 22 | double nx() const {return _nx;} 23 | double ny() const {return _ny;} 24 | double nz() const {return _nz;} 25 | 26 | Long64 whichCell(const MC_Vector& r) const; 27 | 28 | MC_Vector cellCenter(Long64 iCell) const; 29 | Tuple cellIndexToTuple(Long64 iCell) const {return _cellIndexToTuple(iCell);} 30 | Long64 cellTupleToIndex(const Tuple& tt) const {return _cellTupleToIndex(tt);} 31 | 32 | Long64 nodeIndex(const Tuple4& tt) const {return _nodeTupleToIndex(tt);} 33 | 34 | const std::vector& cornerTupleOffsets() const; 35 | void getNodeGids(Long64 cellGid, std::vector& nodeGid) const; 36 | void getFaceNbrGids(Long64 cellGid, std::vector& nbrCellGid) const; 37 | 38 | MC_Vector 
nodeCoord(Long64 index) const; 39 | MC_Vector nodeCoord(const Tuple4& tt) const; 40 | 41 | // We should get rid of snap tuple and provide a way to get the 42 | // indices of face nbrs. 43 | void snapTuple(Tuple& tt) const; 44 | 45 | private: 46 | int _nx, _ny, _nz; // number of cells (i.e., elements) 47 | double _lx, _ly, _lz; // size of problem space (in cm) 48 | double _dx, _dy, _dz; // size of a mesh cell (in cm) 49 | 50 | TupleToIndex _cellTupleToIndex; 51 | IndexToTuple _cellIndexToTuple; 52 | Tuple4ToIndex _nodeTupleToIndex; 53 | IndexToTuple4 _nodeIndexToTuple; 54 | }; 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /src/Globals.hh: -------------------------------------------------------------------------------- 1 | #ifndef GLOBALS_HH 2 | #define GLOBALS_HH 3 | 4 | class MonteCarlo; 5 | extern MonteCarlo* mcco; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/GridAssignmentObject.hh: -------------------------------------------------------------------------------- 1 | #ifndef GRID_ASSIGNMENT_OBJECT_HH 2 | #define GRID_ASSIGNMENT_OBJECT_HH 3 | 4 | #include 5 | #include 6 | #include "MC_Vector.hh" 7 | #include "Tuple.hh" 8 | 9 | /** The GRID_ASSIGNMENT_OBJECT computes the closest center to a given 10 | * particle coordinate using a grid/flood approach. The intent of this 11 | * code is to provide an initial assignment method that scales only as 12 | * the number of particles to assign. (I.e., it is independent of the 13 | * number of centers). 14 | * 15 | * To vastly simplify the code we completely ignore periodic boundary 16 | * conditions. We can get away with this because the initial assignment 17 | * doesn't have to be perfect, it only needs to be close. If we can get 18 | * a particle into a domain that is close to its correct Voronoi domain 19 | * then the regular assignment will do the right thing. 
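 *
 * Usage sketch (illustrative only): build the object once from the list of
 * centers, then query the nearest center for each coordinate:
 *
 *    GridAssignmentObject assigner(centers);
 *    int iCenter = assigner.nearestCenter(coordinate);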
*/ 20 | 21 | class GridAssignmentObject 22 | { 23 | public: 24 | 25 | GridAssignmentObject(const std::vector& centers); 26 | 27 | int nearestCenter(const MC_Vector rr); 28 | 29 | private: 30 | 31 | struct GridCell 32 | { 33 | GridCell() : _burned(false) {}; 34 | 35 | bool _burned; 36 | std::vector _myCenters; 37 | }; 38 | 39 | Tuple whichCellTuple(const MC_Vector r) const; 40 | int whichCell(const MC_Vector r) const; 41 | int tupleToIndex(Tuple tuple) const; 42 | Tuple indexToTuple(int index) const; 43 | double minDist2(const MC_Vector r, int iCell) const; 44 | void addTupleToQueue(Tuple iTuple); 45 | void addNbrsToQueue(int iCell); 46 | 47 | int _nx, _ny, _nz; 48 | double _dx, _dy, _dz; 49 | MC_Vector _corner; 50 | const std::vector& _centers; 51 | 52 | std::vector _grid; 53 | std::queue _floodQueue; 54 | std::queue _wetList; 55 | }; 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /src/IndexToTuple.hh: -------------------------------------------------------------------------------- 1 | #ifndef INDEX_TO_TUPLE_HH 2 | #define INDEX_TO_TUPLE_HH 3 | 4 | #include "Tuple.hh" 5 | #include "Long64.hh" 6 | 7 | class IndexToTuple 8 | { 9 | public: 10 | IndexToTuple(int nx, int ny, int nz) 11 | : nx_(nx), ny_(ny), nz_(nz) 12 | {}; 13 | 14 | Tuple operator()(Long64 index) const 15 | { 16 | int x = index % nx_; 17 | index /= nx_; 18 | int y = index % ny_; 19 | int z = index / ny_; 20 | 21 | return Tuple(x, y, z); 22 | } 23 | 24 | private: 25 | int nx_; 26 | int ny_; 27 | int nz_; 28 | }; 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/IndexToTuple4.hh: -------------------------------------------------------------------------------- 1 | #ifndef INDEX_TO_TUPLE4_HH 2 | #define INDEX_TO_TUPLE4_HH 3 | 4 | #include "Tuple4.hh" 5 | #include "Long64.hh" 6 | 7 | class IndexToTuple4 8 | { 9 | public: 10 | IndexToTuple4(int nx, int ny, int nz, int nb) 11 | : nx_(nx), ny_(ny), nz_(nz), nb_(nb) 12 | {}; 13 | 14 | Tuple4 operator()(Long64 index) const 15 | { 16 | int x = index % nx_; 17 | index /= nx_; 18 | int y = index % ny_; 19 | index /= ny_; 20 | int z = index % nz_; 21 | int b = index / nz_; 22 | 23 | return Tuple4(x, y, z, b); 24 | } 25 | 26 | private: 27 | int nx_; 28 | int ny_; 29 | int nz_; 30 | int nb_; 31 | }; 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/InputBlock.cc: -------------------------------------------------------------------------------- 1 | #include "InputBlock.hh" 2 | #include 3 | #include 4 | 5 | using std::map; 6 | using std::ostringstream; 7 | using std::string; 8 | 9 | 10 | InputBlock::InputBlock(const string& blockName) 11 | :_blockName(blockName) 12 | {} 13 | 14 | void InputBlock::addPair(const string& keyword, const string& value) 15 | { 16 | _kvPair[keyword] = value; 17 | } 18 | 19 | void InputBlock::serialize(std::vector& buf) const 20 | { 21 | ostringstream out; 22 | out << _blockName << '\0'; 23 | for (auto iter=_kvPair.begin(); iter!=_kvPair.end(); ++iter) 24 | out << iter->first << '\0' << iter->second <<'\0'; 25 | string tmp = out.str(); 26 | buf.clear(); 27 | buf.insert(buf.begin(), tmp.begin(), tmp.end()); 28 | } 29 | 30 | void InputBlock::deserialize(const std::vector& buf) 31 | { 32 | const char* tmp = &buf[0]; 33 | const char* end = tmp + buf.size(); 34 | 35 | _blockName = tmp; 36 | tmp += strlen(tmp) +1; 37 | 38 | while (tmp < end) 39 | { 40 | const char* keyword = tmp; 41 | tmp += strlen(tmp) +1; 
42 | const char* value = tmp; 43 | tmp += strlen(tmp) +1; 44 | _kvPair[keyword] = value; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/InputBlock.hh: -------------------------------------------------------------------------------- 1 | #ifndef INPUT_BLOCK_HH 2 | #define INPUT_BLOCK_HH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "qs_assert.hh" 9 | 10 | 11 | class InputBlock 12 | { 13 | public: 14 | 15 | InputBlock(const std::string& blockName); 16 | void addPair(const std::string& keyword, const std::string& value); 17 | 18 | template 19 | void getValue(const std::string& keyword, T& value) const; 20 | 21 | const std::string& name() const {return _blockName;} 22 | unsigned nPairs() const {return _kvPair.size();} 23 | 24 | void serialize(std::vector& buf) const; 25 | void deserialize(const std::vector& buf); 26 | 27 | private: 28 | void parseError(const std::string& keyword) const; 29 | 30 | std::string _blockName; 31 | std::map _kvPair; 32 | }; 33 | 34 | // If the keyword isn't found, value is unchanged. 35 | template 36 | void InputBlock::getValue(const std::string& keyword, T& value) const 37 | { 38 | auto here = _kvPair.find(keyword); 39 | if (here == _kvPair.end()) 40 | return; 41 | 42 | std::istringstream tmp(here->second); 43 | tmp >> value; 44 | 45 | if (!tmp) 46 | parseError(keyword); 47 | } 48 | 49 | inline void InputBlock::parseError(const std::string& keyword) const 50 | { 51 | qs_assert(false); 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /src/Long64.hh: -------------------------------------------------------------------------------- 1 | #ifndef LONG64_H 2 | #define LONG64_H 3 | 4 | #include "portability.hh" 5 | typedef uint64_t Long64; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/MCT.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_HH 2 | #define MCT_HH 3 | 4 | #include "portability.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | class MC_Particle; 8 | class MC_Domain; 9 | class MC_Location; 10 | class MC_Vector; 11 | class DirectionCosine; 12 | class MC_Nearest_Facet; 13 | class Subfacet_Adjacency; 14 | class MonteCarlo; 15 | 16 | 17 | HOST_DEVICE 18 | MC_Nearest_Facet MCT_Nearest_Facet( 19 | MC_Particle *mc_particle, 20 | MC_Location &location, 21 | MC_Vector &coordinate, 22 | const DirectionCosine *direction_cosine, 23 | double distance_threshold, 24 | double current_best_distance, 25 | bool new_segment, 26 | MonteCarlo* monteCarlo); 27 | HOST_DEVICE_END 28 | 29 | 30 | HOST_DEVICE 31 | void MCT_Generate_Coordinate_3D_G( 32 | uint64_t *random_number_seed, 33 | int domain_num, 34 | int cell, 35 | MC_Vector &coordinate, 36 | MonteCarlo* monteCarlo); 37 | HOST_DEVICE_END 38 | 39 | HOST_DEVICE 40 | MC_Vector MCT_Cell_Position_3D_G( 41 | const MC_Domain &domain, 42 | int cell_index); 43 | HOST_DEVICE_END 44 | 45 | HOST_DEVICE 46 | Subfacet_Adjacency &MCT_Adjacent_Facet(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo* monteCarlo); 47 | HOST_DEVICE_END 48 | 49 | HOST_DEVICE 50 | void MCT_Reflect_Particle(MonteCarlo *mcco, MC_Particle &particle); 51 | HOST_DEVICE_END 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /src/MC_Adjacent_Facet.cc: -------------------------------------------------------------------------------- 1 | #include "MCT.hh" 2 | #include 
"MC_Domain.hh" 3 | #include "Globals.hh" 4 | #include "MonteCarlo.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | class MC_Particle; 8 | 9 | HOST_DEVICE 10 | 11 | Subfacet_Adjacency &MCT_Adjacent_Facet(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo* monteCarlo) 12 | 13 | { 14 | MC_Domain &domain = monteCarlo->domain[location.domain]; 15 | 16 | Subfacet_Adjacency &adjacency =domain.mesh._cellConnectivity[location.cell]._facet[location.facet].subfacet; 17 | 18 | return adjacency; 19 | } 20 | 21 | HOST_DEVICE_END 22 | -------------------------------------------------------------------------------- /src/MC_Base_Particle.cc: -------------------------------------------------------------------------------- 1 | #include "MC_Base_Particle.hh" 2 | 3 | #define MCP_DATA_MEMBER_OLD(member, buffer, index, mode) \ 4 | { if ( mode == MC_Data_Member_Operation::Count ) { (index)++; } \ 5 | else if ( mode == MC_Data_Member_Operation::Pack ) { buffer[ (index)++ ] = (member); } \ 6 | else if ( mode == MC_Data_Member_Operation::Unpack ) { member = buffer[ (index)++ ]; } \ 7 | else if ( mode == MC_Data_Member_Operation::Reset ) { (index)++; member = 0; } } 8 | 9 | #define MCP_DATA_MEMBER_CAST_OLD(member, buffer, index, mode, someType) \ 10 | { if ( mode == MC_Data_Member_Operation::Count ) { (index)++; } \ 11 | else if ( mode == MC_Data_Member_Operation::Pack ) { buffer[ (index)++ ] = (member); } \ 12 | else if ( mode == MC_Data_Member_Operation::Unpack ) { member = (someType) buffer[ (index)++ ]; } \ 13 | else if ( mode == MC_Data_Member_Operation::Reset ) { (index)++; member = (someType) 0; } } 14 | 15 | #define MCP_DATA_MEMBER_LONG_TO_CHAR8(member, buffer, index, mode) \ 16 | { if ( mode == MC_Data_Member_Operation::Count ) { (index) += 8; } \ 17 | else if ( mode == MC_Data_Member_Operation::Pack ) { MC_Long_To_Char8(&member, &buffer[(index)]); (index) += 8; } \ 18 | else if ( mode == MC_Data_Member_Operation::Unpack ) { MC_Char8_To_Long(&member, &buffer[(index)]); (index) += 8; } \ 19 | else if ( mode == MC_Data_Member_Operation::Reset ) { (index) += 8; member = 0; }} 20 | 21 | void MC_Char8_To_Long(uint64_t *long_out, char char_in[8]) 22 | { 23 | *long_out = 0 ; 24 | 25 | for (int char_index = 0; char_index < 8; char_index++) 26 | { 27 | *long_out = *long_out | (unsigned char) char_in[char_index]; // OR in next byte 28 | if (char_index < 7) 29 | { 30 | *long_out = *long_out << 8; // Shift Left one byte 31 | } 32 | } 33 | 34 | } 35 | 36 | void MC_Long_To_Char8(const uint64_t *long_in, 37 | char char_out[8]) 38 | { 39 | uint64_t long_tmp; 40 | uint64_t mask = 0xffff; 41 | 42 | // Examine long_in from Right > Left, byte by byte. 43 | long_tmp = *long_in; 44 | for (int char_index = 7; char_index >= 0; char_index--) 45 | { 46 | char_out[char_index] = (char)(long_tmp & mask); // Get right-most byte 47 | long_tmp = long_tmp >> 8; // Shift Right one byte 48 | } 49 | 50 | } 51 | 52 | 53 | //---------------------------------------------------------------------------------------------------------------------- 54 | // Count, pack or unpack a single base particle. This routine operates in 3 55 | // different modes. This is so that the exact same code does the counting, packing and 56 | // unpacking so they will always stay synchronized and the communication will happen correctly. 57 | // Also, when the data structure changes, you only have to change this one place. 
58 | // 59 | //---------------------------------------------------------------------------------------------------------------------- 60 | void MC_Base_Particle::Serialize(int *int_data, double *float_data, char *char_data, int &int_index, int &float_index, 61 | int &char_index, MC_Data_Member_Operation::Enum mode) 62 | { 63 | MCP_DATA_MEMBER_OLD(coordinate.x, float_data, float_index, mode); 64 | MCP_DATA_MEMBER_OLD(coordinate.y, float_data, float_index, mode); 65 | MCP_DATA_MEMBER_OLD(coordinate.z, float_data, float_index, mode); 66 | MCP_DATA_MEMBER_OLD(velocity.x, float_data, float_index, mode); 67 | MCP_DATA_MEMBER_OLD(velocity.y, float_data, float_index, mode); 68 | MCP_DATA_MEMBER_OLD(velocity.z, float_data, float_index, mode); 69 | MCP_DATA_MEMBER_OLD(kinetic_energy, float_data, float_index, mode); 70 | MCP_DATA_MEMBER_OLD(weight, float_data, float_index, mode); 71 | MCP_DATA_MEMBER_OLD(time_to_census, float_data, float_index, mode); 72 | MCP_DATA_MEMBER_OLD(age, float_data, float_index, mode); 73 | MCP_DATA_MEMBER_OLD(num_mean_free_paths, float_data, float_index, mode); 74 | MCP_DATA_MEMBER_OLD(num_segments, float_data, float_index, mode); 75 | 76 | MCP_DATA_MEMBER_LONG_TO_CHAR8(random_number_seed, char_data, char_index, mode); 77 | MCP_DATA_MEMBER_LONG_TO_CHAR8(identifier, char_data, char_index, mode); 78 | 79 | MCP_DATA_MEMBER_CAST_OLD(last_event, int_data, int_index, mode, MC_Tally_Event::Enum); 80 | MCP_DATA_MEMBER_OLD(num_collisions, int_data, int_index, mode); 81 | MCP_DATA_MEMBER_OLD(breed, int_data, int_index, mode); 82 | MCP_DATA_MEMBER_OLD(species, int_data, int_index, mode); 83 | MCP_DATA_MEMBER_OLD(domain, int_data, int_index, mode); 84 | MCP_DATA_MEMBER_OLD(cell, int_data, int_index, mode); 85 | } 86 | 87 | 88 | 89 | 90 | 91 | 92 | int MC_Base_Particle::num_base_ints = 0; 93 | int MC_Base_Particle::num_base_floats = 0; 94 | int MC_Base_Particle::num_base_chars = 0; 95 | 96 | 97 | //---------------------------------------------------------------------------------------------------------------------- 98 | // Updates the num base counts by creating an instance and callingthe broadcast routine. 
99 | // 100 | //---------------------------------------------------------------------------------------------------------------------- 101 | void MC_Base_Particle::Update_Counts() 102 | { 103 | MC_Base_Particle base_particle; 104 | num_base_ints = 0; 105 | num_base_floats = 0; 106 | num_base_chars = 0; 107 | base_particle.Serialize(NULL, NULL, NULL, num_base_ints, num_base_floats, 108 | num_base_chars, MC_Data_Member_Operation::Count); 109 | } 110 | 111 | -------------------------------------------------------------------------------- /src/MC_Cell_State.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_CELL_STATE_INCLUDE 2 | #define MC_CELL_STATE_INCLUDE 3 | 4 | #include 5 | #include "QS_Vector.hh" 6 | #include "macros.hh" 7 | 8 | 9 | // this stores all the material information on a cell 10 | class MC_Cell_State 11 | { 12 | public: 13 | 14 | int _material; // gid of material 15 | 16 | // pre-computed cross-sections for material 17 | double* _total; // [energy groups] 18 | 19 | double _volume; // cell volume 20 | double _cellNumberDensity; // number density of ions in cel 21 | 22 | uint64_t _id; 23 | uint64_t _sourceTally; 24 | 25 | MC_Cell_State(); 26 | 27 | private: 28 | }; 29 | 30 | inline MC_Cell_State::MC_Cell_State() 31 | : _material(0), 32 | _total(), 33 | _volume(0.0), 34 | _cellNumberDensity(0.0), 35 | _sourceTally(0) 36 | { 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/MC_Distance_To_Facet.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_DISTANCE_INCLUDE 2 | #define MCT_DISTANCE_INCLUDE 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | HOST_DEVICE_CLASS 7 | class MC_Distance_To_Facet 8 | { 9 | public: 10 | double distance; 11 | int facet; 12 | int subfacet; 13 | HOST_DEVICE_CUDA 14 | MC_Distance_To_Facet(): distance(0.0), facet(0), subfacet(0) {} 15 | private: 16 | MC_Distance_To_Facet( const MC_Distance_To_Facet& ); // disable copy constructor 17 | MC_Distance_To_Facet& operator=( const MC_Distance_To_Facet& tmp ); // disable assignment operator 18 | 19 | }; 20 | HOST_DEVICE_END 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/MC_Domain.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_DOMAIN_INCLUDE 2 | #define MC_DOMAIN_INCLUDE 3 | 4 | 5 | #include "QS_Vector.hh" 6 | #include "MC_Facet_Adjacency.hh" 7 | #include "MC_Vector.hh" 8 | #include "MC_Cell_State.hh" 9 | #include "MC_Facet_Geometry.hh" 10 | #include "BulkStorage.hh" 11 | 12 | class Parameters; 13 | class MeshPartition; 14 | class GlobalFccGrid; 15 | class DecompositionObject; 16 | class MaterialDatabase; 17 | 18 | 19 | //---------------------------------------------------------------------------------------------------------------------- 20 | // class that manages data set on a mesh like geometry 21 | //---------------------------------------------------------------------------------------------------------------------- 22 | 23 | class MC_Mesh_Domain 24 | { 25 | public: 26 | 27 | int _domainGid; //dfr: Might be able to delete this later. 
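// Naming convention used here and throughout the mesh classes: a "gid" is
// a global identifier, unique across all MPI ranks, while an "index" is
// the position of the same object in this rank's local arrays.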
28 | 29 | qs_vector<int> _nbrDomainGid; 30 | qs_vector<int> _nbrRank; 31 | 32 | qs_vector<MC_Vector> _node; 33 | qs_vector<MC_Facet_Adjacency_Cell> _cellConnectivity; 34 | 35 | qs_vector<MC_Facet_Geometry_Cell> _cellGeometry; 36 | 37 | 38 | 39 | BulkStorage<MC_Facet_Adjacency> _connectivityFacetStorage; 40 | BulkStorage<int> _connectivityPointStorage; 41 | BulkStorage<MC_General_Plane> _geomFacetStorage; 42 | 43 | // -------------------------- public interface 44 | MC_Mesh_Domain(){}; 45 | MC_Mesh_Domain(const MeshPartition& meshPartition, 46 | const GlobalFccGrid& grid, 47 | const DecompositionObject& ddc, 48 | const qs_vector<MC_Subfacet_Adjacency_Event::Enum>& boundaryCondition); 49 | 50 | }; 51 | 52 | 53 | //---------------------------------------------------------------------------------------------------------------------- 54 | // class that manages a region on a domain. 55 | //---------------------------------------------------------------------------------------------------------------------- 56 | 57 | class MC_Domain 58 | { 59 | public: 60 | int domainIndex; // This appears to be unused. 61 | int global_domain; 62 | 63 | qs_vector<MC_Cell_State> cell_state; 64 | 65 | BulkStorage<double> _cachedCrossSectionStorage; 66 | 67 | // hold mesh information 68 | MC_Mesh_Domain mesh; 69 | 70 | // -------------------------- public interface 71 | MC_Domain(){}; 72 | MC_Domain(const MeshPartition& meshPartition, const GlobalFccGrid& grid, 73 | const DecompositionObject& ddc, const Parameters& params, 74 | const MaterialDatabase& materialDatabase, int numEnergyGroups); 75 | 76 | 77 | void clearCrossSectionCache(int numEnergyGroups); 78 | }; 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /src/MC_Facet_Adjacency.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_FACET_ADJACENCY_INCLUDE 2 | #define MCT_FACET_ADJACENCY_INCLUDE 3 | 4 | 5 | #include <cstdlib> 6 | #include "MC_Location.hh" 7 | #include "macros.hh" 8 | 9 | struct MC_Subfacet_Adjacency_Event 10 | { 11 | public: 12 | enum Enum 13 | { 14 | Adjacency_Undefined = 0, 15 | Boundary_Escape, 16 | Boundary_Reflection, 17 | Transit_On_Processor, 18 | Transit_Off_Processor 19 | }; 20 | }; 21 | 22 | class Subfacet_Adjacency 23 | { 24 | public: 25 | MC_Subfacet_Adjacency_Event::Enum event; 26 | MC_Location current; 27 | MC_Location adjacent; 28 | int neighbor_index; 29 | int neighbor_global_domain; 30 | int neighbor_foreman; 31 | 32 | 33 | Subfacet_Adjacency() 34 | : event(MC_Subfacet_Adjacency_Event::Adjacency_Undefined), 35 | current(), 36 | adjacent(), 37 | neighbor_index(-1), 38 | neighbor_global_domain(-1), 39 | neighbor_foreman(-1) 40 | {} 41 | }; 42 | 43 | class MC_Facet_Adjacency 44 | { 45 | public: 46 | Subfacet_Adjacency subfacet; 47 | int num_points; // the number of points defining that facet, for polyhedra 48 | int point[3]; // the points defining that facet, for polyhedra 49 | 50 | MC_Facet_Adjacency() : subfacet(), num_points(3) {point[0] = point[1] = point[2] = -1;} 51 | }; 52 | 53 | class MC_Facet_Adjacency_Cell 54 | { 55 | public: 56 | int num_facets; // 6 quad faces, each quad has 3 triangles = 24 faces 57 | MC_Facet_Adjacency* _facet; 58 | int num_points; // 8 hex corners + 6 face centers = 14 points 59 | int* _point; 60 | 61 | MC_Facet_Adjacency_Cell() : num_facets(24), _facet(0), num_points(14), _point(0) {} 62 | private: 63 | 64 | }; 65 | 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /src/MC_Facet_Crossing_Event.cc: -------------------------------------------------------------------------------- 1 | #include "MC_Facet_Crossing_Event.hh" 2 |
#include "ParticleVaultContainer.hh" 3 | #include "ParticleVault.hh" 4 | #include "MC_Domain.hh" 5 | #include "Tallies.hh" 6 | #include "MC_Particle.hh" 7 | #include "MC_Facet_Adjacency.hh" 8 | #include "Globals.hh" 9 | #include "MCT.hh" 10 | #include "MC_Particle_Buffer.hh" 11 | #include "DeclareMacro.hh" 12 | #include "macros.hh" 13 | #include "SendQueue.hh" 14 | 15 | //---------------------------------------------------------------------------------------------------------------------- 16 | // Determines whether the particle has been tracked to a facet such that it: 17 | // (i) enters into an adjacent cell 18 | // (ii) escapes across the system boundary (Vacuum BC), or 19 | // (iii) reflects off of the system boundary (Reflection BC). 20 | // 21 | //---------------------------------------------------------------------------------------------------------------------- 22 | 23 | HOST_DEVICE 24 | 25 | MC_Tally_Event::Enum MC_Facet_Crossing_Event(MC_Particle &mc_particle, MonteCarlo* monteCarlo, int particle_index, ParticleVault* processingVault) 26 | { 27 | MC_Location location = mc_particle.Get_Location(); 28 | 29 | Subfacet_Adjacency &facet_adjacency = MCT_Adjacent_Facet(location, mc_particle, monteCarlo); 30 | 31 | if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_On_Processor ) 32 | { 33 | // The particle will enter into an adjacent cell. 34 | mc_particle.domain = facet_adjacency.adjacent.domain; 35 | mc_particle.cell = facet_adjacency.adjacent.cell; 36 | mc_particle.facet = facet_adjacency.adjacent.facet; 37 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Transit_Exit; 38 | } 39 | else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Escape ) 40 | { 41 | // The particle will escape across the system boundary. 42 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; 43 | } 44 | else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection ) 45 | { 46 | // The particle will reflect off of the system boundary. 47 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Reflection; 48 | } 49 | else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_Off_Processor ) 50 | { 51 | // The particle will enter into an adjacent cell on a spatial neighbor. 52 | // The neighboring domain is on another processor. 
Set the particle's domain, cell, and facet to their local values on the neighbor processor. 53 | 54 | mc_particle.domain = facet_adjacency.adjacent.domain; 55 | mc_particle.cell = facet_adjacency.adjacent.cell; 56 | mc_particle.facet = facet_adjacency.adjacent.facet; 57 | mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Communication; 58 | 59 | // Select particle buffer 60 | int neighbor_rank = monteCarlo->domain[facet_adjacency.current.domain].mesh._nbrRank[facet_adjacency.neighbor_index]; 61 | 62 | processingVault->putParticle( mc_particle, particle_index ); 63 | 64 | //Push neighbor rank and mc_particle onto the send queue 65 | monteCarlo->_particleVaultContainer->getSendQueue()->push( neighbor_rank, particle_index ); 66 | 67 | } 68 | 69 | return mc_particle.last_event; 70 | } 71 | 72 | HOST_DEVICE_END 73 | -------------------------------------------------------------------------------- /src/MC_Facet_Crossing_Event.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_FACET_CROSSING_EVENT_HH 2 | #define MC_FACET_CROSSING_EVENT_HH 3 | 4 | #include "Tallies.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | class ParticleVault; 8 | class MC_Particle; 9 | 10 | HOST_DEVICE 11 | MC_Tally_Event::Enum MC_Facet_Crossing_Event(MC_Particle &mc_particle, MonteCarlo* monteCarlo, int particle_index, ParticleVault* processingVault); 12 | HOST_DEVICE_END 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /src/MC_Facet_Geometry.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_FACET_GEOMETRY_3D_INCLUDE 2 | #define MCT_FACET_GEOMETRY_3D_INCLUDE 3 | 4 | #include "macros.hh" 5 | #include <cstdlib> // NULL 6 | 7 | // A x + B y + C z + D = 0, (A,B,C) is the plane normal and is normalized. 8 | class MC_General_Plane 9 | { 10 | public: 11 | double A; 12 | double B; 13 | double C; 14 | double D; 15 | 16 | // Code to compute coefficients stolen from MCT_Facet_Adjacency_3D_G 17 | MC_General_Plane(){}; 18 | MC_General_Plane(const MC_Vector& r0, const MC_Vector& r1, const MC_Vector& r2) 19 | { 20 | A = ((r1.y - r0.y)*(r2.z - r0.z)) - ((r1.z - r0.z)*(r2.y - r0.y)); 21 | B = ((r1.z - r0.z)*(r2.x - r0.x)) - ((r1.x - r0.x)*(r2.z - r0.z)); 22 | C = ((r1.x - r0.x)*(r2.y - r0.y)) - ((r1.y - r0.y)*(r2.x - r0.x)); 23 | D = -1.0*(A*r0.x + B*r0.y + C*r0.z); 24 | 25 | double magnitude = sqrt(A * A + B * B + C * C); 26 | 27 | if ( magnitude == 0.0 ) 28 | { 29 | A = 1.0; 30 | magnitude = 1.0; 31 | } 32 | // Normalize the planar-facet geometric coefficients.
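// Once (A,B,C) has unit length, evaluating A*p.x + B*p.y + C*p.z + D for a
// point p gives the signed distance from p to the facet's plane, which is
// the quantity the distance-to-facet tracking calculations consume.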
33 | double inv_denominator = 1.0 / magnitude; 34 | 35 | A *= inv_denominator; 36 | B *= inv_denominator; 37 | C *= inv_denominator; 38 | D *= inv_denominator; 39 | } 40 | 41 | }; 42 | 43 | 44 | class MC_Facet_Geometry_Cell 45 | { 46 | public: 47 | MC_General_Plane* _facet; 48 | int _size; 49 | }; 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/MC_Fast_Timer.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_FAST_TIMER_INCLUDE 2 | #define MC_FAST_TIMER_INCLUDE 3 | 4 | #include 5 | #ifndef CHRONO_MISSING 6 | #include 7 | #endif 8 | 9 | #include "portability.hh" // needed for uint64_t in this file 10 | #include "utilsMpi.hh" // needed for MPI_Comm type in this file 11 | 12 | class MC_Fast_Timer 13 | { 14 | public: 15 | uint64_t numCalls; 16 | #ifdef CHRONO_MISSING 17 | double startClock; // from MPI 18 | double stopClock; 19 | #else 20 | std::chrono::high_resolution_clock::time_point startClock; // from c++11 high resolution timer calls 21 | std::chrono::high_resolution_clock::time_point stopClock; 22 | #endif 23 | uint64_t lastCycleClock; // in microseconds 24 | uint64_t cumulativeClock; // in microseconds 25 | 26 | 27 | MC_Fast_Timer() : numCalls(0), startClock(), stopClock(), lastCycleClock(0), cumulativeClock(0) {} ; // consturctor 28 | 29 | 30 | // 1 enumerated type for each timed section, this is hardcoded for efficiency. 31 | enum Enum 32 | { 33 | main = 0, 34 | cycleInit, 35 | cycleTracking, 36 | cycleTracking_Kernel, 37 | cycleTracking_MPI, 38 | cycleTracking_Test_Done, 39 | cycleFinalize, 40 | Num_Timers 41 | }; 42 | }; 43 | 44 | class MC_Fast_Timer_Container 45 | { 46 | public: 47 | MC_Fast_Timer_Container() {} ; // constructor 48 | void Cumulative_Report(int mpi_rank, int num_ranks, MPI_Comm comm_world, uint64_t numSegments); 49 | void Last_Cycle_Report(int report_time, int mpi_rank, int num_ranks, MPI_Comm comm_world); 50 | void Clear_Last_Cycle_Timers(); 51 | MC_Fast_Timer timers[MC_Fast_Timer::Num_Timers]; // timers for various routines 52 | 53 | private: 54 | void Print_Cumulative_Heading(int mpi_rank); 55 | void Print_Last_Cycle_Heading(int mpi_rank); 56 | }; 57 | 58 | 59 | extern const int mc_fast_timer_enums[MC_Fast_Timer::Num_Timers]; 60 | extern const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers]; 61 | 62 | #ifdef DISABLE_TIMERS // Disable timers with empty macros -- do not make timer calls 63 | 64 | #define MC_FASTTIMER_START(timerIndex) 65 | #define MC_FASTTIMER_STOP(timerIndex) 66 | #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) 0.0 67 | 68 | #else // DISABLE_TIMERS not defined. 
Set up timers 69 | 70 | #ifdef CHRONO_MISSING // compiler does not support high resolution timer, use MPI timer instead 71 | 72 | #define MC_FASTTIMER_START(timerIndex) \ 73 | if (omp_get_thread_num() == 0) { \ 74 | mcco->fast_timer->timers[timerIndex].startClock = mpiWtime(); \ 75 | } 76 | 77 | #define MC_FASTTIMER_STOP(timerIndex) \ 78 | if ( omp_get_thread_num() == 0 ) { \ 79 | mcco->fast_timer->timers[timerIndex].stopClock = mpiWtime(); \ 80 | mcco->fast_timer->timers[timerIndex].lastCycleClock += \ 81 | (long unsigned) ((mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock) * 1000000.0); \ 82 | mcco->fast_timer->timers[timerIndex].cumulativeClock += \ 83 | (long unsigned) ((mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock) * 1000000.0); \ 84 | mcco->fast_timer->timers[timerIndex].numCalls++; \ 85 | } 86 | 87 | #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) (float)mcco->fast_timer->timers[timerIndex].lastCycleClock / 1000000. 88 | 89 | #else // else CHRONO_MISSING is not defined, so high resolution clock is available 90 | 91 | #define MC_FASTTIMER_START(timerIndex) \ 92 | if (omp_get_thread_num() == 0) { \ 93 | mcco->fast_timer->timers[timerIndex].startClock = std::chrono::high_resolution_clock::now(); \ 94 | } 95 | 96 | #define MC_FASTTIMER_STOP(timerIndex) \ 97 | if ( omp_get_thread_num() == 0 ) { \ 98 | mcco->fast_timer->timers[timerIndex].stopClock = std::chrono::high_resolution_clock::now(); \ 99 | mcco->fast_timer->timers[timerIndex].lastCycleClock += \ 100 | std::chrono::duration_cast \ 101 | (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ 102 | mcco->fast_timer->timers[timerIndex].cumulativeClock += \ 103 | std::chrono::duration_cast \ 104 | (mcco->fast_timer->timers[timerIndex].stopClock - mcco->fast_timer->timers[timerIndex].startClock).count(); \ 105 | mcco->fast_timer->timers[timerIndex].numCalls++; \ 106 | } 107 | 108 | #define MC_FASTTIMER_GET_LASTCYCLE(timerIndex) (float)mcco->fast_timer->timers[timerIndex].lastCycleClock / 1000000. 109 | 110 | 111 | #endif // end ifdef CHRONO_MISSING else section 112 | #endif // end if DISABLE_TIMERS 113 | 114 | #endif // end ifdef MC_FAST_TIMER_INCLUDE 115 | -------------------------------------------------------------------------------- /src/MC_Load_Particle.cc: -------------------------------------------------------------------------------- 1 | #include "ParticleVault.hh" 2 | #include "MC_Particle.hh" 3 | #include "MC_Time_Info.hh" 4 | #include "DeclareMacro.hh" 5 | 6 | //---------------------------------------------------------------------------------------------------------------------- 7 | // Copies a single particle from the particle-vault data structure into the active-particle data structure. 
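// Loading refreshes three pieces of per-particle state: a non-positive
// time_to_census is advanced by one time step (the particle has just come
// out of census), a negative age is clamped to zero, and the energy group
// is re-derived from the particle's current kinetic energy.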
8 | //---------------------------------------------------------------------------------------------------------------------- 9 | 10 | HOST_DEVICE 11 | void MC_Load_Particle(MonteCarlo *monteCarlo, MC_Particle &mc_particle, ParticleVault *particleVault, int particle_index) 12 | { 13 | //particleVault.popParticle(mc_particle); 14 | particleVault->getParticle(mc_particle, particle_index); 15 | 16 | // Time to Census 17 | if ( mc_particle.time_to_census <= 0.0 ) 18 | { 19 | mc_particle.time_to_census += monteCarlo->time_info->time_step; 20 | } 21 | 22 | // Age 23 | if (mc_particle.age < 0.0) { mc_particle.age = 0.0; } 24 | 25 | // Energy Group 26 | mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy); 27 | // printf("file=%s line=%d\n",__FILE__,__LINE__); 28 | 29 | } 30 | HOST_DEVICE_END 31 | 32 | -------------------------------------------------------------------------------- /src/MC_Location.cc: -------------------------------------------------------------------------------- 1 | #include "MC_Location.hh" 2 | #include "MonteCarlo.hh" 3 | #include "MC_Domain.hh" 4 | #include "DeclareMacro.hh" 5 | 6 | // Return a reference to the domain for this location. 7 | 8 | HOST_DEVICE 9 | const MC_Domain &MC_Location::get_domain(MonteCarlo *mcco) const 10 | { 11 | return mcco->domain[domain]; 12 | } 13 | 14 | HOST_DEVICE_END 15 | -------------------------------------------------------------------------------- /src/MC_Location.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_LOCATION_INCLUDE 2 | #define MC_LOCATION_INCLUDE 3 | 4 | 5 | // ToDo: How much chaos would be caused by removing the default constructor? 6 | 7 | #include 8 | #include "DeclareMacro.hh" 9 | 10 | class MC_Domain; 11 | class MC_Cell_State; 12 | class MonteCarlo; 13 | 14 | HOST_DEVICE_CLASS 15 | class MC_Location 16 | { 17 | public: 18 | int domain; 19 | int cell; 20 | int facet; 21 | 22 | HOST_DEVICE_CUDA 23 | MC_Location(int adomain, int acell, int afacet) 24 | : domain(adomain), 25 | cell(acell), 26 | facet(afacet) 27 | {} 28 | 29 | HOST_DEVICE_CUDA 30 | MC_Location() 31 | : domain(-1), 32 | cell(-1), 33 | facet(-1) 34 | {} 35 | 36 | HOST_DEVICE_CUDA 37 | const MC_Domain& get_domain(MonteCarlo *mcco) const; 38 | }; 39 | HOST_DEVICE_END 40 | 41 | HOST_DEVICE_CUDA 42 | inline bool operator==(const MC_Location& a, const MC_Location b) 43 | { 44 | return 45 | a.domain == b.domain && 46 | a.cell == b.cell && 47 | a.facet == b.facet; 48 | } 49 | 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/MC_Nearest_Facet.hh: -------------------------------------------------------------------------------- 1 | #ifndef MCT_NEAREST_FACET_INCLUDE 2 | #define MCT_NEAREST_FACET_INCLUDE 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | class MC_Nearest_Facet 7 | { 8 | public: 9 | 10 | int facet; 11 | double distance_to_facet; 12 | double dot_product; 13 | 14 | HOST_DEVICE 15 | MC_Nearest_Facet() 16 | : facet(0), 17 | distance_to_facet(1e80), 18 | dot_product(0.0) 19 | {} 20 | 21 | HOST_DEVICE_CUDA 22 | MC_Nearest_Facet& operator=( const MC_Nearest_Facet& nf ) 23 | { 24 | this->facet = nf.facet; 25 | this->distance_to_facet = nf.distance_to_facet; 26 | this->dot_product = nf.dot_product; 27 | return *this; 28 | } 29 | HOST_DEVICE_END 30 | 31 | }; 32 | #endif 33 | -------------------------------------------------------------------------------- /src/MC_Processor_Info.hh: 
-------------------------------------------------------------------------------- 1 | #ifndef MC_PROCESSOR_INFO_HH 2 | #define MC_PROCESSOR_INFO_HH 3 | 4 | #include "utilsMpi.hh" 5 | 6 | class MC_Processor_Info 7 | { 8 | public: 9 | 10 | int rank; 11 | int num_processors; 12 | int use_gpu; 13 | int gpu_id; 14 | 15 | MPI_Comm comm_mc_world; 16 | 17 | MC_Processor_Info() 18 | : comm_mc_world(MPI_COMM_WORLD), 19 | use_gpu(0), 20 | gpu_id(0) 21 | { 22 | mpiComm_rank(comm_mc_world, &rank); 23 | mpiComm_size(comm_mc_world, &num_processors); 24 | } 25 | 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/MC_RNG_State.cc: -------------------------------------------------------------------------------- 1 | #include "MC_RNG_State.hh" 2 | #include "DeclareMacro.hh" 3 | 4 | //---------------------------------------------------------------------------// 5 | 6 | namespace 7 | { 8 | HOST_DEVICE 9 | // Break a 64 bit state into 2 32 bit ints. 10 | void breakup_uint64( uint64_t uint64_in, 11 | uint32_t& front_bits, uint32_t& back_bits ) 12 | { 13 | front_bits = static_cast<uint32_t>( uint64_in >> 32 ); 14 | back_bits = static_cast<uint32_t>( uint64_in & 0xffffffff ); 15 | } 16 | HOST_DEVICE_END 17 | } 18 | 19 | //---------------------------------------------------------------------------// 20 | 21 | namespace 22 | { 23 | // Function used to hash a 64 bit int into another, unrelated one. It 24 | // does this in two 32 bit chunks. This function uses the algorithm 25 | // from Numerical Recipes in C, 2nd edition: psdes, p. 302. This is 26 | // used to make 64 bit numbers for use as initial states for the 64 27 | // bit lcg random number generator. 28 | HOST_DEVICE 29 | void pseudo_des( uint32_t& lword, uint32_t& irword ) 30 | { 31 | // This random number generator assumes that type uint32_t is a 32 bit int 32 | // = 1/2 of a 64 bit int. (The sizeof operator returns the size in bytes.) 33 | 34 | const int NITER = 2; 35 | const uint32_t c1[] = { 0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L }; 36 | const uint32_t c2[] = { 0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L}; 37 | 38 | uint32_t ia,ib,iswap,itmph=0,itmpl=0; 39 | 40 | for( int i = 0; i < NITER; i++) 41 | { 42 | ia = ( iswap = irword ) ^ c1[i]; 43 | itmpl = ia & 0xffff; 44 | itmph = ia >> 16; 45 | ib = itmpl*itmpl+ ~(itmph*itmph); 46 | 47 | irword = lword ^ (((ia = (ib >> 16) | 48 | ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph); 49 | 50 | lword=iswap; 51 | } 52 | } 53 | HOST_DEVICE_END 54 | } 55 | 56 | //---------------------------------------------------------------------------// 57 | 58 | namespace 59 | { 60 | 61 | HOST_DEVICE 62 | // Function used to reconstruct a 64 bit int from 2 32 bit ints. 63 | uint64_t reconstruct_uint64( uint32_t front_bits, uint32_t back_bits ) 64 | { 65 | uint64_t reconstructed, temp; 66 | reconstructed = static_cast<uint64_t>( front_bits ); 67 | temp = static_cast<uint64_t>( back_bits ); 68 | 69 | // shift first bits 32 bits to left 70 | reconstructed = reconstructed << 32; 71 | 72 | // temp must be masked to kill leading 1's. Then 'or' with reconstructed 73 | // to get the last bits in 74 | reconstructed |= (temp & 0x00000000ffffffff); 75 | 76 | return reconstructed; 77 | } 78 | HOST_DEVICE_END 79 | } 80 | 81 | //---------------------------------------------------------------------------// 82 | 83 | namespace 84 | { 85 | HOST_DEVICE 86 | // Function used to hash a 64 bit int to get an initial state.
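//
// The three helpers above chain into a pipeline: breakup_uint64 splits a
// 64 bit seed into two 32 bit halves, pseudo_des scrambles the halves, and
// reconstruct_uint64 reassembles them.  hash_state() below is exactly that
// pipeline; it is what lets rngSpawn_Random_Number_Seed derive a
// well-separated child seed from a parent seed without any shared state.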
87 | uint64_t hash_state( uint64_t initial_number ) 88 | { 89 | // break initial number apart into 2 32 bit ints 90 | uint32_t front_bits, back_bits; 91 | breakup_uint64( initial_number, front_bits, back_bits ); 92 | 93 | // hash the bits 94 | pseudo_des( front_bits, back_bits ); 95 | 96 | // put the hashed parts together into 1 64 bit int 97 | return reconstruct_uint64( front_bits, back_bits ); 98 | } 99 | HOST_DEVICE_END 100 | } 101 | 102 | //---------------------------------------------------------------------------------------------------------------------- 103 | // This routine spawns a "child" random number seed from a "parent" random number seed. 104 | //---------------------------------------------------------------------------------------------------------------------- 105 | 106 | HOST_DEVICE 107 | uint64_t rngSpawn_Random_Number_Seed(uint64_t *parent_seed) 108 | { 109 | uint64_t spawned_seed = hash_state(*parent_seed); 110 | // Bump the parent seed as that is what is expected from the interface. 111 | rngSample(parent_seed); 112 | return spawned_seed; 113 | } 114 | 115 | HOST_DEVICE_END 116 | -------------------------------------------------------------------------------- /src/MC_RNG_State.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_RNG_STATE_INCLUDE 2 | #define MC_RNG_STATE_INCLUDE 3 | 4 | #include "portability.hh" 5 | #include "DeclareMacro.hh" 6 | 7 | //---------------------------------------------------------------------------------------------------------------------- 8 | // A random number generator that implements a 64 bit linear congruential generator (lcg). 9 | // 10 | // This implementation is based on the rng class from Nick Gentile. 11 | //---------------------------------------------------------------------------------------------------------------------- 12 | 13 | // Generate a new random number seed 14 | HOST_DEVICE 15 | uint64_t rngSpawn_Random_Number_Seed(uint64_t *parent_seed); 16 | HOST_DEVICE_END 17 | 18 | //---------------------------------------------------------------------------------------------------------------------- 19 | // Sample returns the pseudo-random number produced by a call to a random 20 | // number generator. 21 | //---------------------------------------------------------------------------------------------------------------------- 22 | HOST_DEVICE 23 | inline double rngSample(uint64_t *seed) 24 | { 25 | // Reset the state from the previous value. 26 | *seed = 2862933555777941757ULL*(*seed) + 3037000493ULL; 27 | 28 | // Map the int state in (0,2**64) to double (0,1) 29 | // by multiplying by 30 | // 1/(2**64 - 1) = 1/18446744073709551615. 
31 | return 5.4210108624275222e-20*(*seed); 32 | } 33 | HOST_DEVICE_END 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/MC_Segment_Outcome.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_SEGMENT_OUTCOME_INCLUDE 2 | #define MC_SEGMENT_OUTCOME_INCLUDE 3 | 4 | class MC_Particle; 5 | class MC_Vector; 6 | class MonteCarlo; 7 | 8 | 9 | struct MC_Segment_Outcome_type 10 | { 11 | public: 12 | enum Enum 13 | { 14 | Initialize = -1, 15 | Collision = 0, 16 | Facet_Crossing = 1, 17 | Census = 2, 18 | Max_Number = 3 19 | }; 20 | }; 21 | 22 | 23 | struct MC_Collision_Event_Return 24 | { 25 | public: 26 | enum Enum 27 | { 28 | Stop_Tracking = 0, 29 | Continue_Tracking = 1, 30 | Continue_Collision = 2 31 | }; 32 | }; 33 | 34 | #include "DeclareMacro.hh" 35 | HOST_DEVICE 36 | MC_Segment_Outcome_type::Enum MC_Segment_Outcome(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int &flux_tally_index); 37 | HOST_DEVICE_END 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/MC_SourceNow.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_SOURCE_NOW_HH 2 | #define MC_SOURCE_NOW_HH 3 | 4 | class MonteCarlo; 5 | 6 | void MC_SourceNow(MonteCarlo *mcco); 7 | 8 | #endif 9 | 10 | -------------------------------------------------------------------------------- /src/MC_Time_Info.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_TIME_INFO_INCLUDE 2 | #define MC_TIME_INFO_INCLUDE 3 | 4 | 5 | class MC_Time_Info 6 | { 7 | public: 8 | int cycle; 9 | double initial_time; 10 | double final_time; 11 | double time; 12 | double time_step; 13 | 14 | MC_Time_Info() : cycle(0), initial_time(0.0), final_time(), time(0.0), time_step(1.0) {} 15 | 16 | }; 17 | 18 | 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/MC_Vector.hh: -------------------------------------------------------------------------------- 1 | #ifndef MC_VECTOR_INCLUDE 2 | #define MC_VECTOR_INCLUDE 3 | 4 | #include 5 | #include "DeclareMacro.hh" 6 | 7 | HOST_DEVICE_CLASS 8 | class MC_Vector 9 | { 10 | public: 11 | double x; 12 | double y; 13 | double z; 14 | 15 | HOST_DEVICE_CUDA 16 | MC_Vector() : x(0), y(0), z(0) {} 17 | HOST_DEVICE_CUDA 18 | MC_Vector(double a, double b, double c) : x(a), y(b), z(c) {} 19 | 20 | HOST_DEVICE_CUDA 21 | MC_Vector& operator=( const MC_Vector&tmp ) 22 | { 23 | if ( this == &tmp ) { return *this; } 24 | 25 | x = tmp.x; 26 | y = tmp.y; 27 | z = tmp.z; 28 | 29 | return *this; 30 | } 31 | 32 | HOST_DEVICE_CUDA 33 | bool operator==( const MC_Vector& tmp ) 34 | { 35 | return tmp.x == x && tmp.y == y && tmp.z == z; 36 | } 37 | 38 | HOST_DEVICE_CUDA 39 | MC_Vector& operator+=( const MC_Vector &tmp ) 40 | { 41 | x += tmp.x; 42 | y += tmp.y; 43 | z += tmp.z; 44 | return *this; 45 | } 46 | 47 | HOST_DEVICE_CUDA 48 | MC_Vector& operator-=( const MC_Vector &tmp ) 49 | { 50 | x -= tmp.x; 51 | y -= tmp.y; 52 | z -= tmp.z; 53 | return *this; 54 | } 55 | 56 | HOST_DEVICE_CUDA 57 | MC_Vector& operator*=(const double scalar) 58 | { 59 | x *= scalar; 60 | y *= scalar; 61 | z *= scalar; 62 | return *this; 63 | } 64 | 65 | HOST_DEVICE_CUDA 66 | MC_Vector& operator/=(const double scalar) 67 | { 68 | x /= scalar; 69 | y /= scalar; 70 | z /= scalar; 71 | return *this; 72 | } 73 | 74 | HOST_DEVICE_CUDA 75 | const MC_Vector operator+( 
const MC_Vector &tmp ) const 76 | { 77 | return MC_Vector(x + tmp.x, y + tmp.y, z + tmp.z); 78 | } 79 | 80 | HOST_DEVICE_CUDA 81 | const MC_Vector operator-( const MC_Vector &tmp ) const 82 | { 83 | return MC_Vector(x - tmp.x, y - tmp.y, z - tmp.z); 84 | } 85 | 86 | HOST_DEVICE_CUDA 87 | const MC_Vector operator*(const double scalar) const 88 | { 89 | return MC_Vector(scalar*x, scalar*y, scalar*z); 90 | } 91 | 92 | HOST_DEVICE_CUDA 93 | inline double Length() const { return std::sqrt(x*x + y*y + z*z); } 94 | 95 | // Distance from this vector to another point. 96 | HOST_DEVICE_CUDA 97 | inline double Distance(const MC_Vector& vv) const 98 | { return std::sqrt((x - vv.x)*(x - vv.x) + (y - vv.y)*(y - vv.y)+ (z - vv.z)*(z - vv.z)); } 99 | 100 | HOST_DEVICE_CUDA 101 | inline double Dot(const MC_Vector &tmp) const 102 | { 103 | return this->x*tmp.x + this->y*tmp.y + this->z*tmp.z; 104 | } 105 | 106 | HOST_DEVICE_CUDA 107 | inline MC_Vector Cross(const MC_Vector &v) const 108 | { 109 | return MC_Vector(y * v.z - z * v.y, 110 | z * v.x - x * v.z, 111 | x * v.y - y * v.x); 112 | } 113 | 114 | }; 115 | HOST_DEVICE_END 116 | 117 | 118 | #endif 119 | -------------------------------------------------------------------------------- /src/MacroscopicCrossSection.cc: -------------------------------------------------------------------------------- 1 | #include "MacroscopicCrossSection.hh" 2 | #include "MonteCarlo.hh" 3 | #include "MaterialDatabase.hh" 4 | #include "NuclearData.hh" 5 | #include "MC_Cell_State.hh" 6 | #include "DeclareMacro.hh" 7 | 8 | //---------------------------------------------------------------------------------------------------------------------- 9 | // Routine MacroscopicCrossSection calculates the number-density-weighted macroscopic cross 10 | // section of a cell. 11 | // 12 | // A reactionIndex of -1 means total cross section. 13 | //---------------------------------------------------------------------------------------------------------------------- 14 | HOST_DEVICE 15 | double macroscopicCrossSection(MonteCarlo* monteCarlo, int reactionIndex, int domainIndex, int cellIndex, 16 | int isoIndex, int energyGroup) 17 | { 18 | // Initialize various data items. 19 | int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material; 20 | 21 | double atomFraction = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._atomFraction; 22 | 23 | double microscopicCrossSection = 0.0; 24 | // The cell number density is the fraction of the atoms in cell 25 | // volume of this isotope. We set this (elsewhere) to 1/nIsotopes. 
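// (Concretely, the value returned below is
//    Sigma = atomFraction * cellNumberDensity * microscopicCrossSection,
// the standard number-density-weighted macroscopic cross section, in units
// of inverse length.)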
26 | // This is a statement that we treat materials as if all of their 27 | // isotopes are present in equal amounts 28 | double cellNumberDensity = monteCarlo->domain[domainIndex].cell_state[cellIndex]._cellNumberDensity; 29 | 30 | int isotopeGid = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid; 31 | if ( atomFraction == 0.0 || cellNumberDensity == 0.0) { return 1e-20; } 32 | 33 | if (reactionIndex < 0) 34 | { 35 | // Return total cross section 36 | microscopicCrossSection = monteCarlo->_nuclearData->getTotalCrossSection(isotopeGid, energyGroup); 37 | } 38 | else 39 | { 40 | // Return the reaction cross section 41 | microscopicCrossSection = monteCarlo->_nuclearData->getReactionCrossSection((unsigned int)reactionIndex, 42 | isotopeGid, energyGroup); 43 | } 44 | 45 | return atomFraction * cellNumberDensity * microscopicCrossSection; 46 | 47 | } 48 | HOST_DEVICE_END 49 | 50 | 51 | //---------------------------------------------------------------------------------------------------------------------- 52 | // Routine weightedMacroscopicCrossSection calculates the number-density-weighted 53 | // macroscopic cross section of the collection of isotopes in a cell. 54 | //dfr Weighted is a bit of a misnomer here, since there is no weighting 55 | //applied by this routine. In Mercury we would weight for multiple 56 | //materials in a cell. 57 | //---------------------------------------------------------------------------------------------------------------------- 58 | HOST_DEVICE 59 | double weightedMacroscopicCrossSection(MonteCarlo* monteCarlo, int taskIndex, int domainIndex, 60 | int cellIndex, int energyGroup) 61 | { 62 | double* precomputedCrossSection = 63 | &monteCarlo->domain[domainIndex].cell_state[cellIndex]._total[energyGroup]; 64 | qs_assert (precomputedCrossSection != NULL); 65 | if (*precomputedCrossSection > 0.0) 66 | return *precomputedCrossSection; 67 | 68 | int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material; 69 | int nIsotopes = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size(); 70 | double sum = 0.0; 71 | for (int isoIndex = 0; isoIndex < nIsotopes; isoIndex++) 72 | { 73 | sum += macroscopicCrossSection(monteCarlo, -1, domainIndex, cellIndex, 74 | isoIndex, energyGroup); 75 | } 76 | 77 | QS::atomicWrite( *precomputedCrossSection, sum ); 78 | 79 | return sum; 80 | } 81 | HOST_DEVICE_END 82 | -------------------------------------------------------------------------------- /src/MacroscopicCrossSection.hh: -------------------------------------------------------------------------------- 1 | #ifndef MACROSCOPIC_CROSS_SECTION_HH 2 | #define MACROSCOPIC_CROSS_SECTION_HH 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | class MonteCarlo; 7 | 8 | HOST_DEVICE 9 | double macroscopicCrossSection(MonteCarlo* monteCarlo, int reactionIndex, int domainIndex, int cellIndex, 10 | int isoIndex, int energyGroup); 11 | HOST_DEVICE_END 12 | 13 | HOST_DEVICE 14 | double weightedMacroscopicCrossSection(MonteCarlo* monteCarlo, int taskIndex, int domainIndex, 15 | int cellIndex, int energyGroup); 16 | HOST_DEVICE_END 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/MaterialDatabase.hh: -------------------------------------------------------------------------------- 1 | #ifndef MATERIALDATABASE_HH 2 | #define MATERIALDATABASE_HH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "qs_assert.hh" 9 | 10 | // For this material, store the global id in NuclearData of 
the isotope. 11 | class Isotope 12 | { 13 | public: 14 | Isotope() 15 | : _gid(0), _atomFraction(0) { } 16 | 17 | Isotope(int isotopeGid, double atomFraction) 18 | : _gid(isotopeGid), _atomFraction(atomFraction) { } 19 | 20 | ~Isotope() {} 21 | 22 | int _gid; //!< index into NuclearData 23 | double _atomFraction; 24 | 25 | }; 26 | 27 | // Material information 28 | class Material 29 | { 30 | public: 31 | std::string _name; 32 | double _mass; 33 | qs_vector<Isotope> _iso; 34 | 35 | Material() 36 | : _name("0"), _mass(1000.0) {} 37 | 38 | Material(const std::string &name) 39 | : _name(name), _mass(1000.0){} 40 | 41 | Material(const std::string &name, double mass) 42 | : _name(name), _mass(mass){} 43 | 44 | ~Material() {} 45 | 46 | void addIsotope(const Isotope& isotope) 47 | { 48 | _iso.Open(); 49 | _iso.push_back(isotope); 50 | _iso.Close(); 51 | } 52 | 53 | }; 54 | 55 | 56 | // Top level class to store material information 57 | class MaterialDatabase 58 | { 59 | public: 60 | 61 | void addMaterial(const Material& material) 62 | { 63 | _mat.Open(); 64 | _mat.push_back(material); 65 | _mat.Close(); 66 | } 67 | 68 | int findMaterial(const std::string& name) const 69 | { 70 | for (int matIndex = 0; matIndex < _mat.size(); matIndex++) 71 | { 72 | if (_mat[matIndex]._name == name) { return matIndex; } 73 | } 74 | qs_assert(false); 75 | return -1; 76 | } 77 | 78 | // The list of materials in the problem. 79 | qs_vector<Material> _mat; 80 | 81 | }; 82 | 83 | #endif 84 | 85 | // The input for the nuclear data comes from the material section 86 | // The input may look like 87 | // 88 | // material NAME 89 | // nIsotope=XXX 90 | // nReactions=XXX 91 | // fissionCrossSection="XXX" 92 | // scatterCrossSection="XXX" 93 | // absorptionCrossSection="XXX" 94 | // nuBar=XXX 95 | // totalCrossSection=XXX 96 | // fissionWeight=XXX 97 | // scatterWeight=XXX 98 | // absorptionWeight=XXX 99 | // 100 | // Material NAME2 101 | // ... 102 | // 103 | // table NAME 104 | // a=XXX 105 | // b=XXX 106 | // c=XXX 107 | // d=XXX 108 | // e=XXX 109 | // 110 | // table NAME2 111 | // 112 | // Each isotope inside a material will have identical cross sections. 113 | // However, it will be treated as unique in the nuclear data.
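//
// As a sketch of how the classes above fit together (illustrative only;
// the material name and numbers are hypothetical, not from this repo):
//
//    MaterialDatabase db;
//    Material steel("steel");
//    steel.addIsotope(Isotope(/*isotopeGid*/ 0, /*atomFraction*/ 0.5));
//    steel.addIsotope(Isotope(1, 0.5));
//    db.addMaterial(steel);
//
//    int matIndex = db.findMaterial("steel");   // qs_assert fires if absent
//    double f = db._mat[matIndex]._iso[0]._atomFraction;   // 0.5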
114 | -------------------------------------------------------------------------------- /src/MemoryControl.hh: -------------------------------------------------------------------------------- 1 | #ifndef MEMORY_CONTROL_HH 2 | #define MEMORY_CONTROL_HH 3 | 4 | #include "gpuPortability.hh" 5 | #include "qs_assert.hh" 6 | 7 | namespace MemoryControl 8 | { 9 | enum AllocationPolicy {HOST_MEM, UVM_MEM, UNDEFINED_POLICY}; 10 | 11 | template 12 | T* allocate(const int size, const AllocationPolicy policy) 13 | { 14 | if (size == 0) { return NULL;} 15 | T* tmp = NULL; 16 | 17 | switch (policy) 18 | { 19 | case AllocationPolicy::HOST_MEM: 20 | tmp = new T [size]; 21 | break; 22 | #ifdef HAVE_UVM 23 | case AllocationPolicy::UVM_MEM: 24 | void *ptr; 25 | gpuMallocManaged(&ptr, size*sizeof(T)); 26 | tmp = new(ptr) T[size]; 27 | break; 28 | #endif 29 | default: 30 | qs_assert(false); 31 | break; 32 | } 33 | return tmp; 34 | } 35 | 36 | template 37 | void deallocate(T* data, const int size, const AllocationPolicy policy) 38 | { 39 | switch (policy) 40 | { 41 | case AllocationPolicy::HOST_MEM: 42 | delete[] data; 43 | break; 44 | #ifdef HAVE_UVM 45 | case AllocationPolicy::UVM_MEM: 46 | for (int i=0; i < size; ++i) 47 | data[i].~T(); 48 | gpuFree(data); 49 | break; 50 | #endif 51 | default: 52 | qs_assert(false); 53 | break; 54 | } 55 | } 56 | } 57 | 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /src/MeshPartition.hh: -------------------------------------------------------------------------------- 1 | #ifndef MESH_PARTITION_HH 2 | #define MESH_PARTITION_HH 3 | 4 | #include 5 | #include 6 | #include "Long64.hh" 7 | 8 | class MC_Vector; 9 | class GlobalFccGrid; 10 | class CommObject; 11 | 12 | struct CellInfo 13 | { 14 | CellInfo() 15 | : _domainGid(-1), _foreman(-1), _domainIndex(-1), _cellIndex(-1){} 16 | CellInfo(int domainGid, int foreman, int domainIndex, int cellIndex) 17 | :_domainGid(domainGid), _foreman(foreman), _domainIndex(domainIndex), _cellIndex(cellIndex){} 18 | 19 | int _domainGid; 20 | int _foreman; 21 | int _domainIndex; 22 | int _cellIndex; 23 | }; 24 | 25 | 26 | class MeshPartition 27 | { 28 | public: 29 | 30 | typedef std::map MapType; 31 | 32 | MeshPartition(){}; 33 | MeshPartition(int domainGid, int domainIndex, int foreman); 34 | 35 | const int& domainGid() const {return _domainGid;} 36 | const int& domainIndex() const {return _domainIndex;} 37 | const int& foreman() const {return _foreman;} 38 | const std::vector& nbrDomains() const {return _nbrDomains;} 39 | 40 | const CellInfo& getCell(Long64 cellGid){return _cellInfoMap[cellGid];} 41 | MapType::const_iterator findCell(Long64 cellGid) const 42 | {return _cellInfoMap.find(cellGid);} 43 | 44 | MapType::const_iterator begin() const {return _cellInfoMap.begin();} 45 | MapType::const_iterator end() const {return _cellInfoMap.end();} 46 | int size() const { return _cellInfoMap.size(); } 47 | 48 | 49 | void addCell(Long64 cellGid, const CellInfo& cellInfo){_cellInfoMap[cellGid] = cellInfo;} 50 | 51 | // Warning: parition will contain some remote cells with invalid 52 | // domainIndex and cellIndex. These cells are not connected by a 53 | // face to any local cell so they are harmless. We could write code 54 | // to delete them if having them around is a problem. 
55 | void buildMeshPartition(const GlobalFccGrid& grid, 56 | const std::vector centers, 57 | CommObject* comm); 58 | 59 | private: 60 | int _domainGid; //!< gid of this domain 61 | int _domainIndex; //!< local index of this domain 62 | int _foreman; 63 | MapType _cellInfoMap; 64 | std::vector _nbrDomains; // domain; 30 | 31 | Parameters _params; 32 | NuclearData* _nuclearData; 33 | ParticleVaultContainer* _particleVaultContainer; 34 | MaterialDatabase* _materialDatabase; 35 | Tallies *_tallies; 36 | MC_Time_Info *time_info; 37 | MC_Fast_Timer_Container *fast_timer; 38 | MC_Processor_Info *processor_info; 39 | MC_Particle_Buffer *particle_buffer; 40 | 41 | double source_particle_weight; 42 | 43 | private: 44 | // Disable copy constructor and assignment operator 45 | MonteCarlo(const MonteCarlo&); 46 | MonteCarlo& operator=(const MonteCarlo&); 47 | }; 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/MpiCommObject.hh: -------------------------------------------------------------------------------- 1 | #ifndef MPI_COMM_OBJECT_HH 2 | #define MPI_COMM_OBJECT_HH 3 | 4 | #include "CommObject.hh" 5 | 6 | #include 7 | #include 8 | #include "utilsMpi.hh" 9 | 10 | #include "MeshPartition.hh" 11 | #include "Long64.hh" 12 | #include "DecompositionObject.hh" 13 | 14 | class MpiCommObject : public CommObject 15 | { 16 | public: 17 | MpiCommObject(const MPI_Comm& comm, const DecompositionObject& ddc); 18 | 19 | void exchange(MeshPartition::MapType& cellInfo, 20 | const std::vector& nbrDomain, 21 | std::vector > sendSet, 22 | std::vector > recvSet); 23 | 24 | void exchange(std::vector sendBuf, 25 | std::vector& recvBuf); 26 | private: 27 | MPI_Comm _comm; 28 | DecompositionObject _ddc; 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/NVTX_Range.hh: -------------------------------------------------------------------------------- 1 | /// \file 2 | /// Manage NVTX ranges. These are used to provide extra information 3 | /// to NVProf. They also create regions that can be visualized in 4 | /// NVVP. 5 | 6 | /// The easiest way to use a range is to create a NVTX_Range instance 7 | /// at the start of a scope (such as a function). The range will be 8 | /// automatically ended by the destructor when the instance goes out 9 | /// of scope. The endRange() method exists for situations where it 10 | /// would be awkward or impossible to take advantage of scope to end 11 | /// the range. 
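///
/// A short usage sketch (the function name is hypothetical):
///
///    void trackingSection()
///    {
///       NVTX_Range range("trackingSection"); // range opens here
///       // ... work attributed to this range ...
///    } // destructor ends the range
///
/// When the region of interest does not line up with a scope, call
/// endRange() explicitly instead.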
12 | 13 | #ifndef NVTX_RANGE_HH 14 | #define NVTX_RANGE_HH 15 | 16 | #include 17 | 18 | #ifdef USE_NVTX 19 | #include "nvToolsExt.h" 20 | #endif 21 | 22 | 23 | class NVTX_Range 24 | { 25 | public: 26 | 27 | NVTX_Range(const std::string& rangeName) 28 | { 29 | #ifdef USE_NVTX 30 | char *result = strdup(rangeName.c_str()); 31 | _rangeId = nvtxRangeStartA(result); 32 | _isOpen = true; 33 | #endif 34 | } 35 | 36 | ~NVTX_Range() 37 | { 38 | #ifdef USE_NVTX 39 | if (_isOpen) 40 | nvtxRangeEnd(_rangeId); 41 | #endif 42 | } 43 | 44 | void endRange() 45 | { 46 | #ifdef USE_NVTX 47 | nvtxRangeEnd(_rangeId); 48 | _isOpen = false; 49 | #endif 50 | } 51 | 52 | private: 53 | #ifdef USE_NVTX 54 | nvtxRangeId_t _rangeId; 55 | bool _isOpen; 56 | #endif 57 | }; 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /src/NuclearData.hh: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEAR_DATA_HH 2 | #define NUCLEAR_DATA_HH 3 | 4 | #include 5 | #include 6 | #include "QS_Vector.hh" 7 | #include 8 | #include 9 | #include 10 | #include "qs_assert.hh" 11 | #include "DeclareMacro.hh" 12 | 13 | class Polynomial 14 | { 15 | public: 16 | Polynomial(double aa, double bb, double cc, double dd, double ee) 17 | : 18 | _aa(aa), _bb(bb), _cc(cc), _dd(dd), _ee(ee){} 19 | 20 | double operator()(double xx) const 21 | { 22 | return _ee + xx * (_dd + xx * (_cc + xx * (_bb + xx * (_aa)))); 23 | } 24 | 25 | private: 26 | double _aa, _bb, _cc, _dd, _ee; 27 | }; 28 | 29 | // Lowest level class at the reaction level 30 | class NuclearDataReaction 31 | { 32 | public: 33 | // The types of reactions 34 | enum Enum 35 | { 36 | Undefined = 0, 37 | Scatter, 38 | Absorption, 39 | Fission 40 | }; 41 | 42 | NuclearDataReaction(){}; 43 | 44 | NuclearDataReaction(Enum reactionType, double nuBar, const qs_vector& energies, 45 | const Polynomial& polynomial, double reationCrossSection); 46 | 47 | 48 | HOST_DEVICE_CUDA 49 | double getCrossSection(unsigned int group); 50 | HOST_DEVICE_CUDA 51 | void sampleCollision(double incidentEnergy, double material_mass, double* energyOut, 52 | double* angleOut, int &nOut, uint64_t* seed, int max_production_size); 53 | 54 | 55 | qs_vector _crossSection; //!< tabular data for microscopic cross section 56 | Enum _reactionType; //!< What type of reaction is this 57 | double _nuBar; //!< If this is a fission, specify the nu bar 58 | 59 | }; 60 | 61 | // This class holds an array of reactions for neutrons 62 | class NuclearDataSpecies 63 | { 64 | public: 65 | 66 | void addReaction(NuclearDataReaction::Enum type, double nuBar, qs_vector& energies, 67 | const Polynomial& polynomial, double reactionCrossSection); 68 | 69 | qs_vector _reactions; 70 | }; 71 | 72 | // For this isotope, store the cross sections. In this case the species is just neutron. 
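//
// Aside on the Polynomial class above: operator() evaluates a quartic in
// Horner form,
//
//    p(x) = ee + x*(dd + x*(cc + x*(bb + x*aa)))
//         = aa*x^4 + bb*x^3 + cc*x^2 + dd*x + ee,
//
// so a cross-section table with aa = bb = cc = dd = 0 and ee = 1 is a flat,
// energy-independent cross section.
//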
73 | class NuclearDataIsotope 74 | { 75 | public: 76 | 77 | NuclearDataIsotope() 78 | : _species(1,VAR_MEM){} 79 | 80 | qs_vector _species; 81 | 82 | }; 83 | 84 | // Top level class to handle all things related to nuclear data 85 | class NuclearData 86 | { 87 | public: 88 | 89 | NuclearData(int numGroups, double energyLow, double energyHigh); 90 | 91 | int addIsotope(int nReactions, 92 | const Polynomial& fissionFunction, 93 | const Polynomial& scatterFunction, 94 | const Polynomial& absorptionFunction, 95 | double nuBar, 96 | double totalCrossSection, 97 | double fissionWeight, double scatterWeight, double absorptionWeight); 98 | 99 | HOST_DEVICE_CUDA 100 | int getEnergyGroup(double energy); 101 | HOST_DEVICE_CUDA 102 | int getNumberReactions(unsigned int isotopeIndex); 103 | HOST_DEVICE_CUDA 104 | double getTotalCrossSection(unsigned int isotopeIndex, unsigned int group); 105 | HOST_DEVICE_CUDA 106 | double getReactionCrossSection(unsigned int reactIndex, unsigned int isotopeIndex, unsigned int group); 107 | 108 | int _numEnergyGroups; 109 | // Store the cross sections and reactions by isotope, which stores 110 | // it by species 111 | qs_vector _isotopes; 112 | // This is the overall energy layout. If we had more than just 113 | // neutrons, this array would be a vector of vectors. 114 | qs_vector _energies; 115 | 116 | }; 117 | 118 | #endif 119 | 120 | // The input for the nuclear data comes from the material section 121 | // The input looks may like 122 | // 123 | // material NAME 124 | // nIsotope=XXX 125 | // nReactions=XXX 126 | // fissionCrossSection="XXX" 127 | // scatterCrossSection="XXX" 128 | // absorptionCrossSection="XXX" 129 | // nuBar=XXX 130 | // totalCrossSection=XXX 131 | // fissionWeight=XXX 132 | // scatterWeight=XXX 133 | // absorptionWeight=XXX 134 | // 135 | // Material NAME2 136 | // ... 137 | // 138 | // table NAME 139 | // a=XXX 140 | // b=XXX 141 | // c=XXX 142 | // d=XXX 143 | // e=XXX 144 | // 145 | // table NAME2 146 | // 147 | // Each isotope inside a material will have identical cross sections. 148 | // However, it will be treated as unique in the nuclear data. 149 | // Cross sectionsare strings that refer to tables 150 | -------------------------------------------------------------------------------- /src/ParticleVault.cc: -------------------------------------------------------------------------------- 1 | #include "ParticleVault.hh" 2 | #include "MC_Processor_Info.hh" 3 | #include "Globals.hh" 4 | 5 | #if 0 6 | void ParticleVault:: 7 | cleanVault( int end_index ) 8 | { 9 | int s1 = end_index; 10 | int s2 = _particles.size(); 11 | 12 | int starting_point = s2 - ( ( s1<(s2-s1)) ? 
s1 : (s2-s1)); 13 | 14 | #if defined HAVE_OPENMP_TARGET 15 | #pragma omp target teams distribute parallel for thread_limit(64) 16 | #endif 17 | for( int ii = starting_point; ii < s2; ii++ ) 18 | { 19 | qs_assert( _particles[ii-starting_point].species == -1 ); 20 | _particles[ii-starting_point] = _particles[ii]; 21 | _particles[ii].species = -1; 22 | } 23 | 24 | _particles.eraseEnd( _particles.size() - end_index ); 25 | } 26 | #endif 27 | 28 | void ParticleVault:: 29 | collapse( size_t fill_size, ParticleVault* vault2 ) 30 | { 31 | //The entirety of vault 2 fits in the space available in this vault 32 | if( vault2->size() < fill_size ) 33 | { 34 | this->append( *vault2 ); 35 | vault2->clear(); 36 | } 37 | else //Fill in what we can until either vault2 is empty or we have filled this vault 38 | { 39 | bool notEmpty = false; 40 | uint64_t fill = 0; 41 | do 42 | { 43 | MC_Base_Particle base_particle; 44 | notEmpty = vault2->popBaseParticle( base_particle ); 45 | if( notEmpty ) 46 | { 47 | this->pushBaseParticle( base_particle ); 48 | fill++; 49 | } 50 | }while( notEmpty && fill < fill_size); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/ParticleVaultContainer.hh: -------------------------------------------------------------------------------- 1 | #ifndef PARTICLEVAULTCONTAINER_HH 2 | #define PARTICLEVAULTCONTAINER_HH 3 | 4 | #include "DeclareMacro.hh" 5 | 6 | #include "portability.hh" 7 | #include "QS_Vector.hh" 8 | #include <vector> 9 | 10 | //--------------------------------------------------------------- 11 | // ParticleVaultContainer is a container of ParticleVaults. 12 | // These Vaults are broken down into user defined chunks that can 13 | // be used to overlap asynchronous MPI with the tracking kernel. 14 | // 15 | // Facilities for storing Processing, Processed, and Extra vaults 16 | // are controlled by the ParticleVaultContainer, as well as the
20 | 
21 | class MC_Base_Particle;
22 | class MC_Particle;
23 | class ParticleVault;
24 | class SendQueue;
25 | 
26 | //typedef unsigned long long int uint64_cu;
27 | 
28 | class ParticleVaultContainer
29 | {
30 |   public:
31 | 
32 |     //Constructor
33 |     ParticleVaultContainer( uint64_t vault_size,
34 |         uint64_t num_vaults, uint64_t num_extra_vaults );
35 | 
36 |     //Destructor
37 |     ~ParticleVaultContainer();
38 | 
39 |     //Basic Getters
40 |     uint64_t getVaultSize(){ return _vaultSize; }
41 |     uint64_t getNumExtraVaults(){ return _numExtraVaults; }
42 | 
43 |     uint64_t processingSize(){ return _processingVault.size(); }
44 |     uint64_t processedSize(){ return _processedVault.size(); }
45 | 
46 |     //Returns the ParticleVault currently pointed to by the
47 |     //given index
48 |     ParticleVault* getTaskProcessingVault(uint64_t vaultIndex);
49 |     ParticleVault* getTaskProcessedVault( uint64_t vaultIndex);
50 | 
51 |     //Returns the index to the first empty Processed Vault
52 |     uint64_t getFirstEmptyProcessedVault();
53 | 
54 |     //Returns a pointer to the Send Queue
55 |     HOST_DEVICE
56 |     SendQueue* getSendQueue();
57 |     HOST_DEVICE_END
58 | 
59 |     //Counts Particles in all vaults
60 |     uint64_t sizeProcessing();
61 |     uint64_t sizeProcessed();
62 |     uint64_t sizeExtra();
63 | 
64 |     //Collapses Particles down into the fewest vaults needed to
65 |     //hold them; removes all but the last partially
66 |     //filled vault
67 |     void collapseProcessing();
68 |     void collapseProcessed();
69 | 
70 |     //Swaps the particles in Processed for the empty vaults in
71 |     //Processing
72 |     void swapProcessingProcessedVaults();
73 | 
74 |     //Adds a particle to the processing particle vault
75 |     void addProcessingParticle( MC_Base_Particle &particle, uint64_t &fill_vault_index );
76 |     //Adds a particle to the extra particle vault
77 |     HOST_DEVICE
78 |     void addExtraParticle( MC_Particle &particle );
79 |     HOST_DEVICE_END
80 | 
81 |     //Pushes particles from Extra Vaults onto the Processing
82 |     //Vault list
83 |     void cleanExtraVaults();
84 | 
85 |   private:
86 | 
87 |     //The size of the ParticleVaults (fixed at runtime for
88 |     //each run)
89 |     uint64_t _vaultSize;
90 | 
91 |     //The number of Extra Vaults needed, based on heuristics
92 |     //(fixed at runtime for each run)
93 |     uint64_t _numExtraVaults;
94 | 
95 |     //A running index for the number of particles in the extra
96 |     //particle vaults
97 |     uint64_t _extraVaultIndex;
98 | 
99 |     //The send queue - stores particle index and neighbor index
100 |     //for any particles that hit (TRANSIT_OFF_PROCESSOR)
101 |     SendQueue *_sendQueue;
102 | 
103 |     //The list of active particle vaults (size - grow-able)
104 |     std::vector<ParticleVault*> _processingVault;
105 | 
106 |     //The list of censused particle vaults (size - grow-able)
107 |     std::vector<ParticleVault*> _processedVault;
108 | 
109 |     //The list of extra particle vaults (size - fixed)
110 |     qs_vector<ParticleVault*> _extraVault;
111 | 
112 | };
113 | 
114 | #endif
115 | 
--------------------------------------------------------------------------------
/src/PhysicalConstants.cc:
--------------------------------------------------------------------------------
1 | #include "PhysicalConstants.hh"
2 | 
3 | // The values of all physical constants are taken from:
4 | // 2006 CODATA which is located on the web at
5 | // http://physics.nist.gov/cuu/Constants/codata.pdf
6 | 
7 | // The units of physical quantities used by the code are:
8 | //    Mass        - gram (g)
9 | //    Length      - centimeter (cm)
10 | //    Time        - second (s)
11 | //    Energy      - million electron-volts (MeV) : of a particle
12 | //    Energy      - erg (g cm^2/s^2): in some background calculation
13 | //    Temperature - thousand electron-volts (keV)
14 | 
15 | const double PhysicalConstants::_neutronRestMassEnergy = 9.395656981095e+2; /* MeV */
16 | const double PhysicalConstants::_pi = 3.1415926535897932;
17 | const double PhysicalConstants::_speedOfLight = 2.99792458e+10; // cm / s
18 | 
19 | // Constants used in math for computer science, roundoff, and other reasons
20 | const double PhysicalConstants::_tinyDouble  = 1.0e-13;
21 | const double PhysicalConstants::_smallDouble = 1.0e-10;
22 | const double PhysicalConstants::_hugeDouble  = 1.0e+75;
23 | 
--------------------------------------------------------------------------------
/src/PhysicalConstants.hh:
--------------------------------------------------------------------------------
1 | #ifndef PHYSICAL_CONSTANTS_HH
2 | #define PHYSICAL_CONSTANTS_HH
3 | 
4 | #include "DeclareMacro.hh"
5 | HOST_DEVICE_CLASS
6 | namespace PhysicalConstants
7 | {
8 | 
9 |    const double _neutronRestMassEnergy = 9.395656981095e+2; /* MeV */
10 |    const double _pi = 3.1415926535897932;
11 |    const double _speedOfLight = 2.99792458e+10; // cm / s
12 | 
13 |    // Constants used in math for computer science, roundoff, and other reasons
14 |    const double _tinyDouble  = 1.0e-13;
15 |    const double _smallDouble = 1.0e-10;
16 |    const double _hugeDouble  = 1.0e+75;
17 |    //
18 | }
19 | HOST_DEVICE_END
20 | 
21 | 
22 | #endif
23 | 
--------------------------------------------------------------------------------
/src/PopulationControl.hh:
--------------------------------------------------------------------------------
1 | #ifndef POPULATION_CONTROL_HH
2 | #define POPULATION_CONTROL_HH
3 | 
4 | class MonteCarlo;
5 | 
6 | void PopulationControl(MonteCarlo* monteCarlo, bool loadBalance);
7 | 
8 | void RouletteLowWeightParticles(MonteCarlo* monteCarlo);
9 | 
10 | #endif
11 | 
12 | 
--------------------------------------------------------------------------------
/src/QS_Vector.hh:
--------------------------------------------------------------------------------
1 | #ifndef QS_VECTOR_HH
2 | #define QS_VECTOR_HH
3 | 
4 | #include "DeclareMacro.hh"
5 | #include "QS_atomics.hh"
6 | #include "qs_assert.hh"
7 | #include "MemoryControl.hh"
8 | 
9 | #include <algorithm>
10 | 
11 | template <class T>
12 | class qs_vector
13 | {
14 |  public:
15 | 
16 |    qs_vector() : _data(0), _capacity(0), _size(0), _memPolicy(MemoryControl::AllocationPolicy::HOST_MEM), _isOpen(0) {};
17 | 
18 |    qs_vector(int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
19 |    : _data(0), _capacity(size), _size(size), _memPolicy(memPolicy), _isOpen(0)
20 |    {
21 |       _data = MemoryControl::allocate<T>(size, memPolicy);
22 |    }
23 | 
24 | 
25 |    qs_vector( int size, const T& value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
26 |    : _data(0), _capacity(size), _size(size), _memPolicy(memPolicy), _isOpen(0)
27 |    {
28 |       _data = MemoryControl::allocate<T>(size, memPolicy);
29 | 
30 |       for (int ii = 0; ii < _capacity; ++ii)
31 |          _data[ii] = value;
32 |    }
33 | 
34 |    qs_vector(const qs_vector& aa )
35 |    : _data(0), _capacity(aa._capacity), _size(aa._size), _memPolicy(aa._memPolicy), _isOpen(aa._isOpen)
36 |    {
37 |       _data = MemoryControl::allocate<T>(_capacity, _memPolicy);
38 | 
39 |       for (int ii=0; ii<_size; ++ii)
40 |          _data[ii] = aa._data[ii];
41 |    }
42 | 
43 |    ~qs_vector()
44 |    {
45 |       MemoryControl::deallocate(_data, _size, _memPolicy);
46 |    }
47 | 
48 |    /// Needed for copy-swap idiom
49 |    void swap(qs_vector& other)
50 |    {
51 |       std::swap(_data,      other._data);
52 |       std::swap(_capacity,  other._capacity);
53 |       std::swap(_size,      other._size);
54 |       std::swap(_memPolicy, other._memPolicy);
55 |       std::swap(_isOpen,    other._isOpen);
56 |    }
57 | 
58 |    /// Implement assignment using copy-swap idiom
59 |    qs_vector& operator=(const qs_vector& aa)
60 |    {
61 |       if (&aa != this)
62 |       {
63 |          qs_vector temp(aa);
64 |          this->swap(temp);
65 |       }
66 |       return *this;
67 |    }
68 | 
69 |    HOST_DEVICE_CUDA
70 |    int get_memPolicy()
71 |    {
72 |       return _memPolicy;
73 |    }
74 | 
75 |    void push_back( const T& dataElem )
76 |    {
77 |       qs_assert( _isOpen );
78 |       _data[_size] = dataElem;
79 |       _size++;
80 |    }
81 | 
82 |    void Open() { _isOpen = true; }
83 |    void Close(){ _isOpen = false; }
84 | 
85 |    HOST_DEVICE_CUDA
86 |    const T& operator[]( int index ) const
87 |    {
88 |       return _data[index];
89 |    }
90 | 
91 |    HOST_DEVICE_CUDA
92 |    T& operator[]( int index )
93 |    {
94 |       return _data[index];
95 |    }
96 | 
97 |    HOST_DEVICE_CUDA
98 |    int capacity() const
99 |    {
100 |       return _capacity;
101 |    }
102 | 
103 |    HOST_DEVICE_CUDA
104 |    int size() const
105 |    {
106 |       return _size;
107 |    }
108 | 
109 |    T& back()
110 |    {
111 |       return _data[_size-1];
112 |    }
113 | 
114 |    void reserve( int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
115 |    {
116 |       qs_assert( _capacity == 0 );
117 |       _capacity = size;
118 |       _memPolicy = memPolicy;
119 |       _data = MemoryControl::allocate<T>(size, memPolicy);
120 |    }
121 | 
122 |    void resize( int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
123 |    {
124 |       qs_assert( _capacity == 0 );
125 |       _capacity = size;
126 |       _size = size;
127 |       _memPolicy = memPolicy;
128 |       _data = MemoryControl::allocate<T>(size, memPolicy);
129 |    }
130 | 
131 |    void resize( int size, const T& value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM )
132 |    {
133 |       qs_assert( _capacity == 0 );
134 |       _capacity = size;
135 |       _size = size;
136 |       _memPolicy = memPolicy;
137 |       _data = MemoryControl::allocate<T>(size, memPolicy);
138 | 
139 |       for (int ii = 0; ii < _capacity; ++ii)
140 |          _data[ii] = value;
141 |    }
142 | 
143 |    bool empty() const
144 |    {
145 |       return ( _size == 0 );
146 |    }
147 | 
148 |    void eraseEnd( int NewEnd )
149 |    {
150 |       _size = NewEnd;
151 |    }
152 | 
153 |    void pop_back()
154 |    {
155 |       _size--;
156 |    }
157 | 
158 |    void clear()
159 |    {
160 |       _size = 0;
161 |    }
162 | 
163 |    void appendList( int listSize, T* list )
164 |    {
165 |       qs_assert( this->_size + listSize < this->_capacity );
166 | 
167 |       int size = _size;
168 |       this->_size += listSize;
169 | 
170 |       for( int i = size; i < _size; i++ )
171 |       {
172 |          _data[i] = list[ i-size ];
173 |       }
174 | 
175 |    }
176 | 
177 |    //Atomically retrieve an available index, then increment that index by some amount
178 |    HOST_DEVICE_CUDA
179 |    int atomic_Index_Inc( int inc )
180 |    {
181 |       int pos;
182 | 
183 |       QS::atomicCaptureAdd( _size, inc, pos );
184 | 
185 |       return pos;
186 |    }
187 | 
188 |  private:
189 |    T* _data;
190 |    int _capacity;
191 |    int _size;
192 |    bool _isOpen;
193 |    MemoryControl::AllocationPolicy _memPolicy;
194 | 
195 | };
196 | 
197 | 
198 | #endif
199 | 
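Since qs_vector deliberately never reallocates (reserve and resize assert that the vector has not been sized before), filling it follows a fixed pattern. A minimal usage sketch, hypothetical rather than taken from the repository, showing both the serial Open/push_back/Close protocol and the atomic_Index_Inc slot claiming intended for threaded or device code:

    #include "QS_Vector.hh"

    void fillExample()
    {
       qs_vector<int> v;
       v.reserve(1000);        // one-shot allocation; capacity is fixed from here on

       v.Open();               // push_back asserts unless the vector is "open"
       v.push_back(42);
       v.Close();

       // Concurrent filling: each thread atomically claims a slot index.
       #pragma omp parallel for
       for (int ii = 0; ii < 100; ++ii)
       {
          int slot = v.atomic_Index_Inc(1);   // returns the old size; size grows by 1
          v[slot] = ii;
       }
    }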
--------------------------------------------------------------------------------
/src/QS_atomics.hh:
--------------------------------------------------------------------------------
1 | #ifndef QS_ATOMICS_HH
2 | #define QS_ATOMICS_HH
3 | 
4 | #include "gpuPortability.hh"
5 | 
6 | // Provides the following atomic functions:
7 | //  * QS::atomicWrite(a,b)        a=b
8 | //  * QS::atomicAdd(a,b)          a+=b
9 | //  * QS::atomicIncrement(a)      a++
10 | //  * QS::atomicCaptureAdd(a,b,c) c=a; a+=b
11 | // These all function correctly on hip(AMD), cuda, openMP, and openMP offload.
12 | //
13 | // There is one significant complication that we need to worry about
14 | // when trying to provide device native implementations of atomics on
15 | // hip and cuda.  Cuda doesn't allow function overloading based on
16 | // __host__ or __device__ attributes.  If you have two functions with
17 | // the same signature, one with __host__ (or undecorated, since
18 | // functions are __host__ by default) and another with __device__, nvcc
19 | // will produce an error that the function is multiply defined.  The
20 | // solution to this problem is to wrap the overloaded functions in a
21 | // check for the __CUDA_ARCH__ macro, which is defined only when
22 | // compiling for the device.  See
23 | // https://forums.developer.nvidia.com/t/overloading-host-and-device-function/29601
24 | //
25 | // On the other hand, hip seems to have no such problem managing
26 | // functions that are overloaded on __host__ or __device__ attributes.
27 | // Hence, we don't have to worry about checking for the device pass on
28 | // a hip build.
29 | 
30 | 
31 | 
32 | 
33 | // First, we need to provide some "built-in" atomic signatures that
34 | // the CUDA API doesn't provide.  These should only be available in
35 | // the device pass of a CUDA build.  HIP provides these signatures.
36 | #if defined HAVE_CUDA && defined __CUDA_ARCH__
37 | 
38 | // atomicAdd for uint64_t:
39 | // It is common that unsigned long and unsigned long long are both
40 | // 64-bit integers.  In such cases, uint64_t may be defined as
41 | // unsigned long.  Unfortunately, nvidia doesn't supply a version of
42 | // atomicAdd that takes unsigned long arguments.  As long as unsigned
43 | // long and unsigned long long are the same size, we can get away with
44 | // this kind of nonsense.
45 | static inline __device__ uint64_t atomicAdd(uint64_t* address, uint64_t val)
46 | {
47 |    static_assert(sizeof(uint64_t) == sizeof(unsigned long long),
48 |                  "type size mismatch");
49 |    return ::atomicAdd(reinterpret_cast<unsigned long long*>(address), val);
50 | }
51 | 
52 | // atomicExch for double:
53 | // nvidia doesn't supply a version of atomicExch that takes doubles.
54 | // So, we will roll our own with this somewhat evil hack.
55 | static inline __device__ double atomicExch(double* address, double val)
56 | {
57 |    static_assert(sizeof(double) == sizeof(unsigned long long),
58 |                  "type size mismatch");
59 |    return __longlong_as_double
60 |       (
61 |          ::atomicExch(reinterpret_cast<unsigned long long int*>(address),
62 |                       __double_as_longlong(val))
63 |       );
64 | }
65 | 
66 | #endif //#if defined HAVE_CUDA && defined __CUDA_ARCH__
67 | 
68 | 
69 | namespace QS
70 | {
71 |    // First, the versions defined in terms of the native atomic
72 |    // functions provided by CUDA and HIP.
73 | 
74 |    // These get built when building for HIP (which QS assumes means AMD),
75 |    // or the device pass of a CUDA build
76 | #if defined HAVE_HIP || (defined HAVE_CUDA && defined __CUDA_ARCH__)
77 | 
78 |    template <typename T> static inline __device__
79 |    void atomicWrite(T& aa, T bb)
80 |    {
81 |       atomicExch(&aa, bb);
82 |    }
83 | 
84 |    template <typename T> static inline __device__
85 |    void atomicAdd(T& aa, T bb)
86 |    {
87 |       ::atomicAdd(&aa, bb);
88 |    }
89 | 
90 |    template <typename T> static inline __device__
91 |    void atomicIncrement(T& aa)
92 |    {
93 |       ::atomicAdd(&aa, 1);
94 |    }
95 | 
96 |    template <typename T> static inline __device__
97 |    void atomicCaptureAdd(T& aa, T bb, T& cc)
98 |    {
99 |       cc = ::atomicAdd(&aa, bb);
100 |    }
101 | 
102 | #endif // #if defined HAVE_HIP || (defined HAVE_CUDA && defined __CUDA_ARCH__)
103 | 
104 | 
105 |    // Now the versions defined in terms of omp atomic directives.  Note
106 |    // that these apply to both CPU and GPU (i.e., target) code.  These
107 |    // also supply implementations for CPU builds without openMP.
108 |    // Obviously, these functions aren't actually atomic without
109 |    // openMP.  That's OK since without openMP quicksilver can't need
110 |    // atomics on the CPU since it has no way to run multiple threads in
111 |    // the same address space.
112 | 
113 |    // These get built for everything *except* the device pass of a CUDA
114 |    // build.
115 | #if ! (defined HAVE_CUDA && defined __CUDA_ARCH__)
116 | 
117 |    template <typename T> static inline
118 |    void atomicWrite(T& aa, T bb)
119 |    {
120 |       #pragma omp atomic write
121 |       aa = bb;
122 |    }
123 | 
124 |    template <typename T> static inline
125 |    void atomicAdd(T& aa, T bb)
126 |    {
127 |       #pragma omp atomic
128 |       aa += bb;
129 |    }
130 | 
131 |    template <typename T> static inline
132 |    void atomicIncrement(T& aa)
133 |    {
134 |       #pragma omp atomic update
135 |       aa++;
136 |    }
137 | 
138 |    template <typename T> static inline
139 |    void atomicCaptureAdd(T& aa, T bb, T& cc)
140 |    {
141 |       #pragma omp atomic capture
142 |       {cc = aa; aa += bb;}
143 |    }
144 | 
145 | #endif // #if ! (defined HAVE_CUDA && defined __CUDA_ARCH__)
146 | 
147 | } // namespace QS
148 | 
149 | #endif // #ifndef QS_ATOMICS_HH
150 | 
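Because both branches expose the same signatures, one call site works on every backend. A hypothetical tally routine (not from the repository) showing the two most common patterns, an accumulate and a capture-add that hands each thread a unique slot:

    #include "QS_atomics.hh"

    void tally(double* scalarFlux, int cell, double contribution, int& cursor)
    {
       // scalarFlux[cell] += contribution, safely from many threads
       QS::atomicAdd(scalarFlux[cell], contribution);

       // myIndex = cursor; cursor += 1 -- a thread-unique slot for a later write
       int myIndex;
       QS::atomicCaptureAdd(cursor, 1, myIndex);
       (void)myIndex;
    }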
--------------------------------------------------------------------------------
/src/SendQueue.cc:
--------------------------------------------------------------------------------
1 | #include "SendQueue.hh"
2 | #include "QS_Vector.hh"
3 | 
4 | SendQueue::SendQueue()
5 | {}
6 | 
7 | SendQueue::SendQueue( size_t size )
8 | : _data( size, VAR_MEM )
9 | {}
10 | 
11 | 
12 | // -----------------------------------------------------------------------
13 | size_t SendQueue::
14 | size()
15 | {
16 |    return _data.size();
17 | }
18 | 
19 | // -----------------------------------------------------------------------
20 | size_t SendQueue::
21 | neighbor_size( int neighbor_ )
22 | {
23 |    size_t sum_n=0;
24 |    for( size_t i = 0; i < _data.size(); i++ )
25 |    {
26 |       if( neighbor_ == _data[i]._neighbor )
27 |          sum_n++;
28 |    }
29 |    return sum_n;
30 | }
31 | 
32 | // -----------------------------------------------------------------------
33 | HOST_DEVICE
34 | void SendQueue::
35 | push( int neighbor_, int vault_index_ )
36 | {
37 |    size_t indx = _data.atomic_Index_Inc(1);
38 | 
39 |    _data[indx]._neighbor = neighbor_;
40 |    _data[indx]._particleIndex = vault_index_;
41 | }
42 | HOST_DEVICE_END
43 | 
44 | // -----------------------------------------------------------------------
45 | void SendQueue::
46 | clear()
47 | {
48 |    _data.clear();
49 | }
50 | 
51 | // -----------------------------------------------------------------------
52 | sendQueueTuple& SendQueue::
53 | getTuple( int index_ )
54 | {
55 |    qs_assert( index_ >= 0 );
56 |    qs_assert( index_ < _data.size() );
57 |    return _data[index_];
58 | }
59 | 
60 | 
--------------------------------------------------------------------------------
/src/SendQueue.hh:
--------------------------------------------------------------------------------
1 | #ifndef SENDQUEUE_HH
2 | #define SENDQUEUE_HH
3 | 
4 | #include "QS_Vector.hh"
5 | #include "DeclareMacro.hh"
6 | 
7 | //Tuple to record which particles need to be sent to which neighbor process during tracking
8 | struct sendQueueTuple
9 | {
10 |    int _neighbor;
11 |    int _particleIndex;
12 | };
13 | 
14 | class SendQueue
15 | {
16 |  public:
17 | 
18 |    SendQueue();
19 |    SendQueue( size_t size );
20 | 
21 |    //Get the total size of the send queue
22 |    size_t size();
23 | 
24 |    void reserve( size_t size ){ _data.reserve(size, VAR_MEM); }
25 | 
26 |    //Get the number of items in the send queue going to a specific neighbor
27 |    size_t neighbor_size( int neighbor_ );
28 | 
29 |    sendQueueTuple& getTuple( int index_ );
30 | 
31 |    //Add items to the send queue in a kernel
32 |    HOST_DEVICE_CUDA
33 |    void push( int neighbor_, int vault_index_ );
34 | 
35 |    //Clear the send queue after use
36 |    void clear();
37 | 
38 |  private:
39 | 
40 |    //The send queue - stores particle index and neighbor index for any particles that hit (TRANSIT_OFF_PROCESSOR)
41 |    qs_vector<sendQueueTuple> _data;
42 | 
43 | };
44 | 
45 | #endif
46 | 
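A hypothetical end-to-end use of the queue (the surrounding tracking structures are omitted): a facet crossing that leaves the rank records the (neighbor, particle) pair during the kernel, and the communication code then sizes its per-neighbor MPI messages without walking the particle data:

    #include "SendQueue.hh"

    void sendQueueExample()
    {
       SendQueue queue;
       queue.reserve(1000);                               // sized once, like the vaults

       queue.push(/*neighbor_*/ 3, /*vault_index_*/ 17);  // particle 17 leaves toward neighbor 3

       size_t nToNeighbor3 = queue.neighbor_size(3);      // == 1; sizes the MPI send
       sendQueueTuple& t = queue.getTuple(0);             // t._neighbor == 3, t._particleIndex == 17
       (void)t; (void)nToNeighbor3;

       queue.clear();                                     // reset for the next tracking pass
    }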
--------------------------------------------------------------------------------
/src/SharedMemoryCommObject.cc:
--------------------------------------------------------------------------------
1 | #include "SharedMemoryCommObject.hh"
2 | #include "qs_assert.hh"
3 | #include "MeshPartition.hh"
4 | 
5 | using std::set;
6 | using std::vector;
7 | 
8 | 
9 | SharedMemoryCommObject::SharedMemoryCommObject(vector<MeshPartition>& meshPartition)
10 | :_partitions(meshPartition)
11 | {
12 |    _gidToIndex.resize(_partitions.size());
13 |    for (unsigned ii=0; ii<_partitions.size(); ++ii)
14 |    {
15 |       int gid = _partitions[ii].domainGid();
16 |       qs_assert(gid < _partitions.size());
17 |       _gidToIndex[gid] = ii;
18 |    }
19 | 
20 | }
21 | 
22 | void SharedMemoryCommObject::exchange(MeshPartition::MapType& cellInfoMap,
23 |                                       const vector<int>& nbrDomain,
24 |                                       vector<set<Long64> > sendSet,
25 |                                       vector<set<Long64> > recvSet)
26 | 
27 | {
28 |    for (unsigned ii=0; ii<sendSet.size(); ++ii)
29 |    {
30 |       MeshPartition& targetPartition = _partitions[_gidToIndex[nbrDomain[ii]]];
31 |       for (auto iter=sendSet[ii].begin(); iter!=sendSet[ii].end(); ++iter)
32 |       {
33 |          const CellInfo& cellToSend = cellInfoMap[*iter];
34 |          qs_assert(cellToSend._domainIndex >= 0);
35 |          qs_assert(cellToSend._cellIndex >= 0);
36 |          targetPartition.addCell(*iter, cellToSend);
37 |       }
38 |    }
39 | }
40 | 
41 | void SharedMemoryCommObject::exchange(vector sendBuf,
42 |                                       vector& recvBuf)
43 | {
44 |    // This type of exchange should never occur in SharedMemory spaces.
45 |    qs_assert(false);
46 | }
47 | 
48 | 
--------------------------------------------------------------------------------
/src/SharedMemoryCommObject.hh:
--------------------------------------------------------------------------------
1 | #ifndef SHARED_MEMORY_COMM_OBJECT_HH
2 | #define SHARED_MEMORY_COMM_OBJECT_HH
3 | 
4 | #include "CommObject.hh"
5 | 
6 | #include <vector>
7 | 
8 | #include <set>
9 | #include "MeshPartition.hh"
10 | #include "Long64.hh"
11 | 
12 | class SharedMemoryCommObject : public CommObject
13 | {
14 |  public:
15 |    SharedMemoryCommObject(std::vector<MeshPartition>& meshPartition);
16 | 
17 |    void exchange(MeshPartition::MapType& cellInfo,
18 |                  const std::vector<int>& nbrDomain,
19 |                  std::vector<std::set<Long64> > sendSet,
20 |                  std::vector<std::set<Long64> > recvSet);
21 | 
22 |    void exchange(std::vector sendBuf,
23 |                  std::vector& recvBuf);
24 | 
25 | 
26 |  private:
27 |    std::vector<MeshPartition>& _partitions;
28 |    std::vector<int> _gidToIndex;
29 | };
30 | 
31 | #endif
32 | 
--------------------------------------------------------------------------------
/src/Tuple.hh:
--------------------------------------------------------------------------------
1 | #ifndef TUPLE_HH
2 | #define TUPLE_HH
3 | 
4 | class Tuple
5 | {
6 |  public:
7 |    Tuple(){};
8 |    Tuple(int ix, int iy, int iz) : ix_(ix), iy_(iy), iz_(iz){}
9 | 
10 |    const int& x() const {return ix_;}
11 |    const int& y() const {return iy_;}
12 |    const int& z() const {return iz_;}
13 | 
14 |    int& x() {return ix_;}
15 |    int& y() {return iy_;}
16 |    int& z() {return iz_;}
17 | 
18 |    Tuple& operator-=(const Tuple& a);
19 |    Tuple& operator+=(const Tuple& a);
20 |    bool operator<(const Tuple& b) const;
21 | 
22 |  private:
23 |    int ix_;
24 |    int iy_;
25 |    int iz_;
26 | };
27 | 
28 | inline Tuple& Tuple::operator-=(const Tuple& a)
29 | {
30 |    ix_ -= a.ix_;
31 |    iy_ -= a.iy_;
32 |    iz_ -= a.iz_;
33 |    return *this;
34 | }
35 | 
36 | inline Tuple& Tuple::operator+=(const Tuple& a)
37 | {
38 |    ix_ += a.ix_;
39 |    iy_ += a.iy_;
40 |    iz_ += a.iz_;
41 |    return *this;
42 | }
43 | 
44 | inline bool Tuple::operator<(const Tuple& b) const
45 | {
46 |    return
47 |       ix_ < b.ix_ ||
48 |       (ix_ == b.ix_ && (iy_ < b.iy_ ||
49 |                         (iy_ == b.iy_ && iz_ < b.iz_)));
50 | }
51 | 
52 | #endif
--------------------------------------------------------------------------------
/src/cudaFunctions.cc:
--------------------------------------------------------------------------------
1 | #include "cudaFunctions.hh"
5 | 
6 | namespace
7 | {
8 | #if defined GPU_NATIVE
9 |    __global__ void trivialKernel()
10 |    {
11 |       int global_index = getGlobalThreadID();
12 |       if( global_index == 0)
13 |       {
14 |       }
15 |    }
16 | #endif
17 | }
18 | 
19 | #if defined GPU_NATIVE
20 | void warmup_kernel()
21 | {
22 |    trivialKernel<<<1, 1>>>();
23 |    gpuDeviceSynchronize();
24 | }
25 | #endif
26 | 
27 | #if defined GPU_NATIVE
28 | int ThreadBlockLayout( dim3 &grid, dim3 &block, int num_particles )
29 | {
30 |    int run_kernel = 1;
31 |    const uint64_t max_block_size = 65535;
32 |    const uint64_t threads_per_block = 128;
33 | 
34 |    block.x = threads_per_block;
35 |    block.y = 1;
36 |    block.z = 1;
37 | 
38 |    uint64_t num_blocks = num_particles / threads_per_block + ( ( num_particles%threads_per_block == 0 ) ? 0 : 1 );
39 | 
40 |    if( num_blocks == 0 )
41 |    {
42 |       run_kernel = 0;
43 |    }
44 |    else if( num_blocks <= max_block_size )
45 |    {
46 |       grid.x = num_blocks;
47 |       grid.y = 1;
48 |       grid.z = 1;
49 |    }
50 |    else if( num_blocks <= max_block_size*max_block_size )
51 |    {
52 |       grid.x = max_block_size;
53 |       grid.y = 1 + (num_blocks / max_block_size );
54 |       grid.z = 1;
55 |    }
56 |    else if( num_blocks <= max_block_size*max_block_size*max_block_size )
57 |    {
58 |       grid.x = max_block_size;
59 |       grid.y = max_block_size;
60 |       grid.z = 1 + (num_blocks / (max_block_size*max_block_size));
61 |    }
62 |    else
63 |    {
64 |       printf("Error: num_blocks exceeds maximum block specifications. Cannot handle this case yet\n");
65 |       run_kernel = 0;
66 |    }
67 | 
68 |    return run_kernel;
69 | }
70 | #endif
71 | 
72 | #if defined GPU_NATIVE
73 | DEVICE
74 | int getGlobalThreadID()
75 | {
76 |    int blockID  = blockIdx.x +
77 |                   blockIdx.y * gridDim.x +
78 |                   blockIdx.z * gridDim.x * gridDim.y;
79 | 
80 |    int threadID = blockID * (blockDim.x * blockDim.y * blockDim.z) +
81 |                   threadIdx.z * ( blockDim.x * blockDim.y ) +
82 |                   threadIdx.y * blockDim.x +
83 |                   threadIdx.x;
84 |    return threadID;
85 | }
86 | #endif
87 | 
--------------------------------------------------------------------------------
/src/cudaFunctions.hh:
--------------------------------------------------------------------------------
1 | #ifndef CUDAFUNCTIONS_HH
2 | #define CUDAFUNCTIONS_HH
3 | 
4 | #include "cudaUtils.hh"
5 | #include "DeclareMacro.hh"
6 | 
7 | #if defined GPU_NATIVE
8 | void warmup_kernel();
9 | int ThreadBlockLayout( dim3 &grid, dim3 &block, int num_particles );
10 | DEVICE
11 | int getGlobalThreadID();
12 | #endif
13 | 
14 | #endif
15 | 
--------------------------------------------------------------------------------
/src/cudaUtils.hh:
--------------------------------------------------------------------------------
1 | #ifndef CUDAUTILS_HH
2 | #define CUDAUTILS_HH
3 | 
4 | #if defined(HAVE_CUDA)
5 | #include 
6 | #include 
7 | #include 
8 | #endif
9 | 
10 | enum ExecutionPolicy{ cpu, gpuNative, gpuWithOpenMP };
11 | 
12 | inline ExecutionPolicy getExecutionPolicy( int useGPU )
13 | {
14 |    ExecutionPolicy execPolicy = ExecutionPolicy::cpu;
15 | 
16 |    if( useGPU )
17 |    {
18 |       #if defined HAVE_CUDA || defined HAVE_HIP
19 |          execPolicy = ExecutionPolicy::gpuNative;
20 |       #elif defined (HAVE_OPENMP_TARGET)
21 |          execPolicy = ExecutionPolicy::gpuWithOpenMP;
22 |       #endif
23 |    }
24 |    return execPolicy;
25 | }
26 | #endif
27 | 
--------------------------------------------------------------------------------
/src/gpuPortability.hh:
--------------------------------------------------------------------------------
1 | #ifndef GPUPORTABILITY_HH
2 | #define GPUPORTABILITY_HH
3 | 
4 | #if defined __CUDACC__ || defined TARGET_NVIDIA
5 | #define __DO_CUDA
6 | #define __PREFIX cuda
7 | #define HAVE_UVM
8 | #include 
9 | #include 
10 | #include 
11 | #elif defined __HIPCC__ || defined TARGET_AMD
12 | #define __DO_HIP
13 | #define __PREFIX hip
14 | #define HAVE_UVM
15 | #define __HIP_PLATFORM_AMD__
16 | #include <hip/hip_runtime.h>
17 | #else
18 | #define __PREFIX invalid
19 | #endif
20 | 
21 | #if defined HAVE_CUDA || defined HAVE_HIP
22 | #define GPU_NATIVE
23 | #endif
24 | 
25 | 
26 | #ifdef __DO_CUDA
27 | #endif
28 | 
29 | #ifdef __DO_HIP
30 | #endif
31 | 
32 | #if defined HAVE_UVM
33 | #define VAR_MEM MemoryControl::AllocationPolicy::UVM_MEM
34 | #else
35 | #define VAR_MEM MemoryControl::AllocationPolicy::HOST_MEM
36 | #endif
37 | 
38 | #define CONCAT_(A, B) A ## B
39 | #define CONCAT(A1, B1) CONCAT_(A1, B1)
40 | 
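// Illustration (added commentary, not part of the original header): __PREFIX
// is token-pasted onto each runtime entry-point name, so the gpu* aliases
// below bind to whichever backend the build selected.  With __PREFIX == cuda,
//
//    gpuMallocManaged(&ptr, nBytes);   // expands to cudaMallocManaged(&ptr, nBytes)
//    gpuDeviceSynchronize();           // expands to cudaDeviceSynchronize()
//
// while a HIP build gets hipMallocManaged / hipDeviceSynchronize.  A build
// with neither backend maps them to invalidMallocManaged and so on, so any
// accidental use of these aliases fails to compile.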
41 | #define gpuMallocManaged     CONCAT(__PREFIX, MallocManaged)
42 | #define gpuFree              CONCAT(__PREFIX, Free)
43 | #define gpuDeviceSynchronize CONCAT(__PREFIX, DeviceSynchronize)
44 | #define gpuGetDeviceCount    CONCAT(__PREFIX, GetDeviceCount)
45 | #define gpuSetDevice         CONCAT(__PREFIX, SetDevice)
46 | #define gpuPeekAtLastError   CONCAT(__PREFIX, PeekAtLastError)
47 | 
48 | 
49 | #undef __DO_CUDA
50 | #undef __DO_HIP
51 | 
52 | #endif // #ifndef GPUPORTABILITY_HH
53 | 
--------------------------------------------------------------------------------
/src/initMC.hh:
--------------------------------------------------------------------------------
1 | #ifndef INIT_MC_HH
2 | #define INIT_MC_HH
3 | 
4 | class Parameters;
5 | class MonteCarlo;
6 | 
7 | MonteCarlo* initMC(const Parameters& params);
8 | 
9 | #endif
10 | 
--------------------------------------------------------------------------------
/src/macros.hh:
--------------------------------------------------------------------------------
1 | #ifndef MACROS_HH
2 | #define MACROS_HH
3 | 
4 | #include "qs_assert.hh"
5 | #include <stdlib.h>   // malloc/calloc/free used by the MC_* macros below
6 | 
7 | #define MC_CALLOC(A, N1, TYPE) if ( N1 ) { A = (TYPE*) calloc((N1), sizeof(TYPE)); } else { A = NULL; }
8 | #define MC_MALLOC(A, N1, TYPE) if ( N1 ) { A = (TYPE*) malloc((N1)*sizeof(TYPE)); } else { A = NULL; }
9 | #define MC_NEW_ARRAY(A,N1,TYPE) if ( N1 ) { A = new TYPE[N1]; } else { A = NULL; }
10 | #define MC_REALLOC(a, b, c) {qs_assert(false); }
11 | #define MC_FREE(A) if (A != NULL) { free(A) ; A = NULL ; }
12 | #define MC_DELETE(A) if (A != NULL) { delete A ; A = NULL ; }
13 | #define MC_DELETE_ARRAY(A) if (A != NULL) { delete [] A ; A = NULL ; }
14 | #define MC_MEMCPY(a, b, c) {qs_assert(false); }
15 | #define MC_FABS(x) ( (x) < 0 ? -(x) : (x) )
16 | 
17 | 
18 | #define MC_Fatal_Jump(...) {qs_assert(false); }
19 | 
20 | //#define MC_MIN(a, b) {std::min(a,b)}
21 | #define MC_MIN(a, b) ( ((a) < (b)) ? (a) : (b) )
22 | 
23 | // If not compiled with OpenMP, define stub OpenMP
24 | // functions that will work for the code.
25 | #ifdef HAVE_OPENMP
26 | #include <omp.h>
27 | #else
28 | #include 
29 | #include 
30 | #define omp_get_thread_num() 0
31 | #define omp_get_max_threads() 1
32 | #define omp_get_num_procs() 1
33 | #endif
36 | 
37 | #if defined(HAVE_OPENMP) && defined(HAVE_DEBUG)
38 | #define MC_VERIFY_THREAD_ZERO MC_Verify_Thread_Zero(__FILE__, __LINE__);
39 | #else
40 | #define MC_VERIFY_THREAD_ZERO
41 | #endif
42 | 
43 | #ifdef USE_PRINT_DEBUG
44 | #define PRINT_DEBUG printf("FILE: %s\tLINE: %d\n", __FILE__, __LINE__ )
45 | #else
46 | #define PRINT_DEBUG
47 | #endif
48 | #endif // #ifndef MACROS_HH
--------------------------------------------------------------------------------
/src/mc_omp_critical.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | #pragma omp critical
3 | #endif
4 | 
--------------------------------------------------------------------------------
/src/mc_omp_parallel_for_schedule_static.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | if ( (mcco->processor_info->rank == 0) && (mcco->_params.simulationParams.debugThreads >= 2))
3 |    { printf("OpenMP Looping over %d threads\n",omp_get_max_threads()); }
4 | #pragma omp parallel for schedule (static)
5 | #endif
6 | 
--------------------------------------------------------------------------------
/src/mc_omp_parallel_for_schedule_static_if.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | #pragma omp parallel for schedule (static) MC_OMP_PARALLEL_FOR_IF_CONDITION
3 | #endif
4 | 
5 | 
--------------------------------------------------------------------------------
/src/mc_omp_parallel_for_schedule_static_num_physical_cores.hh:
--------------------------------------------------------------------------------
1 | #if defined(HAVE_OPENMP)
2 | int num_physical_cores = mc_get_num_physical_procs();
3 | if ((mcco->processor_info->rank == 0) && (mcco->_params.simulationParams.debugThreads >= 2))
4 |    { printf("OpenMP Looping over %d cores\n",num_physical_cores); }
5 | #pragma omp parallel for schedule (static) num_threads(num_physical_cores)
6 | #endif
7 | 
--------------------------------------------------------------------------------
/src/memUtils.hh:
--------------------------------------------------------------------------------
1 | /// \file
2 | /// Wrappers for memory allocation.
3 | 
4 | #ifndef MEMUTILS_HH
5 | #define MEMUTILS_HH
6 | 
7 | #include <cstdlib>
8 | 
9 | static void* qsMalloc(size_t iSize)
10 | {
11 |    return std::malloc(iSize);
12 | }
13 | 
14 | static void* qsCalloc(size_t num, size_t iSize)
15 | {
16 |    return std::calloc(num, iSize);
17 | }
18 | 
19 | static void* qsRealloc(void* ptr, size_t iSize)
20 | {
21 |    return std::realloc(ptr, iSize);
22 | }
23 | 
24 | static void qsFree(void* ptr)
25 | {
26 |    std::free(ptr);
27 | }
28 | #endif
29 | 
--------------------------------------------------------------------------------
/src/mpi_stubs_internal.hh:
--------------------------------------------------------------------------------
1 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
2 | //
3 | //  Copyright (c) 2012
4 | //  Lawrence Livermore National Security, LLC
5 | //  All Rights Reserved
6 | //
7 | /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
8 | 
9 | #ifndef MPI_STUBS_INTERNAL_H
10 | #define MPI_STUBS_INTERNAL_H
11 | 
12 | #include "mpi_stubs.hh"
13 | 
14 | //----------------------------------------------------------------------------------------------------------------------
15 | // MPI stubs structures to implement mpi calls
16 | //----------------------------------------------------------------------------------------------------------------------
17 | 
18 | typedef struct _List     *pList;     // forward declaration for prototypes.
19 | typedef struct _Listitem *pListitem;
20 | 
21 | typedef uint64_t MPI_Aint;
22 | 
23 | typedef struct _List
24 | {
25 |    pListitem head;
26 |    pListitem tail;
27 |    int count;
28 | } List;
29 | 
30 | 
31 | typedef struct _Listitem
32 | {
33 |    void      *data;
34 |    pListitem prev;
35 |    pListitem next;
36 | 
37 | #ifdef MPI_STUBS_DEBUG_DATA
38 |    pList list;
39 | #endif
40 | 
41 | } Listitem;
42 | 
43 | typedef struct
44 | {
45 |    pList sendlist;
46 |    pList recvlist;
47 | 
48 |    int  num;
49 |    char *name;
50 | 
51 | } Comm;
52 | 
53 | typedef struct
54 | {
55 |    pListitem listitem;  // to allow Req to be removed from list
56 | 
57 |    int *buf;
58 |    int tag;
59 |    int complete;
60 | 
61 | } Req;
62 | 
63 | 
64 | typedef struct _Handleitem
65 | {
66 |    int handle;
67 |    struct _Handleitem *next;
68 | 
69 |    union
70 |    {
71 |       void *anything;  // At least size of void *
72 |       Comm comm;
73 |       Req  req;
74 | 
75 |    } data;
76 | 
77 | 
78 | } Handleitem;
79 | 
80 | typedef struct MPI_Stubs_Data_struct {
81 | 
82 |    MPI_Errhandler errhandler;
83 |    int headcount;
84 |    int itemcount;
85 |    int initialized;
86 | 
87 |    //
88 |    // The first block of handle items will be statically allocated.
89 |    // Subsequent ones will be added if necessary.
90 |    // blocks[0..nblocks-1] are allocated at any given time.
91 |    //
92 |    // Increase MPI_STUBS_MAX_BLOCKS if you *really* need more active requests
93 |    // (Although probably something is wrong if you need more than 256k !!!)
94 |    //
95 |    Handleitem block0[MPI_STUBS_BLOCK_ITEMS];
96 |    Handleitem *(blocks[MPI_STUBS_MAX_BLOCKS]);
97 |    int nblocks;
98 | 
99 |    int need_to_init;
100 |    Handleitem *nextfree;
101 | 
102 |    MPI_Stubs_Data_struct()
103 |    {
104 |       this->errhandler = MPI_ERRORS_ARE_FATAL;
105 |       this->headcount = 0;
106 |       this->itemcount = 0;
107 |       this->initialized = 0;
108 |       this->nblocks = 0;
109 |       this->need_to_init = 1;
110 |       this->nextfree = NULL;
111 |       for (int index=0; index<MPI_STUBS_MAX_BLOCKS; index++) { this->blocks[index] = NULL; }
112 |    }
113 | 
114 |    ~MPI_Stubs_Data_struct() {};
115 | 
116 | } MPI_Stubs_Data_type;
117 | 
118 | 
119 | #endif // ifndef MPI_STUBS_INTERNAL_H
120 | 
--------------------------------------------------------------------------------
/src/parseUtils.cc:
--------------------------------------------------------------------------------
1 | #include "parseUtils.hh"
2 | #include <cctype>
3 | #include "InputBlock.hh"
4 | 
5 | using std::string;
6 | using std::istream;
7 | 
8 | namespace
9 | {
10 |    string whitespace(" \t\f\v\n\r");
11 |    bool isComment(std::string line);
12 |    bool split(string line, string& keyword, string& value, int& indent);
13 |    bool validKeyword(const string& word);
14 |    void chop(string& line);
15 | }
16 | 
17 | bool blockStart(const string& line, string& blockName)
18 | {
19 |    string keyword;
20 |    string value;
21 |    int indent;
22 |    bool valid = split(line, keyword, value, indent);
23 |    if (valid && indent == 0 && value.size() == 0)
24 |    {
25 |       blockName = keyword;
26 |       return true;
27 |    }
28 |    return false;
29 | }
30 | 
31 | string readBlock(InputBlock& block, istream& in)
32 | {
33 |    string line;
34 |    while (!in.eof())
35 |    {
36 |       getline(in, line);
37 |       if (isComment(line))
38 |          continue;
39 |       string keyword;
40 |       string value;
41 |       int indent;
42 |       bool valid = split(line, keyword, value, indent);
43 |       if (!valid || indent == 0)
44 |          break;
45 |       block.addPair(keyword, value);
46 |    }
47 | 
48 |    return line;
49 | }
50 | 
51 | namespace
52 | {
53 |    /// Returns true if line contains nothing but whitespace and
54 |    /// comments.  False otherwise.
55 |    bool isComment(string line)
56 |    {
57 |       size_t here = line.find("//");
58 |       if (here != string::npos)
59 |          line.erase(here, string::npos);
60 |       return (line.find_last_not_of(whitespace) == string::npos);
61 |    }
62 | }
63 | 
64 | namespace
65 | {
66 |    bool split(string line, string& keyword, string& value, int& indent)
67 |    {
68 |       indent = 0;
69 |       while (indent < line.size() && isspace(line[indent]))
70 |          ++indent;
71 | 
72 |       size_t delimPos = line.find_first_of(":=", indent);
73 |       if (delimPos == string::npos)
74 |          return false;
75 |       keyword = line.substr(indent, delimPos-indent);
76 |       chop(keyword);
77 |       if (!validKeyword(keyword))
78 |          return false;
79 |       value.clear();
80 |       if (delimPos + 1 < line.size())
81 |       {
82 |          value = line.substr(delimPos+1, string::npos);
83 |          chop(value);
84 |       }
85 |       return true;
86 |    }
87 | }
88 | 
89 | namespace
90 | {
91 |    bool validKeyword(const string& word)
92 |    {
93 |       return true;
94 |    }
95 | }
96 | 
97 | namespace
98 | {
99 |    void chop(string& line)
100 |    {
101 |       size_t here = line.size();
102 |       while (here > 0 && isspace(line[here-1]))
103 |          --here;
104 |       if (here < line.size())
105 |          line.erase(here, string::npos);
106 |       size_t nSpace = 0;
107 |       while (nSpace < line.size() && isspace(line[nSpace]))
108 |          ++nSpace;
109 |       line.erase(0, nSpace);
110 |    }
111 | }
112 | 
--------------------------------------------------------------------------------
/src/parseUtils.hh:
--------------------------------------------------------------------------------
1 | #ifndef PARSE_UTILS_HH
2 | #define PARSE_UTILS_HH
3 | 
4 | #include <string>
5 | #include <iosfwd>
6 | 
7 | class InputBlock;
8 | 
9 | bool blockStart(const std::string& line, std::string& blockName);
10 | std::string readBlock(InputBlock& block, std::istream& in);
11 | 
12 | #endif
13 | 
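Taken together, blockStart and readBlock are enough to walk a whole input deck of the form shown in the Examples. A minimal driver sketch, assuming only that InputBlock is default-constructible and collects the key/value pairs; the function name scanDeck is hypothetical:

    #include <fstream>
    #include <string>
    #include "InputBlock.hh"
    #include "parseUtils.hh"

    void scanDeck(const char* path)
    {
       std::ifstream in(path);
       std::string line, blockName;
       std::getline(in, line);
       while (!in.eof())
       {
          if (blockStart(line, blockName))      // e.g. "Simulation:" at zero indent
          {
             InputBlock block;                  // assumed default-constructible
             line = readBlock(block, in);       // returns the line that ended the block
          }
          else
             std::getline(in, line);            // skip comments and stray lines
       }
    }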
--------------------------------------------------------------------------------
/src/portability.hh:
--------------------------------------------------------------------------------
1 | #ifndef PORTABILITY_HH
2 | #define PORTABILITY_HH
3 | 
4 | #ifdef CSTDINT_MISSING
5 | #include <stdint.h>
6 | #else
7 | #include <cstdint>
8 | #endif
9 | 
10 | #endif
11 | 
--------------------------------------------------------------------------------
/src/qs_assert.hh:
--------------------------------------------------------------------------------
1 | #include <cstdio>
2 | 
3 | #if defined HAVE_HIP
4 | #define __HIP_PLATFORM_AMD__
5 | #include 
6 | #include 
7 | #endif
8 | 
9 | #if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__
10 | #define qs_assert( cond)                        \
11 |    do                                           \
12 |    {                                            \
13 |       if (!(cond))                              \
14 |       {                                         \
15 |          printf("ERROR\n");                     \
16 |       }                                         \
17 |    } while(0)
18 | #else
19 | #define qs_assert( cond)                                        \
20 |    do                                                           \
21 |    {                                                            \
22 |       if (!(cond))                                              \
23 |       {                                                         \
24 |          printf("file=%s: line=%d ERROR\n",__FILE__,__LINE__);  \
25 |       }                                                         \
26 |    } while(0)
27 | #endif
28 | 
--------------------------------------------------------------------------------
/src/utils.cc:
--------------------------------------------------------------------------------
1 | #include "utils.hh"
2 | #include 
3 | #include "qs_assert.hh"
4 | #include "utilsMpi.hh"
5 | #include "macros.hh"
6 | #include 
7 | #include 
8 | #include 
9 | #include "MonteCarlo.hh"
10 | #include "Globals.hh"
11 | #include "MC_Processor_Info.hh"
12 | 
13 | 
14 | // Returns the number of physical cores.  Relies on the env var
15 | // KMP_PLACE_THREADS being set to something like 60c2t.
16 | // Otherwise, returns omp_get_num_procs()
17 | int mc_get_num_physical_procs(void)
18 | {
19 |    int num_physical_cores = omp_get_num_procs();
20 | #if defined(HAVE_OPENMP) && defined(HAVE_KNL)
21 |    int num_threads_per_core = 0;
22 |    char *env_str = getenv("KMP_PLACE_THREADS");
23 |    if (env_str)
24 |    {
25 |       char *ptr = strchr(env_str, (int)'t');
26 |       if (ptr)
27 |       {
28 |          num_threads_per_core = 1;
29 |          ptr--;
30 |          while ((ptr > env_str) && isdigit(*ptr) )
31 |             { num_threads_per_core = atoi(ptr); ptr--; }
32 |          if (num_threads_per_core > 0)
33 |             { num_physical_cores = omp_get_num_procs() / num_threads_per_core; }
34 |       }
35 |    }
36 | #endif
37 |    return num_physical_cores;
38 | }
39 | 
40 | 
41 | void MC_Verify_Thread_Zero(char const * const file, int line)
42 | {
43 | #ifdef HAVE_OPENMP
44 |    int thread_id = omp_get_thread_num();
45 |    if (thread_id != 0)
46 |    {
47 |       int mpi_rank = -1;
48 |       mpiComm_rank(mcco->processor_info->comm_mc_world, &mpi_rank);
49 |       fprintf(stderr,"Fatal Error: %s:%d MPI Routine called by thread other than zero."
50 |               "\n\tMPI Process %d, Thread %d", file, line, mpi_rank, thread_id);
51 |       mpiAbort(MPI_COMM_WORLD, -1); abort();
52 |    }
53 | #endif
54 |    return;
55 | }
56 | 
57 | void printBanner(const char *git_version, const char *git_hash)
58 | {
59 |    int rank = -1, size=-1, mpi_major=0, mpi_minor=0;
60 |    mpiComm_rank(MPI_COMM_WORLD, &rank);
61 |    mpiComm_size(MPI_COMM_WORLD, &size);
62 |    mpiGet_version(&mpi_major, &mpi_minor);
63 | 
64 |    if (rank == 0)
65 |    {
66 |       printf("Copyright (c) 2016\n");
67 |       printf("Lawrence Livermore National Security, LLC\n");
68 |       printf("All Rights Reserved\n");
69 | 
70 |       printf("Quicksilver Version     : %s\n",git_version);
71 |       printf("Quicksilver Git Hash    : %s\n",git_hash);
72 |       printf("MPI Version             : %d.%d\n",mpi_major,mpi_minor);
73 |       printf("Number of MPI ranks     : %d\n",size);
74 |       printf("Number of OpenMP Threads: %d\n",(int)omp_get_max_threads());
75 |       printf("Number of OpenMP CPUs   : %d\n\n",(int)omp_get_num_procs());
76 |    }
77 | }
78 | 
79 | void Print0(const char *format, ...)
80 | {
81 |    int rank = -1;
82 |    mpiComm_rank(MPI_COMM_WORLD, &rank);
83 | 
84 | #if 0
85 |    printf("rank %i: ", rank);
86 | #else
87 |    if ( rank != 0 ) { return; }
88 | #endif
89 | 
90 |    va_list args;
91 |    va_start( args, format );
92 |    vprintf(format, args);
93 |    va_end( args );
94 | }
95 | 
96 | //----------------------------------------------------------------------------------------------------------------------
97 | // Converts a format string into a c++ string.  Parameters are the same as printf.
98 | //----------------------------------------------------------------------------------------------------------------------
99 | std::string MC_String(const char fmt[], ...)
100 | {
101 |    va_list args;
102 |    va_start(args, fmt);
103 |    int chars_needed = vsnprintf(NULL, 0, fmt, args);
104 |    va_end(args);
105 | 
106 |    if (chars_needed < 0)
107 |    {
108 |       MC_Fatal_Jump( "Output error from vsnprintf: %d", chars_needed );
109 |    }
110 | 
111 |    // Increase by one for the null terminator.
112 |    chars_needed++;
113 | 
114 |    // Bump up chars_needed (if necessary) so that we allocate according to our byte alignment.
115 |    // This is currently 16 bytes, so allocate 16, 32, 48, etc. bytes at a time.
116 |    #define MC_BYTE_ALIGNMENT 16
117 | 
118 |    int remainder = chars_needed % MC_BYTE_ALIGNMENT;
119 |    chars_needed += remainder > 0 ? MC_BYTE_ALIGNMENT - remainder : 0;
120 | 
121 |    std::vector<char> buffer(chars_needed);
122 |    va_start(args, fmt);
123 |    vsnprintf(&buffer[0], chars_needed, fmt, args);
124 |    va_end(args);
125 | 
126 |    return std::string(&buffer[0]);
127 | }
128 | 
129 | 
--------------------------------------------------------------------------------
/src/utils.hh:
--------------------------------------------------------------------------------
1 | #ifndef UTILS_HH
2 | #define UTILS_HH
3 | 
4 | #include <string>
5 | 
6 | int mc_get_num_physical_procs(void);
7 | 
8 | void MC_Verify_Thread_Zero(char const * const file, int line);
9 | 
10 | void printBanner(const char *git_version, const char *git_hash);
11 | 
12 | #define MC_Warning printf
13 | 
14 | void Print0(const char *format, ...);
15 | 
16 | std::string MC_String(const char fmt[], ...);
17 | 
18 | #endif
19 | 
--------------------------------------------------------------------------------
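A usage note for MC_String above: it is printf into a std::string, with the buffer rounded up to the next multiple of MC_BYTE_ALIGNMENT; for example, a 24-byte requirement (23 characters plus the terminator) allocates a 32-byte buffer. A hypothetical call:

    std::string label = MC_String("Cycle %3d of %3d (%.1f%%)", 7, 100, 7.0);
    // label == "Cycle   7 of 100 (7.0%)"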