├── .codecov.yml
├── .github
    └── workflows
    │   └── CI.yml
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── decks
    ├── 2particle.cxx
    ├── 2stream-short.cxx
    ├── custom_init.cxx
    ├── dioctron_3d.cxx
    └── vpic
    │   ├── 2particle.cxx
    │   └── 2stream-em0.cxx
├── example
    ├── CMakeLists.txt
    └── example.cpp
├── scripts
    ├── README.md
    ├── plot.py
    └── run_scripts
    │   ├── build_and_run.sh
    │   ├── build_cabana.sh
    │   ├── build_kokkos.sh
    │   ├── kokkos-tools
    │       ├── Makefile
    │       ├── README.md
    │       ├── kp_kernel_info.h
    │       └── kp_kernel_timer.cpp
    │   ├── main.py
    │   ├── requirements.txt
    │   └── timing_lib.sh
├── src
    ├── CMakeLists.txt
    ├── accumulator.cpp
    ├── accumulator.h
    ├── fields.h
    ├── grid.h
    ├── helpers.h
    ├── input
    │   └── deck.h
    ├── interpolator.cpp
    ├── interpolator.h
    ├── logger.h
    ├── move_p.h
    ├── push.h
    ├── types.h
    ├── uncenter_p.h
    └── visualization.h
├── summary.md
└── tests
    ├── CMakeLists.txt
    ├── decks
        └── CMakeLists.txt
    ├── energy_comparison
        ├── 2stream-em.cxx
        ├── CMakeLists.txt
        ├── compare_energies.h
        ├── energies_gold.2stream-em.double
        └── energies_gold.2stream-em.float
    ├── example.cpp
    ├── include
        └── catch.hpp
    └── manual_tests
        └── test
            ├── 2-particle
                ├── 2pcle-minipic.png
                ├── partloc
                ├── partloc-vpic
                └── plot.gp
            ├── 2-stream-em
                ├── 2stream-em.png
                ├── outw0.2
                ├── outw0.2-2
                └── plot.gp
            └── 2-stream
                ├── 2stream-minipic.eps
                ├── out
                ├── out2
                └── plot.gp


/.codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 |   precision: 1  # decimal places shown in reported coverage percentages
3 |   round: down  # rounding direction applied to reported percentages
4 |   range: "70...100"  # coverage range mapped onto Codecov's red-to-green colour scale
5 | ignore:
6 |   - tests/include  # holds the vendored tests/include/catch.hpp
7 | 


--------------------------------------------------------------------------------
/.github/workflows/CI.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 |   pull_request:
 7 |     branches:
 8 |       - master
 9 |   schedule:
10 |     - cron: '0 4 * * MON'  # weekly run, Mondays at 04:00
11 | 
12 | jobs:
13 |   CI:
14 |     strategy:
15 |       matrix:
16 |         backend: ["SERIAL", "OPENMP"]  # substituted into Kokkos_ENABLE_* / Cabana_REQUIRE_*
17 |     runs-on: ubuntu-latest
18 |     container: ghcr.io/ecp-copa/ci-containers/ubuntu:latest
19 |     steps:
20 |       - name: Checkout kokkos
21 |         uses: actions/checkout@v4
22 |         with:
23 |           repository: kokkos/kokkos
24 |           ref: "3.7.02"  # quoted so the tag is always read as a string
25 |           path: kokkos
26 |       - name: Build kokkos
27 |         working-directory: kokkos
28 |         run: |
29 |           cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/kokkos -DKokkos_CXX_STANDARD=14 -DKokkos_ENABLE_${{ matrix.backend }}=ON
30 |           cmake --build build --parallel 2
31 |           cmake --install build
32 |       - name: Checkout Cabana
33 |         uses: actions/checkout@v4
34 |         with:
35 |           repository: ECP-copa/Cabana
36 |           ref: master  # NOTE(review): unpinned; a Cabana change can break this job
37 |           path: Cabana
38 |       - name: Build Cabana
39 |         working-directory: Cabana
40 |         run: |
41 |           cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos" -DCabana_REQUIRE_${{ matrix.backend }}=ON
42 |           cmake --build build --parallel 2
43 |           cmake --install build
44 |       - name: Checkout CabanaPIC
45 |         uses: actions/checkout@v4
46 |       - name: Build CabanaPIC EM
47 |         run: |
48 |           cmake -B build \
49 |             -DCMAKE_INSTALL_PREFIX=$HOME/CabanaPIC \
50 |             -DMPIEXEC_MAX_NUMPROCS=2 -DMPIEXEC_PREFLAGS="--oversubscribe" \
51 |             -DCMAKE_CXX_FLAGS="--coverage -O0 -Wall -Wextra -pedantic" \
52 |             -DCMAKE_EXE_LINKER_FLAGS="--coverage" \
53 |             -DCMAKE_SHARED_LINKER_FLAGS="--coverage" \
54 |             -DCMAKE_PREFIX_PATH="$HOME/Cabana" \
55 |             -DENABLE_TESTS=ON \
56 |             -DSOLVER_TYPE=EM \
57 |             -DREAL_TYPE=double
58 |           cmake --build build --parallel 2
59 |           cmake --install build
60 |       - name: Test CabanaPIC EM
61 |         run: |
62 |           CTEST_OUTPUT_ON_FAILURE=1 cmake --build build --target test
63 |       - name: Build CabanaPIC ES  # reconfigures the same build tree with SOLVER_TYPE=ES
64 |         run: |
65 |           cmake -B build \
66 |             -DCMAKE_INSTALL_PREFIX=$HOME/CabanaPIC \
67 |             -DMPIEXEC_MAX_NUMPROCS=2 -DMPIEXEC_PREFLAGS="--oversubscribe" \
68 |             -DCMAKE_CXX_FLAGS="--coverage -O0 -Wall -Wextra -pedantic" \
69 |             -DCMAKE_EXE_LINKER_FLAGS="--coverage" \
70 |             -DCMAKE_SHARED_LINKER_FLAGS="--coverage" \
71 |             -DCMAKE_PREFIX_PATH="$HOME/Cabana" \
72 |             -DENABLE_TESTS=ON \
73 |             -DSOLVER_TYPE=ES \
74 |             -DREAL_TYPE=double
75 |           cmake --build build --parallel 2
76 |           cmake --install build
77 |       - name: Test CabanaPIC ES
78 |         run: |
79 |           CTEST_OUTPUT_ON_FAILURE=1 cmake --build build --target test
80 |       - name: Upload Report to codecov.io
81 |         uses: codecov/codecov-action@v1  # NOTE(review): v1 is deprecated; migrate once gcov handling is confirmed
82 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "Cabana"]
2 | 	path = Cabana
3 | 	url = https://github.com/ECP-copa/Cabana.git
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.9)
 2 | project(CabanaPIC LANGUAGES CXX VERSION 0.0.1)
 3 | 
 4 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 5 | set(CMAKE_CXX_STANDARD 14)
 6 | 
 7 | include_directories(${PROJECT_SOURCE_DIR}) # lets sources include "src/input/deck.h"
 8 | include(GNUInstallDirs)
 9 | 
10 | # TODO: Tag this once we have a new release
11 | find_package(Cabana)
12 | 
13 | #### User configuration Options: option(<name> "<help text>" <default>) ####
14 | option(REQUIRE_HOST "Build with the default host execution space." OFF)
15 | option(ENABLE_TESTS "Build the tests in ./tests." OFF)
16 | option(ENABLE_COVERAGE_BUILD "Add --coverage to the C++ compile flags." OFF)
17 | #### End User configuration Options ####
18 | 
19 | ##### SET SOLVER TYPE #####
20 | # Flag for switching between electromagnetic and electrostatic solver
21 | set(SOLVER_TYPE "EM" CACHE STRING "Selected Solver Type")
22 | set(SolverTypes EM ES) # List allowable solver types
23 | # hint the tools the allowed values
24 | set_property(CACHE SOLVER_TYPE PROPERTY STRINGS ${SolverTypes})
25 | if ("${SOLVER_TYPE}" STREQUAL "EM") # quoted: an empty value must compare, not vanish
26 |     add_definitions(-DEM_FIELD_SOLVER=YES)
27 | elseif ("${SOLVER_TYPE}" STREQUAL "ES")
28 |     add_definitions(-DES_FIELD_SOLVER=YES)
29 | else()
30 |     message(FATAL_ERROR "SOLVER_TYPE is not supported (EM/ES only)")
31 | endif()
32 | ##### END SET SOLVER TYPE #####
33 | 
34 | ##### SET DIMENSIONALITY #####
35 | set(DIMENSIONALITY "3" CACHE STRING "Selected dimensionality")
36 | set(ALLOWABLE_DIMENSIONS 1 2 3) # List allowable values
37 | # hint the tools the allowed values
38 | set_property(CACHE DIMENSIONALITY PROPERTY STRINGS ${ALLOWABLE_DIMENSIONS})
39 | 
40 | if (NOT "${DIMENSIONALITY}" STREQUAL "3")
41 |     message(FATAL_ERROR "DIMENSIONALITY != 3 not yet supported")
42 | endif()
43 | ##### END SET DIMENSIONALITY #####
44 | 
45 | ##### SET REAL_TYPE (real_t) #####
46 | set(REAL_TYPE "float" CACHE STRING "Selected type for real numbers")
47 | set(ALLOWABLE_REALS "float" "double") # List allowable values
48 | set_property(CACHE REAL_TYPE PROPERTY STRINGS ${ALLOWABLE_REALS})
49 | add_definitions(-DREAL_TYPE=${REAL_TYPE})
50 | ##### END SET REAL_TYPE #####
51 | 
52 | ###### Allow user to select input deck to build against ######
53 | set(INPUT_DECK "" CACHE STRING "Path to input deck")
54 | 
55 | if (NOT "${INPUT_DECK}" STREQUAL "") # empty (the default) means no user deck
56 |     # TODO: normalize these paths?
57 |     if(EXISTS "${PROJECT_SOURCE_DIR}/${INPUT_DECK}") # source tree is searched first
58 |         add_definitions(-DUSER_INPUT_DECK=${PROJECT_SOURCE_DIR}/${INPUT_DECK})
59 |     elseif(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_DECK}")
60 |         add_definitions(-DUSER_INPUT_DECK=${CMAKE_CURRENT_BINARY_DIR}/${INPUT_DECK})
61 |     else()
62 |         message(FATAL_ERROR "Cannot find user specified input deck: ${INPUT_DECK}")
63 |     endif()
64 | endif()
65 | ####### End User Deck ######
66 | 
67 | if(ENABLE_COVERAGE_BUILD)
68 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
69 | endif(ENABLE_COVERAGE_BUILD)
70 | 
71 | add_subdirectory(src)
72 | set(CabanaPIC_EXAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/example)
73 | add_subdirectory(example)
74 | 
75 | ##### TESTS ######
76 | if (ENABLE_TESTS)
77 |   enable_testing()
78 |   set(TEST_DIR "./tests/include")
79 |   include_directories(${TEST_DIR})
80 |   add_subdirectory(tests)
81 | endif(ENABLE_TESTS)
82 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright 2018-2019 the Cabana authors
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | * Redistributions in binary form must reproduce the above copyright notice,
12 |   this list of conditions and the following disclaimer in the documentation
13 |   and/or other materials provided with the distribution.
14 | * Neither the name of the copyright holder nor the names of its
15 |   contributors may be used to endorse or promote products derived from this
16 |   software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CabanaPIC
 2 | 
 3 | ## Installation
 4 | 
 5 | This code has two major dependencies:
 6 | 
 7 | 1. Kokkos
 8 | 2. Cabana
 9 | 
10 | Instructions on how to obtain and install both can be found [here](https://github.com/ECP-copa/Cabana/wiki/Build-Instructions).
11 | 
12 | Once these are installed, you can configure and build this project using CMake.
13 | The only necessary configuration argument is the path to Cabana (which will
14 | also bring in Kokkos). An example build line will look something like this:
15 | 
16 | ```
17 | cmake -DCMAKE_PREFIX_PATH="$HOME/Cabana/build/install" ..
18 | ```
19 | 
20 | CabanaPIC uses the default enabled Kokkos backend (see more information
21 | [here](https://github.com/kokkos/kokkos/wiki/Initialization#51-initialization-by-command-line-arguments)).
22 | It is possible to require a CPU build by adding `-DREQUIRE_HOST=ON` (which uses
23 | the default enabled host backend).
24 | 
25 | The default field solver is "EM"; to use the "ES" solver, add `-DSOLVER_TYPE="ES"`.
26 | 
27 | 
28 | Note that if Kokkos <=3.4 is used, building with GCC and CUDA support requires
29 | specifying the compiler wrapper:
30 | 
31 | ```
32 | cmake -DCMAKE_PREFIX_PATH="$HOME/Cabana/build/install" -DCMAKE_CXX_COMPILER=$KOKKOS_SRC_DIR/bin/nvcc_wrapper ..
33 | ```
34 | 
35 | Remember that Kokkos, Cabana, and CabanaPIC should all be built with the same
36 | compiler.
37 | 
38 | ## Running
39 | 
40 | Users can compile in custom input decks by specifying `INPUT_DECK` at build
41 | time, e.g.:
42 | 
43 | ```
44 | cmake -DCMAKE_PREFIX_PATH="$HOME/Cabana/build/install" -DINPUT_DECK=./decks/2stream-short.cxx ..
45 | ```
46 | 
47 | Some example decks live in `./decks`. Custom decks must follow the layout put
48 | forth in `./src/input/deck.h`.
49 | 
50 | ## Feature Wishlist
51 | 
52 | 1. Configurable to run in different precisions (real_t to configure float/double)
53 | 2. The particle data store layout should be configurable (AoS/SoA/AoSoA)
54 | 3. The particle shape function used should be configurable
55 | 
56 | ## Copyright
57 | 
58 | © (or copyright) 2019. Triad National Security, LLC. All rights reserved.
59 | 
60 | This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear Security Administration. The Government is granted for itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so.
61 | 
62 | This is open source software; you can redistribute it and/or modify it under the terms of the BSD-3 License. If software is modified to produce derivative works, such modified software should be clearly marked, so as not to confuse it with the version available from LANL.
63 | 


--------------------------------------------------------------------------------
/decks/2particle.cxx:
--------------------------------------------------------------------------------
  1 | #include "src/input/deck.h"
  2 | #include <vector>
  3 | // Override existing init_fields for the two-particle deck
  4 | class Custom_Field_Initializer : public Field_Initializer {
  5 |     public:
  6 |         using real_ = real_t;
  7 | 
  8 |         // This *has* to be virtual, as we store the object as a pointer to the
  9 |         // base class
 10 |         virtual void init(
 11 |                 field_array_t& fields,
 12 |                 size_t nx,
 13 |                 size_t ny,
 14 |                 size_t /*nz*/,   // unused parameters are left unnamed
 15 |                 size_t ng,
 16 |                 real_t /*Lx*/, // TODO: do we prefer xmin or Lx?
 17 |                 real_t /*Ly*/,
 18 |                 real_t /*Lz*/,
 19 |                 real_t dx,
 20 |                 real_t /*dy*/,
 21 |                 real_t /*dz*/
 22 |                 )
 23 |         {
 24 |             std::cout << "Using Custom field Initialization" << std::endl;
 25 | 
 26 |             auto ex = Cabana::slice<FIELD_EX>(fields);
 27 |             auto ey = Cabana::slice<FIELD_EY>(fields);
 28 |             auto ez = Cabana::slice<FIELD_EZ>(fields);
 29 | 
 30 |             auto cbx = Cabana::slice<FIELD_CBX>(fields);
 31 |             auto cby = Cabana::slice<FIELD_CBY>(fields);
 32 |             auto cbz = Cabana::slice<FIELD_CBZ>(fields);
 33 |             // Build a piecewise potential phi on nx+2 points along x, difference it
 34 |             // into Ex, then copy Ex into every field entry by its x index.
 35 |             real_t x0 = 0;
 36 |             // xp1/xp2 are the two particle positions, also used in the particle init.
 37 |             real_t xp1=0.6; //-0.5*hx;
 38 |             real_t xp2=0.8; //+0.5*hx;
 39 | 
 40 |             std::vector<real_t> phi(nx+2), Ex(nx+2); // heap storage instead of a VLA
 41 | 
 42 |             //real_t wi = Lx/(nx*2.0); //particle weight (how to have it here?)
 43 |             real_t wi = 1./2.;
 44 | 
 45 |             //      for (int i=0; i<nx+2; i++){
 46 |             // //       double xp = x0 + dx*i;
 47 |             //        double xc = x0 + dx*(i-0.5);
 48 |             //        if(xc>xp1&&xc<xp2)
 49 |             //   //phi[i] = (xp1+(1.0-xp1-xp2)*xc +xc*xc-xc)*0.5;
 50 |             //          phi[i] = (xp1+(1.0-xp1-xp2)*xc)*wi;
 51 |             //        else if(xc<xp1)
 52 |             //          phi[i] = (xc*(2.0-xp1-xp2))*wi;
 53 |             //        else if(xc>xp2)
 54 |             //          phi[i] = (xp1+xp2-(xp1+xp2)*xc)*wi;
 55 | 
 56 |             // //       printf("%d %e %e ", i, xc,phi[i]);
 57 |             //      }
 58 |             //      for (int i=1; i<nx+1; i++){
 59 |             //        double xc = x0 + dx*(i-0.5);
 60 |             //        Ex[i] = (phi[i-1] - phi[i+1])/(2.0*dx) - (xc-0.5)*wi;
 61 |             //      }
 62 |             //      Ex[0] = Ex[nx];
 63 |             //      Ex[nx+1] = Ex[1];
 64 | 
 65 |             for (size_t i=0; i<nx+2; i++){
 66 |                 real_t xn = x0 + dx*(static_cast<real_t>(i)-1); // i-1 on size_t would wrap at i==0
 67 |                 if(xn>=xp1&&xn<=xp2)
 68 |                     phi[i] = (1.0-xn)*xp1*wi + xn*(1.0-xp2)*wi + (xn*xn-xn)*wi;
 69 |                 else if(xn<xp1)
 70 |                     phi[i] = (1.0-xp1)*xn*wi + xn*(1.0-xp2)*wi + (xn*xn-xn)*wi;
 71 |                 else if(xn>xp2)
 72 |                     phi[i] = (1.0-xn)*xp1*wi + xp2*(1.0-xn)*wi + (xn*xn-xn)*wi;
 73 |             }
 74 |             for (size_t i=1; i<nx+1; i++){
 75 |                 Ex[i] = (phi[i] - phi[i+1])/(dx); // forward difference of phi
 76 |             }
 77 |             Ex[0] = Ex[nx];   // wrap the two end entries around from the opposite side
 78 |             Ex[nx+1] = Ex[1];
 79 |             // for (int i=0; i<nx+2; i++){
 80 |             //   double xc = x0 + dx*(i-0.5);
 81 |             //   double xn = x0 + dx*(i-1);
 82 |             //   printf("%e %e %e %e %e\n", xn,xc,phi[i],Ex[i], (Ex[i]-Ex[i-1])/dx);
 83 |             // }
 84 |             // exit(1);
 85 | 
 86 | 
 87 |             for(size_t i=0; i<fields.size(); i++){
 88 |                 ey(i) = 0.0;
 89 |                 ez(i) = 0.0;
 90 |                 cbx(i) = 0.0;
 91 |                 cby(i) = 0.0;
 92 |                 cbz(i) = 0.0;
 93 |                 size_t ix,iy,iz;
 94 |                 RANK_TO_INDEX(i, ix,iy,iz,nx+2*ng,ny+2*ng);
 95 |                 ex(i) = Ex[ix]; // NOTE(review): Ex has nx+2 entries, so this assumes ng == 1
 96 |                 //		    printf("%d %e\n",ix, ex(i));
 97 |             }
 98 |             // auto _init_fields =
 99 |             //     KOKKOS_LAMBDA( const int i )
100 |             //     {
101 |             //         ex(i) = 0.0;
102 |             //         ey(i) = 0.0;
103 |             //         ez(i) = 0.0;
104 |             //         cbx(i) = 0.0;
105 |             //         cby(i) = 0.0;
106 |             //         cbz(i) = 0.0;
107 |             //         size_t ix,iy,iz;
108 |             //         RANK_TO_INDEX(i, ix,iy,iz,nx+2*ng,ny+2*ng);
109 |             //      ex(i) = Ex[ix]; //does not work
110 |             //         //printf("%d %e %e\n",iy,y,ey(i));
111 | 
112 |             //     };
113 | 
114 |             // Kokkos::parallel_for( fields.size(), _init_fields, "init_fields()" );
115 | 
116 |         }
117 | };
118 | 
119 | // Override existing init_particles: places two particles at rest
120 | class Custom_Particle_Initializer : public Particle_Initializer {
121 |     public:
122 |         using real_ = real_t;
123 | 
124 |         // This *has* to be virtual, as we store the object as a pointer to the
125 |         // base class.
126 |         // Particle 0 gets cell index ix = (size_t)((0.6-xmin)/hx); every other
127 |         // particle gets (size_t)((0.8-xmin)/hx) + 1. All particles get weight w and
128 |         // zero velocity. dxp, nppc and v0 are unused, so they are left unnamed in
129 |         // the same way decks/custom_init.cxx leaves its unused parameters unnamed.
130 |         virtual void init(
131 |                 particle_list_t& particles,
132 |                 size_t nx,
133 |                 size_t ny,
134 |                 size_t nz,
135 |                 size_t ng,
136 |                 real_ /*dxp*/,
137 |                 size_t /*nppc*/,
138 |                 real_ w,
139 |                 real_ /*v0*/,
140 |                 real_ Lx, // TODO: is there a better way to pass/read global lens?
141 |                 real_ Ly,
142 |                 real_ Lz
143 |                 ) override
144 |         {
145 |             std::cout << "Using Custom Particle Initialization" << std::endl;
146 |             std::cout << "Lx = " << Lx << " Ly " << Ly << " Lz " << Lz << std::endl;
147 | 
148 |             auto position_x = Cabana::slice<PositionX>(particles);
149 |             auto position_y = Cabana::slice<PositionY>(particles);
150 |             auto position_z = Cabana::slice<PositionZ>(particles);
151 | 
152 |             auto velocity_x = Cabana::slice<VelocityX>(particles);
153 |             auto velocity_y = Cabana::slice<VelocityY>(particles);
154 |             auto velocity_z = Cabana::slice<VelocityZ>(particles);
155 | 
156 |             auto weight = Cabana::slice<Weight>(particles);
157 |             auto cell = Cabana::slice<Cell_Index>(particles);
158 | 
159 |             real_t hx = Lx/nx; // cell width along x
160 |             //real_t hy = Ly/ny;
161 |             //real_t hz = Lz/nz;
162 |             real_t xmin = 0; //-0.5*Lx;
163 |             //real_t ymin = 0; //-0.5*Ly;
164 | 
165 | #define rand_float(min, max) (min + (max-min)*rand()/RAND_MAX)
166 | 
167 |             auto _init =
168 |                 KOKKOS_LAMBDA( const int s, const int i )
169 |                 {
170 |                     // Flat particle index built from struct index s and lane i.
171 |                     const size_t pi = (s)*particle_list_t::vector_length+i;
172 |                     const real_t xp1=0.6; //-0.5*hx; //those two numbers are also used in field init
173 |                     const real_t xp2=0.8; //+0.5*hx;
174 |                     //2 particles only: index 0 uses xp1, any other index uses xp2
175 |                     const bool first = (pi==0);
176 | 
177 |                     // Convert the chosen position into a cell count measured from
178 |                     // xmin; the cast truncates towards zero.
179 |                     const real_t xp = first ? xp1 : xp2;
180 |                     const real_t cell_pos = (xp-xmin)/hx;
181 |                     size_t ix = (size_t) cell_pos;
182 |                     const size_t iy = 1;
183 |                     const size_t iz = 1;
184 |                     if(!first){
185 |                         // Only the second particle is shifted one cell to the right;
186 |                         // the first keeps the truncated index unchanged.
187 |                         ix++;
188 |                     }
189 | 
190 |                     // Stored positions. NOTE(review): these look like cell-local
191 |                     // offsets (custom_init.cxx says "each cell length is 2") - confirm.
192 |                     const real_t x = first ? 1-0.5*hx : -1+0.5*hx;
193 |                     const real_t y = 0;
194 |                     const real_t z = 0;
195 | 
196 |                     position_x.access(s,i) = x;
197 |                     position_y.access(s,i) = y;
198 |                     position_z.access(s,i) = z;
199 | 
200 |                     cell.access(s,i) = VOXEL(ix,iy,iz,nx,ny,nz,ng); //needs to be more general
201 | 
202 |                     weight.access(s,i) = w;
203 | 
204 |                     velocity_x.access(s,i) = 0; //sign * v0 *gam*(1.0+na*sign); //0;
205 |                     velocity_y.access(s,i) = 0;
206 |                     velocity_z.access(s,i) = 0;
207 | 
208 |                     //std::cout << "Placing particles as "
209 |                     //<< x << ", " << y << ", " << z << " with u=0 in cell " << cell.access(s,i) << " with w " << w << std::endl;
210 |                 };
211 | 
212 |             Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
213 |                 vec_policy( 0, particles.size() );
214 |             Cabana::simd_parallel_for( vec_policy, _init, "init()" );
215 |         }
216 | };
217 | 
218 | Input_Deck::Input_Deck()
219 | {
220 |     field_initer = new Custom_Field_Initializer(); // use the custom field init defined above
221 |     particle_initer = new Custom_Particle_Initializer(); // use the custom particle init defined above
222 | 
223 |     // User puts initialization code here
224 |     nx = 1000; // cells along x; ny = nz = 1 below
225 |     ny = 1;
226 |     nz = 1;
227 | 
228 |     num_steps = 200000;
229 |     nppc = 1;
230 | 
231 |     v0 = 0.0; // no drift velocity
232 | 
233 |     const real_t default_grid_len = 1.0;
234 | 
235 |     //    const real_t a = 0.1;
236 |     len_x_global = default_grid_len; //16*a;
237 |     len_y_global = default_grid_len; //16*a;
238 |     len_z_global = default_grid_len;
239 | 
240 |     n0 = 1.0;
241 |     Npe = n0*len_x_global*len_y_global*len_z_global; // n0 times the global box volume
242 | 
243 |     dt = 0.99*courant_length( // 0.99 of courant_length(...)/c
244 |             len_x_global, len_y_global, len_z_global,
245 |             nx, ny, nz
246 |             ) / c;
247 | 
248 |     ec = 1.0;
249 |     qsp = ec; // charge and mass both take the value of ec
250 |     me = qsp;
251 | 
252 |     Ne = 2; // matches the "2 particles only" particle initializer
253 | 
254 |     num_particles = 2;
255 | 
256 | }
257 | 


--------------------------------------------------------------------------------
/decks/2stream-short.cxx:
--------------------------------------------------------------------------------
 1 | #include "src/input/deck.h"
 2 | // For a list of available global variables, see `src/input/deck.h`, common ones include:
 3 | /*
 4 |         real_ de = 1.0; // Length normalization (electron inertial length)
 5 |         real_ ec = 1.0; // Charge normalization
 6 |         real_ me = 1.0; // Mass normalization
 7 |         real_ mu = 1.0; // permeability of free space
 8 |         real_ c = 1.0; // Speed of light
 9 |         real_ eps = 1.0; // permittivity of free space
10 |         real_ n0 = 1.0; // Background plasma density
11 |         size_t nx = 16;
12 |         size_t ny = 1;
13 |         size_t nz = 1;
14 |         size_t nppc = 1;
15 |         double dt = 1.0;
16 |         int num_steps = 2;
17 |         real_ len_x_global = 1.0;
18 |         real_ len_y_global = 1.0;
19 |         real_ len_z_global = 1.0;
20 |         real_ v0 = 1.0; //drift velocity
21 |         size_t num_ghosts = 1;
22 |         (len_x and dx will automatically be set)
23 | */
24 | // I would rather declare this as a class, not just as a constructor, but that
25 | // would have to be in a header (which would stop the compile detecting
26 | // changes...). This is fine for now.
27 | Input_Deck::Input_Deck()
28 | {
29 |     // User puts initialization code here
30 |     nx = 32; // cells along x; ny = nz = 1 below
31 |     ny = 1;
32 |     nz = 1;
33 | 
34 |     num_steps = 3000;
35 |     nppc = 100;
36 | 
37 |     v0 = 0.2; // drift velocity
38 | 
39 |     // Can also create temporaries
40 |     real_ gam = 1.0 / sqrt(1.0 - v0*v0); // NOTE(review): gam is never read in this constructor
41 | 
42 |     const real_t default_grid_len = 1.0;
43 | 
44 |     len_x_global = 3.14159265358979*0.5; // TODO: use proper PI?
45 |     len_y_global = default_grid_len;
46 |     len_z_global = default_grid_len;
47 | 
48 |     Npe = n0*len_x_global*len_y_global*len_z_global; // NOTE(review): reads n0 before it is set to 2.0 below - confirm intended
49 | 
50 |     dt = 0.99*courant_length( // 0.99 of courant_length(...)/c
51 |             len_x_global, len_y_global, len_z_global,
52 |             nx, ny, nz
53 |             ) / c;
54 | 
55 |     n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1
56 | }
57 | 


--------------------------------------------------------------------------------
/decks/custom_init.cxx:
--------------------------------------------------------------------------------
  1 | #include "src/input/deck.h"
  2 | // For a list of available global variables, see `src/input/deck.h`, common ones include:
  3 | /*
  4 |         real_ de = 1.0; // Length normalization (electron inertial length)
  5 |         real_ ec = 1.0; // Charge normalization
  6 |         real_ me = 1.0; // Mass normalization
  7 |         real_ mu = 1.0; // permeability of free space
  8 |         real_ c = 1.0; // Speed of light
  9 |         real_ eps = 1.0; // permittivity of free space
 10 |         real_ n0 = 1.0; // Background plasma density
 11 |         size_t nx = 16;
 12 |         size_t ny = 1;
 13 |         size_t nz = 1;
 14 |         size_t nppc = 1;
 15 |         double dt = 1.0;
 16 |         int num_steps = 2;
 17 |         real_ len_x_global = 1.0;
 18 |         real_ len_y_global = 1.0;
 19 |         real_ len_z_global = 1.0;
 20 |         real_ v0 = 1.0; //drift velocity
 21 |         size_t num_ghosts = 1;
 22 |         (len_x and dx will automatically be set)
 23 | */
 24 | // I would rather declare this as a class, not just as a constructor, but that
 25 | // would have to be in a header (which would stop the compile detecting
 26 | // changes...). This is fine for now.
 27 | 
 28 | // Custom particle initializer used in place of the default init_particles
 29 | class Custom_Particle_Initializer : public Particle_Initializer {
 30 |     public:
 31 |         using real_ = real_t;
 32 | 
 33 |         // Must stay virtual: the deck stores this object through a pointer to
 34 |         // the base class.
 35 |         virtual void init(
 36 |                 particle_list_t& particles,
 37 |                 size_t nx,
 38 |                 size_t ny,
 39 |                 size_t,
 40 |                 size_t ng,
 41 |                 real_ dxp,
 42 |                 size_t nppc,
 43 |                 real_ w,
 44 |                 real_ v0,
 45 |                 real_,
 46 |                 real_,
 47 |                 real_
 48 |                 ) override
 49 |         {
 50 |             std::cout << "Using Custom Particle Initialization" << std::endl;
 51 | 
 52 |             auto cell_id = Cabana::slice<Cell_Index>(particles);
 53 |             auto wgt = Cabana::slice<Weight>(particles);
 54 | 
 55 |             auto pos_x = Cabana::slice<PositionX>(particles);
 56 |             auto pos_y = Cabana::slice<PositionY>(particles);
 57 |             auto pos_z = Cabana::slice<PositionZ>(particles);
 58 | 
 59 |             auto vel_x = Cabana::slice<VelocityX>(particles);
 60 |             auto vel_y = Cabana::slice<VelocityY>(particles);
 61 |             auto vel_z = Cabana::slice<VelocityZ>(particles);
 62 | 
 63 |             // TODO: sensible way to do rand in parallel?
 64 |             //srand (static_cast <unsigned> (time(0)));
 65 | 
 66 |             auto fill_particle =
 67 |                 KOKKOS_LAMBDA( const int s, const int i )
 68 |                 {
 69 |                     // Particles are handled in consecutive pairs: the even member
 70 |                     // of a pair drifts with +v0, the odd member with -v0.
 71 |                     const size_t flat = (s)*particle_list_t::vector_length+i;
 72 |                     const size_t pair = ((flat) / 2);
 73 |                     const int dir = (flat%2 == 0) ? 1 : -1;
 74 | 
 75 |                     // Both members of a pair share the same slot within a cell
 76 |                     // and the same cell number (counted before ghost cells).
 77 |                     const size_t slot = (2*pair)%nppc;
 78 |                     const size_t cell0 = (2*pair/nppc); // ranges over [0,nx*ny*nz)
 79 | 
 80 |                     // In-cell position: slot centres spaced dxp apart, starting at -1.
 81 |                     const real_ x = slot*dxp+0.5*dxp-1.0;
 82 |                     pos_x.access(s,i) = x;
 83 |                     pos_y.access(s,i) = 0.0;
 84 |                     pos_z.access(s,i) = 0.0;
 85 |                     wgt.access(s,i) = w;
 86 | 
 87 |                     // Shift by one to skip the leading ghost cell along x.
 88 |                     // NOTE(review): nz is not a named parameter of this function;
 89 |                     // presumably VOXEL does not expand that argument - confirm.
 90 |                     const int ix = cell0+1;
 91 |                     const int iy = 1;
 92 |                     const int iz = 1;
 93 |                     cell_id.access(s,i) = VOXEL(ix,iy,iz,nx,ny,nz,ng);
 94 | 
 95 |                     // Initialize velocity.(each cell length is 2)
 96 |                     // A small sinusoidal perturbation along x is applied to the drift.
 97 |                     real_t perturb = 0.0001*sin(2.0*3.1415926*((x+1.0+ix*2)/(2*nx)));
 98 |                     real_ gam = 1.0/sqrt(1.0-v0*v0);
 99 |                     vel_x.access(s,i) = dir * v0*gam*(1.0+perturb);
100 |                     vel_y.access(s,i) = 0;
101 |                     vel_z.access(s,i) = 0;
102 |                 };
103 | 
104 |             Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
105 |                 policy( 0, particles.size() );
106 |             Cabana::simd_parallel_for( policy, fill_particle, "init()" );
107 |         }
108 | };
107 | 
108 | Input_Deck::Input_Deck()
109 | {
110 |     // User puts initialization code here
111 | 
112 |     std::cout << "Custom Input_Deck constructor" << std::endl;
113 |     // Tell the deck to use the custom initer in place of the default
114 |     particle_initer = new Custom_Particle_Initializer();
115 | 
116 |     nx = 32; // cells along x; ny = nz = 1 below
117 |     ny = 1;
118 |     nz = 1;
119 | 
120 |     num_steps = 30;
121 |     nppc = 100;
122 | 
123 |     v0 = 0.0866025403784439; // drift velocity
124 | 
125 |     // Can also create local temporaries
126 |     real_ gam = 1.0 / sqrt(1.0 - v0*v0); // used for len_x_global below
127 | 
128 |     const real_t default_grid_len = 1.0;
129 | 
130 |     len_x_global = 6.28318530717959*(gam*sqrt(gam));
131 |     len_y_global = default_grid_len;
132 |     len_z_global = default_grid_len;
133 | 
134 |     Npe = n0*len_x_global*len_y_global*len_z_global; // NOTE(review): reads n0 before it is set to 2.0 below - confirm intended
135 | 
136 |     dt = 0.99*courant_length( // 0.99 of courant_length(...)/c
137 |             len_x_global, len_y_global, len_z_global,
138 |             nx, ny, nz
139 |             ) / c;
140 | 
141 |     n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1
142 | }
143 | 


--------------------------------------------------------------------------------
/decks/dioctron_3d.cxx:
--------------------------------------------------------------------------------
  1 | #include "src/input/deck.h"
  2 | 
  3 | // Override existing init_fields
  4 | class Custom_Field_Initializer : public Field_Initializer {
  5 |     public:
  6 |         using real_ = real_t;
  7 | 
  8 |         // Writes the initial E and cB values for every entry of `fields`.
  9 |         // Must stay virtual: the deck calls it through a base-class pointer.
 10 |         virtual void init(
 11 |                 field_array_t& fields,
 12 |                 size_t nx,
 13 |                 size_t ny,
 14 |                 size_t nz,
 15 |                 size_t ng,
 16 |                 real_t Lx, // TODO: do we prefer xmin or Lx?
 17 |                 real_t Ly,
 18 |                 real_t Lz,
 19 |                 real_t dx,
 20 |                 real_t dy,
 21 |                 real_t dz
 22 |                 )
 23 |         {
 24 |             std::cout << "Using Custom field Initialization" << std::endl;
 25 | 
 26 |             auto cbx = Cabana::slice<FIELD_CBX>(fields);
 27 |             auto cby = Cabana::slice<FIELD_CBY>(fields);
 28 |             auto cbz = Cabana::slice<FIELD_CBZ>(fields);
 29 | 
 30 |             auto ex = Cabana::slice<FIELD_EX>(fields);
 31 |             auto ey = Cabana::slice<FIELD_EY>(fields);
 32 |             auto ez = Cabana::slice<FIELD_EZ>(fields);
 33 | 
 34 |             // ey follows -y inside |y| <= half_width and is held constant outside.
 35 |             real_t half_width = 0.1;
 36 |             // Uniform value written to cbz in every entry.
 37 |             real_t bz_uniform = sqrt(20.0);
 38 |             real_t y_origin = -0.5*Ly;
 39 | 
 40 |             auto fill_entry =
 41 |                 KOKKOS_LAMBDA( const int i )
 42 |                 {
 43 |                     // RANK_TO_INDEX presumably unpacks flat index i into (ix,iy,iz).
 44 |                     size_t ix,iy,iz;
 45 |                     RANK_TO_INDEX(i, ix,iy,iz,nx+2*ng,ny+2*ng);
 46 |                     real_t y = y_origin + (iy-0.5)*dy;
 47 | 
 48 |                     ex(i) = 0.0;
 49 |                     ez(i) = 0.0;
 50 |                     cbx(i) = 0.0;
 51 |                     cby(i) = 0.0;
 52 |                     cbz(i) = bz_uniform;
 53 | 
 54 |                     // Clamp: +half_width below the band, -half_width above it.
 55 |                     const bool below = (y < -half_width);
 56 |                     const bool above = (y >  half_width);
 57 |                     if(below) {
 58 |                         ey(i) = half_width;
 59 |                     }
 60 |                     else {
 61 |                         ey(i) = above ? -half_width : -y;
 62 |                     }
 63 |                 };
 64 | 
 65 |             // One invocation of fill_entry per entry of fields.
 66 |             Kokkos::parallel_for( fields.size(), fill_entry, "init_fields()" );
 67 |         }
 68 | };
 69 | 
 70 | // Override existing init_particles
 71 | class Custom_Particle_Initializer : public Particle_Initializer {
 72 |     public:
 73 |         using real_ = real_t;
 74 | 
 75 |         // Places every particle at a random x across the box and a random y in
 76 |         // [-0.1, 0.1], at rest. Virtual: the deck stores it via a base pointer.
 77 |         virtual void init(
 78 |                 particle_list_t& particles,
 79 |                 size_t nx,
 80 |                 size_t ny,
 81 |                 size_t nz,
 82 |                 size_t ng,
 83 |                 real_ dxp,
 84 |                 size_t nppc,
 85 |                 real_ w,
 86 |                 real_ v0,
 87 |                 real_ Lx, // TODO: is there a better way to pass/read global lens?
 88 |                 real_ Ly,
 89 |                 real_ Lz
 90 |                 )
 91 |         {
 92 |             std::cout << "Using Custom Particle Initialization" << std::endl;
 93 |             std::cout << "Lx = " << Lx << " Ly " << Ly << " Lz " << Lz << std::endl;
 94 | 
 95 |             auto position_x = Cabana::slice<PositionX>(particles);
 96 |             auto position_y = Cabana::slice<PositionY>(particles);
 97 |             auto position_z = Cabana::slice<PositionZ>(particles);
 98 | 
 99 |             auto velocity_x = Cabana::slice<VelocityX>(particles);
100 |             auto velocity_y = Cabana::slice<VelocityY>(particles);
101 |             auto velocity_z = Cabana::slice<VelocityZ>(particles);
102 | 
103 |             auto weight = Cabana::slice<Weight>(particles);
104 |             auto cell = Cabana::slice<Cell_Index>(particles);
105 | 
106 |             real_t hx = Lx/nx;
107 |             real_t hy = Ly/ny;
108 |             real_t xmin = -0.5*Lx;
109 |             real_t ymin = -0.5*Ly;
110 | 
111 |             // Uniform draw in [min, max]. Arguments are parenthesised so that
112 |             // compound expressions expand safely; undefined again after use.
113 | #define rand_float(min, max) ((min) + ((max)-(min))*rand()/RAND_MAX)
114 | 
115 |             auto _init =
116 |                 KOKKOS_LAMBDA( const int s, const int i )
117 |                 {
118 |                     // NOTE(review): rand() is a host function with shared state;
119 |                     // confirm this never runs on a device or threaded backend.
120 |                     size_t ix, iy, iz;
121 |                     real_t x, y, z;
122 | 
123 |                     // Global x -> cell index ix plus an in-cell offset in [-1, 1].
124 |                     x = rand_float(-0.5*Lx,0.5*Lx);
125 |                     x= (x-xmin)/hx;
126 |                     ix = (size_t) x;
127 |                     x -= (real_t) ix;
128 |                     x = x+x-1;
129 |                     if(ix==nx) x = 1;   // draw landed exactly on the upper edge
130 |                     if(ix==nx) ix = nx-1;
131 | 
132 |                     // Same mapping for y, with the draw limited to |y| <= 0.1.
133 |                     y = rand_float(-0.1f, 0.1f); //a = 0.1
134 |                     y = (y-ymin)/hy;
135 |                     iy = (size_t) y;
136 |                     y -= (real_t) iy;
137 |                     y = y+y-1;
138 |                     if(iy==ny) y = 1;
139 |                     if(iy==ny) iy = ny-1;
140 | 
141 |                     z = 0;
142 |                     iz = 0;
143 | 
144 |                     position_x.access(s,i) = x;
145 |                     position_y.access(s,i) = y;
146 |                     position_z.access(s,i) = z;
147 | 
148 |                     // The +1 presumably skips the leading ghost cell - confirm.
149 |                     cell.access(s,i) = VOXEL(ix+1,iy+1,iz+1,nx,ny,nz,ng); //needs to be more general
150 | 
151 |                     weight.access(s,i) = w;
152 | 
153 |                     // Particles start at rest.
154 |                     velocity_x.access(s,i) = 0;
155 |                     velocity_y.access(s,i) = 0;
156 |                     velocity_z.access(s,i) = 0;
157 |                 };
158 | #undef rand_float
159 | 
160 |             // Run _init once per (struct, lane) pair of the particle list.
161 |             Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
162 |                 vec_policy( 0, particles.size() );
163 |             Cabana::simd_parallel_for( vec_policy, _init, "init()" );
164 |         }
165 | };
166 | 
167 | Input_Deck::Input_Deck()
168 | {
169 |     // Swap in the custom field and particle initializers defined above.
170 |     field_initer = new Custom_Field_Initializer();
171 |     particle_initer = new Custom_Particle_Initializer();
172 | 
173 |     // User puts initialization code here
174 |     nx = 64;
175 |     ny = 64;
176 |     nz = 1;
177 | 
178 |     num_steps = 20000;
179 |     nppc = 5; // Gy has 40 and then does /8?
180 | 
181 |     v0 = 0.0;
182 |     // Density must be set before Npe is derived from it below.
183 |     n0 = 1.0;
184 | 
185 |     const real_t default_grid_len = 1.0;
186 | 
187 |     const real_t a = 0.1;
188 |     len_x_global = 16*a;
189 |     len_y_global = 16*a;
190 |     len_z_global = default_grid_len;
191 | 
192 |     // 0.2 = 2*a: the particle initializer only loads the band |y| <= a.
193 |     Npe = n0*len_x_global*0.2*len_z_global;
194 | 
195 |     // Time step is 0.99 of courant_length(...)/c.
196 |     dt = 0.99*courant_length(
197 |             len_x_global, len_y_global, len_z_global,
198 |             nx, ny, nz
199 |             ) / c;
200 | }
201 | 


--------------------------------------------------------------------------------
/decks/vpic/2stream-em0.cxx:
--------------------------------------------------------------------------------
  1 | // Two-stream test deck, adapted from the VPIC Harris-sheet (magnetic
  2 | // reconnection in a thin current sheet) sample deck.
  3 | // The sample deck this file derives from reproduces the PIC simulations in:
  4 | //   William Daughton. "Nonlinear dynamics of thin current sheets." Phys.
  5 | //   Plasmas. 9(9): 3668-3678. September 2002.
  6 | //
  7 | // This input deck was written by:
  8 | //   Kevin J Bowers, Ph.D.
  9 | //   Plasma Physics Group (X-1)
 10 | //   Applied Physics Division
 11 | //   Los Alamos National Lab
 12 | // August 2003      - original version
 13 | // October 2003     - heavily revised to utilize input deck syntactic sugar
 14 | // March/April 2004 - rewritten for domain decomposition V4PIC
 15 | 
 16 | // If you want to use global variables (for example, to store the dump
 17 | // intervals for your diagnostics section), it must be done in the globals
 18 | // section. Variables declared the globals section will be preserved across
 19 | // restart dumps. For example, if the globals section is:
 20 | //   begin_globals {
 21 | //     double variable;
 22 | //   } end_globals
 23 | // the double "variable" will be visible to other input deck sections as
 24 | // "global->variable". Note: Variables declared in the globals section are set
 25 | // to zero before the user's initialization block is executed. Up to 16K
 26 | // of global variables can be defined.
 27 | 
 28 | begin_globals {
 29 |   double energies_interval;   // step interval tested before dump_energies
 30 |   double fields_interval;     // step interval tested before dump_fields
 31 |   double ehydro_interval;     // step interval tested before dump_hydro("electron")
 32 |   double ihydro_interval;     // step interval tested before dump_hydro("ion")
 33 |   double eparticle_interval;  // step interval tested before dump_particles("electron")
 34 |   double iparticle_interval;  // step interval tested before dump_particles("ion")
 35 |   double restart_interval;    // step interval tested before checkpt; values <= 0 disable a dump
 36 | };
 37 | 
 38 | begin_initialization {
 39 |   // At this point, there is an empty grid and the random number generator is
 40 |   // seeded with the rank. The grid, materials, species need to be defined.
 41 |   // Then the initial non-zero fields need to be loaded at time level 0 and the
 42 |   // particles (position and momentum both) need to be loaded at time level 0.
 43 | 
 44 |   // Arguments can be passed from the command line to the input deck
 45 |   // if( num_cmdline_arguments!=3 ) {
 46 |   //   sim_log( "Usage: " << cmdline_argument[0] << " mass_ratio seed" );
 47 |   //   abort(0);
 48 |   // }
 49 |   seed_entropy(1); //seed_entropy( atoi( cmdline_argument[2] ) );
 50 | 
 51 |   // Diagnostic messages can be written (usually to stderr)
 52 |   sim_log( "Computing simulation parameters");
 53 | 
 54 |   // Define the system of units for this problem (natural units)
 55 |   //double L    = 1; // Length normalization (sheet thickness)
 56 |   double de   = 1; // Length normalization (electron inertial length)
 57 |   double ec   = 1; // Charge normalization
 58 |   double me   = 1; // Mass normalization
 59 |   double c    = 1; // Speed of light
 60 |   double eps0 = 1; // Permittivity of space
 61 | 
 62 |   // Physics parameters
 63 |   double mi_me   = 1; //1836; //25; //atof(cmdline_argument[1]); // Ion mass / electron mass
 64 |   double vthe = 0; //0.0424264068711;       //0.424264068711;       // Electron thermal velocity
 65 |   double vthi = 0; //0.0424264068711;       //0.424264068711;       // Ion thermal velocity
 66 |   //double vthex =0; //0.0141421356237;      // 0.141421356237;      // Electron thermal velocity in x-direction.
 67 |   //double vthix =0; //0.0141421356237;      // 0.141421356237;Ion thermal velocity in x-direction.
 68 | 
 69 |   double v0e   = 0.0866025403784439; //*4.0; //*4; //drift velocity
 70 |   double v0i   =-0.0866025403784439; //*4.0; //*4; //drift velocity
 71 |   double gam   = 1.0/sqrt(1.0-v0e*v0e);
 72 |   v0e *= gam;
 73 |   v0i *= gam;
 74 | 
 75 |   double n0      = 1.0;    //  Background plasma density
 76 |   double b0 = 0.0;         // In plane magnetic field.
 77 |   //double bg = 0.0;         // Guide field magnitude
 78 |   double tauwpe    = 200000;    // simulation wpe's to run
 79 | 
 80 |   // Numerical parameters
 81 |   double topology_x = nproc();  // Number of domains in x, y, and z
 82 |   double topology_y = 1;
 83 |   double topology_z = 1;  // For load balance, best to keep "1" or "2" for Harris sheet
 84 |   double Lx        = 1; //*4.0; //4.62*de; //6.7*de; //10.0*de;  // How big should the box be in the x direction
 85 |   double Ly        = 0.628318530717959*(gam*sqrt(gam)); //0.0721875*de;  // How big should the box be in the y direction
 86 |   double Lz        = 1; //0.0721875*de;  // How big should the box be in the z direction
 87 |   double nx        = 1;    // Global resolution in the x direction
 88 |   double ny        = 32;    // Global resolution in the y direction
 89 |   double nz        = 1; //32;     // Global resolution in the z direction
 90 |   double nppc      = 50; //125; //800; //200; //2048; //1024; //128;    // Average number of macro particles per cell (both species combined!)
 91 |   double cfl_req   = 0.99f; //0.99;  // How close to Courant should we try to run
 92 |   double wpedt_max = 0.36;  // How big a timestep is allowed if Courant is not too restrictive
 93 |   double damp      = 0.0; // Level of radiation damping
 94 | 
 95 | 
 96 |   // Derived quantities
 97 |   double mi = me*mi_me;             // Ion mass
 98 |   double wpe  = c/de;               // electron plasma frequency
 99 |   double wpi  = wpe/sqrt(mi_me);    // ion plasma frequency
100 |   double di   = c/wpi;              // ion inertial length
101 | 
102 |   double hx = Lx/nx;
103 |   double hy = Ly/ny;
104 |   double hz = Lz/nz;
105 | 
106 |   double Npe = n0*Ly*Lz*Lx;    // Number physical electrons.
107 |   double Npi = Npe;            // Number of physical ions in box
108 |   double Ne  = nppc*nx*ny*nz;  // total macro electrons in box
109 |   // NOTE(review): Ne and Ni are each nppc*nx*ny*nz, although the nppc comment says "both species combined" - confirm.
110 | 
111 |   Ne = trunc_granular(Ne,nproc());
112 |   double Ni   = Ne;                                   // Total macro ions in box
113 |   //double qe = -ec*Npe/Ne;  // Charge per macro electron
114 |   //double qi = -ec*Npe/Ne;  // Charge per macro electron
115 | 
116 |   double we   = Npe/Ne;                               // Weight of a macro electron
117 |   double wi   = Npi/Ni;                               // Weight of a macro ion
118 | 
119 |   printf("Npe %e Ne %e we %e \n", Npe, Ne, we);
120 | 
121 |   // Determine the timestep
122 |   double dg = courant_length(Lx,Ly,Lz,nx,ny,nz);      // Courant length
123 |   double dt = cfl_req*dg/c;                           // Courant limited time step
124 |   // printf("in harris.cxx: dt=%.7f\n",  dt);
125 |   // exit(1);
126 |   if( wpe*dt>wpedt_max ) dt=wpedt_max/wpe;            // Override time step if plasma frequency limited
127 | 
128 |   ////////////////////////////////////////
129 |   // Setup high level simulation parameters
130 | 
131 |   num_step             = 6000; //1200; // int(tauwpe/(wpe*dt));
132 |   status_interval      = 0; //2000;
133 |   sync_shared_interval = 0; //status_interval;
134 |   clean_div_e_interval = 0; //turn off cleaning (GY)//status_interval;
135 |   clean_div_b_interval = 0; //status_interval; //(GY)
136 | 
137 |   global->energies_interval  = 1; //status_interval;
138 |   global->fields_interval    = status_interval;
139 |   global->ehydro_interval    = status_interval;
140 |   global->ihydro_interval    = status_interval;
141 |   global->eparticle_interval = status_interval; // Do not dump
142 |   global->iparticle_interval = status_interval; // Do not dump
143 |   global->restart_interval   = status_interval; // Do not dump
144 | 
145 |   ///////////////////////////
146 |   // Setup the space and time
147 | 
148 |   // Setup basic grid parameters
149 |   define_units( c, eps0 );
150 |   define_timestep( dt );
151 |   grid->dx = hx;
152 |   grid->dy = hy;
153 |   grid->dz = hz;
154 |   grid->dt = dt;
155 |   grid->cvac = c;
156 |   //grid->damp = damp;
157 |   double gx0  = 0;
158 |   double gy0  = 0; //-0.5*Ly;
159 |   double gz0  = 0; //-0.5*Lz;
160 |   double gx1  = Lx;
161 |   double gy1  = Ly;
162 |   double gz1  = Lz;
163 | 
164 |    define_periodic_grid(  gx0, gy0, gz0,    // Low corner
165 |    			  gx1, gy1, gz1,    // High corner
166 |    			  nx, ny, nz,             // Resolution
167 |    			  topology_x, topology_y, topology_z); // Topology
168 |   // Partition a periodic box among the processors sliced uniformly along y
169 |   // define_periodic_grid( -0.5*Lx, 0, 0,    // Low corner
170 |   //                        0.5*Lx, Ly, Lz,  // High corner
171 |   //                        nx, ny, nz,      // Resolution
172 |   //                        1, nproc(), 1 ); // Topology
173 |   // define_periodic_grid(  0, -0.5*Ly, -0.5*Lz,    // Low corner
174 |   // 			  Lx, 0.5*Ly, 0.5*Lz,     // High corner
175 |   // 			  nx, ny, nz,             // Resolution
176 |   // 			  topology_x, topology_y, topology_z); // Topology
177 | 
178 |   //   printf("in harris.cxx: g->neighbor[6*265]=%jd\n",  grid->neighbor[6*265]);
179 |   // Override some of the boundary conditions to put a particle reflecting
180 |   // perfect electrical conductor on the -x and +x boundaries
181 |   // set_domain_field_bc( BOUNDARY(-1,0,0), pec_fields );
182 |   // set_domain_field_bc( BOUNDARY( 1,0,0), pec_fields );
183 |   // set_domain_particle_bc( BOUNDARY(-1,0,0), reflect_particles );
184 |   // set_domain_particle_bc( BOUNDARY( 1,0,0), reflect_particles );
185 | 
186 |   define_material( "vacuum", 1 );
187 |   // Note: define_material defaults to isotropic materials with mu=1,sigma=0
188 |   // Tensor electronic, magnetic and conductive materials are supported
189 |   // though. See "shapes" for how to define them and assign them to regions.
190 |   // Also, space is initially filled with the first material defined.
191 | 
192 |   // If you pass NULL to define field array, the standard field array will
193 |   // be used (if damp is not provided, no radiation damping will be used).
194 |   define_field_array( NULL, damp );
195 | 
196 |   ////////////////////
197 |   // Setup the species
198 | 
199 |   // Allow 50% more local_particles in case of non-uniformity
200 |   // VPIC will pick the number of movers to use for each species
201 |   // Both species use out-of-place sorting
202 |   // species_t * ion      = define_species( "ion",       ec, mi, 1.5*Ni/nproc(), -1, 40, 1 );
203 |   // species_t * electron = define_species( "electron", -ec, me, 1.5*Ne/nproc(), -1, 20, 1 );
204 |   //species_t *electron = define_species("electron",-ec,me,2.4*Ne/nproc(),-1,25,0);
205 |   //species_t *ion      = define_species("ion",      ec,mi,2.4*Ne/nproc(),-1,25,0);
206 | 
207 |   species_t *electron = define_species("electron",-ec,me,3*Ne/nproc(),-1,0,0); //turn off sorting (GY)
208 |   species_t *ion      = define_species("ion",     -ec,mi,3*Ne/nproc(),-1,0,0); //(GY) NOTE(review): charge is -ec, same as "electron" - presumably deliberate for two counter-drifting beams; confirm
209 | 
210 |   ///////////////////////////////////////////////////
211 |   // Log diagnostic information about this simulation
212 | 
213 |   sim_log( "***********************************************" );
214 |   sim_log ( "Npe " << Npe );
215 |   sim_log ( "Ne " << Ne );
216 |   sim_log ( "we " << we );
217 |   sim_log ( "mi/me = " << mi_me );
218 |   sim_log ( "tauwpe = " << tauwpe );
219 |   sim_log ( "num_step = " << num_step );
220 |   sim_log ( "Lx/di = " << Lx/di );
221 |   sim_log ( "Lx/de = " << Lx/de );
222 |   sim_log ( "Ly/di = " << Ly/di );
223 |   sim_log ( "Ly/de = " << Ly/de );
224 |   sim_log ( "Lz/di = " << Lz/di );
225 |   sim_log ( "Lz/de = " << Lz/de );
226 |   sim_log ( "nx = " << nx );
227 |   sim_log ( "ny = " << ny );
228 |   sim_log ( "nz = " << nz );
229 |   sim_log ( "damp = " << damp );
230 |   sim_log ( "courant = " << c*dt/dg );
231 |   sim_log ( "nproc = " << nproc ()  );
232 |   sim_log ( "nppc = " << nppc );
233 |   sim_log ( " b0 = " << b0 );
234 |   sim_log ( " di = " << di );
235 |   sim_log ( " Ne = " << Ne );
236 |   sim_log ( "total # of particles = " << 2*Ne );
237 |   sim_log ( "dt*wpe = " << wpe*dt );
238 |   sim_log ( "dx/de = " << Lx/(de*nx) );
239 |   sim_log ( "dy/de = " << Ly/(de*ny) );
240 |   sim_log ( "dz/de = " << Lz/(de*nz) );
241 |   sim_log ( "dx/debye = " << (Lx/nx)/(vthe/wpe)  ); // NOTE(review): vthe is set to 0 above, so this is a division by zero
242 |   sim_log ( "n0 = " << n0 );
243 |   sim_log ( "vthi/c = " << vthi/c );
244 |   sim_log ( "vthe/c = " << vthe/c );
245 |   sim_log( "" );
246 | 
247 |   ////////////////////////////
248 |   // Load fields and particles
249 | 
250 |   // sim_log( "Loading fields" );
251 | 
252 |   // set_region_field( everywhere, 0, 0, 0,                    // Electric field
253 |   //                   0, -sn*b0*tanh(x/L), cs*b0*tanh(x/L) ); // Magnetic field
254 |   // Note: everywhere is a region that encompasses the entire simulation
255 |   // In general, regions are specified as logical equations (i.e. x>0 && x+y<2)
256 | 
257 |   sim_log( "Loading particles" );
258 | 
259 |   // Do a fast load of the particles
260 |   //seed_rand( rng_seed*nproc() + rank() );  //Generators desynchronized
261 |   double xmin = grid->x0 , xmax = grid->x1;
262 |   double ymin = grid->y0 , ymax = grid->y1;
263 |   double zmin = grid->z0 , zmax = grid->z1;
264 | 
265 |   // printf("rank=%d,xmin=%.14f,xmax=%.14f,dx=%.14f,nx=%d\n",rank(),grid->x0,grid->x1,grid->dx,grid->nx);
266 |   // printf("rank=%d,xmin=%.14f,xmax=%.14f\n",rank(),xmin,xmax);
267 |   // printf("rank=%d,xmin=%.14f,ymin=%.14f,zmin=%.14f\n",rank(),xmin,ymin,zmin);
268 |   // printf("rank=%d,xmax=%.14f,ymax=%.14f,zmax=%.14f\n",rank(),xmax,ymax,zmax);
269 |   // printf("rank=%d,gx0=%.14f,gy0=%.14f,gz0=%.14f\n",rank(),gx0,gy0,gz0);
270 |   // printf("rank=%d,gx1=%.14f,gy1=%.14f,gz1=%.14f\n",rank(),gx1,gy1,gz1);
271 |   sim_log( "-> Uniform Bi-Maxwellian" ); // NOTE(review): message looks stale; the loop below loads cold particles evenly spaced in y
272 |   //int seed = 1;
273 |   //int seedn= 1;
274 |   double n1,n2,n3,n4,n5,n6;
275 |   //int signx,signy,signz;
276 |   int Nlocal=0;
277 |   double dxp=Ly/Ne; // spacing between consecutive particles along y
278 |   int ip=0;
279 |   repeat ( Ne ) {
280 |     double y = (ip+0.5)*dxp;
281 |     ip++;
282 |     //double x = uniform2( gx0, gx1 , seed );
283 |     double x = 0.5*Lx; //uniform2( gy0, gy1 , seed );
284 |     double z = 0.5*Lz; //uniform2( gz0, gz1 , seed );
285 |    // double x = uniform( rng(0), xmin, xmax );
286 |    // double y = uniform( rng(0), ymin, ymax );
287 |    // double z = uniform( rng(0), zmin, zmax );
288 |    // double x = uniform( rng(0), gx0, gx1 );
289 |    // double y = uniform( rng(0), gy0, gy1 );
290 |    // double z = uniform( rng(0), gz0, gz1 );
291 |    // n1 = normal(rng(0),v0e,vthex);
292 |    // n2 = normal(rng(0),0,vthe );
293 |    // n3 = normal(rng(0),0,vthe );
294 |    // n4 = normal(rng(0),v0i,vthix);
295 |    // n5 = normal(rng(0),0,vthi );
296 |    // n6 = normal(rng(0),0,vthi );
297 |    n1 = v0e;
298 |    n2 = 0;
299 |    n3 = 0;
300 |    n4 = v0i;
301 |    n5 = 0;
302 |    n6 = 0;
303 |    //mpi reproducing serial
304 |    if(x<xmin||x>xmax||y<ymin||y>ymax||z<zmin||z>zmax) continue;
305 | 
306 |    double na = 1e-4*sin(2.0*3.1415926*y/Ly); // sinusoidal perturbation factor, one period over Ly
307 |    inject_particle( electron, x, y, z,
308 | 		    n1*(1.0+na),
309 | 		    n2,
310 | 		    n3,we, 0, 0);
311 | 
312 | 
313 | 
314 |    inject_particle( ion, x, y, z,
315 | 		    n4*(1.0-na),
316 | 		    n5,
317 | 		    n6,wi, 0 ,0 );
318 |    Nlocal++;
319 |   }
320 | 
321 |   // //quiet start
322 |   // repeat ( Ne/8 ) {
323 |   //   double x = uniform2( gx0, gx1 , seed );
324 |   //   double y = uniform2( gy0, gy1 , seed );
325 |   //   double z = uniform2( gz0, gz1 , seed );
326 |   //   n1 = v0e;
327 |   //   n2 = 0;
328 |   //   n3 = 0;
329 |   //   n4 = v0i;
330 |   //   n5 = 0;
331 |   //   n6 = 0;
332 | 
333 |   //    signx = -1;
334 |   //    signy = -1;
335 |   //    signz = -1;
336 |   //    for(int i=0; i<2; i++){
337 |   //      signx = -signx;
338 |   //      for(int j=0; j<2; j++){
339 |   //  	signy = -signy;
340 |   //  	for(int k=0; k<2; k++){
341 |   //  	  signz = -signz;
342 |   // 	  inject_particle( electron, x, y, z,
343 |   // 		    n1*signx,
344 |   //                   n2*signy,
345 |   //                   n3*signz,we, 0, 0);
346 |   // 	  inject_particle( ion, x, y, z,
347 |   //                   n4*signx,
348 |   //                   n5*signy,
349 |   //                   n6*signz,wi, 0 ,0 );
350 | 
351 |   //  	}
352 |   //      }
353 |   //    }
354 | 
355 | 
356 |   //   signx = -1;
357 |   //   signy = -1;
358 |   //   signz = -1;
359 |   //   for(int i=0; i<2; i++){
360 |   //     signx = -signx;
361 |   //     for(int j=0; j<2; j++){
362 |   // 	signy = -signy;
363 |   // 	for(int k=0; k<2; k++){
364 |   // 	  signz = -signz;
365 | 
366 |   // 	}
367 |   //     }
368 |   //   }
369 | 
370 |   //}
371 |   // printf("Nlocal=%d (of %f)\n",Nlocal,Ne);
372 |   sim_log( "Finished loading particles" );
373 | 
374 |   //exit(1);
375 | 
376 |   // Upon completion of the initialization, the following occurs:
377 |   // - The synchronization error (tang E, norm B) is computed between domains
378 |   //   and tang E / norm B are synchronized by averaging where discrepancies
379 |   //   are encountered.
380 |   // - The initial divergence error of the magnetic field is computed and
381 |   //   one pass of cleaning is done (for good measure)
382 |   // - The bound charge density necessary to give the simulation an initially
383 |   //   clean divergence e is computed.
384 |   // - The particle momentum is uncentered from u_0 to u_{-1/2}
385 |   // - The user diagnostics are called on the initial state
386 |   // - The physics loop is started
387 |   //
388 |   // The physics loop consists of:
389 |   // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2}
390 |   // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles)
391 |   // - User current injection (adjust field(x,y,z).jfx, jfy, jfz)
392 |   // - Advance B from B_0 to B_{1/2}
393 |   // - Advance E from E_0 to E_1
394 |   // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz)
395 |   // - Advance B from B_{1/2} to B_1
396 |   // - (periodically) Divergence clean electric field
397 |   // - (periodically) Divergence clean magnetic field
398 |   // - (periodically) Synchronize shared tang e and norm b
399 |   // - Increment the time step
400 |   // - Call user diagnostics
401 |   // - (periodically) Print a status message
402 | }
403 | 
404 | begin_diagnostics {
405 | 
406 | # define should_dump(x) (global->x##_interval>0 && remainder(step(),global->x##_interval)==0)
407 | 
408 |   if( step()==-10 ) {
409 |     // A grid dump contains all grid parameters, field boundary conditions,
410 |     // particle boundary conditions and domain connectivity information. This
411 |     // is stored in a binary format. Each rank makes a grid dump
412 |     dump_grid("grid");
413 | 
414 |     // A materials dump contains all the materials parameters. This is in a
415 |     // text format. Only rank 0 makes the materials dump
416 |     dump_materials("materials");
417 | 
418 |     // A species dump contains the physics parameters of a species. This is in
419 |     // a text format. Only rank 0 makes the species dump
420 |     dump_species("species");
421 |   }
422 | 
423 |   // Energy dumps store all the energies in various directions of E and B
424 |   // and the total kinetic (not including rest mass) energies of each species
425 |   // species in a simple text format. By default, the energies are appended to
426 |   // the file. However, if a "0" is added to the dump_energies call, a new
427 |   // energies dump file will be created. The energies are in the units of the
428 |   // problem and are all time centered appropriately. Note: When restarting a
429 |   // simulation from a restart dump made at a prior time step to the last
430 |   // energies dump, the energies file will have a "hiccup" of intervening
431 |   // time levels. This "hiccup" will not occur if the simulation is aborted
432 |   // immediately following a restart dump. Energies dumps are in a text
433 |   // format and the layout is documented at the top of the file. Only rank 0
434 |   // makes makes an energies dump.
435 |   if( should_dump(energies) ) {
436 |     dump_energies( "energies", step()==0 ? 0 : 1 );
437 |   }
438 | 
439 |   // Field dumps store the raw electromagnetic fields, sources and material
440 |   // placement and a number of auxilliary fields. E, B and RHOB are
441 |   // timecentered, JF and TCA are half a step old. Material fields are static
442 |   // and the remaining fields (DIV E ERR, DIV B ERR and RHOF) are for
443 |   // debugging purposes. By default, field dump filenames are tagged with
444 |   // step(). However, if a "0" is added to the call, the filename will not be
445 |   // tagged. The JF that gets stored is accumulated with a charge-conserving
446 |   // algorithm. As a result, JF is not valid until at least one timestep has
447 |   // been completed. Field dumps are in a binary format. Each rank makes a
448 |   // field dump.
449 |   if( step()==-10 )         dump_fields("fields"); // Get first valid total J
450 |   if( should_dump(fields) ) dump_fields("fields");
451 | 
452 |   // Hydro dumps store particle charge density, current density and
453 |   // stress-energy tensor. All these quantities are known at the time
454 |   // t = time().  All these quantities are accumulated trilinear
455 |   // node-centered. By default, species dump filenames are tagged with
456 |   // step(). However, if a "0" is added to the call, the filename will not
457 |   // be tagged. Note that the current density accumulated by this routine is
458 |   // purely diagnostic. It is not used by the simulation and it is not
459 |   // accumulated using a self-consistent charge-conserving method. Hydro dumps
460 |   // are in a binary format. Each rank makes a hydro dump.
461 |   if( should_dump(ehydro) ) dump_hydro("electron","ehydro");
462 |   if( should_dump(ihydro) ) dump_hydro("ion",     "ihydro");
463 | 
464 |   // Particle dumps store the particle data for a given species. The data
465 |   // written is known at the time t = time().  By default, particle dumps
466 |   // are tagged with step(). However, if a "0" is added to the call, the
467 |   // filename will not be tagged. Particle dumps are in a binary format.
468 |   // Each rank makes a particle dump.
469 |   if( should_dump(eparticle) ) dump_particles("electron","eparticle");
470 |   if( should_dump(iparticle) ) dump_particles("ion",     "iparticle");
471 | 
472 |   // A checkpt is made by calling checkpt( fbase, tag ) where fname is a string
473 |   // and tag is an integer.  A typical usage is:
474 |   //   checkpt( "checkpt", step() ).
475 |   // This will cause each process to write their simulation state to a file
476 |   // whose name is based on fbase, tag and the node's rank.  For the above
477 |   // usage, if called on step 314 on a 4 process run, the four files:
478 |   //   checkpt.314.0, checkpt.314.1, checkpt.314.2, checkpt.314.3
479 |   // to be written.  The simulation can then be restarted from this point by
480 |   // invoking the application with "--restore checkpt.314".  checkpt must be
481 |   // the _VERY_ LAST_ diagnostic called.  If not, diagnostics performed after
482 |   // the checkpt but before the next timestep will be missed on restore.
483 |   // Restart dumps are in a binary format unique to each simulation.
484 | 
485 |   if( should_dump(restart) ) checkpt( "checkpt", step() );
486 | 
487 |   // If you want to write a checkpt after a certain amount of simulation time,
488 |   // use uptime() in conjunction with checkpt.  For example, this will cause
489 |   // the simulation state to be written after 7.5 hours of running to the
490 |   // same file every time (useful for dealing with quotas on big machines).
491 |   //if( uptime()>=27000 ) {
492 |   //  checkpt( "timeout", 0 );
493 |   //  abort(0);
494 |   //}
495 | 
496 | # undef should_dump
497 | 
498 | }
499 | 
// Particle-injection hook of the input deck; intentionally left empty.
begin_particle_injection {

  // No particle injection for this simulation

}

// Current-injection hook of the input deck; intentionally left empty.
begin_current_injection {

  // No current injection for this simulation

}

// Field-injection hook of the input deck; intentionally left empty.
begin_field_injection {

  // No field injection for this simulation

}

// Particle-collision hook of the input deck; intentionally left empty.
begin_particle_collisions{

  // No collisions for this simulation

}
523 | 


--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build the `cbnpic` driver from example.cpp plus the input-deck source named
# by ${INPUT_DECK} (that variable is set outside this file).
add_executable(cbnpic example.cpp ${INPUT_DECK})
# Link the driver against the CabanaPIC target.
target_link_libraries(cbnpic PUBLIC CabanaPIC)
# Let the driver #include files from this directory's build tree.
target_include_directories( cbnpic PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
# Install the executable into the configured binary directory.
install(TARGETS cbnpic DESTINATION ${CMAKE_INSTALL_BINDIR})
5 | 


--------------------------------------------------------------------------------
/example/example.cpp:
--------------------------------------------------------------------------------
  1 | #include <Cabana_Core.hpp>
  2 | #include <Cabana_AoSoA.hpp>
  3 | #include <Cabana_Sort.hpp> // is this needed if we already have core?
  4 | 
  5 | #include <cstdlib>
  6 | #include <iostream>
  7 | 
  8 | #include "types.h"
  9 | #include "helpers.h"
 10 | 
 11 | #include "fields.h"
 12 | #include "accumulator.h"
 13 | #include "interpolator.h"
 14 | 
 15 | #include "uncenter_p.h"
 16 | 
 17 | #include "push.h"
 18 | 
 19 | //#include "visualization.h"
 20 | 
 21 | #include "input/deck.h"
 22 | 
 23 | // Requires C++14
 24 | static auto make_field_solver(field_array_t &fields)
 25 | {
 26 |     // TODO: make this support 1/2/3d
 27 | #ifdef ES_FIELD_SOLVER
 28 |     std::cout << "Created ES Solver (1D only)" << std::endl;
 29 |     Field_Solver<ES_Field_Solver_1D> field_solver(fields);
 30 | #else // EM
 31 |     std::cout << "Created EM Solver" << std::endl;
 32 |     Field_Solver<EM_Field_Solver> field_solver(fields);
 33 | #endif
 34 |     return field_solver;
 35 | }
 36 | 
 37 | // Global variable to hold paramters
 38 | //Parameters params;
 39 | Input_Deck deck;
 40 | 
 41 | //---------------------------------------------------------------------------//
 42 | // Main.
 43 | //---------------------------------------------------------------------------//
 44 | int main( int argc, char* argv[] )
 45 | {
 46 |     // Initialize the kokkos runtime.
 47 |     Kokkos::ScopeGuard scope_guard( argc, argv );
 48 | 
 49 |     printf("#Running On Kokkos execution space %s\n",
 50 |             typeid (Kokkos::DefaultExecutionSpace).name ());
 51 | 
 52 | 
 53 | #ifndef ENERGY_DUMP_INTERVAL
 54 | #define ENERGY_DUMP_INTERVAL 1
 55 | #endif
 56 | 
 57 |     // Cabana scoping block
 58 |     {
 59 |         FILE *fptr = fopen("partloc","w");
 60 |         FILE *fpfd = fopen("ex1d","w");
 61 |         deck.derive_params();
 62 |         deck.print_run_details();
 63 | 
 64 |         // Cache some values locally for printing
 65 |         const int npc = deck.nppc;
 66 |         const int nx = deck.nx;
 67 |         const int ny = deck.ny;
 68 |         const int nz = deck.nz;
 69 | #ifdef ES_FIELD_SOLVER
 70 | 	if(ny>1 || nz>1){
 71 | 	    std::cerr << "Error: ES Field solver supports 1D only.\n";
 72 | 	    return -1;
 73 | 	}
 74 | #endif
 75 |         const int num_ghosts = deck.num_ghosts;
 76 |         const size_t num_cells = deck.num_cells;
 77 |         real_t dxp = 2.f / (npc);
 78 | 
 79 |         // Define some consts
 80 |         const real_t dx = deck.dx;
 81 |         const real_t dy = deck.dy;
 82 |         const real_t dz = deck.dz;
 83 | 
 84 |         real_t dt = deck.dt;
 85 |         real_t c = deck.c;
 86 |         real_t n0 = deck.n0;
 87 |         //real_t ec = deck.ec;
 88 |         real_t Lx = deck.len_x;
 89 |         real_t Ly = deck.len_y;
 90 |         real_t Lz = deck.len_z;
 91 |         real_t v0 = deck.v0;
 92 | 
 93 |         int nppc = deck.nppc;
 94 |         real_t eps0 = deck.eps;
 95 | 
 96 |         real_t Npe = deck.Npe;
 97 |         size_t Ne = deck.Ne; // (nppc*nx*ny*nz)
 98 |         printf("nppc %d nx %d ny %d nz %d \n", nppc, nx, ny, nz);
 99 |              printf("n0 %e lx %e nly %e lz %e \n", n0, Lx, Ly, Lz);
100 |         printf("ne %ld npe %e \n", Ne, Npe);
101 | 
102 |         real_t qsp = deck.qsp;
103 |         printf("qsp %e \n", qsp);
104 |         real_t me = deck.me;
105 | 
106 |         real_t qdt_2mc = qsp*dt/(2*me*c);
107 | 
108 |         real_t cdt_dx = c*dt/dx;
109 |         real_t cdt_dy = c*dt/dy;
110 |         real_t cdt_dz = c*dt/dz;
111 |         real_t dt_eps0 = dt/eps0;
112 |         real_t frac = 1.0f;
113 |         real_t we = (real_t) Npe/(real_t) Ne;
114 |         printf("we %e \n", we);
115 | 
116 |         const size_t num_particles = deck.num_particles;
117 | 
118 |         printf("c %e dt %e dx %e cdt_dx %e \n", c, dt,dx,cdt_dx);
119 | 
120 |         // Create the particle list.
121 |         particle_list_t particles( "particles", num_particles );
122 | 
123 |         // Initialize particles.
124 |         deck.initialize_particles( particles, nx, ny, nz, num_ghosts, dxp, npc, we, v0 );
125 | 
126 |         grid_t* grid = new grid_t();
127 | 
128 |         // Print initial particle positions
129 |         //logger << "Initial:" << std::endl;
130 |         //print_particles( particles );
131 |         fprintf(fptr,"#step=0\n0 ");
132 |         dump_particles( fptr, particles, 0, 0, 0, dx,dy,dz,nx,ny,nz,num_ghosts );
133 | 
134 |         // Allocate Cabana Data
135 |         interpolator_array_t interpolators("interpolator", num_cells);
136 | 
137 |         accumulator_array_t accumulators("accumulator", num_cells);
138 | 
139 |         auto scatter_add = Kokkos::Experimental::create_scatter_view(accumulators);
140 |         //<Kokkos::Experimental::ScatterSum,
141 |         //KOKKOS_SCATTER_DUPLICATED,
142 |         //KOKKOS_SCATTER_ATOMIC>(accumulators);
143 | 
144 |         field_array_t fields("fields", num_cells);
145 | 
146 |         // Zero out the interpolator
147 |         // Techincally this is optional?
148 |         initialize_interpolator(interpolators);
149 | 
150 |         // Can obviously supply solver type at compile time
151 |         //Field_Solver<EM_Field_Solver> field_solver(fields);
152 |         //Field_Solver<ES_Field_Solver_1D> field_solver(fields);
153 |         // This is able to deduce solver type from compile options
154 |         auto field_solver = make_field_solver(fields);
155 | 
156 |         deck.initialize_fields(
157 |             fields,
158 |             nx,
159 |             ny,
160 |             nz,
161 |             num_ghosts,
162 |             Lx,
163 |             Ly,
164 |             Lz,
165 |             dx,
166 |             dy,
167 |             dz
168 |         );
169 | 
170 |         // Grab some global values for use later
171 |         const Boundary boundary = deck.BOUNDARY_TYPE;
172 | 
173 |         //logger << "nx " << params.nx << std::endl;
174 |         //logger << "num_particles " << num_particles << std::endl;
175 |         //logger << "num_cells " << num_cells << std::endl;
176 |         //logger << "Actual NPPC " << params.NPPC << std::endl;
177 | 
178 |         // TODO: give these a real value
179 |         const real_t px =  (nx>1) ? frac*c*dt/dx : 0;
180 |         const real_t py =  (ny>1) ? frac*c*dt/dy : 0;
181 |         const real_t pz =  (nz>1) ? frac*c*dt/dz : 0;
182 | 
183 |         // simulation loop
184 |         const int num_steps = deck.num_steps;
185 | 
186 |         printf( "#***********************************************\n" );
187 |         printf( "#num_step = %d\n" , num_steps );
188 |         printf( "#Lx/de = %f\n" , Lx );
189 |         printf( "#Ly/de = %f\n" , Ly );
190 |         printf( "#Lz/de = %f\n" , Lz );
191 |         printf( "#nx = %d\n" , nx );
192 |         printf( "#ny = %d\n" , ny );
193 |         printf( "#nz = %d\n" , nz );
194 |         printf( "#nppc = %d\n" , nppc );
195 |         printf( "# Ne = %ld\n" , Ne );
196 |         printf( "#dt*wpe = %f\n" , dt );
197 |         printf( "#dx/de = %f\n" , Lx/(nx) );
198 |         printf( "#dy/de = %f\n" , Ly/(ny) );
199 |         printf( "#dz/de = %f\n" , Lz/(nz) );
200 |         printf( "#n0 = %f\n" , n0 );
201 |         printf( "#we = %f\n" , we );
202 |         printf( "*****\n" );
203 | 
204 |         if (deck.perform_uncenter)
205 |         {
206 |             load_interpolator_array(fields, interpolators, nx, ny, nz, num_ghosts);
207 | 
208 |             uncenter_particles(
209 |                 particles,
210 |                 interpolators,
211 |                 qdt_2mc
212 |             );
213 |         }
214 | 
215 |         // Main loop
216 |         for (int step = 1; step <= num_steps; step++)
217 |         {
218 |             //printf("Step %d \n", step);
219 | 
220 |             // Convert fields to interpolators
221 |             load_interpolator_array(fields, interpolators, nx, ny, nz, num_ghosts);
222 | 
223 |             clear_accumulator_array(fields, accumulators, nx, ny, nz);
224 |             // TODO: Make the frequency of this configurable (every step is not
225 |             // required for this incarnation)
226 |             // Sort by cell index
227 |             //auto keys = particles.slice<Cell_Index>();
228 |             //auto bin_data = Cabana::sortByKey( keys );
229 | 
230 |             // Move
231 |             push(
232 |                     particles,
233 |                     interpolators,
234 |                     qdt_2mc,
235 |                     cdt_dx,
236 |                     cdt_dy,
237 |                     cdt_dz,
238 |                     qsp,
239 |                     scatter_add,
240 |                     grid,
241 |                     nx,
242 |                     ny,
243 |                     nz,
244 |                     num_ghosts,
245 |                     boundary
246 |                 );
247 | 
248 |             Kokkos::Experimental::contribute(accumulators, scatter_add);
249 | 
250 |             // Only reset the data if these two are not the same arrays
251 |             scatter_add.reset_except(accumulators);
252 | 
253 |             // TODO: boundaries? MPI
254 |             //boundary_p(); // Implies Parallel?
255 | 
256 |             // Map accumulator current back onto the fields
257 |             unload_accumulator_array(fields, accumulators, nx, ny, nz, num_ghosts, dx, dy, dz, dt);
258 | 
259 |             // Half advance the magnetic field from B_0 to B_{1/2}
260 |             field_solver.advance_b(fields, real_t(0.5)*px, real_t(0.5)*py, real_t(0.5)*pz, nx, ny, nz, num_ghosts);
261 | 
262 |             // Advance the electric field from E_0 to E_1
263 |             field_solver.advance_e(fields, px, py, pz, nx, ny, nz, num_ghosts, dt_eps0);
264 | 
265 |             // Half advance the magnetic field from B_{1/2} to B_1
266 |             field_solver.advance_b(fields, real_t(0.5)*px, real_t(0.5)*py, real_t(0.5)*pz, nx, ny, nz, num_ghosts);
267 | 
268 |             if( step % ENERGY_DUMP_INTERVAL == 0 )
269 |             {
270 |                 dump_energies(field_solver, fields, step, step*dt, px, py, pz, nx, ny, nz, num_ghosts);
271 |             }
272 | 
273 |             // TODO: abstract this out
274 |             fprintf(fpfd,"#step=%d\n",step);
275 |             field_solver.dump_fields(fpfd,fields, 0, 0, 0, dx,dy,dz,nx,ny,nz,num_ghosts );
276 |             fprintf(fptr,"#step=%d\n%e ",step,step*dt);
277 |             dump_particles( fptr, particles, 0, 0, 0, dx,dy,dz,nx,ny,nz,num_ghosts );
278 | 
279 |         }
280 | 
281 |         fclose(fptr);
282 |         fclose(fpfd);
283 | 
284 |     } // End Scoping block
285 | 
286 |     // Let the user perform any needed finalization
287 |     deck.finalize();
288 | 
289 |     return 0;
290 | }
291 | 
292 | //---------------------------------------------------------------------------//
293 | //
294 | 
295 | ////// Known Possible Improvements /////
296 | // I pass nx/ny/nz round a lot more than I could
297 | 
298 | 


--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
1 | - `plot.py`: Simple Python script to plot energies from files
2 | 


--------------------------------------------------------------------------------
/scripts/plot.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | 
 3 | filename = "out"
 4 | filename2 = "out2"
 5 | 
 6 | # Import data as a list of numbers
 7 | def read_file(filename):
 8 |     data = []
 9 |     with open(filename) as textFile:
10 |         for line in textFile:
11 |             if ( line.strip().startswith('#') ):
12 |                 # Skip comments
13 |                 continue
14 |             if ( line.strip() == ""):
15 |                 # skip empty lines
16 |                 continue
17 | 
18 |             print(line)
19 |             l = line.strip().split()[2]
20 |             data.append(l)
21 |         print(data)
22 |         return data
23 | 
# Load the selected column from each of the two input files.
data = read_file(filename)
data2 = read_file(filename2)

# Plot as a time series plot
# (each series is plotted against its index, both on the same axes)
plt.plot(data)
plt.plot(data2)
plt.show()
31 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/build_and_run.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Positional Parameters"
 4 | echo '$0 = ' $0 # this file
 5 | echo '$1 = ' $1 # repo path
 6 | echo '$2 = ' $2 # CXX
 7 | echo '$3 = ' $3 # kokkos install dir
 8 | echo '$4 = ' $4 # cabana install dir
 9 | echo '$5 = ' $5 # platform
10 | 
11 | export KOKKOS_PROFILE_LIBRARY=`pwd`/kokkos-tools/kp_kernel_timer.so
12 | 
13 | cd $1 # CD into right folder
14 | echo "--> Running $5 in $1 with $2"
15 | 
16 | KOKKOS_INSTALL_DIR=$3
17 | CABANA_INSTALL_DIR=$4
18 | cxx=$2
19 | platform=$5
20 | 
21 | options=""
22 | if [[ $platform == "GPU" ]]; then
23 |     options="-D ENABLE_GPU=ON"
24 |     cxx="$KOKKOS_INSTALL_DIR/bin/nvcc_wrapper"
25 | elif [[ $platform == "Serial" ]]; then
26 |     options="-D ENABLE_SERIAL=ON"
27 | fi
28 | 
29 | 
30 | mkdir build-$platform
31 | cd build-$platform
32 | 
33 | # Build CPU *or* GPU?
34 | # TODO: the way this selects the cmake folder is awful
35 |  #-D CMAKE_CXX_COMPILER=$KOKKOS_SRC_DIR/bin/nvcc_wrapper \
36 | CXX=$cxx cmake -DCMAKE_BUILD_TYPE=Release -DKOKKOS_DIR=$KOKKOS_INSTALL_DIR -DCABANA_DIR=$CABANA_INSTALL_DIR $options ../../../../..;
37 | make VERBOSE=1
38 | 
39 | # Run the code and track the performance
40 | { time ./minipic > out ; } 2> time.txt
41 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/build_cabana.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Positional Parameters"
 4 | echo '$0 = ' $0 # this file
 5 | echo '$1 = ' $1 # cxx
 6 | echo '$2 = ' $2 # kokkos dir
 7 | echo '$3 = ' $3 # cabana dir
 8 | echo '$4 = ' $4 # install dir
 9 | echo '$5 = ' $5 # platform
10 | 
11 | cxx=$1
12 | kokkos_dir=`pwd`/$2
13 | echo $kokkos_dir
14 | cabana_dir=$3
15 | install_dir=$4
16 | platform=$5
17 | 
18 | cd $cabana_dir
19 | mkdir $install_dir
20 | cd $install_dir
21 | echo `pwd`
22 | 
23 | # Default to off
24 | options="-D Cabana_ENABLE_Serial=OFF"
25 | 
26 | # possible platforms = ["Serial", "CPU", "GPU", "UVM"]
27 | 
28 | if [[ $platform == "Serial" ]]; then
29 |     # Override default
30 |     options="-D Cabana_ENABLE_Serial=ON"
31 | elif [[ $platform == "CPU" ]]; then
32 |     options="-D Cabana_ENABLE_OpenMP=ON $options"
33 | elif [[ $platform == "GPU" ]]; then
34 |     options="-D CMAKE_CXX_COMPILER=$kokkos_dir/bin/nvcc_wrapper -D Cabana_ENABLE_Cuda:BOOL=ON $options"
35 |     cxx=$kokkos_dir/bin/nvcc_wrapper
36 | # TODO: enable UVM build
37 | #elif [[ $platform == "UVM" ]] then
38 |     #options="--"
39 | #else
40 |     # This means they passed up the wrong value
41 |     #eep
42 | fi
43 | 
44 | echo $options
45 | 
46 | CXX=$cxx cmake \
47 |      -D CMAKE_BUILD_TYPE="Release" \
48 |      -D CMAKE_PREFIX_PATH=$kokkos_dir \
49 |      -D CMAKE_INSTALL_PREFIX=`pwd`/install \
50 |      -D Cabana_ENABLE_TESTING=OFF \
51 |      -D Cabana_ENABLE_EXAMPLES=OFF \
52 |      $options \
53 |      .. ;
54 | make install
55 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/build_kokkos.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Positional Parameters"
 4 | echo '$0 = ' $0 # this file
 5 | echo '$1 = ' $1 # cxx
 6 | echo '$2 = ' $2 # kokkos dir
 7 | echo '$3 = ' $3 # install dir
 8 | echo '$4 = ' $4 # platform
 9 | echo '$5 = ' $5 # arch
10 | 
11 | kokkos_dir=`pwd`/$2
12 | install_dir=$3
13 | platform=$4
14 | cxx=$1
15 | 
16 | cd $kokkos_dir
17 | mkdir $install_dir
18 | cd $install_dir
19 | echo `pwd`
20 | 
21 | options="--compiler=$cxx"
22 | 
23 | #platforms = ["Serial", "CPU", "GPU", "UVM"]
24 | if [[ $platform == "Serial" ]]; then
25 |     options="$options --with-serial"
26 | elif [[ $platform == "CPU" ]]; then
27 |     options="$options --with-openmp"
28 | elif [[ $platform == "GPU" ]]; then
29 |     #export NVCC_WRAPPER_DEFAULT_COMPILER=`which $CXX`
30 |     options="--with-openmp --with-cuda --arch=$5 --with-cuda-options=enable_lambda --compiler=$kokkos_dir/bin/nvcc_wrapper ;"
31 | # TODO: enable UVM build
32 | #elif [[ $platform == "UVM" ]] then
33 |     #options="--"
34 | else
35 |     # This means they passed up the wrong value
36 |     eep
37 | fi
38 | 
39 | echo "Running with $options"
40 | 
41 | # TODO: check this works
42 | CXX=$cxx ../generate_makefile.bash --prefix=`pwd`/install $options
43 | make install
44 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/kokkos-tools/Makefile:
--------------------------------------------------------------------------------
 1 | CXX=g++
 2 | CXXFLAGS=-O3 -std=c++11 -g
 3 | SHARED_CXXFLAGS=-shared -fPIC
 4 | # comment the following line if abi::__cxa_demangle is not supported by your compiler
 5 | CXXFLAGS+= -DHAVE_GCC_ABI_DEMANGLE
 6 | 
 7 | all: kp_kernel_timer.so #kp_reader
 8 | 
 9 | #kp_reader: kp_reader.cpp kp_kernel_timer.so
10 | 	#$(CXX) $(CXXFLAGS) -o kp_reader kp_reader.cpp
11 | 
12 | kp_kernel_timer.so: kp_kernel_timer.cpp kp_kernel_info.h
13 | 	$(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ kp_kernel_timer.cpp
14 | 
15 | clean:
16 | 	rm *.so #kp_reader
17 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/kokkos-tools/README.md:
--------------------------------------------------------------------------------
1 | This folder contains local, customized versions of the Kokkos tools.
2 | 
3 | See https://github.com/kokkos/kokkos-tools/wiki for documentation
4 | 
5 | TLDR: set `export KOKKOS_PROFILE_LIBRARY=$PATH_TO/kp_kernel_timer.so`  before 
6 | running the kokkos app
7 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/kokkos-tools/kp_kernel_info.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef _H_KOKKOSP_KERNEL_INFO
  3 | #define _H_KOKKOSP_KERNEL_INFO
  4 | 
  5 | #include <stdio.h>
  6 | #include <sys/time.h>
  7 | #include <cstring>
  8 | #if defined(HAVE_GCC_ABI_DEMANGLE)
  9 | #include <cxxabi.h>
 10 | #endif
 11 | double seconds() {
 12 | 	struct timeval now;
 13 | 	gettimeofday(&now, NULL);
 14 | 
 15 | 	return (double) (now.tv_sec + (now.tv_usec * 1.0e-6));
 16 | }
 17 | 
// Kind of construct a KernelPerformanceInfo record describes.
// The numeric values are written to and read back from the binary record
// (see writeToFile / readFromFile), so they must not be renumbered.
enum KernelExecutionType {
	PARALLEL_FOR = 0,
	PARALLEL_REDUCE = 1,
	PARALLEL_SCAN = 2,
        REGION = 3
};
 24 | 
 25 | class KernelPerformanceInfo {
 26 | 	public:
 27 | 		KernelPerformanceInfo(std::string kName, KernelExecutionType kernelType) :
 28 | 			kType(kernelType) {
 29 | 
 30 | 			kernelName = (char*) malloc(sizeof(char) * (kName.size() + 1));
 31 | 			strcpy(kernelName, kName.c_str());
 32 | 
 33 | 			callCount = 0;
 34 | 			time = 0;
 35 | 		}
 36 | 
 37 | 		~KernelPerformanceInfo() {
 38 | 			free(kernelName);
 39 | 		}
 40 | 
 41 | 		KernelExecutionType getKernelType() {
 42 | 			return kType;
 43 | 		}
 44 | 
 45 | 		void incrementCount() {
 46 | 			callCount++;
 47 | 		}
 48 | 
 49 | 		void addTime(double t) {
 50 | 			time   += t;
 51 | 			timeSq += (t*t);
 52 | 		}
 53 | 
 54 | 		void addFromTimer() {
 55 | 			addTime(seconds() - startTime);
 56 | 
 57 | 			incrementCount();
 58 | 		}
 59 | 
 60 | 		void startTimer() {
 61 | 			startTime = seconds();
 62 | 		}
 63 | 
 64 | 		uint64_t getCallCount() {
 65 | 			return callCount;
 66 | 		}
 67 | 
 68 | 		double getTime() {
 69 | 			return time;
 70 | 		}
 71 | 
 72 | 		double getTimeSq() {
 73 | 			return timeSq;
 74 | 		}
 75 | 
 76 | 		char* getName() {
 77 | 			return kernelName;
 78 | 		}
 79 | 
 80 | 		void addCallCount(const uint64_t newCalls) {
 81 | 			callCount += newCalls;
 82 | 		}
 83 | 
 84 | 		bool readFromFile(FILE* input) {
 85 | 			uint32_t recordLen = 0;
 86 | 			uint32_t actual_read = fread(&recordLen, sizeof(recordLen), 1, input);
 87 | 	                if(actual_read != 1) return false;
 88 | 
 89 | 			char* entry = (char*) malloc(recordLen);
 90 |                         fread(entry, recordLen, 1, input);
 91 | 
 92 | 			uint32_t nextIndex = 0;
 93 | 			uint32_t kernelNameLength;
 94 | 			copy((char*) &kernelNameLength, &entry[nextIndex], sizeof(kernelNameLength));
 95 | 			nextIndex += sizeof(kernelNameLength);
 96 | 
 97 | 			if(strlen(kernelName) > 0) {
 98 | 				free(kernelName);
 99 | 			}
100 | 
101 | 			kernelName = (char*) malloc( sizeof(char) * (kernelNameLength + 1));
102 | 			copy(kernelName, &entry[nextIndex], kernelNameLength);
103 | 			kernelName[kernelNameLength] = '\0';
104 | #if defined(HAVE_GCC_ABI_DEMANGLE)
105 | 			{
106 | 				int status = -1;
107 | 				char* demangledKernelName = abi::__cxa_demangle(kernelName, NULL, NULL, &status);
108 | 				if (status==0) {
109 | 					free(kernelName);
110 | 					kernelName = demangledKernelName;
111 | 				}
112 | 			}
113 | #endif // HAVE_GCC_ABI_DEMANGLE
114 | 			nextIndex += kernelNameLength;
115 | 
116 | 			copy((char*) &callCount, &entry[nextIndex], sizeof(callCount));
117 | 			nextIndex += sizeof(callCount);
118 | 
119 | 			copy((char*) &time, &entry[nextIndex], sizeof(time));
120 | 			nextIndex += sizeof(time);
121 | 
122 | 			copy((char*) &timeSq, &entry[nextIndex], sizeof(timeSq));
123 | 			nextIndex += sizeof(timeSq);
124 | 
125 | 			uint32_t kernelT = 0;
126 | 			copy((char*) &kernelT, &entry[nextIndex], sizeof(kernelT));
127 | 			nextIndex += sizeof(kernelT);
128 | 
129 | 			if(kernelT == 0) {
130 | 				kType = PARALLEL_FOR;
131 | 			} else if(kernelT == 1) {
132 | 				kType = PARALLEL_REDUCE;
133 | 			} else if(kernelT == 2) {
134 | 				kType = PARALLEL_SCAN;
135 | 			} else if(kernelT == 3) {
136 |         kType = REGION;
137 |       }
138 | 
139 | 			free(entry);
140 |                         return true;
141 | 		}
142 | 
143 | 		void writeToFile(FILE* output) {
144 | 			const uint32_t kernelNameLen = (uint32_t) strlen(kernelName);
145 | 
146 | 			const uint32_t recordLen =
147 | 				sizeof(uint32_t) +
148 | 				sizeof(char) * kernelNameLen +
149 | 				sizeof(uint64_t) +
150 | 				sizeof(double) +
151 | 				sizeof(double) +
152 | 				sizeof(uint32_t);
153 | 
154 | 			uint32_t nextIndex = 0;
155 | 			char* entry = (char*) malloc(recordLen);
156 | 
157 | 			copy(&entry[nextIndex], (char*) &kernelNameLen, sizeof(kernelNameLen));
158 | 			nextIndex += sizeof(kernelNameLen);
159 | 
160 | 			copy(&entry[nextIndex], kernelName, kernelNameLen);
161 | 			nextIndex += kernelNameLen;
162 | 
163 | 			copy(&entry[nextIndex], (char*) &callCount, sizeof(callCount));
164 | 			nextIndex += sizeof(callCount);
165 | 
166 | 			copy(&entry[nextIndex], (char*) &time, sizeof(time));
167 | 			nextIndex += sizeof(time);
168 | 
169 | 			copy(&entry[nextIndex], (char*) &timeSq, sizeof(timeSq));
170 | 			nextIndex += sizeof(timeSq);
171 | 
172 | 			uint32_t kernelTypeOutput = (uint32_t) kType;
173 | 			copy(&entry[nextIndex], (char*) &kernelTypeOutput, sizeof(kernelTypeOutput));
174 | 			nextIndex += sizeof(kernelTypeOutput);
175 | 
176 | 			fwrite(&recordLen, sizeof(uint32_t), 1, output);
177 | 			fwrite(entry, recordLen, 1, output);
178 | 			free(entry);
179 | 		}
180 | 
181 | 	private:
182 | 		void copy(char* dest, const char* src, uint32_t len) {
183 | 			for(uint32_t i = 0; i < len; i++) {
184 | 				dest[i] = src[i];
185 | 			}
186 | 		}
187 | 
188 | 		char* kernelName;
189 | 		uint64_t callCount;
190 | 		double time;
191 | 		double timeSq;
192 | 		double startTime;
193 | 		KernelExecutionType kType;
194 | };
195 | 
196 | #endif
197 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/kokkos-tools/kp_kernel_timer.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdio.h>
  3 | #include <inttypes.h>
  4 | #include <execinfo.h>
  5 | #include <cstdlib>
  6 | #include <cstring>
  7 | #include <map>
  8 | #include <vector>
  9 | #include <algorithm>
 10 | #include <string>
 11 | #include <sys/time.h>
 12 | #include <cxxabi.h>
 13 | #include <unistd.h>
 14 | #include "kp_kernel_info.h"
 15 | 
 16 | bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) {
 17 | 	return left->getTime() > right->getTime();
 18 | };
 19 | 
// File-local state of the profiling library.

// Kernel counter; not modified by the code visible here (presumably bumped by
// the begin_parallel_* hooks, to be confirmed).
static uint64_t uniqID = 0;
// Record whose timer was started most recently by increment_counter().
static KernelPerformanceInfo* currentEntry;
// Every record created so far, keyed by kernel/region name.
static std::map<std::string, KernelPerformanceInfo*> count_map;
// seconds() value captured in kokkosp_init_library().
static double initTime;
// malloc-owned field separator for text output, set in kokkosp_init_library().
static char* outputDelimiter;
// Number of regions currently open; index of the next free `regions` slot.
static int current_region_level = 0;
// Stack of open regions, filled by increment_counter_region().
static KernelPerformanceInfo* regions[512];

#define MAX_STACK_SIZE 128
 29 | 
 30 | void increment_counter(const char* name, KernelExecutionType kType) {
 31 | 	std::string nameStr(name);
 32 | 
 33 | 	if(count_map.find(name) == count_map.end()) {
 34 | 		KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType);
 35 | 		count_map.insert(std::pair<std::string, KernelPerformanceInfo*>(nameStr, info));
 36 | 
 37 | 		currentEntry = info;
 38 | 	} else {
 39 | 		currentEntry = count_map[nameStr];
 40 | 	}
 41 | 
 42 | 	currentEntry->startTimer();
 43 | }
 44 | 
 45 | void increment_counter_region(const char* name, KernelExecutionType kType) {
 46 |         std::string nameStr(name);
 47 | 
 48 |         if(count_map.find(name) == count_map.end()) {
 49 |                 KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType);
 50 |                 count_map.insert(std::pair<std::string, KernelPerformanceInfo*>(nameStr, info));
 51 | 
 52 |                 regions[current_region_level] = info;
 53 |         } else {
 54 |                 regions[current_region_level] = count_map[nameStr];
 55 |         }
 56 | 
 57 |         regions[current_region_level]->startTimer();
 58 |         current_region_level++;
 59 | }
 60 | 
 61 | extern "C" void kokkosp_init_library(const int loadSeq,
 62 | 	const uint64_t interfaceVer,
 63 | 	const uint32_t devInfoCount,
 64 | 	void* deviceInfo) {
 65 | 
 66 | 	const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM");
 67 | 	if(NULL == output_delim_env) {
 68 | 		outputDelimiter = (char*) malloc(sizeof(char) * 2);
 69 | 		sprintf(outputDelimiter, "%c", ' ');
 70 | 	} else {
 71 | 		outputDelimiter = (char*) malloc(sizeof(char) * (strlen(output_delim_env) + 1));
 72 | 		sprintf(outputDelimiter, "%s", output_delim_env);
 73 | 	}
 74 | 
 75 | 	printf("KokkosP: Example Library Initialized (sequence is %d, version: %llu)\n", loadSeq, interfaceVer);
 76 | 
 77 | 	initTime = seconds();
 78 | }
 79 | 
 80 | extern "C" void kokkosp_finalize_library() {
 81 | 	double finishTime = seconds();
 82 | 	double kernelTimes = 0;
 83 | 
 84 | 	char* hostname = (char*) malloc(sizeof(char) * 256);
 85 | 	gethostname(hostname, 256);
 86 | 
 87 | 	char* fileOutput = (char*) malloc(sizeof(char) * 256);
 88 | 	sprintf(fileOutput, "%s-%d.dat", hostname, (int) getpid());
 89 | 
 90 | 	free(hostname);
 91 | 	FILE* output_data = fopen(fileOutput, "w");
 92 | 
 93 | 	const double totalExecuteTime = (finishTime - initTime);
 94 | 	fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data);
 95 | 
 96 | 	std::vector<KernelPerformanceInfo*> kernelList;
 97 | 
 98 | 	for(auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) {
 99 | 		kernel_itr->second->writeToFile(output_data);
100 | 	}
101 | 
102 | 	fclose(output_data);
103 | 
104 |   #define CWD_MAX 1024
105 |   char cwd[CWD_MAX];
106 |   getcwd(cwd, CWD_MAX);
107 |   printf("KokkosP: Kernel timing written to %s/%s \n", cwd, fileOutput);
108 | 
109 | 	/*printf("\n");
110 | 	printf("======================================================================\n");
111 | 	printf("KokkosP: Finalization of Profiling Library\n");
112 | 	printf("KokkosP: Executed a total of %llu kernels\n", uniqID);
113 | 
114 | 	std::vector<KernelPerformanceInfo*> kernelList;
115 | 
116 | 	for(auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) {
117 | 		kernelList.push_back(kernel_itr->second);
118 | 		kernelTimes += kernel_itr->second->getTime();
119 | 	}
120 | 
121 | 	std::sort(kernelList.begin(), kernelList.end(), compareKernelPerformanceInfo);
122 | 	const double totalExecuteTime = (finishTime - initTime);
123 | 
124 | 	if(0 == strcmp(outputDelimiter, " ")) {
125 | 		printf("KokkosP: %100s %14s %14s %6s %6s %14s %4s\n", "Kernel", "Calls",
126 | 			"s/Total", "\%/Ko", "\%/Tot", "s/Call", "Type");
127 | 	} else {
128 | 		printf("KokkosP: %s%s%s%s%s%s%s%s%s%s%s%s%s\n",
129 | 			"Kernel",
130 | 			outputDelimiter,
131 | 			"Calls",
132 | 			outputDelimiter,
133 | 			"s/Total",
134 | 			outputDelimiter,
135 | 			"\%/Ko",
136 | 			outputDelimiter,
137 | 			"\%/Tot",
138 | 			outputDelimiter,
139 | 			"s/Call",
140 | 			outputDelimiter,
141 | 			"Type");
142 | 	}
143 | 
144 | 	for(auto kernel_itr = kernelList.begin(); kernel_itr != kernelList.end(); kernel_itr++) {
145 | 		KernelPerformanceInfo* kernelInfo = *kernel_itr;
146 | 
147 | 		const uint64_t kCallCount = kernelInfo->getCallCount();
148 | 		const double   kTime      = kernelInfo->getTime();
149 | 		const double   kTimeMean  = kTime / (double) kCallCount;
150 | 
151 | 		const std::string& kName   = kernelInfo->getName();
152 | 		char* kType = const_cast<char*>("");
153 | 
154 | 		switch(kernelInfo->getKernelType()) {
155 | 		case PARALLEL_FOR:
156 | 			kType = const_cast<char*>("PFOR"); break;
157 | 		case PARALLEL_SCAN:
158 | 			kType = const_cast<char*>("SCAN"); break;
159 | 		case PARALLEL_REDUCE:
160 | 			kType = const_cast<char*>("RDCE"); break;
161 |                 case REGION
162 |                         kType = const_cast<char*>("REGI"); break;
163 | 		}
164 | 
165 | 		int demangleStatus;
166 | 		char* finalDemangle = abi::__cxa_demangle(kName.c_str(), 0, 0, &demangleStatus);
167 | 
168 | 		if(0 == strcmp(outputDelimiter, " ")) {
169 | 			printf("KokkosP: %s%s%14llu%s%14.5f%s%6.2f%s%6.2f%s%14.5f%s%4s\n",
170 | 				(0 == demangleStatus) ? finalDemangle : kName.c_str(),
171 | 				outputDelimiter,
172 | 				kCallCount,
173 | 				outputDelimiter,
174 | 				kTime,
175 | 				outputDelimiter,
176 | 				(kTime / kernelTimes) * 100.0,
177 | 				outputDelimiter,
178 | 				(kTime / totalExecuteTime) * 100.0,
179 | 				outputDelimiter,
180 | 				kTimeMean,
181 | 				outputDelimiter,
182 | 				kType
183 | 				);
184 | 		} else {
185 | 			printf("KokkosP: %s%s%llu%s%f%s%f%s%f%s%f%s%s\n",
186 | 				(0 == demangleStatus) ? finalDemangle : kName.c_str(),
187 | 				outputDelimiter,
188 | 				kCallCount,
189 | 				outputDelimiter,
190 | 				kTime,
191 | 				outputDelimiter,
192 | 				(kTime / kernelTimes) * 100.0,
193 | 				outputDelimiter,
194 | 				(kTime / totalExecuteTime) * 100.0,
195 | 				outputDelimiter,
196 | 				kTimeMean,
197 | 				outputDelimiter,
198 | 				kType
199 | 				);
200 | 		}
201 | 	}
202 | 
203 | 	printf("\n");
204 | 	printf("KokkosP: Total Execution Time:        %15.6f seconds.\n", totalExecuteTime);
205 | 	printf("KokkosP: Time in Kokkos Kernels:      %15.6f seconds.\n", kernelTimes);
206 | 	printf("KokkosP: Time spent outside Kokkos:   %15.6f seconds.\n", (totalExecuteTime - kernelTimes));
207 | 
208 | 	const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0;
209 | 	printf("KokkosP: Runtime in Kokkos Kernels:   %15.6f \%\n", percentKokkos);
210 | 	printf("KokkosP: Unique kernels:              %22llu \n", (uint64_t) count_map.size());
211 | 	printf("KokkosP: Parallel For Calls:          %22llu \n", uniqID);
212 | 
213 | 	printf("\n");
214 | 	printf("======================================================================\n");
215 | 	printf("\n");
216 | 
217 | 	if(NULL != outputDelimiter) {
218 | 		free(outputDelimiter);
219 | 	}*/
220 | 
221 | 
222 | }
223 | 
224 | extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) {
225 | 	*kID = uniqID++;
226 | 
227 | 	if( (NULL == name) || (strcmp("", name) == 0) ) {
228 | 		fprintf(stderr, "Error: kernel is empty\n");
229 | 		exit(-1);
230 | 	}
231 | 
232 | 	increment_counter(name, PARALLEL_FOR);
233 | }
234 | 
/*
 * Profiling hook for the end of a parallel_for kernel.
 * kID is not used; the call is forwarded to whatever entry the global
 * `currentEntry` points at (presumably the one selected by the matching
 * begin hook via increment_counter() - TODO confirm).
 */
extern "C" void kokkosp_end_parallel_for(const uint64_t kID) {
	currentEntry->addFromTimer();
}
238 | 
239 | extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) {
240 | 	*kID = uniqID++;
241 | 
242 | 	if( (NULL == name) || (strcmp("", name) == 0) ) {
243 | 		fprintf(stderr, "Error: kernel is empty\n");
244 | 		exit(-1);
245 | 	}
246 | 
247 | 	increment_counter(name, PARALLEL_SCAN);
248 | }
249 | 
/*
 * Profiling hook for the end of a parallel_scan kernel.
 * kID is not used; the call is forwarded to the entry the global
 * `currentEntry` points at (presumably set by the matching begin hook -
 * TODO confirm).
 */
extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) {
	currentEntry->addFromTimer();
}
253 | 
254 | extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) {
255 | 	*kID = uniqID++;
256 | 
257 | 	if( (NULL == name) || (strcmp("", name) == 0) ) {
258 | 		fprintf(stderr, "Error: kernel is empty\n");
259 | 		exit(-1);
260 | 	}
261 | 
262 | 	increment_counter(name, PARALLEL_REDUCE);
263 | }
264 | 
/*
 * Profiling hook for the end of a parallel_reduce kernel.
 * kID is not used; the call is forwarded to the entry the global
 * `currentEntry` points at (presumably set by the matching begin hook -
 * TODO confirm).
 */
extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) {
	currentEntry->addFromTimer();
}
268 | 
/*
 * Profiling hook called when a named region is opened.
 * Forwards the name to increment_counter_region() tagged REGION.
 * NOTE(review): presumably that helper also advances current_region_level,
 * which kokkosp_pop_profile_region() decrements - confirm.
 */
extern "C" void kokkosp_push_profile_region(char* regionName) {
        increment_counter_region(regionName, REGION);
}
272 | 
273 | extern "C" void kokkosp_pop_profile_region() {
274 |         current_region_level--;
275 |         regions[current_region_level]->addFromTimer();
276 | }
277 | 
278 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/main.py:
--------------------------------------------------------------------------------
  1 | from git import Repo
  2 | import subprocess
  3 | import os, shutil
  4 | 
# Used below to lazily raise an error carrying a message
class CustomError(Exception):
    """Raised by this script when a required source directory is missing."""
    pass
  8 | 
# Repository root, relative to the directory this script is launched from
repo_path = "../../"
r = Repo(repo_path)
repo_heads = r.heads # or its alias: r.branches
repo_heads_names = [h.name for h in repo_heads]

#kokkos_src = '/Users/bird/kokkos/'
#kokkos_install = '/Users/bird/kokkos/build/install'
#cabana_install = '/Users/bird/Cabana/build/build/install' # not a typo, it's in a dumb path

# Platforms to build for; the commented lines are alternative selections
#platforms = ["Serial", "CPU", "GPU", "UVM"]
platforms = ["Serial", "CPU", "GPU"]
#platforms = ["CPU", "GPU"]
#platforms = ["GPU"]
#platforms = ["CPU"]

# Compiler and architecture name passed to the dependency build scripts
CXX = "g++"
#arch = 'Volta70'
arch = 'Kepler35'

# Runs ./timing_lib.sh first; check_call raises if it exits non-zero
subprocess.check_call(['./timing_lib.sh'])

# Directory (relative to the cwd) that receives dependency copies and branch clones
this_build_dir = 'build'

# Per-platform build directory names, filled in by the dependency build loop
kokkos_dirs = {}
cabana_dirs = {}

home_dir = os.environ['HOME']

# Build Dependencies
# TODO: make this configurable
kokkos_root = os.path.join(home_dir,'kokkos')
cabana_root = os.path.join(home_dir,'Cabana')

# Check we can find Kokkos and Cabana
if not os.path.isdir(kokkos_root):
    raise CustomError("Can't find kokkos")
if not os.path.isdir(cabana_root):
    raise CustomError("Can't find Cabana")
 47 | 
 48 | # Copy Kokkos and Cabana to be inside this dir
 49 | def copy_and_overwrite(from_path, to_path):
 50 |     if os.path.exists(to_path):
 51 |         shutil.rmtree(to_path)
 52 |     shutil.copytree(from_path, to_path)
 53 | 
 54 | def copy_if_safe(from_path, to_path):
 55 |     if not os.path.isdir(to_path):
 56 |         shutil.copytree(from_path, to_path)
 57 | 
 58 | # only copy if they don't exist already
 59 | kokkos_new = os.path.join(this_build_dir,'kokkos')
 60 | copy_if_safe(kokkos_root, kokkos_new)
 61 | 
 62 | cabana_new = os.path.join(this_build_dir,'cabana')
 63 | copy_if_safe(cabana_root, cabana_new)
 64 | 
 65 | # Build Dependencies
 66 | for plat in platforms:
 67 |     install_dir = "build-" + plat
 68 | 
 69 |     # Do Build
 70 |     print("build_kokkos.sh " + CXX + " " + kokkos_new + " " + install_dir + " " + plat + " " + arch)
 71 |     subprocess.check_call(['./build_kokkos.sh', CXX, kokkos_new, install_dir, plat, arch])
 72 | 
 73 |     print("./build_cabana.sh " + " " + CXX + " " + os.path.join(kokkos_new,install_dir,'install') + " " + cabana_new + " " + install_dir + " " + plat)
 74 |     subprocess.check_call(['./build_cabana.sh', CXX, os.path.join(kokkos_new,install_dir,'install'), cabana_new, install_dir, plat])
 75 | 
 76 |     # Save dirs, relative to root
 77 |     cabana_dirs[plat] = install_dir
 78 |     kokkos_dirs[plat] = install_dir
 79 | 
 80 | 
 81 | # Iterate over *local* git branches
 82 | for branch in repo_heads_names:
 83 |     print("Working on branch " + branch)
 84 |     for plat in platforms:
 85 | 
 86 |         print(plat)
 87 |         # TODO: throughout these scripts we assume ./instal is the install dir! abstract it.
 88 |         cabana_install = os.path.join( cabana_dirs[plat], 'install')
 89 |         kokkos_install = os.path.join( kokkos_dirs[plat], 'install')
 90 | 
 91 |         # For each repo, check it out into a new folder and build it
 92 |         #clone_path = './' + branch
 93 |         clone_path = os.path.join('./', this_build_dir, branch)
 94 | 
 95 |         print("!!!! WORKING ON " + clone_path)
 96 | 
 97 |         # look to see if the folder already exists:
 98 |         if not os.path.isdir(clone_path):
 99 |             # if it does... delete it (!)
100 |             #print("Deleting " + clone_path)
101 |             # We need to delete where it will build only one platforms worth,
102 |             # or hoist the clone
103 |             #shutil.rmtree(clone_path + build??)
104 | 
105 |             # OR if it does... skip
106 |             #continue
107 | 
108 |             # clone it
109 |             cloned = Repo.clone_from(
110 |                 repo_path,
111 |                 clone_path,
112 |                 branch=branch
113 |             )
114 | 
115 |         pwd = os.getcwd()
116 | 
117 |         kokkos_full_path = os.path.join(pwd, kokkos_new, kokkos_install)
118 |         cabana_full_path = os.path.join(pwd, cabana_new, cabana_install)
119 |         print("kk full path " + kokkos_full_path)
120 | 
121 |         print("./build_and_run.sh " +  clone_path + " g++ " + kokkos_full_path + " " + cabana_full_path + " " + plat)
122 |         subprocess.check_call(['./build_and_run.sh', clone_path, "g++", kokkos_full_path, cabana_full_path, plat])
123 | 
124 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | gitpython
2 | 


--------------------------------------------------------------------------------
/scripts/run_scripts/timing_lib.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Runs make inside ./kokkos-tools (relative to the current directory).
# Abort if the directory is missing so make never runs in the wrong place;
# the script's exit status is otherwise that of make.
cd kokkos-tools || exit 1
make
4 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
# Collect the headers and sources that sit directly in this directory.
file(GLOB HEADERS "*.h")
file(GLOB SOURCES "*.cpp")

# Install the headers.
# NOTE(review): CMAKE_INSTALL_INCLUDEDIR is not set in this file; presumably
# GNUInstallDirs is included by a parent CMakeLists.txt - confirm.
install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# Library built from every globbed .cpp file.
add_library(CabanaPIC ${SOURCES})

# Build tree: this source dir and its binary dir; install tree: the include dir.
target_include_directories(CabanaPIC PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
  $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)

# PUBLIC: targets linking CabanaPIC also link Cabana::cabanacore.
target_link_libraries(CabanaPIC PUBLIC Cabana::cabanacore )

install(TARGETS CabanaPIC DESTINATION lib)
16 | 


--------------------------------------------------------------------------------
/src/accumulator.cpp:
--------------------------------------------------------------------------------
  1 | // TODO: add namespace?
  2 | 
  3 | #include "accumulator.h"
  4 | 
  5 | void clear_accumulator_array(
  6 |         field_array_t& fields,
  7 |         accumulator_array_t& accumulators,
  8 |         size_t, // TODO: we can probably pull these out of global params..
  9 |         size_t,
 10 |         size_t
 11 | )
 12 | {
 13 |     auto _clean_accumulator = KOKKOS_LAMBDA(const int i)
 14 |     {
 15 |         /*
 16 |            a0(i,JX_OFFSET+0) = 0;
 17 |            a0(i+y_offset,JX_OFFSET+1) = 0;
 18 |            a0(i+z_offset,JX_OFFSET+2) = 0;
 19 |            a0(i+y_offset+z_offset,JX_OFFSET+3) = 0;
 20 | 
 21 |            a0(i,JY_OFFSET+0) = 0;
 22 |            a0(i+z_offset,JY_OFFSET+1) = 0;
 23 |            a0(i+y_offset,JY_OFFSET+2) = 0;
 24 |            a0(i+y_offset+z_offset,JY_OFFSET+3) = 0;
 25 | 
 26 |            a0(i,JZ_OFFSET+0) = 0;
 27 |            a0(i+x_offset,JZ_OFFSET+1) = 0;
 28 |            a0(i+y_offset,JZ_OFFSET+2) = 0;
 29 |            a0(i+x_offset+y_offset,JZ_OFFSET+3) = 0;
 30 |          */
 31 | 
 32 |       for (int j = 0; j < ACCUMULATOR_VAR_COUNT; j++)
 33 |       {
 34 |           for (int k = 0; k < ACCUMULATOR_ARRAY_LENGTH; k++)
 35 |           {
 36 |               accumulators(i, j, k) = 0.0;
 37 |           }
 38 |       }
 39 |     };
 40 | 
 41 |     Kokkos::RangePolicy<ExecutionSpace> exec_policy( 0, fields.size() );
 42 |     Kokkos::parallel_for( "clean_accumulator()", exec_policy, _clean_accumulator );
 43 | }
 44 | 
/**
 * Write the jfx/jfy/jfz field slices from the accumulator array.
 *
 * For every (x,y,z) in [ng, n+ng+1) per dimension, each current component
 * is overwritten (not added to) with a scaled sum of four accumulator
 * entries: the voxel itself plus three "down" neighbours (index - 1) in the
 * two directions other than the component's own.
 *
 * nx, ny, nz, ng are forwarded to the VOXEL index macro; dx, dy, dz, dt only
 * enter through the scale factors cx, cy, cz.
 */
void unload_accumulator_array(
        field_array_t& fields,
        accumulator_array_t& accumulators,
        size_t nx, // TODO: we can probably pull these out of global params..
        size_t ny,
        size_t nz,
        size_t ng,
        real_t dx,
        real_t dy,
        real_t dz,
        real_t dt
)
{

    auto jfx = Cabana::slice<FIELD_JFX>(fields);
    auto jfy = Cabana::slice<FIELD_JFY>(fields);
    auto jfz = Cabana::slice<FIELD_JFZ>(fields);

    // TODO: give these real values
    //    printf("cx %e dy %e dz %e dt %e \n", dy, dz, dt);
    //real_t cx = 0.25 * (1.0 / (dy * dz)) / dt;
    // Scale factors: a quarter divided by (area of the face normal to the
    // component's axis, times dt).
    real_t cx = 0.25 / (dy * dz * dt);
    real_t cy = 0.25 / (dz * dx * dt);
    real_t cz = 0.25 / (dx * dy * dt);

    // TODO: we have to be careful we don't reach past the ghosts here
    auto _unload_accumulator = KOKKOS_LAMBDA( const int x, const int y, const int z )
    {
        // Original:
        // f0->jfx += cx*( a0->jx[0] + ay->jx[1] + az->jx[2] + ayz->jx[3] );
        int i = VOXEL(x,y,z, nx,ny,nz,ng);

        // TODO: this level of re-calculation is overkill
        // Linear indices of the neighbours one step down in each direction.
        // NOTE(review): with ng == 0 the x-1/y-1/z-1 arguments become -1 at
        // the lower edge; confirm VOXEL tolerates that.
        size_t x_down  = VOXEL(x-1, y,   z,   nx,ny,nz,ng);
        size_t y_down  = VOXEL(x,   y-1, z,   nx,ny,nz,ng);
        size_t z_down  = VOXEL(x,   y,   z-1, nx,ny,nz,ng);

        size_t xz_down = VOXEL(x-1, y,   z-1, nx,ny,nz,ng);
        size_t xy_down = VOXEL(x-1, y-1, z,   nx,ny,nz,ng);
        size_t yz_down = VOXEL(x,   y-1, z-1, nx,ny,nz,ng);

        // jx: this voxel plus the -y, -z and -y-z neighbours.
        jfx(i) = cx*(
                    accumulators(i,       accumulator_var::jx, 0) +
                    accumulators(y_down,  accumulator_var::jx, 1) +
                    accumulators(z_down,  accumulator_var::jx, 2) +
                    accumulators(yz_down, accumulator_var::jx, 3)
                );

        // jy: this voxel plus the -z, -x and -x-z neighbours.
        jfy(i) = cy*(
                    accumulators(i,       accumulator_var::jy, 0) +
                    accumulators(z_down,  accumulator_var::jy, 1) +
                    accumulators(x_down,  accumulator_var::jy, 2) +
                    accumulators(xz_down, accumulator_var::jy, 3)
                );

        // jz: this voxel plus the -x, -y and -x-y neighbours.
        jfz(i) = cz*(
                    accumulators(i,       accumulator_var::jz, 0) +
                    accumulators(x_down,  accumulator_var::jz, 1) +
                    accumulators(y_down,  accumulator_var::jz, 2) +
                    accumulators(xy_down, accumulator_var::jz, 3)
                );
    };

    //may not be enough if particles run into ghost cells
    // Iterates [ng, n+ng+1) in each dimension, i.e. one index past n+ng-1.
    Kokkos::MDRangePolicy< Kokkos::Rank<3> > non_ghost_policy( {ng,ng,ng}, {nx+ng+1, ny+ng+1, nz+ng+1} ); // Try not to into ghosts // TODO: dry this
    Kokkos::parallel_for( "unload_accumulator()", non_ghost_policy, _unload_accumulator );

    /* // Crib sheet for old variable names
    a0  = &a(x,  y,  z  );
    ax  = &a(x-1,y,  z  );
    ay  = &a(x,  y-1,z  );
    az  = &a(x,  y,  z-1);
    ayz = &a(x,  y-1,z-1);
    azx = &a(x-1,y,  z-1);
    axy = &a(x-1,y-1,z  )
    */

}
123 | 


--------------------------------------------------------------------------------
/src/accumulator.h:
--------------------------------------------------------------------------------
 1 | #ifndef ACCUMULATOR_T
 2 | #define ACCUMULATOR_T
 3 | 
 4 | #include <cstdint>
 5 | #include <cstddef>
 6 | 
 7 | #include <Cabana_Types.hpp>
 8 | #include <Cabana_AoSoA.hpp>
 9 | #include <Cabana_Slice.hpp>
10 | 
11 | #include "types.h"
12 | #include "grid.h"
13 | #include "fields.h"
14 | 
// Zeroes every entry of `accumulators` for indices [0, fields.size()).
// The definition in accumulator.cpp leaves nx, ny and nz unnamed, i.e. it
// does not use them.
void clear_accumulator_array(
        field_array_t& fields,
        accumulator_array_t& accumulators,
        size_t nx, // TODO: we can probably pull these out of global params..
        size_t ny,
        size_t nz
);
22 | 
// Overwrites the jfx/jfy/jfz slices of `fields` with scaled sums of entries
// from `accumulators`.
//   nx, ny, nz, ng - grid extents and ghost count used for voxel indexing
//   dx, dy, dz, dt - cell sizes and time step used in the scale factors
void unload_accumulator_array(
        field_array_t& fields,
        accumulator_array_t& accumulators,
        size_t nx, // TODO: we can probably pull these out of global params..
        size_t ny,
        size_t nz,
        size_t ng,
        real_t dx,
        real_t dy,
        real_t dz,
        real_t dt
);
35 | 
36 | #endif // header guard
37 | 


--------------------------------------------------------------------------------
/src/grid.h:
--------------------------------------------------------------------------------
  1 | #ifndef GRID_T
  2 | #define GRID_T
  3 | 
// Negative codes naming locally applied boundary conditions, for fields
// ("Phase 2") and for particles ("Phase 3"). Several names share a numeric
// value: anti_symmetric_fields, pec_fields, metal_fields and
// reflect_particles are all -1; symmetric_fields and absorb_particles are
// both -2. Field and particle codes therefore cannot be told apart by value.
enum grid_enums {

  // Phase 2 boundary conditions
  anti_symmetric_fields = -1, // E_tang = 0
  pec_fields            = -1,
  metal_fields          = -1,
  symmetric_fields      = -2, // B_tang = 0, B_norm = 0
  pmc_fields            = -3, // B_tang = 0, B_norm floats
  absorb_fields         = -4, // Gamma = 0

  // Phase 3 boundary conditions
  reflect_particles = -1, // Cell boundary should reflect particles
  absorb_particles  = -2  // Cell boundary should absorb particles

  // Symmetry in the field boundary conditions refers to image charge
  // sign
  //
  // Anti-symmetric -> Image charges are opposite signed (ideal metal)
  //                   Boundary rho/j are accumulated over partial voxel+image
  // Symmetric      -> Image charges are same signed (symmetry plane or pmc)
  //                   Boundary rho/j are accumulated over partial voxel+image
  // Absorbing      -> No image charges
  //                   Boundary rho/j are accumulated over partial voxel only
  //
  // rho     -> Anti-symmetric      | rho     -> Symmetric
  // jf_tang -> Anti-symmetric      | jf_tang -> Symmetric
  // E_tang  -> Anti-symmetric      | E_tang  -> Symmetric
  // B_norm  -> Anti-symmetric + DC | B_norm  -> Symmetric      (see note)
  // B_tang  -> Symmetric           | B_tang  -> Anti-symmetric
  // E_norm  -> Symmetric           | E_norm  -> Anti-symmetric (see note)
  // div B   -> Symmetric           | div B   -> Anti-symmetric
  //
  // Note: B_norm is tricky. For a symmetry plane, B_norm on the
  // boundary must be zero as there are no magnetic charges (a
  // non-zero B_norm would imply an infinitesimal layer of magnetic
  // charge). However, if a symmetric boundary is interpreted as a
  // perfect magnetic conductor, B_norm could be present due to
  // magnetic conduction surface charges. Even though there are no
  // bulk volumetric magnetic charges to induce a surface magnetic
  // charge, I think that radiation/waveguide modes/etc could (the
  // total surface magnetic charge in the simulation would be zero
  // though). As a result, symmetric and pmc boundary conditions are
  // treated separately. Symmetric and pmc boundaries are identical
  // except the symmetric boundaries explicitly zero boundary
  // B_norm. Note: anti-symmetric and pec boundary conditions would
  // have the same issue if norm E was located directly on the
  // boundary. However, it is not so this problem does not arise.
  //
  // Note: Absorbing boundary conditions make no effort to clean
  // divergence errors on them. They assume that the ghost div b is
  // zero and force the surface div e on them to be zero. This means
  // ghost norm e can be set to any value on absorbing boundaries.

};
 58 | 
// Description of the local simulation grid: units, time stepping, geometry,
// boundary conditions and voxel ownership/neighbour tables.
// NOTE(review): this header uses real_t and int64_t without including their
// definitions itself; it relies on the includer providing them first.
typedef struct grid {

  // System of units
  real_t dt, cvac, eps0;

  // Time stepper.  The simulation time is given by
  // t = g->t0 + (double)g->dt*(double)g->step
  int64_t step;             // Current timestep
  double t0;                // Simulation time corresponding to step 0

  // Phase 2 grid data structures
  real_t x0, y0, z0;         // Min corner local domain (must be coherent)
  real_t x1, y1, z1;         // Max corner local domain (must be coherent)
  int   nx, ny, nz;         // Local voxel mesh resolution.  Voxels are
                            // indexed FORTRAN style 0:nx+1,0:ny+1,0:nz+1
                            // with voxels 1:nx,1:ny,1:nz being non-ghost
                            // voxels.
  real_t dx, dy, dz, dV;     // Cell dimensions and volume (CONVENIENCE ...
                            // USE x0,x1 WHEN DECIDING WHICH NODE TO USE!)
  real_t rdx, rdy, rdz, r8V; // Inverse voxel dimensions and one over
                            // eight times the voxel volume (CONVENIENCE)
  int   sx, sy, sz, nv;     // Voxel indexing x-, y-,z- strides and the
                            // number of local voxels (including ghosts,
                            // (nx+2)(ny+2)(nz+2)), (CONVENIENCE)
  int   bc[27];             // (-1:1,-1:1,-1:1) FORTRAN indexed array of
                            // boundary conditions to apply at domain edge
                            // 0 ... nproc-1 ... comm boundary condition
                            // <0 ... locally applied boundary condition

  // Phase 3 grid data structures
  // NOTE: VOXEL INDEXING LIMITS NUMBER OF VOXELS TO 2^31 (INCLUDING
  // GHOSTS) PER NODE.  NEIGHBOR INDEXING FURTHER LIMITS TO
  // (2^31)/6.  BOUNDARY CONDITION HANDLING LIMITS TO 2^28 PER NODE
  // EMITTER COMPONENT ID INDEXING FURTHER LIMITS TO 2^26 PER NODE.
  // THE LIMIT IS 2^63 OVER ALL NODES THOUGH.
  int64_t* range;
                          // (0:nproc) indexed array giving range of
                          // global indexes of voxel owned by each
                          // processor.  Replicated on each processor.
                          // (range[rank]:range[rank+1]-1) are global
                          // voxels owned by processor "rank".  Note:
                          // range[rank+1]-range[rank] <~ 2^31 / 6

  int64_t* neighbor;
                          // (0:5,0:local_num_voxel-1) FORTRAN indexed
                          // array neighbor(0:5,lidx) are the global
                          // indexes of neighboring voxels of the
                          // voxel with local index "lidx".  Negative
                          // if neighbor is a boundary condition.

  int64_t rangel, rangeh; // Redundant for move_p performance reasons:
                          //   rangel = range[rank]
                          //   rangeh = range[rank+1]-1.
                          // Note: rangeh-rangel <~ 2^26

  // Nearest neighbor communications ports
  //mp_t * mp;

} grid_t;
118 | 
119 | #endif // header guard
120 | 


--------------------------------------------------------------------------------
/src/helpers.h:
--------------------------------------------------------------------------------
  1 | #ifndef pic_helper_h
  2 | #define pic_helper_h
  3 | 
  4 | #include "logger.h"
  5 | #include "Cabana_ExecutionPolicy.hpp" // SIMDpolicy
  6 | #include "Cabana_Parallel.hpp" // Simd parallel for
  7 | #include "Cabana_DeepCopy.hpp" // Cabana::deep_copy
  8 | 
  9 | #include "input/deck.h"
 10 | 
 11 | // Converts from an index that doesn't know about ghosts to one that does
 12 | //KOKKOS_INLINE_FUNCTION
 13 | int allow_for_ghosts(int pre_ghost)
 14 | {
 15 | 
 16 |     size_t ix, iy, iz;
 17 |     RANK_TO_INDEX(pre_ghost, ix, iy, iz,
 18 |             deck.nx,
 19 |             deck.ny);
 20 |     //    printf("%ld\n",ix);
 21 |     int with_ghost = VOXEL(ix, iy, iz,
 22 |             deck.nx,
 23 |             deck.ny,
 24 |             deck.nz,
 25 |             deck.num_ghosts);
 26 | 
 27 |     return with_ghost;
 28 | }
 29 | 
 30 | // Function to print out the data for every particle.
 31 | void dump_particles( FILE * fp, const particle_list_t d_particles, const real_t xmin, const real_t, const real_t, const real_t dx, const real_t, const real_t, size_t nx,size_t ny,size_t, size_t ng)
 32 | {
 33 | 
 34 |     // Host
 35 |     particle_list_t::host_mirror_type particles("host_particles", d_particles.size());
 36 | 
 37 |     // Copy device particles to host
 38 |     Cabana::deep_copy(particles, d_particles);
 39 | 
 40 |     auto position_x = Cabana::slice<PositionX>(particles);
 41 |     auto position_y = Cabana::slice<PositionY>(particles);
 42 |     auto position_z = Cabana::slice<PositionZ>(particles);
 43 | 
 44 |     auto velocity_x = Cabana::slice<VelocityX>(particles);
 45 |     auto velocity_y = Cabana::slice<VelocityY>(particles);
 46 |     auto velocity_z = Cabana::slice<VelocityZ>(particles);
 47 | 
 48 |     auto weight = Cabana::slice<Weight>(particles);
 49 |     auto cell = Cabana::slice<Cell_Index>(particles);
 50 | 
 51 |     for (size_t i = 0; i < particles.size(); i++)
 52 |     {
 53 |         size_t ix,iy,iz;
 54 |         int ii = cell(i);
 55 | 
 56 |         RANK_TO_INDEX(ii, ix,iy,iz,nx+2*ng,ny+2*ng);
 57 | 
 58 |         real_t x = xmin + ( ix - 1 + ( position_x(i)+ 1.0 ) * 0.5) * dx;
 59 |         real_t v = velocity_x( i );
 60 | 
 61 |         fprintf(fp, "%e  %e ", x,v);
 62 |     }
 63 |     fprintf(fp, "\n");
 64 |     /*
 65 |     auto _print =
 66 |         KOKKOS_LAMBDA( const int s, const int i )
 67 |         {
 68 |                 // printf("Struct id %d offset %d \n", s, i);
 69 |                 // printf("Position x %e y %e z %e \n", position_x.access(s,i), position_y.access(s,i), position_z.access(s,i) );
 70 | 	  size_t ix,iy,iz;
 71 | 	  int ii = cell.access(s, i);
 72 | 	  RANK_TO_INDEX(ii, ix,iy,iz,nx+2*ng,ny+2*ng);
 73 | 	  real_t x = xmin + (ix-1+(position_x.access(s,i)+1.0)*0.5)*dx;
 74 | 	  real_t v = velocity_x.access(s,i);
 75 | 	  fprintf(fp, "%e  %e ", x,v);
 76 | 
 77 | 	  //	  real_t y = ymin + (iy-1+(position_y.access(s,i)+1.0)*0.5)*dy;
 78 | 	  //real_t z = zmin + (iz-1+(position_z.access(s,i)+1.0)*0.5)*dz;
 79 | 	  //	  fprintf(fp, "%e  %e  %e %d %d %d \n", x,y,z,ix,iy,iz);
 80 | 
 81 |         };
 82 | 
 83 |     // TODO: How much sense does printing in parallel make???
 84 |     Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
 85 |         vec_policy( 0, particles.size() );
 86 | 
 87 |     //logger << "particles.numSoA() " << particles.numSoA() << std::endl;
 88 |     //logger << "particles.numSoA() " << particles.numSoA() << std::endl;
 89 | 
 90 |     Cabana::simd_parallel_for( vec_policy, _print, "_print()" );
 91 |     */
 92 |     //    std::cout << std::endl;
 93 | 
 94 | }
 95 | 
 96 | void print_fields( const field_array_t& fields )
 97 | {
 98 |     auto ex = Cabana::slice<FIELD_EX>(fields);
 99 |     auto ey = Cabana::slice<FIELD_EY>(fields);
100 |     auto ez = Cabana::slice<FIELD_EZ>(fields);
101 | 
102 |     auto jfx = Cabana::slice<FIELD_JFX>(fields);
103 |     auto jfy = Cabana::slice<FIELD_JFY>(fields);
104 |     auto jfz = Cabana::slice<FIELD_JFZ>(fields);
105 | 
106 |     auto _print_fields =
107 |         KOKKOS_LAMBDA( const int i )
108 |         {
109 |             printf("%d e x %e y %e z %e jfx %e jfy %e jfz %e \n", i, ex(i), ey(i), ez(i), jfx(i), jfy(i), jfz(i) );
110 |         };
111 | 
112 |     Kokkos::RangePolicy<ExecutionSpace> exec_policy( 0, fields.size() );
113 |     Kokkos::parallel_for( "print()", exec_policy, _print_fields );
114 | 
115 |     std::cout << std::endl;
116 | 
117 | }
118 | 
119 | #endif // pic_helper_h
120 | 


--------------------------------------------------------------------------------
/src/input/deck.h:
--------------------------------------------------------------------------------
  1 | #ifndef INPUT_DECK_H
  2 | #define INPUT_DECK_H
  3 | 
  4 | #include <cstddef> // size_t
  5 | #include <iostream>
  6 | 
  7 | #include "types.h"
  8 | 
// Boundary condition selector. Reflect is 0; Periodic takes the next
// enumerator value (1).
enum Boundary {
    Reflect = 0,
    Periodic
};
 13 | 
 14 | 
 15 | class Run_Finalizer {
 16 |     public:
 17 |         virtual void finalize()
 18 |         {
 19 |             // Default finalization is blank
 20 |         }
 21 | };
 22 | 
 23 | class Field_Initializer {
 24 | 
 25 |     public:
 26 |         using real_ = real_t;
 27 |         Field_Initializer() { } // blank
 28 | 
 29 |         virtual void init(
 30 |                 field_array_t& fields,
 31 |                 size_t,
 32 |                 size_t,
 33 |                 size_t,
 34 |                 size_t,
 35 |                 real_, // TODO: do we prefer xmin or Lx?
 36 |                 real_,
 37 |                 real_,
 38 |                 real_,
 39 |                 real_,
 40 |                 real_
 41 |         )
 42 |         {
 43 |             std::cout << "Default field init" << std::endl;
 44 | 
 45 |             // Zero fields
 46 |             auto ex = Cabana::slice<FIELD_EX>(fields);
 47 |             auto ey = Cabana::slice<FIELD_EY>(fields);
 48 |             auto ez = Cabana::slice<FIELD_EZ>(fields);
 49 | 
 50 |             auto cbx = Cabana::slice<FIELD_CBX>(fields);
 51 |             auto cby = Cabana::slice<FIELD_CBY>(fields);
 52 |             auto cbz = Cabana::slice<FIELD_CBZ>(fields);
 53 | 
 54 |             auto _init_fields =
 55 |                 KOKKOS_LAMBDA( const int i )
 56 |                 {
 57 |                     ex(i) = 0.0;
 58 |                     ey(i) = 0.0;
 59 |                     ez(i) = 0.0;
 60 |                     cbx(i) = 0.0;
 61 |                     cby(i) = 0.0;
 62 |                     cbz(i) = 0.0;
 63 |                 };
 64 | 
 65 |             Kokkos::parallel_for( "zero_fields()", fields.size(), _init_fields );
 66 | 
 67 |         }
 68 | };
 69 | 
 70 | // TODO: we can eventually provide a suite of default/sane initializers, such
 71 | // as ones that give the same RNG sequence over multiple procs
 72 | class Particle_Initializer {
 73 | 
 74 |     public:
 75 |         using real_ = real_t;
 76 | 
 77 |         Particle_Initializer() { } // blank
 78 | 
 79 |         virtual void init(
 80 |                 particle_list_t& particles,
 81 |                 size_t nx,
 82 |                 size_t ny,
 83 |                 size_t, // unnamed and unused here (the deck passes nz)
 84 |                 size_t, // unnamed and unused here (the deck passes ng)
 85 |                 real_ dxp,
 86 |                 size_t nppc,
 87 |                 real_ w,
 88 |                 real_ v0,
 89 |                 real_, // unnamed and unused here (the deck passes len_x_global)
 90 |                 real_, // unnamed and unused here (the deck passes len_y_global)
 91 |                 real_  // unnamed and unused here (the deck passes len_z_global)
 92 |                 )
 93 |         {
 94 |             // Default setup: paired particles share a y position and get opposite x velocities.
 95 |             std::cout << "Default particle init" << std::endl;
 96 | 
 97 |             auto position_x = Cabana::slice<PositionX>(particles);
 98 |             auto position_y = Cabana::slice<PositionY>(particles);
 99 |             auto position_z = Cabana::slice<PositionZ>(particles);
100 | 
101 |             auto velocity_x = Cabana::slice<VelocityX>(particles);
102 |             auto velocity_y = Cabana::slice<VelocityY>(particles);
103 |             auto velocity_z = Cabana::slice<VelocityZ>(particles);
104 | 
105 |             auto weight = Cabana::slice<Weight>(particles);
106 |             auto cell = Cabana::slice<Cell_Index>(particles);
107 |             // Diagnostics; size() is a size_t, so it is printed with %zu rather than %ld.
108 |             printf("dxp = %e \n", dxp);
109 |             printf("part list len = %zu \n", particles.size());
110 | 
111 |             auto _init =
112 |                 KOKKOS_LAMBDA( const int s, const int i )
113 |                 {
114 |                     // Initialize position. pi2 is the flat particle index for (s,i).
115 |                     int sign =  -1;
116 |                     size_t pi2 = (s)*particle_list_t::vector_length+i;
117 |                     size_t pi = ((pi2) / 2);
118 |                     if (pi2%2 == 0) {
119 |                         sign = 1;
120 |                     }
121 |                     int pic = (2*pi)%nppc; //Every 2 particles have the same "pic".
122 | 
123 |                     real_ x = pic*dxp+0.5*dxp-1.0;
124 |                     int pre_ghost = (2*pi/nppc); //pre_ghost ranges [0,nx*ny*nz).
125 | 
126 |                     //int ix,iy,iz;
127 |                     //RANK_TO_INDEX(pre_ghost, ix, iy, iz, nx, ny);
128 |                     //ix += ng;
129 |                     //iy += ng;
130 |                     //iz += ng;
131 | 
132 |                     position_x.access(s,i) = 0.0;
133 |                     position_y.access(s,i) = x;
134 |                     position_z.access(s,i) = 0.0;
135 | 
136 |                     weight.access(s,i) = w;
137 | 
138 |                     //cell.access(s,i) = VOXEL(ix,iy,iz,nx,ny,nz,ng);
139 |                     cell.access(s,i) = pre_ghost*(nx+2) + (nx+2)*(ny+2) + (nx+2) + 1; // NOTE(review): the +2 terms look like a hard-coded single ghost layer -- confirm
140 | 
141 |                     // Initialize velocity.(each cell length is 2)
142 |                     real_ gam = 1.0/sqrt(1.0-v0*v0);
143 | 
144 |                     real_t na = 0.0001*sin(2.0*3.1415926*((x+1.0+pre_ghost*2)/(2*ny)));
145 | 
146 |                     //velocity_x.access(s,i) = sign * v0*gam; // *(1.0-na*sign); //0;
147 |                     velocity_x.access(s,i) = sign *v0*gam*(1.0+na*sign);
148 |                     velocity_y.access(s,i) = 0;
149 |                     velocity_z.access(s,i) = 0; //na*sign;  //sign * v0 *gam*(1.0+na*sign);
150 |                     //velocity_z.access(s,i) = 1e-7*sign;
151 | 
152 |                     //printf("%d %d %d pre-g %d putting particle at y=%e with ux = %e pi = %d \n", pic, s, i, pre_ghost, position_y.access(s,i), velocity_x.access(s,i), cell.access(s,i) );
153 |                 };
154 | 
155 |             Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
156 |                 vec_policy( 0, particles.size() );
157 |             Cabana::simd_parallel_for( vec_policy, _init, "init()" );
158 |         }
159 | };
160 | 
161 | class _Input_Deck {
162 |     public:
163 |         // Having this separate lets us switch to double if required
164 |         using real_ = real_t;
165 | 
166 |         // I would prefer that this wasn't a pointer, but it seems to be
167 |         // necessary. We need it to be able to do a vtable lookup for the init
168 |         // function call, which means we need a ref or a pointer. The ref has
169 |         // to be initialized which means the initialization of Particle_Initializer
170 |         // would leak to the init site of _Input_Deck. A normal (non ref, non
171 |         // pointer) variable would avoid the vtable lookup and always call the
172 |         // "default". Perhaps there is a better way?
173 |         // The original default pointer is pretty likely to leak in custom
174 |         // decks... at least it doesn't contain a lot of state
175 |         Particle_Initializer* particle_initer;
176 | 
177 |         Field_Initializer* field_initer;
178 | 
179 |         // Give the user a chance to hook into the end of the run, to do final
180 |         // things like correctness checks and timing dumping
181 |         Run_Finalizer* run_finalizer;
182 | 
183 |         _Input_Deck() :
184 |             particle_initer(new Particle_Initializer),
185 |             field_initer(new Field_Initializer),
186 |             run_finalizer(new Run_Finalizer)
187 |         {
188 |             // empty
189 |         }
190 |         // Returns 1/sqrt( sum of (n/l)^2 over the axes that have more than one cell ).
191 |         static real_ courant_length( real_ lx, real_ ly, real_ lz,
192 |                 size_t nx, size_t ny, size_t nz ) {
193 |             real_ w0, w1 = 0;
194 |             if( nx>1 ) w0 = nx/lx, w1 += w0*w0;
195 |             if( ny>1 ) w0 = ny/ly, w1 += w0*w0;
196 |             if( nz>1 ) w0 = nz/lz, w1 += w0*w0;
197 |             return sqrt(1/w1); // NOTE: divides by zero if no axis has more than one cell
198 |         }
199 | 
200 |         // We could do this in the destructor, but this has 2 advantages:
201 |         // 1) It's more explicit
202 |         // 2) We have finer grained control, so we can more easily ensure it
203 |         // happens before valuable data is freed
204 |         void finalize()
205 |         {
206 |             run_finalizer->finalize();
207 |         }
208 |         // Forwards to particle_initer->init(), appending the global domain lengths.
209 |         void initialize_particles(
210 |                 particle_list_t& particles,
211 |                 size_t nx,
212 |                 size_t ny,
213 |                 size_t nz,
214 |                 size_t ng,
215 |                 real_ dxp,
216 |                 size_t nppc,
217 |                 real_ w,
218 |                 real_ v0
219 |         )
220 |         {
221 |             particle_initer->init(particles, nx, ny, nz, ng, dxp, nppc, w, v0,
222 |                     len_x_global, len_y_global, len_z_global);
223 |         }
224 |         // Forwards all arguments unchanged to field_initer->init().
225 |         void initialize_fields(
226 |                 field_array_t& fields,
227 |                 size_t nx,
228 |                 size_t ny,
229 |                 size_t nz,
230 |                 size_t ng,
231 |                 real_ Lx, // TODO: do we prefer xmin or Lx?
232 |                 real_ Ly,
233 |                 real_ Lz,
234 |                 real_ dx,
235 |                 real_ dy,
236 |                 real_ dz
237 |         )
238 |         {
239 |             field_initer->init(
240 |                     fields,
241 |                     nx,
242 |                     ny,
243 |                     nz,
244 |                     ng,
245 |                     Lx,
246 |                     Ly,
247 |                     Lz,
248 |                     dx,
249 |                     dy,
250 |                     dz
251 |             );
252 |         }
253 | 
254 |         real_ de = 1.0; // Length normalization (electron inertial length)
255 |         real_ ec = 1.0; // Charge normalization
256 |         real_ me = 1.0; // Mass normalization
257 |         real_ mu = 1.0; // permeability of free space
258 |         real_ c = 1.0; // Speed of light
259 |         real_ eps = 1.0; // permittivity of free space
260 | 
261 |         real_ qsp = -ec;
262 | 
263 |         // Params
264 |         real_ n0 = 1.0; // Background plasma density
265 |         size_t num_species = 1;
266 |         size_t nx = 16; // TODO: why is nx a size_t not an int?
267 |         size_t ny = 1;
268 |         size_t nz = 1;
269 | 
270 |         size_t num_ghosts = 1;
271 |         size_t nppc = 1;
272 |         real_ dt = 1.0;
273 |         int num_steps = 2;
274 | 
275 |         // Assume domain starts at [0,0,0] and goes to [len,len,len]
276 |         real_ len_x_global = 1.0;
277 |         real_ len_y_global = 1.0;
278 |         real_ len_z_global = 1.0;
279 | 
280 |         real_t Npe = -1; // negative means "not set"; filled in by derive_params()
281 |         real_t Ne = -1; //(nppc*nx*ny*nz); negative means "not set"
282 | 
283 |         //real_ local_x_min;
284 |         //real_ local_y_min;
285 |         //real_ local_z_min;
286 |         //real_ local_x_max;
287 |         //real_ local_y_max;
288 |         //real_ local_z_max;
289 |         real_ v0 = 1.0; //drift velocity
290 | 
291 |         //size_t ghost_offset; // Where the cell id needs to start for a "real" cell, basically nx
292 |         //size_t num_real_cells;
293 | 
294 |         //Boundary BOUNDARY_TYPE = Boundary::Reflect;
295 |         Boundary BOUNDARY_TYPE = Boundary::Periodic;
296 | 
297 |         ////////////////////////// DERIVED /////////////////
298 |         // Don't set these, we can derive them instead
299 |         real_ dx;
300 |         real_ dy;
301 |         real_ dz;
302 | 
303 |         real_ len_x;
304 |         real_ len_y;
305 |         real_ len_z;
306 |         size_t num_cells; // This should *include* the ghost cells
307 |         long num_particles = -1;
308 | 
309 |         bool perform_uncenter = false;
310 | 
311 |         ////////////////////////////////////////////////////
312 |         // Prints the grid sizes, lengths and particle count to stdout.
313 |         void print_run_details()
314 |         {
315 |             std::cout << "#~~~ Run Specifications ~~~ " << std::endl;
316 |             std::cout << "#Nx: " << nx << " Ny: " << ny << " Nz: " << nz << " Num Ghosts: " << num_ghosts << ". Cells Total: " << num_cells << std::endl;
317 |             std::cout << "#Len X: " << len_x << " Len Y: " << len_y << " Len Z: " << len_z << " number of ghosts: "<<num_ghosts << std::endl;
318 |             std::cout << "#Approx Particle Count: " << num_particles << " (nppc: " << nppc << ")" << std::endl;
319 |             std::cout << "#~~~~~~~~~~~~~~~~~~~~~~~~~~ " << std::endl;
320 |             std::cout << std::endl;
321 |         }
322 |         // Fills in the DERIVED members and any of num_particles/Ne/Npe left negative.
323 |         void derive_params()
324 |         {
325 |             len_x = len_x_global;
326 |             len_y = len_y_global;
327 |             len_z = len_z_global;
328 | 
329 |             dx = len_x / nx;
330 |             dy = len_y / ny;
331 |             dz = len_z / nz;
332 | 
333 |             num_cells = (nx+(2*num_ghosts)) * (ny+(2*num_ghosts)) * (nz+(2*num_ghosts));
334 |             //num_real_cells = nx * ny * ny;
335 | 
336 |             // TODO: should we just warn the user for not setting this instead?
337 |             if (num_particles < 0)
338 |             {
339 |                 num_particles = nx * ny * nz * nppc; //can user define this?
340 |             }
341 |             if (Ne < 0) { // default Ne even when the deck set num_particles itself
342 |                 Ne = num_particles;
343 |             }
344 |             if (Npe < 0)
345 |             {
346 |                 Npe = n0*len_x_global*len_y_global*len_z_global;
347 |             }
348 |         }
349 | 
350 |         // Legacy function to initialize the particles, kept commented out for reference.
351 |         /*
352 |         void initialize_particles( particle_list_t particles,size_t nx,size_t ny,size_t nz, real_t dxp, size_t nppc, real_t w)
353 |         {
354 |             // TODO: this doesnt currently do anything with nppc/num_cells
355 | 
356 |             auto position_x = particles.slice<PositionX>();
357 |             auto position_y = particles.slice<PositionY>();
358 |             auto position_z = particles.slice<PositionZ>();
359 | 
360 |             auto velocity_x = particles.slice<VelocityX>();
361 |             auto velocity_y = particles.slice<VelocityY>();
362 |             auto velocity_z = particles.slice<VelocityZ>();
363 | 
364 |             auto weight = particles.slice<Weight>();
365 |             auto cell = particles.slice<Cell_Index>();
366 | 
367 |             // TODO: sensible way to do rand in parallel?
368 |             //srand (static_cast <unsigned> (time(0)));
369 | 
370 |             auto _init =
371 |                 KOKKOS_LAMBDA( const int s, const int i )
372 |                 {
373 |                     // Initialize position.
374 |                     int sign =  -1;
375 |                     size_t pi2 = (s)*particle_list_t::vector_length+i;
376 |                     size_t pi = ((pi2) / 2);
377 |                     if (pi2%2 == 0) {
378 |                         sign = 1;
379 |                     }
380 |                     size_t pic = (2*pi)%nppc;
381 | 
382 |                     real_t x = pic*dxp+0.5*dxp-1.0;
383 |                     position_x.access(s,i) = x;
384 |                     position_y.access(s,i) = 0.;
385 |                     position_z.access(s,i) = 0.;
386 | 
387 | 
388 |                     weight.access(s,i) = w;
389 | 
390 |                     // gives me a num in the range 0..num_real_cells
391 |                     //int pre_ghost = (s % params.num_real_cells);
392 |                     //   size_t ix, iy, iz;
393 | 
394 |                     size_t pre_ghost = (2*pi/nppc);
395 | 
396 |                     cell.access(s,i) = pre_ghost + (nx+2)*(ny+2) + (nx+2) + 1; //13; //allow_for_ghosts(pre_ghost);
397 | 
398 |                     // Initialize velocity.(each cell length is 2)
399 |                     real_t na = 0.0001*sin(2.0*3.1415926*((x+1.0+pre_ghost*2)/(2*nx)));
400 |                     //
401 | 
402 |                     real_t gam = 1.0/sqrt(1.0-v0*v0);
403 |                     velocity_x.access(s,i) = sign * v0 *gam*(1.0+na); //0.1;
404 |                     velocity_y.access(s,i) = 0;
405 |                     velocity_z.access(s,i) = 0;
406 |                 };
407 | 
408 |             Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
409 |                 vec_policy( 0, particles.size() );
410 |             Cabana::simd_parallel_for( vec_policy, _init, "init()" );
411 |         }
412 |         */
413 | };
414 | 
415 | #ifdef USER_INPUT_DECK
416 | #define STRINGIFY(s)#s
417 | #define EXPAND(s)STRINGIFY(s)
418 | // A textual include of the user deck is deliberately left disabled:
419 | //     #include EXPAND(USER_INPUT_DECK)
420 | // It would prevent compile time change detection, so only the constructor
421 | // is declared here and CMake is expected to provide it via an object file.
422 | class Input_Deck : public _Input_Deck {
423 |     public:
424 |         // TODO: as written, a custom deck may be forced to implement its
425 |         // own initialize_particles function. That is not the intent: when
426 |         // the user chooses not to define one, the default implementation
427 |         // above should be used instead
428 |         Input_Deck();
429 | };
430 | #else
431 | // Default deck -- Weibel
432 | class Input_Deck : public _Input_Deck {
433 |     public:
434 |         Input_Deck()
435 |         {
436 |             // Built-in problem setup; a deck customises a run by assigning
437 |             // the inherited parameters (original note: "EM 2 Stream in 1d?")
438 |             nz = 1;
439 |             ny = 32;
440 |             nx = 1;
441 | 
442 |             nppc = 100;
443 |             num_steps = 6000;
444 | 
445 |             // Drift velocity (an earlier setting was v0 = 0.2)
446 |             v0 = 0.0866025403784439;
447 | 
448 |             // Local temporaries can be created here as well
449 |             const real_ lorentz = 1.0 / sqrt(1.0 - v0*v0);
450 | 
451 |             const real_ unit_extent = 1.0;
452 | 
453 |             len_z_global = unit_extent;
454 |             //len_y_global = 3.14159265358979*0.5; // TODO: use proper PI?
455 |             len_y_global = 0.628318530717959*(lorentz*sqrt(lorentz));
456 |             len_x_global = unit_extent;
457 | 
458 |             // Time step: 0.99 times the Courant length of this grid, over c
459 |             const real_ courant = courant_length(
460 |                     len_x_global, len_y_global, len_z_global,
461 |                     nx, ny, nz );
462 |             dt = 0.99*courant / c;
463 |             n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1
464 |         }
465 | };
466 | #endif
467 | 
468 | extern Input_Deck deck;
469 | //Input_Deck deck;
470 | 
471 | #endif // guard
472 | 


--------------------------------------------------------------------------------
/src/interpolator.cpp:
--------------------------------------------------------------------------------
  1 | #include "interpolator.h"
  2 | 
  3 | 
  4 | void load_interpolator_array(
  5 |         field_array_t fields,
  6 |         interpolator_array_t interpolators,
  7 |         size_t nx, // TODO: we can probably pull these out of global params..
  8 |         size_t ny,
  9 |         size_t nz,
 10 |         size_t ng
 11 |         )
 12 | {
 13 |     const size_t step_x = 1;                       // index stride to (x+1, y,   z)
 14 |     const size_t step_y = nx + 2*ng;               // index stride to (x,   y+1, z)
 15 |     const size_t step_z = (nx + 2*ng)*(ny + 2*ng); // index stride to (x,   y,   z+1)
 16 | 
 17 |     auto src_ex = Cabana::slice<FIELD_EX>(fields);
 18 |     auto src_ey = Cabana::slice<FIELD_EY>(fields);
 19 |     auto src_ez = Cabana::slice<FIELD_EZ>(fields);
 20 | 
 21 |     auto src_cbx = Cabana::slice<FIELD_CBX>(fields);
 22 |     auto src_cby = Cabana::slice<FIELD_CBY>(fields);
 23 |     auto src_cbz = Cabana::slice<FIELD_CBZ>(fields);
 24 | 
 25 |     auto out_ex = Cabana::slice<EX>(interpolators);
 26 |     auto out_dexdy = Cabana::slice<DEXDY>(interpolators);
 27 |     auto out_dexdz = Cabana::slice<DEXDZ>(interpolators);
 28 |     auto out_d2exdydz = Cabana::slice<D2EXDYDZ>(interpolators);
 29 |     auto out_ey = Cabana::slice<EY>(interpolators);
 30 |     auto out_deydz = Cabana::slice<DEYDZ>(interpolators);
 31 |     auto out_deydx = Cabana::slice<DEYDX>(interpolators);
 32 |     auto out_d2eydzdx = Cabana::slice<D2EYDZDX>(interpolators);
 33 |     auto out_ez = Cabana::slice<EZ>(interpolators);
 34 |     auto out_dezdx = Cabana::slice<DEZDX>(interpolators);
 35 |     auto out_dezdy = Cabana::slice<DEZDY>(interpolators);
 36 |     auto out_d2ezdxdy = Cabana::slice<D2EZDXDY>(interpolators);
 37 |     auto out_cbx = Cabana::slice<CBX>(interpolators);
 38 |     auto out_dcbxdx = Cabana::slice<DCBXDX>(interpolators);
 39 |     auto out_cby = Cabana::slice<CBY>(interpolators);
 40 |     auto out_dcbydy = Cabana::slice<DCBYDY>(interpolators);
 41 |     auto out_cbz = Cabana::slice<CBZ>(interpolators);
 42 |     auto out_dcbzdz = Cabana::slice<DCBZDZ>(interpolators);
 43 | 
 44 |     const real_t quarter = 1.0 / 4.0;
 45 |     const real_t half = 1.0 / 2.0;
 46 | 
 47 |     // TODO: the +1 neighbour reads below must not reach past the ghosts
 48 |     auto load_voxel = KOKKOS_LAMBDA( const int x, const int y, const int z )
 49 |     {
 50 |         // The launch range below is meant to skip ghost cells
 51 |         //if ( is_ghost(i) ) continue;
 52 | 
 53 |         const int v = VOXEL(x, y, z, nx, ny, nz, ng);
 54 | 
 55 |         // ex: values at this voxel and its +y, +z, +y+z neighbours
 56 |         const real_t ex_0  = src_ex(v);                   // pf0->ex;
 57 |         const real_t ex_y  = src_ex(v + step_y);          // pfy->ex;
 58 |         const real_t ex_z  = src_ex(v + step_z);          // pfz->ex;
 59 |         const real_t ex_yz = src_ex(v + step_y + step_z); // pfyz->ex;
 60 | 
 61 |         // Sums and differences of the four values, each scaled by 1/4
 62 |         out_ex(v)       = quarter*( (ex_yz + ex_0) + (ex_y + ex_z) );
 63 |         out_dexdy(v)    = quarter*( (ex_yz - ex_0) + (ex_y - ex_z) );
 64 |         out_dexdz(v)    = quarter*( (ex_yz - ex_0) - (ex_y - ex_z) );
 65 |         out_d2exdydz(v) = quarter*( (ex_yz + ex_0) - (ex_y + ex_z) );
 66 | 
 67 |         // ey: values at this voxel and its +z, +x, +z+x neighbours
 68 |         const real_t ey_0  = src_ey(v);                   // pf0->ey;
 69 |         const real_t ey_z  = src_ey(v + step_z);          // pfz->ey;
 70 |         const real_t ey_x  = src_ey(v + step_x);          // pfx->ey;
 71 |         const real_t ey_zx = src_ey(v + step_x + step_z); // pfzx->ey;
 72 | 
 73 |         out_ey(v)       = quarter*( (ey_zx + ey_0) + (ey_z + ey_x) );
 74 |         out_deydz(v)    = quarter*( (ey_zx - ey_0) + (ey_z - ey_x) );
 75 |         out_deydx(v)    = quarter*( (ey_zx - ey_0) - (ey_z - ey_x) );
 76 |         out_d2eydzdx(v) = quarter*( (ey_zx + ey_0) - (ey_z + ey_x) );
 77 | 
 78 | 
 79 |         // ez: values at this voxel and its +x, +y, +x+y neighbours
 80 |         const real_t ez_0  = src_ez(v);                   // pf0->ez;
 81 |         const real_t ez_x  = src_ez(v + step_x);          // pfx->ez;
 82 |         const real_t ez_y  = src_ez(v + step_y);          // pfy->ez;
 83 |         const real_t ez_xy = src_ez(v + step_x + step_y); // pfxy->ez;
 84 | 
 85 |         out_ez(v)       = quarter*( (ez_xy + ez_0) + (ez_x + ez_y) );
 86 |         out_dezdx(v)    = quarter*( (ez_xy - ez_0) + (ez_x - ez_y) );
 87 |         out_dezdy(v)    = quarter*( (ez_xy - ez_0) - (ez_x - ez_y) );
 88 |         out_d2ezdxdy(v) = quarter*( (ez_xy + ez_0) - (ez_x + ez_y) );
 89 | 
 90 |         // cbx: half-sum and half-difference along x
 91 |         const real_t bx_0 = src_cbx(v);          // pf0->cbx;
 92 |         const real_t bx_x = src_cbx(v + step_x); // pfx->cbx;
 93 |         out_cbx(v)    = half*( bx_x + bx_0 );
 94 |         out_dcbxdx(v) = half*( bx_x - bx_0 );
 95 | 
 96 |         // cby: half-sum and half-difference along y
 97 |         const real_t by_0 = src_cby(v);          // pf0->cby;
 98 |         const real_t by_y = src_cby(v + step_y); // pfy->cby;
 99 |         out_cby(v)    = half*( by_y + by_0 );
100 |         out_dcbydy(v) = half*( by_y - by_0 );
101 | 
102 |         // cbz: half-sum and half-difference along z
103 |         const real_t bz_0 = src_cbz(v);          // pf0->cbz;
104 |         const real_t bz_z = src_cbz(v + step_z); // pfz->cbz;
105 |         out_cbz(v)    = half*( bz_z + bz_0 );
106 |         out_dcbzdz(v) = half*( bz_z - bz_0 );
107 |     };
108 | 
109 |     //Kokkos::RangePolicy<ExecutionSpace> exec_policy( 0, fields.size() ); // All cells
110 |     Kokkos::MDRangePolicy< Kokkos::Rank<3> > interior( {ng,ng,ng}, {nx+ng, ny+ng, nz+ng} ); // Try not to go into ghosts // TODO: dry this
111 |     Kokkos::parallel_for( "load_interpolator()", interior, load_voxel );
112 | 
113 |         /*
114 |         Key to the pf* names in the trailing comments above:
115 |         pf0  =  &f(x,  y,  z  );
116 |         pfx  =  &f(x+1,y,  z  );
117 |         pfy  =  &f(x,  y+1,z  );
118 |         pfz  =  &f(x,  y,  z+1);
119 |         pfyz =  &f(x,  y+1,z+1);
120 |         pfzx =  &f(x+1,y,  z+1);
121 |         pfxy =  &f(x+1,y+1,z  );
122 |         */
123 | 
124 | }
125 | void initialize_interpolator(interpolator_array_t& f0)
126 | {
127 |     auto s_ex = Cabana::slice<EX>(f0);
128 |     auto s_dexdy = Cabana::slice<DEXDY>(f0);
129 |     auto s_dexdz = Cabana::slice<DEXDZ>(f0);
130 |     auto s_d2exdydz = Cabana::slice<D2EXDYDZ>(f0);
131 |     auto s_ey = Cabana::slice<EY>(f0);
132 |     auto s_deydz = Cabana::slice<DEYDZ>(f0);
133 |     auto s_deydx = Cabana::slice<DEYDX>(f0);
134 |     auto s_d2eydzdx = Cabana::slice<D2EYDZDX>(f0);
135 |     auto s_ez = Cabana::slice<EZ>(f0);
136 |     auto s_dezdx = Cabana::slice<DEZDX>(f0);
137 |     auto s_dezdy = Cabana::slice<DEZDY>(f0);
138 |     auto s_d2ezdxdy = Cabana::slice<D2EZDXDY>(f0);
139 |     auto s_cbx = Cabana::slice<CBX>(f0);
140 |     auto s_dcbxdx = Cabana::slice<DCBXDX>(f0);
141 |     auto s_cby = Cabana::slice<CBY>(f0);
142 |     auto s_dcbydy = Cabana::slice<DCBYDY>(f0);
143 |     auto s_cbz = Cabana::slice<CBZ>(f0);
144 |     auto s_dcbzdz = Cabana::slice<DCBZDZ>(f0);
145 | 
146 |     auto zero_entry =
147 |         KOKKOS_LAMBDA( const int n )
148 |         {
149 |             // Placeholder start values: every coefficient of entry n is zeroed
150 |             s_ex(n) = 0.0; // TODO: is this important?
151 |             s_dexdy(n) = 0.0;
152 |             s_dexdz(n) = 0.0;
153 |             s_d2exdydz(n) = 0.0;
154 |             s_ey(n) = 0.0;
155 |             s_deydz(n) = 0.0;
156 |             s_deydx(n) = 0.0;
157 |             s_d2eydzdx(n) = 0.0;
158 |             s_ez(n) = 0.0;
159 |             s_dezdx(n) = 0.0;
160 |             s_dezdy(n) = 0.0;
161 |             s_d2ezdxdy(n) = 0.0;
162 |             s_cbx(n) = 0.0;
163 |             s_dcbxdx(n) = 0.0;
164 |             s_cby(n) = 0.0;
165 |             s_dcbydy(n) = 0.0;
166 |             s_cbz(n) = 0.0;
167 |             s_dcbzdz(n) = 0.0;
168 |         };
169 | 
170 |     Kokkos::parallel_for( "init_interpolator()", f0.size(), zero_entry );
171 | 
172 | }
173 | 


--------------------------------------------------------------------------------
/src/interpolator.h:
--------------------------------------------------------------------------------
 1 | #ifndef INTERPOLATOR_H
 2 | #define INTERPOLATOR_H
 3 | 
 4 | #include <cstdint>
 5 | #include <cstddef>
 6 | 
 7 | #include <Cabana_Types.hpp>
 8 | #include <Cabana_AoSoA.hpp>
 9 | #include <Cabana_Slice.hpp>
10 | 
11 | #include "types.h"
12 | #include "fields.h"
13 | 
14 | void load_interpolator_array( // fills the interpolator coefficients from the field values
15 |         field_array_t fields, // source: E and cB field components
16 |         interpolator_array_t interpolators, // destination: interpolation coefficients
17 |         size_t nx, // TODO: we can probably pull these out of global params..
18 |         size_t ny,
19 |         size_t nz,
20 |         size_t ng // ghost cells per side; the grid strides use nx+2*ng and ny+2*ng
21 | );
22 | // Sets every interpolator coefficient in f0 to 0.0.
23 | void initialize_interpolator(interpolator_array_t& f0);
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/logger.h:
--------------------------------------------------------------------------------
1 | //#define ENABLE_DEBUG 0   (define ENABLE_DEBUG non-zero to switch `logger` output on)
2 | #if ENABLE_DEBUG
3 |   #define logger std::cout << "LOG:" << __FILE__ << ":" << __LINE__ << " \t :: \t "
4 | #else // disabled: the stream statement becomes the body of while(0) and never runs
5 |   #define logger while(0) std::cout
6 | #endif /* ENABLE_DEBUG */
7 | 


--------------------------------------------------------------------------------
/src/move_p.h:
--------------------------------------------------------------------------------
  1 | #ifndef pic_move_p_h
  2 | #define pic_move_p_h
  3 | 
  4 | #include <types.h>
  5 | 
  6 | 
  7 | // Report which domain face, if any, the cell index (ix, iy, iz) has stepped past.
  8 | // THIS DEALS WITH GHOSTS ITSELF: index 0 and index n+1 are treated as outside.
  9 | KOKKOS_INLINE_FUNCTION int detect_leaving_domain( size_t, size_t nx, size_t ny, size_t nz, size_t ix, size_t iy, size_t iz, size_t )
 10 | {
 11 |     // Return value:
 12 |     //   -1       the index is still inside the domain
 13 |     //   0, 1, 2  stepped past the low  x, y, z side (index == 0)
 14 |     //   3, 4, 5  stepped past the high x, y, z side (index == n+1)
 15 |     // When several conditions hold at once the highest face id is
 16 |     // reported, so the tests run from face 5 down to face 0.
 17 | 
 18 |     int leaving;
 19 | 
 20 |     if (iz == nz+1)
 21 |     {
 22 |         leaving = 5;
 23 |     }
 24 |     else if (iy == ny+1)
 25 |     {
 26 |         leaving = 4;
 27 |     }
 28 |     else if (ix == nx+1)
 29 |     {
 30 |         leaving = 3;
 31 |     }
 32 |     else if (iz == 0)
 33 |     {
 34 |         leaving = 2;
 35 |     }
 36 |     else if (iy == 0)
 37 |     {
 38 |         leaving = 1;
 39 |     }
 40 |     else if (ix == 0)
 41 |     {
 42 |         leaving = 0;
 43 |     }
 44 |     else
 45 |     {
 46 |         leaving = -1;
 47 |     }
 48 | 
 49 |     // Debug aid, left disabled:
 50 |     // if(leaving>=0){
 51 |     //   printf("%d %d %d %d\n", ix,iy,iz,leaving);
 52 |     // }
 53 |     return leaving;
 54 | }
 55 | 
 56 | 
 57 | // TODO: add namespace etc?
 58 | // TODO: port this to cabana syntax
 59 | template<typename T1, typename T2, typename T3, typename T4, typename T5> KOKKOS_INLINE_FUNCTION int move_p(
 60 |         //particle_list_t particles,
 61 |         T1& position_x,
 62 |         T2& position_y,
 63 |         T3& position_z,
 64 |         T4& cell,
 65 |         T5& a0, // TODO: does this need to be const
 66 |         real_t q,
 67 |         particle_mover_t& pm,
 68 |         const grid_t* ,
 69 |         const size_t s,
 70 |         const size_t i,
 71 |         const size_t nx,
 72 |         const size_t ny,
 73 |         const size_t nz,
 74 |         const size_t num_ghosts,
 75 |         const Boundary boundary
 76 |     )
 77 | {
 78 | 
 79 |     auto _asa = a0.access();
 80 | 
 81 |     /* // Kernel variables */
 82 |     real_t s_dir[3];
 83 |     real_t v0, v1, v2, v3; //, v4, v5;
 84 |     size_t axis, face;
 85 |     // if(s==1 && i==0){
 86 |     //   printf("%d %d\n",s,i);
 87 |     // }
 88 |     /* //particle_t* p = p0 + pm->i; */
 89 |     /* //int index = pm->i; */
 90 | 
 91 |     //q = qsp * weight.access(s, i);
 92 | 
 93 |     for(;;)
 94 |     {
 95 |         /*
 96 |            s_midx = p->dx;
 97 |            s_midy = p->dy;
 98 |            s_midz = p->dz;
 99 |            */
100 | 
101 |         real_t s_midx = position_x.access(s, i);
102 |         real_t s_midy = position_y.access(s, i);
103 |         real_t s_midz = position_z.access(s, i);
104 | 
105 |         real_t s_dispx = pm.dispx;
106 |         real_t s_dispy = pm.dispy;
107 |         real_t s_dispz = pm.dispz;
108 | 
109 |         s_dir[0] = (s_dispx>0) ? 1 : -1;
110 |         s_dir[1] = (s_dispy>0) ? 1 : -1;
111 |         s_dir[2] = (s_dispz>0) ? 1 : -1;
112 | 
113 |         // Compute the twice the fractional distance to each potential
114 |         // streak/cell face intersection.
115 |         v0 = (s_dispx==0) ? 3.4e38 : (s_dir[0]-s_midx)/s_dispx;
116 |         v1 = (s_dispy==0) ? 3.4e38 : (s_dir[1]-s_midy)/s_dispy;
117 |         v2 = (s_dispz==0) ? 3.4e38 : (s_dir[2]-s_midz)/s_dispz;
118 | 
119 |         // Determine the fractional length and axis of current streak. The
120 |         // streak ends on either the first face intersected by the
121 |         // particle track or at the end of the particle track.
122 |         //
123 |         //   axis 0,1 or 2 ... streak ends on a x,y or z-face respectively
124 |         //   axis 3        ... streak ends at end of the particle track
125 |         /**/      v3=2,  axis=3;
126 |         if(v0<v3) v3=v0, axis=0;
127 |         if(v1<v3) v3=v1, axis=1;
128 |         if(v2<v3) v3=v2, axis=2;
129 |         v3 *= 0.5;
130 | 
131 |         // Compute the midpoint and the normalized displacement of the streak
132 |         s_dispx *= v3;
133 |         s_dispy *= v3;
134 |         s_dispz *= v3;
135 |         s_midx += s_dispx;
136 |         s_midy += s_dispy;
137 |         s_midz += s_dispz;
138 | 
139 |         // Accumulate the streak.  Note: accumulator values are 4 times
140 |         // the total physical charge that passed through the appropriate
141 |         // current quadrant in a time-step
142 |         //v5 = q*s_dispx*s_dispy*s_dispz*(1./3.);
143 | 
144 |         int ii = cell.access(s, i);
145 | 
146 |         //a = (real_t *)(a0 + ii);
147 | 
148 |         //1D only
149 |         // _asa(ii,accumulator_var::jx, 0) += 4.0f*q*s_dispx;
150 |         // _asa(ii,accumulator_var::jx, 1) += 0.0;
151 |         // _asa(ii,accumulator_var::jx, 2) += 0.0;
152 |         // _asa(ii,accumulator_var::jx, 3) += 0.0;
153 |         real_t v4;
154 |         real_t v5 = q*s_dispx*s_dispy*s_dispz*(1./3.);
155 | 
156 | #define accumulate_j(X,Y,Z)						\
157 |         v4  = q*s_disp##X;    /* v2 = q ux                            */  \
158 |         v1  = v4*s_mid##Y;    /* v1 = q ux dy                         */  \
159 |         v0  = v4-v1;          /* v0 = q ux (1-dy)                     */  \
160 |         v1 += v4;             /* v1 = q ux (1+dy)                     */  \
161 |         v4  = 1+s_mid##Z;     /* v4 = 1+dz                            */  \
162 |         v2  = v0*v4;          /* v2 = q ux (1-dy)(1+dz)               */  \
163 |         v3  = v1*v4;          /* v3 = q ux (1+dy)(1+dz)               */  \
164 |         v4  = 1-s_mid##Z;     /* v4 = 1-dz                            */  \
165 |         v0 *= v4;             /* v0 = q ux (1-dy)(1-dz)               */  \
166 |         v1 *= v4;             /* v1 = q ux (1+dy)(1-dz)               */  \
167 |         v0 += v5;             /* v0 = q ux [ (1-dy)(1-dz) + uy*uz/3 ] */  \
168 |         v1 -= v5;             /* v1 = q ux [ (1+dy)(1-dz) - uy*uz/3 ] */  \
169 |         v2 -= v5;             /* v2 = q ux [ (1-dy)(1+dz) - uy*uz/3 ] */  \
170 |         v3 += v5;             /* v3 = q ux [ (1+dy)(1+dz) + uy*uz/3 ] */  \
171 | 
172 |        accumulate_j(x,y,z);
173 |        // printf("move_p deposit v0 %e to %d \n",
174 |        //         v0, ii);
175 |        _asa(ii, accumulator_var::jx, 0) += v0; // q*ux;
176 |        _asa(ii, accumulator_var::jx, 1) += v1; // 0.0;
177 |        _asa(ii, accumulator_var::jx, 2) += v2; // 0.0;
178 |        _asa(ii, accumulator_var::jx, 3) += v3; // 0.0;
179 | 
180 |        accumulate_j(y,z,x);
181 |        _asa(ii, accumulator_var::jy, 0) += v0; // q*ux;
182 |        _asa(ii, accumulator_var::jy, 1) += v1; // 0.0;
183 |        _asa(ii, accumulator_var::jy, 2) += v2; // 0.0;
184 |        _asa(ii, accumulator_var::jy, 3) += v3; // 0.0;
185 | 
186 |        accumulate_j(z,x,y);
187 |        _asa(ii, accumulator_var::jz, 0) += v0; // q*ux;
188 |        _asa(ii, accumulator_var::jz, 1) += v1; // 0.0;
189 |        _asa(ii, accumulator_var::jz, 2) += v2; // 0.0;
190 |        _asa(ii, accumulator_var::jz, 3) += v3; // 0.0;
191 | 
192 | #   undef accumulate_j
193 | 
194 |         // Compute the remaining particle displacment
195 |         pm.dispx -= s_dispx;
196 |         pm.dispy -= s_dispy;
197 |         pm.dispz -= s_dispz;
198 | 
199 |         //printf("%d %d, %d, %f %f",s, i, ii, position_x.access(s, i),position_x.access(s, i));
200 |         // Compute the new particle offset
201 |         position_x.access(s, i) += s_dispx+s_dispx;
202 |         position_y.access(s, i) += s_dispy+s_dispy;
203 |         position_z.access(s, i) += s_dispz+s_dispz;
204 | 
205 |         //printf(" %f\n",position_x.access(s, i));
206 | 
207 |         // If an end streak, return success (should be ~50% of the time)
208 | 
209 |         if( axis==3 ) break;
210 | 
211 |         // Determine if the particle crossed into a local cell or if it
212 |         // hit a boundary and convert the coordinate system accordingly.
213 |         // Note: Crossing into a local cell should happen ~50% of the
214 |         // time; hitting a boundary is usually a rare event.  Note: the
215 |         // entry / exit coordinate for the particle is guaranteed to be
216 |         // +/-1 _exactly_ for the particle.
217 | 
218 |         v0 = s_dir[axis];
219 | 
220 |         // TODO: do branching based on axis
221 | 
222 |         //(&(p->dx))[axis] = v0; // Avoid roundoff fiascos--put the particle
223 | 
224 |         // TODO: this conditional could be better
225 |         if (axis == 0) position_x.access(s, i) = v0;
226 |         if (axis == 1) position_y.access(s, i) = v0;
227 |         if (axis == 2) position_z.access(s, i) = v0;
228 | 
229 |         // _exactly_ on the boundary.
230 |         face = axis;
231 |         if( v0>0 ) face += 3;
232 | 
233 |         size_t ix, iy, iz;
234 |         RANK_TO_INDEX(ii, ix, iy, iz, (nx+(2*num_ghosts)), (ny+(2*num_ghosts)));
235 |         // ix = ii-((nx+2)*(ny+2)+(nx+2)); //ii-12;
236 |         // iy = 1;
237 |         // iz = 1;
238 | 
239 |         if (face == 0) { ix--; }
240 |         if (face == 1) { iy--; }
241 |         if (face == 2) { iz--; }
242 |         if (face == 3) { ix++; }
243 |         if (face == 4) { iy++; }
244 |         if (face == 5) { iz++; }
245 | 
246 |         int is_leaving_domain = detect_leaving_domain(face, nx, ny, nz, ix, iy, iz, num_ghosts);
247 |         if (is_leaving_domain >= 0) {
248 |             /*     //std::cout << s << ", " << i << " leaving on " << face << std::endl; */
249 | 
250 |             /*     //std::cout << */
251 |             /*         //" x " << position_x.access(s,i) << */
252 |             /*         //" y " << position_y.access(s,i) << */
253 |             /*         //" z " << position_z.access(s,i) << */
254 |             /*         //" cell " << cell.access(s,i) << */
255 |             /*         //std::endl; */
256 | 
257 |             if ( boundary == Boundary::Periodic)
258 |             {
259 |                 //std::cout << "face" << std::endl;
260 |                 // If we hit the periodic boundary, try and put the particle in the right place
261 | 
262 |                 // TODO: we can do this in 1d just fine
263 | 
264 |                 //size_t ix, iy, iz;
265 | 
266 |                 //RANK_TO_INDEX(ii, ix, iy, iz, (nx-1+(2*num_ghosts)), (ny-1+(2*num_ghosts)));
267 |                 /* ix = ii-12; */
268 |                 /* iy = 1; */
269 |                 /* iz = 1; */
270 | 
271 |                 if (is_leaving_domain == 0) { // -1 on x face
272 |                     ix = (nx-1) + num_ghosts;
273 |                 }
274 |                 else if (is_leaving_domain == 1) { // -1 on y face
275 |                     iy = (ny-1) + num_ghosts;
276 |                 }
277 |                 else if (is_leaving_domain == 2) { // -1 on z face
278 |                     iz = (nz-1) + num_ghosts;
279 |                 }
280 |                 else if (is_leaving_domain == 3) { // 1 on x face
281 |                     ix = num_ghosts;
282 |                 }
283 |                 else if (is_leaving_domain == 4) { // 1 on y face
284 |                     iy = num_ghosts;
285 |                 }
286 |                 else if (is_leaving_domain == 5) { // 1 on z face
287 |                     iz = num_ghosts;
288 |                 }
289 |                 /* int updated_ii = VOXEL(ix, iy, iz, */
290 |                 /*         nx, */
291 |                 /*         ny, */
292 |                 /*         nz, */
293 |                 /*         num_ghosts); */
294 | 
295 |             }
296 | 
297 | 
298 |             /*         if ( Parameters::instance().BOUNDARY_TYPE == Boundary::Reflect) */
299 |             /*         { */
300 |             /*             // Hit a reflecting boundary condition.  Reflect the particle */
301 |             /*             // momentum and remaining displacement and keep moving the */
302 |             /*             // particle. */
303 | 
304 |             /*             //logger << "Reflecting " << s << " " << i << " on axis " << axis << std::endl; */
305 | 
306 |             /*             //(&(p->ux    ))[axis] = -(&(p->ux    ))[axis]; */
307 |             /*             //(&(pm->dispx))[axis] = -(&(pm->dispx))[axis]; */
308 |             /*             if (axis == 0) */
309 |             /*             { */
310 |             /*                 velocity_x.access(s, i) = -1.0f * velocity_x.access(s, i); */
311 |             /*                 pm.dispx = -1.0f * s_dispx; */
312 |             /*             } */
313 |             /*             if (axis == 1) */
314 |             /*             { */
315 |             /*                 velocity_y.access(s, i) = -1.0f * velocity_y.access(s, i); */
316 |             /*                 pm.dispy = -1.0f * s_dispy; */
317 |             /*             } */
318 |             /*             if (axis == 2) */
319 |             /*             { */
320 |             /*                 velocity_z.access(s, i) = -1.0f * velocity_z.access(s, i); */
321 |             /*                 pm.dispz = -1.0f * s_dispz; */
322 |             /*             } */
323 |             /*             continue; */
324 |             /*         } */
325 |         }
326 | 
327 |         /*     // TODO: this nieghbor stuff can be removed by going to more simple */
328 |         /*     // boundaries */
329 |         /*     /\* */
330 |         /*     if ( neighbor<g->rangel || neighbor>g->rangeh ) { */
331 |         /*         // Cannot handle the boundary condition here.  Save the updated */
332 |         /*         // particle position, face it hit and update the remaining */
333 |         /*         // displacement in the particle mover. */
334 |         /*         //p->i = 8*p->i + face; */
335 |         /*         cell.access(s, i) = 8 * ii + face; */
336 | 
337 |         /*         return 1; // Return "mover still in use" */
338 |         /*     } */
339 |         /*     *\/ */
340 |         /*     else { */
341 | 
342 |         /*     // Crossed into a normal voxel.  Update the voxel index, convert the */
343 |         /*     // particle coordinate system and keep moving the particle. */
344 | 
345 |         /*     //p->i = neighbor - g->rangel; // Compute local index of neighbor */
346 |         /*     //cell.access(s, i) = neighbor - g->rangel; */
347 |         /*     // TODO: I still need to update the cell we're in */
348 | 
349 |         //1D only
350 |         //int updated_ii = ix+(nx+2)*(ny+2) + (nx+2);
351 |         size_t updated_ii = VOXEL(ix, iy, iz, nx, ny, nz, num_ghosts);
352 |         cell.access(s, i) = updated_ii;
353 | 
354 | 
355 |         /* int updated_ii = VOXEL(ix, iy, iz, */
356 |         /*         nx, */
357 |         /*         ny, */
358 |         /*         nz, */
359 |         /*         num_ghosts); */
360 | 
361 |         /* cell.access(s, i) = updated_ii; */
362 |         /*     //std::cout << "Moving from cell " << ii << " to " << updated_ii << std::endl; */
363 |         /* } */
364 | 
365 |         /**/                         // Note: neighbor - g->rangel < 2^31 / 6
366 |         //(&(p->dx))[axis] = -v0;      // Convert coordinate system
367 |         // TODO: this conditional/branching could be better
368 |         if (axis == 0) position_x.access(s, i) = -v0;
369 |         if (axis == 1) position_y.access(s, i) = -v0;
370 |         if (axis == 2) position_z.access(s, i) = -v0;
371 |     }
372 | 
373 |     return 0; // Return "mover not in use"
374 | }
375 | 
376 | #endif // move_p
377 | 


--------------------------------------------------------------------------------
/src/push.h:
--------------------------------------------------------------------------------
  1 | #ifndef pic_push_h
  2 | #define pic_push_h
  3 | 
  4 | #include <types.h>
  5 | #include "move_p.h"
  6 | 
  7 | template <class _accumulator>
  8 | void push(
  9 |         particle_list_t& particles,   // particle AoSoA; positions/velocities updated in place
 10 |         interpolator_array_t& f0,     // per-cell field interpolation coefficients
 11 |         real_t qdt_2mc,               // scales the E half-kicks and the Boris rotation
 12 |         real_t cdt_dx,                // scales ux into the normalized displacement
 13 |         real_t cdt_dy,                // same, for uy
 14 |         real_t cdt_dz,                // same, for uz
 15 |         real_t qsp,                   // multiplied by the particle weight to get q
 16 |         _accumulator& a0,             // accumulator receiving the jx/jy/jz deposits
 17 |         grid_t* g,                    // forwarded to move_p()
 18 |         const size_t nx,              // nx, ny, nz, num_ghosts and boundary are
 19 |         const size_t ny,              // only forwarded to move_p()
 20 |         const size_t nz,
 21 |         const size_t num_ghosts,
 22 |         Boundary boundary
 23 |         )
 24 | {
 25 |     // Push every particle: interpolate E/B at the particle, apply the Boris
 26 |     // velocity update, move it, and deposit current into a0.  Particles whose
 27 |     // new position falls outside [-1,1] on any axis are handed to move_p().
 28 | 
 29 |     auto position_x = Cabana::slice<PositionX>(particles);
 30 |     auto position_y = Cabana::slice<PositionY>(particles);
 31 |     auto position_z = Cabana::slice<PositionZ>(particles);
 32 | 
 33 |     auto velocity_x = Cabana::slice<VelocityX>(particles);
 34 |     auto velocity_y = Cabana::slice<VelocityY>(particles);
 35 |     auto velocity_z = Cabana::slice<VelocityZ>(particles);
 36 | 
 37 |     auto weight = Cabana::slice<Weight>(particles);
 38 |     auto cell = Cabana::slice<Cell_Index>(particles);
 39 | 
 40 |     //const real_t qdt_4mc        = -0.5*qdt_2mc; // For backward half rotate
 41 |     const real_t one            = 1.;
 42 |     const real_t one_third      = 1./3.;
 43 |     const real_t two_fifteenths = 2./15.;
 44 | 
 45 |     // We prefer making slices outside of the lambda
 46 |     auto _ex = Cabana::slice<EX>(f0);
 47 |     auto _dexdy = Cabana::slice<DEXDY>(f0);
 48 |     auto _dexdz = Cabana::slice<DEXDZ>(f0);
 49 |     auto _d2exdydz = Cabana::slice<D2EXDYDZ>(f0);
 50 |     auto _ey = Cabana::slice<EY>(f0);
 51 |     auto _deydz = Cabana::slice<DEYDZ>(f0);
 52 |     auto _deydx = Cabana::slice<DEYDX>(f0);
 53 |     auto _d2eydzdx = Cabana::slice<D2EYDZDX>(f0);
 54 |     auto _ez = Cabana::slice<EZ>(f0);
 55 |     auto _dezdx = Cabana::slice<DEZDX>(f0);
 56 |     auto _dezdy = Cabana::slice<DEZDY>(f0);
 57 |     auto _d2ezdxdy = Cabana::slice<D2EZDXDY>(f0);
 58 |     auto _cbx = Cabana::slice<CBX>(f0);
 59 |     auto _dcbxdx = Cabana::slice<DCBXDX>(f0);
 60 |     auto _cby = Cabana::slice<CBY>(f0);
 61 |     auto _dcbydy = Cabana::slice<DCBYDY>(f0);
 62 |     auto _cbz = Cabana::slice<CBZ>(f0);
 63 |     auto _dcbzdz = Cabana::slice<DCBZDZ>(f0);
 64 | 
 65 |     auto _push =
 66 |         KOKKOS_LAMBDA( const int s, const int i )
 67 |         {
 68 |             auto accumulators_scatter_access = a0.access();
 69 | 
 70 |             //for ( int i = 0; i < particle_list_t::vector_length; ++i )
 71 |             //{
 72 |             // Setup data accessors
 73 |             // This may be cleaner if we hoisted it?
 74 |             int ii = cell.access(s,i);
 75 | 
 76 |             auto ex = _ex(ii);
 77 |             auto dexdy = _dexdy(ii);
 78 |             auto dexdz = _dexdz(ii);
 79 |             auto d2exdydz = _d2exdydz(ii);
 80 |             auto ey = _ey(ii);
 81 |             auto deydz = _deydz(ii);
 82 |             auto deydx = _deydx(ii);
 83 |             auto d2eydzdx = _d2eydzdx(ii);
 84 |             auto ez = _ez(ii);
 85 |             auto dezdx = _dezdx(ii);
 86 |             auto dezdy = _dezdy(ii);
 87 |             auto d2ezdxdy = _d2ezdxdy(ii);
 88 |             auto cbx = _cbx(ii);
 89 |             auto dcbxdx = _dcbxdx(ii);
 90 |             auto cby = _cby(ii);
 91 |             auto dcbydy = _dcbydy(ii);
 92 |             auto cbz = _cbz(ii);
 93 |             auto dcbzdz = _dcbzdz(ii);
 94 |             /*
 95 |                auto ex  = f0.get<EX>(ii);
 96 |                auto dexdy  = f0.get<DEXDY>(ii);
 97 |                auto dexdz  = f0.get<DEXDZ>(ii);
 98 |                auto d2exdydz  = f0.get<D2EXDYDZ>(ii);
 99 |                auto ey  = f0.get<EY>(ii);
100 |                auto deydz  = f0.get<DEYDZ>(ii);
101 |                auto deydx  = f0.get<DEYDX>(ii);
102 |                auto d2eydzdx  = f0.get<D2EYDZDX>(ii);
103 |                auto ez  = f0.get<EZ>(ii);
104 |                auto dezdx  = f0.get<DEZDX>(ii);
105 |                auto dezdy  = f0.get<DEZDY>(ii);
106 |                auto d2ezdxdy  = f0.get<D2EZDXDY>(ii);
107 |                auto cbx  = f0.get<CBX>(ii);
108 |                auto dcbxdx   = f0.get<DCBXDX>(ii);
109 |                auto cby  = f0.get<CBY>(ii);
110 |                auto dcbydy  = f0.get<DCBYDY>(ii);
111 |                auto cbz  = f0.get<CBZ>(ii);
112 |                auto dcbzdz  = f0.get<DCBZDZ>(ii);
113 |                */
114 | 
115 |             // Perform push
116 | 
117 |             // TODO: deal with pm's
118 |             particle_mover_t local_pm = particle_mover_t();
119 | 
120 |             real_t dx = position_x.access(s,i);   // Load position
121 |             real_t dy = position_y.access(s,i);   // Load position
122 |             real_t dz = position_z.access(s,i);   // Load position
123 | 
124 |             real_t hax  = qdt_2mc*(    ( ex    + dy*dexdy    ) +
125 |                     dz*( dexdz + dy*d2exdydz ) );
126 |             real_t hay  = qdt_2mc*(    ( ey    + dz*deydz    ) +
127 |                     dx*( deydx + dz*d2eydzdx ) );
128 |             real_t haz  = qdt_2mc*(    ( ez    + dx*dezdx    ) +
129 |                     dy*( dezdy + dx*d2ezdxdy ) );
130 | 
131 |             //1D only
132 |             //real_t hax = qdt_2mc*ex;
133 |             // real_t hay = 0;
134 |             // real_t haz = 0;
135 | 
136 |             cbx  = cbx + dx*dcbxdx;             // Interpolate B
137 |             cby  = cby + dy*dcbydy;
138 |             cbz  = cbz + dz*dcbzdz;
139 | 
140 |             real_t ux = velocity_x.access(s,i);   // Load velocity
141 |             real_t uy = velocity_y.access(s,i);   // Load velocity
142 |             real_t uz = velocity_z.access(s,i);   // Load velocity
143 | 
144 |             ux  += hax;                               // Half advance E
145 |             uy  += hay;
146 |             uz  += haz;
147 |             // sqrt (not sqrtf) so that a double real_t is not truncated to float
148 |             real_t v0   = qdt_2mc/(real_t)sqrt(one + (ux*ux + (uy*uy + uz*uz)));
149 |             /**/                                      // Boris - scalars
150 |             real_t v1   = cbx*cbx + (cby*cby + cbz*cbz);
151 |             real_t v2   = (v0*v0)*v1;
152 |             real_t v3   = v0*(one+v2*(one_third+v2*two_fifteenths));
153 |             real_t v4   = v3/(one+v1*(v3*v3));
154 |             v4  += v4;
155 |             v0   = ux + v3*( uy*cbz - uz*cby );       // Boris - uprime
156 |             v1   = uy + v3*( uz*cbx - ux*cbz );
157 |             v2   = uz + v3*( ux*cby - uy*cbx );
158 |             ux  += v4*( v1*cbz - v2*cby );            // Boris - rotation
159 |             uy  += v4*( v2*cbx - v0*cbz );
160 |             uz  += v4*( v0*cby - v1*cbx );
161 |             ux  += hax;                               // Half advance E
162 |             uy  += hay;
163 |             uz  += haz;
164 | 
165 |             velocity_x.access(s,i) = ux;
166 |             velocity_y.access(s,i) = uy;
167 |             velocity_z.access(s,i) = uz;
168 | 
169 |             v0   = one/(real_t)sqrt(one + (ux*ux+ (uy*uy + uz*uz)));
170 |             /**/                                      // Get norm displacement
171 |             ux  *= cdt_dx;
172 |             uy  *= cdt_dy;
173 |             uz  *= cdt_dz;
174 |             ux  *= v0;
175 |             uy  *= v0;
176 |             uz  *= v0;
177 |             v0   = dx + ux;                           // Streak midpoint (inbnds)
178 |             v1   = dy + uy;
179 |             v2   = dz + uz;
180 |             v3   = v0 + ux;                           // New position
181 |             v4   = v1 + uy;
182 |             real_t v5   = v2 + uz;
183 | 
184 |             real_t q = weight.access(s,i)*qsp;   // Load charge
185 | 
186 |             // Check if inbnds
187 |             if(  v3<=one &&  v4<=one &&  v5<=one && -v3<=one && -v4<=one && -v5<=one )
188 |             {
189 | 
190 |                 // Common case (inbnds).  Note: accumulator values are 4 times
191 |                 // the total physical charge that passed through the appropriate
192 |                 // current quadrant in a time-step
193 | 
194 | 
195 |                 // Store new position
196 |                 position_x.access(s,i) = v3;
197 |                 position_y.access(s,i) = v4;
198 |                 position_z.access(s,i) = v5;
199 | 
200 |                 dx = v0;                                // Streak midpoint
201 |                 dy = v1;
202 |                 dz = v2;
203 |                 v5 = q*ux*uy*uz*one_third;              // Compute correction
204 | 
205 |                 //real_t* a  = (real_t *)( a0[ii].a );              // Get accumulator
206 | 
207 |                 //1D only
208 |                 //_a(ii,0) += q*ux;
209 |                 //_a(ii,1) = 0;
210 |                 //_a(ii,2) = 0;
211 |                 //_a(ii,3) = 0;
212 | 
213 |                 // accumulators_scatter_access(ii, accumulator_var::jx, 0) += 4.0f*q*ux;
214 |                 // accumulators_scatter_access(ii, accumulator_var::jx, 1) += 0.0;
215 |                 // accumulators_scatter_access(ii, accumulator_var::jx, 2) += 0.0;
216 |                 // accumulators_scatter_access(ii, accumulator_var::jx, 3) += 0.0;
217 | 
218 |                 #define CALC_J(X,Y,Z)                                        \
219 |                 v4  = q*u##X;   /* v4 = q ux                            */   \
220 |                 v1  = v4*d##Y;  /* v1 = q ux dy                         */   \
221 |                 v0  = v4-v1;    /* v0 = q ux (1-dy)                     */   \
222 |                 v1 += v4;       /* v1 = q ux (1+dy)                     */   \
223 |                 v4  = one+d##Z; /* v4 = 1+dz                            */   \
224 |                 v2  = v0*v4;    /* v2 = q ux (1-dy)(1+dz)               */   \
225 |                 v3  = v1*v4;    /* v3 = q ux (1+dy)(1+dz)               */   \
226 |                 v4  = one-d##Z; /* v4 = 1-dz                            */   \
227 |                 v0 *= v4;       /* v0 = q ux (1-dy)(1-dz)               */   \
228 |                 v1 *= v4;       /* v1 = q ux (1+dy)(1-dz)               */   \
229 |                 v0 += v5;       /* v0 = q ux [ (1-dy)(1-dz) + uy*uz/3 ] */   \
230 |                 v1 -= v5;       /* v1 = q ux [ (1+dy)(1-dz) - uy*uz/3 ] */   \
231 |                 v2 -= v5;       /* v2 = q ux [ (1-dy)(1+dz) - uy*uz/3 ] */   \
232 |                 v3 += v5;       /* v3 = q ux [ (1+dy)(1+dz) + uy*uz/3 ] */
233 | 
234 |                 CALC_J( x,y,z );
235 |                 //std::cout << "Contributing " << v0 << ", " << v1 << ", " << v2 << ", " << v3 << std::endl;
236 |                 accumulators_scatter_access(ii, accumulator_var::jx, 0) += v0; // q*ux*(1-dy)*(1-dz);
237 |                 accumulators_scatter_access(ii, accumulator_var::jx, 1) += v1; // q*ux*(1+dy)*(1-dz);
238 |                 accumulators_scatter_access(ii, accumulator_var::jx, 2) += v2; // q*ux*(1-dy)*(1+dz);
239 |                 accumulators_scatter_access(ii, accumulator_var::jx, 3) += v3; // q*ux*(1+dy)*(1+dz);
240 | 
241 |                 // printf("push deposit v0 %e to %d where ux = %e uy = %e and uz = %e \n",
242 |                 //         v0, ii, ux, uy, uz);
243 | 
244 |                 CALC_J( y,z,x );
245 |                 accumulators_scatter_access(ii, accumulator_var::jy, 0) += v0; // q*uy*(1-dz)*(1-dx);
246 |                 accumulators_scatter_access(ii, accumulator_var::jy, 1) += v1; // q*uy*(1+dz)*(1-dx);
247 |                 accumulators_scatter_access(ii, accumulator_var::jy, 2) += v2; // q*uy*(1-dz)*(1+dx);
248 |                 accumulators_scatter_access(ii, accumulator_var::jy, 3) += v3; // q*uy*(1+dz)*(1+dx);
249 | 
250 |                 CALC_J( z,x,y );
251 |                 accumulators_scatter_access(ii, accumulator_var::jz, 0) += v0; // q*uz*(1-dx)*(1-dy);
252 |                 accumulators_scatter_access(ii, accumulator_var::jz, 1) += v1; // q*uz*(1+dx)*(1-dy);
253 |                 accumulators_scatter_access(ii, accumulator_var::jz, 2) += v2; // q*uz*(1-dx)*(1+dy);
254 |                 accumulators_scatter_access(ii, accumulator_var::jz, 3) += v3; // q*uz*(1+dx)*(1+dy);
255 | 
256 |                 #undef CALC_J
257 | 
258 |             }
259 |             else
260 |             {                                    // Unlikely
261 |                 local_pm.dispx = ux;
262 |                 local_pm.dispy = uy;
263 |                 local_pm.dispz = uz;
264 | 
265 |                 local_pm.i = s*particle_list_t::vector_length + i; //i + itmp; //p_ - p0;
266 | 
267 |                 // Handle particles that cross cells
268 |                 //move_p( position_x, position_y, position_z, cell, _a, q, local_pm,  g,  s, i, nx, ny, nz, num_ghosts, boundary );
269 |                 move_p( position_x, position_y, position_z, cell, a0, q, local_pm,  g,  s, i, nx, ny, nz, num_ghosts, boundary );
270 | 
271 |                 // TODO: re-enable this
272 |                 //if ( move_p( p0, local_pm, a0, g, qsp ) ) { // Unlikely
273 |                 //if ( move_p( particles, local_pm, a0, g, qsp, s, i ) ) { // Unlikely
274 |                 //if( nm<max_nm ) {
275 |                 //pm[nm++] = local_pm[0];
276 |                 //}
277 |                 //else {
278 |                 //ignore++;                 // Unlikely
279 |                 //} // if
280 |                 //} // if
281 | 
282 |                 /* // Copied from VPIC Kokkos for reference
283 |                    if( move_p_kokkos( k_particles, k_local_particle_movers,
284 |                    k_accumulators_sa, g, qsp ) ) { // Unlikely
285 |                    if( k_nm(0)<max_nm ) {
286 |                    nm = int(Kokkos::atomic_fetch_add( &k_nm(0), 1 ));
287 |                    if (nm >= max_nm) Kokkos::abort("overran max_nm");
288 |                    copy_local_to_pm(nm);
289 |                    }
290 |                    }
291 |                    */
292 |             }
293 | 
294 |             //} // end VLEN loop
295 |         };
296 | 
297 |     Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
298 |         vec_policy( 0, particles.size() );
299 |     Cabana::simd_parallel_for( vec_policy, _push, "push()" );
300 | }
301 | 
302 | #endif // pic_push_h
303 | 


--------------------------------------------------------------------------------
/src/types.h:
--------------------------------------------------------------------------------
  1 | #ifndef pic_types_h
  2 | #define pic_types_h
  3 | 
  4 | #ifndef REAL_TYPE
  5 | #define real_t float
  6 | #else
  7 | #define real_t REAL_TYPE
  8 | #endif
  9 | 
 10 | #include <Kokkos_Core.hpp>
 11 | #include <Cabana_Core.hpp>
 12 | 
 13 | // Inner array size (the size of the arrays in the structs-of-arrays).
 14 | 
 15 | #ifndef CELL_BLOCK_FACTOR
 16 | #define CELL_BLOCK_FACTOR 32
 17 | #endif
 18 | // Cell blocking factor in memory
 19 | const size_t cell_blocking = CELL_BLOCK_FACTOR;
 20 | 
 21 | // Defaults
 22 | #ifdef REQUIRE_HOST
 23 | using MemorySpace = Kokkos::HostSpace;
 24 | using ExecutionSpace = Kokkos::DefaultHostExecutionSpace;
 25 | #else
 26 | using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
 27 | using ExecutionSpace = Kokkos::DefaultExecutionSpace;
 28 | #endif
 29 | ///// END ESSENTIALS ///
 30 | 
 31 | enum UserParticleFields // member indices, in the same order as ParticleDataTypes
 32 | {
 33 |     PositionX = 0,
 34 |     PositionY,
 35 |     PositionZ,
 36 |     VelocityX,
 37 |     VelocityY,
 38 |     VelocityZ,
 39 |     Weight,
 40 |     Cell_Index, // This is stored as per VPIC, such that it includes ghost_offsets
 41 | };
 42 | 
 43 | // Designate the types that the particles will hold.
 44 | using ParticleDataTypes =
 45 | Cabana::MemberTypes<
 46 |     real_t,                        // (0) x-position
 47 |     real_t,                        // (1) y-position
 48 |     real_t,                        // (2) z-position
 49 |     real_t,                        // (3) x-velocity
 50 |     real_t,                        // (4) y-velocity
 51 |     real_t,                        // (5) z-velocity
 52 |     real_t,                        // (6) weight
 53 |     int                           // (7) Cell index
 54 | >;
 55 | 
 56 | // Set the type for the particle AoSoA.
 57 | using particle_list_t =
 58 |     Cabana::AoSoA<ParticleDataTypes,MemorySpace>;
 59 | 
 60 | /////////////// START VPIC TYPE ////////////
 61 | 
 62 | #include "grid.h"
 63 | 
 64 | enum InterpolatorFields // member indices, in the same order as InterpolatorDataTypes
 65 | { // TODO: things in here like EXYZ and CBXYZ are ambiguous
 66 |     EX = 0,
 67 |     DEXDY,
 68 |     DEXDZ,
 69 |     D2EXDYDZ,
 70 |     EY,
 71 |     DEYDZ,
 72 |     DEYDX,
 73 |     D2EYDZDX,
 74 |     EZ,
 75 |     DEZDX,
 76 |     DEZDY,
 77 |     D2EZDXDY,
 78 |     CBX,
 79 |     DCBXDX,
 80 |     CBY,
 81 |     DCBYDY,
 82 |     CBZ,
 83 |     DCBZDZ
 84 | };
 85 | 
 86 | using InterpolatorDataTypes =
 87 |     Cabana::MemberTypes<
 88 |         real_t,  // ex
 89 |         real_t,  // dexdy
 90 |         real_t,  // dexdz
 91 |         real_t,  // d2exdydz
 92 |         real_t,  // ey
 93 |         real_t,  // deydz
 94 |         real_t,  // deydx
 95 |         real_t,  // d2eydzdx
 96 |         real_t,  // ez
 97 |         real_t,  // dezdx
 98 |         real_t,  // dezdy
 99 |         real_t,  // d2ezdxdy
100 |         // Below here is not needed for ES? EM only?
101 |         real_t,  // cbx
102 |         real_t,  // dcbxdx
103 |         real_t,  // cby
104 |         real_t,  // dcbydy
105 |         real_t,  // cbz
106 |         real_t   // dcbzdz
107 |     >;
108 | using interpolator_array_t = Cabana::AoSoA<InterpolatorDataTypes,MemorySpace,cell_blocking>;
109 | using AccumulatorDataTypes =
110 |     Cabana::MemberTypes<
111 |     real_t[12] // jx[4] jy[4] jz[4]
112 | >;
113 | 
114 | // Earlier AoSoA form: Cabana::AoSoA<AccumulatorDataTypes,MemorySpace,cell_blocking>
115 | // Extents of the two fixed accumulator dimensions: 3 components (jx,jy,jz) x 4 values
116 | #define ACCUMULATOR_VAR_COUNT 3
117 | #define ACCUMULATOR_ARRAY_LENGTH 4
118 | 
119 | // TODO: should we flatten this out to 1D 12 big?
120 | using accumulator_array_t = Kokkos::View<real_t* [ACCUMULATOR_VAR_COUNT][ACCUMULATOR_ARRAY_LENGTH]>;
121 | // ScatterView declared over the same element type and extents as accumulator_array_t
122 | using accumulator_array_sa_t = Kokkos::Experimental::ScatterView<
123 |     real_t *[ACCUMULATOR_VAR_COUNT][ACCUMULATOR_ARRAY_LENGTH]>;
124 |     // Explicit template arguments that were tried and left commented out:
125 |     //   KOKKOS_LAYOUT, Kokkos::DefaultExecutionSpace, Kokkos::Experimental::ScatterSum,
126 |     //   KOKKOS_SCATTER_DUPLICATED, KOKKOS_SCATTER_ATOMIC
127 | 
128 | namespace accumulator_var {
129 |   // Selects the current component in the second accumulator index,
130 |   // e.g. accumulators_scatter_access(ii, accumulator_var::jx, 0).
131 |   enum a_v {
132 |     jx = 0, jy = 1, jz = 2
133 |   };
134 | }
135 | 
136 | 
137 | 
138 | enum FieldFields // member indices, in the same order as FieldDataTypes
139 | {
140 |     FIELD_EX = 0,
141 |     FIELD_EY,
142 |     FIELD_EZ,
143 |     FIELD_CBX,
144 |     FIELD_CBY,
145 |     FIELD_CBZ,
146 |     FIELD_JFX,
147 |     FIELD_JFY,
148 |     FIELD_JFZ
149 | };
150 | 
151 | using FieldDataTypes = Cabana::MemberTypes<
152 | /*
153 |   ex,   ey,   ez,   div_e_err;     // Electric field and div E error
154 |   cbx,  cby,  cbz,  div_b_err;     // Magnetic field and div B error
155 |   tcax, tcay, tcaz, rhob;          // TCA fields and bound charge density
156 |   jfx,  jfy,  jfz,  rhof;          // Free current and charge density
157 |   material_id ematx, ematy, ematz, nmat; // Material at edge centers and nodes
158 |   material_id fmatx, fmaty, fmatz, cmat; // Material at face and cell centers
159 |   */
160 | 
161 |   real_t, // ex
162 |   real_t, // ey
163 |   real_t, // ez
164 |   real_t, // cbx
165 |   real_t, // cby
166 |   real_t, // cbz
167 |   real_t, // jfx
168 |   real_t, // jfy
169 |   real_t // jfz
170 | >;
171 | 
172 | using field_array_t = Cabana::AoSoA<FieldDataTypes,MemorySpace,cell_blocking>;
173 | 
174 | // TODO: should this be in its own file?
175 | // Displacement still to be applied to one particle, plus that particle's index.
176 | struct particle_mover_t {
177 |   real_t dispx, dispy, dispz; // Displacement of particle
178 |   int32_t i;                  // Index of the particle to move
179 | };
180 | 
181 | /////////////// END VPIC TYPE ////////////
182 | // Split a flat cell rank into (ix,iy,iz); _x and _y are the x and y extents.
183 | // TODO: this may be a bad name?
184 | # define RANK_TO_INDEX(rank,ix,iy,iz,_x,_y) \
185 |     { int _ix, _iy, _iz; /* braces keep the temporaries out of the caller */ \
186 |     _ix  = (rank);                        /* ix = ix+gpx*( iy+gpy*iz ) */ \
187 |     _iy  = _ix/int(_x);   /* iy = iy+gpy*iz */            \
188 |     _ix -= _iy*int(_x);   /* ix = ix */                   \
189 |     _iz  = _iy/int(_y);   /* iz = iz */                   \
190 |     _iy -= _iz*int(_y);   /* iy = iy */                   \
191 |     (ix) = _ix;                                                           \
192 |     (iy) = _iy;                                                           \
193 |     (iz) = _iz; }
194 | // Flat index of cell (x,y,z); the x and y extents are padded by 2*NG (nz is unused).
195 | #define VOXEL(x,y,z, nx,ny,nz, NG) ((x) + ((nx)+((NG)*2))*((y) + ((ny)+((NG)*2))*(z)))
196 | 
197 | #endif // pic_types_h
198 | 


--------------------------------------------------------------------------------
/src/uncenter_p.h:
--------------------------------------------------------------------------------
  1 | #ifndef uncenter_h
  2 | #define uncenter_h
  3 | 
  4 | inline void uncenter_particles(
  5 |         particle_list_t particles,
  6 |         interpolator_array_t& f0,
  7 |         real_t qdt_2mc
  8 |     )
  9 | {
 10 |     // Boris-rotate every particle velocity with qdt_4mc = -qdt_2mc/2, then add
 11 |     // the E kick hax/hay/haz.  inline: this header defines a non-template function.
 12 |     // NOTE(review): particles is by value; presumably a shallow AoSoA copy - confirm.
 13 | 
 14 |     auto position_x = Cabana::slice<PositionX>(particles);
 15 |     auto position_y = Cabana::slice<PositionY>(particles);
 16 |     auto position_z = Cabana::slice<PositionZ>(particles);
 17 | 
 18 |     auto velocity_x = Cabana::slice<VelocityX>(particles);
 19 |     auto velocity_y = Cabana::slice<VelocityY>(particles);
 20 |     auto velocity_z = Cabana::slice<VelocityZ>(particles);
 21 | 
 22 |     auto cell = Cabana::slice<Cell_Index>(particles);
 23 | 
 24 |     const real_t qdt_4mc        = -0.5*qdt_2mc; // For backward half rotate
 25 |     const real_t one            = 1.;
 26 |     const real_t one_third      = 1./3.;
 27 |     const real_t two_fifteenths = 2./15.;
 28 | 
 29 |     // Slice the interpolator once, outside the lambda (as push() does)
 30 |     auto _ex       = Cabana::slice<EX>(f0);
 31 |     auto _dexdy    = Cabana::slice<DEXDY>(f0);
 32 |     auto _dexdz    = Cabana::slice<DEXDZ>(f0);
 33 |     auto _d2exdydz = Cabana::slice<D2EXDYDZ>(f0);
 34 |     auto _ey       = Cabana::slice<EY>(f0);
 35 |     auto _deydz    = Cabana::slice<DEYDZ>(f0);
 36 |     auto _deydx    = Cabana::slice<DEYDX>(f0);
 37 |     auto _d2eydzdx = Cabana::slice<D2EYDZDX>(f0);
 38 |     auto _ez       = Cabana::slice<EZ>(f0);
 39 |     auto _dezdx    = Cabana::slice<DEZDX>(f0);
 40 |     auto _dezdy    = Cabana::slice<DEZDY>(f0);
 41 |     auto _d2ezdxdy = Cabana::slice<D2EZDXDY>(f0);
 42 |     auto _cbx      = Cabana::slice<CBX>(f0);
 43 |     auto _dcbxdx   = Cabana::slice<DCBXDX>(f0);
 44 |     auto _cby      = Cabana::slice<CBY>(f0);
 45 |     auto _dcbydy   = Cabana::slice<DCBYDY>(f0);
 46 |     auto _cbz      = Cabana::slice<CBZ>(f0);
 47 |     auto _dcbzdz   = Cabana::slice<DCBZDZ>(f0);
 48 | 
 49 |     auto _uncenter =
 50 |         KOKKOS_LAMBDA( const int s, const int i ) {
 51 |             // Grab particle properties
 52 |             real_t dx = position_x.access(s,i);   // Load position
 53 |             real_t dy = position_y.access(s,i);   // Load position
 54 |             real_t dz = position_z.access(s,i);   // Load position
 55 | 
 56 |             int ii = cell.access(s,i);
 57 | 
 58 |             // Calculate field values from the interpolator entry of cell ii
 59 |             real_t hax = qdt_2mc*(( _ex(ii) + dy*_dexdy(ii) ) +
 60 |                     dz*( _dexdz(ii) + dy*_d2exdydz(ii) ));
 61 |             real_t hay = qdt_2mc*(( _ey(ii) + dz*_deydz(ii) ) +
 62 |                     dx*( _deydx(ii) + dz*_d2eydzdx(ii) ));
 63 |             real_t haz = qdt_2mc*(( _ez(ii) + dx*_dezdx(ii) ) +
 64 |                     dy*( _dezdy(ii) + dx*_d2ezdxdy(ii) ));
 65 | 
 66 |             real_t cbx = _cbx(ii) + dx*_dcbxdx(ii);  // Interpolate B
 67 |             real_t cby = _cby(ii) + dy*_dcbydy(ii);
 68 |             real_t cbz = _cbz(ii) + dz*_dcbzdz(ii);
 69 | 
 70 |             // Load momentum
 71 |             real_t ux = velocity_x.access(s,i);   // Load velocity
 72 |             real_t uy = velocity_y.access(s,i);   // Load velocity
 73 |             real_t uz = velocity_z.access(s,i);   // Load velocity
 74 | 
 75 |             real_t v0 = qdt_4mc/(real_t)sqrt(one + (ux*ux + (uy*uy + uz*uz)));
 76 | 
 77 |             // Boris - scalars
 78 |             real_t v1 = cbx*cbx + (cby*cby + cbz*cbz);
 79 |             real_t v2 = (v0*v0)*v1;
 80 |             real_t v3 = v0*(one+v2*(one_third+v2*two_fifteenths));
 81 |             real_t v4 = v3/(one+v1*(v3*v3));
 82 |             v4  += v4;
 83 | 
 84 |             v0   = ux + v3*( uy*cbz - uz*cby );      // Boris - uprime
 85 |             v1   = uy + v3*( uz*cbx - ux*cbz );
 86 |             v2   = uz + v3*( ux*cby - uy*cbx );
 87 | 
 88 |             ux  += v4*( v1*cbz - v2*cby );           // Boris - rotation
 89 |             uy  += v4*( v2*cbx - v0*cbz );
 90 |             uz  += v4*( v0*cby - v1*cbx );
 91 | 
 92 |             ux  += hax;                              // Half advance E
 93 |             uy  += hay;
 94 |             uz  += haz;
 95 | 
 96 |             // Store result
 97 |             velocity_x.access(s,i) = ux;
 98 |             velocity_y.access(s,i) = uy;
 99 |             velocity_z.access(s,i) = uz;
100 |         };
101 | 
102 |     Cabana::SimdPolicy<particle_list_t::vector_length,ExecutionSpace>
103 |         vec_policy( 0, particles.size() );
104 |     Cabana::simd_parallel_for( vec_policy, _uncenter, "uncenter()" );
105 | }
106 | 
107 | #endif // uncenter
108 | 


--------------------------------------------------------------------------------
/src/visualization.h:
--------------------------------------------------------------------------------
  1 | #ifndef pic_visualization_h
  2 | #define pic_visualization_h
  3 | 
  4 | #include <iostream>
  5 | #include <fstream>
  6 | 
  7 | class Visualizer {
  8 |     // Dumps particle data to one legacy VTK ASCII file per step ("vis/step<N>.vtk")
  9 |     public:
 10 |         std::ofstream vis_file;   // Stream for the step file currently being written
 11 | 
 12 |         void write_header(size_t total_num_particles, size_t step) {
 13 |             // Open the step file and emit the VTK preamble up to the POINTS line
 14 |             std::stringstream sstm;
 15 | 
 16 |             sstm << "vis/step" << step << ".vtk";
 17 |             std::string file_name = sstm.str();
 18 |             // open() does not create directories; warn instead of silently writing nothing
 19 |             vis_file.open(file_name);
 20 |             if (!vis_file.is_open()) { std::cerr << "Unable to open " << file_name << std::endl; }
 21 |             vis_file << "# vtk DataFile Version 2.0" << std::endl;
 22 |             vis_file << "Unstructured Grid Example" << std::endl;
 23 |             vis_file << "ASCII" << std::endl;
 24 |             vis_file << "" << std::endl;
 25 |             vis_file << "DATASET UNSTRUCTURED_GRID" << std::endl;
 26 | 
 27 |             vis_file << "POINTS " << total_num_particles << " float" << std::endl;
 28 |         }
 29 | 
 30 |         // TODO: the per-particle loops below are near-identical and could share one template helper
 31 |         void write_particles_position(particle_list_t& particles)
 32 |         {
 33 |             auto position_x = Cabana::slice<PositionX>(particles);
 34 |             auto position_y = Cabana::slice<PositionY>(particles);
 35 |             auto position_z = Cabana::slice<PositionZ>(particles);
 36 | 
 37 |             // One "x y z" line per particle; '\n' avoids flushing the file once per particle
 38 |             for ( std::size_t idx = 0; idx != particles.size(); ++idx )
 39 |             {
 40 |                         real_t x = position_x(idx);
 41 |                         real_t y = position_y(idx);
 42 |                         real_t z = position_z(idx);
 43 | 
 44 |                         vis_file << x << " " << y << " " << z << '\n';
 45 |             }
 46 |             vis_file.flush();   // Single flush for the whole POINTS section
 47 |         }
 48 | 
 49 |         void write_cell_types(size_t num_particles)
 50 |         {
 51 |             vis_file << "CELL_TYPES " << num_particles << std::endl;
 52 |             // NOTE(review): no CELLS section is written before CELL_TYPES - confirm readers accept this
 53 |             for (size_t p = 0; p < num_particles; p++)
 54 |             {
 55 |                 vis_file << "1" << '\n';   // Same type code for every particle
 56 |             }
 57 |         }
 58 | 
 59 |         void pre_scalars(size_t num_particles)
 60 |         {
 61 |             vis_file << "POINT_DATA " << num_particles << std::endl;
 62 |         }
 63 | 
 64 |         void write_particles_property_header(std::string name, size_t num_particles)
 65 |         {   // num_particles is not used here; it stays in the signature for existing callers
 66 |             vis_file << "SCALARS " << name << " float 1"  << std::endl;
 67 |             vis_file << "LOOKUP_TABLE default" << std::endl;
 68 |         }
 69 | 
 70 |         void write_particles_index(particle_list_t& particles)
 71 |         {
 72 |             auto cell = Cabana::slice<Cell_Index>(particles);
 73 |             // Printed as an integer: routing the index through real_t rounds/reformats large values
 74 |             for ( std::size_t idx = 0; idx != particles.size(); ++idx )
 75 |             {
 76 |                         const long long cell_index = cell(idx);
 77 | 
 78 |                         vis_file << cell_index << '\n';
 79 |             }
 80 |         }
 81 | 
 82 |         void write_particles_w(particle_list_t& particles)
 83 |         {
 84 |             auto weight = Cabana::slice<Weight>(particles);
 85 | 
 86 |             for ( std::size_t idx = 0; idx != particles.size(); ++idx )
 87 |             {
 88 |                         real_t w = weight(idx);
 89 | 
 90 |                         vis_file << w << '\n';
 91 |             }
 92 |         }
 93 | 
 94 |         void write_particles_sp(particle_list_t& particles, size_t sn)
 95 |         {
 96 |             for ( std::size_t idx = 0; idx != particles.size(); ++idx )
 97 |             {
 98 |                         vis_file << sn << '\n';   // Same species id for every particle in the list
 99 |             }
100 |         }
101 | 
102 |         void finalize()
103 |         {
104 |             vis_file.close();   // Also flushes everything buffered by the writers above
105 |         }
106 | 
107 |         void write_vis(particle_list_t particles, size_t step)
108 |         {
109 |             // Writes the complete file for one step: header, points, cell types, then three scalar arrays
110 |             size_t total_num_particles = particles.size();
111 | 
112 |             // TODO: this needs to be updated once species are introduced
113 |             /*
114 |                for (unsigned int sn = 0; sn < species.size(); sn++)
115 |                {
116 |                int particle_count = species[sn].num_particles;
117 |                total_num_particles += particle_count;
118 |                }
119 |             */
120 | 
121 |             write_header(total_num_particles, step);
122 | 
123 |             //for (unsigned int sn = 0; sn < species.size(); sn++)
124 |             //{
125 |             //auto particles_accesor = get_particle_accessor(m, species[sn].key);
126 |             write_particles_position(particles);
127 |             //}
128 | 
129 |             write_cell_types(total_num_particles);
130 | 
131 |             pre_scalars(total_num_particles);
132 |             write_particles_property_header("weight", total_num_particles);
133 | 
134 |             //for (unsigned int sn = 0; sn < species.size(); sn++)
135 |             //{
136 |             //auto particles_accesor = get_particle_accessor(m, species[sn].key);
137 |             write_particles_w(particles);
138 |             //}
139 | 
140 |             write_particles_property_header("cells", total_num_particles);
141 |             write_particles_index(particles);
142 | 
143 |             write_particles_property_header("species", total_num_particles);
144 | 
145 |             //for (unsigned int sn = 0; sn < species.size(); sn++)
146 |             //{
147 |             //auto particles_accesor = get_particle_accessor(m, species[sn].key);
148 |             write_particles_sp(particles, 1);   // Single species for now, hard-coded id 1
149 |             //}
150 |             finalize();
151 | 
152 |         }
153 | 
154 | };
155 | 
156 | #endif // Visualizer
157 | 


--------------------------------------------------------------------------------
/summary.md:
--------------------------------------------------------------------------------
 1 | ## Technical Details 
 2 | 
 3 | 1. It should exclusively use OMP for threading
 4 | 2. It should be written with OMP4.5 in mind, as well as being CUDA extensible 
 5 | 
 6 | ## Considerations 
 7 | 
 8 | One of the more interesting things about PIC for advanced architectures is
 9 | that the different aspects of the push require different (contradictory)
10 | optimizations.
11 | 
12 | The main `particle_push` has three main parts:
13 | 
14 | ### The particle move
15 | 
16 | **Particle Properties Used (memory streams)**:
17 |     all  
18 | **Data layouts**: 
19 | i) AoS => Good, but not great when vectorized 
20 | ii) SoA => Good, but many memory streams
21 | iii) AoSoA => Ideal
22 | 
23 | - If we vectorize AoS we need to do a transpose 
24 | - SoA doesn't really buy us anything in terms of memory streams here as we
25 |     need all properties.
26 | - Particle order isn't a concern here
27 | - There isn't much burden on cache here as long as the next particle(s) are pre-fetched in time
28 | 
29 | ### The field stencil (read) and particle velocity 
30 | 
31 | **Particle Properties Used (memory streams)**:
32 |     Most  
33 | **Data layouts**: 
34 | i) AoS => Not great, doesn't need to use all the streams
35 | ii) SoA =>  Good, lets you split the streams out
36 | iii) AoSoA =>  Great
37 | 
38 | - The field stencil benefits significantly from having well ordered (on a cell
39 |     basis) particles
40 | - Cache use here is crucial because of the large, semi-nonobvious
41 |     (prefetching), stencil
42 | - If we can explicitly tell the compiler that groups of particles share
43 |     properties, we get better re-use than good "accidental" cache reuse 
44 | 
45 | 
46 | ### The current accumulation stencil (write)
47 | 
48 | **Particle Properties Used (memory streams)**:
49 |     Momentum  
50 | **Data layouts**: 
51 | i) AoS => Not great, doesn't need to use all the streams
52 | ii) SoA =>  Good, lets you split the streams out
53 | iii) AoSoA =>  Great
54 | 
55 | - If particles all write to the same cell, that gives good assumptions and
56 |     makes it safe to do some writes
57 | - If particles are highly disordered, this can cause big problems for the
58 |     safety of the writes and often leads to atomics
59 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_subdirectory(decks)
 2 | if ("${SOLVER_TYPE}" STREQUAL "EM")  # quoted so an unset/empty SOLVER_TYPE cannot break the if()
 3 |   add_subdirectory(energy_comparison)
 4 | endif()
 5 | 
 6 | #LIST(APPEND TESTS example)
 7 | 
 8 | foreach (test ${TESTS})
 9 |     add_executable(${test} ./${test}.cpp)
10 |     target_link_libraries(${test} CabanaPIC)
11 |     add_test(NAME ${test} COMMAND ./${test})
12 | endforeach(test)
13 | 


--------------------------------------------------------------------------------
/tests/decks/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # TODO: this will trigger a full rebuild of all files..
 2 | # Every test listed here builds ${SOURCES} and example.cpp against the input deck decks/<test>.cxx
 3 | LIST(APPEND TESTS custom_init)
 4 | 
 5 | # TODO: we shouldn't have to duplicate all this
 6 | #INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/src)
 7 | 
 8 | # TODO: this should live somewhere more common to reduce code duplication
 9 | foreach (test ${TESTS})
10 |     set(DECK_PATH "${PROJECT_SOURCE_DIR}/decks/${test}")
11 |     add_executable(${test} ${SOURCES} ${CabanaPIC_EXAMPLE_DIR}/example.cpp ${DECK_PATH}.cxx)
12 |     target_compile_definitions(${test} PRIVATE USER_INPUT_DECK=${DECK_PATH})
13 |     target_link_libraries(${test} CabanaPIC)
14 |     add_test(NAME ${test} COMMAND ./${test})
15 | endforeach(test)
16 | 


--------------------------------------------------------------------------------
/tests/energy_comparison/2stream-em.cxx:
--------------------------------------------------------------------------------
  1 | #include "src/input/deck.h"
  2 | // TODO: reaching into this path is a bit odd..
  3 | #include "tests/energy_comparison/compare_energies.h"
  4 | 
  5 | class Custom_Finalizer : public Run_Finalizer {
  6 |     public:
  7 |         using real_ = real_t;
  8 | 
  9 |         // This *has* to be virtual, as we store the object as a pointer to the
 10 |         // base class
 11 |         virtual void finalize()
 12 |         {
 13 |            // Try and validate the final answers against the gold energy file
 14 | 
 15 | #ifndef GOLD_ENERGY_FILE
 16 |             std::cerr << "Cannot find energy gold file, exiting" << std::endl;
 17 |             std::exit(EXIT_FAILURE);
 18 | #endif
 19 | 
 20 | #ifdef CUSTOM_ERROR_MARGIN
 21 |             double error_margin = CUSTOM_ERROR_MARGIN;
 22 | #else
 23 |             double error_margin = 0.10; // 10%
 24 |             // TODO: add constexpr if for if real_t is double to decrease the tolerance
 25 | #endif
 26 | 
 27 |             // TODO: we might need to clear out local energy file first for
 28 |             // this to be sensible? Else we could end up reading old stale
 29 |             // files locally
 30 | 
 31 |             std::string energy_file_name = "energies.txt";
 32 |             std::string energy_gold_file_name = EXPAND( GOLD_ENERGY_FILE );
 33 | 
 34 |             // TODO: port this to a testing framework instead of relying on
 35 |             // error codes?
 36 | 
 37 |             // Compare a window of the run rather than the whole file: skip the
 38 |             // first 3581 lines, then check the next 1300
 39 | 
 40 |             // This does 2 passes through the file, but it's OK for now..
 41 | 
 42 |             // (The window was originally 3581..5081, approx 60..100 in sim time)
 43 | 
 44 |             // Mask which fields to sum, read only 3rd val
 45 |             const unsigned short e_mask = 0b0000000100;
 46 |             bool e_correct = test_utils::compare_energies(
 47 |                     energy_gold_file_name,
 48 |                     energy_file_name,
 49 |                     error_margin,  // margin for error
 50 |                     e_mask,
 51 |                     test_utils::FIELD_ENUM::Sum,
 52 |                     1,  // write the per-line errors to the diagnostic file
 53 |                     "e.out", // diagnostic output file
 54 |                     3581, //371 // number of leading lines to skip
 55 |                     1300 // number of lines to check
 56 |             );
 57 |             std::cout << "E Test Pass: " << e_correct << std::endl;
 58 |             // Same window, read only 4th val
 59 |             const unsigned short b_mask = 0b0000001000;
 60 |             bool b_correct = test_utils::compare_energies(
 61 |                     energy_gold_file_name,
 62 |                     energy_file_name,
 63 |                     error_margin,  // margin for error
 64 |                     b_mask,
 65 |                     test_utils::FIELD_ENUM::Sum,
 66 |                     1,  // write the per-line errors to the diagnostic file
 67 |                     "b.out", // diagnostic output file
 68 |                     3581, //371 // number of leading lines to skip
 69 |                     1300 // number of lines to check
 70 |             );
 71 |             std::cout << "B Test Pass: " << b_correct << std::endl;
 72 | 
 73 |             // Throw error code if either failed
 74 |             if ((!b_correct) || (!e_correct)) {
 75 |                 std::exit(1);
 76 |             }
 77 |         }
 78 | };
 79 | 
 80 | // NOTE: depends on the default particle initialisation; altering that will break this deck
 81 | Input_Deck::Input_Deck()
 82 | {
 83 |     // Deck set-up supplied by the user
 84 |     // Example: EM 2 Stream in 1d?
 85 | 
 86 |     nx = 1;
 87 |     ny = 32;
 88 |     nz = 1;
 89 | 
 90 |     nppc = 100;
 91 |     num_steps = 6000;
 92 | 
 93 |     run_finalizer = new Custom_Finalizer();
 94 | 
 95 |     //v0 = 0.2;
 96 |     v0 = 0.0866025403784439;
 97 | 
 98 |     // Temporaries are allowed here too
 99 |     real_ lorentz = 1.0 / sqrt(1.0 - v0*v0);
100 | 
101 |     const real_ unit_len = 1.0;
102 | 
103 |     len_x_global = unit_len;
104 |     len_z_global = unit_len;
105 |     //len_y_global = 3.14159265358979*0.5; // TODO: use proper PI?
106 |     len_y_global = 0.628318530717959*(lorentz*sqrt(lorentz));
107 | 
108 |     const auto courant = courant_length(
109 |             len_x_global, len_y_global, len_z_global,
110 |             nx, ny, nz );
111 |     dt = 0.99*courant / c;
112 | 
113 |     n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1
114 | }
115 | 


--------------------------------------------------------------------------------
/tests/energy_comparison/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # TODO: this will trigger a full rebuild of all files..
 2 | 
 3 | LIST(APPEND TESTS 2stream-em)
 4 | 
 5 | # TODO: we shouldn't have to duplicate all this
 6 | #INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/src)
 7 | 
 8 | # Gold files are named energies_gold.<test>.<REAL_TYPE>; set() (not list(APPEND)) keeps this a single path
 9 | set(gold_file "${CMAKE_CURRENT_SOURCE_DIR}/energies_gold")
10 | 
11 | # TODO: avoid duplicating this link line everywhere
12 | foreach (test ${TESTS})
13 |     set(DECK_PATH "${test}")
14 |     add_executable(${test} ${SOURCES} ${CabanaPIC_EXAMPLE_DIR}/example.cpp ${DECK_PATH}.cxx)
15 |     target_compile_definitions(${test} PRIVATE USER_INPUT_DECK=${DECK_PATH})
16 |     target_compile_definitions(${test} PRIVATE GOLD_ENERGY_FILE=${gold_file}.${test}.${REAL_TYPE})
17 |     target_link_libraries(${test} CabanaPIC)
18 |     add_test(NAME ${test} COMMAND ./${test})
19 | endforeach(test)
20 | 


--------------------------------------------------------------------------------
/tests/energy_comparison/compare_energies.h:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <fstream>
  3 | #include <string>
  4 | 
  5 | #include <limits> // epsilon for limit
  6 | #include <utility> // pair
  7 | 
  8 | #include <vector>
  9 | 
 10 | namespace test_utils {
 11 | /**
 12 |  * @brief Helper function to write collective errors to file for further analysis
 13 |  * @param errs The vector of all errors (as fractions; written out as percentages)
 14 |  * @param field_per_line The number of values to write per file line (values below 1 are treated as 1)
 15 |  * @param err_file_base_name Name of the file to write
 16 |  */
 17 | void write_error_ouput( const std::vector<double>& errs, int field_per_line, std::string err_file_base_name)
 18 | {
 19 |     // A non-positive width would make the modulo below undefined, so fall back to one entry per line
 20 |     const int per_line = (field_per_line > 0) ? field_per_line : 1;
 21 |     std::ofstream outputFile(err_file_base_name);
 22 |     int counter = 0;
 23 |     for (auto e : errs)
 24 |     {
 25 |         counter++;
 26 |         outputFile << counter << " " << e*100.0 << " "; // Convert to percent and dump
 27 |         if (counter % per_line == 0)
 28 |         {
 29 |             outputFile << std::endl;
 30 |         }
 31 |     }
 32 | }   // outputFile is closed by its destructor
 33 | 
 34 | /**
 35 |  * @brief Helper function to compare two numbers and calculate an absolute error
 36 |  *
 37 |  * @param A The first value to compare
 38 |  * @param B The second value to compare
 39 |  *
 40 |  * @return The absolute difference |A - B|
 41 |  */
 42 | double calculate_abs_error(double A, double B)
 43 | {
 44 |      return std::abs(A-B);
 45 | }
 46 | 
 47 | /**
 48 |  * @brief Helper function to compare numbers and calculate a relative error
 49 |  *
 50 |  * @param A The first value to compare
 51 |  * @param B The second value to compare
 52 |  *
 53 |  * @return |A-B| divided by the smaller of |A| and |B|, so the result is never negative
 54 |  */
 55 | double calculate_relative_error(double A, double B)
 56 | {   // Dividing by std::min(A,B) itself gave a negative "error" for negative inputs, which always passed
 57 |     return std::abs(A-B) / std::min(std::abs(A), std::abs(B));
 58 | }
 59 | 
 60 | /**
 61 |  * @brief Function to compare errors to a given tolerance, and decide if it's within range
 62 |  *
 63 |  * @param A The first value to compare
 64 |  * @param B The second value to compare
 65 |  * @param relative_tolerance The relative tolerance to use when comparing
 66 |  *
 67 |  * @return A pair containing true/false if it's within tolerance, and the calculated error
 68 |  */
 69 | std::pair<bool, double> compare_error(double A, double B, double relative_tolerance)
 70 | {
 71 |     bool within_tol = false;
 72 |     double err = 0.0;
 73 | 
 74 |     // Right now this is pretty arbitrary..
 75 |     double abs_threshold = 10 * std::numeric_limits<double>::epsilon();
 76 | 
 77 |     // Decide on magnitudes: std::min(A,B) alone picks the most negative value,
 78 |     // so a pair like (-5, 0) would not be recognised as having a near-zero member
 79 |     if (std::min(std::abs(A), std::abs(B)) < abs_threshold)
 80 |     {
 81 |         err = calculate_abs_error(A, B);
 82 | 
 83 |         // Finding a relative error to 0 doesn't make much
 84 |         // sense, so lets do absolute error instead
 85 |         if ( err < 2*std::numeric_limits<double>::epsilon() )
 86 |         {
 87 |             within_tol = true;
 88 |         }
 89 |         else {
 90 |             within_tol = false;
 91 |         }
 92 |     }
 93 |     else { // Do relative error
 94 | 
 95 |         err = calculate_relative_error(A, B);
 96 | 
 97 |         if (err < relative_tolerance)
 98 |         {
 99 |             within_tol = true;
100 |         }
101 |         else {
102 |             within_tol = false;
103 |         }
104 |     }
105 |     return { within_tol, err };
106 | }
107 | 
108 | enum FIELD_ENUM {
109 |     Individual = 0, // Compare each masked field on its own
110 |     Sum // Add the masked fields of a line together and compare the totals
111 | };
112 | 
113 | /**
114 |  * @brief Function to compare the contents of two energy files
115 |  *
116 |  * @param file_a First file to compare
117 |  * @param file_b Second file to compare
118 |  * @param relative_tolerance Relative tolerance which is acceptable
119 |  * @param field_mask A mask to specify which fields in the file to use
120 |  * @param field_enum Whether the masked fields are compared individually or summed first
121 |  * @param write_err_ouput If you should write the error output to a file
122 |  * @param err_file_base_name Base filename for writing output
123 |  * @param num_lines_to_skip The number of lines to skip into the file
124 |  * @param lines_to_read The number of lines to read into the file (for partial file analysis). Default -1 means "all"
125 |  *
126 |  * @NOTE A typical energy file is:
127 |  * <step> <ex> <ey> <ez> <bx> <by> <bz> <particle energies...>
128 |  * and the bit maps go accordingly with <step> being the LSB.
129 |  * A mask for b fields only would be 0b1110000
130 |  *
131 |  * @NOTE We could use bitsets for the masking but they're generally slower
132 |  *
133 |  * @return True if they match (within tol); false on a mismatch, an unreadable file or a parse error
134 |  */
135 | bool compare_energies(
136 |         const std::string file_a,
137 |         const std::string file_b,
138 |         const double relative_tolerance,
139 |         const unsigned short field_mask = 0b1111111111111111, /// all 16 bits set => use every field
140 |         const FIELD_ENUM field_enum = FIELD_ENUM::Individual, /// compare field by field unless asked to sum
141 |         const int write_err_ouput = 0, // If the run should dump the errors to disk
142 |         const std::string err_file_base_name =  "err.out", // File name to write errors to
143 |         const int num_lines_to_skip = 0, // Most energy files have 3 lines of padding
144 |         const int lines_to_read = -1 // -1 => all.
145 | )
146 | {
147 |     // TODO: I could easily have a policy here based on the type of the field_mask
148 |     std::vector<double> errs;
149 | 
150 |     //const int DEFAULT_FILED_COUNT = 7;
151 | 
152 |     unsigned short agg_total = 0;
153 |     unsigned short v = field_mask;
154 |     // Count set bits
155 |     for (agg_total = 0; v; agg_total++)
156 |     {
157 |         v &= v - 1; // clear the least significant bit set
158 |     }
159 | 
160 |     try {
161 | 
162 |         bool match = true;
163 | 
164 |         std::string line1 = "";
165 |         std::string line2 = "";
166 | 
167 |         std::ifstream f1 (file_a);
168 |         std::ifstream f2 (file_b);
169 | 
170 |         //std::cout << "file_a " << file_a << std::endl;
171 |         //std::cout << "file_b " << file_b << std::endl;
172 | 
173 |         double max_err = 0.0;
174 |         double max_err_A = 0.0;
175 |         double max_err_B = 0.0;
176 |         int max_err_line = -1;
177 | 
178 |         // This is for counting the number of tokens on a line (changes
179 |         // based on number of species). It can likely be done much better
180 |         int line_token_count = 0;
181 | 
182 |         if (!f1.is_open())
183 |         {
184 |             std::cerr << "Unable to open file f1 " << file_a << std::endl;
185 |             return false;
186 |         }
187 |         else if (!f2.is_open())
188 |         {
189 |             std::cerr << "Unable to open file f2 " << file_b << std::endl;
190 |             return false;
191 |         }
192 |         else // Perform test
193 |         {
194 | 
195 |             // Perform skipping
196 |             for (int i = 0; i < num_lines_to_skip; i++)
197 |             {
198 |                 getline(f1,line1);
199 |                 getline(f2,line2);
200 |             }
201 | 
202 |             int counter = num_lines_to_skip;
203 | 
204 |             // Do processing
205 |             while ( getline(f1,line1) )
206 |             {
207 |                 // file_b ended early: fail rather than compare against a stale/empty line (blank lines in file_a are tolerated)
208 |                 if (!getline(f2,line2) && !line1.empty()) { std::cerr << "Ran out of lines in " << file_b << std::endl; match = false; break; }
209 |                 // Tokenize lines
210 |                 std::stringstream linestream1(line1);
211 |                 std::string item1;
212 | 
213 |                 std::stringstream linestream2(line2);
214 |                 std::string item2;
215 | 
216 |                 int used_line_token_count = 0;
217 |                 int total_line_token_count = 0;
218 | 
219 |                 double sum_A = 0.0;
220 |                 double sum_B = 0.0;
221 |                 std::pair<bool, double> returned_err;
222 |                 returned_err.second = -1.0; // set a dummy value to show uninit
223 | 
224 |                 int agg_count = 0;
225 | 
226 |                 // TODO: this is not resilient to whitespace, and will act
227 |                 // oddly if the input files are not single space delimited
228 | 
229 |                 while (getline(linestream1, item1, ' '))
230 |                 {
231 |                     bool write_this_err_ouput = write_err_ouput;
232 |                     //std::cout << "Setting write_this_err_ouput tp " << write_this_err_ouput << std::endl;
233 | 
234 |                     //std::cout << "item 1 " << item1 << std::endl;
235 | 
236 |                     getline(linestream2, item2, ' ');
237 |                     //std::cout << "item 2 " << item2 << std::endl;
238 |                     total_line_token_count++;
239 | 
240 |                     // Use this field
241 |                     //std::cout << "this_line " << this_line_token_count << " mask " << field_mask << std::endl;
242 | 
243 |                     // Take the value one, and shift it to generate the mask to compare (tokens past bit 15 can never be selected)
244 |                     unsigned short this_line_token_mask = (total_line_token_count <= 16) ? (1u << (total_line_token_count - 1)) : 0;
245 |                     //this_line_token_mask |= this_line_token_mask-1; // Set lower bits on
246 | 
247 |                     // If this field is within our requested mask, use it
248 |                     if (this_line_token_mask & field_mask)
249 |                     {
250 |                         used_line_token_count++;
251 |                         //std::cout << "Parsing field " << used_line_token_count << " val " << item1 << std::endl;
252 | 
253 |                         double A = std::stod(item1);
254 |                         double B = std::stod(item2);
255 | 
256 |                         //std::cout << "A " << A << " vs " << B << std::endl;
257 | 
258 |                         if (
259 |                                 (field_enum == FIELD_ENUM::Sum) && // Need to aggregate
260 |                                 (agg_count < agg_total) // Not done aggregating yet
261 |                             )
262 |                         {
263 |                             // Need to aggregate..
264 |                             sum_A += A;
265 |                             sum_B += B;
266 |                             agg_count++;
267 | 
268 |                             //std::cout << "sum a " << sum_A << " += " << A << std::endl;
269 |                             //std::cout << "sum b " << sum_B << " += " << B << std::endl;
270 | 
271 |                             // Don't write this particular one
272 |                             write_this_err_ouput = false;
273 | 
274 |                             if (agg_count == agg_total) { // final_aggregation
275 |                                 //std::cout << sum_A << " vs " << sum_B << std::endl;
276 |                                 returned_err = compare_error(sum_A, sum_B, relative_tolerance);
277 |                                 write_this_err_ouput = true;
278 |                             }
279 |                         }
280 |                         else // We can just compare this val
281 |                         {
282 |                             sum_A = A;
283 |                             sum_B = B;
284 |                             returned_err = compare_error(A, B, relative_tolerance);
285 |                         }
286 | 
287 |                         if (returned_err.second != -1.0)  // Has some value set
288 |                         {
289 |                             bool returned_match = returned_err.first;
290 | 
291 |                             if (!returned_match) {
292 |                                 match = false;
293 |                             }
294 | 
295 |                             double err = returned_err.second;
296 | 
297 |                             // Track the largest error seen so far
298 |                             if (err > max_err)
299 |                             {
300 |                                 max_err = err;
301 |                                 max_err_A = sum_A;
302 |                                 max_err_B = sum_B;
303 |                                 max_err_line = counter;
304 |                             }
305 | 
306 | 
307 |                             // If we track the errors, track this one
308 |                             if (write_this_err_ouput)
309 |                             {
310 |                                 errs.push_back(err);
311 |                             }
312 |                         }
313 |                     }
314 |                     else {
315 |                         //std::cout << "Skipping field " << this_line_token_mask << " val " << item1 << std::endl;
316 |                     }
317 |                 }
318 |                 line_token_count = used_line_token_count;
319 |                 counter++;
320 | 
321 |                 if (lines_to_read > 0) // A line limit was requested
322 |                 {
323 |                     if ( (counter - num_lines_to_skip) >= lines_to_read)
324 |                     {
325 |                         break;
326 |                     }
327 |                 }
328 |             }
329 | 
330 |             f1.close();
331 |             f2.close();
332 |         }
333 | 
334 |         //std::cout << "Field mask : " << field_mask << std::endl;
335 |         //std::cout << "Fields used : " << line_token_count << std::endl;
336 | 
337 |         std::cout << "Max found err was " << max_err*100 << "% (" << max_err_A << " vs " << max_err_B << ") on line " << max_err_line << " (Threshold: " <<
338 |             relative_tolerance*100 << "%)" << std::endl;
339 | 
340 |         if (write_err_ouput)
341 |         {
342 |             int err_per_line = line_token_count;
343 |             if ((field_enum == FIELD_ENUM::Sum) && (agg_total > 0)) // Need to aggregate (an empty mask leaves nothing to divide by)
344 |             {
345 |                 err_per_line /= agg_total; // Reduce by aggregation factor
346 |             }
347 |             if (err_per_line < 1) { err_per_line = 1; } // The writer uses this as a modulus, so keep it positive
348 |             std::cout << "Writing error output " << errs.size() << std::endl;
349 |             write_error_ouput( errs, err_per_line, err_file_base_name);
350 |         }
351 |         std::cout << "Exiting test with a result of " << match << std::endl;
352 |         return match;
353 |     }
354 |     catch (const std::exception &exc) // Catching all is bad form, but OK for now..
355 |     {
356 |         // catch anything thrown within try block that derives from std::exception
357 |         std::cerr << "Caught error... Aborting" << std::endl;
358 |         std::cerr << exc.what() << std::endl;
359 |         return false;
360 |     }
361 | 
362 | }
363 | 
364 | } // namespace
365 | 


--------------------------------------------------------------------------------
/tests/example.cpp:
--------------------------------------------------------------------------------
1 | #define CATCH_CONFIG_MAIN  // This tells Catch to provide a main()
2 | //#define CATCH_CONFIG_RUNNER // We will provide a custom main
3 | #include "catch.hpp"
4 | 
5 | TEST_CASE( "Trivial example", "[example_tests]" ) // Smoke test: checks that the Catch-provided main() builds and runs a test case
6 | {
7 |         REQUIRE(1); // constant non-zero expression, so this assertion always passes
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/manual_tests/test/2-particle/2pcle-minipic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ECP-copa/CabanaPIC/1ee2c84582b051d59653232abe86ac6da2c6b35e/tests/manual_tests/test/2-particle/2pcle-minipic.png


--------------------------------------------------------------------------------
/tests/manual_tests/test/2-particle/plot.gp:
--------------------------------------------------------------------------------
 1 | # Plots the 'partloc' and 'partloc-vpic' data files together with the
 2 | # analytic curves f(x) and v(x), writing the result to a PNG image.
 3 | set terminal png enhanced
 4 | set output '2pcle-minipic.png'
 5 | 
 6 | # Axes
 7 | set xlabel 't'
 8 | set ylabel 'particle location'
 9 | set xrange [0:100]
10 | #set yrange [0:1]
11 | set grid
12 | 
13 | # Legend
14 | set keytitle 'nx=1000'
15 | set key bottom
16 | 
17 | # Analytic reference curves; v(x) is the derivative of f(x).
18 | f(x)=0.7-0.25-0.5*(0.2-0.5)*cos(x)
19 | v(x)=-0.15*sin(x)
20 | 
21 | # Half of dt is added to column 1 when column 3 is plotted.
22 | dt=0.000990
23 | 
24 | plot 'partloc' using 1:2 with lines title 'minipic,x', \
25 |      '' using ($1+dt*0.5):3 with lines title 'v', \
26 |      'partloc-vpic' using 1:2 with lines title 'vpic,x', \
27 |      f(x) title 'theory', \
28 |      v(x) title ''
29 | 


--------------------------------------------------------------------------------
/tests/manual_tests/test/2-stream-em/2stream-em.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ECP-copa/CabanaPIC/1ee2c84582b051d59653232abe86ac6da2c6b35e/tests/manual_tests/test/2-stream-em/2stream-em.png


--------------------------------------------------------------------------------
/tests/manual_tests/test/2-stream-em/plot.gp:
--------------------------------------------------------------------------------
 1 | # Plots column 4 against column 2 of 'outw0.2' and 'outw0.2-2' on a
 2 | # logarithmic y axis, together with the reference curve f(x).
 3 | #
 4 | # 'enhanced' is requested explicitly because the labels below use
 5 | # enhanced-text markup ({/Symbol ...}, '_' subscripts); a png terminal that
 6 | # does not default to enhanced mode would print that markup literally.
 7 | set term png enhanced #post eps enhanced 22 color
 8 | 
 9 | #set output '2stream-em.eps'
10 | set output '2stream-em.png'
11 | 
12 | set xlabel '{/Symbol w}_pt'
13 | set ylabel 'W_B'
14 | set xrange [0:150]
15 | set yrange [1e-15:10]
16 | set log y
17 | set grid
18 | set format y "%.e"
19 | 
20 | # Exponential reference curve; the plot command scales it by 1e-16 and
21 | # labels it 'linear theory'.
22 | f(x) = exp(0.279*2*x)
23 | set keytitle '({/Symbol g}_0=1.02), nx=32, nppc=100'
24 | set key bottom
25 | 
26 | plot 'outw0.2' u 2:4 w l t 'minipic 1-thread', \
27 |      'outw0.2-2' u 2:4 w l t 'minipic 2-thread', \
28 |      f(x)*1e-16 t 'linear theory'
29 | 


--------------------------------------------------------------------------------
/tests/manual_tests/test/2-stream/plot.gp:
--------------------------------------------------------------------------------
 1 | # Plots column 3 against column 2 of the 'out' and 'out2' data files on a
 2 | # logarithmic y axis, together with the reference curve f(x), writing the
 3 | # result to an EPS file.
 4 | set terminal postscript eps enhanced 22 color
 5 | set output '2stream-minipic.eps'
 6 | 
 7 | # Axes
 8 | set xlabel '{/Symbol w}_pt'
 9 | set ylabel 'W_E'
10 | set xrange [0:50]
11 | set yrange [1e-8:100]
12 | set logscale y
13 | set grid
14 | 
15 | # Legend
16 | set keytitle '({/Symbol g}_0=1.0038), nx=32, nppc=8000'
17 | set key bottom
18 | 
19 | # Exponential reference curves. f0 is only referenced by the commented-out
20 | # tail of the plot command; f is scaled by 1.2e-10 and labelled
21 | # 'linear theory'.
22 | f0(x) = exp(x)
23 | f(x) = exp(0.497184855006572*2*x)
24 | 
25 | plot 'out' using 2:3 with lines title 'minipic 1-thread', \
26 |      'out2' using 2:3 with lines title 'minipic 2-thread', \
27 |      f(x)*1.2e-10 title 'linear theory' #,f0(x)*1.2e-10
28 | 


--------------------------------------------------------------------------------