├── .codecov.yml ├── .github └── workflows │ └── CI.yml ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── decks ├── 2particle.cxx ├── 2stream-short.cxx ├── custom_init.cxx ├── dioctron_3d.cxx └── vpic │ ├── 2particle.cxx │ └── 2stream-em0.cxx ├── example ├── CMakeLists.txt └── example.cpp ├── scripts ├── README.md ├── plot.py └── run_scripts │ ├── build_and_run.sh │ ├── build_cabana.sh │ ├── build_kokkos.sh │ ├── kokkos-tools │ ├── Makefile │ ├── README.md │ ├── kp_kernel_info.h │ └── kp_kernel_timer.cpp │ ├── main.py │ ├── requirements.txt │ └── timing_lib.sh ├── src ├── CMakeLists.txt ├── accumulator.cpp ├── accumulator.h ├── fields.h ├── grid.h ├── helpers.h ├── input │ └── deck.h ├── interpolator.cpp ├── interpolator.h ├── logger.h ├── move_p.h ├── push.h ├── types.h ├── uncenter_p.h └── visualization.h ├── summary.md └── tests ├── CMakeLists.txt ├── decks └── CMakeLists.txt ├── energy_comparison ├── 2stream-em.cxx ├── CMakeLists.txt ├── compare_energies.h ├── energies_gold.2stream-em.double └── energies_gold.2stream-em.float ├── example.cpp ├── include └── catch.hpp └── manual_tests └── test ├── 2-particle ├── 2pcle-minipic.png ├── partloc ├── partloc-vpic └── plot.gp ├── 2-stream-em ├── 2stream-em.png ├── outw0.2 ├── outw0.2-2 └── plot.gp └── 2-stream ├── 2stream-minipic.eps ├── out ├── out2 └── plot.gp /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | precision: 1 3 | round: down 4 | range: "70...100" 5 | ignore: 6 | - tests/include 7 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | schedule: 10 | - cron: '0 4 * * MON' 11 | 12 | jobs: 13 | CI: 14 | strategy: 15 | matrix: 16 | backend: ["SERIAL", "OPENMP"] 17 | runs-on: ubuntu-latest 18 
| container: ghcr.io/ecp-copa/ci-containers/ubuntu:latest 19 | steps: 20 | - name: Checkout kokkos 21 | uses: actions/checkout@v2.2.0 22 | with: 23 | repository: kokkos/kokkos 24 | ref: 3.7.02 25 | path: kokkos 26 | - name: Build kokkos 27 | working-directory: kokkos 28 | run: | 29 | cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/kokkos -DKokkos_CXX_STANDARD=14 -DKokkos_ENABLE_${{ matrix.backend }}=ON 30 | cmake --build build --parallel 2 31 | cmake --install build 32 | - name: Checkout Cabana 33 | uses: actions/checkout@v2.2.0 34 | with: 35 | repository: ECP-copa/Cabana 36 | ref: master 37 | path: Cabana 38 | - name: Build Cabana 39 | working-directory: Cabana 40 | run: | 41 | cmake -B build -DCMAKE_INSTALL_PREFIX=$HOME/Cabana -DCMAKE_PREFIX_PATH="$HOME/kokkos" -DCabana_REQUIRE_${{ matrix.backend }}=ON 42 | cmake --build build --parallel 2 43 | cmake --install build 44 | - name: Checkout CabanaPIC 45 | uses: actions/checkout@v2.2.0 46 | - name: Build CabanaPIC EM 47 | run: | 48 | cmake -B build \ 49 | -DCMAKE_INSTALL_PREFIX=$HOME/CabanaPIC \ 50 | -DMPIEXEC_MAX_NUMPROCS=2 -DMPIEXEC_PREFLAGS="--oversubscribe" \ 51 | -DCMAKE_CXX_FLAGS="--coverage -O0 -Wall -Wextra -pedantic" \ 52 | -DCMAKE_EXE_LINKER_FLAGS="--coverage" \ 53 | -DCMAKE_SHARED_LINKER_FLAGS="--coverage" \ 54 | -DCMAKE_PREFIX_PATH="$HOME/Cabana" \ 55 | -DENABLE_TESTS=ON \ 56 | -DSOLVER_TYPE=EM \ 57 | -DREAL_TYPE=double 58 | cmake --build build --parallel 2 59 | cmake --install build 60 | - name: Test CabanaPIC EM 61 | run: | 62 | CTEST_OUTPUT_ON_FAILURE=1 cmake --build build --target test 63 | - name: Build CabanaPIC ES 64 | run: | 65 | cmake -B build \ 66 | -DCMAKE_INSTALL_PREFIX=$HOME/CabanaPIC \ 67 | -DMPIEXEC_MAX_NUMPROCS=2 -DMPIEXEC_PREFLAGS="--oversubscribe" \ 68 | -DCMAKE_CXX_FLAGS="--coverage -O0 -Wall -Wextra -pedantic" \ 69 | -DCMAKE_EXE_LINKER_FLAGS="--coverage" \ 70 | -DCMAKE_SHARED_LINKER_FLAGS="--coverage" \ 71 | -DCMAKE_PREFIX_PATH="$HOME/Cabana" \ 72 | -DENABLE_TESTS=ON \ 73 | 
-DSOLVER_TYPE=ES \ 74 | -DREAL_TYPE=double 75 | cmake --build build --parallel 2 76 | cmake --install build 77 | - name: Test CabanaPIC ES 78 | run: | 79 | CTEST_OUTPUT_ON_FAILURE=1 cmake --build build --target test 80 | - name: Upload Report to codecov.io 81 | uses: codecov/codecov-action@v1 82 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Cabana"] 2 | path = Cabana 3 | url = https://github.com/ECP-copa/Cabana.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | project(CabanaPIC LANGUAGES CXX VERSION 0.0.1) 3 | 4 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 5 | set(CMAKE_CXX_STANDARD 14) 6 | 7 | include_directories(${PROJECT_SOURCE_DIR}) 8 | include(GNUInstallDirs) 9 | 10 | # TODO: Tag this once we have a new release 11 | find_package(Cabana REQUIRED) # fail at configure time if Cabana cannot be found 12 | 13 | #### User configuration Options #### 14 | option(REQUIRE_HOST "Build with the default host execution space." OFF) 15 | option(ENABLE_TESTS "Enable CTest and build the tests subdirectory." OFF) 16 | option(ENABLE_COVERAGE_BUILD "Append --coverage to CMAKE_CXX_FLAGS." OFF) 17 | #### End User configuration Options #### 18 | 19 | ##### SET SOLVER TYPE ##### 20 | # Flag for switching between electromagnetic and electrostatic solver 21 | set(SOLVER_TYPE "EM" CACHE STRING "Selected Solver Type") 22 | set(SolverTypes EM ES) # List allowable solver types 23 | # hint the tools the allowed values 24 | set_property(CACHE SOLVER_TYPE PROPERTY STRINGS ${SolverTypes}) 25 | if ("${SOLVER_TYPE}" STREQUAL "EM") # quoted so an empty or odd value cannot break if() parsing 26 | add_definitions(-DEM_FIELD_SOLVER=YES) 27 | elseif ("${SOLVER_TYPE}" STREQUAL "ES") 28 | add_definitions(-DES_FIELD_SOLVER=YES) 29 | else() 30 | message(FATAL_ERROR "SOLVER_TYPE is not supported (EM/ES only)") 31 | endif() 32 | ##### END SET SOLVER TYPE ##### 33 | 34 | ##### SET DIMENSIONALITY ##### 
35 | set(DIMENSIONALITY "3" CACHE STRING "Selected number of spatial dimensions") 36 | set(ALLOWABLE_DIMENSIONS 1 2 3) # List allowable values 37 | # hint the tools the allowed values 38 | set_property(CACHE DIMENSIONALITY PROPERTY STRINGS ${ALLOWABLE_DIMENSIONS}) 39 | 40 | if (NOT "${DIMENSIONALITY}" STREQUAL "3") # only 3D is accepted for now 41 | message(FATAL_ERROR "DIMENSIONALITY != 3 not yet supported") 42 | endif() 43 | ##### END SET DIMENSIONALITY ##### 44 | 45 | ##### SET REAL_TYPE (real_t) ##### 46 | set(REAL_TYPE "float" CACHE STRING "Selected type for real numbers") 47 | set(ALLOWABLE_REALS "float" "double") # List allowable values 48 | set_property(CACHE REAL_TYPE PROPERTY STRINGS ${ALLOWABLE_REALS}) 49 | add_definitions(-DREAL_TYPE=${REAL_TYPE}) 50 | ##### END SET REAL_TYPE ##### 51 | 52 | ###### Allow user to select input deck to build against ###### 53 | set(INPUT_DECK "" CACHE STRING "Path to input deck") 54 | 55 | if (NOT "${INPUT_DECK}" STREQUAL "") # quoted: the default empty value must compare cleanly 56 | # TODO: normalize these paths? 57 | if(EXISTS "${PROJECT_SOURCE_DIR}/${INPUT_DECK}") # look relative to the source tree first 58 | add_definitions(-DUSER_INPUT_DECK=${PROJECT_SOURCE_DIR}/${INPUT_DECK}) 59 | elseif(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_DECK}") # then relative to the build tree 60 | add_definitions(-DUSER_INPUT_DECK=${CMAKE_CURRENT_BINARY_DIR}/${INPUT_DECK}) 61 | else() 62 | message(FATAL_ERROR "Cannot find user specified input deck: ${INPUT_DECK}") 63 | endif() 64 | endif() 65 | ####### End User Deck ###### 66 | 67 | if(ENABLE_COVERAGE_BUILD) 68 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") 69 | endif() 70 | 71 | add_subdirectory(src) 72 | set(CabanaPIC_EXAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/example) 73 | add_subdirectory(example) 74 | 75 | ##### TESTS ###### 76 | if (ENABLE_TESTS) 77 | enable_testing() 78 | set(TEST_DIR "./tests/include") 79 | include_directories(${TEST_DIR}) 80 | add_subdirectory(tests) 81 | endif() 82 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright 2018-2019 the Cabana authors 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | * Neither the name of the copyright holder nor the names of its 15 | contributors may be used to endorse or promote products derived from this 16 | software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #CabanaPIC 2 | 3 | ## Installation 4 | 5 | This code has two major dependencies: 6 | 7 | 1. Kokkos 8 | 2. 
Cabana 9 | 10 | Instructions on how to obtain and install both can be found [here](https://github.com/ECP-copa/Cabana/wiki/Build-Instructions) 11 | 12 | Once these are installed, you can configure and build this project using CMake. 13 | The only necessary configuration argument is the path to Cabana (which will 14 | also bring in Kokkos). An example build line will look something like this: 15 | 16 | ``` 17 | cmake -DCMAKE_PREFIX_PATH="$HOME/Cabana/build/install" .. 18 | ``` 19 | 20 | CabanaPIC uses the default enabled Kokkos backend (see more information 21 | [here](https://github.com/kokkos/kokkos/wiki/Initialization#51-initialization-by-command-line-arguments)). 22 | It is possible to require a CPU build by adding `-DREQUIRE_HOST=ON` (which uses 23 | the default enabled host backend). 24 | 25 | The default field solver is "EM"; to use the "ES" solver, add `-DSOLVER_TYPE="ES"`. 26 | 27 | 28 | Note that if Kokkos <=3.4 is used, building with GCC and CUDA support requires 29 | specifying the compiler wrapper: 30 | 31 | ``` 32 | cmake -DCMAKE_PREFIX_PATH="$HOME/Cabana/build/install" -DCMAKE_CXX_COMPILER=$KOKKOS_SRC_DIR/bin/nvcc_wrapper .. 33 | ``` 34 | 35 | Remember that Kokkos, Cabana, and CabanaPIC should all be built with the same 36 | compiler. 37 | 38 | ## Running 39 | 40 | Users can compile in custom input decks by specifying `INPUT_DECK` at build 41 | time, e.g.: 42 | 43 | ``` 44 | cmake -DCMAKE_PREFIX_PATH="$HOME/Cabana/build/install" -DINPUT_DECK=./decks/2stream-short.cxx .. 45 | ``` 46 | 47 | Some example decks live in `./decks`. Custom decks must follow the layout put 48 | forth in `./src/input/deck.h`. 49 | 50 | ## Feature Wishlist 51 | 52 | 1. Configurable to run in different precisions (real_t to configure float/double) 53 | 2. The particle data store layout should be configurable (AoS/SoA/AoSoA) 54 | 3. The particle shape function used should be configurable 55 | 56 | ## Copyright 57 | 58 | © (or copyright) 2019. Triad National Security, LLC. 
All rights reserved. 59 | 60 | This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear Security Administration. The Government is granted for itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. 61 | 62 | This is open source software; you can redistribute it and/or modify it under the terms of the BSD-3 License. If software is modified to produce derivative works, such modified software should be clearly marked, so as not to confuse it with the version available from LANL. 63 | -------------------------------------------------------------------------------- /decks/2particle.cxx: -------------------------------------------------------------------------------- 1 | #include "src/input/deck.h" 2 | 3 | // Override existing init_fields 4 | class Custom_Field_Initializer : public Field_Initializer { 5 | public: 6 | using real_ = real_t; 7 | 8 | // This *has* to be virtual, as we store the object as a pointer to the 9 | // base class 10 | virtual void init( 11 | field_array_t& fields, 12 | size_t nx, 13 | size_t ny, 14 | size_t nz, 15 | size_t ng, 16 | real_t Lx, // TODO: do we prefer xmin or Lx? 
17 | real_t Ly, 18 | real_t Lz, 19 | real_t dx, 20 | real_t dy, 21 | real_t dz 22 | ) 23 | { 24 | std::cout << "Using Custom field Initialization" << std::endl; 25 | 26 | auto ex = Cabana::slice(fields); 27 | auto ey = Cabana::slice(fields); 28 | auto ez = Cabana::slice(fields); 29 | 30 | auto cbx = Cabana::slice(fields); 31 | auto cby = Cabana::slice(fields); 32 | auto cbz = Cabana::slice(fields); 33 | 34 | 35 | real_t x0 = 0; 36 | real_t hx = Lx/nx; 37 | real_t xp1=0.6; //-0.5*hx; 38 | real_t xp2=0.8; //+0.5*hx; 39 | 40 | real_t phi[nx+2], Ex[nx+2]; 41 | 42 | //real_t wi = Lx/(nx*2.0); //particle weight (how to have it here?) 43 | real_t wi = 1./2.; 44 | 45 | // for (int i=0; ixp1&&xcxp2) 54 | // phi[i] = (xp1+xp2-(xp1+xp2)*xc)*wi; 55 | 56 | // // printf("%d %e %e ", i, xc,phi[i]); 57 | // } 58 | // for (int i=1; i=xp1&&xn<=xp2) 68 | phi[i] = (1.0-xn)*xp1*wi + xn*(1.0-xp2)*wi + (xn*xn-xn)*wi; 69 | else if(xnxp2) 72 | phi[i] = (1.0-xn)*xp1*wi + xp2*(1.0-xn)*wi + (xn*xn-xn)*wi; 73 | } 74 | for (size_t i=1; i(particles); 145 | auto position_y = Cabana::slice(particles); 146 | auto position_z = Cabana::slice(particles); 147 | 148 | auto velocity_x = Cabana::slice(particles); 149 | auto velocity_y = Cabana::slice(particles); 150 | auto velocity_z = Cabana::slice(particles); 151 | 152 | auto weight = Cabana::slice(particles); 153 | auto cell = Cabana::slice(particles); 154 | 155 | real_t hx = Lx/nx; 156 | //real_t hy = Ly/ny; 157 | //real_t hz = Lz/nz; 158 | real_t xmin = 0; //-0.5*Lx; 159 | //real_t ymin = 0; //-0.5*Ly; 160 | 161 | #define rand_float(min, max) (min + (max-min)*rand()/RAND_MAX) 162 | 163 | auto _init = 164 | KOKKOS_LAMBDA( const int s, const int i ) 165 | { 166 | // Initialize position. 
167 | size_t pi = (s)*particle_list_t::vector_length+i; 168 | real_t xp1=0.6; //-0.5*hx; //those two numbers are also used in field init 169 | real_t xp2=0.8; //+0.5*hx; 170 | //2 particles only 171 | size_t ix, iy, iz; 172 | real_t x, y, z; 173 | if(pi==0){ 174 | x = xp1; 175 | x= (x-xmin)/hx; 176 | 177 | ix = (size_t) x; 178 | iy = 1; 179 | iz = 1; 180 | x = 1-0.5*hx; 181 | y = 0; 182 | z = 0; 183 | // ix++; 184 | }else{ 185 | x = xp2; 186 | x= (x-xmin)/hx; 187 | ix = (size_t) x; 188 | iy = 1; 189 | iz = 1; 190 | x = -1+0.5*hx; 191 | y = 0; 192 | z = 0; 193 | ix++; 194 | } 195 | 196 | position_x.access(s,i) = x; 197 | position_y.access(s,i) = y; 198 | position_z.access(s,i) = z; 199 | 200 | cell.access(s,i) = VOXEL(ix,iy,iz,nx,ny,nz,ng); //needs to be more general 201 | 202 | weight.access(s,i) = w; 203 | 204 | velocity_x.access(s,i) = 0; //sign * v0 *gam*(1.0+na*sign); //0; 205 | velocity_y.access(s,i) = 0; 206 | velocity_z.access(s,i) = 0; 207 | 208 | //std::cout << "Placing particles as " 209 | //<< x << ", " << y << ", " << z << " with u=0 in cell " << cell.access(s,i) << " with w " << w << std::endl; 210 | }; 211 | 212 | Cabana::SimdPolicy 213 | vec_policy( 0, particles.size() ); 214 | Cabana::simd_parallel_for( vec_policy, _init, "init()" ); 215 | } 216 | }; 217 | 218 | Input_Deck::Input_Deck() 219 | { 220 | field_initer = new Custom_Field_Initializer(); 221 | particle_initer = new Custom_Particle_Initializer(); 222 | 223 | // User puts initialization code here 224 | nx = 1000; 225 | ny = 1; 226 | nz = 1; 227 | 228 | num_steps = 200000; 229 | nppc = 1; 230 | 231 | v0 = 0.0; 232 | 233 | const real_t default_grid_len = 1.0; 234 | 235 | // const real_t a = 0.1; 236 | len_x_global = default_grid_len; //16*a; 237 | len_y_global = default_grid_len; //16*a; 238 | len_z_global = default_grid_len; 239 | 240 | n0 = 1.0; 241 | Npe = n0*len_x_global*len_y_global*len_z_global; 242 | 243 | dt = 0.99*courant_length( 244 | len_x_global, len_y_global, len_z_global, 245 | 
nx, ny, nz 246 | ) / c; 247 | 248 | ec = 1.0; 249 | qsp = ec; 250 | me = qsp; 251 | 252 | Ne = 2; 253 | 254 | num_particles = 2; 255 | 256 | } 257 | -------------------------------------------------------------------------------- /decks/2stream-short.cxx: -------------------------------------------------------------------------------- 1 | #include "src/input/deck.h" 2 | // For a list of available global variables, see `src/input/deck.h`, common ones include: 3 | /* 4 | real_ de = 1.0; // Length normalization (electron inertial length) 5 | real_ ec = 1.0; // Charge normalization 6 | real_ me = 1.0; // Mass normalization 7 | real_ mu = 1.0; // permeability of free space 8 | real_ c = 1.0; // Speed of light 9 | real_ eps = 1.0; // permittivity of free space 10 | real_ n0 = 1.0; // Background plasma density 11 | size_t nx = 16; 12 | size_t ny = 1; 13 | size_t nz = 1; 14 | size_t nppc = 1; 15 | double dt = 1.0; 16 | int num_steps = 2; 17 | real_ len_x_global = 1.0; 18 | real_ len_y_global = 1.0; 19 | real_ len_z_global = 1.0; 20 | real_ v0 = 1.0; //drift velocity 21 | size_t num_ghosts = 1; 22 | (len_x and dx will automatically be set) 23 | */ 24 | // I would rather decalare this as a class, not just as a constructor, but that 25 | // would have to be in a header (which would stop the compile detecting 26 | // changes...). This is fine for now. 27 | Input_Deck::Input_Deck() 28 | { 29 | // User puts initialization code here 30 | nx = 32; 31 | ny = 1; 32 | nz = 1; 33 | 34 | num_steps = 3000; 35 | nppc = 100; 36 | 37 | v0 = 0.2; 38 | 39 | // Can also create temporaries 40 | real_ gam = 1.0 / sqrt(1.0 - v0*v0); 41 | 42 | const real_t default_grid_len = 1.0; 43 | 44 | len_x_global = 3.14159265358979*0.5; // TODO: use proper PI? 
45 | len_y_global = default_grid_len; 46 | len_z_global = default_grid_len; 47 | 48 | Npe = n0*len_x_global*len_y_global*len_z_global; 49 | 50 | dt = 0.99*courant_length( 51 | len_x_global, len_y_global, len_z_global, 52 | nx, ny, nz 53 | ) / c; 54 | 55 | n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1 56 | } 57 | -------------------------------------------------------------------------------- /decks/custom_init.cxx: -------------------------------------------------------------------------------- 1 | #include "src/input/deck.h" 2 | // For a list of available global variables, see `src/input/deck.h`, common ones include: 3 | /* 4 | real_ de = 1.0; // Length normalization (electron inertial length) 5 | real_ ec = 1.0; // Charge normalization 6 | real_ me = 1.0; // Mass normalization 7 | real_ mu = 1.0; // permeability of free space 8 | real_ c = 1.0; // Speed of light 9 | real_ eps = 1.0; // permittivity of free space 10 | real_ n0 = 1.0; // Background plasma density 11 | size_t nx = 16; 12 | size_t ny = 1; 13 | size_t nz = 1; 14 | size_t nppc = 1; 15 | double dt = 1.0; 16 | int num_steps = 2; 17 | real_ len_x_global = 1.0; 18 | real_ len_y_global = 1.0; 19 | real_ len_z_global = 1.0; 20 | real_ v0 = 1.0; //drift velocity 21 | size_t num_ghosts = 1; 22 | (len_x and dx will automatically be set) 23 | */ 24 | // I would rather decalare this as a class, not just as a constructor, but that 25 | // would have to be in a header (which would stop the compile detecting 26 | // changes...). This is fine for now. 
27 | 28 | // Override existing init_particles 29 | class Custom_Particle_Initializer : public Particle_Initializer { 30 | public: 31 | using real_ = real_t; 32 | 33 | // This *has* to be virtual, as we store the object as a pointer to the 34 | // base class 35 | virtual void init( 36 | particle_list_t& particles, 37 | size_t nx, 38 | size_t ny, 39 | size_t, 40 | size_t ng, 41 | real_ dxp, 42 | size_t nppc, 43 | real_ w, 44 | real_ v0, 45 | real_, 46 | real_, 47 | real_ 48 | ) override 49 | { 50 | std::cout << "Using Custom Particle Initialization" << std::endl; 51 | 52 | auto position_x = Cabana::slice(particles); 53 | auto position_y = Cabana::slice(particles); 54 | auto position_z = Cabana::slice(particles); 55 | 56 | auto velocity_x = Cabana::slice(particles); 57 | auto velocity_y = Cabana::slice(particles); 58 | auto velocity_z = Cabana::slice(particles); 59 | 60 | auto weight = Cabana::slice(particles); 61 | auto cell = Cabana::slice(particles); 62 | 63 | // TODO: sensible way to do rand in parallel? 64 | //srand (static_cast (time(0))); 65 | 66 | auto _init = 67 | KOKKOS_LAMBDA( const int s, const int i ) 68 | { 69 | // Initialize position. 70 | int sign = -1; 71 | size_t pi2 = (s)*particle_list_t::vector_length+i; 72 | size_t pi = ((pi2) / 2); 73 | if (pi2%2 == 0) { 74 | sign = 1; 75 | } 76 | size_t pic = (2*pi)%nppc; //Every 2 particles have the same "pic". 77 | 78 | real_ x = pic*dxp+0.5*dxp-1.0; 79 | size_t pre_ghost = (2*pi/nppc); //pre_gohost ranges [0,nx*ny*nz). 
80 | 81 | position_x.access(s,i) = x; 82 | position_y.access(s,i) = 0.0; 83 | position_z.access(s,i) = 0.0; 84 | 85 | weight.access(s,i) = w; 86 | 87 | int ix,iy,iz; 88 | ix = pre_ghost+1; 89 | iy = 1; 90 | iz = 1; 91 | cell.access(s,i) = VOXEL(ix,iy,iz,nx,ny,nz,ng); 92 | 93 | // Initialize velocity.(each cell length is 2) 94 | real_t nax = 0.0001*sin(2.0*3.1415926*((x+1.0+ix*2)/(2*nx))); 95 | real_ gam = 1.0/sqrt(1.0-v0*v0); 96 | velocity_x.access(s,i) = sign * v0*gam*(1.0+nax); //0; 97 | velocity_y.access(s,i) = 0; 98 | velocity_z.access(s,i) = 0; //na*sign; //sign * v0 *gam*(1.0+na*sign); 99 | //if(pi<100) printf("%d %d %d pre-g %d putting particle at x=%e with ux = %e ix = %d, pi = %d \n", pic, s, i, pre_ghost, position_x.access(s,i), velocity_x.access(s,i), ix, cell.access(s,i) ); 100 | }; 101 | 102 | Cabana::SimdPolicy 103 | vec_policy( 0, particles.size() ); 104 | Cabana::simd_parallel_for( vec_policy, _init, "init()" ); 105 | } 106 | }; 107 | 108 | Input_Deck::Input_Deck() 109 | { 110 | // User puts initialization code here 111 | 112 | std::cout << "Custom Input_Deck constructor" << std::endl; 113 | // Tell the deck to use the custom initer in place of the default 114 | particle_initer = new Custom_Particle_Initializer(); 115 | 116 | nx = 32; 117 | ny = 1; 118 | nz = 1; 119 | 120 | num_steps = 30; 121 | nppc = 100; 122 | 123 | v0 = 0.0866025403784439; 124 | 125 | // Can also create local temporaries 126 | real_ gam = 1.0 / sqrt(1.0 - v0*v0); 127 | 128 | const real_t default_grid_len = 1.0; 129 | 130 | len_x_global = 6.28318530717959*(gam*sqrt(gam)); 131 | len_y_global = default_grid_len; 132 | len_z_global = default_grid_len; 133 | 134 | Npe = n0*len_x_global*len_y_global*len_z_global; 135 | 136 | dt = 0.99*courant_length( 137 | len_x_global, len_y_global, len_z_global, 138 | nx, ny, nz 139 | ) / c; 140 | 141 | n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1 142 | } 143 | 
-------------------------------------------------------------------------------- /decks/dioctron_3d.cxx: -------------------------------------------------------------------------------- 1 | #include "src/input/deck.h" 2 | 3 | // Override existing init_fields 4 | class Custom_Field_Initializer : public Field_Initializer { 5 | public: 6 | using real_ = real_t; 7 | 8 | // This *has* to be virtual, as we store the object as a pointer to the 9 | // base class 10 | virtual void init( 11 | field_array_t& fields, 12 | size_t nx, 13 | size_t ny, 14 | size_t nz, 15 | size_t ng, 16 | real_t Lx, // TODO: do we prefer xmin or Lx? 17 | real_t Ly, 18 | real_t Lz, 19 | real_t dx, 20 | real_t dy, 21 | real_t dz 22 | ) 23 | { 24 | std::cout << "Using Custom field Initialization" << std::endl; 25 | 26 | auto ex = Cabana::slice(fields); 27 | auto ey = Cabana::slice(fields); 28 | auto ez = Cabana::slice(fields); 29 | 30 | auto cbx = Cabana::slice(fields); 31 | auto cby = Cabana::slice(fields); 32 | auto cbz = Cabana::slice(fields); 33 | 34 | real_t b0 = sqrt(20.0); 35 | real_t a = 0.1; 36 | 37 | real_t xmin = -0.5*Lx; 38 | real_t ymin = -0.5*Ly; 39 | 40 | auto _init_fields = 41 | KOKKOS_LAMBDA( const int i ) 42 | { 43 | ex(i) = 0.0; 44 | ey(i) = 0.0; 45 | ez(i) = 0.0; 46 | cbx(i) = 0.0; 47 | cby(i) = 0.0; 48 | cbz(i) = b0; 49 | size_t ix,iy,iz; 50 | RANK_TO_INDEX(i, ix,iy,iz,nx+2*ng,ny+2*ng); 51 | real_t y = ymin + (iy-0.5)*dy; 52 | 53 | if(y<-a) { 54 | ey(i) = a; 55 | } 56 | else if(y>a) { 57 | ey(i) =-a; 58 | } 59 | else { 60 | ey(i) =-y; 61 | } 62 | //printf("%d %e %e\n",iy,y,ey(i)); 63 | 64 | }; 65 | 66 | Kokkos::parallel_for( fields.size(), _init_fields, "init_fields()" ); 67 | } 68 | }; 69 | 70 | // Override existing init_particles 71 | class Custom_Particle_Initializer : public Particle_Initializer { 72 | public: 73 | using real_ = real_t; 74 | 75 | // This *has* to be virtual, as we store the object as a pointer to the 76 | // base class 77 | virtual void init( 78 | 
particle_list_t& particles, 79 | size_t nx, 80 | size_t ny, 81 | size_t nz, 82 | size_t ng, 83 | real_ dxp, 84 | size_t nppc, 85 | real_ w, 86 | real_ v0, 87 | real_ Lx, // TODO: is there a better way to pass/read global lens? 88 | real_ Ly, 89 | real_ Lz 90 | ) 91 | { 92 | std::cout << "Using Custom Particle Initialization" << std::endl; 93 | std::cout << "Lx = " << Lx << " Ly " << Ly << " Lz " << Lz << std::endl; 94 | 95 | auto position_x = Cabana::slice(particles); 96 | auto position_y = Cabana::slice(particles); 97 | auto position_z = Cabana::slice(particles); 98 | 99 | auto velocity_x = Cabana::slice(particles); 100 | auto velocity_y = Cabana::slice(particles); 101 | auto velocity_z = Cabana::slice(particles); 102 | 103 | auto weight = Cabana::slice(particles); 104 | auto cell = Cabana::slice(particles); 105 | 106 | real_t hx = Lx/nx; 107 | real_t hy = Ly/ny; 108 | real_t hz = Lz/nz; 109 | real_t xmin = -0.5*Lx; 110 | real_t ymin = -0.5*Ly; 111 | 112 | #define rand_float(min, max) (min + (max-min)*rand()/RAND_MAX) 113 | 114 | auto _init = 115 | KOKKOS_LAMBDA( const int s, const int i ) 116 | { 117 | // Initialize position. 
118 | size_t pi = (s)*particle_list_t::vector_length+i; 119 | size_t pic = (pi)%nppc; 120 | 121 | size_t ix, iy, iz; 122 | real_t x, y, z; 123 | x = rand_float(-0.5*Lx,0.5*Lx); 124 | x= (x-xmin)/hx; 125 | ix = (size_t) x; 126 | x -= (real_t) ix; 127 | x = x+x-1; 128 | if(ix==nx) x = 1; 129 | if(ix==nx) ix = nx-1; 130 | 131 | y = rand_float(-0.1f, 0.1f); //a = 0.1 132 | y = (y-ymin)/hy; 133 | iy = (size_t) y; 134 | y -= (real_t) iy; 135 | y = y+y-1; 136 | if(iy==ny) y = 1; 137 | if(iy==ny) iy = ny-1; 138 | 139 | z = 0; 140 | iz = 0; 141 | 142 | position_x.access(s,i) = x; 143 | position_y.access(s,i) = y; 144 | position_z.access(s,i) = z; 145 | 146 | cell.access(s,i) = VOXEL(ix+1,iy+1,iz+1,nx,ny,nz,ng); //needs to be more general 147 | 148 | weight.access(s,i) = w; 149 | 150 | real_t na = 0; //0.0001*sin(2.0*3.1415926*((x+1.0+pre_ghost*2)/(2*ny))); 151 | 152 | real_t gam = 1.0/sqrt(1.0-v0*v0); 153 | velocity_x.access(s,i) = 0; //sign * v0 *gam*(1.0+na*sign); //0; 154 | velocity_y.access(s,i) = 0; 155 | velocity_z.access(s,i) = 0; 156 | 157 | //std::cout << "Placing particles as " 158 | //<< x << ", " << y << ", " << z << " with u=0 in cell " << cell.access(s,i) << " with w " << w << std::endl; 159 | }; 160 | 161 | Cabana::SimdPolicy 162 | vec_policy( 0, particles.size() ); 163 | Cabana::simd_parallel_for( vec_policy, _init, "init()" ); 164 | } 165 | }; 166 | 167 | Input_Deck::Input_Deck() 168 | { 169 | field_initer = new Custom_Field_Initializer(); 170 | particle_initer = new Custom_Particle_Initializer(); 171 | 172 | // User puts initialization code here 173 | nx = 64; 174 | ny = 64; 175 | nz = 1; 176 | 177 | num_steps = 20000; 178 | nppc = 5; // Gy has 40 and then does /8? 
179 | 180 | v0 = 0.0; 181 | 182 | // Can also create temporaries 183 | real_ gam = 1.0 / sqrt(1.0 - v0*v0); 184 | 185 | const real_t default_grid_len = 1.0; 186 | 187 | const real_t a = 0.1; 188 | len_x_global = 16*a; 189 | len_y_global = 16*a; 190 | len_z_global = default_grid_len; 191 | 192 | Npe = n0*len_x_global*0.2*len_z_global; 193 | 194 | dt = 0.99*courant_length( 195 | len_x_global, len_y_global, len_z_global, 196 | nx, ny, nz 197 | ) / c; 198 | 199 | n0 = 1.0; //for 2stream, for 2 species, making sure omega_p of each species is 1 200 | } 201 | -------------------------------------------------------------------------------- /decks/vpic/2stream-em0.cxx: -------------------------------------------------------------------------------- 1 | // Magnetic reconnection in a Harris equilibrium thin current sheet 2 | // 3 | // This input deck reproduces the PIC simulations found in: 4 | // William Daughton. "Nonlinear dynamics of thin current sheets." Phys. 5 | // Plasmas. 9(9): 3668-3678. September 2002. 6 | // 7 | // This input deck was written by: 8 | // Kevin J Bowers, Ph.D. 9 | // Plasma Physics Group (X-1) 10 | // Applied Physics Division 11 | // Los Alamos National Lab 12 | // August 2003 - original version 13 | // October 2003 - heavily revised to utilize input deck syntactic sugar 14 | // March/April 2004 - rewritten for domain decomposition V4PIC 15 | 16 | // If you want to use global variables (for example, to store the dump 17 | // intervals for your diagnostics section), it must be done in the globals 18 | // section. Variables declared the globals section will be preserved across 19 | // restart dumps. For example, if the globals section is: 20 | // begin_globals { 21 | // double variable; 22 | // } end_globals 23 | // the double "variable" will be visible to other input deck sections as 24 | // "global->variable". Note: Variables declared in the globals section are set 25 | // to zero before the user's initialization block is executed. 
Up to 16K 26 | // of global variables can be defined. 27 | 28 | begin_globals { 29 | double energies_interval; 30 | double fields_interval; 31 | double ehydro_interval; 32 | double ihydro_interval; 33 | double eparticle_interval; 34 | double iparticle_interval; 35 | double restart_interval; 36 | }; 37 | 38 | begin_initialization { 39 | // At this point, there is an empty grid and the random number generator is 40 | // seeded with the rank. The grid, materials, species need to be defined. 41 | // Then the initial non-zero fields need to be loaded at time level 0 and the 42 | // particles (position and momentum both) need to be loaded at time level 0. 43 | 44 | // Arguments can be passed from the command line to the input deck 45 | // if( num_cmdline_arguments!=3 ) { 46 | // sim_log( "Usage: " << cmdline_argument[0] << " mass_ratio seed" ); 47 | // abort(0); 48 | // } 49 | seed_entropy(1); //seed_entropy( atoi( cmdline_argument[2] ) ); 50 | 51 | // Diagnostic messages can be passed written (usually to stderr) 52 | sim_log( "Computing simulation parameters"); 53 | 54 | // Define the system of units for this problem (natural units) 55 | //double L = 1; // Length normalization (sheet thickness) 56 | double de = 1; // Length normalization (electron inertial length) 57 | double ec = 1; // Charge normalization 58 | double me = 1; // Mass normalization 59 | double c = 1; // Speed of light 60 | double eps0 = 1; // Permittivity of space 61 | 62 | // Physics parameters 63 | double mi_me = 1; //1836; //25; //atof(cmdline_argument[1]); // Ion mass / electron mass 64 | double vthe = 0; //0.0424264068711; //0.424264068711; // Electron thermal velocity 65 | double vthi = 0; //0.0424264068711; //0.424264068711; // Ion thermal velocity 66 | //double vthex =0; //0.0141421356237; // 0.141421356237; // Electron thermal velocity in x-direction. 67 | //double vthix =0; //0.0141421356237; // 0.141421356237;Ion thermal velocity in x-direction. 
68 | 69 | double v0e = 0.0866025403784439; //*4.0; //*4; //drift velocity 70 | double v0i =-0.0866025403784439; //*4.0; //*4; //drift velocity 71 | double gam = 1.0/sqrt(1.0-v0e*v0e); 72 | v0e *= gam; 73 | v0i *= gam; 74 | 75 | double n0 = 1.0; // Background plasma density 76 | double b0 = 0.0; // In plane magnetic field. 77 | //double bg = 0.0; // Guide field magnitude 78 | double tauwpe = 200000; // simulation wpe's to run 79 | 80 | // Numerical parameters 81 | double topology_x = nproc(); // Number of domains in x, y, and z 82 | double topology_y = 1; 83 | double topology_z = 1; // For load balance, best to keep "1" or "2" for Harris sheet 84 | double Lx = 1; //*4.0; //4.62*de; //6.7*de; //10.0*de; // How big should the box be in the x direction 85 | double Ly = 0.628318530717959*(gam*sqrt(gam)); //0.0721875*de; // How big should the box be in the y direction 86 | double Lz = 1; //0.0721875*de; // How big should the box be in the z direction 87 | double nx = 1; // Global resolution in the x direction 88 | double ny = 32; // Global resolution in the y direction 89 | double nz = 1; //32; // Global resolution in the z direction 90 | double nppc = 50; //125; //800; //200; //2048; //1024; //128; // Average number of macro particles per cell (both species combined!) 91 | double cfl_req = 0.99f; //0.99; // How close to Courant should we try to run 92 | double wpedt_max = 0.36; // How big a timestep is allowed if Courant is not too restrictive 93 | double damp = 0.0; // Level of radiation damping 94 | 95 | 96 | // Derived quantities 97 | double mi = me*mi_me; // Ion mass 98 | double wpe = c/de; // electron plasma frequency 99 | double wpi = wpe/sqrt(mi_me); // ion plasma frequency 100 | double di = c/wpi; // ion inertial length 101 | 102 | double hx = Lx/nx; 103 | double hy = Ly/ny; 104 | double hz = Lz/nz; 105 | 106 | double Npe = n0*Ly*Lz*Lx; // Number physical electrons. 
107 | double Npi = Npe; // Number of physical ions in box 108 | double Ne = nppc*nx*ny*nz; // total macro electrons in box 109 | 110 | 111 | Ne = trunc_granular(Ne,nproc()); 112 | double Ni = Ne; // Total macro ions in box 113 | //double qe = -ec*Npe/Ne; // Charge per macro electron 114 | //double qi = -ec*Npe/Ne; // Charge per macro electron 115 | 116 | double we = Npe/Ne; // Weight of a macro electron 117 | double wi = Npi/Ni; // Weight of a macro ion 118 | 119 | printf("Npe %e Ne %e we %e \n", Npe, Ne, we); 120 | 121 | // Determine the timestep 122 | double dg = courant_length(Lx,Ly,Lz,nx,ny,nz); // Courant length 123 | double dt = cfl_req*dg/c; // Courant limited time step 124 | // printf("in harris.cxx: dt=%.7f\n", dt); 125 | // exit(1); 126 | if( wpe*dt>wpedt_max ) dt=wpedt_max/wpe; // Override time step if plasma frequency limited 127 | 128 | //////////////////////////////////////// 129 | // Setup high level simulation parmeters 130 | 131 | num_step = 6000; //1200; // int(tauwpe/(wpe*dt)); 132 | status_interval = 0; //2000; 133 | sync_shared_interval = 0; //status_interval; 134 | clean_div_e_interval = 0; //turn off cleaning (GY)//status_interval; 135 | clean_div_b_interval = 0; //status_interval; //(GY) 136 | 137 | global->energies_interval = 1; //status_interval; 138 | global->fields_interval = status_interval; 139 | global->ehydro_interval = status_interval; 140 | global->ihydro_interval = status_interval; 141 | global->eparticle_interval = status_interval; // Do not dump 142 | global->iparticle_interval = status_interval; // Do not dump 143 | global->restart_interval = status_interval; // Do not dump 144 | 145 | /////////////////////////// 146 | // Setup the space and time 147 | 148 | // Setup basic grid parameters 149 | define_units( c, eps0 ); 150 | define_timestep( dt ); 151 | grid->dx = hx; 152 | grid->dy = hy; 153 | grid->dz = hz; 154 | grid->dt = dt; 155 | grid->cvac = c; 156 | //grid->damp = damp; 157 | double gx0 = 0; 158 | double gy0 = 0; 
//-0.5*Ly; 159 | double gz0 = 0; //-0.5*Lz; 160 | double gx1 = Lx; 161 | double gy1 = Ly; 162 | double gz1 = Lz; 163 | 164 | define_periodic_grid( gx0, gy0, gz0, // Low corner 165 | gx1, gy1, gz1, // High corner 166 | nx, ny, nz, // Resolution 167 | topology_x, topology_y, topology_z); // Topology 168 | // Parition a periodic box among the processors sliced uniformly along y 169 | // define_periodic_grid( -0.5*Lx, 0, 0, // Low corner 170 | // 0.5*Lx, Ly, Lz, // High corner 171 | // nx, ny, nz, // Resolution 172 | // 1, nproc(), 1 ); // Topology 173 | // define_periodic_grid( 0, -0.5*Ly, -0.5*Lz, // Low corner 174 | // Lx, 0.5*Ly, 0.5*Lz, // High corner 175 | // nx, ny, nz, // Resolution 176 | // topology_x, topology_y, topology_z); // Topology 177 | 178 | // printf("in harris.cxx: g->neighbor[6*265]=%jd\n", grid->neighbor[6*265]); 179 | // Override some of the boundary conditions to put a particle reflecting 180 | // perfect electrical conductor on the -x and +x boundaries 181 | // set_domain_field_bc( BOUNDARY(-1,0,0), pec_fields ); 182 | // set_domain_field_bc( BOUNDARY( 1,0,0), pec_fields ); 183 | // set_domain_particle_bc( BOUNDARY(-1,0,0), reflect_particles ); 184 | // set_domain_particle_bc( BOUNDARY( 1,0,0), reflect_particles ); 185 | 186 | define_material( "vacuum", 1 ); 187 | // Note: define_material defaults to isotropic materials with mu=1,sigma=0 188 | // Tensor electronic, magnetic and conductive materials are supported 189 | // though. See "shapes" for how to define them and assign them to regions. 190 | // Also, space is initially filled with the first material defined. 191 | 192 | // If you pass NULL to define field array, the standard field array will 193 | // be used (if damp is not provided, no radiation damping will be used). 
194 | define_field_array( NULL, damp ); 195 | 196 | //////////////////// 197 | // Setup the species 198 | 199 | // Allow 50% more local_particles in case of non-uniformity 200 | // VPIC will pick the number of movers to use for each species 201 | // Both species use out-of-place sorting 202 | // species_t * ion = define_species( "ion", ec, mi, 1.5*Ni/nproc(), -1, 40, 1 ); 203 | // species_t * electron = define_species( "electron", -ec, me, 1.5*Ne/nproc(), -1, 20, 1 ); 204 | //species_t *electron = define_species("electron",-ec,me,2.4*Ne/nproc(),-1,25,0); 205 | //species_t *ion = define_species("ion", ec,mi,2.4*Ne/nproc(),-1,25,0); 206 | 207 | species_t *electron = define_species("electron",-ec,me,3*Ne/nproc(),-1,0,0); //turn off sorting (GY) 208 | species_t *ion = define_species("ion", -ec,mi,3*Ne/nproc(),-1,0,0); //(GY) 209 | 210 | /////////////////////////////////////////////////// 211 | // Log diagnostic information about this simulation 212 | 213 | sim_log( "***********************************************" ); 214 | sim_log ( "Npe " << Npe ); 215 | sim_log ( "Ne " << Ne ); 216 | sim_log ( "we " << we ); 217 | sim_log ( "mi/me = " << mi_me ); 218 | sim_log ( "tauwpe = " << tauwpe ); 219 | sim_log ( "num_step = " << num_step ); 220 | sim_log ( "Lx/di = " << Lx/di ); 221 | sim_log ( "Lx/de = " << Lx/de ); 222 | sim_log ( "Ly/di = " << Ly/di ); 223 | sim_log ( "Ly/de = " << Ly/de ); 224 | sim_log ( "Lz/di = " << Lz/di ); 225 | sim_log ( "Lz/de = " << Lz/de ); 226 | sim_log ( "nx = " << nx ); 227 | sim_log ( "ny = " << ny ); 228 | sim_log ( "nz = " << nz ); 229 | sim_log ( "damp = " << damp ); 230 | sim_log ( "courant = " << c*dt/dg ); 231 | sim_log ( "nproc = " << nproc () ); 232 | sim_log ( "nppc = " << nppc ); 233 | sim_log ( " b0 = " << b0 ); 234 | sim_log ( " di = " << di ); 235 | sim_log ( " Ne = " << Ne ); 236 | sim_log ( "total # of particles = " << 2*Ne ); 237 | sim_log ( "dt*wpe = " << wpe*dt ); 238 | sim_log ( "dx/de = " << Lx/(de*nx) ); 239 | sim_log ( 
"dy/de = " << Ly/(de*ny) ); 240 | sim_log ( "dz/de = " << Lz/(de*nz) ); 241 | sim_log ( "dx/debye = " << (Lx/nx)/(vthe/wpe) ); 242 | sim_log ( "n0 = " << n0 ); 243 | sim_log ( "vthi/c = " << vthi/c ); 244 | sim_log ( "vthe/c = " << vthe/c ); 245 | sim_log( "" ); 246 | 247 | //////////////////////////// 248 | // Load fields and particles 249 | 250 | // sim_log( "Loading fields" ); 251 | 252 | // set_region_field( everywhere, 0, 0, 0, // Electric field 253 | // 0, -sn*b0*tanh(x/L), cs*b0*tanh(x/L) ); // Magnetic field 254 | // Note: everywhere is a region that encompasses the entire simulation 255 | // In general, regions are specied as logical equations (i.e. x>0 && x+y<2) 256 | 257 | sim_log( "Loading particles" ); 258 | 259 | // Do a fast load of the particles 260 | //seed_rand( rng_seed*nproc() + rank() ); //Generators desynchronized 261 | double xmin = grid->x0 , xmax = grid->x1; 262 | double ymin = grid->y0 , ymax = grid->y1; 263 | double zmin = grid->z0 , zmax = grid->z1; 264 | 265 | // printf("rank=%d,xmin=%.14f,xmax=%.14f,dx=%.14f,nx=%d\n",rank(),grid->x0,grid->x1,grid->dx,grid->nx); 266 | // printf("rank=%d,xmin=%.14f,xmax=%.14f\n",rank(),xmin,xmax); 267 | // printf("rank=%d,xmin=%.14f,ymin=%.14f,zmin=%.14f\n",rank(),xmin,ymin,zmin); 268 | // printf("rank=%d,xmax=%.14f,ymax=%.14f,zmax=%.14f\n",rank(),xmax,ymax,zmax); 269 | // printf("rank=%d,gx0=%.14f,gy0=%.14f,gz0=%.14f\n",rank(),gx0,gy0,gz0); 270 | // printf("rank=%d,gx1=%.14f,gy1=%.14f,gz1=%.14f\n",rank(),gx1,gy1,gz1); 271 | sim_log( "-> Uniform Bi-Maxwellian" ); 272 | //int seed = 1; 273 | //int seedn= 1; 274 | double n1,n2,n3,n4,n5,n6; 275 | //int signx,signy,signz; 276 | int Nlocal=0; 277 | double dxp=Ly/Ne; 278 | int ip=0; 279 | repeat ( Ne ) { 280 | double y = (ip+0.5)*dxp; 281 | ip++; 282 | //double x = uniform2( gx0, gx1 , seed ); 283 | double x = 0.5*Lx; //uniform2( gy0, gy1 , seed ); 284 | double z = 0.5*Lz; //uniform2( gz0, gz1 , seed ); 285 | // double x = uniform( rng(0), xmin, xmax ); 286 | 
// double y = uniform( rng(0), ymin, ymax ); 287 | // double z = uniform( rng(0), zmin, zmax ); 288 | // double x = uniform( rng(0), gx0, gx1 ); 289 | // double y = uniform( rng(0), gy0, gy1 ); 290 | // double z = uniform( rng(0), gz0, gz1 ); 291 | // n1 = normal(rng(0),v0e,vthex); 292 | // n2 = normal(rng(0),0,vthe ); 293 | // n3 = normal(rng(0),0,vthe ); 294 | // n4 = normal(rng(0),v0i,vthix); 295 | // n5 = normal(rng(0),0,vthi ); 296 | // n6 = normal(rng(0),0,vthi ); 297 | n1 = v0e; 298 | n2 = 0; 299 | n3 = 0; 300 | n4 = v0i; 301 | n5 = 0; 302 | n6 = 0; 303 | //mpi reproducing serial 304 | if(xxmax||yymax||zzmax) continue; 305 | 306 | double na = 1e-4*sin(2.0*3.1415926*y/Ly); 307 | inject_particle( electron, x, y, z, 308 | n1*(1.0+na), 309 | n2, 310 | n3,we, 0, 0); 311 | 312 | 313 | 314 | inject_particle( ion, x, y, z, 315 | n4*(1.0-na), 316 | n5, 317 | n6,wi, 0 ,0 ); 318 | Nlocal++; 319 | } 320 | 321 | // //quiet start 322 | // repeat ( Ne/8 ) { 323 | // double x = uniform2( gx0, gx1 , seed ); 324 | // double y = uniform2( gy0, gy1 , seed ); 325 | // double z = uniform2( gz0, gz1 , seed ); 326 | // n1 = v0e; 327 | // n2 = 0; 328 | // n3 = 0; 329 | // n4 = v0i; 330 | // n5 = 0; 331 | // n6 = 0; 332 | 333 | // signx = -1; 334 | // signy = -1; 335 | // signz = -1; 336 | // for(int i=0; i<2; i++){ 337 | // signx = -signx; 338 | // for(int j=0; j<2; j++){ 339 | // signy = -signy; 340 | // for(int k=0; k<2; k++){ 341 | // signz = -signz; 342 | // inject_particle( electron, x, y, z, 343 | // n1*signx, 344 | // n2*signy, 345 | // n3*signz,we, 0, 0); 346 | // inject_particle( ion, x, y, z, 347 | // n4*signx, 348 | // n5*signy, 349 | // n6*signz,wi, 0 ,0 ); 350 | 351 | // } 352 | // } 353 | // } 354 | 355 | 356 | // signx = -1; 357 | // signy = -1; 358 | // signz = -1; 359 | // for(int i=0; i<2; i++){ 360 | // signx = -signx; 361 | // for(int j=0; j<2; j++){ 362 | // signy = -signy; 363 | // for(int k=0; k<2; k++){ 364 | // signz = -signz; 365 | 366 | // } 367 | // } 368 
| // } 369 | 370 | //} 371 | // printf("Nlocal=%d (of %f)\n",Nlocal,Ne); 372 | sim_log( "Finished loading particles" ); 373 | 374 | //exit(1); 375 | 376 | // Upon completion of the initialization, the following occurs: 377 | // - The synchronization error (tang E, norm B) is computed between domains 378 | // and tang E / norm B are synchronized by averaging where discrepancies 379 | // are encountered. 380 | // - The initial divergence error of the magnetic field is computed and 381 | // one pass of cleaning is done (for good measure) 382 | // - The bound charge density necessary to give the simulation an initially 383 | // clean divergence e is computed. 384 | // - The particle momentum is uncentered from u_0 to u_{-1/2} 385 | // - The user diagnostics are called on the initial state 386 | // - The physics loop is started 387 | // 388 | // The physics loop consists of: 389 | // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2} 390 | // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles) 391 | // - User current injection (adjust field(x,y,z).jfx, jfy, jfz) 392 | // - Advance B from B_0 to B_{1/2} 393 | // - Advance E from E_0 to E_1 394 | // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz) 395 | // - Advance B from B_{1/2} to B_1 396 | // - (periodically) Divergence clean electric field 397 | // - (periodically) Divergence clean magnetic field 398 | // - (periodically) Synchronize shared tang e and norm b 399 | // - Increment the time step 400 | // - Call user diagnostics 401 | // - (periodically) Print a status message 402 | } 403 | 404 | begin_diagnostics { 405 | 406 | # define should_dump(x) (global->x##_interval>0 && remainder(step(),global->x##_interval)==0) 407 | 408 | if( step()==-10 ) { 409 | // A grid dump contains all grid parameters, field boundary conditions, 410 | // particle boundary conditions and domain connectivity information. This 411 | // is stored in a binary format. 
Each rank makes a grid dump 412 | dump_grid("grid"); 413 | 414 | // A materials dump contains all the materials parameters. This is in a 415 | // text format. Only rank 0 makes the materials dump 416 | dump_materials("materials"); 417 | 418 | // A species dump contains the physics parameters of a species. This is in 419 | // a text format. Only rank 0 makes the species dump 420 | dump_species("species"); 421 | } 422 | 423 | // Energy dumps store all the energies in various directions of E and B 424 | // and the total kinetic (not including rest mass) energies of each species 425 | // species in a simple text format. By default, the energies are appended to 426 | // the file. However, if a "0" is added to the dump_energies call, a new 427 | // energies dump file will be created. The energies are in the units of the 428 | // problem and are all time centered appropriately. Note: When restarting a 429 | // simulation from a restart dump made at a prior time step to the last 430 | // energies dump, the energies file will have a "hiccup" of intervening 431 | // time levels. This "hiccup" will not occur if the simulation is aborted 432 | // immediately following a restart dump. Energies dumps are in a text 433 | // format and the layout is documented at the top of the file. Only rank 0 434 | // makes makes an energies dump. 435 | if( should_dump(energies) ) { 436 | dump_energies( "energies", step()==0 ? 0 : 1 ); 437 | } 438 | 439 | // Field dumps store the raw electromagnetic fields, sources and material 440 | // placement and a number of auxilliary fields. E, B and RHOB are 441 | // timecentered, JF and TCA are half a step old. Material fields are static 442 | // and the remaining fields (DIV E ERR, DIV B ERR and RHOF) are for 443 | // debugging purposes. By default, field dump filenames are tagged with 444 | // step(). However, if a "0" is added to the call, the filename will not be 445 | // tagged. 
The JF that gets stored is accumulated with a charge-conserving 446 | // algorithm. As a result, JF is not valid until at least one timestep has 447 | // been completed. Field dumps are in a binary format. Each rank makes a 448 | // field dump. 449 | if( step()==-10 ) dump_fields("fields"); // Get first valid total J 450 | if( should_dump(fields) ) dump_fields("fields"); 451 | 452 | // Hydro dumps store particle charge density, current density and 453 | // stress-energy tensor. All these quantities are known at the time 454 | // t = time(). All these quantities are accumulated trilinear 455 | // node-centered. By default, species dump filenames are tagged with 456 | // step(). However, if a "0" is added to the call, the filename will not 457 | // be tagged. Note that the current density accumulated by this routine is 458 | // purely diagnostic. It is not used by the simulation and it is not 459 | // accumulated using a self-consistent charge-conserving method. Hydro dumps 460 | // are in a binary format. Each rank makes a hydro dump. 461 | if( should_dump(ehydro) ) dump_hydro("electron","ehydro"); 462 | if( should_dump(ihydro) ) dump_hydro("ion", "ihydro"); 463 | 464 | // Particle dumps store the particle data for a given species. The data 465 | // written is known at the time t = time(). By default, particle dumps 466 | // are tagged with step(). However, if a "0" is added to the call, the 467 | // filename will not be tagged. Particle dumps are in a binary format. 468 | // Each rank makes a particle dump. 469 | if( should_dump(eparticle) ) dump_particles("electron","eparticle"); 470 | if( should_dump(iparticle) ) dump_particles("ion", "iparticle"); 471 | 472 | // A checkpt is made by calling checkpt( fbase, tag ) where fname is a string 473 | // and tag is an integer. A typical usage is: 474 | // checkpt( "checkpt", step() ). 
475 | // This will cause each process to write their simulation state to a file 476 | // whose name is based on fbase, tag and the node's rank. For the above 477 | // usage, if called on step 314 on a 4 process run, the four files: 478 | // checkpt.314.0, checkpt.314.1, checkpt.314.2, checkpt.314.3 479 | // to be written. The simulation can then be restarted from this point by 480 | // invoking the application with "--restore checkpt.314". checkpt must be 481 | // the _VERY_ LAST_ diagnostic called. If not, diagnostics performed after 482 | // the checkpt but before the next timestep will be missed on restore. 483 | // Restart dumps are in a binary format unique to the each simulation. 484 | 485 | if( should_dump(restart) ) checkpt( "checkpt", step() ); 486 | 487 | // If you want to write a checkpt after a certain amount of simulation time, 488 | // use uptime() in conjunction with checkpt. For example, this will cause 489 | // the simulation state to be written after 7.5 hours of running to the 490 | // same file every time (useful for dealing with quotas on big machines). 
# Driver executable: the example main program compiled together with the
# input deck source selected through INPUT_DECK (set outside this file).
# INPUT_DECK is intentionally left unquoted so that an empty value expands to
# no argument at all.
add_executable(cbnpic
  example.cpp
  ${INPUT_DECK}
)

# Nothing links against an executable, so its dependencies are PRIVATE.
target_link_libraries(cbnpic PRIVATE CabanaPIC)

# The build directory of this folder is added to the include path.
target_include_directories(cbnpic PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")

# CMAKE_INSTALL_BINDIR is expected to be defined by the including project
# (NOTE(review): presumably via GNUInstallDirs in the top-level file -- confirm).
install(TARGETS cbnpic RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
// Construct the field solver for `fields` and return it by value, announcing
// on stdout which variant was selected. The variant is chosen at compile
// time: the ES branch is compiled when ES_FIELD_SOLVER is defined, the EM
// branch otherwise. The return type is deduced, which is why C++14 is needed.
//
// NOTE(review): both branches read identically in this view. The template
// argument of Field_Solver looks like it was lost (text between angle
// brackets is missing elsewhere in this file too) -- restore it from the
// repository before building.
// Requires C++14
static auto make_field_solver(field_array_t &fields)
{
    // TODO: make this support 1/2/3d
#ifdef ES_FIELD_SOLVER
    std::cout << "Created ES Solver (1D only)" << std::endl;
    Field_Solver field_solver(fields);
#else // EM
    std::cout << "Created EM Solver" << std::endl;
    Field_Solver field_solver(fields);
#endif
    return field_solver;
}
47 | Kokkos::ScopeGuard scope_guard( argc, argv ); 48 | 49 | printf("#Running On Kokkos execution space %s\n", 50 | typeid (Kokkos::DefaultExecutionSpace).name ()); 51 | 52 | 53 | #ifndef ENERGY_DUMP_INTERVAL 54 | #define ENERGY_DUMP_INTERVAL 1 55 | #endif 56 | 57 | // Cabana scoping block 58 | { 59 | FILE *fptr = fopen("partloc","w"); 60 | FILE *fpfd = fopen("ex1d","w"); 61 | deck.derive_params(); 62 | deck.print_run_details(); 63 | 64 | // Cache some values locally for printing 65 | const int npc = deck.nppc; 66 | const int nx = deck.nx; 67 | const int ny = deck.ny; 68 | const int nz = deck.nz; 69 | #ifdef ES_FIELD_SOLVER 70 | if(ny>1 || nz>1){ 71 | std::cerr << "Error: ES Field solver supports 1D only.\n"; 72 | return -1; 73 | } 74 | #endif 75 | const int num_ghosts = deck.num_ghosts; 76 | const size_t num_cells = deck.num_cells; 77 | real_t dxp = 2.f / (npc); 78 | 79 | // Define some consts 80 | const real_t dx = deck.dx; 81 | const real_t dy = deck.dy; 82 | const real_t dz = deck.dz; 83 | 84 | real_t dt = deck.dt; 85 | real_t c = deck.c; 86 | real_t n0 = deck.n0; 87 | //real_t ec = deck.ec; 88 | real_t Lx = deck.len_x; 89 | real_t Ly = deck.len_y; 90 | real_t Lz = deck.len_z; 91 | real_t v0 = deck.v0; 92 | 93 | int nppc = deck.nppc; 94 | real_t eps0 = deck.eps; 95 | 96 | real_t Npe = deck.Npe; 97 | size_t Ne = deck.Ne; // (nppc*nx*ny*nz) 98 | printf("nppc %d nx %d ny %d nz %d \n", nppc, nx, ny, nz); 99 | printf("n0 %e lx %e nly %e lz %e \n", n0, Lx, Ly, Lz); 100 | printf("ne %ld npe %e \n", Ne, Npe); 101 | 102 | real_t qsp = deck.qsp; 103 | printf("qsp %e \n", qsp); 104 | real_t me = deck.me; 105 | 106 | real_t qdt_2mc = qsp*dt/(2*me*c); 107 | 108 | real_t cdt_dx = c*dt/dx; 109 | real_t cdt_dy = c*dt/dy; 110 | real_t cdt_dz = c*dt/dz; 111 | real_t dt_eps0 = dt/eps0; 112 | real_t frac = 1.0f; 113 | real_t we = (real_t) Npe/(real_t) Ne; 114 | printf("we %e \n", we); 115 | 116 | const size_t num_particles = deck.num_particles; 117 | 118 | printf("c %e dt %e dx %e 
cdt_dx %e \n", c, dt,dx,cdt_dx); 119 | 120 | // Create the particle list. 121 | particle_list_t particles( "particles", num_particles ); 122 | 123 | // Initialize particles. 124 | deck.initialize_particles( particles, nx, ny, nz, num_ghosts, dxp, npc, we, v0 ); 125 | 126 | grid_t* grid = new grid_t(); 127 | 128 | // Print initial particle positions 129 | //logger << "Initial:" << std::endl; 130 | //print_particles( particles ); 131 | fprintf(fptr,"#step=0\n0 "); 132 | dump_particles( fptr, particles, 0, 0, 0, dx,dy,dz,nx,ny,nz,num_ghosts ); 133 | 134 | // Allocate Cabana Data 135 | interpolator_array_t interpolators("interpolator", num_cells); 136 | 137 | accumulator_array_t accumulators("accumulator", num_cells); 138 | 139 | auto scatter_add = Kokkos::Experimental::create_scatter_view(accumulators); 140 | //(accumulators); 143 | 144 | field_array_t fields("fields", num_cells); 145 | 146 | // Zero out the interpolator 147 | // Techincally this is optional? 148 | initialize_interpolator(interpolators); 149 | 150 | // Can obviously supply solver type at compile time 151 | //Field_Solver field_solver(fields); 152 | //Field_Solver field_solver(fields); 153 | // This is able to deduce solver type from compile options 154 | auto field_solver = make_field_solver(fields); 155 | 156 | deck.initialize_fields( 157 | fields, 158 | nx, 159 | ny, 160 | nz, 161 | num_ghosts, 162 | Lx, 163 | Ly, 164 | Lz, 165 | dx, 166 | dy, 167 | dz 168 | ); 169 | 170 | // Grab some global values for use later 171 | const Boundary boundary = deck.BOUNDARY_TYPE; 172 | 173 | //logger << "nx " << params.nx << std::endl; 174 | //logger << "num_particles " << num_particles << std::endl; 175 | //logger << "num_cells " << num_cells << std::endl; 176 | //logger << "Actual NPPC " << params.NPPC << std::endl; 177 | 178 | // TODO: give these a real value 179 | const real_t px = (nx>1) ? frac*c*dt/dx : 0; 180 | const real_t py = (ny>1) ? frac*c*dt/dy : 0; 181 | const real_t pz = (nz>1) ? 
frac*c*dt/dz : 0; 182 | 183 | // simulation loop 184 | const int num_steps = deck.num_steps; 185 | 186 | printf( "#***********************************************\n" ); 187 | printf( "#num_step = %d\n" , num_steps ); 188 | printf( "#Lx/de = %f\n" , Lx ); 189 | printf( "#Ly/de = %f\n" , Ly ); 190 | printf( "#Lz/de = %f\n" , Lz ); 191 | printf( "#nx = %d\n" , nx ); 192 | printf( "#ny = %d\n" , ny ); 193 | printf( "#nz = %d\n" , nz ); 194 | printf( "#nppc = %d\n" , nppc ); 195 | printf( "# Ne = %ld\n" , Ne ); 196 | printf( "#dt*wpe = %f\n" , dt ); 197 | printf( "#dx/de = %f\n" , Lx/(nx) ); 198 | printf( "#dy/de = %f\n" , Ly/(ny) ); 199 | printf( "#dz/de = %f\n" , Lz/(nz) ); 200 | printf( "#n0 = %f\n" , n0 ); 201 | printf( "#we = %f\n" , we ); 202 | printf( "*****\n" ); 203 | 204 | if (deck.perform_uncenter) 205 | { 206 | load_interpolator_array(fields, interpolators, nx, ny, nz, num_ghosts); 207 | 208 | uncenter_particles( 209 | particles, 210 | interpolators, 211 | qdt_2mc 212 | ); 213 | } 214 | 215 | // Main loop 216 | for (int step = 1; step <= num_steps; step++) 217 | { 218 | //printf("Step %d \n", step); 219 | 220 | // Convert fields to interpolators 221 | load_interpolator_array(fields, interpolators, nx, ny, nz, num_ghosts); 222 | 223 | clear_accumulator_array(fields, accumulators, nx, ny, nz); 224 | // TODO: Make the frequency of this configurable (every step is not 225 | // required for this incarnation) 226 | // Sort by cell index 227 | //auto keys = particles.slice(); 228 | //auto bin_data = Cabana::sortByKey( keys ); 229 | 230 | // Move 231 | push( 232 | particles, 233 | interpolators, 234 | qdt_2mc, 235 | cdt_dx, 236 | cdt_dy, 237 | cdt_dz, 238 | qsp, 239 | scatter_add, 240 | grid, 241 | nx, 242 | ny, 243 | nz, 244 | num_ghosts, 245 | boundary 246 | ); 247 | 248 | Kokkos::Experimental::contribute(accumulators, scatter_add); 249 | 250 | // Only reset the data if these two are not the same arrays 251 | scatter_add.reset_except(accumulators); 252 | 253 | // 
TODO: boundaries? MPI 254 | //boundary_p(); // Implies Parallel? 255 | 256 | // Map accumulator current back onto the fields 257 | unload_accumulator_array(fields, accumulators, nx, ny, nz, num_ghosts, dx, dy, dz, dt); 258 | 259 | // Half advance the magnetic field from B_0 to B_{1/2} 260 | field_solver.advance_b(fields, real_t(0.5)*px, real_t(0.5)*py, real_t(0.5)*pz, nx, ny, nz, num_ghosts); 261 | 262 | // Advance the electric field from E_0 to E_1 263 | field_solver.advance_e(fields, px, py, pz, nx, ny, nz, num_ghosts, dt_eps0); 264 | 265 | // Half advance the magnetic field from B_{1/2} to B_1 266 | field_solver.advance_b(fields, real_t(0.5)*px, real_t(0.5)*py, real_t(0.5)*pz, nx, ny, nz, num_ghosts); 267 | 268 | if( step % ENERGY_DUMP_INTERVAL == 0 ) 269 | { 270 | dump_energies(field_solver, fields, step, step*dt, px, py, pz, nx, ny, nz, num_ghosts); 271 | } 272 | 273 | // TODO: abstract this out 274 | fprintf(fpfd,"#step=%d\n",step); 275 | field_solver.dump_fields(fpfd,fields, 0, 0, 0, dx,dy,dz,nx,ny,nz,num_ghosts ); 276 | fprintf(fptr,"#step=%d\n%e ",step,step*dt); 277 | dump_particles( fptr, particles, 0, 0, 0, dx,dy,dz,nx,ny,nz,num_ghosts ); 278 | 279 | } 280 | 281 | fclose(fptr); 282 | fclose(fpfd); 283 | 284 | } // End Scoping block 285 | 286 | // Let the user perform any needed finalization 287 | deck.finalize(); 288 | 289 | return 0; 290 | } 291 | 292 | //---------------------------------------------------------------------------// 293 | // 294 | 295 | ////// Known Possible Improvements ///// 296 | // I pass nx/ny/nz round a lot more than I could 297 | 298 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | - `plot-py`: Simple python script to plot energies from files 2 | -------------------------------------------------------------------------------- /scripts/plot.py: 
def read_file(filename):
    """Read the third whitespace-separated column of a text file as floats.

    Lines that are blank or whose first non-blank character is '#' are
    skipped. Every remaining line must have at least three columns.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of floats, one per data line, in file order.

    Raises:
        IndexError: If a data line has fewer than three columns.
        ValueError: If the third column of a data line is not a number.
    """
    data = []
    with open(filename) as textFile:
        for line in textFile:
            stripped = line.strip()
            if stripped.startswith('#'):
                # Skip comments
                continue
            if stripped == "":
                # skip empty lines
                continue

            # Progress trace kept from the original script.
            print(line)
            # Convert to a number: left as strings, matplotlib would treat the
            # values as categorical labels instead of plotting a numeric series.
            data.append(float(stripped.split()[2]))
            print(data)
    return data
#!/bin/bash
# Configure, build and install Cabana against an existing Kokkos install.
#
# Usage: build_cabana.sh <cxx> <kokkos dir> <cabana dir> <install dir> <platform>
#   cxx         C++ compiler (replaced by Kokkos' nvcc_wrapper for GPU)
#   kokkos dir  Kokkos prefix, relative to the directory the script is run from
#   cabana dir  Cabana source directory
#   install dir build directory created inside the Cabana source directory
#   platform    one of Serial, CPU, GPU
#
# Cabana is installed to <cabana dir>/<install dir>/install.

echo "Positional Parameters"
echo '$0 = ' $0 # this file
echo '$1 = ' $1 # cxx
echo '$2 = ' $2 # kokkos dir
echo '$3 = ' $3 # cabana dir
echo '$4 = ' $4 # install dir
echo '$5 = ' $5 # platform

cxx=$1
kokkos_dir="$(pwd)/$2"
echo "$kokkos_dir"
cabana_dir=$3
install_dir=$4
platform=$5

# Stop if a directory cannot be entered; otherwise cmake would configure
# whatever directory the script happens to be in.
cd "$cabana_dir" || { echo "Error: cannot enter '$cabana_dir'" >&2; exit 1; }
mkdir -p "$install_dir"   # -p: re-running with an existing build dir is fine
cd "$install_dir" || { echo "Error: cannot enter '$install_dir'" >&2; exit 1; }
pwd

# Default to off. An array keeps every option a separate, correctly quoted word.
options=(-D Cabana_ENABLE_Serial=OFF)

# possible platforms = ["Serial", "CPU", "GPU", "UVM"]
if [[ $platform == "Serial" ]]; then
    # Override default
    options=(-D Cabana_ENABLE_Serial=ON)
elif [[ $platform == "CPU" ]]; then
    options=(-D Cabana_ENABLE_OpenMP=ON "${options[@]}")
elif [[ $platform == "GPU" ]]; then
    options=(-D "CMAKE_CXX_COMPILER=$kokkos_dir/bin/nvcc_wrapper" -D Cabana_ENABLE_Cuda:BOOL=ON "${options[@]}")
    cxx=$kokkos_dir/bin/nvcc_wrapper
# TODO: enable UVM build
else
    # As before, an unrecognised platform builds with the defaults; it is now
    # reported instead of passing silently.
    echo "Warning: unknown platform '$platform', using default options" >&2
fi

echo "${options[@]}"

CXX=$cxx cmake \
    -D CMAKE_BUILD_TYPE="Release" \
    -D CMAKE_PREFIX_PATH="$kokkos_dir" \
    -D CMAKE_INSTALL_PREFIX="$(pwd)/install" \
    -D Cabana_ENABLE_TESTING=OFF \
    -D Cabana_ENABLE_EXAMPLES=OFF \
    "${options[@]}" \
    .. || { echo "Error: cmake configuration failed" >&2; exit 1; }
make install
; 54 | make install 55 | -------------------------------------------------------------------------------- /scripts/run_scripts/build_kokkos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Positional Parameters" 4 | echo '$0 = ' $0 # this file 5 | echo '$1 = ' $1 # cxx 6 | echo '$2 = ' $2 # kokkos dir 7 | echo '$3 = ' $3 # install dir 8 | echo '$4 = ' $4 # platform 9 | echo '$5 = ' $5 # arch 10 | 11 | kokkos_dir=`pwd`/$2 12 | install_dir=$3 13 | platform=$4 14 | cxx=$1 15 | 16 | cd $kokkos_dir 17 | mkdir $install_dir 18 | cd $install_dir 19 | echo `pwd` 20 | 21 | options="--compiler=$cxx" 22 | 23 | #platforms = ["Serial", "CPU", "GPU", "UVM"] 24 | if [[ $platform == "Serial" ]]; then 25 | options="$options --with-serial" 26 | elif [[ $platform == "CPU" ]]; then 27 | options="$options --with-openmp" 28 | elif [[ $platform == "GPU" ]]; then 29 | #export NVCC_WRAPPER_DEFAULT_COMPILER=`which $CXX` 30 | options="--with-openmp --with-cuda --arch=$5 --with-cuda-options=enable_lambda --compiler=$kokkos_dir/bin/nvcc_wrapper ;" 31 | # TODO: enable UVM build 32 | #elif [[ $platform == "UVM" ]] then 33 | #options="--" 34 | else 35 | # This means they passed up the wrong value 36 | eep 37 | fi 38 | 39 | echo "Running with $options" 40 | 41 | # TODO: check this works 42 | CXX=$cxx ../generate_makefile.bash --prefix=`pwd`/install $options 43 | make install 44 | -------------------------------------------------------------------------------- /scripts/run_scripts/kokkos-tools/Makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CXXFLAGS=-O3 -std=c++11 -g 3 | SHARED_CXXFLAGS=-shared -fPIC 4 | # comment the following line if abi::__cxa_demangle is not supported by your compiler 5 | CXXFLAGS+= -DHAVE_GCC_ABI_DEMANGLE 6 | 7 | all: kp_kernel_timer.so #kp_reader 8 | 9 | #kp_reader: kp_reader.cpp kp_kernel_timer.so 10 | #$(CXX) $(CXXFLAGS) -o kp_reader kp_reader.cpp 11 | 
12 | kp_kernel_timer.so: kp_kernel_timer.cpp kp_kernel_info.h 13 | $(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ kp_kernel_timer.cpp 14 | 15 | clean: 16 | rm *.so #kp_reader 17 | -------------------------------------------------------------------------------- /scripts/run_scripts/kokkos-tools/README.md: -------------------------------------------------------------------------------- 1 | This folder contains local, customized, versions of kokkos tools. 2 | 3 | See https://github.com/kokkos/kokkos-tools/wiki for documentation 4 | 5 | TLDR: set `export KOKKOS_PROFILE_LIBRARY=$PATH_TO/kp_kernel_timer.so` before 6 | running the kokkos app 7 | -------------------------------------------------------------------------------- /scripts/run_scripts/kokkos-tools/kp_kernel_info.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _H_KOKKOSP_KERNEL_INFO 3 | #define _H_KOKKOSP_KERNEL_INFO 4 | 5 | #include 6 | #include 7 | #include 8 | #if defined(HAVE_GCC_ABI_DEMANGLE) 9 | #include 10 | #endif 11 | double seconds() { 12 | struct timeval now; 13 | gettimeofday(&now, NULL); 14 | 15 | return (double) (now.tv_sec + (now.tv_usec * 1.0e-6)); 16 | } 17 | 18 | enum KernelExecutionType { 19 | PARALLEL_FOR = 0, 20 | PARALLEL_REDUCE = 1, 21 | PARALLEL_SCAN = 2, 22 | REGION = 3 23 | }; 24 | 25 | class KernelPerformanceInfo { 26 | public: 27 | KernelPerformanceInfo(std::string kName, KernelExecutionType kernelType) : 28 | kType(kernelType) { 29 | 30 | kernelName = (char*) malloc(sizeof(char) * (kName.size() + 1)); 31 | strcpy(kernelName, kName.c_str()); 32 | // Zero every accumulator: addTime() uses += on both time and timeSq 33 | callCount = 0; 34 | time = 0; timeSq = 0; startTime = 0; 35 | } 36 | 37 | ~KernelPerformanceInfo() { 38 | free(kernelName); 39 | } 40 | 41 | KernelExecutionType getKernelType() { 42 | return kType; 43 | } 44 | 45 | void incrementCount() { 46 | callCount++; 47 | } 48 | 49 | void addTime(double t) { 50 | time += t; 51 | timeSq += (t*t); 52 | } 53 | 54 | void addFromTimer() { 55 | addTime(seconds() - startTime); 56 | 
57 | incrementCount(); 58 | } 59 | 60 | void startTimer() { 61 | startTime = seconds(); 62 | } 63 | 64 | uint64_t getCallCount() { 65 | return callCount; 66 | } 67 | 68 | double getTime() { 69 | return time; 70 | } 71 | 72 | double getTimeSq() { 73 | return timeSq; 74 | } 75 | 76 | char* getName() { 77 | return kernelName; 78 | } 79 | 80 | void addCallCount(const uint64_t newCalls) { 81 | callCount += newCalls; 82 | } 83 | 84 | bool readFromFile(FILE* input) { 85 | uint32_t recordLen = 0; 86 | uint32_t actual_read = fread(&recordLen, sizeof(recordLen), 1, input); 87 | if(actual_read != 1) return false; 88 | 89 | char* entry = (char*) malloc(recordLen); 90 | fread(entry, recordLen, 1, input); 91 | 92 | uint32_t nextIndex = 0; 93 | uint32_t kernelNameLength; 94 | copy((char*) &kernelNameLength, &entry[nextIndex], sizeof(kernelNameLength)); 95 | nextIndex += sizeof(kernelNameLength); 96 | 97 | if(strlen(kernelName) > 0) { 98 | free(kernelName); 99 | } 100 | 101 | kernelName = (char*) malloc( sizeof(char) * (kernelNameLength + 1)); 102 | copy(kernelName, &entry[nextIndex], kernelNameLength); 103 | kernelName[kernelNameLength] = '\0'; 104 | #if defined(HAVE_GCC_ABI_DEMANGLE) 105 | { 106 | int status = -1; 107 | char* demangledKernelName = abi::__cxa_demangle(kernelName, NULL, NULL, &status); 108 | if (status==0) { 109 | free(kernelName); 110 | kernelName = demangledKernelName; 111 | } 112 | } 113 | #endif // HAVE_GCC_ABI_DEMANGLE 114 | nextIndex += kernelNameLength; 115 | 116 | copy((char*) &callCount, &entry[nextIndex], sizeof(callCount)); 117 | nextIndex += sizeof(callCount); 118 | 119 | copy((char*) &time, &entry[nextIndex], sizeof(time)); 120 | nextIndex += sizeof(time); 121 | 122 | copy((char*) &timeSq, &entry[nextIndex], sizeof(timeSq)); 123 | nextIndex += sizeof(timeSq); 124 | 125 | uint32_t kernelT = 0; 126 | copy((char*) &kernelT, &entry[nextIndex], sizeof(kernelT)); 127 | nextIndex += sizeof(kernelT); 128 | 129 | if(kernelT == 0) { 130 | kType = PARALLEL_FOR; 
131 | } else if(kernelT == 1) { 132 | kType = PARALLEL_REDUCE; 133 | } else if(kernelT == 2) { 134 | kType = PARALLEL_SCAN; 135 | } else if(kernelT == 3) { 136 | kType = REGION; 137 | } 138 | 139 | free(entry); 140 | return true; 141 | } 142 | 143 | void writeToFile(FILE* output) { 144 | const uint32_t kernelNameLen = (uint32_t) strlen(kernelName); 145 | 146 | const uint32_t recordLen = 147 | sizeof(uint32_t) + 148 | sizeof(char) * kernelNameLen + 149 | sizeof(uint64_t) + 150 | sizeof(double) + 151 | sizeof(double) + 152 | sizeof(uint32_t); 153 | 154 | uint32_t nextIndex = 0; 155 | char* entry = (char*) malloc(recordLen); 156 | 157 | copy(&entry[nextIndex], (char*) &kernelNameLen, sizeof(kernelNameLen)); 158 | nextIndex += sizeof(kernelNameLen); 159 | 160 | copy(&entry[nextIndex], kernelName, kernelNameLen); 161 | nextIndex += kernelNameLen; 162 | 163 | copy(&entry[nextIndex], (char*) &callCount, sizeof(callCount)); 164 | nextIndex += sizeof(callCount); 165 | 166 | copy(&entry[nextIndex], (char*) &time, sizeof(time)); 167 | nextIndex += sizeof(time); 168 | 169 | copy(&entry[nextIndex], (char*) &timeSq, sizeof(timeSq)); 170 | nextIndex += sizeof(timeSq); 171 | 172 | uint32_t kernelTypeOutput = (uint32_t) kType; 173 | copy(&entry[nextIndex], (char*) &kernelTypeOutput, sizeof(kernelTypeOutput)); 174 | nextIndex += sizeof(kernelTypeOutput); 175 | 176 | fwrite(&recordLen, sizeof(uint32_t), 1, output); 177 | fwrite(entry, recordLen, 1, output); 178 | free(entry); 179 | } 180 | 181 | private: 182 | void copy(char* dest, const char* src, uint32_t len) { 183 | for(uint32_t i = 0; i < len; i++) { 184 | dest[i] = src[i]; 185 | } 186 | } 187 | 188 | char* kernelName; 189 | uint64_t callCount; 190 | double time; 191 | double timeSq; 192 | double startTime; 193 | KernelExecutionType kType; 194 | }; 195 | 196 | #endif 197 | -------------------------------------------------------------------------------- /scripts/run_scripts/kokkos-tools/kp_kernel_timer.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "kp_kernel_info.h" 15 | 16 | bool compareKernelPerformanceInfo(KernelPerformanceInfo* left, KernelPerformanceInfo* right) { 17 | return left->getTime() > right->getTime(); 18 | }; 19 | 20 | static uint64_t uniqID = 0; 21 | static KernelPerformanceInfo* currentEntry; 22 | static std::map count_map; 23 | static double initTime; 24 | static char* outputDelimiter; 25 | static int current_region_level = 0; 26 | static KernelPerformanceInfo* regions[512]; 27 | 28 | #define MAX_STACK_SIZE 128 29 | 30 | void increment_counter(const char* name, KernelExecutionType kType) { 31 | std::string nameStr(name); 32 | 33 | if(count_map.find(name) == count_map.end()) { 34 | KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); 35 | count_map.insert(std::pair(nameStr, info)); 36 | 37 | currentEntry = info; 38 | } else { 39 | currentEntry = count_map[nameStr]; 40 | } 41 | 42 | currentEntry->startTimer(); 43 | } 44 | 45 | void increment_counter_region(const char* name, KernelExecutionType kType) { 46 | std::string nameStr(name); 47 | 48 | if(count_map.find(name) == count_map.end()) { 49 | KernelPerformanceInfo* info = new KernelPerformanceInfo(nameStr, kType); 50 | count_map.insert(std::pair(nameStr, info)); 51 | 52 | regions[current_region_level] = info; 53 | } else { 54 | regions[current_region_level] = count_map[nameStr]; 55 | } 56 | 57 | regions[current_region_level]->startTimer(); 58 | current_region_level++; 59 | } 60 | 61 | extern "C" void kokkosp_init_library(const int loadSeq, 62 | const uint64_t interfaceVer, 63 | const uint32_t devInfoCount, 64 | void* deviceInfo) { 65 | 66 | const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM"); 67 | if(NULL == output_delim_env) { 68 | outputDelimiter = (char*) 
malloc(sizeof(char) * 2); 69 | sprintf(outputDelimiter, "%c", ' '); 70 | } else { 71 | outputDelimiter = (char*) malloc(sizeof(char) * (strlen(output_delim_env) + 1)); 72 | sprintf(outputDelimiter, "%s", output_delim_env); 73 | } 74 | 75 | printf("KokkosP: Example Library Initialized (sequence is %d, version: %llu)\n", loadSeq, interfaceVer); 76 | 77 | initTime = seconds(); 78 | } 79 | 80 | extern "C" void kokkosp_finalize_library() { 81 | double finishTime = seconds(); 82 | double kernelTimes = 0; 83 | 84 | char* hostname = (char*) malloc(sizeof(char) * 256); 85 | gethostname(hostname, 256); 86 | 87 | char* fileOutput = (char*) malloc(sizeof(char) * 256); 88 | sprintf(fileOutput, "%s-%d.dat", hostname, (int) getpid()); 89 | 90 | free(hostname); 91 | FILE* output_data = fopen(fileOutput, "w"); 92 | 93 | const double totalExecuteTime = (finishTime - initTime); 94 | fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data); 95 | 96 | std::vector kernelList; 97 | 98 | for(auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) { 99 | kernel_itr->second->writeToFile(output_data); 100 | } 101 | 102 | fclose(output_data); 103 | 104 | #define CWD_MAX 1024 105 | char cwd[CWD_MAX]; 106 | getcwd(cwd, CWD_MAX); 107 | printf("KokkosP: Kernel timing written to %s/%s \n", cwd, fileOutput); 108 | 109 | /*printf("\n"); 110 | printf("======================================================================\n"); 111 | printf("KokkosP: Finalization of Profiling Library\n"); 112 | printf("KokkosP: Executed a total of %llu kernels\n", uniqID); 113 | 114 | std::vector kernelList; 115 | 116 | for(auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); kernel_itr++) { 117 | kernelList.push_back(kernel_itr->second); 118 | kernelTimes += kernel_itr->second->getTime(); 119 | } 120 | 121 | std::sort(kernelList.begin(), kernelList.end(), compareKernelPerformanceInfo); 122 | const double totalExecuteTime = (finishTime - initTime); 123 | 124 | if(0 == 
strcmp(outputDelimiter, " ")) { 125 | printf("KokkosP: %100s %14s %14s %6s %6s %14s %4s\n", "Kernel", "Calls", 126 | "s/Total", "\%/Ko", "\%/Tot", "s/Call", "Type"); 127 | } else { 128 | printf("KokkosP: %s%s%s%s%s%s%s%s%s%s%s%s%s\n", 129 | "Kernel", 130 | outputDelimiter, 131 | "Calls", 132 | outputDelimiter, 133 | "s/Total", 134 | outputDelimiter, 135 | "\%/Ko", 136 | outputDelimiter, 137 | "\%/Tot", 138 | outputDelimiter, 139 | "s/Call", 140 | outputDelimiter, 141 | "Type"); 142 | } 143 | 144 | for(auto kernel_itr = kernelList.begin(); kernel_itr != kernelList.end(); kernel_itr++) { 145 | KernelPerformanceInfo* kernelInfo = *kernel_itr; 146 | 147 | const uint64_t kCallCount = kernelInfo->getCallCount(); 148 | const double kTime = kernelInfo->getTime(); 149 | const double kTimeMean = kTime / (double) kCallCount; 150 | 151 | const std::string& kName = kernelInfo->getName(); 152 | char* kType = const_cast(""); 153 | 154 | switch(kernelInfo->getKernelType()) { 155 | case PARALLEL_FOR: 156 | kType = const_cast("PFOR"); break; 157 | case PARALLEL_SCAN: 158 | kType = const_cast("SCAN"); break; 159 | case PARALLEL_REDUCE: 160 | kType = const_cast("RDCE"); break; 161 | case REGION 162 | kType = const_cast("REGI"); break; 163 | } 164 | 165 | int demangleStatus; 166 | char* finalDemangle = abi::__cxa_demangle(kName.c_str(), 0, 0, &demangleStatus); 167 | 168 | if(0 == strcmp(outputDelimiter, " ")) { 169 | printf("KokkosP: %s%s%14llu%s%14.5f%s%6.2f%s%6.2f%s%14.5f%s%4s\n", 170 | (0 == demangleStatus) ? finalDemangle : kName.c_str(), 171 | outputDelimiter, 172 | kCallCount, 173 | outputDelimiter, 174 | kTime, 175 | outputDelimiter, 176 | (kTime / kernelTimes) * 100.0, 177 | outputDelimiter, 178 | (kTime / totalExecuteTime) * 100.0, 179 | outputDelimiter, 180 | kTimeMean, 181 | outputDelimiter, 182 | kType 183 | ); 184 | } else { 185 | printf("KokkosP: %s%s%llu%s%f%s%f%s%f%s%f%s%s\n", 186 | (0 == demangleStatus) ? 
finalDemangle : kName.c_str(), 187 | outputDelimiter, 188 | kCallCount, 189 | outputDelimiter, 190 | kTime, 191 | outputDelimiter, 192 | (kTime / kernelTimes) * 100.0, 193 | outputDelimiter, 194 | (kTime / totalExecuteTime) * 100.0, 195 | outputDelimiter, 196 | kTimeMean, 197 | outputDelimiter, 198 | kType 199 | ); 200 | } 201 | } 202 | 203 | printf("\n"); 204 | printf("KokkosP: Total Execution Time: %15.6f seconds.\n", totalExecuteTime); 205 | printf("KokkosP: Time in Kokkos Kernels: %15.6f seconds.\n", kernelTimes); 206 | printf("KokkosP: Time spent outside Kokkos: %15.6f seconds.\n", (totalExecuteTime - kernelTimes)); 207 | 208 | const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0; 209 | printf("KokkosP: Runtime in Kokkos Kernels: %15.6f \%\n", percentKokkos); 210 | printf("KokkosP: Unique kernels: %22llu \n", (uint64_t) count_map.size()); 211 | printf("KokkosP: Parallel For Calls: %22llu \n", uniqID); 212 | 213 | printf("\n"); 214 | printf("======================================================================\n"); 215 | printf("\n"); 216 | 217 | if(NULL != outputDelimiter) { 218 | free(outputDelimiter); 219 | }*/ 220 | 221 | 222 | } 223 | 224 | extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) { 225 | *kID = uniqID++; 226 | 227 | if( (NULL == name) || (strcmp("", name) == 0) ) { 228 | fprintf(stderr, "Error: kernel is empty\n"); 229 | exit(-1); 230 | } 231 | 232 | increment_counter(name, PARALLEL_FOR); 233 | } 234 | 235 | extern "C" void kokkosp_end_parallel_for(const uint64_t kID) { 236 | currentEntry->addFromTimer(); 237 | } 238 | 239 | extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) { 240 | *kID = uniqID++; 241 | 242 | if( (NULL == name) || (strcmp("", name) == 0) ) { 243 | fprintf(stderr, "Error: kernel is empty\n"); 244 | exit(-1); 245 | } 246 | 247 | increment_counter(name, PARALLEL_SCAN); 248 | } 249 | 250 | extern "C" void 
kokkosp_end_parallel_scan(const uint64_t kID) { 251 | currentEntry->addFromTimer(); 252 | } 253 | 254 | extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) { 255 | *kID = uniqID++; 256 | 257 | if( (NULL == name) || (strcmp("", name) == 0) ) { 258 | fprintf(stderr, "Error: kernel is empty\n"); 259 | exit(-1); 260 | } 261 | 262 | increment_counter(name, PARALLEL_REDUCE); 263 | } 264 | 265 | extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) { 266 | currentEntry->addFromTimer(); 267 | } 268 | 269 | extern "C" void kokkosp_push_profile_region(char* regionName) { 270 | increment_counter_region(regionName, REGION); 271 | } 272 | 273 | extern "C" void kokkosp_pop_profile_region() { 274 | current_region_level--; 275 | regions[current_region_level]->addFromTimer(); 276 | } 277 | 278 | -------------------------------------------------------------------------------- /scripts/run_scripts/main.py: -------------------------------------------------------------------------------- 1 | from git import Repo 2 | import subprocess 3 | import os, shutil 4 | 5 | # I use this later to lazily generate an error with a message 6 | class CustomError(Exception): 7 | pass 8 | 9 | repo_path = "../../" 10 | r = Repo(repo_path) 11 | repo_heads = r.heads # or it's alias: r.branches 12 | repo_heads_names = [h.name for h in repo_heads] 13 | 14 | #kokkos_src = '/Users/bird/kokkos/' 15 | #kokkos_install = '/Users/bird/kokkos/build/install' 16 | #cabana_install = '/Users/bird/Cabana/build/build/install' # not a typo, it's in a dumb path 17 | 18 | #platforms = ["Serial", "CPU", "GPU", "UVM"] 19 | platforms = ["Serial", "CPU", "GPU"] 20 | #platforms = ["CPU", "GPU"] 21 | #platforms = ["GPU"] 22 | #platforms = ["CPU"] 23 | 24 | CXX = "g++" 25 | #arch = 'Volta70' 26 | arch = 'Kepler35' 27 | 28 | subprocess.check_call(['./timing_lib.sh']) 29 | 30 | this_build_dir = 'build' 31 | 32 | kokkos_dirs = {} 33 | cabana_dirs = {} 34 | 35 | home_dir = 
os.environ['HOME'] 36 | 37 | # Build Dependencies 38 | # TODO: make this configurable 39 | kokkos_root = os.path.join(home_dir,'kokkos') 40 | cabana_root = os.path.join(home_dir,'Cabana') 41 | 42 | # Check we can find Kokkos and Cabana 43 | if not os.path.isdir(kokkos_root): 44 | raise CustomError("Can't find kokkos") 45 | if not os.path.isdir(cabana_root): 46 | raise CustomError("Can't find Cabana") 47 | 48 | # Copy Kokkos and Cabana to be inside this dir 49 | def copy_and_overwrite(from_path, to_path): 50 | if os.path.exists(to_path): 51 | shutil.rmtree(to_path) 52 | shutil.copytree(from_path, to_path) 53 | 54 | def copy_if_safe(from_path, to_path): 55 | if not os.path.isdir(to_path): 56 | shutil.copytree(from_path, to_path) 57 | 58 | # only copy if they don't exist already 59 | kokkos_new = os.path.join(this_build_dir,'kokkos') 60 | copy_if_safe(kokkos_root, kokkos_new) 61 | 62 | cabana_new = os.path.join(this_build_dir,'cabana') 63 | copy_if_safe(cabana_root, cabana_new) 64 | 65 | # Build Dependencies 66 | for plat in platforms: 67 | install_dir = "build-" + plat 68 | 69 | # Do Build 70 | print("build_kokkos.sh " + CXX + " " + kokkos_new + " " + install_dir + " " + plat + " " + arch) 71 | subprocess.check_call(['./build_kokkos.sh', CXX, kokkos_new, install_dir, plat, arch]) 72 | 73 | print("./build_cabana.sh " + " " + CXX + " " + os.path.join(kokkos_new,install_dir,'install') + " " + cabana_new + " " + install_dir + " " + plat) 74 | subprocess.check_call(['./build_cabana.sh', CXX, os.path.join(kokkos_new,install_dir,'install'), cabana_new, install_dir, plat]) 75 | 76 | # Save dirs, relative to root 77 | cabana_dirs[plat] = install_dir 78 | kokkos_dirs[plat] = install_dir 79 | 80 | 81 | # Iterate over *local* git branches 82 | for branch in repo_heads_names: 83 | print("Working on branch " + branch) 84 | for plat in platforms: 85 | 86 | print(plat) 87 | # TODO: throughout these scripts we assume ./instal is the install dir! abstract it. 
88 | cabana_install = os.path.join( cabana_dirs[plat], 'install') 89 | kokkos_install = os.path.join( kokkos_dirs[plat], 'install') 90 | 91 | # For each repo, check it out into a new folder and build it 92 | #clone_path = './' + branch 93 | clone_path = os.path.join('./', this_build_dir, branch) 94 | 95 | print("!!!! WORKING ON " + clone_path) 96 | 97 | # Clone the branch only when its folder does not exist yet: 98 | if not os.path.isdir(clone_path): 99 | # if it does... delete it (!) 100 | #print("Deleting " + clone_path) 101 | # We need to delete where it will build only one platforms worth, 102 | # or hoist the clone 103 | #shutil.rmtree(clone_path + build??) 104 | 105 | # OR if it does... skip 106 | #continue 107 | 108 | # clone it 109 | cloned = Repo.clone_from( 110 | repo_path, 111 | clone_path, 112 | branch=branch 113 | ) 114 | 115 | pwd = os.getcwd() 116 | 117 | kokkos_full_path = os.path.join(pwd, kokkos_new, kokkos_install) 118 | cabana_full_path = os.path.join(pwd, cabana_new, cabana_install) 119 | print("kk full path " + kokkos_full_path) 120 | # Pass the configured compiler (CXX) rather than a hard-coded "g++" 121 | print("./build_and_run.sh " + clone_path + " " + CXX + " " + kokkos_full_path + " " + cabana_full_path + " " + plat) 122 | subprocess.check_call(['./build_and_run.sh', clone_path, CXX, kokkos_full_path, cabana_full_path, plat]) 123 | 124 | -------------------------------------------------------------------------------- /scripts/run_scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | gitpython 2 | -------------------------------------------------------------------------------- /scripts/run_scripts/timing_lib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd kokkos-tools || exit 1 3 | make 4 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB HEADERS "*.h") 2 | file(GLOB SOURCES
"*.cpp") 3 | 4 | install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 5 | 6 | add_library(CabanaPIC ${SOURCES}) 7 | # Headers come from the source/binary dirs while building and from the install include dir afterwards 8 | target_include_directories(CabanaPIC PUBLIC 9 | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> 10 | $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}> 11 | $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) 12 | 13 | target_link_libraries(CabanaPIC PUBLIC Cabana::cabanacore ) 14 | 15 | install(TARGETS CabanaPIC DESTINATION lib) 16 | -------------------------------------------------------------------------------- /src/accumulator.cpp: -------------------------------------------------------------------------------- 1 | // TODO: add namespace? 2 | 3 | #include "accumulator.h" 4 | 5 | void clear_accumulator_array( 6 | field_array_t& fields, 7 | accumulator_array_t& accumulators, 8 | size_t, // TODO: we can probably pull these out of global params.. 9 | size_t, 10 | size_t 11 | ) 12 | { 13 | auto _clean_accumulator = KOKKOS_LAMBDA(const int i) 14 | { 15 | /* 16 | a0(i,JX_OFFSET+0) = 0; 17 | a0(i+y_offset,JX_OFFSET+1) = 0; 18 | a0(i+z_offset,JX_OFFSET+2) = 0; 19 | a0(i+y_offset+z_offset,JX_OFFSET+3) = 0; 20 | 21 | a0(i,JY_OFFSET+0) = 0; 22 | a0(i+z_offset,JY_OFFSET+1) = 0; 23 | a0(i+y_offset,JY_OFFSET+2) = 0; 24 | a0(i+y_offset+z_offset,JY_OFFSET+3) = 0; 25 | 26 | a0(i,JZ_OFFSET+0) = 0; 27 | a0(i+x_offset,JZ_OFFSET+1) = 0; 28 | a0(i+y_offset,JZ_OFFSET+2) = 0; 29 | a0(i+x_offset+y_offset,JZ_OFFSET+3) = 0; 30 | */ 31 | 32 | for (int j = 0; j < ACCUMULATOR_VAR_COUNT; j++) 33 | { 34 | for (int k = 0; k < ACCUMULATOR_ARRAY_LENGTH; k++) 35 | { 36 | accumulators(i, j, k) = 0.0; 37 | } 38 | } 39 | }; 40 | 41 | Kokkos::RangePolicy exec_policy( 0, fields.size() ); 42 | Kokkos::parallel_for( "clean_accumulator()", exec_policy, _clean_accumulator ); 43 | } 44 | 45 | void unload_accumulator_array( 46 | field_array_t& fields, 47 | accumulator_array_t& accumulators, 48 | size_t nx, // TODO: we can probably pull these out of global params.. 
49 | size_t ny, 50 | size_t nz, 51 | size_t ng, 52 | real_t dx, 53 | real_t dy, 54 | real_t dz, 55 | real_t dt 56 | ) 57 | { 58 | 59 | auto jfx = Cabana::slice(fields); 60 | auto jfy = Cabana::slice(fields); 61 | auto jfz = Cabana::slice(fields); 62 | 63 | // TODO: give these real values 64 | // printf("cx %e dy %e dz %e dt %e \n", dy, dz, dt); 65 | //real_t cx = 0.25 * (1.0 / (dy * dz)) / dt; 66 | real_t cx = 0.25 / (dy * dz * dt); 67 | real_t cy = 0.25 / (dz * dx * dt); 68 | real_t cz = 0.25 / (dx * dy * dt); 69 | 70 | // TODO: we have to be careful we don't reach past the ghosts here 71 | auto _unload_accumulator = KOKKOS_LAMBDA( const int x, const int y, const int z ) 72 | { 73 | // Original: 74 | // f0->jfx += cx*( a0->jx[0] + ay->jx[1] + az->jx[2] + ayz->jx[3] ); 75 | int i = VOXEL(x,y,z, nx,ny,nz,ng); 76 | 77 | // TODO: this level of re-calculation is overkill 78 | size_t x_down = VOXEL(x-1, y, z, nx,ny,nz,ng); 79 | size_t y_down = VOXEL(x, y-1, z, nx,ny,nz,ng); 80 | size_t z_down = VOXEL(x, y, z-1, nx,ny,nz,ng); 81 | 82 | size_t xz_down = VOXEL(x-1, y, z-1, nx,ny,nz,ng); 83 | size_t xy_down = VOXEL(x-1, y-1, z, nx,ny,nz,ng); 84 | size_t yz_down = VOXEL(x, y-1, z-1, nx,ny,nz,ng); 85 | 86 | jfx(i) = cx*( 87 | accumulators(i, accumulator_var::jx, 0) + 88 | accumulators(y_down, accumulator_var::jx, 1) + 89 | accumulators(z_down, accumulator_var::jx, 2) + 90 | accumulators(yz_down, accumulator_var::jx, 3) 91 | ); 92 | 93 | jfy(i) = cy*( 94 | accumulators(i, accumulator_var::jy, 0) + 95 | accumulators(z_down, accumulator_var::jy, 1) + 96 | accumulators(x_down, accumulator_var::jy, 2) + 97 | accumulators(xz_down, accumulator_var::jy, 3) 98 | ); 99 | 100 | jfz(i) = cz*( 101 | accumulators(i, accumulator_var::jz, 0) + 102 | accumulators(x_down, accumulator_var::jz, 1) + 103 | accumulators(y_down, accumulator_var::jz, 2) + 104 | accumulators(xy_down, accumulator_var::jz, 3) 105 | ); 106 | }; 107 | 108 | //may not be enough if particles run into ghost cells 109 | 
Kokkos::MDRangePolicy< Kokkos::Rank<3> > non_ghost_policy( {ng,ng,ng}, {nx+ng+1, ny+ng+1, nz+ng+1} ); // Try not to into ghosts // TODO: dry this 110 | Kokkos::parallel_for( "unload_accumulator()", non_ghost_policy, _unload_accumulator ); 111 | 112 | /* // Crib sheet for old variable names 113 | a0 = &a(x, y, z ); 114 | ax = &a(x-1,y, z ); 115 | ay = &a(x, y-1,z ); 116 | az = &a(x, y, z-1); 117 | ayz = &a(x, y-1,z-1); 118 | azx = &a(x-1,y, z-1); 119 | axy = &a(x-1,y-1,z ) 120 | */ 121 | 122 | } 123 | -------------------------------------------------------------------------------- /src/accumulator.h: -------------------------------------------------------------------------------- 1 | #ifndef ACCUMULATOR_T 2 | #define ACCUMULATOR_T 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "types.h" 12 | #include "grid.h" 13 | #include "fields.h" 14 | 15 | void clear_accumulator_array( 16 | field_array_t& fields, 17 | accumulator_array_t& accumulators, 18 | size_t nx, // TODO: we can probably pull these out of global params.. 19 | size_t ny, 20 | size_t nz 21 | ); 22 | 23 | void unload_accumulator_array( 24 | field_array_t& fields, 25 | accumulator_array_t& accumulators, 26 | size_t nx, // TODO: we can probably pull these out of global params.. 
27 | size_t ny, 28 | size_t nz, 29 | size_t ng, 30 | real_t dx, 31 | real_t dy, 32 | real_t dz, 33 | real_t dt 34 | ); 35 | 36 | #endif // header guard 37 | -------------------------------------------------------------------------------- /src/grid.h: -------------------------------------------------------------------------------- 1 | #ifndef GRID_T 2 | #define GRID_T 3 | 4 | enum grid_enums { 5 | 6 | // Phase 2 boundary conditions 7 | anti_symmetric_fields = -1, // E_tang = 0 8 | pec_fields = -1, 9 | metal_fields = -1, 10 | symmetric_fields = -2, // B_tang = 0, B_norm = 0 11 | pmc_fields = -3, // B_tang = 0, B_norm floats 12 | absorb_fields = -4, // Gamma = 0 13 | 14 | // Phase 3 boundary conditions 15 | reflect_particles = -1, // Cell boundary should reflect particles 16 | absorb_particles = -2 // Cell boundary should absorb particles 17 | 18 | // Symmetry in the field boundary conditions refers to image charge 19 | // sign 20 | // 21 | // Anti-symmetric -> Image charges are opposite signed (ideal metal) 22 | // Boundary rho/j are accumulated over partial voxel+image 23 | // Symmetric -> Image charges are same signed (symmetry plane or pmc) 24 | // Boundary rho/j are accumulated over partial voxel+image 25 | // Absorbing -> No image charges 26 | // Boundary rho/j are accumulated over partial voxel only 27 | // 28 | // rho -> Anti-symmetric | rho -> Symmetric 29 | // jf_tang -> Anti-symmetric | jf_tang -> Symmetric 30 | // E_tang -> Anti-symmetric | E_tang -> Symmetric 31 | // B_norm -> Anti-symmetric + DC | B_norm -> Symmetric (see note) 32 | // B_tang -> Symmetric | B_tang -> Anti-symmetric 33 | // E_norm -> Symmetric | E_norm -> Anti-symmetric (see note) 34 | // div B -> Symmetric | div B -> Anti-symmetric 35 | // 36 | // Note: B_norm is tricky. For a symmetry plane, B_norm on the 37 | // boundary must be zero as there are no magnetic charges (a 38 | // non-zero B_norm would imply an infinitesimal layer of magnetic 39 | // charge). 
However, if a symmetric boundary is interpreted as a 40 | // perfect magnetic conductor, B_norm could be present due to 41 | // magnetic conduction surface charges. Even though there are no 42 | // bulk volumetric magnetic charges to induce a surface magnetic 43 | // charge, I think that radiation/waveguide modes/etc could (the 44 | // total surface magnetic charge in the simulation would be zero 45 | // though). As a result, symmetric and pmc boundary conditions are 46 | // treated separately. Symmetric and pmc boundaries are identical 47 | // except the symmetric boundaries explicitly zero boundary 48 | // B_norm. Note: anti-symmetric and pec boundary conditions would 49 | // have the same issue if norm E was located directly on the 50 | // boundary. However, it is not so this problem does not arise. 51 | // 52 | // Note: Absorbing boundary conditions make no effort to clean 53 | // divergence errors on them. They assume that the ghost div b is 54 | // zero and force the surface div e on them to be zero. This means 55 | // ghost norm e can be set to any value on absorbing boundaries. 56 | 57 | }; 58 | 59 | typedef struct grid { 60 | 61 | // System of units 62 | real_t dt, cvac, eps0; 63 | 64 | // Time stepper. The simulation time is given by 65 | // t = g->t0 + (double)g->dt*(double)g->step 66 | int64_t step; // Current timestep 67 | double t0; // Simulation time corresponding to step 0 68 | 69 | // Phase 2 grid data structures 70 | real_t x0, y0, z0; // Min corner local domain (must be coherent) 71 | real_t x1, y1, z1; // Max corner local domain (must be coherent) 72 | int nx, ny, nz; // Local voxel mesh resolution. Voxels are 73 | // indexed FORTRAN style 0:nx+1,0:ny+1,0:nz+1 74 | // with voxels 1:nx,1:ny,1:nz being non-ghost 75 | // voxels. 76 | real_t dx, dy, dz, dV; // Cell dimensions and volume (CONVENIENCE ... 77 | // USE x0,x1 WHEN DECIDING WHICH NODE TO USE!) 
78 | real_t rdx, rdy, rdz, r8V; // Inverse voxel dimensions and one over 79 | // eight times the voxel volume (CONVENIENCE) 80 | int sx, sy, sz, nv; // Voxel indexing x-, y-,z- strides and the 81 | // number of local voxels (including ghosts, 82 | // (nx+2)(ny+2)(nz+2)), (CONVENIENCE) 83 | int bc[27]; // (-1:1,-1:1,-1:1) FORTRAN indexed array of 84 | // boundary conditions to apply at domain edge 85 | // 0 ... nproc-1 ... comm boundary condition 86 | // <0 ... locally applied boundary condition 87 | 88 | // Phase 3 grid data structures 89 | // NOTE: VOXEL INDEXING LIMITS NUMBER OF VOXELS TO 2^31 (INCLUDING 90 | // GHOSTS) PER NODE. NEIGHBOR INDEXING FURTHER LIMITS TO 91 | // (2^31)/6. BOUNDARY CONDITION HANDLING LIMITS TO 2^28 PER NODE 92 | // EMITTER COMPONENT ID INDEXING FURTHER LIMITS TO 2^26 PER NODE. 93 | // THE LIMIT IS 2^63 OVER ALL NODES THOUGH. 94 | int64_t* range; 95 | // (0:nproc) indexed array giving range of 96 | // global indexes of voxel owned by each 97 | // processor. Replicated on each processor. 98 | // (range[rank]:range[rank+1]-1) are global 99 | // voxels owned by processor "rank". Note: 100 | // range[rank+1]-range[rank] <~ 2^31 / 6 101 | 102 | int64_t* neighbor; 103 | // (0:5,0:local_num_voxel-1) FORTRAN indexed 104 | // array neighbor(0:5,lidx) are the global 105 | // indexes of neighboring voxels of the 106 | // voxel with local index "lidx". Negative 107 | // if neighbor is a boundary condition. 108 | 109 | int64_t rangel, rangeh; // Redundant for move_p performance reasons: 110 | // rangel = range[rank] 111 | // rangeh = range[rank+1]-1. 
112 | // Note: rangeh-rangel <~ 2^26 113 | 114 | // Nearest neighbor communications ports 115 | //mp_t * mp; 116 | 117 | } grid_t; 118 | 119 | #endif // header guard 120 | -------------------------------------------------------------------------------- /src/helpers.h: -------------------------------------------------------------------------------- 1 | #ifndef pic_helper_h 2 | #define pic_helper_h 3 | 4 | #include "logger.h" 5 | #include "Cabana_ExecutionPolicy.hpp" // SIMDpolicy 6 | #include "Cabana_Parallel.hpp" // Simd parallel for 7 | #include "Cabana_DeepCopy.hpp" // Cabana::deep_copy 8 | 9 | #include "input/deck.h" 10 | 11 | // Converts from an index that doesn't know about ghosts to one that does 12 | //KOKKOS_INLINE_FUNCTION 13 | int allow_for_ghosts(int pre_ghost) 14 | { 15 | 16 | size_t ix, iy, iz; 17 | RANK_TO_INDEX(pre_ghost, ix, iy, iz, 18 | deck.nx, 19 | deck.ny); 20 | // printf("%ld\n",ix); 21 | int with_ghost = VOXEL(ix, iy, iz, 22 | deck.nx, 23 | deck.ny, 24 | deck.nz, 25 | deck.num_ghosts); 26 | 27 | return with_ghost; 28 | } 29 | 30 | // Function to print out the data for every particle. 
31 | inline void dump_particles( FILE * fp, const particle_list_t d_particles, const real_t xmin, const real_t, const real_t, const real_t dx, const real_t, const real_t, size_t nx,size_t ny,size_t, size_t ng) 32 | { 33 | // Writes one "x ux" pair per particle, all on a single line of fp, followed by a newline. Only the x position and x velocity are written; the unnamed parameters are ignored. Declared `inline` because it is defined in a header. 34 | // Host 35 | particle_list_t::host_mirror_type particles("host_particles", d_particles.size()); 36 | 37 | // Copy device particles to host 38 | Cabana::deep_copy(particles, d_particles); 39 | 40 | auto position_x = Cabana::slice(particles); 41 | auto position_y = Cabana::slice(particles); 42 | auto position_z = Cabana::slice(particles); 43 | 44 | auto velocity_x = Cabana::slice(particles); 45 | auto velocity_y = Cabana::slice(particles); 46 | auto velocity_z = Cabana::slice(particles); 47 | 48 | auto weight = Cabana::slice(particles); 49 | auto cell = Cabana::slice(particles); 50 | 51 | for (size_t i = 0; i < particles.size(); i++) 52 | { 53 | size_t ix,iy,iz; 54 | int ii = cell(i); 55 | // Recover the per-axis cell indices from the ghost-inclusive linear cell index 56 | RANK_TO_INDEX(ii, ix,iy,iz,nx+2*ng,ny+2*ng); 57 | // Convert ix to floating point BEFORE subtracting: ix is unsigned, so `ix - 1` would wrap to a huge value for a particle sitting in cell ix == 0. The in-cell offset position_x is mapped by (p+1)/2 (presumably from [-1,1] to [0,1] -- TODO confirm). 58 | real_t x = xmin + ( static_cast<double>(ix) - 1.0 + ( position_x(i)+ 1.0 ) * 0.5) * dx; 59 | real_t v = velocity_x( i ); 60 | 61 | fprintf(fp, "%e %e ", x,v); 62 | } 63 | fprintf(fp, "\n"); 64 | /* 65 | auto _print = 66 | KOKKOS_LAMBDA( const int s, const int i ) 67 | { 68 | // printf("Struct id %d offset %d \n", s, i); 69 | // printf("Position x %e y %e z %e \n", position_x.access(s,i), position_y.access(s,i), position_z.access(s,i) ); 70 | size_t ix,iy,iz; 71 | int ii = cell.access(s, i); 72 | RANK_TO_INDEX(ii, ix,iy,iz,nx+2*ng,ny+2*ng); 73 | real_t x = xmin + (ix-1+(position_x.access(s,i)+1.0)*0.5)*dx; 74 | real_t v = velocity_x.access(s,i); 75 | fprintf(fp, "%e %e ", x,v); 76 | 77 | // real_t y = ymin + (iy-1+(position_y.access(s,i)+1.0)*0.5)*dy; 78 | //real_t z = zmin + (iz-1+(position_z.access(s,i)+1.0)*0.5)*dz; 79 | // fprintf(fp, "%e %e %e %d %d %d \n", x,y,z,ix,iy,iz); 80 | 81 | }; 82 | 83 | // TODO: How much sense does printing in parallel make??? 
84 | Cabana::SimdPolicy 85 | vec_policy( 0, particles.size() ); 86 | 87 | //logger << "particles.numSoA() " << particles.numSoA() << std::endl; 88 | //logger << "particles.numSoA() " << particles.numSoA() << std::endl; 89 | 90 | Cabana::simd_parallel_for( vec_policy, _print, "_print()" ); 91 | */ 92 | // std::cout << std::endl; 93 | 94 | } 95 | // Prints e{x,y,z} and jf{x,y,z} for every entry of `fields`, one printf per entry from inside a Kokkos parallel_for, then a blank line on std::cout. Declared `inline` because it is defined in a header. 96 | inline void print_fields( const field_array_t& fields ) 97 | { 98 | auto ex = Cabana::slice(fields); 99 | auto ey = Cabana::slice(fields); 100 | auto ez = Cabana::slice(fields); 101 | 102 | auto jfx = Cabana::slice(fields); 103 | auto jfy = Cabana::slice(fields); 104 | auto jfz = Cabana::slice(fields); 105 | 106 | auto _print_fields = 107 | KOKKOS_LAMBDA( const int i ) 108 | { 109 | printf("%d e x %e y %e z %e jfx %e jfy %e jfz %e \n", i, ex(i), ey(i), ez(i), jfx(i), jfy(i), jfz(i) ); 110 | }; 111 | 112 | Kokkos::RangePolicy exec_policy( 0, fields.size() ); 113 | Kokkos::parallel_for( "print()", exec_policy, _print_fields ); 114 | 115 | std::cout << std::endl; 116 | 117 | } 118 | 119 | #endif // pic_helper_h 120 | -------------------------------------------------------------------------------- /src/input/deck.h: -------------------------------------------------------------------------------- 1 | #ifndef INPUT_DECK_H 2 | #define INPUT_DECK_H 3 | 4 | #include // size_t 5 | #include 6 | 7 | #include "types.h" 8 | 9 | enum Boundary { 10 | Reflect = 0, 11 | Periodic 12 | }; 13 | 14 | // Hook invoked at the end of a run; derive and override finalize() to customize 15 | class Run_Finalizer { 16 | public: virtual ~Run_Finalizer() { } // polymorphic base: make deletion through a Run_Finalizer* well defined 17 | virtual void finalize() 18 | { 19 | // Default finalization is blank 20 | } 21 | }; 22 | // Field initialization hook; the default init() zeroes the E and cB components 23 | class Field_Initializer { 24 | 25 | public: 26 | using real_ = real_t; 27 | Field_Initializer() { } // blank 28 | virtual ~Field_Initializer() { } // polymorphic base: make deletion through a Field_Initializer* well defined 29 | virtual void init( 30 | field_array_t& fields, 31 | size_t, 32 | size_t, 33 | size_t, 34 | size_t, 35 | real_, // TODO: do we prefer xmin or Lx? 
36 | real_, 37 | real_, 38 | real_, 39 | real_, 40 | real_ 41 | ) 42 | { 43 | std::cout << "Default field init" << std::endl; 44 | 45 | // Zero fields 46 | auto ex = Cabana::slice(fields); 47 | auto ey = Cabana::slice(fields); 48 | auto ez = Cabana::slice(fields); 49 | 50 | auto cbx = Cabana::slice(fields); 51 | auto cby = Cabana::slice(fields); 52 | auto cbz = Cabana::slice(fields); 53 | 54 | auto _init_fields = 55 | KOKKOS_LAMBDA( const int i ) 56 | { 57 | ex(i) = 0.0; 58 | ey(i) = 0.0; 59 | ez(i) = 0.0; 60 | cbx(i) = 0.0; 61 | cby(i) = 0.0; 62 | cbz(i) = 0.0; 63 | }; 64 | 65 | Kokkos::parallel_for( "zero_fields()", fields.size(), _init_fields ); 66 | 67 | } 68 | }; 69 | 70 | // TODO: we can eventually provide a suite of default/sane initializers, such 71 | // as ones that give the same RNG sequence over multiple procs 72 | class Particle_Initializer { 73 | 74 | public: 75 | using real_ = real_t; 76 | virtual ~Particle_Initializer() { } // polymorphic base: make deletion through a Particle_Initializer* well defined 77 | Particle_Initializer() { } // blank 78 | 79 | virtual void init( 80 | particle_list_t& particles, 81 | size_t nx, 82 | size_t ny, 83 | size_t, 84 | size_t, 85 | real_ dxp, 86 | size_t nppc, 87 | real_ w, 88 | real_ v0, 89 | real_, 90 | real_, 91 | real_ 92 | ) 93 | { 94 | // TODO: this doesn't currently do anything with nppc/num_cells 95 | std::cout << "Default particle init" << std::endl; 96 | 97 | auto position_x = Cabana::slice(particles); 98 | auto position_y = Cabana::slice(particles); 99 | auto position_z = Cabana::slice(particles); 100 | 101 | auto velocity_x = Cabana::slice(particles); 102 | auto velocity_y = Cabana::slice(particles); 103 | auto velocity_z = Cabana::slice(particles); 104 | 105 | auto weight = Cabana::slice(particles); 106 | auto cell = Cabana::slice(particles); 107 | // particles.size() is a size_t, so it must be printed with %zu (%ld is a format/argument mismatch) 108 | printf("dxp = %e \n", dxp); 109 | printf("part list len = %zu \n", particles.size()); 110 | 111 | auto _init = 112 | KOKKOS_LAMBDA( const int s, const int i ) 113 | { 114 | // Initialize position. 
115 | int sign = -1; 116 | size_t pi2 = (s)*particle_list_t::vector_length+i; 117 | size_t pi = ((pi2) / 2); 118 | if (pi2%2 == 0) { 119 | sign = 1; 120 | } // even flat particle index -> sign = +1, odd -> sign = -1 121 | int pic = (2*pi)%nppc; //Every 2 particles have the same "pic". 122 | // In-cell coordinate of this particle pair: pic*dxp, shifted by half a spacing and by -1 123 | real_ x = pic*dxp+0.5*dxp-1.0; 124 | int pre_ghost = (2*pi/nppc); //pre_ghost ranges [0,nx*ny*nz). 125 | 126 | //int ix,iy,iz; 127 | //RANK_TO_INDEX(pre_ghost, ix, iy, iz, nx, ny); 128 | //ix += ng; 129 | //iy += ng; 130 | //iz += ng; 131 | // The computed in-cell coordinate is stored in y; x and z are placed at 0 132 | position_x.access(s,i) = 0.0; 133 | position_y.access(s,i) = x; 134 | position_z.access(s,i) = 0.0; 135 | 136 | weight.access(s,i) = w; 137 | 138 | //cell.access(s,i) = VOXEL(ix,iy,iz,nx,ny,nz,ng); 139 | cell.access(s,i) = pre_ghost*(nx+2) + (nx+2)*(ny+2) + (nx+2) + 1; 140 | // NOTE(review): the index above hard-codes one ghost layer (the nx+2 / ny+2 strides) and advances by a whole x-row per pre_ghost, i.e. it steps along y only. 141 | // Initialize velocity.(each cell length is 2) 142 | real_ gam = 1.0/sqrt(1.0-v0*v0); 143 | // Small sinusoidal perturbation (amplitude 1e-4) as a function of position along y 144 | real_t na = 0.0001*sin(2.0*3.1415926*((x+1.0+pre_ghost*2)/(2*ny))); 145 | 146 | //velocity_x.access(s,i) = sign * v0*gam; // *(1.0-na*sign); //0; 147 | velocity_x.access(s,i) = sign *v0*gam*(1.0+na*sign); 148 | velocity_y.access(s,i) = 0; 149 | velocity_z.access(s,i) = 0; //na*sign; //sign * v0 *gam*(1.0+na*sign); 150 | //velocity_z.access(s,i) = 1e-7*sign; 151 | 152 | //printf("%d %d %d pre-g %d putting particle at y=%e with ux = %e pi = %d \n", pic, s, i, pre_ghost, position_y.access(s,i), velocity_x.access(s,i), cell.access(s,i) ); 153 | }; 154 | 155 | Cabana::SimdPolicy 156 | vec_policy( 0, particles.size() ); 157 | Cabana::simd_parallel_for( vec_policy, _init, "init()" ); 158 | } 159 | }; 160 | 161 | class _Input_Deck { 162 | public: 163 | // Having this separate lets us initialize as double if required 164 | using real_ = real_t; 165 | 166 | // I would prefer that this wasn't a pointer, but it seems to be 167 | // necessary. We need it to be able to do a vtable lookup for the init 168 | // function call, which means we need a ref or a pointer. 
The ref has 169 | // to be initialized which means the initialization of Particle_Initializer 170 | // would leak to the init site of _Input_Deck. A normal (non ref, non 171 | // pointer) variable would avoid the vtable lookup and always call the 172 | // "default". Perhaps there is a better way? 173 | // The original default pointer is pretty likely to leak in custom 174 | // decks... at least it doesn't contain a lot of state 175 | Particle_Initializer* particle_initer; 176 | 177 | Field_Initializer* field_initer; 178 | 179 | // Give the user a chance to hook into the end of the run, to do final 180 | // things like correctness checks and timing dumping 181 | Run_Finalizer* run_finalizer; 182 | // Installs heap-allocated default hooks; nothing in this class deletes them 183 | _Input_Deck() : 184 | particle_initer(new Particle_Initializer), 185 | field_initer(new Field_Initializer), 186 | run_finalizer(new Run_Finalizer) 187 | { 188 | // empty 189 | } 190 | // Returns 1/sqrt( sum of (n/l)^2 over the axes that have more than one cell ); single-cell axes are ignored. NOTE(review): if no axis has more than one cell, w1 stays 0 and this divides by zero. 191 | static real_ courant_length( real_ lx, real_ ly, real_ lz, 192 | size_t nx, size_t ny, size_t nz ) { 193 | real_ w0, w1 = 0; 194 | if( nx>1 ) w0 = nx/lx, w1 += w0*w0; 195 | if( ny>1 ) w0 = ny/ly, w1 += w0*w0; 196 | if( nz>1 ) w0 = nz/lz, w1 += w0*w0; 197 | return sqrt(1/w1); 198 | } 199 | 200 | // We could do this in the destructor, but this has 2 advantages: 201 | // 1) It's more explicit 202 | // 2) We have finer grained control, so we can more easily ensure it 203 | // happens before valuable data is freed 204 | void finalize() 205 | { 206 | run_finalizer->finalize(); 207 | } 208 | // Forwards to the installed particle initializer, appending the deck's global domain lengths 209 | void initialize_particles( 210 | particle_list_t& particles, 211 | size_t nx, 212 | size_t ny, 213 | size_t nz, 214 | size_t ng, 215 | real_ dxp, 216 | size_t nppc, 217 | real_ w, 218 | real_ v0 219 | ) 220 | { 221 | particle_initer->init(particles, nx, ny, nz, ng, dxp, nppc, w, v0, 222 | len_x_global, len_y_global, len_z_global); 223 | } 224 | 225 | void initialize_fields( 226 | field_array_t& fields, 227 | size_t nx, 228 | size_t ny, 229 | size_t nz, 230 | size_t ng, 231 | real_ Lx, // TODO: 
do we prefer xmin or Lx? 232 | real_ Ly, 233 | real_ Lz, 234 | real_ dx, 235 | real_ dy, 236 | real_ dz 237 | ) 238 | { 239 | field_initer->init( 240 | fields, 241 | nx, 242 | ny, 243 | nz, 244 | ng, 245 | Lx, 246 | Ly, 247 | Lz, 248 | dx, 249 | dy, 250 | dz 251 | ); 252 | } 253 | 254 | real_ de = 1.0; // Length normalization (electron inertial length) 255 | real_ ec = 1.0; // Charge normalization 256 | real_ me = 1.0; // Mass normalization 257 | real_ mu = 1.0; // permeability of free space 258 | real_ c = 1.0; // Speed of light 259 | real_ eps = 1.0; // permittivity of free space 260 | 261 | real_ qsp = -ec; 262 | 263 | // Params 264 | real_ n0 = 1.0; // Background plasma density 265 | size_t num_species = 1; 266 | size_t nx = 16; // TODO: why is nx a size_t not an int? 267 | size_t ny = 1; 268 | size_t nz = 1; 269 | 270 | size_t num_ghosts = 1; 271 | size_t nppc = 1; 272 | real_ dt = 1.0; 273 | int num_steps = 2; 274 | 275 | // Assume domain starts at [0,0,0] and goes to [len,len,len] 276 | real_ len_x_global = 1.0; 277 | real_ len_y_global = 1.0; 278 | real_ len_z_global = 1.0; 279 | 280 | real_t Npe = -1; 281 | real_t Ne = -1; //(nppc*nx*ny*nz); 282 | 283 | //real_ local_x_min; 284 | //real_ local_y_min; 285 | //real_ local_z_min; 286 | //real_ local_x_max; 287 | //real_ local_y_max; 288 | //real_ local_z_max; 289 | real_ v0 = 1.0; //drift velocity 290 | 291 | //size_t ghost_offset; // Where the cell id needs to start for a "real" cell, basically nx 292 | //size_t num_real_cells; 293 | 294 | //Boundary BOUNDARY_TYPE = Boundary::Reflect; 295 | Boundary BOUNDARY_TYPE = Boundary::Periodic; 296 | 297 | ////////////////////////// DERIVED ///////////////// 298 | // Don't set these, we can derive them instead 299 | real_ dx; 300 | real_ dy; 301 | real_ dz; 302 | 303 | real_ len_x; 304 | real_ len_y; 305 | real_ len_z; 306 | size_t num_cells; // This should *include* the ghost cells 307 | long num_particles = -1; 308 | 309 | bool perform_uncenter = false; 310 | 311 | 
//////////////////////////////////////////////////// 312 | 313 | void print_run_details() 314 | { 315 | std::cout << "#~~~ Run Specifications ~~~ " << std::endl; 316 | std::cout << "#Nx: " << nx << " Ny: " << ny << " Nz: " << nz << " Num Ghosts: " << num_ghosts << ". Cells Total: " << num_cells << std::endl; 317 | std::cout << "#Len X: " << len_x << " Len Y: " << len_y << " Len Z: " << len_z << " number of ghosts: "<(); 357 | auto position_y = particles.slice(); 358 | auto position_z = particles.slice(); 359 | 360 | auto velocity_x = particles.slice(); 361 | auto velocity_y = particles.slice(); 362 | auto velocity_z = particles.slice(); 363 | 364 | auto weight = particles.slice(); 365 | auto cell = particles.slice(); 366 | 367 | // TODO: sensible way to do rand in parallel? 368 | //srand (static_cast (time(0))); 369 | 370 | auto _init = 371 | KOKKOS_LAMBDA( const int s, const int i ) 372 | { 373 | // Initialize position. 374 | int sign = -1; 375 | size_t pi2 = (s)*particle_list_t::vector_length+i; 376 | size_t pi = ((pi2) / 2); 377 | if (pi2%2 == 0) { 378 | sign = 1; 379 | } 380 | size_t pic = (2*pi)%nppc; 381 | 382 | real_t x = pic*dxp+0.5*dxp-1.0; 383 | position_x.access(s,i) = x; 384 | position_y.access(s,i) = 0.; 385 | position_z.access(s,i) = 0.; 386 | 387 | 388 | weight.access(s,i) = w; 389 | 390 | // gives me a num in the range 0..num_real_cells 391 | //int pre_ghost = (s % params.num_real_cells); 392 | // size_t ix, iy, iz; 393 | 394 | size_t pre_ghost = (2*pi/nppc); 395 | 396 | cell.access(s,i) = pre_ghost + (nx+2)*(ny+2) + (nx+2) + 1; //13; //allow_for_ghosts(pre_ghost); 397 | 398 | // Initialize velocity.(each cell length is 2) 399 | real_t na = 0.0001*sin(2.0*3.1415926*((x+1.0+pre_ghost*2)/(2*nx))); 400 | // 401 | 402 | real_t gam = 1.0/sqrt(1.0-v0*v0); 403 | velocity_x.access(s,i) = sign * v0 *gam*(1.0+na); //0.1; 404 | velocity_y.access(s,i) = 0; 405 | velocity_z.access(s,i) = 0; 406 | }; 407 | 408 | Cabana::SimdPolicy 409 | vec_policy( 0, 
particles.size() ); 410 | Cabana::simd_parallel_for( vec_policy, _init, "init()" ); 411 | } 412 | */ 413 | }; 414 | 415 | #ifdef USER_INPUT_DECK 416 | #define STRINGIFY(s)#s 417 | #define EXPAND(s)STRINGIFY(s) 418 | //#include EXPAND(USER_INPUT_DECK) 419 | // CMake will put the concrete definition in an object file.. hopefully. 420 | // This is not ideal, but the include would prevent compile time change 421 | // detection 422 | class Input_Deck : public _Input_Deck { 423 | public: 424 | // TODO: this may currently force any custom deck to implement an 425 | // initialize_particles function, which is not desired. We want to 426 | // fall back to the default implementation above if the user chooses 427 | // not to define one 428 | Input_Deck(); 429 | }; 430 | #else 431 | // Default deck -- Weibel 432 | class Input_Deck : public _Input_Deck { 433 | public: 434 | Input_Deck() 435 | { 436 | // User puts initialization code here 437 | // Example: EM 2 Stream in 1d? 438 | nx = 1; 439 | ny = 32; 440 | nz = 1; 441 | 442 | num_steps = 6000; 443 | nppc = 100; 444 | 445 | //v0 = 0.2; 446 | v0 = 0.0866025403784439; 447 | 448 | // Can also create temporaries 449 | real_ gam = 1.0 / sqrt(1.0 - v0*v0); 450 | 451 | const real_ default_grid_len = 1.0; 452 | 453 | len_x_global = default_grid_len; 454 | //len_y_global = 3.14159265358979*0.5; // TODO: use proper PI? 
455 | len_y_global = 0.628318530717959*(gam*sqrt(gam)); 456 | len_z_global = default_grid_len; 457 | 458 | dt = 0.99*courant_length( 459 | len_x_global, len_y_global, len_z_global, 460 | nx, ny, nz 461 | ) / c; 462 | 463 | n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1 464 | } 465 | }; 466 | #endif 467 | 468 | extern Input_Deck deck; 469 | //Input_Deck deck; 470 | 471 | #endif // guard 472 | -------------------------------------------------------------------------------- /src/interpolator.cpp: -------------------------------------------------------------------------------- 1 | #include "interpolator.h" 2 | 3 | 4 | void load_interpolator_array( 5 | field_array_t fields, 6 | interpolator_array_t interpolators, 7 | size_t nx, // TODO: we can probably pull these out of global params.. 8 | size_t ny, 9 | size_t nz, 10 | size_t ng 11 | ) 12 | { 13 | size_t x_offset = 1; // VOXEL(x+1,y, z, nx,ny,nz); 14 | size_t y_offset = (1*(nx+ng*2)); // VOXEL(x, y+1,z, nx,ny,nz); 15 | size_t z_offset = (1*(nx+ng*2)*(ny+ng*2)); // VOXEL(x, y, z+1, nx,ny,nz); 16 | 17 | auto field_ex = Cabana::slice(fields); 18 | auto field_ey = Cabana::slice(fields); 19 | auto field_ez = Cabana::slice(fields); 20 | 21 | auto field_cbx = Cabana::slice(fields); 22 | auto field_cby = Cabana::slice(fields); 23 | auto field_cbz = Cabana::slice(fields); 24 | 25 | auto interp_ex = Cabana::slice(interpolators); 26 | auto interp_dexdy = Cabana::slice(interpolators); 27 | auto interp_dexdz = Cabana::slice(interpolators); 28 | auto interp_d2exdydz = Cabana::slice(interpolators); 29 | auto interp_ey = Cabana::slice(interpolators); 30 | auto interp_deydz = Cabana::slice(interpolators); 31 | auto interp_deydx = Cabana::slice(interpolators); 32 | auto interp_d2eydzdx = Cabana::slice(interpolators); 33 | auto interp_ez = Cabana::slice(interpolators); 34 | auto interp_dezdx = Cabana::slice(interpolators); 35 | auto interp_dezdy = Cabana::slice(interpolators); 36 | auto interp_d2ezdxdy = 
Cabana::slice(interpolators); 37 | auto interp_cbx = Cabana::slice(interpolators); 38 | auto interp_dcbxdx = Cabana::slice(interpolators); 39 | auto interp_cby = Cabana::slice(interpolators); 40 | auto interp_dcbydy = Cabana::slice(interpolators); 41 | auto interp_cbz = Cabana::slice(interpolators); 42 | auto interp_dcbzdz = Cabana::slice(interpolators); 43 | 44 | const real_t fourth = 1.0 / 4.0; 45 | const real_t half = 1.0 / 2.0; 46 | 47 | // TODO: we have to be careful we don't reach past the ghosts here 48 | auto _load_interpolator = KOKKOS_LAMBDA( const int x, const int y, const int z) 49 | { 50 | // Try avoid doing stencil operations on ghost cells 51 | //if ( is_ghost(i) ) continue; 52 | 53 | int i = VOXEL(x,y,z, nx,ny,nz,ng); 54 | 55 | // ex interpolation 56 | real_t w0 = field_ex(i); // pf0->ex; 57 | real_t w1 = field_ex(i + y_offset); // pfy->ex; 58 | real_t w2 = field_ex(i + z_offset); // pfz->ex; 59 | real_t w3 = field_ex(i + y_offset + z_offset); // pfyz->ex; 60 | 61 | // TODO: make this not use only w0 62 | interp_ex(i) = fourth*( (w3 + w0) + (w1 + w2) ); 63 | interp_dexdy(i) = fourth*( (w3 - w0) + (w1 - w2) ); 64 | interp_dexdz(i) = fourth*( (w3 - w0) - (w1 - w2) ); 65 | interp_d2exdydz(i) = fourth*( (w3 + w0) - (w1 + w2) ); 66 | 67 | // ey interpolation coefficients 68 | w0 = field_ey(i); 69 | w1 = field_ey(i + z_offset); // pfz->ey; 70 | w2 = field_ey(i + x_offset); //pfx->ey; 71 | w3 = field_ey(i + x_offset + z_offset); // pfzx->ey; 72 | 73 | interp_ey(i) = fourth*( (w3 + w0) + (w1 + w2) ); 74 | interp_deydz(i) = fourth*( (w3 - w0) + (w1 - w2) ); 75 | interp_deydx(i) = fourth*( (w3 - w0) - (w1 - w2) ); 76 | interp_d2eydzdx(i) = fourth*( (w3 + w0) - (w1 + w2) ); 77 | 78 | 79 | // ez interpolation coefficients 80 | w0 = field_ez(i); // pf0->ez; 81 | w1 = field_ez(i + x_offset); //pfx->ez; 82 | w2 = field_ez(i + y_offset); //pfy->ez; 83 | w3 = field_ez(i + x_offset + y_offset); //pfxy->ez; 84 | 85 | interp_ez(i) = fourth*( (w3 + w0) + (w1 + w2) ); 86 
| interp_dezdx(i) = fourth*( (w3 - w0) + (w1 - w2) ); 87 | interp_dezdy(i) = fourth*( (w3 - w0) - (w1 - w2) ); 88 | interp_d2ezdxdy(i) = fourth*( (w3 + w0) - (w1 + w2) ); 89 | 90 | // bx interpolation coefficients 91 | w0 = field_cbx(i); //pf0->cbx; 92 | w1 = field_cbx(i + x_offset); //pfx->cbx; 93 | interp_cbx(i) = half*( w1 + w0 ); 94 | interp_dcbxdx(i) = half*( w1 - w0 ); 95 | 96 | // by interpolation coefficients 97 | w0 = field_cby(i); // pf0->cby; 98 | w1 = field_cby(i + y_offset); // pfy->cby; 99 | interp_cby(i) = half*( w1 + w0 ); 100 | interp_dcbydy(i) = half*( w1 - w0 ); 101 | 102 | // bz interpolation coefficients 103 | w0 = field_cbz(i); // pf0->cbz; 104 | w1 = field_cbz(i + z_offset); // pfz->cbz; 105 | interp_cbz(i) = half*( w1 + w0 ); 106 | interp_dcbzdz(i) = half*( w1 - w0 ); 107 | }; 108 | // Launch over [ng, n+ng) along each axis. The stencil above also reads the +1 neighbour (i + x/y/z_offset), so the highest cells in the range read one cell beyond it. 109 | //Kokkos::RangePolicy exec_policy( 0, fields.size() ); // All cells 110 | Kokkos::MDRangePolicy< Kokkos::Rank<3> > non_ghost_policy( {ng,ng,ng}, {nx+ng, ny+ng, nz+ng} ); // Try not to go into ghosts // TODO: dry this 111 | Kokkos::parallel_for( "load_interpolator()", non_ghost_policy, _load_interpolator ); 112 | 113 | /* 114 | pi = &fi(x, y, z ); 115 | pf0 = &f(x, y, z ); 116 | pfx = &f(x+1,y, z ); 117 | pfy = &f(x, y+1,z ); 118 | pfz = &f(x, y, z+1); 119 | pfyz = &f(x, y+1,z+1); 120 | pfzx = &f(x+1,y, z+1); 121 | pfxy = &f(x+1,y+1,z ); 122 | */ 123 | 124 | } 125 | void initialize_interpolator(interpolator_array_t& f0) 126 | { 127 | auto ex = Cabana::slice(f0); 128 | auto dexdy = Cabana::slice(f0); 129 | auto dexdz = Cabana::slice(f0); 130 | auto d2exdydz = Cabana::slice(f0); 131 | auto ey = Cabana::slice(f0); 132 | auto deydz = Cabana::slice(f0); 133 | auto deydx = Cabana::slice(f0); 134 | auto d2eydzdx = Cabana::slice(f0); 135 | auto ez = Cabana::slice(f0); 136 | auto dezdx = Cabana::slice(f0); 137 | auto dezdy = Cabana::slice(f0); 138 | auto d2ezdxdy = Cabana::slice(f0); 139 | auto cbx = Cabana::slice(f0); 140 | auto dcbxdx = Cabana::slice(f0); 141 | auto 
cby = Cabana::slice(f0); 142 | auto dcbydy = Cabana::slice(f0); 143 | auto cbz = Cabana::slice(f0); 144 | auto dcbzdz = Cabana::slice(f0); 145 | 146 | auto _init_interpolator = 147 | KOKKOS_LAMBDA( const int i ) 148 | { 149 | // Throw in some place holder values 150 | ex(i) = 0.0; // TODO: is this important? 151 | dexdy(i) = 0.0; 152 | dexdz(i) = 0.0; 153 | d2exdydz(i) = 0.0; 154 | ey(i) = 0.0; 155 | deydz(i) = 0.0; 156 | deydx(i) = 0.0; 157 | d2eydzdx(i) = 0.0; 158 | ez(i) = 0.0; 159 | dezdx(i) = 0.0; 160 | dezdy(i) = 0.0; 161 | d2ezdxdy(i) = 0.0; 162 | cbx(i) = 0.0; 163 | dcbxdx(i) = 0.0; 164 | cby(i) = 0.0; 165 | dcbydy(i) = 0.0; 166 | cbz(i) = 0.0; 167 | dcbzdz(i) = 0.0; 168 | }; 169 | 170 | Kokkos::parallel_for( "init_interpolator()", f0.size(), _init_interpolator ); 171 | 172 | } 173 | -------------------------------------------------------------------------------- /src/interpolator.h: -------------------------------------------------------------------------------- 1 | #ifndef INTERPOLATOR_H 2 | #define INTERPOLATOR_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "types.h" 12 | #include "fields.h" 13 | 14 | void load_interpolator_array( 15 | field_array_t fields, 16 | interpolator_array_t interpolators, 17 | size_t nx, // TODO: we can probably pull these out of global params.. 
18 | size_t ny, 19 | size_t nz, 20 | size_t ng 21 | ); 22 | 23 | void initialize_interpolator(interpolator_array_t& f0); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/logger.h: -------------------------------------------------------------------------------- 1 | //#define ENABLE_DEBUG 0 2 | #if ENABLE_DEBUG 3 | #define logger std::cout << "LOG:" << __FILE__ << ":" << __LINE__ << " \t :: \t " 4 | #else 5 | #define logger while(0) std::cout 6 | #endif /* ENABLE_DEBUG */ 7 | -------------------------------------------------------------------------------- /src/move_p.h: -------------------------------------------------------------------------------- 1 | #ifndef pic_move_p_h 2 | #define pic_move_p_h 3 | 4 | #include 5 | 6 | 7 | // I make no claims that this is a sensible way to do this.. I just want it working ASAP 8 | // THIS DEALS WITH GHOSTS ITSELF 9 | KOKKOS_INLINE_FUNCTION int detect_leaving_domain( size_t, size_t nx, size_t ny, size_t nz, size_t ix, size_t iy, size_t iz, size_t ) 10 | { 11 | // Returns 0/1/2 when ix/iy/iz == 0, 3/4/5 when ix/iy/iz == nx+1/ny+1/nz+1, and -1 otherwise. The first and last (unnamed) parameters are ignored, so the comparisons assume the boundary cells sit at index 0 and n+1. 12 | //RANK_TO_INDEX(ii, ix, iy, iz, (nx+(2*num_ghosts)), (ny+(2*num_ghosts))); 13 | //std::cout << "i " << ii << " ix " << ix << " iy " << iy << " iz " << iz << std::endl; 14 | 15 | //printf("nx,ny,nz=%ld,%ld,%ld, i=%ld, ix=%ld, iy=%ld, iz=%ld\n",nx,ny,nz,ii,ix,iy,iz); 16 | // The checks below are independent ifs, not an else-if chain: when several match, the LAST matching one determines the result. 17 | int leaving = -1; 18 | 19 | if (ix == 0) 20 | { 21 | leaving = 0; 22 | } 23 | 24 | if (iy == 0) 25 | { 26 | leaving = 1; 27 | } 28 | 29 | if (iz == 0) 30 | { 31 | leaving = 2; 32 | } 33 | 34 | if (ix == nx+1) 35 | { 36 | leaving = 3; 37 | } 38 | 39 | if (iy == ny+1) 40 | { 41 | leaving = 4; 42 | } 43 | 44 | if (iz == nz+1) 45 | { 46 | leaving = 5; 47 | } 48 | 49 | 50 | // if(leaving>=0){ 51 | // printf("%d %d %d %d\n", ix,iy,iz,leaving); 52 | // } 53 | return leaving; 54 | } 55 | 56 | 57 | // TODO: add namespace etc? 
58 | // TODO: port this to cabana syntax 59 | template KOKKOS_INLINE_FUNCTION int move_p( 60 | //particle_list_t particles, 61 | T1& position_x, 62 | T2& position_y, 63 | T3& position_z, 64 | T4& cell, 65 | T5& a0, // TODO: does this need to be const 66 | real_t q, 67 | particle_mover_t& pm, 68 | const grid_t* , 69 | const size_t s, 70 | const size_t i, 71 | const size_t nx, 72 | const size_t ny, 73 | const size_t nz, 74 | const size_t num_ghosts, 75 | const Boundary boundary 76 | ) 77 | { 78 | 79 | auto _asa = a0.access(); 80 | 81 | /* // Kernel variables */ 82 | real_t s_dir[3]; 83 | real_t v0, v1, v2, v3; //, v4, v5; 84 | size_t axis, face; 85 | // if(s==1 && i==0){ 86 | // printf("%d %d\n",s,i); 87 | // } 88 | /* //particle_t* p = p0 + pm->i; */ 89 | /* //int index = pm->i; */ 90 | 91 | //q = qsp * weight.access(s, i); 92 | // Each pass reloads the particle's in-cell position and the displacement held in pm 93 | for(;;) 94 | { 95 | /* 96 | s_midx = p->dx; 97 | s_midy = p->dy; 98 | s_midz = p->dz; 99 | */ 100 | 101 | real_t s_midx = position_x.access(s, i); 102 | real_t s_midy = position_y.access(s, i); 103 | real_t s_midz = position_z.access(s, i); 104 | 105 | real_t s_dispx = pm.dispx; 106 | real_t s_dispy = pm.dispy; 107 | real_t s_dispz = pm.dispz; 108 | // Direction of travel along each axis: +1 for a positive displacement, -1 otherwise (including zero) 109 | s_dir[0] = (s_dispx>0) ? 1 : -1; 110 | s_dir[1] = (s_dispy>0) ? 1 : -1; 111 | s_dir[2] = (s_dispz>0) ? 1 : -1; 112 | 113 | // Compute twice the fractional distance to each potential 114 | // streak/cell face intersection. A zero displacement gets the sentinel 3.4e38 instead of dividing by zero. 115 | v0 = (s_dispx==0) ? 3.4e38 : (s_dir[0]-s_midx)/s_dispx; 116 | v1 = (s_dispy==0) ? 3.4e38 : (s_dir[1]-s_midy)/s_dispy; 117 | v2 = (s_dispz==0) ? 3.4e38 : (s_dir[2]-s_midz)/s_dispz; 118 | 119 | // Determine the fractional length and axis of current streak. The 120 | // streak ends on either the first face intersected by the 121 | // particle track or at the end of the particle track. 122 | // 123 | // axis 0,1 or 2 ... streak ends on a x,y or z-face respectively 124 | // axis 3 ... 
streak ends at end of the particle track 125 | /**/ v3=2, axis=3; 126 | if(v0dx))[axis] = v0; // Avoid roundoff fiascos--put the particle 223 | 224 | // TODO: this conditional could be better 225 | if (axis == 0) position_x.access(s, i) = v0; 226 | if (axis == 1) position_y.access(s, i) = v0; 227 | if (axis == 2) position_z.access(s, i) = v0; 228 | 229 | // _exactly_ on the boundary. 230 | face = axis; 231 | if( v0>0 ) face += 3; 232 | 233 | size_t ix, iy, iz; 234 | RANK_TO_INDEX(ii, ix, iy, iz, (nx+(2*num_ghosts)), (ny+(2*num_ghosts))); 235 | // ix = ii-((nx+2)*(ny+2)+(nx+2)); //ii-12; 236 | // iy = 1; 237 | // iz = 1; 238 | 239 | if (face == 0) { ix--; } 240 | if (face == 1) { iy--; } 241 | if (face == 2) { iz--; } 242 | if (face == 3) { ix++; } 243 | if (face == 4) { iy++; } 244 | if (face == 5) { iz++; } 245 | 246 | int is_leaving_domain = detect_leaving_domain(face, nx, ny, nz, ix, iy, iz, num_ghosts); 247 | if (is_leaving_domain >= 0) { 248 | /* //std::cout << s << ", " << i << " leaving on " << face << std::endl; */ 249 | 250 | /* //std::cout << */ 251 | /* //" x " << position_x.access(s,i) << */ 252 | /* //" y " << position_y.access(s,i) << */ 253 | /* //" z " << position_z.access(s,i) << */ 254 | /* //" cell " << cell.access(s,i) << */ 255 | /* //std::endl; */ 256 | 257 | if ( boundary == Boundary::Periodic) 258 | { 259 | //std::cout << "face" << std::endl; 260 | // If we hit the periodic boundary, try and put the article in the right place 261 | 262 | // TODO: we can do this in 1d just fine 263 | 264 | //size_t ix, iy, iz; 265 | 266 | //RANK_TO_INDEX(ii, ix, iy, iz, (nx-1+(2*num_ghosts)), (ny-1+(2*num_ghosts))); 267 | /* ix = ii-12; */ 268 | /* iy = 1; */ 269 | /* iz = 1; */ 270 | 271 | if (is_leaving_domain == 0) { // -1 on x face 272 | ix = (nx-1) + num_ghosts; 273 | } 274 | else if (is_leaving_domain == 1) { // -1 on y face 275 | iy = (ny-1) + num_ghosts; 276 | } 277 | else if (is_leaving_domain == 2) { // -1 on z face 278 | iz = (nz-1) + num_ghosts; 
279 | } 280 | else if (is_leaving_domain == 3) { // 1 on x face 281 | ix = num_ghosts; 282 | } 283 | else if (is_leaving_domain == 4) { // 1 on y face 284 | iy = num_ghosts; 285 | } 286 | else if (is_leaving_domain == 5) { // 1 on z face 287 | iz = num_ghosts; 288 | } 289 | /* int updated_ii = VOXEL(ix, iy, iz, */ 290 | /* nx, */ 291 | /* ny, */ 292 | /* nz, */ 293 | /* num_ghosts); */ 294 | 295 | } 296 | 297 | 298 | /* if ( Parameters::instance().BOUNDARY_TYPE == Boundary::Reflect) */ 299 | /* { */ 300 | /* // Hit a reflecting boundary condition. Reflect the particle */ 301 | /* // momentum and remaining displacement and keep moving the */ 302 | /* // particle. */ 303 | 304 | /* //logger << "Reflecting " << s << " " << i << " on axis " << axis << std::endl; */ 305 | 306 | /* //(&(p->ux ))[axis] = -(&(p->ux ))[axis]; */ 307 | /* //(&(pm->dispx))[axis] = -(&(pm->dispx))[axis]; */ 308 | /* if (axis == 0) */ 309 | /* { */ 310 | /* velocity_x.access(s, i) = -1.0f * velocity_x.access(s, i); */ 311 | /* pm.dispx = -1.0f * s_dispx; */ 312 | /* } */ 313 | /* if (axis == 1) */ 314 | /* { */ 315 | /* velocity_y.access(s, i) = -1.0f * velocity_y.access(s, i); */ 316 | /* pm.dispy = -1.0f * s_dispy; */ 317 | /* } */ 318 | /* if (axis == 2) */ 319 | /* { */ 320 | /* velocity_z.access(s, i) = -1.0f * velocity_z.access(s, i); */ 321 | /* pm.dispz = -1.0f * s_dispz; */ 322 | /* } */ 323 | /* continue; */ 324 | /* } */ 325 | } 326 | 327 | /* // TODO: this nieghbor stuff can be removed by going to more simple */ 328 | /* // boundaries */ 329 | /* /\* */ 330 | /* if ( neighborrangel || neighbor>g->rangeh ) { */ 331 | /* // Cannot handle the boundary condition here. Save the updated */ 332 | /* // particle position, face it hit and update the remaining */ 333 | /* // displacement in the particle mover. 
*/ 334 | /* //p->i = 8*p->i + face; */ 335 | /* cell.access(s, i) = 8 * ii + face; */ 336 | 337 | /* return 1; // Return "mover still in use" */ 338 | /* } */ 339 | /* *\/ */ 340 | /* else { */ 341 | 342 | /* // Crossed into a normal voxel. Update the voxel index, convert the */ 343 | /* // particle coordinate system and keep moving the particle. */ 344 | 345 | /* //p->i = neighbor - g->rangel; // Compute local index of neighbor */ 346 | /* //cell.access(s, i) = neighbor - g->rangel; */ 347 | /* // TODO: I still need to update the cell we're in */ 348 | 349 | //1D only 350 | //int updated_ii = ix+(nx+2)*(ny+2) + (nx+2); 351 | size_t updated_ii = VOXEL(ix, iy, iz, nx, ny, nz, num_ghosts); 352 | cell.access(s, i) = updated_ii; 353 | 354 | 355 | /* int updated_ii = VOXEL(ix, iy, iz, */ 356 | /* nx, */ 357 | /* ny, */ 358 | /* nz, */ 359 | /* num_ghosts); */ 360 | 361 | /* cell.access(s, i) = updated_ii; */ 362 | /* //std::cout << "Moving from cell " << ii << " to " << updated_ii << std::endl; */ 363 | /* } */ 364 | 365 | /**/ // Note: neighbor - g->rangel < 2^31 / 6 366 | //(&(p->dx))[axis] = -v0; // Convert coordinate system 367 | // TODO: this conditional/branching could be better 368 | if (axis == 0) position_x.access(s, i) = -v0; 369 | if (axis == 1) position_y.access(s, i) = -v0; 370 | if (axis == 2) position_z.access(s, i) = -v0; 371 | } 372 | 373 | return 0; // Return "mover not in use" 374 | } 375 | 376 | #endif // move_p 377 | -------------------------------------------------------------------------------- /src/push.h: -------------------------------------------------------------------------------- 1 | #ifndef pic_push_h 2 | #define pic_push_h 3 | 4 | #include 5 | #include "move_p.h" 6 | 7 | template 8 | void push( 9 | particle_list_t& particles, 10 | interpolator_array_t& f0, 11 | real_t qdt_2mc, 12 | real_t cdt_dx, 13 | real_t cdt_dy, 14 | real_t cdt_dz, 15 | real_t qsp, 16 | _accumulator& a0, 17 | grid_t* g, 18 | const size_t nx, 19 | const size_t ny, 20 | 
const size_t nz, 21 | const size_t num_ghosts, 22 | Boundary boundary 23 | ) 24 | { 25 | 26 | //auto slice = a0.slice<0>(); 27 | //decltype(slice)::atomic_access_slice _a = slice; 28 | 29 | auto position_x = Cabana::slice(particles); 30 | auto position_y = Cabana::slice(particles); 31 | auto position_z = Cabana::slice(particles); 32 | 33 | auto velocity_x = Cabana::slice(particles); 34 | auto velocity_y = Cabana::slice(particles); 35 | auto velocity_z = Cabana::slice(particles); 36 | 37 | auto weight = Cabana::slice(particles); 38 | auto cell = Cabana::slice(particles); 39 | 40 | //const real_t qdt_4mc = -0.5*qdt_2mc; // For backward half rotate 41 | const real_t one = 1.; 42 | const real_t one_third = 1./3.; 43 | const real_t two_fifteenths = 2./15.; 44 | 45 | // We prefer making slices out side of the llambda 46 | auto _ex = Cabana::slice(f0); 47 | auto _dexdy = Cabana::slice(f0); 48 | auto _dexdz = Cabana::slice(f0); 49 | auto _d2exdydz = Cabana::slice(f0); 50 | auto _ey = Cabana::slice(f0); 51 | auto _deydz = Cabana::slice(f0); 52 | auto _deydx = Cabana::slice(f0); 53 | auto _d2eydzdx = Cabana::slice(f0); 54 | auto _ez = Cabana::slice(f0); 55 | auto _dezdx = Cabana::slice(f0); 56 | auto _dezdy = Cabana::slice(f0); 57 | auto _d2ezdxdy = Cabana::slice(f0); 58 | auto _cbx = Cabana::slice(f0); 59 | auto _dcbxdx = Cabana::slice(f0); 60 | auto _cby = Cabana::slice(f0); 61 | auto _dcbydy = Cabana::slice(f0); 62 | auto _cbz = Cabana::slice(f0); 63 | auto _dcbzdz = Cabana::slice(f0); 64 | 65 | auto _push = 66 | KOKKOS_LAMBDA( const int s, const int i ) 67 | { 68 | auto accumulators_scatter_access = a0.access(); 69 | 70 | //for ( int i = 0; i < particle_list_t::vector_length; ++i ) 71 | //{ 72 | // Setup data accessors 73 | // This may be cleaner if we hoisted it? 
74 | int ii = cell.access(s,i); 75 | 76 | auto ex = _ex(ii); 77 | auto dexdy = _dexdy(ii); 78 | auto dexdz = _dexdz(ii); 79 | auto d2exdydz = _d2exdydz(ii); 80 | auto ey = _ey(ii); 81 | auto deydz = _deydz(ii); 82 | auto deydx = _deydx(ii); 83 | auto d2eydzdx = _d2eydzdx(ii); 84 | auto ez = _ez(ii); 85 | auto dezdx = _dezdx(ii); 86 | auto dezdy = _dezdy(ii); 87 | auto d2ezdxdy = _d2ezdxdy(ii); 88 | auto cbx = _cbx(ii); 89 | auto dcbxdx = _dcbxdx(ii); 90 | auto cby = _cby(ii); 91 | auto dcbydy = _dcbydy(ii); 92 | auto cbz = _cbz(ii); 93 | auto dcbzdz = _dcbzdz(ii); 94 | /* 95 | auto ex = f0.get(ii); 96 | auto dexdy = f0.get(ii); 97 | auto dexdz = f0.get(ii); 98 | auto d2exdydz = f0.get(ii); 99 | auto ey = f0.get(ii); 100 | auto deydz = f0.get(ii); 101 | auto deydx = f0.get(ii); 102 | auto d2eydzdx = f0.get(ii); 103 | auto ez = f0.get(ii); 104 | auto dezdx = f0.get(ii); 105 | auto dezdy = f0.get(ii); 106 | auto d2ezdxdy = f0.get(ii); 107 | auto cbx = f0.get(ii); 108 | auto dcbxdx = f0.get(ii); 109 | auto cby = f0.get(ii); 110 | auto dcbydy = f0.get(ii); 111 | auto cbz = f0.get(ii); 112 | auto dcbzdz = f0.get(ii); 113 | */ 114 | 115 | // Perform push 116 | 117 | // TODO: deal with pm's 118 | particle_mover_t local_pm = particle_mover_t(); 119 | 120 | real_t dx = position_x.access(s,i); // Load position 121 | real_t dy = position_y.access(s,i); // Load position 122 | real_t dz = position_z.access(s,i); // Load position 123 | 124 | real_t hax = qdt_2mc*( ( ex + dy*dexdy ) + 125 | dz*( dexdz + dy*d2exdydz ) ); 126 | real_t hay = qdt_2mc*( ( ey + dz*deydz ) + 127 | dx*( deydx + dz*d2eydzdx ) ); 128 | real_t haz = qdt_2mc*( ( ez + dx*dezdx ) + 129 | dy*( dezdy + dx*d2ezdxdy ) ); 130 | 131 | //1D only 132 | //real_t hax = qdt_2mc*ex; 133 | // real_t hay = 0; 134 | // real_t haz = 0; 135 | 136 | cbx = cbx + dx*dcbxdx; // Interpolate B 137 | cby = cby + dy*dcbydy; 138 | cbz = cbz + dz*dcbzdz; 139 | 140 | real_t ux = velocity_x.access(s,i); // Load velocity 141 | real_t uy = 
velocity_y.access(s,i); // Load velocity 142 | real_t uz = velocity_z.access(s,i); // Load velocity 143 | 144 | ux += hax; // Half advance E 145 | uy += hay; 146 | uz += haz; 147 | 148 | real_t v0 = qdt_2mc/sqrtf(one + (ux*ux + (uy*uy + uz*uz))); 149 | /**/ // Boris - scalars 150 | real_t v1 = cbx*cbx + (cby*cby + cbz*cbz); 151 | real_t v2 = (v0*v0)*v1; 152 | real_t v3 = v0*(one+v2*(one_third+v2*two_fifteenths)); 153 | real_t v4 = v3/(one+v1*(v3*v3)); 154 | v4 += v4; 155 | v0 = ux + v3*( uy*cbz - uz*cby ); // Boris - uprime 156 | v1 = uy + v3*( uz*cbx - ux*cbz ); 157 | v2 = uz + v3*( ux*cby - uy*cbx ); 158 | ux += v4*( v1*cbz - v2*cby ); // Boris - rotation 159 | uy += v4*( v2*cbx - v0*cbz ); 160 | uz += v4*( v0*cby - v1*cbx ); 161 | ux += hax; // Half advance E 162 | uy += hay; 163 | uz += haz; 164 | 165 | velocity_x.access(s,i) = ux; 166 | velocity_y.access(s,i) = uy; 167 | velocity_z.access(s,i) = uz; 168 | 169 | v0 = one/sqrtf(one + (ux*ux+ (uy*uy + uz*uz))); 170 | /**/ // Get norm displacement 171 | ux *= cdt_dx; 172 | uy *= cdt_dy; 173 | uz *= cdt_dz; 174 | ux *= v0; 175 | uy *= v0; 176 | uz *= v0; 177 | v0 = dx + ux; // Streak midpoint (inbnds) 178 | v1 = dy + uy; 179 | v2 = dz + uz; 180 | v3 = v0 + ux; // New position 181 | v4 = v1 + uy; 182 | real_t v5 = v2 + uz; 183 | 184 | real_t q = weight.access(s,i)*qsp; // Load charge 185 | 186 | // Check if inbnds 187 | if( v3<=one && v4<=one && v5<=one && -v3<=one && -v4<=one && -v5<=one ) 188 | { 189 | 190 | // Common case (inbnds). 
Note: accumulator values are 4 times 191 | // the total physical charge that passed through the appropriate 192 | // current quadrant in a time-step 193 | 194 | 195 | // Store new position 196 | position_x.access(s,i) = v3; 197 | position_y.access(s,i) = v4; 198 | position_z.access(s,i) = v5; 199 | 200 | dx = v0; // Streak midpoint 201 | dy = v1; 202 | dz = v2; 203 | v5 = q*ux*uy*uz*one_third; // Compute correction 204 | 205 | //real_t* a = (real_t *)( a0[ii].a ); // Get accumulator 206 | 207 | //1D only 208 | //_a(ii,0) += q*ux; 209 | //_a(ii,1) = 0; 210 | //_a(ii,2) = 0; 211 | //_a(ii,3) = 0; 212 | 213 | // accumulators_scatter_access(ii, accumulator_var::jx, 0) += 4.0f*q*ux; 214 | // accumulators_scatter_access(ii, accumulator_var::jx, 1) += 0.0; 215 | // accumulators_scatter_access(ii, accumulator_var::jx, 2) += 0.0; 216 | // accumulators_scatter_access(ii, accumulator_var::jx, 3) += 0.0; 217 | 218 | #define CALC_J(X,Y,Z) \ 219 | v4 = q*u##X; /* v2 = q ux */ \ 220 | v1 = v4*d##Y; /* v1 = q ux dy */ \ 221 | v0 = v4-v1; /* v0 = q ux (1-dy) */ \ 222 | v1 += v4; /* v1 = q ux (1+dy) */ \ 223 | v4 = one+d##Z; /* v4 = 1+dz */ \ 224 | v2 = v0*v4; /* v2 = q ux (1-dy)(1+dz) */ \ 225 | v3 = v1*v4; /* v3 = q ux (1+dy)(1+dz) */ \ 226 | v4 = one-d##Z; /* v4 = 1-dz */ \ 227 | v0 *= v4; /* v0 = q ux (1-dy)(1-dz) */ \ 228 | v1 *= v4; /* v1 = q ux (1+dy)(1-dz) */ \ 229 | v0 += v5; /* v0 = q ux [ (1-dy)(1-dz) + uy*uz/3 ] */ \ 230 | v1 -= v5; /* v1 = q ux [ (1+dy)(1-dz) - uy*uz/3 ] */ \ 231 | v2 -= v5; /* v2 = q ux [ (1-dy)(1+dz) - uy*uz/3 ] */ \ 232 | v3 += v5; /* v3 = q ux [ (1+dy)(1+dz) + uy*uz/3 ] */ 233 | 234 | CALC_J( x,y,z ); 235 | //std::cout << "Contributing " << v0 << ", " << v1 << ", " << v2 << ", " << v3 << std::endl; 236 | accumulators_scatter_access(ii, accumulator_var::jx, 0) += v0; // q*ux*(1-dy)*(1-dz); 237 | accumulators_scatter_access(ii, accumulator_var::jx, 1) += v1; // q*ux*(1+dy)*(1-dz); 238 | accumulators_scatter_access(ii, accumulator_var::jx, 2) += v2; // 
q*ux*(1-dy)*(1+dz); 239 | accumulators_scatter_access(ii, accumulator_var::jx, 3) += v3; // q*ux*(1+dy)*(1+dz); 240 | 241 | // printf("push deposit v0 %e to %d where ux = %e uy = %e and uz = %e \n", 242 | // v0, ii, ux, uy, uz); 243 | 244 | CALC_J( y,z,x ); 245 | accumulators_scatter_access(ii, accumulator_var::jy, 0) += v0; // q*ux; 246 | accumulators_scatter_access(ii, accumulator_var::jy, 1) += v1; // 0.0; 247 | accumulators_scatter_access(ii, accumulator_var::jy, 2) += v2; // 0.0; 248 | accumulators_scatter_access(ii, accumulator_var::jy, 3) += v3; // 0.0; 249 | 250 | CALC_J( z,x,y ); 251 | accumulators_scatter_access(ii, accumulator_var::jz, 0) += v0; // q*ux; 252 | accumulators_scatter_access(ii, accumulator_var::jz, 1) += v1; // 0.0; 253 | accumulators_scatter_access(ii, accumulator_var::jz, 2) += v2; // 0.0; 254 | accumulators_scatter_access(ii, accumulator_var::jz, 3) += v3; // 0.0; 255 | 256 | #undef CALC_J 257 | 258 | } 259 | else 260 | { // Unlikely 261 | local_pm.dispx = ux; 262 | local_pm.dispy = uy; 263 | local_pm.dispz = uz; 264 | 265 | local_pm.i = s*particle_list_t::vector_length + i; //i + itmp; //p_ - p0; 266 | 267 | // Handle particles that cross cells 268 | //move_p( position_x, position_y, position_z, cell, _a, q, local_pm, g, s, i, nx, ny, nz, num_ghosts, boundary ); 269 | move_p( position_x, position_y, position_z, cell, a0, q, local_pm, g, s, i, nx, ny, nz, num_ghosts, boundary ); 270 | 271 | // TODO: renable this 272 | //if ( move_p( p0, local_pm, a0, g, qsp ) ) { // Unlikely 273 | //if ( move_p( particles, local_pm, a0, g, qsp, s, i ) ) { // Unlikely 274 | //if( nm= max_nm) Kokkos::abort("overran max_nm"); 288 | copy_local_to_pm(nm); 289 | } 290 | } 291 | */ 292 | } 293 | 294 | //} // end VLEN loop 295 | }; 296 | 297 | Cabana::SimdPolicy 298 | vec_policy( 0, particles.size() ); 299 | Cabana::simd_parallel_for( vec_policy, _push, "push()" ); 300 | } 301 | 302 | #endif // pic_push_h 303 | 
-------------------------------------------------------------------------------- /src/types.h: -------------------------------------------------------------------------------- 1 | #ifndef pic_types_h 2 | #define pic_types_h 3 | 4 | #ifndef REAL_TYPE 5 | #define real_t float 6 | #else 7 | #define real_t REAL_TYPE 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | // Inner array size (the size of the arrays in the structs-of-arrays). 14 | 15 | #ifndef CELL_BLOCK_FACTOR 16 | #define CELL_BLOCK_FACTOR 32 17 | #endif 18 | // Cell blocking factor in memory 19 | const size_t cell_blocking = CELL_BLOCK_FACTOR; 20 | 21 | // Defaults 22 | #ifdef REQUIRE_HOST 23 | using MemorySpace = Kokkos::HostSpace; 24 | using ExecutionSpace = Kokkos::DefaultHostExecutionSpace; 25 | #else 26 | using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 27 | using ExecutionSpace = Kokkos::DefaultExecutionSpace; 28 | #endif 29 | ///// END ESSENTIALS /// 30 | 31 | enum UserParticleFields 32 | { 33 | PositionX = 0, 34 | PositionY, 35 | PositionZ, 36 | VelocityX, 37 | VelocityY, 38 | VelocityZ, 39 | Weight, 40 | Cell_Index, // This is stored as per VPIC, such that it includes ghost_offsets 41 | }; 42 | 43 | // Designate the types that the particles will hold. 44 | using ParticleDataTypes = 45 | Cabana::MemberTypes< 46 | real_t, // (0) x-position 47 | real_t, // (1) y-position 48 | real_t, // (2) z-position 49 | real_t, // (3) x-velocity 50 | real_t, // (4) y-velocity 51 | real_t, // (5) z-velocity 52 | real_t, // (6) weight 53 | int // (7) Cell index 54 | >; 55 | 56 | // Set the type for the particle AoSoA. 
57 | using particle_list_t = 58 | Cabana::AoSoA; 59 | 60 | /////////////// START VPIC TYPE //////////// 61 | 62 | #include "grid.h" 63 | 64 | enum InterpolatorFields 65 | { // TODO: things in here like EXYZ and CBXYZ are ambigious 66 | EX = 0, 67 | DEXDY, 68 | DEXDZ, 69 | D2EXDYDZ, 70 | EY, 71 | DEYDZ, 72 | DEYDX, 73 | D2EYDZDX, 74 | EZ, 75 | DEZDX, 76 | DEZDY, 77 | D2EZDXDY, 78 | CBX, 79 | DCBXDX, 80 | CBY, 81 | DCBYDY, 82 | CBZ, 83 | DCBZDZ 84 | }; 85 | 86 | using InterpolatorDataTypes = 87 | Cabana::MemberTypes< 88 | real_t, // ex, 89 | real_t , // dexdy, 90 | real_t , // dexdz, 91 | real_t , // d2exdydz, 92 | real_t , // ey, 93 | real_t , // deydz, 94 | real_t , // deydx, 95 | real_t , // d2eydzdx, 96 | real_t , // ez, 97 | real_t , // dezdx, 98 | real_t , // dezdy, 99 | real_t , // d2ezdxdy, 100 | // Below here is not need for ES? EM only? 101 | real_t , // cbx, 102 | real_t , // dcbxdx, 103 | real_t , // cby, 104 | real_t , // dcbydy, 105 | real_t , // cbz, 106 | real_t // dcbzdz, 107 | >; 108 | using interpolator_array_t = Cabana::AoSoA; 109 | using AccumulatorDataTypes = 110 | Cabana::MemberTypes< 111 | real_t[12] // jx[4] jy[4] jz[4] 112 | >; 113 | 114 | //using accumulator_array_t = Cabana::AoSoA; 115 | 116 | #define ACCUMULATOR_VAR_COUNT 3 117 | #define ACCUMULATOR_ARRAY_LENGTH 4 118 | 119 | // TODO: should we flatten this out to 1D 12 big? 
120 | using accumulator_array_t = Kokkos::View; 121 | 122 | using accumulator_array_sa_t = Kokkos::Experimental::ScatterView< 123 | real_t *[ACCUMULATOR_VAR_COUNT][ACCUMULATOR_ARRAY_LENGTH]>; //, KOKKOS_LAYOUT, 124 | //Kokkos::DefaultExecutionSpace, Kokkos::Experimental::ScatterSum, 125 | //KOKKOS_SCATTER_DUPLICATED, KOKKOS_SCATTER_ATOMIC 126 | //>; 127 | 128 | namespace accumulator_var { 129 | enum a_v { \ 130 | jx = 0, \ 131 | jy = 1, \ 132 | jz = 2, \ 133 | }; 134 | } 135 | 136 | 137 | 138 | enum FieldFields 139 | { 140 | FIELD_EX = 0, 141 | FIELD_EY, 142 | FIELD_EZ, 143 | FIELD_CBX, 144 | FIELD_CBY, 145 | FIELD_CBZ, 146 | FIELD_JFX, 147 | FIELD_JFY, 148 | FIELD_JFZ 149 | }; 150 | 151 | using FieldDataTypes = Cabana::MemberTypes< 152 | /* 153 | ex, ey, ez, div_e_err; // Electric field and div E error 154 | cbx, cby, cbz, div_b_err; // Magnetic field and div B error 155 | tcax, tcay, tcaz, rhob; // TCA fields and bound charge density 156 | jfx, jfy, jfz, rhof; // Free current and charge density 157 | material_id ematx, ematy, ematz, nmat; // Material at edge centers and nodes 158 | material_id fmatx, fmaty, fmatz, cmat; // Material at face and cell centers 159 | */ 160 | 161 | real_t, // ex 162 | real_t, // ey 163 | real_t, // ez 164 | real_t, // cbx 165 | real_t, // cby 166 | real_t, // cbz 167 | real_t, // jfx 168 | real_t, // jfy 169 | real_t // jfz 170 | >; 171 | 172 | using field_array_t = Cabana::AoSoA; 173 | 174 | // TODO: should this be in it's own file? 175 | class particle_mover_t { 176 | public: 177 | real_t dispx, dispy, dispz; // Displacement of particle 178 | int32_t i; // Index of the particle to move 179 | }; 180 | 181 | /////////////// END VPIC TYPE //////////// 182 | // 183 | // TODO: this may be a bad name? 
184 | # define RANK_TO_INDEX(rank,ix,iy,iz,_x,_y) \ 185 | int _ix, _iy, _iz; \ 186 | _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ 187 | _iy = _ix/int(_x); /* iy = iy+gpy*iz */ \ 188 | _ix -= _iy*int(_x); /* ix = ix */ \ 189 | _iz = _iy/int(_y); /* iz = iz */ \ 190 | _iy -= _iz*int(_y); /* iy = iy */ \ 191 | (ix) = _ix; \ 192 | (iy) = _iy; \ 193 | (iz) = _iz; \ 194 | 195 | #define VOXEL(x,y,z, nx,ny,nz, NG) ((x) + ((nx)+(NG*2))*((y) + ((ny)+(NG*2))*(z))) 196 | 197 | #endif // pic_types_h 198 | -------------------------------------------------------------------------------- /src/uncenter_p.h: -------------------------------------------------------------------------------- 1 | #ifndef uncenter_h 2 | #define uncenter_h 3 | 4 | void uncenter_particles( 5 | particle_list_t particles, 6 | interpolator_array_t& f0, 7 | real_t qdt_2mc 8 | ) 9 | { 10 | 11 | auto position_x = Cabana::slice(particles); 12 | auto position_y = Cabana::slice(particles); 13 | auto position_z = Cabana::slice(particles); 14 | 15 | auto velocity_x = Cabana::slice(particles); 16 | auto velocity_y = Cabana::slice(particles); 17 | auto velocity_z = Cabana::slice(particles); 18 | 19 | //auto weight = Cabana::slice(particles); 20 | auto cell = Cabana::slice(particles); 21 | 22 | const real_t qdt_4mc = -0.5*qdt_2mc; // For backward half rotate 23 | const real_t one = 1.; 24 | const real_t one_third = 1./3.; 25 | const real_t two_fifteenths = 2./15.; 26 | 27 | auto _uncenter = 28 | //KOKKOS_LAMBDA( const int s ) { 29 | KOKKOS_LAMBDA( const int s, const int i ) { 30 | // Grab particle properties 31 | real_t dx = position_x.access(s,i); // Load position 32 | real_t dy = position_y.access(s,i); // Load position 33 | real_t dz = position_z.access(s,i); // Load position 34 | 35 | int ii = cell.access(s,i); 36 | 37 | // Grab interpolator values 38 | // TODO: hoist slice call? 
39 | auto ex = Cabana::slice(f0)(ii); 40 | auto dexdy = Cabana::slice(f0)(ii); 41 | auto dexdz = Cabana::slice(f0)(ii); 42 | auto d2exdydz = Cabana::slice(f0)(ii); 43 | auto ey = Cabana::slice(f0)(ii); 44 | auto deydz = Cabana::slice(f0)(ii); 45 | auto deydx = Cabana::slice(f0)(ii); 46 | auto d2eydzdx = Cabana::slice(f0)(ii); 47 | auto ez = Cabana::slice(f0)(ii); 48 | auto dezdx = Cabana::slice(f0)(ii); 49 | auto dezdy = Cabana::slice(f0)(ii); 50 | auto d2ezdxdy = Cabana::slice(f0)(ii); 51 | auto cbx = Cabana::slice(f0)(ii); 52 | auto dcbxdx = Cabana::slice(f0)(ii); 53 | auto cby = Cabana::slice(f0)(ii); 54 | auto dcbydy = Cabana::slice(f0)(ii); 55 | auto cbz = Cabana::slice(f0)(ii); 56 | auto dcbzdz = Cabana::slice(f0)(ii); 57 | 58 | // Calculate field values 59 | real_t hax = qdt_2mc*(( ex + dy*dexdy ) + dz*( dexdz + dy*d2exdydz )); 60 | real_t hay = qdt_2mc*(( ey + dz*deydz ) + dx*( deydx + dz*d2eydzdx )); 61 | real_t haz = qdt_2mc*(( ez + dx*dezdx ) + dy*( dezdy + dx*d2ezdxdy )); 62 | 63 | cbx = cbx + dx*dcbxdx; // Interpolate B 64 | cby = cby + dy*dcbydy; 65 | cbz = cbz + dz*dcbzdz; 66 | 67 | // Load momentum 68 | real_t ux = velocity_x.access(s,i); // Load velocity 69 | real_t uy = velocity_y.access(s,i); // Load velocity 70 | real_t uz = velocity_z.access(s,i); // Load velocity 71 | 72 | real_t v0 = qdt_4mc/(real_t)sqrt(one + (ux*ux + (uy*uy + uz*uz))); 73 | 74 | // Borris push 75 | // Boris - scalars 76 | real_t v1 = cbx*cbx + (cby*cby + cbz*cbz); 77 | real_t v2 = (v0*v0)*v1; 78 | real_t v3 = v0*(one+v2*(one_third+v2*two_fifteenths)); 79 | real_t v4 = v3/(one+v1*(v3*v3)); 80 | 81 | v4 += v4; 82 | 83 | v0 = ux + v3*( uy*cbz - uz*cby ); // Boris - uprime 84 | v1 = uy + v3*( uz*cbx - ux*cbz ); 85 | v2 = uz + v3*( ux*cby - uy*cbx ); 86 | 87 | ux += v4*( v1*cbz - v2*cby ); // Boris - rotation 88 | uy += v4*( v2*cbx - v0*cbz ); 89 | uz += v4*( v0*cby - v1*cbx ); 90 | 91 | ux += hax; // Half advance E 92 | uy += hay; 93 | uz += haz; 94 | 95 | // Store result 96 | 
velocity_x.access(s,i) = ux; 97 | velocity_y.access(s,i) = uy; 98 | velocity_z.access(s,i) = uz; 99 | 100 | }; 101 | 102 | Cabana::SimdPolicy 103 | vec_policy( 0, particles.size() ); 104 | Cabana::simd_parallel_for( vec_policy, _uncenter, "uncenter()" ); 105 | } 106 | 107 | #endif // uncenter 108 | -------------------------------------------------------------------------------- /src/visualization.h: -------------------------------------------------------------------------------- 1 | #ifndef pic_visualization_h 2 | #define pic_visualization_h 3 | 4 | #include 5 | #include 6 | 7 | class Visualizer { 8 | 9 | public: 10 | std::ofstream vis_file; 11 | 12 | void write_header(size_t total_num_particles, size_t step) { 13 | 14 | std::stringstream sstm; 15 | 16 | sstm << "vis/step" << step << ".vtk"; 17 | std::string file_name = sstm.str(); 18 | 19 | vis_file.open(file_name); 20 | 21 | vis_file << "# vtk DataFile Version 2.0" << std::endl; 22 | vis_file << "Unstructured Grid Example" << std::endl; 23 | vis_file << "ASCII" << std::endl; 24 | vis_file << "" << std::endl; 25 | vis_file << "DATASET UNSTRUCTURED_GRID" << std::endl; 26 | 27 | vis_file << "POINTS " << total_num_particles << " float" << std::endl; 28 | } 29 | 30 | // TODO: all these loops are the same, we could replace it with vtemplate 31 | void write_particles_position(particle_list_t& particles) 32 | { 33 | auto position_x = Cabana::slice(particles); 34 | auto position_y = Cabana::slice(particles); 35 | auto position_z = Cabana::slice(particles); 36 | 37 | size_t write_count = 0; 38 | for ( std::size_t idx = 0; idx != particles.size(); ++idx ) 39 | { 40 | real_t x = position_x(idx); 41 | real_t y = position_y(idx); 42 | real_t z = position_z(idx); 43 | 44 | vis_file << x << " " << y << " " << z << std::endl; 45 | write_count++; 46 | } 47 | } 48 | 49 | void write_cell_types(size_t num_particles) 50 | { 51 | vis_file << "CELL_TYPES " << num_particles << std::endl; 52 | 53 | for (size_t p = 0; p < num_particles; 
p++) 54 | { 55 | vis_file << "1" << std::endl; 56 | } 57 | } 58 | 59 | void pre_scalars(size_t num_particles) 60 | { 61 | vis_file << "POINT_DATA " << num_particles << std::endl; 62 | } 63 | 64 | void write_particles_property_header(std::string name, size_t num_particles) 65 | { 66 | vis_file << "SCALARS " << name << " float 1" << std::endl; 67 | vis_file << "LOOKUP_TABLE default" << std::endl; 68 | } 69 | 70 | void write_particles_index(particle_list_t& particles) 71 | { 72 | auto cell = Cabana::slice(particles); 73 | 74 | for ( std::size_t idx = 0; idx != particles.size(); ++idx ) 75 | { 76 | real_t w = cell(idx); 77 | 78 | vis_file << w << std::endl; 79 | } 80 | } 81 | 82 | void write_particles_w(particle_list_t& particles) 83 | { 84 | auto weight = Cabana::slice(particles); 85 | 86 | for ( std::size_t idx = 0; idx != particles.size(); ++idx ) 87 | { 88 | real_t w = weight(idx); 89 | 90 | vis_file << w << std::endl; 91 | } 92 | } 93 | 94 | void write_particles_sp(particle_list_t& particles, size_t sn) 95 | { 96 | for ( std::size_t idx = 0; idx != particles.size(); ++idx ) 97 | { 98 | vis_file << sn << std::endl; 99 | } 100 | } 101 | 102 | void finalize() 103 | { 104 | vis_file.close(); 105 | } 106 | 107 | void write_vis(particle_list_t particles, size_t step) 108 | { 109 | 110 | size_t total_num_particles = particles.size(); 111 | 112 | // TODO: this needs to be updated once species are introduced 113 | /* 114 | for (unsigned int sn = 0; sn < species.size(); sn++) 115 | { 116 | int particle_count = species[sn].num_particles; 117 | total_num_particles += particle_count; 118 | } 119 | */ 120 | 121 | write_header(total_num_particles, step); 122 | 123 | //for (unsigned int sn = 0; sn < species.size(); sn++) 124 | //{ 125 | //auto particles_accesor = get_particle_accessor(m, species[sn].key); 126 | write_particles_position(particles); 127 | //} 128 | 129 | write_cell_types(total_num_particles); 130 | 131 | pre_scalars(total_num_particles); 132 | 
write_particles_property_header("weight", total_num_particles); 133 | 134 | //for (unsigned int sn = 0; sn < species.size(); sn++) 135 | //{ 136 | //auto particles_accesor = get_particle_accessor(m, species[sn].key); 137 | write_particles_w(particles); 138 | //} 139 | //*/ 140 | write_particles_property_header("cells", total_num_particles); 141 | write_particles_index(particles); 142 | 143 | write_particles_property_header("species", total_num_particles); 144 | 145 | //for (unsigned int sn = 0; sn < species.size(); sn++) 146 | //{ 147 | //auto particles_accesor = get_particle_accessor(m, species[sn].key); 148 | write_particles_sp(particles, 1); 149 | //} 150 | finalize(); 151 | 152 | } 153 | 154 | }; 155 | 156 | #endif // Visualizer 157 | -------------------------------------------------------------------------------- /summary.md: -------------------------------------------------------------------------------- 1 | ## Technical Details 2 | 3 | 1. It should exclusively use OMP for threading 4 | 2. It should be written with OMP4.5 in mind, as well as being CUDA extensible 5 | 6 | ## Considerations 7 | 8 | One of the more interesting things about PIC for advanced architecrues is 9 | that the different aspects of the push require different (contradictory) 10 | optimizations. 11 | 12 | The main `particle_push` has three main parts: 13 | 14 | ### The particle move 15 | 16 | **Particle Properties Used (memory streams)**: 17 | all 18 | **Data layouts**: 19 | i) AoS => Good, not great vectorized 20 | ii) SoA => Good, but many memory streams 21 | iii) AoSoA => Ideal 22 | 23 | - If we vectorize AoS we need to do a transpose 24 | - SoA doesn't really buy us anything in terms of memory streams here as we 25 | need all properties. 
- Particle order isn't a concern here
- There isn't much burden on cache here as long as the next particle(s) are pre-fetched in time

### The field stencil (read) and particle velocity

**Particle Properties Used (memory streams)**:
Most
**Data layouts**:
i) AoS => Not great, doesn't need to use all the streams
ii) SoA => Good, lets you split the streams out
iii) AoSoA => Great

- The field stencil benefits significantly from having well ordered (on a cell
  basis) particles
- Cache use here is crucial because of the large, semi-nonobvious
  (prefetching), stencil
- If we can explicitly tell the compiler that groups of particles share
  properties, we get better re-use than good "accidental" cache reuse


### The current accumulation stencil (write)

**Particle Properties Used (memory streams)**:
Momentum
**Data layouts**:
i) AoS => Not great, doesn't need to use all the streams
ii) SoA => Good, lets you split the streams out
iii) AoSoA => Great

- If particles all write to the same cell, that gives good assumptions and
  makes it safe to do some writes
- If particles are highly disordered, this can cause big problems for the
  safety of the writes and often leads to atomics

--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
# Test suites that live in their own sub-directories.
add_subdirectory(decks)

# The energy comparison is only registered for the EM solver.
# The expansion is quoted so that an unset or empty SOLVER_TYPE compares as ""
# instead of collapsing to `if( STREQUAL "EM")`, which aborts configuration.
if("${SOLVER_TYPE}" STREQUAL "EM")
    add_subdirectory(energy_comparison)
endif()

# The stand-alone `example` test is currently disabled. To re-enable it:
#   list(APPEND TESTS example)

# One executable and one CTest entry per name in TESTS; each test is built
# from <name>.cpp in this directory.
foreach(test ${TESTS})
    add_executable(${test} ${test}.cpp)
    target_link_libraries(${test} PRIVATE CabanaPIC)
    # Naming the target (rather than ./<name>) lets CMake substitute the real
    # path of the built binary, whatever the generator or output directory.
    add_test(NAME ${test} COMMAND ${test})
endforeach()

--------------------------------------------------------------------------------
/tests/decks/CMakeLists.txt:
-------------------------------------------------------------------------------- 1 | # TODO: this will trigger a full rebuild of all files.. 2 | 3 | LIST(APPEND TESTS custom_init) 4 | 5 | # TODO: we shouldn't have to duplicate all this 6 | #INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/src) 7 | 8 | # TODO: this should live somewhere more common to reduce code duplication 9 | foreach (test ${TESTS}) 10 | set(DECK_PATH "${PROJECT_SOURCE_DIR}/decks/${test}") 11 | add_executable(${test} ${SOURCES} ${CabanaPIC_EXAMPLE_DIR}/example.cpp ${DECK_PATH}.cxx) 12 | target_compile_definitions(${test} PRIVATE USER_INPUT_DECK=${DECK_PATH}) 13 | target_link_libraries(${test} CabanaPIC) 14 | add_test(NAME ${test} COMMAND ./${test}) 15 | endforeach(test) 16 | -------------------------------------------------------------------------------- /tests/energy_comparison/2stream-em.cxx: -------------------------------------------------------------------------------- 1 | #include "src/input/deck.h" 2 | // TODO: reaching into this path is a bit odd.. 3 | #include "tests/energy_comparison/compare_energies.h" 4 | 5 | class Custom_Finalizer : public Run_Finalizer { 6 | public: 7 | using real_ = real_t; 8 | 9 | // This *has* to be virtual, as we store the object as a pointer to the 10 | // base class 11 | virtual void finalize() 12 | { 13 | // Try and validate the final answers 14 | 15 | #ifndef GOLD_ENERGY_FILE 16 | std::cerr << "Cannot find energy gold file, exiting" << std::endl; 17 | std::exit(EXIT_FAILURE); 18 | #endif 19 | 20 | #ifdef CUSTOM_ERROR_MARGIN 21 | double error_margin = CUSTOM_ERROR_MARGIN 22 | #else 23 | double error_margin = 0.10; // 10% 24 | // TODO: add constexpr if for if real_T is double to decrease the toll 25 | #endif 26 | 27 | // TODO: we might need to clear out local energy file first for 28 | // this to be sensible? 
Else we could end up reading old stale 29 | // files locally 30 | 31 | std::string energy_file_name = "energies.txt"; 32 | std::string energy_gold_file_name = EXPAND( GOLD_ENERGY_FILE ); 33 | 34 | // TODO: port this to a testing framework instead of relying on 35 | // error codes? 36 | 37 | // We want to measure 18-50 in science time, so 371 to 1030 in 38 | // timestep for the given configuration 39 | 40 | // This does 2 passes through the file, but it's OK for now.. 41 | 42 | // Test steps 3581..5081 as it covers approx 60..100 sim time 43 | 44 | // Mask which fields to sum, read only 3rd val 45 | const unsigned short e_mask = 0b0000000100; 46 | bool e_correct = test_utils::compare_energies( 47 | energy_gold_file_name, 48 | energy_file_name, 49 | error_margin, // margin for error 50 | e_mask, 51 | test_utils::FIELD_ENUM::Sum, 52 | 1, // if should diagnostic out 53 | "e.out", // diagnostic output file 54 | 3581, //371 // num to skip, reads 3581 55 | 1300 // check 1500 lines 56 | ); 57 | std::cout << "E Test Pass: " << e_correct << std::endl; 58 | 59 | const unsigned short b_mask = 0b0000001000; 60 | bool b_correct = test_utils::compare_energies( 61 | energy_gold_file_name, 62 | energy_file_name, 63 | error_margin, // margin for error 64 | b_mask, 65 | test_utils::FIELD_ENUM::Sum, 66 | 1, // if should diagnostic out 67 | "b.out", // diagnostic output file 68 | 3581, //371 // num to skip, reads 3581 to EOF 69 | 1300 // check 1500 lines 70 | ); 71 | std::cout << "B Test Pass: " << b_correct << std::endl; 72 | 73 | // Throw error code if either failed 74 | if ((!b_correct) || (!e_correct)) { 75 | std::exit(1); 76 | } 77 | } 78 | }; 79 | 80 | // This relies on the default particle init, changing that will break this.. 81 | Input_Deck::Input_Deck() 82 | { 83 | // User puts initialization code here 84 | // Example: EM 2 Stream in 1d? 
85 | 86 | run_finalizer = new Custom_Finalizer(); 87 | 88 | nx = 1; 89 | ny = 32; 90 | nz = 1; 91 | 92 | num_steps = 6000; 93 | nppc = 100; 94 | 95 | //v0 = 0.2; 96 | v0 = 0.0866025403784439; 97 | 98 | // Can also create temporaries 99 | real_ gam = 1.0 / sqrt(1.0 - v0*v0); 100 | 101 | const real_ default_grid_len = 1.0; 102 | 103 | len_x_global = default_grid_len; 104 | //len_y_global = 3.14159265358979*0.5; // TODO: use proper PI? 105 | len_y_global = 0.628318530717959*(gam*sqrt(gam)); 106 | len_z_global = default_grid_len; 107 | 108 | dt = 0.99*courant_length( 109 | len_x_global, len_y_global, len_z_global, 110 | nx, ny, nz 111 | ) / c; 112 | 113 | n0 = 2.0; //for 2stream, for 2 species, making sure omega_p of each species is 1 114 | } 115 | -------------------------------------------------------------------------------- /tests/energy_comparison/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # TODO: this will trigger a full rebuild of all files.. 
2 | 3 | LIST(APPEND TESTS 2stream-em) 4 | 5 | # TODO: we shouldn't have to duplicate all this 6 | #INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/src) 7 | 8 | list(APPEND gold_file "${CMAKE_CURRENT_SOURCE_DIR}/energies_gold") 9 | 10 | 11 | # TODO: avoid duplicating this link line everywhere 12 | foreach (test ${TESTS}) 13 | set(DECK_PATH "${test}") 14 | add_executable(${test} ${SOURCES} ${CabanaPIC_EXAMPLE_DIR}/example.cpp ${DECK_PATH}.cxx) 15 | target_compile_definitions(${test} PRIVATE USER_INPUT_DECK=${DECK_PATH}) 16 | target_compile_definitions(${test} PRIVATE GOLD_ENERGY_FILE=${gold_file}.${test}.${REAL_TYPE}) 17 | target_link_libraries(${test} CabanaPIC) 18 | add_test(NAME ${test} COMMAND ./${test}) 19 | endforeach(test) 20 | -------------------------------------------------------------------------------- /tests/energy_comparison/compare_energies.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include // epsilon for limit 6 | #include // pair 7 | 8 | #include 9 | 10 | namespace test_utils { 11 | /** 12 | * @brief Helper function to write collective errors to file for further analysis 13 | * 14 | * @param errs The vector of all errors 15 | * @param field_per_line The number of values to write per file line 16 | */ 17 | void write_error_ouput( std::vector errs, int field_per_line, std::string err_file_base_name) 18 | { 19 | int counter = 0; 20 | std::ofstream outputFile(err_file_base_name); 21 | 22 | for (auto e : errs) 23 | { 24 | counter++; 25 | outputFile << counter << " " << e*100.0 << " "; // Convert to percent and dump 26 | if (counter % field_per_line == 0) 27 | { 28 | outputFile << std::endl; 29 | } 30 | } 31 | outputFile.close(); 32 | } 33 | 34 | /** 35 | * @brief Helper function to compare numbers and calculate a absolute error 36 | * 37 | * @param A The first value to compare 38 | * @param B The second value to compare 39 | * 40 | * @return The calculated error 41 | */ 42 | 
double calculate_abs_error(double A, double B) 43 | { 44 | return std::abs(A-B); 45 | } 46 | 47 | /** 48 | * @brief Helper function to compare numbers and calculate a relative error 49 | * 50 | * @param A The first value to compare 51 | * @param B The second value to compare 52 | * 53 | * @return The calculated error 54 | */ 55 | double calculate_relative_error(double A, double B) 56 | { 57 | return std::abs(A-B) / std::min(A,B); 58 | } 59 | 60 | /** 61 | * @brief Function to compare errors to a given tolerance, and decide if it's within range 62 | * 63 | * @param A The first value to compare 64 | * @param B The second value to compare 65 | * @param relative_tolerance The relative tolerance to use when comparing 66 | * 67 | * @return A pair containing true/false if it's within tolerance, and the calculated error 68 | */ 69 | std::pair compare_error(double A, double B, double relative_tolerance) 70 | { 71 | bool within_tol = false; 72 | double err = 0.0; 73 | 74 | // Right now this is pretty arbitrary.. 
75 | double abs_threshhold = 10 * std::numeric_limits::epsilon(); 76 | 77 | // Calculate if we're withing tolerances 78 | // If we're close to relative, do absolute 79 | if (std::abs(std::min(A,B)) < abs_threshhold) 80 | { 81 | err = calculate_abs_error(A, B); 82 | 83 | // Finding a relative error to 0 doesn't make much 84 | // sense, so lets do absolute error instead 85 | if ( err < 2*std::numeric_limits::epsilon() ) 86 | { 87 | within_tol = true; 88 | } 89 | else { 90 | within_tol = false; 91 | } 92 | } 93 | else { // Do relative error 94 | 95 | err = calculate_relative_error(A, B); 96 | 97 | if (err < relative_tolerance) 98 | { 99 | within_tol = true; 100 | } 101 | else { 102 | within_tol = false; 103 | } 104 | } 105 | return { within_tol, err }; 106 | } 107 | 108 | enum FIELD_ENUM { 109 | Individual = 0, // Track each field individually 110 | Sum // Sum the masked fields 111 | }; 112 | 113 | /** 114 | * @brief Function to compare the contents of two energy files 115 | * 116 | * @param file_a First file to compare 117 | * @param file_b Second file to compare 118 | * @param relative_tolerance Relative tolerance which is acceptable 119 | * @param field_mask A mask to specify which fields in the file to use 120 | * @param sum_mask A mask to specify which fields in the file to sum and compare 121 | * @param write_err_output If you should write the error output to a file 122 | * @param err_file_base_name Base filename for writing output 123 | * @param num_lines_to_skip The number of lines to skip into the file 124 | * @param lines_to_read The number of lines to read into the file (for partial file analysis). Default -1 means "all" 125 | * 126 | * @NOTE A typical energy file is: 127 | * 128 | * and the bit maps go accordingly with being the LSB. 
129 | * A mask for b fields only would be 0x000001110 130 | * 131 | * @NOTE We could * use bitsets for the masking but * they're generally slower 132 | * 133 | * @return True is they match (within tol), false if not 134 | */ 135 | bool compare_energies( 136 | const std::string file_a, 137 | const std::string file_b, 138 | const double relative_tolerance, 139 | const unsigned short field_mask = 0b1111111111111111, /// short has 16 bytes, assume all are true 140 | const FIELD_ENUM field_enum = FIELD_ENUM::Individual, /// short has 16 bytes, assume all are true 141 | const int write_err_ouput = 0, // If the run should dump the errors to disk 142 | const std::string err_file_base_name = "err.out", // File name to write errors to 143 | const int num_lines_to_skip = 0, // Most energy files have 3 lines of padding 144 | const int lines_to_read = -1 // -1 => all. 145 | ) 146 | { 147 | // TODO: I could easily have a policy here based on the type of the field_mask 148 | std::vector errs; 149 | 150 | //const int DEFAULT_FILED_COUNT = 7; 151 | 152 | unsigned short agg_total = 0; 153 | unsigned short v = field_mask; 154 | // Count set bits 155 | for (agg_total = 0; v; agg_total++) 156 | { 157 | v &= v - 1; // clear the least significant bit set 158 | } 159 | 160 | try { 161 | 162 | bool match = true; 163 | 164 | std::string line1 = ""; 165 | std::string line2 = ""; 166 | 167 | std::ifstream f1 (file_a); 168 | std::ifstream f2 (file_b); 169 | 170 | //std::cout << "file_a " << file_a << std::endl; 171 | //std::cout << "file_b " << file_b << std::endl; 172 | 173 | double max_err = 0.0; 174 | double max_err_A = 0.0; 175 | double max_err_B = 0.0; 176 | int max_err_line = -1; 177 | 178 | // This is for counting the number of tokens on a line (changes 179 | // based on number of species). 
It can likely be done much better 180 | int line_token_count = 0; 181 | 182 | if (!f1.is_open()) 183 | { 184 | std::cerr << "Unable to open file f1 " << file_a << std::endl;; 185 | return false; 186 | } 187 | else if (!f2.is_open()) 188 | { 189 | std::cerr << "Unable to open file f2 " << file_b << std::endl; 190 | return false; 191 | } 192 | else // Performan test 193 | { 194 | 195 | // Perform skipping 196 | for (int i = 0; i < num_lines_to_skip; i++) 197 | { 198 | getline(f1,line1); 199 | getline(f2,line2); 200 | } 201 | 202 | int counter = num_lines_to_skip; 203 | 204 | // Do processing 205 | while ( getline(f1,line1) ) 206 | { 207 | getline(f2,line2); 208 | 209 | // Tokenize lines 210 | std::stringstream linestream1(line1); 211 | std::string item1; 212 | 213 | std::stringstream linestream2(line2); 214 | std::string item2; 215 | 216 | int used_line_token_count = 0; 217 | int total_line_token_count = 0; 218 | 219 | double sum_A = 0.0; 220 | double sum_B = 0.0; 221 | std::pair returned_err; 222 | returned_err.second = -1.0; // set a dummy value to show uninit 223 | 224 | int agg_count = 0; 225 | 226 | // TODO: this is not resilient to whitepsace, and will act 227 | // oddly if the input files are not single space delimited 228 | 229 | while (getline(linestream1, item1, ' ')) 230 | { 231 | bool write_this_err_ouput = write_err_ouput; 232 | //std::cout << "Setting write_this_err_ouput tp " << write_this_err_ouput << std::endl; 233 | 234 | //std::cout << "item 1 " << item1 << std::endl; 235 | 236 | getline(linestream2, item2, ' '); 237 | //std::cout << "item 2 " << item2 << std::endl; 238 | total_line_token_count++; 239 | 240 | // Use this field 241 | //std::cout << "this_line " << this_line_token_count << " mask " << field_mask << std::endl; 242 | 243 | // Take the value one, and shift it to generate the mask to compare 244 | unsigned short this_line_token_mask = 1 << (total_line_token_count - 1); // Set correct highest bit on 245 | //this_line_token_mask |= 
this_line_token_mask-1; // Set lower bits on 246 | 247 | // If this field is within our requested mask, use it 248 | if (this_line_token_mask & field_mask) 249 | { 250 | used_line_token_count++; 251 | //std::cout << "Parsing field " << used_line_token_count << " val " << item1 << std::endl; 252 | 253 | double A = std::stod(item1); 254 | double B = std::stod(item2); 255 | 256 | //std::cout << "A " << A << " vs " << B << std::endl; 257 | 258 | if ( 259 | (field_enum == FIELD_ENUM::Sum) && // Need to aggregate 260 | (agg_count < agg_total) // Not done aggregating yet 261 | ) 262 | { 263 | // Need to aggregate.. 264 | sum_A += A; 265 | sum_B += B; 266 | agg_count++; 267 | 268 | //std::cout << "sum a " << sum_A << " += " << A << std::endl; 269 | //std::cout << "sum b " << sum_B << " += " << B << std::endl; 270 | 271 | // Don't write this particular one 272 | write_this_err_ouput = false; 273 | 274 | if (agg_count == agg_total) { // final_aggregation 275 | //std::cout << sum_A << " vs " << sum_B << std::endl; 276 | returned_err = compare_error(sum_A, sum_B, relative_tolerance); 277 | write_this_err_ouput = true; 278 | } 279 | } 280 | else // We can just compare this val 281 | { 282 | sum_A = A; 283 | sum_B = B; 284 | returned_err = compare_error(A, B, relative_tolerance); 285 | } 286 | 287 | if (returned_err.second != -1.0) // Has some value set 288 | { 289 | bool returned_match = returned_err.first; 290 | 291 | if (!returned_match) { 292 | match = false; 293 | } 294 | 295 | double err = returned_err.second; 296 | 297 | // Track max absolute error 298 | if (err > max_err) 299 | { 300 | max_err = err; 301 | max_err_A = sum_A; 302 | max_err_B = sum_B; 303 | max_err_line = counter; 304 | } 305 | 306 | 307 | // If we track the errors, track this one 308 | if (write_this_err_ouput) 309 | { 310 | errs.push_back(err); 311 | } 312 | } 313 | } 314 | else { 315 | //std::cout << "Skipping field " << this_line_token_mask << " val " << item1 << std::endl; 316 | } 317 | } 318 | 
line_token_count = used_line_token_count; 319 | counter++; 320 | 321 | if (lines_to_read > 0) // Skipping is enabled 322 | { 323 | if ( (counter - num_lines_to_skip) >= lines_to_read) 324 | { 325 | break; 326 | } 327 | } 328 | } 329 | 330 | f1.close(); 331 | f2.close(); 332 | } 333 | 334 | //std::cout << "Field mask : " << field_mask << std::endl; 335 | //std::cout << "Fields used : " << line_token_count << std::endl; 336 | 337 | std::cout << "Max found err was " << max_err*100 << "% (" << max_err_A << " vs " << max_err_B << ") on line " << max_err_line << " (Threshold: " << 338 | relative_tolerance*100 << "%)" << std::endl; 339 | 340 | if (write_err_ouput) 341 | { 342 | int err_per_line = line_token_count; 343 | if (field_enum == FIELD_ENUM::Sum) // Need to aggregate 344 | { 345 | err_per_line /= agg_total; // Reduce by aggregation factor 346 | } 347 | 348 | std::cout << "Writing error output " << errs.size() << std::endl; 349 | write_error_ouput( errs, err_per_line, err_file_base_name); 350 | } 351 | std::cout << "Exiting test with a result of " << match << std::endl; 352 | return match; 353 | } 354 | catch (const std::exception &exc) // Catching all is bad form, but OK for now.. 355 | { 356 | // catch anything thrown within try block that derives from std::exception 357 | std::cerr << "Caught error... 
Aborting" << std::endl; 358 | std::cerr << exc.what(); 359 | return false; 360 | } 361 | 362 | } 363 | 364 | } // namespace 365 | -------------------------------------------------------------------------------- /tests/example.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN // This tells Catch to provide a main() 2 | //#define CATCH_CONFIG_RUNNER // We will provide a custom main 3 | #include "catch.hpp" 4 | 5 | TEST_CASE( "Trivial example", "[example_tests]" ) 6 | { 7 | REQUIRE(1); 8 | } 9 | -------------------------------------------------------------------------------- /tests/manual_tests/test/2-particle/2pcle-minipic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ECP-copa/CabanaPIC/1ee2c84582b051d59653232abe86ac6da2c6b35e/tests/manual_tests/test/2-particle/2pcle-minipic.png -------------------------------------------------------------------------------- /tests/manual_tests/test/2-particle/plot.gp: -------------------------------------------------------------------------------- 1 | set term png enhanced 2 | 3 | set output '2pcle-minipic.png' 4 | set xlabel 't' 5 | set ylabel 'particle location' 6 | set xrange [0:100] 7 | #set yrange [0:1] 8 | set grid 9 | 10 | f(x)=0.7-0.25-0.5*(0.2-0.5)*cos(x) 11 | dt=0.000990 12 | v(x)=-0.15*sin(x) 13 | 14 | set keytitle 'nx=1000' 15 | set key bottom 16 | 17 | plot 'partloc' u 1:2 w l t 'minipic,x','' u ($1+dt*0.5):3 w l t 'v','partloc-vpic' u 1:2 w l t 'vpic,x', f(x) t 'theory',v(x) t '' 18 | -------------------------------------------------------------------------------- /tests/manual_tests/test/2-stream-em/2stream-em.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ECP-copa/CabanaPIC/1ee2c84582b051d59653232abe86ac6da2c6b35e/tests/manual_tests/test/2-stream-em/2stream-em.png 
--------------------------------------------------------------------------------
/tests/manual_tests/test/2-stream-em/plot.gp:
--------------------------------------------------------------------------------
# Plots column 4 against column 2 of the two run outputs on a logarithmic
# y axis, next to an exponential reference curve.

# Output device and file (PNG; the EPS variant is kept for reference)
set terminal png #post eps enhanced 22 color

#set output '2stream-em.eps'
set output '2stream-em.png'

# Axes
set xlabel '{/Symbol w}_pt'
set ylabel 'W_B'
set xrange [0:150]
set yrange [1e-15:10]
set logscale y
set grid
set format y "%.e"

# Reference curve, scaled in the plot command below
f(x) = exp(0.279*2*x)

# Legend
set keytitle '({/Symbol g}_0=1.02), nx=32, nppc=100'
set key bottom

plot 'outw0.2' using 2:4 with lines title 'minipic 1-thread', \
     'outw0.2-2' using 2:4 with lines title 'minipic 2-thread', \
     f(x)*1e-16 title 'linear theory'


--------------------------------------------------------------------------------
/tests/manual_tests/test/2-stream/plot.gp:
--------------------------------------------------------------------------------
# Plots column 3 against column 2 of the two run outputs on a logarithmic
# y axis, next to an exponential reference curve.

# Output device and file
set terminal postscript eps enhanced 22 color

set output '2stream-minipic.eps'

# Axes
set xlabel '{/Symbol w}_pt'
set ylabel 'W_E'
set xrange [0:50]
set yrange [1e-8:100]
set logscale y
set grid

# Reference curves; f0 only appears in the commented-out tail of the plot command
f0(x) = exp(x)
f(x) = exp(0.497184855006572*2*x)

# Legend
set keytitle '({/Symbol g}_0=1.0038), nx=32, nppc=8000'
set key bottom

plot 'out' using 2:3 with lines title 'minipic 1-thread', \
     'out2' using 2:3 with lines title 'minipic 2-thread', \
     f(x)*1.2e-10 title 'linear theory' #,f0(x)*1.2e-10


--------------------------------------------------------------------------------