├── .gitignore ├── .mailmap ├── CONTRIBUTING.md ├── LICENSE.txt ├── README.md ├── __main__.py ├── config_example.py ├── docs ├── Makefile ├── conf.py ├── contributing.rst ├── example-config.rst ├── illustrations │ ├── boundaries.png │ ├── cell_grid.png │ ├── coarse_to_fine.png │ ├── tsc2d.png │ ├── virtplasma.png │ └── virtplasma_moved.png ├── index-html.rst ├── index-nonhtml.rst ├── lcode-source.rst ├── make.bat ├── overview │ ├── problem.rst │ ├── trickery.rst │ ├── units.rst │ └── window_and_grids.rst ├── technicalities │ ├── design_decisions.rst │ ├── gpu.rst │ ├── grid_sizes.rst │ ├── initialization.rst │ └── offsets.rst ├── toc-overview.rst ├── toc-technicalities.rst ├── toc-tour.rst ├── toc-usage.rst ├── tour │ ├── Bz.rst │ ├── ExEyBxBy.rst │ ├── Ez.rst │ ├── background_ions.rst │ ├── beam.rst │ ├── coarse_and_fine_plasma.rst │ ├── deposition.rst │ ├── plasma.rst │ ├── plasma_pusher.rst │ └── xi_step.rst ├── usage │ ├── installation.rst │ └── running_and_embedding.rst └── useful-links.rst ├── lcode.py ├── requirements.txt ├── setup.cfg ├── setup.py └── shell.nix /.gitignore: -------------------------------------------------------------------------------- 1 | *$py.class 2 | *.c 3 | *.calltree 4 | *.egg-info/ 5 | *.eggs/ 6 | *.html 7 | *.log 8 | *.lprof 9 | *.mp4 10 | *.preprocessed.pyx 11 | *.prof 12 | *.py[cod] 13 | *.so 14 | .coverage 15 | /AUTHORS 16 | /Bz*.npy 17 | /ChangeLog 18 | /Ez*.npy 19 | /build/ 20 | /cover/ 21 | /coverage.xml 22 | /dist/ 23 | /dump/ 24 | /htmlcov/ 25 | /misc/c_plasma_solver 26 | /result_*.dat 27 | /transverse*/ 28 | __pycache__/ 29 | /docs/index.rst 30 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | Alexander Sosedkin 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 
Contributing 2 | ============ 3 | 4 | When contributing to this repository, please discuss the change you wish to make 5 | via email (`mailto:team@lcode.info`), 6 | issue tracker (`https://github.com/lotov/lcode3d/issues`), 7 | personal communication or any other method with our team. 8 | 9 | The suggested followup workflow for the implementor would be: 10 | 11 | * choose the most suitable parent branch; 12 | * fork `https://github.com/lotov/lcode3d` or its fork; 13 | * check it out locally; 14 | * install dependencies (see `requirements.txt`); 15 | * verify that LCODE runs as-is; 16 | * implement, test and commit changes; 17 | * rebase it if the parent branch advances; 18 | * run `flake8 lcode.py` and fix the warnings; 19 | * submit a pull request; 20 | * wait for it to be rebased-and-merged. 21 | 22 | By submitting patches to this project, 23 | you agree them to be redistributed under the project's license 24 | according to the normal forms and usages of the open-source community. 25 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. 
By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 
49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 
90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 
121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. 
You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 
184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 
217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 631 | 632 | Copyright (c) 2016-2017 LCODE team . 
633 | 634 | LCODE is free software: you can redistribute it and/or modify 635 | it under the terms of the GNU Affero General Public License as published by 636 | the Free Software Foundation, either version 3 of the License, or 637 | (at your option) any later version. 638 | 639 | LCODE is distributed in the hope that it will be useful, 640 | but WITHOUT ANY WARRANTY; without even the implied warranty of 641 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 642 | GNU Affero General Public License for more details. 643 | 644 | You should have received a copy of the GNU Affero General Public License 645 | along with LCODE. If not, see . 646 | 647 | Also add information on how to contact you by electronic and paper mail. 648 | 649 | If your software can interact with users remotely through a computer 650 | network, you should also make sure that it provides a way for users to 651 | get its source. For example, if your program is a web application, its 652 | interface could display a "Source" link that leads users to an archive 653 | of the code. There are many ways you could offer source, and different 654 | solutions will be better for different programs; see section 13 for the 655 | specific requirements. 656 | 657 | You should also get your employer (if you work as a programmer) or school, 658 | if any, to sign a "copyright disclaimer" for the program, if necessary. 659 | For more information on this, and how to apply and follow the GNU AGPL, see 660 | . 661 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | LCODE 2 | ===== 3 | 4 | LCODE is free software for simulations of 5 | particle beam-driven plasma wakefield acceleration. 6 | 7 | Visit https://lcode3d.readthedocs.io to learn more about it. 8 | 9 | LCODE 3D is new and experimental software. 
For now, please contact `team@lcode.info`
COMPRESS * sqrt(pi / 2)))) 29 | 30 | gpu_index = 0 #: Index of the GPU that should perform the calculations 31 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # https://github.com/sphinx-doc/sphinx/issues/3451 2 | SOURCE_DATE_EPOCH = $(shell git log -1 --format=%ct) 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = . 8 | BUILDDIR = _build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('..')) 18 | 19 | autodoc_mock_imports = ['matplotlib', 'numba', 'numpy', 'scipy', 'cupy'] 20 | 21 | 22 | # -- Project information ----------------------------------------------------- 23 | 24 | project = 'LCODE 3D' 25 | author = 'Ivan Kargapolov, Konstantin Lotov, Irina Shalimova, Alexander Sosedkin' 26 | copyright = '2019, ' + author 27 | 28 | # The short X.Y version 29 | version = '' 30 | # The full version, including alpha/beta/rc tags 31 | release = '' 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 37 | # 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones. 43 | extensions = [ 44 | 'sphinx.ext.autodoc', 45 | 'sphinx.ext.todo', 46 | 'sphinx.ext.mathjax', 47 | 'sphinx.ext.viewcode', 48 | 'matplotlib.sphinxext.plot_directive', 49 | ] 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ['_templates'] 53 | 54 | # The suffix(es) of source filenames. 55 | # You can specify multiple suffix as a list of string: 56 | # 57 | # source_suffix = ['.rst', '.md'] 58 | source_suffix = '.rst' 59 | 60 | # The master toctree document. 61 | master_doc = 'index' 62 | # Pull off a hack and substitute different indices 63 | def copy_master_doc(apps): 64 | import shutil 65 | if apps.tags.has('html'): 66 | shutil.copy('index-html.rst', 'index.rst') 67 | else: 68 | shutil.copy('index-nonhtml.rst', 'index.rst') 69 | def setup(app): 70 | app.connect('builder-inited', copy_master_doc) 71 | 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 
77 | # Usually you set "language" from the command line for these cases. 78 | language = 'en' 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | # This pattern also affects html_static_path and html_extra_path. 83 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = None 87 | 88 | 89 | # -- Options for HTML output ------------------------------------------------- 90 | 91 | # The theme to use for HTML and HTML Help pages. See the documentation for 92 | # a list of builtin themes. 93 | # 94 | html_theme = 'sphinx_rtd_theme' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | # 100 | #html_theme_options = { 101 | # 'navigation_depth': 4, 102 | # 'collapse_navigation': False, 103 | #} 104 | 105 | # Add any paths that contain custom static files (such as style sheets) here, 106 | # relative to this directory. They are copied after the builtin static files, 107 | # so a file named "default.css" will overwrite the builtin "default.css". 108 | html_static_path = ['_static'] 109 | 110 | # Custom sidebar templates, must be a dictionary that maps document names 111 | # to template names. 112 | # 113 | # The default sidebars (for documents that don't match any pattern) are 114 | # defined by theme itself. Builtin themes are using these templates by 115 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 116 | # 'searchbox.html']``. 117 | # 118 | # html_sidebars = {} 119 | 120 | 121 | # -- Options for HTMLHelp output --------------------------------------------- 122 | 123 | # Output file base name for HTML help builder. 
124 | htmlhelp_basename = 'LCODE3Ddoc' 125 | 126 | 127 | # -- Options for LaTeX output ------------------------------------------------ 128 | 129 | latex_elements = { 130 | # The paper size ('letterpaper' or 'a4paper'). 131 | # 132 | 'papersize': 'a4paper', 133 | 134 | # The font size ('10pt', '11pt' or '12pt'). 135 | # 136 | 'pointsize': '12pt', 137 | 138 | 139 | # Latex figure (float) alignment 140 | # 141 | # 'figure_align': 'htbp', 142 | 143 | 'extraclassoptions': 'openany,oneside', 144 | 145 | 'preamble': r''' 146 | \setcounter{tocdepth}{1} 147 | \setcounter{secnumdepth}{1} 148 | \let\stdchapter\chapter 149 | \renewcommand\chapter{\cleardoublepage\stdchapter} 150 | \let\stdsection\section 151 | \renewcommand\section{\newpage\stdsection} 152 | ''', 153 | #\newcommand{\chapterbreak}{\clearpage} 154 | #\newcommand{\sectionbreak}{\clearpage} 155 | #\titlespacing{\chapter}{0em}{*0}{*0} 156 | #\usepackage{anonchap} 157 | #\titleformat{\chapter}[hang]{\Huge\bfseries}{\thechapter\hspace{.5em}{|}\hspace{.5em}}{0pt}{\Huge\bfseries} 158 | #'fncychap': '', 159 | #'passoptionstopackages': r'\PassOptionsToPackage{compact,tiny}{titlesec}', 160 | 161 | 'sphinxsetup': 'hmargin=.5in, vmargin=.75in, marginpar=0in', 162 | } 163 | 164 | # Grouping the document tree into LaTeX files. List of tuples 165 | # (source start file, target name, title, 166 | # author, documentclass [howto, manual, or own class]). 167 | latex_documents = [ 168 | (master_doc, 'LCODE3D.tex', 'LCODE 3D Documentation', 169 | 'I. Kargapolov, K. Lotov, I. Shalimova, A. Sosedkin', 170 | 'manual'), 171 | ] 172 | 173 | latex_toplevel_sectioning = 'chapter' 174 | latex_show_pagerefs = True 175 | latex_show_urls = 'footnote' 176 | 177 | 178 | # -- Options for manual page output ------------------------------------------ 179 | 180 | # One entry per manual page. List of tuples 181 | # (source start file, name, description, authors, manual section). 
182 | man_pages = [ 183 | (master_doc, 'lcode3d', 'LCODE 3D Documentation', 184 | [author], 1) 185 | ] 186 | 187 | 188 | # -- Options for Texinfo output ---------------------------------------------- 189 | 190 | # Grouping the document tree into Texinfo files. List of tuples 191 | # (source start file, target name, title, author, 192 | # dir menu entry, description, category) 193 | texinfo_documents = [ 194 | (master_doc, 'LCODE3D', 'LCODE 3D Documentation', 195 | author, 'LCODE3D', 196 | 'Quasistatic plasma wakefield simulation code in well under 1000 SLOC.', 197 | 'Miscellaneous'), 198 | ] 199 | 200 | 201 | # -- Options for Epub output ------------------------------------------------- 202 | 203 | # Bibliographic Dublin Core info. 204 | epub_title = project 205 | 206 | # The unique identifier of the text. This can be a ISBN number 207 | # or the project homepage. 208 | # 209 | # epub_identifier = '' 210 | 211 | # A unique identification for the text. 212 | # 213 | # epub_uid = '' 214 | 215 | # A list of files that should not be packed into the epub file. 216 | epub_exclude_files = ['search.html'] 217 | 218 | 219 | # -- Extension configuration ------------------------------------------------- 220 | 221 | # -- Options for todo extension ---------------------------------------------- 222 | 223 | # If true, `todo` and `todoList` produce output, else they produce nothing. 224 | todo_include_todos = True 225 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | When contributing to this repository, please discuss the change you wish to make 5 | via email (`team@lcode.info `_), 6 | issue tracker (``_), 7 | personal communication or any other method with our team. 
8 | 9 | The suggested followup workflow for the implementor would be: 10 | 11 | * choose the most suitable parent branch; 12 | * fork ``_ or its fork; 13 | * check it out locally; 14 | * install dependencies (see ``requirements.txt``); 15 | * verify that LCODE runs as-is; 16 | * implement, test and commit changes; 17 | * check that the code is still under 1000 SLOC; 18 | * try to strip all the complex programming concepts and clever hacks; 19 | * rebase it if the parent branch advances; 20 | * submit a pull request; 21 | * wait for it to be rebased-and-merged. 22 | 23 | By submitting patches to this project, 24 | you agree them to be redistributed under the project's license 25 | according to the normal forms and usages of the open-source community. 26 | -------------------------------------------------------------------------------- /docs/example-config.rst: -------------------------------------------------------------------------------- 1 | Example configuration file 2 | ========================== 3 | 4 | .. 
literalinclude:: ../config_example.py 5 | :language: python 6 | -------------------------------------------------------------------------------- /docs/illustrations/boundaries.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lotov/lcode3d/6f8f2bbfcaa545aab24bd8f1266ce3df9423a026/docs/illustrations/boundaries.png -------------------------------------------------------------------------------- /docs/illustrations/cell_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lotov/lcode3d/6f8f2bbfcaa545aab24bd8f1266ce3df9423a026/docs/illustrations/cell_grid.png -------------------------------------------------------------------------------- /docs/illustrations/coarse_to_fine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lotov/lcode3d/6f8f2bbfcaa545aab24bd8f1266ce3df9423a026/docs/illustrations/coarse_to_fine.png -------------------------------------------------------------------------------- /docs/illustrations/tsc2d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lotov/lcode3d/6f8f2bbfcaa545aab24bd8f1266ce3df9423a026/docs/illustrations/tsc2d.png -------------------------------------------------------------------------------- /docs/illustrations/virtplasma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lotov/lcode3d/6f8f2bbfcaa545aab24bd8f1266ce3df9423a026/docs/illustrations/virtplasma.png -------------------------------------------------------------------------------- /docs/illustrations/virtplasma_moved.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lotov/lcode3d/6f8f2bbfcaa545aab24bd8f1266ce3df9423a026/docs/illustrations/virtplasma_moved.png 
-------------------------------------------------------------------------------- /docs/index-html.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | LCODE 3D documentation 3 | ====================== 4 | 5 | If you are interested in not just *what* LCODE does, but also *how* does it do it, 6 | and you are reading this documentation for the first time, 7 | the recommended procedure is to 8 | glance over the first chapter and then 9 | `dive straight into the source code <_modules/lcode.html#dst2d>`_ 10 | switching back and forth using [source] and [docs] cross-links. 11 | Trust us, at 500 lines of code it's even shorter than the docs, 12 | and if you get stuck, you always have a link back to the explanations. 13 | 14 | 15 | .. include:: toc-overview.rst 16 | 17 | .. include:: toc-usage.rst 18 | 19 | .. include:: toc-tour.rst 20 | 21 | .. include:: toc-technicalities.rst 22 | 23 | .. toctree:: 24 | :maxdepth: 1 25 | :caption: Extras 26 | 27 | contributing 28 | useful-links 29 | -------------------------------------------------------------------------------- /docs/index-nonhtml.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | LCODE 3D documentation 3 | ====================== 4 | 5 | Useful links: 6 | 7 | * LCODE 3D source: https://github.com/lotov/lcode3d 8 | * LCODE 3D documentation: https://lcode3d.readthedocs.org 9 | * LCODE team website: https://lcode.info 10 | * LCODE team email: `team@lcode.info `_ 11 | 12 | .. note:: 13 | Please consider using the online version of this document 14 | at https://lcode3d.readthedocs.org instead if you are reading this documentation 15 | for the first time and you are interested in the implementation details. 16 | 17 | Documentation in other formats is not officially supported, 18 | is not guaranteed to be complete and 19 | is generally provided only for convenience. 
If you are interested in not just *what* LCODE does, but also *how* it does it,
Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/overview/problem.rst: -------------------------------------------------------------------------------- 1 | Problem 2 | ======= 3 | 4 | Objective 5 | --------- 6 | LCODE 3D calculates the plasma response to an ultrarelativistic charged beam. 7 | 8 | Simulating particle beams is definitely planned for the future. 9 | 10 | 11 | .. _geometry: 12 | 13 | Geometry 14 | -------- 15 | Quasistatic approximation is employed, with time-space coordinate :math:`\xi = z - ct`. 16 | 17 | From the perspective of the beam, :math:`\xi` is a space coordinate. 18 | The head of the beam corresponds to :math:`\xi = 0`, 19 | with its tail extending into *lower, negative* values of :math:`\xi`. 20 | 21 | From the perspective of a plasma layer, penetrated by the beam, 22 | :math:`\xi` is a time coordinate. 23 | At :math:`\xi = 0` the plasma layer is unperturbed; 24 | as the beam passes through it, :math:`\xi` values decrease. 25 | 26 | The remaining two coordinates :math:`x, y` are way more boring 27 | [:doc:`../overview/window_and_grids`]. 28 | 29 | The problem geometry is thus :math:`\xi, x, y`. 30 | 31 | 32 | Beam 33 | ---- 34 | The beam is currently simulated as a charge density function :math:`\rho_b(\xi, x, y)`, 35 | and not with particles 36 | [:doc:`../tour/beam`]. 
37 | 38 | 39 | Plasma 40 | ------ 41 | Only the electron motion is simulated, 42 | the ions are represented with a static backround charge density 43 | [:doc:`../tour/background_ions`]. 44 | 45 | .. math:: 46 | \frac{d \vec{p}}{d \xi} &= -\frac{q}{1-v_z} \left( \vec{E} + \left[ \vec{v} \times \vec{B} \right]\right) 47 | 48 | \frac{d x}{d \xi} &= -\frac{v_x}{1-v_z} 49 | 50 | \frac{d y}{d \xi} &= -\frac{v_y}{1-v_z} 51 | 52 | \vec{v} &= \frac{\vec{p}}{\sqrt{M^2+p^2}} 53 | 54 | The plasma is simulated using a PIC method with an optional twist: 55 | only a 'coarse' grid of plasma (think 1 particle per 9 cells) is stored and evolved, 56 | while 'fine' particles (think 4 per cell) are bilinearly interpolated from it during the deposition 57 | [:doc:`../tour/coarse_and_fine_plasma`]. 58 | The plasma is effectively made not from independent particles, 59 | but from a fabric of 'fine' TSC-2D shaped particles. 60 | 61 | 62 | Fields 63 | ------ 64 | Both the plasma movement and the 'external' beam contribute to the charge density/currents 65 | :math:`\rho, j_x, j_y, j_z` 66 | [:doc:`../tour/deposition`]. 67 | 68 | The fields are calculated from their derivatives. Theoretically, the equations are 69 | 70 | .. 
math:: 71 | 72 | \Delta_\perp E_z &= \frac{\partial j_x}{\partial x} - \frac{\partial j_y}{\partial y} 73 | 74 | \Delta_\perp B_z &= \frac{\partial j_x}{\partial y} - \frac{\partial j_y}{\partial x} 75 | 76 | \Delta_\perp E_x &= \frac{\partial \rho}{\partial x} - \frac{\partial j_x}{\partial \xi} 77 | 78 | \Delta_\perp E_y &= \frac{\partial \rho}{\partial y} - \frac{\partial j_y}{\partial \xi} 79 | 80 | \Delta_\perp B_x &= \frac{\partial j_y}{\partial \xi} - \frac{\partial j_z}{\partial y} 81 | 82 | \Delta_\perp B_y &= \frac{\partial j_z}{\partial x} - \frac{\partial j_x}{\partial \xi} 83 | 84 | \Delta_\perp &= \frac{\partial^2}{\partial x^2} + \frac{\partial^2}{\partial y^2} 85 | 86 | \rho &= \rho_e + \rho_i + \rho_b 87 | 88 | j &= j_e + j_i + j_b 89 | 90 | where indices :math:`e, i, b` represent electrons, ions and beam respectively. 91 | 92 | .. note:: 93 | 94 | In reality, things are not that simple. 95 | 96 | :math:`E_z` and :math:`B_z` calculations is relatively straightforward and 97 | boils down to solving 98 | the Laplace and Neumann equation with Dirichlet boundary conditions 99 | respectively. 100 | 101 | The transverse fields are actually obtained 102 | by solving the Helmholtz equation with mixed boundary conditions, 103 | and then doing some more magic on top of that 104 | (so refer to :doc:`../tour/Ez`, :doc:`../tour/ExEyBxBy` and :doc:`../tour/Bz` 105 | for the equations that we *really* solve). 106 | 107 | 108 | Step 109 | ---- 110 | The :math:`\xi`-cycle idea consists of looping these three actions: 111 | 112 | * depositing plasma particles (and adding the beam density/current), 113 | * calculating the new fields and 114 | * moving plasma particles, 115 | 116 | executed several times for each step in a predictor-corrector scheme 117 | [:doc:`../tour/xi_step`]. 
118 | -------------------------------------------------------------------------------- /docs/overview/trickery.rst: -------------------------------------------------------------------------------- 1 | Trickery index 2 | ============== 3 | 4 | Geometry 5 | -------- 6 | * :ref:`Quasistatic approximation ` 7 | reduces the problem dimensionality. 8 | 9 | .. todo:: DOCS: write a separate page on the topic or link somewhere from the overview. 10 | 11 | 12 | Numerical stability 13 | ------------------- 14 | 15 | * :ref:`Helmholtz equation ` 16 | increases numerical stability; optional, but highly recommended. 17 | 18 | * :ref:`'Variant A' ` 19 | increases numerical stability; optional. 20 | 21 | * :doc:`Coarse/fine plasma approach <../tour/coarse_and_fine_plasma>` 22 | increases numerical stability; optional. 23 | 24 | * :doc:`Offset-coordinate separation <../technicalities/offsets>` 25 | (probably) helps with float precision. 26 | 27 | 28 | Simplifications 29 | --------------- 30 | 31 | * :ref:`Reflection boundary ` 32 | is closer than the field calculation boundary to simplify boundary handling. 33 | -------------------------------------------------------------------------------- /docs/overview/units.rst: -------------------------------------------------------------------------------- 1 | Units 2 | ===== 3 | 4 | .. todo:: DOCS (Lotov) 5 | -------------------------------------------------------------------------------- /docs/overview/window_and_grids.rst: -------------------------------------------------------------------------------- 1 | Simulation window and grids 2 | =========================== 3 | 4 | .. _fields_and_densities_grid: 5 | 6 | Fields and densities grid 7 | ------------------------- 8 | 9 | .. figure:: ../illustrations/cell_grid.png 10 | 11 | The grid on which the fields and densities are calculated. 12 | 13 | Fields and densities (:math:`ro`, :math:`j`) are calculated on a 14 | ``config.grid_steps`` x ``config.grid_steps``-sized grid. 
15 | This number must be odd in order to have an on-axis cell 16 | for on-axis diagnostics. 17 | 18 | .. autodata:: config_example.grid_steps 19 | 20 | .. autodata:: config_example.grid_step_size 21 | 22 | The fields are calculated at the centers of the grid cells. 23 | 24 | 25 | .. note:: 26 | The muddy concept of 'window width' is no longer referenced in LCODE 3D 27 | to ease up the inevitable confusion about what it actually means 28 | and how it relates to ``config.grid_step_size``. 29 | Please refrain from thinking in these terms 30 | and head over to the following subsection for more useful ones. 31 | 32 | 33 | .. _reflect_and_plasma_boundaries: 34 | 35 | Reflect and 'plasma' boundaries 36 | ------------------------------- 37 | 38 | .. figure:: ../illustrations/boundaries.png 39 | 40 | The reflect and plasma boundaries illustrated. 41 | 42 | .. autodata:: config_example.reflect_padding_steps 43 | 44 | .. autodata:: config_example.plasma_padding_steps 45 | 46 | 47 | The plasma partlcles are not allowed to enter the outermost cells 48 | in order to simplify the treatment of boundary regions 49 | during interpolation, deposition and field calculation 50 | [:ref:`zero_special_boundary_treatment`]. 51 | In order to achieve that, the reflection boundary is placed 52 | ``config.reflection_padding_steps`` steps deeper into the simulation area. 53 | 54 | .. note:: 55 | While choosing the value for this parameter, one should take into account 56 | the particle size. Even a single fine 57 | [:doc:`Coarse/fine plasma <../tour/coarse_and_fine_plasma>`] 58 | particle is three cells wide in deposition, 59 | [:doc:`Plasma <../tour/plasma>`], 60 | so the gap width should be wide enough to cover the entire coarse particle cloud 61 | size. 62 | Failure to meet this condition may result in a memory violation error. 63 | This could be solved by introducing fine particle reflection, 64 | but that'd be more resource-intensive. 
65 | 66 | 67 | Note that while it defines the area where plasma is allowed to be present, 68 | it must be larger than the area where the plasma is initially positioned. 69 | The size of the second area is controlled by the 70 | ``config.plasma_padding_steps``, 71 | which puts a cut-off limit on the placement 72 | of both coarse and fine plasma particles. 73 | 74 | 75 | Coarse and fine plasma grids 76 | ---------------------------- 77 | 78 | Finally, the plasma boundary hosts two grids of particles, 79 | the coarse grid and the fine grid 80 | [more info in :doc:`Coarse/fine plasma approach <../tour/coarse_and_fine_plasma>`], 81 | which are coarser and finer than the field grid respectively. 82 | -------------------------------------------------------------------------------- /docs/technicalities/design_decisions.rst: -------------------------------------------------------------------------------- 1 | Design decisions 2 | ================ 3 | 4 | Codebase complexity 5 | ------------------- 6 | The code strives to be readable and vaguely understandable 7 | by a freshman student with only some basic background 8 | in plasma physics and numerical simulations, 9 | to the point of being comfortable with modifying it. 10 | 11 | Given the complexity of the physical problem behind it, 12 | this goal is, sadly, unattainable, 13 | but the authors employ several avenues to get as close as possible: 14 | 15 | 1. Abstaining from using advanced programming concepts. 16 | Cool things like aspect-oriented programming are neat, 17 | but keeping the code well under 1000 SLOC is even neater. 18 | The two classes we currently have is two classes over the ideal amount of them. 19 | 2. Preferring less code over extensibility. 20 | 3. Picking simpler code over performance tweaking. 21 | 4. Choosing malleability over user convenience. 22 | 5. Creating external modules or branches over exhaustive featureset. 23 | 6. 
Not shying away from external dependencies or unpopular technologies,
even if this means sacrificing portability.
67 | 68 | With about the same amount of arrays in scope for each of these three sizes, 69 | it is clear that the :math:`N_f^2`-sized arrays would dominate the memory consumption. 70 | Fortunately, the arrays that contain fine plasma characteristics would be transient and only used during the deposition, 71 | while the interpolation indices and coefficients grouped under ``virt_params`` 72 | can be reduced to 1D arrays by exploiting the :math:`x/y` symmetry of the coarse/fine plasma grids. 73 | 74 | This way LCODE 3D stores only :math:`N_c^2`- and :math:`N^2`-sized arrays, 75 | with :math:`N_f`-sized ones barely taking up any space thanks to the being 1D. 76 | 77 | Also, all previous attempts to micromanaged the GPU memory allocations have been scraped 78 | in favor of blindly trusting the ``cupy`` on-demand allocation. 79 | Not only it is extremely convenient, it's even more performant than our own solutions. 80 | 81 | 82 | .. _integer_xi_steps: 83 | 84 | Integer xi steps 85 | ---------------- 86 | 87 | :math:`\xi`-steps are integer for the purpose of bypassing float precision-based errors. 88 | The task of converting it into the :math:`\xi`-coordinate is placed within the usage context. 89 | -------------------------------------------------------------------------------- /docs/technicalities/gpu.rst: -------------------------------------------------------------------------------- 1 | GPU calculations pecularities 2 | ============================= 3 | 4 | LCODE 3D performs most of the calculations on GPU using a mix of two approaches. 5 | 6 | 7 | .. _cuda_kernels: 8 | 9 | CUDA kernels with numba.cuda 10 | ---------------------------- 11 | One can use CUDA from Python more or less directly by writing and launching CUDA kernels with 12 | ``numba.cuda``. 13 | 14 | An example would be: 15 | 16 | .. 
code-block:: python 17 | 18 | @numba.cuda.jit 19 | def add_two_arrays_kernel(arr1, arr2, result): 20 | i = numba.cuda.grid(1) 21 | if i >= arr1.shape[0]: 22 | return 23 | result[i] = arr1[i] + arr2[i] 24 | 25 | This function represents a loop body, launched in parallel with many threads at once. 26 | Each of them starts with obtaining the array index it is 'responsible' for with ``cuda.grid(1)`` 27 | and then proceeds to perform the required calculation. 28 | As it is optimal to launch them in 32-threaded 'warps', one also has to handle the case 29 | of having more threads than needed by making them skip the calculation. 30 | 31 | No fancy Python operations are supported inside CUDA kernels, 32 | it is basically a way to write C-like bodies for hot loops 33 | without having to write actual C/CUDA code. 34 | You can only use simple types for kernel arguments 35 | and you cannot return anything from them. 36 | 37 | To rub it in, this isn't even a directly callable function yet. 38 | To conceal the limitations and the calling complexity, 39 | it is convenient to write a wrapper for it. 40 | 41 | .. code-block:: python 42 | 43 | def add_two_arrays(arr1, arr2): 44 | result = cp.zeros_like(arr1) # uses cupy, see below 45 | warp_count = int(ceil(arr1.size / WARP_SIZE)) 46 | add_two_arrays_kernel[warp_count, WARP_SIZE](arr1, arr2, result) 47 | return result 48 | 49 | A pair of numbers (``warp_count``, ``WARP_SIZE``) is required to launch the kernel. 50 | ``warp_count`` is chosen this way so that ``warp_count * WARP_SIZE`` would be 51 | larger than the problem size. 52 | 53 | 54 | .. autodata:: lcode.WARP_SIZE 55 | 56 | 57 | Array-wise operations with cupy 58 | ------------------------------- 59 | ``cupy`` is a GPU array library that aims to implement a ``numpy-like`` interface to GPU arrays. 60 | It allows one to, e.g., add up two GPU arrays with a simple and terse ``a + b``. 61 | Most of the functions in LCODE use vectorized operations and ``cupy``. 
62 | All memory management is done with ``cupy`` for consistency. 63 | 64 | It's hard to underestimate the convenience of this approach, but sometimes expressing algorithms 65 | in vectorized notation is too hard or suboptimal. 66 | The only two times we're actually going for writing CUDA kernels are 67 | :func:`deposit` (our fine particle loop) and 68 | :func:`move_smart` (our coarse particle loop). 69 | 70 | 71 | Copying is expensive 72 | -------------------- 73 | If the arrays were copied between GPU RAM and host RAM, the PCI-E bandwidth would become a bottleneck. 74 | The two most useful strategies to minimize excessive copying are 75 | 76 | 1. churning for several consecutive :math:`\xi`-steps 77 | with no copying and no CPU-side data processing 78 | (with a notable exception of :func:`beam` and the resulting `beam_ro`); and 79 | 80 | 2. copying only the subset of the arrays that the outer diagnostics code needs. 81 | 82 | 83 | .. _array_conversion: 84 | 85 | GPU array conversion 86 | -------------------- 87 | In order for ``a + b`` to work in ``cupy``, 88 | both arrays have to be copied to GPU (``cupy.asarray(a)``) and, 89 | in case you want the results back as ``numpy`` arrays, you have to explicitly copy them back 90 | (``gpu_array.get()``). 91 | 92 | While for the LCODE 3D itself it's easier and quicker to stick to using GPU arrays exclusively, 93 | this means the only time when we want to do the conversion to ``numpy`` is when we are returning 94 | the results back to the external code. 95 | 96 | There are two classes that assist in copying the arrays back and forth and conveniently as possible. 97 | The implementation looks a bit nightmarish, but using them is simple. 98 | 99 | .. autoclass:: lcode.GPUArrays 100 | 101 | .. 
autoclass:: lcode.GPUArraysView 102 | 103 | This way we can wrap everything we need in GPUArrays with, e.g., 104 | ``const = GPUArrays(x_init=x_init, y_init=y_init, ...)`` 105 | and then access them as ``const.x_init`` from GPU-heavy code. 106 | For the outer code that does not care about GPU arrays at all, 107 | we can return a wrapped ``const_view = GPUArraysView(const)`` 108 | and access the arrays as ``const_view.x_init``. 109 | 110 | Copying will happen on-demand during the attribute access, 111 | intercepted by our ``__getattr__`` implementation, 112 | but beware! 113 | 114 | .. note:: 115 | Repeatedly accessing ``const_view.x_init`` will needlessly perform the copying again, 116 | so one should bind it to the variable name (``x_init = const_view.x_init``) once 117 | and reuse the resulting ``numpy`` array. 118 | 119 | .. todo:: CODE: wrap the returned array with GPUArraysView by default 120 | 121 | 122 | Selecting GPU 123 | ------------- 124 | 125 | .. autodata:: config_example.gpu_index 126 | 127 | LCODE 3D currently does not support utilizing several GPUs for one simulation, 128 | but once we switch to beam evolution calculation, 129 | processing several consecutive :math:`t`-steps in a pipeline of several GPUs 130 | should be a low hanging fruit. 131 | -------------------------------------------------------------------------------- /docs/technicalities/grid_sizes.rst: -------------------------------------------------------------------------------- 1 | Optimal transverse grid sizes 2 | ============================= 3 | 4 | FFT works best for grid sizes that are factorizable into small numbers. 5 | Any size will work, but the performance may vary dramatically. 6 | 7 | FFTW documentation quotes the optimal size for their algorithm as 8 | :math:`2^a 3^b 5^c 7^d 11^e 13^f`, 9 | where :math:`e+f` is either :math:`0` or :math:`1`, 10 | and the other exponents are arbitrary. 
11 | 12 | While LCODE 3D does not use FFTW (it uses ``cufft`` instead, wrapped by ``cupy``), 13 | the formula is still quite a good rule of thumb 14 | for calculating performance-friendly ``config_example.grid_steps`` values. 15 | 16 | The employed FFT sizes for a grid sized :math:`N` are :math:`2N-2` 17 | for both DST (:func:`dst2d`, :func:`mix2d`) and DCT transforms (:func:`dct2d`, :func:`mix2d`) 18 | when we take padding and perimeter cell stripping into account. 19 | 20 | This leaves us to find such :math:`N` that :math:`N-1` satisfies the small-factor conditions. 21 | 22 | If you don't mind arbitrary grid sizes, we suggest using 23 | 24 | 1. :math:`N=2^K + 1`, they always perform the best, or 25 | 26 | 2. one of the roundish 27 | :math:`201`, :math:`301`, :math:`401`, :math:`501`, :math:`601`, :math:`701`, :math:`801`, :math:`901`, 28 | :math:`1001`, :math:`1101`, :math:`1201`, :math:`1301`, :math:`1401`, :math:`1501`, :math:`1601`, :math:`1801`, 29 | :math:`2001`, :math:`2101`, :math:`2201`, :math:`2401`, :math:`2501`, :math:`2601`, :math:`2701`, :math:`2801`, 30 | :math:`3001`, :math:`3201`, :math:`3301`, :math:`3501`, :math:`3601`, :math:`3901`, :math:`4001`. 31 | 32 | The code to check for the FFTW criteria above and some of the matching numbers are listed below. 33 | 34 | .. 
code-block:: python 35 | 36 | def factorize(n, a=[]): 37 | if n <= 1: 38 | return a 39 | for i in range(2, n + 1): 40 | if n % i == 0: 41 | return factorize(n // i, a + [i]) 42 | 43 | def good_size(n): 44 | factors = factorize(n - 1) 45 | return (all([f in [2, 3, 4, 5, 7, 11, 13] for f in factors]) 46 | and actors.count(11) + factors.count(13) < 2 and 47 | and n % 2) 48 | 49 | ', '.join([str(a) for a in range(20, 4100) if good_size(a)]) 50 | 51 | :math:`21`, :math:`23`, :math:`25`, :math:`27`, :math:`29`, :math:`31`, :math:`33`, :math:`37`, :math:`41`, :math:`43`, :math:`45`, :math:`49`, :math:`51`, :math:`53`, :math:`55`, :math:`57`, :math:`61`, :math:`65`, :math:`67`, :math:`71`, :math:`73`, :math:`79`, :math:`81`, :math:`85`, :math:`89`, :math:`91`, :math:`97`, :math:`99`, :math:`101`, :math:`105`, :math:`109`, :math:`111`, :math:`113`, :math:`121`, :math:`127`, :math:`129`, :math:`131`, :math:`133`, :math:`141`, :math:`145`, :math:`151`, :math:`155`, :math:`157`, :math:`161`, :math:`163`, :math:`169`, :math:`177`, :math:`181`, :math:`183`, :math:`193`, :math:`197`, :math:`199`, :math:`201`, :math:`209`, :math:`211`, :math:`217`, :math:`221`, :math:`225`, :math:`235`, :math:`241`, :math:`251`, :math:`253`, :math:`257`, :math:`261`, :math:`265`, :math:`271`, :math:`281`, :math:`289`, :math:`295`, :math:`301`, :math:`309`, :math:`313`, :math:`321`, :math:`325`, :math:`331`, :math:`337`, :math:`351`, :math:`353`, :math:`361`, :math:`365`, :math:`379`, :math:`385`, :math:`391`, :math:`393`, :math:`397`, :math:`401`, :math:`417`, :math:`421`, :math:`433`, :math:`441`, :math:`449`, :math:`451`, :math:`463`, :math:`469`, :math:`481`, :math:`487`, :math:`491`, :math:`501`, :math:`505`, :math:`513`, :math:`521`, :math:`529`, :math:`541`, :math:`547`, :math:`551`, :math:`561`, :math:`577`, :math:`589`, :math:`595`, :math:`601`, :math:`617`, :math:`625`, :math:`631`, :math:`641`, :math:`649`, :math:`651`, :math:`661`, :math:`673`, :math:`687`, :math:`701`, :math:`703`, 
:math:`705`, :math:`721`, :math:`729`, :math:`751`, :math:`757`, :math:`769`, :math:`771`, :math:`781`, :math:`785`, :math:`793`, :math:`801`, :math:`811`, :math:`833`, :math:`841`, :math:`865`, :math:`881`, :math:`883`, :math:`897`, :math:`901`, :math:`911`, :math:`925`, :math:`937`, :math:`961`, :math:`973`, :math:`981`, :math:`991`, :math:`1001`, :math:`1009`, :math:`1025`, :math:`1041`, :math:`1051`, :math:`1057`, :math:`1079`, :math:`1081`, :math:`1093`, :math:`1101`, :math:`1121`, :math:`1135`, :math:`1153`, :math:`1171`, :math:`1177`, :math:`1189`, :math:`1201`, :math:`1233`, :math:`1249`, :math:`1251`, :math:`1261`, :math:`1275`, :math:`1281`, :math:`1297`, :math:`1301`, :math:`1321`, :math:`1345`, :math:`1351`, :math:`1373`, :math:`1387`, :math:`1401`, :math:`1405`, :math:`1409`, :math:`1441`, :math:`1457`, :math:`1459`, :math:`1471`, :math:`1501`, :math:`1513`, :math:`1537`, :math:`1541`, :math:`1561`, :math:`1569`, :math:`1585`, :math:`1601`, :math:`1621`, :math:`1639`, :math:`1651`, :math:`1665`, :math:`1681`, :math:`1729`, :math:`1751`, :math:`1761`, :math:`1765`, :math:`1783`, :math:`1793`, :math:`1801`, :math:`1821`, :math:`1849`, :math:`1873`, :math:`1891`, :math:`1921`, :math:`1945`, :math:`1951`, :math:`1961`, :math:`1981`, :math:`2001`, :math:`2017`, :math:`2049`, :math:`2059`, :math:`2081`, :math:`2101`, :math:`2107`, :math:`2113`, :math:`2157`, :math:`2161`, :math:`2185`, :math:`2201`, :math:`2241`, :math:`2251`, :math:`2269`, :math:`2305`, :math:`2311`, :math:`2341`, :math:`2353`, :math:`2377`, :math:`2401`, :math:`2431`, :math:`2451`, :math:`2465`, :math:`2497`, :math:`2501`, :math:`2521`, :math:`2549`, :math:`2561`, :math:`2593`, :math:`2601`, :math:`2641`, :math:`2647`, :math:`2689`, :math:`2701`, :math:`2731`, :math:`2745`, :math:`2751`, :math:`2773`, :math:`2801`, :math:`2809`, :math:`2817`, :math:`2881`, :math:`2913`, :math:`2917`, :math:`2941`, :math:`2971`, :math:`3001`, :math:`3025`, :math:`3073`, :math:`3081`, :math:`3121`, 
:math:`3137`, :math:`3151`, :math:`3169`, :math:`3201`, :math:`3235`, :math:`3241`, :math:`3251`, :math:`3277`, :math:`3301`, :math:`3329`, :math:`3361`, :math:`3403`, :math:`3431`, :math:`3457`, :math:`3501`, :math:`3511`, :math:`3521`, :math:`3529`, :math:`3565`, :math:`3585`, :math:`3601`, :math:`3641`, :math:`3697`, :math:`3745`, :math:`3751`, :math:`3781`, :math:`3823`, :math:`3841`, :math:`3851`, :math:`3889`, :math:`3901`, :math:`3921`, :math:`3961`, :math:`4001`, :math:`4033`, :math:`4051`, :math:`4097` 52 | -------------------------------------------------------------------------------- /docs/technicalities/initialization.rst: -------------------------------------------------------------------------------- 1 | Initialization 2 | ============== 3 | 4 | .. autofunction:: lcode.init 5 | 6 | This function performs quite a boring sequence of actions, outlined here for interlinking purposes: 7 | 8 | * validates the oddity of ``config.grid_steps`` 9 | [:ref:`fields_and_densities_grid`], 10 | * validates that ``config.reflect_padding_steps`` is large enough 11 | [:ref:`reflect_and_plasma_boundaries`], 12 | * calculates the ``reflect_boundary`` and monkey-patches it back into 13 | ``config``, 14 | * initializes the ``x`` and ``y`` arrays for use in :func:`config_example.beam`, 15 | * calculates the plasma placement boundary, 16 | * immediately passes it to :func:`make_plasma`, leaving it oblivious to the padding concerns, 17 | * performs the initial electrion deposition to obtain the background ions charge density 18 | [:doc:`../tour/background_ions`], 19 | * groups the constant arrays into a :class:`GPUArray` instance ``const``, and 20 | * groups the evolving arrays into a :class:`GPUArray` instance ``state``. 
21 | -------------------------------------------------------------------------------- /docs/technicalities/offsets.rst: -------------------------------------------------------------------------------- 1 | Offset-coordinate separation 2 | ============================ 3 | 4 | Float precision loss 5 | -------------------- 6 | When floating point numbers of different magnitudes get added up, 7 | there is an inherent precision loss that grows with the magnitude disparity. 8 | 9 | If a particle has a large coordinate (think ``5.223426``), 10 | but moves for a small distance (think ``7.139152e-4``) due to low ``xi_step_size`` 11 | and small momentum projection, calculating the sum of these numbers 12 | suffers from the precision loss due to the finite significand size: 13 | 14 | An oversimplified illustration in decimal notation:: 15 | 16 | 5.223426 17 | +0.0007139152 18 | =5.224139LOST 19 | 20 | We have not conducted extensive research on how detrimental this round-off accumulation 21 | is to LCODE 3D numerical stability in :math:`\xi`. 22 | Currently the transverse noise dominates, 23 | but in order to make our implementation a bit more future-proof, 24 | we store the plasma particle coordinates separated into two floats: 25 | initial position (``x_init``, ``y_init``) and accumulated offset (``x_offt``, ``y_offt``) 26 | and do not mix them. 27 | 28 | 29 | Mixing them all the time... 30 | --------------------------- 31 | OK, we do mix them. Each and every function involving them adds them up at some point 32 | and even has the code like this: 33 | 34 | .. code-block:: python 35 | 36 | x = x_init + x_offt 37 | ... 38 | x_offt = x - x_init 39 | 40 | to reconstruct ``x_offt`` from the 'dirty' sum values ``x``. 41 | 42 | We do that because we're fine with singular round-off errors until they don't propagate 43 | to the next step, accumulating 44 | for millions of :math:`\xi`-steps ('Test 1' simulations were conducted for up to 1.5 million steps). 45 | 46 | 47 | ... 
but not where it really matters 48 | ----------------------------------- 49 | 50 | This way the only places where the separation should be preserved 51 | is the path from ``prev.x_offt`` to ``new_state.x_offt``. 52 | Several ``x_offt`` additions are performed and rolled back 53 | at each :math:`\xi`-step, 54 | but only two kinds of them persist, both residing in :func:`move_smart`: 55 | 56 | 1. ``x_offt += px / (gamma_m - pz) * xi_step_size`` does no mixing with the coordinate values, and 57 | 58 | 2. ``x = +2 * reflect_boundary - x`` and the similar one for the left boundary 59 | only happen during particle reflection, 60 | which presumably happens rarely and only affects the particles that have already deviated 61 | at least several cells away from the initial position. 62 | 63 | This way most particles won't experience this kind of rounding issues with their coordinates. 64 | On the flip side, splitting the coordinates makes working with them quite unwieldy. 65 | -------------------------------------------------------------------------------- /docs/toc-overview.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 1 3 | :caption: Overview 4 | 5 | overview/problem 6 | overview/trickery 7 | overview/window_and_grids 8 | overview/units 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/toc-technicalities.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 1 3 | :caption: Technicalities 4 | 5 | technicalities/initialization 6 | technicalities/gpu 7 | technicalities/grid_sizes 8 | technicalities/offsets 9 | technicalities/design_decisions 10 | -------------------------------------------------------------------------------- /docs/toc-tour.rst: -------------------------------------------------------------------------------- 1 | .. 
toctree:: 2 | :maxdepth: 1 3 | :caption: A tour of the simulations 4 | 5 | tour/Ez 6 | tour/ExEyBxBy 7 | tour/Bz 8 | tour/plasma 9 | tour/coarse_and_fine_plasma 10 | tour/deposition 11 | tour/background_ions 12 | tour/plasma_pusher 13 | tour/beam 14 | tour/xi_step 15 | -------------------------------------------------------------------------------- /docs/toc-usage.rst: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 1 3 | :caption: Usage 4 | 5 | usage/installation 6 | usage/running_and_embedding 7 | -------------------------------------------------------------------------------- /docs/tour/Bz.rst: -------------------------------------------------------------------------------- 1 | Bz 2 | == 3 | 4 | Equations 5 | --------- 6 | We want to solve 7 | 8 | .. math:: 9 | 10 | \Delta_\perp B_z = \frac{\partial j_x}{\partial y} - \frac{\partial j_y}{\partial x} 11 | 12 | with Neumann boundary conditions (derivative = 0). 13 | 14 | 15 | Method 16 | ------ 17 | The algorithm can be succinctly written as ``iDCT2D(neumann_matrix * DCT2D(RHS))``, 18 | where ``DCT2D`` and ``iDCT2D`` are 19 | Type-1 Forward and Inverse Discrete Sine 2D Trasforms respectively, 20 | ``RHS`` is the right-hand side of the equiation above, 21 | and ``neumann_matrix`` is a 'magical' matrix that does all the work. 22 | 23 | 24 | .. autofunction:: lcode.neumann_matrix 25 | 26 | In addition to the magic values, it also hosts the DCT normalization multiplier. 27 | 28 | .. todo:: DOCS: expand with method description (Kargapolov, Shalimova) 29 | 30 | 31 | .. autofunction:: lcode.calculate_Bz 32 | 33 | Note that this time the outer cells do not participate in the calculations, 34 | so the RHS derivatives are padded with zeroes in the beginning. 35 | 36 | 37 | DCT2D 38 | ----- 39 | 40 | .. 
autofunction:: lcode.dct2d 41 | 42 | As ``cupy`` currently ships no readily available function for calculating 43 | the DCT2D on the GPU, we roll out our own FFT-based implementation. 44 | 45 | We don't need to make a separate iDCT2D function 46 | as (for Type-1) it matches DCT2D up to the normalization multiplier, 47 | which is taken into account in :func:`neumann_matrix`. 48 | -------------------------------------------------------------------------------- /docs/tour/ExEyBxBy.rst: -------------------------------------------------------------------------------- 1 | Ex, Ey, Bx, By 2 | ============== 3 | 4 | Theory 5 | ------ 6 | We want to solve 7 | 8 | .. math:: 9 | 10 | \Delta_\perp E_x &= \frac{\partial \rho}{\partial x} - \frac{\partial j_x}{\partial \xi} 11 | 12 | \Delta_\perp E_y &= \frac{\partial \rho}{\partial y} - \frac{\partial j_y}{\partial \xi} 13 | 14 | \Delta_\perp B_x &= \frac{\partial j_y}{\partial \xi} - \frac{\partial j_z}{\partial y} 15 | 16 | \Delta_\perp B_y &= \frac{\partial j_z}{\partial x} - \frac{\partial j_x}{\partial \xi} 17 | 18 | 19 | with mixed boundary conditions. 20 | 21 | .. todo:: DOCS: specify the boundary conditions. 22 | 23 | Unfortunately, what we actually solve is no less than three steps away from these. 24 | 25 | 26 | .. _helmholtz: 27 | 28 | Helmholtz equations 29 | ------------------- 30 | The harsh reality of numerical stability forces us to solve 31 | these Helmholtz equations instead: 32 | 33 | .. math:: 34 | 35 | \Delta_\perp E_x - E_x &= \frac{\partial \rho}{\partial x} - \frac{\partial j_x}{\partial \xi} - E_x 36 | 37 | \Delta_\perp E_y - E_y &= \frac{\partial \rho}{\partial y} - \frac{\partial j_y}{\partial \xi} - E_y 38 | 39 | \Delta_\perp B_x - B_x &= \frac{\partial j_y}{\partial \xi} - \frac{\partial j_z}{\partial y} - B_x 40 | 41 | \Delta_\perp B_y - B_y &= \frac{\partial j_z}{\partial x} - \frac{\partial j_x}{\partial \xi} - B_y 42 | 43 | 44 | .. 
note:: 45 | The behaviour is actually configurable with 46 | ``config.field_solver_subtraction_trick`` (what a mouthful). 47 | ``0`` or ``False`` corresponds to Laplace equation, 48 | and while any floating-point values or whole matrices of them should be accepted, 49 | it's recommended to simply use ``1`` or ``True`` instead. 50 | 51 | .. autodata:: config_example.field_solver_subtraction_trick 52 | 53 | 54 | Method 55 | ------ 56 | The algorithm can be succinctly written as ``iMIX2D(mixed_matrix * MIX2D(RHS))``, 57 | where ``MIX2D`` and ``iMIX2D`` are 58 | Type-1 Forward and Inverse Discrete Trasforms, Sine in one direction and Cosine in the other. 59 | ``RHS`` is the right-hand side of the equiation above, 60 | and ``dirichlet_matrix`` is a 'magical' matrix that does all the work. 61 | 62 | 63 | .. autofunction:: lcode.mixed_matrix 64 | 65 | .. todo:: DOCS: expand with method description (Kargapolov, Shalimova) 66 | 67 | 68 | .. autofunction:: lcode.calculate_Ex_Ey_Bx_By 69 | 70 | Note that some outer cells do not participate in the calculations, 71 | and the result is simply padded with zeroes in the end. 72 | We don't define separate functions for separate boundary condition types 73 | and simply transpose the input and output data. 74 | 75 | 76 | DST-DCT Hybrid 77 | -------------- 78 | 79 | .. autofunction:: lcode.mix2d 80 | 81 | As ``cupy`` currently ships no readily available function for calculating 82 | even 1D DST/DCT on the GPU, 83 | we, once again, roll out our own FFT-based implementation. 84 | 85 | We don't need a separate function for the inverse transform, 86 | as it matches the forward one up to the normalization multiplier, 87 | which is taken into account in :func:`mixed_matrix`. 88 | 89 | 90 | .. _variant_a: 91 | 92 | Variant B 93 | --------- 94 | But wait, the complications don't stop here. 
While we do have a successfully implemented :math:`(\Delta_\perp - 1)` inverse operator,
math:: 137 | 138 | (\Delta_\perp - 1) E_x^{\mathrm{halfstep}} &= 139 | \frac{\partial \rho^{\mathrm{avg}}}{\partial x} - 140 | (\frac{\partial j_x}{\partial \xi})^{\mathrm{halfstep}} - 141 | E_x^{\mathrm{avg}} 142 | 143 | (\Delta_\perp - 1) E_y^{\mathrm{halfstep}} &= 144 | \frac{\partial \rho^{\mathrm{avg}}}{\partial y} - 145 | (\frac{\partial j_y}{\partial \xi})^{\mathrm{halfstep}} - 146 | E_y^{\mathrm{avg}} 147 | 148 | (\Delta_\perp - 1) B_x^{\mathrm{halfstep}} &= 149 | (\frac{\partial j_y}{\partial \xi})^{\mathrm{halfstep}} - 150 | \frac{\partial j_z^{\mathrm{avg}}}{\partial y} - 151 | B_x^{\mathrm{avg}} 152 | 153 | (\Delta_\perp - 1) B_y^{\mathrm{halfstep}} &= 154 | \frac{\partial j_z^{\mathrm{avg}}}{\partial x} - 155 | (\frac{\partial j_x}{\partial \xi})^{\mathrm{halfstep}} - 156 | B_y^{\mathrm{avg}} 157 | 158 | 159 | and calculates the fields at next step in the following fashion: :math:`E_x^{next} = 2 E_x^{avg} - E_x^{prev}`, e.t.c. 160 | 161 | Solving these is equivalent to solving Variant B equations 162 | with averaged fields, :math:`\rho` and :math:`j_z` and applying the above transformation to the result. 163 | See :func:`lcode.step` for the wrapping code that does that. 164 | 165 | .. autodata:: config_example.field_solver_variant_A 166 | -------------------------------------------------------------------------------- /docs/tour/Ez.rst: -------------------------------------------------------------------------------- 1 | Ez 2 | == 3 | 4 | Equations 5 | --------- 6 | We want to solve 7 | 8 | .. math:: 9 | 10 | \Delta_\perp E_z = \frac{\partial j_x}{\partial x} - \frac{\partial j_y}{\partial y} 11 | 12 | with Dirichlet (zero) boundary conditions. 
13 | 14 | 15 | Method 16 | ------ 17 | The algorithm can be succinctly written as ``iDST2D(dirichlet_matrix * DST2D(RHS))``, 18 | where ``DST2D`` and ``iDST2D`` are 19 | Type-1 Forward and Inverse Discrete Sine 2D Transforms respectively, 20 | ``RHS`` is the right-hand side of the equation above, 21 | and ``dirichlet_matrix`` is a 'magical' matrix that does all the work. 22 | 23 | 24 | .. autofunction:: lcode.dirichlet_matrix 25 | 26 | In addition to the magic values, it also hosts the DST normalization multiplier. 27 | 28 | .. todo:: DOCS: expand with method description (Kargapolov, Shalimova) 29 | 30 | 31 | .. autofunction:: lcode.calculate_Ez 32 | 33 | Note that the outer cells do not participate in the calculations, 34 | and the result is simply padded with zeroes in the end. 35 | 36 | 37 | DST2D 38 | ----- 39 | 40 | .. autofunction:: lcode.dst2d 41 | 42 | As ``cupy`` currently ships no readily available function for calculating 43 | the DST2D on the GPU, we roll out our own FFT-based implementation. 44 | 45 | We don't need to make a separate iDST2D function 46 | as (for Type-1) it matches DST2D up to the normalization multiplier, 47 | which is taken into account in :func:`dirichlet_matrix`. 48 | -------------------------------------------------------------------------------- /docs/tour/background_ions.rst: -------------------------------------------------------------------------------- 1 | Background ions 2 | =============== 3 | 4 | LCODE 3D currently simulates only the movement of plasma electrons. 5 | The ions are modelled as a constant charge density distribution component 6 | that is calculated from the initial electron placement during 7 | the :doc:`initialization <../technicalities/initialization>`. 8 | 9 | .. autofunction:: lcode.initial_deposition 10 | 11 | For this initial deposition invocation, 12 | the ion density argument is specified as ``0``.
13 | 14 | The result is stored as ``const.ro_initial`` 15 | and passed to every consecutive :func:`lcode.deposit` invocation. 16 | -------------------------------------------------------------------------------- /docs/tour/beam.rst: -------------------------------------------------------------------------------- 1 | Beam 2 | ==== 3 | 4 | The beam is currently simulated as a charge density function :math:`\rho_b(\xi, x, y)`, 5 | and not with particles. 6 | 7 | In the future, there will certainly be a way to define a beam 8 | with particles and simulate beam-plasma interaction both ways, 9 | but for now only simulating a plasma response to a rigid beam is possible. 10 | 11 | .. autofunction:: config_example.beam 12 | 13 | The user should specify the beam charge density as a function in the configuration file. 14 | 15 | ``xi_i`` is not the value of the :math:`\xi` coordinate, but the step index. 16 | Please use something along the lines of ``xi = -xi_i * xi_step_size + some_offset``, 17 | according to where exactly in :math:`\xi` do you define the beam density slices 18 | [:ref:`integer_xi_steps`]. 19 | 20 | ``x`` and ``y`` are ``numpy`` arrays, so one should use vectorized numpy operations 21 | to calculate the desired beam charge density, like ``numpy.exp(-numpy.sqrt(x**2 + y**2))``. 22 | 23 | The function should ultimately return an array with the same shape as ``x`` and ``y``. 24 | 25 | .. todo:: CODE: Simulate the beam with particles and evolve it according to the plasma response. 26 | -------------------------------------------------------------------------------- /docs/tour/coarse_and_fine_plasma.rst: -------------------------------------------------------------------------------- 1 | Coarse and fine plasma 2 | ====================== 3 | 4 | Concept 5 | ------- 6 | In order to increase stability and combat transverse grid noise, 7 | LCODE 3D utilises a dual plasma approach. 8 | 9 | ..
figure:: ../illustrations/virtplasma.png 10 | 11 | Positioning of the coarse and fine particles in dual plasma approach. 12 | 13 | 14 | Coarse particles are the ones that get tracked throughout the program, 15 | and pushed by the pusher. The coarse plasma grid is many times more sparse 16 | than the fields grid, think :math:`\frac{1}{9}` particles per cell. 17 | 18 | .. autodata:: config_example.plasma_coarseness 19 | 20 | Fine particles only exist inside the deposition phase. 21 | There are several fine particles per cell, think :math:`4` or more. 22 | Their characteristic values are neither stored nor evolved; 23 | instead they are interpolated from the coarse particle grid as a part of 24 | the deposition process (and they don't 'exist' in any form outside of it). 25 | 26 | .. autodata:: config_example.plasma_fineness 27 | 28 | 29 | Initialization 30 | -------------- 31 | .. autofunction:: lcode.make_coarse_plasma_grid 32 | 33 | .. autofunction:: lcode.make_fine_plasma_grid 34 | 35 | * ``fineness=3`` (and ``coarseness=2``):: 36 | 37 | +-----------+-----------+-----------+-----------+ 38 | | . . . | . . . | . . . | . . . | 39 | | | | | | . - fine particle 40 | | . . . | . * . | . . . | . * . | 41 | | | | | | * - coarse+fine particle 42 | | . . . | . . . | . . . | . . . | 43 | +-----------+-----------+-----------+-----------+ 44 | 45 | * ``fineness=2`` (and ``coarseness=2``):: 46 | 47 | +-------+-------+-------+-------+-------+ 48 | | . . | . . | . . | . . | . . | . - fine particle 49 | | | * | | * | | 50 | | . . | . . | . . | . . | . . | * - coarse particle 51 | +-------+-------+-------+-------+-------+ 52 | 53 | .. autofunction:: lcode.make_plasma 54 | 55 | Initializing coarse particles is pretty simple: 56 | ``coarse_x_init`` and ``coarse_y_init`` are broadcasted output of :func:`make_coarse_plasma_grid`. 57 | ``coarse_x_offt`` and ``coarse_y_offt`` are zeros and so are ``coarse_px``, ``coarse_py`` and ``coarse_pz``.
58 | ``coarse_m`` and ``coarse_q`` are constants divided by the factor of coarseness by fineness squared 59 | because fine particles represent smaller macroparticles. 60 | 61 | Initializing fine particle boils down to calculating the interpolation coefficients 62 | (``influence_prev`` and ``influence_next``) 63 | and the indices of the coarse particles (``indices_prev``, ``indices_next``) 64 | that the characteristics will be intepolated from. 65 | 66 | ``influence_prev`` and ``influence_next`` are linear interpolation coefficients 67 | based on the initial closest coarse particle positioning. 68 | Note that these are constant and do not change in :math:`\xi`. 69 | The edges get special treatment later. 70 | 71 | ``indices_next`` happens to be pretty much equal to ``np.searchsorted(coarse_grid, fine_grid)`` 72 | and ``indices_prev`` is basically ``indices_next - 1``, 73 | except for the edges, 74 | where a fine particle can have less than four 'parent' coarse particles. 75 | For such 'outer' particles, existing coarse particles are used instead, 76 | so clipping the indices and fixing ``influence``-arrays is carried out. 77 | 78 | Note that these arrays are 1D for memory considerations [:ref:`memory_considerations`]. 79 | 80 | The function returns the coarse particles and ``virtparams``: 81 | a ``GPUArrays`` instance 82 | that conveniently groups the fine-particle related arrays, 83 | which only matter during deposition, 84 | under a single name. 85 | 86 | 87 | Coarse-to-fine interpolation 88 | ---------------------------- 89 | 90 | .. figure:: ../illustrations/virtplasma_moved.png 91 | 92 | .. autofunction:: lcode.mix 93 | 94 | This is just a shorthand for the characteristic value mixing 95 | for internal use in ``coarse_to_fine``. 96 | 97 | .. figure:: ../illustrations/coarse_to_fine.png 98 | 99 | .. autofunction:: lcode.coarse_to_fine 100 | 101 | The internals are pretty straightforward 102 | once you wrap your head around the indexing. 
103 | 104 | A single fine particle with the indices ``[fi, fj]`` 105 | (in fine particles ``virt_params`` 1D arrays) 106 | is interpolated from four particles with indices 107 | ``[pi, pj]``, ``[pi, nj]``, ``[ni, pj]``, ``[ni, nj]`` 108 | (in coarse particles ``c_*`` arrays) 109 | and four weights ``A``, ``B``, ``C``, ``D`` respectively. 110 | The weights are, in turn, composed as a products of 111 | values from ``influence_prev`` and ``influence_next`` arrays, 112 | indiced, once again, with ``[fi, fj]``. 113 | It would be convenient to calculate them beforehand, 114 | but they are recalculated instead as a result of time-memory tradeoff 115 | [:ref:`memory_considerations`]. 116 | 117 | Finally, momenta, charge and mass are scaled 118 | according to the coarse-to-fine macrosity coefficient 119 | discussed above. 120 | 121 | 122 | 123 | Alternative illustration 124 | ------------------------ 125 | 126 | .. plot:: 127 | 128 | import numpy as np 129 | import matplotlib as mpl 130 | import matplotlib.pyplot as plt 131 | 132 | XI_STEPS, XI_STEP_SIZE = 12, .1 133 | LIM = .5 134 | 135 | def make_coarse_plasma_grid(steps, step_size, coarseness=3): 136 | plasma_step = step_size * coarseness 137 | right_half = np.arange(steps // (coarseness * 2)) * plasma_step 138 | left_half = -right_half[:0:-1] # invert, reverse, drop zero 139 | plasma_grid = np.concatenate([left_half, right_half]) 140 | return plasma_grid 141 | 142 | 143 | def make_fine_plasma_grid(steps, step_size, fineness=2): 144 | plasma_step = step_size / fineness 145 | if fineness % 2: # some on zero axes, none on cell corners 146 | right_half = np.arange(steps // 2 * fineness) * plasma_step 147 | left_half = -right_half[:0:-1] # invert, reverse, drop zero 148 | else: # none on zero axes, none on cell corners 149 | right_half = (.5 + np.arange(steps // 2 * fineness)) * plasma_step 150 | left_half = -right_half[::-1] # invert, reverse 151 | plasma_grid = np.concatenate([left_half, right_half]) 152 | return 
plasma_grid 153 | 154 | cells = make_coarse_plasma_grid(XI_STEPS, XI_STEP_SIZE, coarseness=1) 155 | for x in cells: 156 | for y in cells: 157 | r = mpl.patches.Rectangle((x - XI_STEP_SIZE / 2, y - XI_STEP_SIZE / 2), 158 | XI_STEP_SIZE, XI_STEP_SIZE, 159 | linewidth=.3, 160 | edgecolor='black', facecolor='none') 161 | plt.axes().add_patch(r) 162 | cell_x, cell_y = np.meshgrid(cells, cells) 163 | plt.scatter(cell_x, cell_y, marker='+', color='red', s=.1) 164 | 165 | fine = make_fine_plasma_grid(XI_STEPS, XI_STEP_SIZE, fineness=2) 166 | fine_x, fine_y = np.meshgrid(fine, fine) 167 | plt.scatter(fine_x, fine_y, marker='.', color='blue', s=10) 168 | 169 | coarse = make_coarse_plasma_grid(XI_STEPS, XI_STEP_SIZE, coarseness=3) 170 | coarse_x, coarse_y = np.meshgrid(coarse, coarse) 171 | plt.scatter(coarse_x, coarse_y, 172 | marker='.', color='green', facecolor='none', s=80) 173 | 174 | plt.xlim(-LIM, LIM) 175 | plt.ylim(-LIM, LIM) 176 | plt.axes().set_aspect('equal') 177 | plt.show() 178 | -------------------------------------------------------------------------------- /docs/tour/deposition.rst: -------------------------------------------------------------------------------- 1 | Deposition 2 | ========== 3 | 4 | Deposition operates on fine particles. 5 | Once the :doc:`coarse-to-fine interpolation <../tour/coarse_and_fine_plasma>` is out of the picture, 6 | there isn't much left to discuss. 7 | 8 | .. autofunction:: lcode.deposit_kernel 9 | 10 | First, the fine particle characteristics are interpolated from the coarse ones. 11 | Then the total contribution of the particles to the density and the currents 12 | is calculated and, finally, 13 | deposited on a grid in a 3x3 cell square with ``i``, ``j`` as its center 14 | according to the weights calculated by :func:`weights`. 15 | Finally, the :doc:`ion background density <../tour/background_ions>` 16 | is added to the resulting array. 
17 | 18 | The strange incantation at the top and 19 | the need to modify the output arrays instead of returning them 20 | are dictated by the fact that 21 | this is actually not a function, but a CUDA kernel 22 | (for more info, refer to :ref:`cuda_kernels`). 23 | It is launched in parallel for each fine particle, determines its 2D index (``fi``, ``fj``), 24 | interpolates its characteristics from coarse particles and proceeds to deposit it. 25 | 26 | .. autofunction:: lcode.deposit 27 | 28 | This function allocates the output arrays, 29 | unpacks the arguments from ``config`` and ``virt_params``, 30 | calculates the kernel dispatch parameters 31 | (for more info, refer to :ref:`cuda_kernels`), 32 | and launches the kernel. 33 | 34 | .. todo:: DOCS: explain deposition contribution formula (Lotov) 35 | -------------------------------------------------------------------------------- /docs/tour/plasma.rst: -------------------------------------------------------------------------------- 1 | Plasma 2 | ====== 3 | 4 | Characteristics 5 | --------------- 6 | 7 | A plasma particle has these characteristics according to our model: 8 | 9 | * Coordinates :math:`x` and :math:`y`, stored as ``x_init + x_offt`` and ``y_init + y_offt`` 10 | [:doc:`../technicalities/offsets`]. 11 | * Momenta :math:`p_x`, :math:`p_y` and :math:`p_z`, stored as ``px``, ``py`` and ``pz``. 12 | * Charge :math:`q`, stored as ``q``. 13 | * Mass :math:`m`, stored as ``m``. 14 | 15 | 16 | .. _plasma_particle_shape: 17 | 18 | Shape 19 | ----- 20 | 21 | From the interpolation/deposition perspective, a plasma particle represents not a point in space, 22 | but a 2D Triangular-Shaped Cloud (TSC2D). 23 | 24 | These clouds always (partially) cover an area the size of :math:`3x3` cells: 25 | the one where their center lies and eight neighbouring ones. 26 | 27 | .. figure:: ../illustrations/tsc2d.png 28 | 29 | .. todo:: DOCS: WRITE: write a nicer formula for the weights of each cell. 30 | 31 | ..
autofunction:: lcode.weights 32 | 33 | The same coefficients are used for both deposition of the particle characteristics onto the grid 34 | [:doc:`deposition`] 35 | and interpolation of the fields in the particle center positions 36 | [:doc:`plasma_pusher`]. 37 | 38 | .. autofunction:: lcode.deposit9 39 | 40 | .. autofunction:: lcode.interp9 41 | 42 | The concept is orthogonal to the coarse plasma particle shape 43 | [:doc:`coarse_and_fine_plasma`]. 44 | While a coarse particle may be considered to be a component of an elastic cloud of fine particles, 45 | each individual fine particle sports the same TSC2D shape. 46 | -------------------------------------------------------------------------------- /docs/tour/plasma_pusher.rst: -------------------------------------------------------------------------------- 1 | Plasma pusher 2 | ============= 3 | 4 | Without fields 5 | -------------- 6 | 7 | The coordinate-evolving equations of motion are as follows: 8 | 9 | .. math:: 10 | \frac{d x}{d \xi} &= -\frac{v_x}{1-v_z} 11 | 12 | \frac{d y}{d \xi} &= -\frac{v_y}{1-v_z} 13 | 14 | \vec{v} &= \frac{\vec{p}}{\sqrt{M^2+p^2}} 15 | 16 | .. autofunction:: lcode.move_estimate_wo_fields 17 | 18 | This is used at the beginning of the :doc:`xi step <xi_step>` 19 | to roughly estimate the half-step positions of the particles. 20 | 21 | The reflection here flips the coordinate, but not the momenta components. 22 | 23 | 24 | With fields 25 | ----------- 26 | 27 | The momentum-evolving equation of motion is as follows: 28 | 29 | .. math:: 30 | 31 | \frac{d \vec{p}}{d \xi} = -\frac{q}{1-v_z} \left( \vec{E} + \left[ \vec{v} \times \vec{B} \right]\right) 32 | 33 | As the particle momentum is present at both sides of the equation 34 | (as :math:`p` and :math:`v` respectively), 35 | an iterative predictor-corrector scheme is employed.
36 | 37 | The alternative is to use a symplectic solver that solves the resulting matrix equation 38 | (not mainlined at the moment, look for an alternative branch in ``t184256``'s fork). 39 | 40 | 41 | .. autofunction:: lcode.move_smart_kernel 42 | 43 | The function serves as *the* coarse particle loop, 44 | fusing together midpoint calculation, 45 | field interpolation with :func:`interp9` and 46 | particle movement 47 | for performance reasons. 48 | 49 | The equations for half-step momentum are solved twice, 50 | with more precise momentum for the second time. 51 | 52 | The particles' coordinates are advanced using half-step momentum, 53 | and afterwards the momentum is advanced to the next step. 54 | 55 | The reflection is more involved this time, affecting both the coordinates and the momenta. 56 | 57 | Note that the reflected particle offsets are mixed with the positions, 58 | resulting in a possible float precision loss [:doc:`../technicalities/offsets`]. 59 | This effect is probably negligible at this point, as the particle had to travel 60 | at least several cell sizes by then. 61 | The only place where the separation really matters is the (final) coordinate addition 62 | (``x_offt += ...`` and ``y_offt += ...``). 63 | 64 | The strange incantation at the top and 65 | the need to modify the output arrays instead of returning them 66 | are dictated by the fact that 67 | this is actually not a function, but a CUDA kernel 68 | (for more info, refer to :ref:`cuda_kernels`). 69 | It is launched in parallel for each coarse particle, determines its 1D index ``k``, 70 | interpolates the fields at its position and proceeds to move and reflect it. 71 | 72 | 73 | ..
autofunction:: lcode.move_smart 74 | 75 | This function allocates the output arrays, 76 | unpacks the arguments from ``config`` 77 | calculates the kernel dispatch parameters 78 | (for more info, refer to :ref:`cuda_kernels`), 79 | flattens the input and output array of particle characteristics 80 | (as the pusher does not care about the particle 2D indices) 81 | and launches the kernel. 82 | -------------------------------------------------------------------------------- /docs/tour/xi_step.rst: -------------------------------------------------------------------------------- 1 | Looping in xi 2 | ============= 3 | 4 | Finally, here's the function that binds it all together, 5 | and currently makes up half of LCODE 3D API. 6 | 7 | In short it: moves, deposits, estimates fields, moves, deposits, recalculates fields, moves and deposits. 8 | 9 | .. autodata:: config_example.xi_step_size 10 | 11 | .. autodata:: config_example.xi_steps 12 | 13 | .. autofunction:: lcode.step 14 | 15 | 16 | Input parameters 17 | ---------------- 18 | Beam density array `\rho_b` (``beam_ro``) is copied to the GPU with ``cupy.asarray``, 19 | as it is calculated with ``numpy`` in config-residing :func:`beam`. 20 | 21 | All the other arrays come packed in ``GPUArrays`` objects [:ref:`array_conversion`], 22 | which ensures that they reside in the GPU memory. 23 | These objects are: 24 | 25 | * ``const`` and ``virt_params``, 26 | which are constant at least for the :math:`\xi`-step duration 27 | and defined during the :doc:`initialization <../technicalities/initialization>`, and 28 | 29 | * ``prev``, 30 | which is usually obtained as the return value of the previous :func:`step` invocation, 31 | except for the very first step. 32 | 33 | 34 | Initial half-step estimation 35 | ---------------------------- 36 | 1. The particles are advanced according to their current momenta only 37 | (:func:`lcode.move_estimate_wo_fields`). 
38 | 39 | 40 | Field prediction 41 | ---------------- 42 | 43 | While we don't know the fields on the next step: 44 | 45 | 2. The particles are advanced with the fields from **the previous step** 46 | using the coordinates **estimated at 1.** to calculate the half-step positions 47 | where the **previous step** fields should be interpolated at 48 | (:func:`lcode.move_smart`). 49 | 3. The particles from **2.** are deposited onto the charge/current density grids 50 | (:func:`lcode.deposit`). 51 | 4. The fields at the next step are calculated using densities from **3.** 52 | (:func:`lcode.calculate_Ez`, :func:`lcode.calculate_Ex_Ey_Bx_By`, :func:`lcode.calculate_Bz`) 53 | and averaged with the previous fields. 54 | 55 | This phase gives us an estimation of the fields at half-step, 56 | and the coordinate estimation at next step, 57 | while all other intermediate results are ultimately ignored. 58 | 59 | 60 | Field correction 61 | ---------------- 62 | 5. The particles are advanced with the **averaged** fields from **4.**, 63 | using the coordinates **from 2.** to calculate the half-step positions 64 | where the **averaged** fields from *4.* should be interpolated at 65 | (:func:`lcode.move_smart`). 66 | 6. The particles from **5.** are deposited onto the charge/current density grids 67 | (:func:`lcode.deposit`). 68 | 7. The fields at the next step are calculated using densities from **6.** 69 | (:func:`lcode.calculate_Ez`, :func:`lcode.calculate_Ex_Ey_Bx_By`, :func:`lcode.calculate_Bz`) 70 | and averaged with the previous fields. 71 | 72 | The resulting fields are far more precise than the ones from the prediction phase, 73 | but the coordinates and momenta are still pretty low-quality until we recalculate them 74 | using the new fields. 75 | Iterating the algorithm more times improves the stability, 76 | but it currently doesn't bring much to the table as the transverse noise dominates. 
77 | 78 | 79 | Final plasma evolution and deposition 80 | ------------------------------------- 81 | 8. The particles are advanced with the **averaged** fields from **7.**, 82 | using the coordinates **from 5.** to calculate the half-step positions 83 | where the **averaged** fields from **7.** should be interpolated at 84 | (:func:`lcode.move_smart`). 85 | 9. The particles from **8.** are deposited onto the charge/current density grids 86 | (:func:`lcode.deposit`). 87 | 88 | 89 | The result, or the 'new prev' 90 | ----------------------------- 91 | The fields from 7., coordinates and momenta from 8., and densities from 9. 92 | make up the new ``GPUArrays`` collection that would be passed as ``prev`` 93 | to the next iteration of :func:`step()`. 94 | -------------------------------------------------------------------------------- /docs/usage/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Common 5 | ------ 6 | LCODE 3D requires an NVIDIA GPU with CUDA support. 7 | CUDA Compute Capability 6+ is strongly recommended 8 | for accelerated atomic operations support. 9 | 10 | On the Python front, it needs Python 3.6+ and the packages listed in ``requirements.txt``: 11 | 12 | .. literalinclude:: ../../requirements.txt 13 | 14 | Most of them are extremely popular 15 | and the only one that may be slightly problematic to obtain due to its 'teen age' is ``cupy``. 16 | 17 | 18 | Linux, distribution Python 19 | -------------------------- 20 | All the dependencies, except for, probably, ``cupy``, 21 | should be easily installable with your package manager. 22 | 23 | Install NVIDIA drivers and CUDA packages according to your distribution documentation. 24 | 25 | Install ``cupy`` according to the 26 | `official installation guide `_, 27 | unless ``5.1`` or newer is already packaged by your distribution. 
28 | 29 | 30 | Linux, Anaconda 31 | --------------- 32 | All dependencies, including ``cupy``, are available from the official conda channels. 33 | 34 | .. code-block:: bash 35 | 36 | conda install cupy 37 | 38 | or, if you are a miniconda user or a thorough kind of person, 39 | 40 | .. code-block:: bash 41 | 42 | while read req; do conda install --yes $req; done < requirements.txt 43 | 44 | You probably still need to install NVIDIA drivers and CUDA packages, 45 | follow your distribution documentation. 46 | 47 | 48 | Linux, NixOS 49 | --------------- 50 | .. code-block:: bash 51 | 52 | nix-shell 53 | 54 | In case it's not enough, consider either switching to the NVIDIA driver, or simply adding 55 | 56 | .. code-block:: nix 57 | 58 | boot.kernelPackages = pkgs.linuxPackages; # explicitly require stable kernel 59 | boot.kernelModules = [ "nvidia-uvm" ]; # should bring just enough kernel support for CUDA userspace 60 | 61 | to ``/etc/nixos/configuration.nix`` and rebuilding the system. 62 | 63 | 64 | Linux, locked-down environment 65 | ------------------------------ 66 | If you want to, e.g., run LCODE 3D on a cluster without permissions 67 | to install software the proper way, please contact the administrator first 68 | and refer them to this page. 69 | 70 | If you are sure about CUDA support and you absolutely want to install the dependencies yourself, 71 | then make sure you have Python 3.6+ and 72 | try to install ``cupy`` using the 73 | official installation guide. 74 | If you succeed, install all the other missing requirements with ``pip``'s 75 | 'User Installs' feature. 76 | Your mileage may vary. You're responsible for the cleanup. 77 | 78 | 79 | Windows, Anaconda 80 | ----------------- 81 | If ``cupy`` ``5.1`` or newer has already hit the channels, you're in luck. 82 | Just ``conda install cupy``, and you should be good to go.
83 | 84 | If https://anaconda.org/anaconda/cupy still shows 'win-64' at ``v4.1.0``, 85 | please accept our condolences and proceed to the next subsection. 86 | 87 | 88 | Windows, the hard way 89 | --------------------- 90 | * Ensure that you have Python 3.6+. 91 | * Free up some 10 GiB of disk space or more. 92 | * Verify that you're on good terms with the deity of your choice. 93 | * Install Visual Studio (Community Edition is fine) with C++ support. 94 | * Install NVIDIA CUDA Toolkit. 95 | * Follow the ``cupy`` installation guide. 96 | * Prefer installing precompiled packages, 97 | but you might also try installing from source. 98 | * Verify that it works by executing ``import cupy; (cupy.asarray([2])**2).get()`` 99 | in Python shell. 100 | * Install the other dependencies. 101 | 102 | 103 | Known to work 104 | ------------- 105 | As of early 2019, LCODE 3D is developed and known to work under: 106 | 107 | * NixOS 19.03 "Koi" 108 | * Debian 10 "Buster" + Anaconda 2019.03 109 | * Windows 10 1903 + Anaconda 2019.03 110 | -------------------------------------------------------------------------------- /docs/usage/running_and_embedding.rst: -------------------------------------------------------------------------------- 1 | Running and embedding 2 | ===================== 3 | 4 | Only two files are required 5 | --------------------------- 6 | LCODE 3D is a single-file module and you only need two files to execute it: 7 | ``lcode.py`` and ``config.py``. 8 | 9 | Installing LCODE into ``PYTHONPATH`` with the likes of ``pip install .`` is possible, 10 | but is not officially supported. 11 | 12 | 13 | Configuration 14 | ------------- 15 | LCODE 3D is configured by placing a file ``config.py`` into the current working directory. 16 | An example is provided as ``config_example.py``. 17 | 18 | The file gets imported by the standard Python importing mechanism, 19 | the resulting module is passed around internally as ``config``. 
20 | 21 | One can use all the features of Python inside the configuration file, 22 | from arithmetic expressions and functions to other modules and metaprogramming. 23 | 24 | 25 | Execution 26 | --------- 27 | ``python3 lcode.py``, ``python lcode.py`` or ``./lcode.py`` 28 | 29 | 30 | .. todo:: CODE: embedding 31 | -------------------------------------------------------------------------------- /docs/useful-links.rst: -------------------------------------------------------------------------------- 1 | Useful links 2 | ============ 3 | 4 | Internal: 5 | 6 | * `Source code listing interlinked with documentation <_modules/lcode.html>`_ 7 | * `Example configuration file interlinked with documentation <_modules/config_example.html>`_ 8 | * :ref:`Index of LCODE functions and config parameters ` 9 | 10 | External: 11 | 12 | * LCODE 3D source: https://github.com/lotov/lcode3d 13 | * LCODE 3D documentation: https://lcode3d.readthedocs.org 14 | * LCODE team website: https://lcode.info 15 | * LCODE team email: `team@lcode.info `_ 16 | 17 | -------------------------------------------------------------------------------- /lcode.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2016-2019 LCODE team . 4 | 5 | # LCODE is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU Affero General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # LCODE is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU Affero General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Affero General Public License 16 | # along with LCODE. If not, see . 
17 | 18 | 19 | from math import sqrt, floor 20 | 21 | import os 22 | import sys 23 | 24 | import matplotlib.pyplot as plt 25 | 26 | import numpy as np 27 | 28 | import numba 29 | import numba.cuda 30 | 31 | import cupy as cp 32 | 33 | import scipy.ndimage 34 | import scipy.signal 35 | 36 | 37 | # Prevent all CPU cores waiting for the GPU at 100% utilization (under conda). 38 | # os.environ['OMP_NUM_THREADS'] = '1' 39 | 40 | #: Should be detectable with newer ``cupy`` (>6.0.0b2) as 41 | #: ``WARP_SIZE = cp.cuda.Device(config.gpu_index).attributes['WarpSize']``. 42 | #: As of 2019 it's equal to 32 for all CUDA-capable GPUs. 43 | #: It's even a hardcoded value in ``cupy``. 44 | WARP_SIZE = 32 45 | 46 | ELECTRON_CHARGE = -1 47 | ELECTRON_MASS = 1 48 | 49 | 50 | # Grouping GPU arrays, with optional transparent RAM<->GPU copying # 51 | 52 | class GPUArrays: 53 | """ 54 | A convenient way to group several GPU arrays and access them with a dot. 55 | ``x = GPUArrays(something=numpy_array, something_else=another_array)`` will 56 | create ``x`` with ``x.something`` and ``x.something_else`` stored on GPU. 57 | 58 | Do not add more attributes later, specify them all at construction time. 59 | """ 60 | def __init__(self, **kwargs): 61 | """ 62 | Convert the keyword arguments to ``cupy`` arrays and assign them 63 | to the object attributes. 64 | Amounts to, e.g., ``self.something = cp.asarray(numpy_array)``, 65 | and ``self.something_else = cp.asarray(another_array)``, 66 | see class docstring. 67 | """ 68 | for name, array in kwargs.items(): 69 | setattr(self, name, cp.asarray(array)) 70 | 71 | 72 | # NOTE: The implementation may be complicated, but the usage is simple. 73 | class GPUArraysView: 74 | """ 75 | This is a magical wrapper around GPUArrays that handles GPU-RAM data 76 | transfer transparently. 77 | Accessing ``view.something`` will automatically copy array to host RAM, 78 | setting ``view.something = ...`` will copy the changes back to GPU RAM.
79 | 80 | Usage: ``view = GPUArraysView(gpu_arrays); view.something`` 81 | 82 | Do not add more attributes later, specify them all at construction time. 83 | 84 | NOTE: repeatedly accessing an attribute will result in repeated copying! 85 | """ 86 | def __init__(self, gpu_arrays): 87 | """ 88 | Wrap ``gpu_arrays`` and transparently copy data to/from GPU. 89 | """ 90 | # Could've been written as ``self._arrs = gpu_arrays`` 91 | # if only ``__setattr__`` was not overwritten! 92 | # ``super(GPUArraysView)`` is the proper way to obtain the parent class 93 | # (``object``), which has a regular boring and usable ``__setattr__``. 94 | super(GPUArraysView, self).__setattr__('_arrs', gpu_arrays) 95 | 96 | def __dir__(self): 97 | """ 98 | Make ``dir()`` also show the wrapped ``gpu_arrays`` attributes. 99 | """ 100 | # See ``GPUArraysView.__init__`` for the explanation how we access the 101 | # parent's plain ``__dir__()`` implementation (and avoid recursion). 102 | return list(set(super(GPUArraysView, self).__dir__() + 103 | dir(self._arrs))) 104 | 105 | def __getattr__(self, attrname): 106 | """ 107 | Intercept access to (missing) attributes, access the wrapped object 108 | attributes instead and copy the arrays from GPU to RAM. 109 | """ 110 | return getattr(self._arrs, attrname).get() # auto-copies to host RAM 111 | 112 | def __setattr__(self, attrname, value): 113 | """ 114 | Intercept setting attributes, access the wrapped object attributes 115 | instead and reassign their contents, copying the arrays from RAM 116 | to GPU in the process. 117 | """ 118 | getattr(self._arrs, attrname)[...] = value # copies to GPU RAM 119 | # TODO: just copy+reassign it without preserving identity and shape? 120 | 121 | 122 | # Solving Laplace equation with Dirichlet boundary conditions (Ez) # 123 | 124 | def dst2d(a): 125 | """ 126 | Calculate DST-Type1-2D, jury-rigged from anti-symmetrically-padded rFFT.
def dst2d(a):
    """
    Calculate DST-Type1-2D, jury-rigged from anti-symmetrically-padded rFFT.

    ``a`` must be a square 2D array (asserted below).
    """
    assert a.shape[0] == a.shape[1]
    N = a.shape[0]
    #                                    / 0  0  0  0  0  0 \
    #  0  0  0  0                       | 0 /1 2\ 0 -2 -1    |
    #  0 /1 2\ 0   anti-symmetrically   | 0 \3 4/ 0 -4 -3    |
    #  0 \3 4/ 0      padded to         | 0  0  0  0  0  0   |
    #  0  0  0  0                       | 0 -3 -4  0 +4 +3   |
    #                                    \ 0 -1 -2  0 +2 +1 /
    p = cp.zeros((2 * N + 2, 2 * N + 2))
    p[1:N+1, 1:N+1], p[1:N+1, N+2:] = a, -cp.fliplr(a)
    p[N+2:, 1:N+1], p[N+2:, N+2:] = -cp.flipud(a), +cp.fliplr(cp.flipud(a))

    # after padding: rFFT-2D, cut out the top-left segment, take -real part
    return -cp.fft.rfft2(p)[1:N+1, 1:N+1].real


@cp.memoize()
def dirichlet_matrix(grid_steps, grid_step_size):
    """
    Calculate a magical matrix that solves the Laplace equation
    if you elementwise-multiply the RHS by it "in DST-space".
    See Samarskiy-Nikolaev, p. 187.

    Memoized: computed only once per distinct (grid_steps, grid_step_size).
    """
    # mul[i, j] = 1 / (lam[i] + lam[j])
    # lam[k] = 4 / h**2 * sin(k * pi * h / (2 * L))**2, where L = h * (N - 1)
    k = cp.arange(1, grid_steps - 1)
    lam = 4 / grid_step_size**2 * cp.sin(k * cp.pi / (2 * (grid_steps - 1)))**2
    lambda_i, lambda_j = lam[:, None], lam[None, :]
    mul = 1 / (lambda_i + lambda_j)
    return mul / (2 * (grid_steps - 1))**2  # additional 2xDST normalization
def calculate_Ez(config, jx, jy):
    """
    Calculate Ez as iDST2D(dirichlet_matrix * DST2D(djx/dx + djy/dy)).
    """
    h = config.grid_step_size

    # Central-difference divergence of (jx, jy), interior points only
    # (NOTE: the RHS is smaller by 1 on each side).
    # NOTE: use gradient instead if available (cupy doesn't have gradient yet).
    djx_dx = jx[2:, 1:-1] - jx[:-2, 1:-1]
    djy_dy = jy[1:-1, 2:] - jy[1:-1, :-2]
    rhs_inner = -(djx_dx + djy_dy) / (h * 2)  # -?

    # Forward DST-Type1-2D of the RHS, elementwise multiplication by the
    # magic matrix (which also bundles every normalization factor), and the
    # inverse transform. No separate iDST is needed: unnormalized DST-Type1
    # is its own inverse up to a factor that dirichlet_matrix accounts for.
    spectrum = dst2d(rhs_inner)
    spectrum *= dirichlet_matrix(config.grid_steps, h)
    Ez_inner = dst2d(spectrum)

    # Zero Dirichlet boundary conditions: restore the zeroed perimeter.
    Ez = cp.pad(Ez_inner, 1, 'constant', constant_values=0)
    numba.cuda.synchronize()
    return Ez


# Solving Laplace or Helmholtz equation with mixed boundary conditions #
# jury-rigged from padded rFFT
def mix2d(a):
    """
    Calculate a DST-DCT-hybrid transform
    (DST in first direction, DCT in second one),
    jury-rigged from padded rFFT
    (anti-symmetrically in first direction, symmetrically in second direction).
    """
    # NOTE: LCODE 3D uses x as the first direction, thus the confusion below.
    M, N = a.shape
    #                                  /(0  1  2  0)-2 -1 \   +----> x
    #  / 1  2 \                       | (0  3  4  0)-4 -3  |  |      (M)
    #  | 3  4 |  mixed-symmetrically  | (0  5  6  0)-6 -5  |  |
    #  | 5  6 |      padded to        | (0  7  8  0)-8 -7  |  v
    #  \ 7  8 /                       |  0 +5 +6  0 -6 -5  |
    #                                  \ 0 +3 +4  0 -4 -3 /  y (N)
    p = cp.zeros((2 * M + 2, 2 * N - 2))  # wider than before
    p[1:M+1, :N] = a
    p[M+2:2*M+2, :N] = -cp.flipud(a)  # flip to right on drawing above
    p[1:M+1, N-1:2*N-2] = cp.fliplr(a)[:, :-1]  # flip down on drawing above
    p[M+2:2*M+2, N-1:2*N-2] = -cp.flipud(cp.fliplr(a))[:, :-1]
    # Note: the returned array is wider than the input array, it is padded
    # with zeroes (depicted above as a square region marked with round braces).
    return -cp.fft.rfft2(p)[:M+2, :N].imag  # FFT, cut a corner with 0s, -imag


@cp.memoize()
def mixed_matrix(grid_steps, grid_step_size, subtraction_trick):
    """
    Calculate a magical matrix that solves the Helmholtz or Laplace equation
    (subtraction_trick=True and subtraction_trick=False correspondingly)
    if you elementwise-multiply the RHS by it "in DST-DCT-transformed-space".
    See Samarskiy-Nikolaev, p. 189 and around.

    Memoized: computed only once per distinct argument combination.
    """
    # mul[i, j] = 1 / (lam[i] + lam[j])
    # lam[k] = 4 / h**2 * sin(k * pi * h / (2 * L))**2, where L = h * (N - 1)
    # but k for lam_i spans from 1..N-2, while k for lam_j covers 0..N-1
    ki, kj = cp.arange(1, grid_steps - 1), cp.arange(grid_steps)
    li = 4 / grid_step_size**2 * cp.sin(ki * cp.pi / (2 * (grid_steps - 1)))**2
    lj = 4 / grid_step_size**2 * cp.sin(kj * cp.pi / (2 * (grid_steps - 1)))**2
    lambda_i, lambda_j = li[:, None], lj[None, :]
    # The extra +1 in the Helmholtz case comes from the (Delta - 1) operator.
    mul = 1 / (lambda_i + lambda_j + (1 if subtraction_trick else 0))
    return mul / (2 * (grid_steps - 1))**2  # additional 2xDST normalization


def dx_dy(arr, grid_step_size):
    """
    Calculate x and y derivatives simultaneously (like np.gradient does).
    NOTE: use gradient instead if available (cupy doesn't have gradient yet).
    NOTE: arrays are assumed to have zeros on the perimeter.
    """
    dx, dy = cp.zeros_like(arr), cp.zeros_like(arr)
    dx[1:-1, 1:-1] = arr[2:, 1:-1] - arr[:-2, 1:-1]  # arrays have 0s
    dy[1:-1, 1:-1] = arr[1:-1, 2:] - arr[1:-1, :-2]  # on the perimeter
    return dx / (grid_step_size * 2), dy / (grid_step_size * 2)
def calculate_Ex_Ey_Bx_By(config, Ex_avg, Ey_avg, Bx_avg, By_avg,
                          beam_ro, ro, jx, jy, jz, jx_prev, jy_prev):
    """
    Calculate transverse fields as iDST-DCT(mixed_matrix * DST-DCT(RHS.T)).T,
    with and without transposition depending on the field component.
    """
    # NOTE: density and currents are assumed to be zero on the perimeter
    # (no plasma particles must reach the wall, so the reflection boundary
    # must be closer to the center than the simulation window boundary
    # minus the coarse plasma particle cloud width).

    # 0. Calculate gradients and RHS.
    # beam_ro is added to jz as well -- presumably because the beam moves
    # at ~c so its current equals its charge density; TODO confirm.
    dro_dx, dro_dy = dx_dy(ro + beam_ro, config.grid_step_size)
    djz_dx, djz_dy = dx_dy(jz + beam_ro, config.grid_step_size)
    djx_dxi = (jx_prev - jx) / config.xi_step_size  # - ?
    djy_dxi = (jy_prev - jy) / config.xi_step_size  # - ?

    # Are we solving a Laplace equation or a Helmholtz one?
    # When the subtraction trick is on, the averaged previous-layer field
    # is subtracted from the RHS and the Helmholtz matrix compensates it.
    subtraction_trick = config.field_solver_subtraction_trick
    Ex_rhs = -((dro_dx - djx_dxi) - Ex_avg * subtraction_trick)  # -?
    Ey_rhs = -((dro_dy - djy_dxi) - Ey_avg * subtraction_trick)
    Bx_rhs = +((djz_dy - djy_dxi) + Bx_avg * subtraction_trick)
    By_rhs = -((djz_dx - djx_dxi) - By_avg * subtraction_trick)

    # Boundary conditions application (for future reference, ours are zero):
    # rhs[:, 0] -= bound_bottom[:] * (2 / grid_step_size)
    # rhs[:, -1] += bound_top[:] * (2 / grid_step_size)

    # 1. Apply our mixed DCT-DST transform to RHS.
    Ey_f = mix2d(Ey_rhs[1:-1, :])[1:-1, :]

    # 2. Multiply f by the magic matrix.
    mix_mat = mixed_matrix(config.grid_steps, config.grid_step_size,
                           config.field_solver_subtraction_trick)
    Ey_f *= mix_mat

    # 3. Apply our mixed DCT-DST transform again.
    Ey = mix2d(Ey_f)

    # Likewise for other fields (transposed for the x-direction components,
    # as their boundary condition types are swapped between x and y):
    Bx = mix2d(mix_mat * mix2d(Bx_rhs[1:-1, :])[1:-1, :])
    By = mix2d(mix_mat * mix2d(By_rhs.T[1:-1, :])[1:-1, :]).T
    Ex = mix2d(mix_mat * mix2d(Ex_rhs.T[1:-1, :])[1:-1, :]).T

    return Ex, Ey, Bx, By


def dct2d(a):
    """
    Calculate DCT-Type1-2D, jury-rigged from symmetrically-padded rFFT.

    ``a`` must be a square 2D array (asserted below).
    """
    assert a.shape[0] == a.shape[1]
    N = a.shape[0]
    #                            //1 2 3 4\ 3 2 \
    #  /1 2 3 4\                | |5 6 7 8| 7 6  |
    #  |5 6 7 8|  symmetrically | |9 A B C| B A  |
    #  |9 A B C|    padded to   | \D E F G/ F E  |
    #  \D E F G/                |  9 A B C  B A  |
    #                            \ 5 6 7 8  7 6 /
    p = cp.zeros((2 * N - 2, 2 * N - 2))
    p[:N, :N] = a
    p[N:, :N] = cp.flipud(a)[1:-1, :]  # flip to right on drawing above
    p[:N, N:] = cp.fliplr(a)[:, 1:-1]  # flip down on drawing above
    p[N:, N:] = cp.flipud(cp.fliplr(a))[1:-1, 1:-1]  # bottom-right corner
    # after padding: rFFT-2D, cut out the top-left segment, take -real part
    return -cp.fft.rfft2(p)[:N, :N].real


# Solving Laplace equation with Neumann boundary conditions (Bz) #
@cp.memoize()
def neumann_matrix(grid_steps, grid_step_size):
    """
    Calculate a magical matrix that solves the Laplace equation
    if you elementwise-multiply the RHS by it "in DCT-space".
    See Samarskiy-Nikolaev, p. 187.

    Memoized: computed only once per distinct (grid_steps, grid_step_size).
    """
    # mul[i, j] = 1 / (lam[i] + lam[j])
    # lam[k] = 4 / h**2 * sin(k * pi * h / (2 * L))**2, where L = h * (N - 1)
    k = cp.arange(0, grid_steps)
    lam = 4 / grid_step_size**2 * cp.sin(k * cp.pi / (2 * (grid_steps - 1)))**2
    lambda_i, lambda_j = lam[:, None], lam[None, :]
    mul = 1 / (lambda_i + lambda_j)  # WARNING: zero division in mul[0, 0]!
    mul[0, 0] = 0  # doesn't matter anyway, just defines constant shift
    return mul / (2 * (grid_steps - 1))**2  # additional 2xDCT normalization


def calculate_Bz(config, jx, jy):
    """
    Calculate Bz as iDCT2D(neumann_matrix * DCT2D(djx/dy - djy/dx)).
    """
    # 0. Calculate RHS (central differences of the curl's z-component).
    # NOTE: use gradient instead if available (cupy doesn't have gradient yet).
    djx_dy = jx[1:-1, 2:] - jx[1:-1, :-2]
    djy_dx = jy[2:, 1:-1] - jy[:-2, 1:-1]
    djx_dy = cp.pad(djx_dy, 1, 'constant', constant_values=0)
    djy_dx = cp.pad(djy_dx, 1, 'constant', constant_values=0)
    rhs = -(djx_dy - djy_dx) / (config.grid_step_size * 2)  # -?

    # As usual, the boundary conditions are zero
    # (otherwise add them to boundary cells, divided by grid_step_size/2).

    # 1. Apply DCT-Type1-2D (Discrete Cosine Transform Type 1 2D) to the RHS.
    f = dct2d(rhs)

    # 2. Multiply f by the special matrix that does the job and normalizes.
    f *= neumann_matrix(config.grid_steps, config.grid_step_size)

    # 3. Apply iDCT-Type1-2D (Inverse Discrete Cosine Transform Type 1 2D).
    # We don't have to define a separate iDCT function, because
    # unnormalized DCT-Type1 is its own inverse, up to a factor 2(N-1),
    # and we take all scaling matters into account with a single factor
    # hidden inside neumann_matrix.
    Bz = dct2d(f)
    numba.cuda.synchronize()

    # Neumann BCs define Bz only up to an additive constant;
    # pin it by forcing the integral over Bz to be 0.
    Bz -= Bz.mean()

    return Bz


# Pushing particles without any fields (used for initial halfstep estimation) #
def move_estimate_wo_fields(config,
                            m, x_init, y_init, prev_x_offt, prev_y_offt,
                            px, py, pz):
    """
    Move coarse plasma particles as if there were no fields.
    Also reflect the particles from `+-reflect_boundary`.

    Returns the new (x_offt, y_offt) offset arrays.
    """
    x, y = x_init + prev_x_offt, y_init + prev_y_offt
    # Relativistic gamma * m; xi plays the role of time in the comoving
    # window, hence the (gamma_m - pz) denominators below.
    gamma_m = cp.sqrt(m**2 + pz**2 + px**2 + py**2)

    x += px / (gamma_m - pz) * config.xi_step_size
    y += py / (gamma_m - pz) * config.xi_step_size

    # Mirror-reflect escapees back inside the +-reflect boundary.
    # NOTE: a single reflection per axis is applied; presumably particles
    # never overshoot by more than the window width in one step -- confirm.
    reflect = config.reflect_boundary
    x[x >= +reflect] = +2 * reflect - x[x >= +reflect]
    x[x <= -reflect] = -2 * reflect - x[x <= -reflect]
    y[y >= +reflect] = +2 * reflect - y[y >= +reflect]
    y[y <= -reflect] = -2 * reflect - y[y <= -reflect]

    x_offt, y_offt = x - x_init, y - y_init

    numba.cuda.synchronize()
    return x_offt, y_offt


# Deposition and interpolation helper functions #
@numba.jit(inline=True)
def weights(x, y, grid_steps, grid_step_size):
    """
    Calculate the indices of a cell corresponding to the coordinates,
    and the coefficients of interpolation and deposition for this cell
    and 8 surrounding cells.
    The weights correspond to 2D triangular shaped cloud (TSC2D).
    """
    x_h, y_h = x / grid_step_size + .5, y / grid_step_size + .5
    i, j = int(floor(x_h) + grid_steps // 2), int(floor(y_h) + grid_steps // 2)
    x_loc, y_loc = x_h - floor(x_h) - .5, y_h - floor(y_h) - .5
    # centered to -.5 to .5, not 0 to 1, as formulas use offset from cell center
    # TODO: get rid of this deoffsetting/reoffsetting festival

    wx0, wy0 = .75 - x_loc**2, .75 - y_loc**2  # fx1, fy1
    wxP, wyP = (.5 + x_loc)**2 / 2, (.5 + y_loc)**2 / 2  # fx2**2/2, fy2**2/2
    wxM, wyM = (.5 - x_loc)**2 / 2, (.5 - y_loc)**2 / 2  # fx3**2/2, fy3**2/2

    # w<x><y> where <x>, <y> is M (minus-1 cell), 0 (this cell) or P (plus-1).
    wMP, w0P, wPP = wxM * wyP, wx0 * wyP, wxP * wyP
    wM0, w00, wP0 = wxM * wy0, wx0 * wy0, wxP * wy0
    wMM, w0M, wPM = wxM * wyM, wx0 * wyM, wxP * wyM

    return i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM


@numba.jit(inline=True)
def interp9(a, i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM):
    """
    Collect value from a cell and 8 surrounding cells (using `weights` output).
    """
    return (
        a[i - 1, j + 1] * wMP + a[i + 0, j + 1] * w0P + a[i + 1, j + 1] * wPP +
        a[i - 1, j + 0] * wM0 + a[i + 0, j + 0] * w00 + a[i + 1, j + 0] * wP0 +
        a[i - 1, j - 1] * wMM + a[i + 0, j - 1] * w0M + a[i + 1, j - 1] * wPM
    )
@numba.jit(inline=True)
def deposit9(a, i, j, val, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM):
    """
    Deposit value into a cell and 8 surrounding cells (using `weights` output).
    """
    # This is like a[i - 1, j + 1] += val * wMP, except it is atomic
    # and incrementing the same cell by several threads will add up correctly.
    # CUDA Compute Capability 6.0+ is recommended for hardware atomics support.
    numba.cuda.atomic.add(a, (i - 1, j + 1), val * wMP)
    numba.cuda.atomic.add(a, (i + 0, j + 1), val * w0P)
    numba.cuda.atomic.add(a, (i + 1, j + 1), val * wPP)
    numba.cuda.atomic.add(a, (i - 1, j + 0), val * wM0)
    numba.cuda.atomic.add(a, (i + 0, j + 0), val * w00)
    numba.cuda.atomic.add(a, (i + 1, j + 0), val * wP0)
    numba.cuda.atomic.add(a, (i - 1, j - 1), val * wMM)
    numba.cuda.atomic.add(a, (i + 0, j - 1), val * w0M)
    numba.cuda.atomic.add(a, (i + 1, j - 1), val * wPM)


# Coarse and fine plasma initialization #

def make_coarse_plasma_grid(steps, step_size, coarseness=3):
    """
    Create initial coarse plasma particles coordinates
    (a single 1D grid for both x and y).
    """
    # TODO: why? (presumably a fractional coarseness would break the
    # coarse-to-fine index mapping below -- confirm before relaxing)
    assert coarseness == int(coarseness)
    plasma_step = step_size * coarseness
    right_half = np.arange(steps // (coarseness * 2)) * plasma_step
    left_half = -right_half[:0:-1]  # invert, reverse, drop zero
    plasma_grid = np.concatenate([left_half, right_half])
    # The resulting grid must be symmetric around zero.
    assert(np.array_equal(plasma_grid, -plasma_grid[::-1]))
    return plasma_grid
def make_fine_plasma_grid(steps, step_size, fineness=2):
    """
    Create initial fine plasma particles coordinates
    (a single 1D grid for both x and y).

    Avoids positioning particles at the cell edges and boundaries.

    .. See docs/how/fine_and_coarse_plasma for illustrations.
    """
    assert fineness == int(fineness)
    spacing = step_size / fineness
    half_count = steps // 2 * fineness
    if fineness % 2:
        # Odd fineness: particles sit on the zero axes, none on cell corners.
        positive_side = np.arange(half_count) * spacing
        negative_side = -positive_side[:0:-1]  # mirror, reverse, drop zero
    else:
        # Even fineness: no particles on the zero axes or cell corners.
        positive_side = (.5 + np.arange(half_count)) * spacing
        negative_side = -positive_side[::-1]  # mirror and reverse
    grid = np.concatenate([negative_side, positive_side])
    assert np.array_equal(grid, -grid[::-1])  # symmetric around zero
    return grid


def make_plasma(steps, cell_size, coarseness=3, fineness=2):
    """
    Make coarse plasma initial state arrays and the arrays needed to
    interpolate coarse plasma into fine plasma (``virt_params``).

    Coarse is the one that will evolve and fine is the one to be bilinearly
    interpolated from the coarse one based on the initial positions
    (using 1 to 4 coarse plasma particles that initially were the closest).
    """
    # Two initial 1D particle grids: the coarse one evolves, the fine one is
    # bilinearly interpolated from it based on the initial positions.
    coarse_grid = make_coarse_plasma_grid(steps, cell_size, coarseness)
    fine_grid = make_fine_plasma_grid(steps, cell_size, fineness)
    Nc = len(coarse_grid)
    coarse_step = cell_size * coarseness

    # Plasma electrons on the coarse grid -- the ones that really move.
    coarse_grid_xs, coarse_grid_ys = coarse_grid[:, None], coarse_grid[None, :]
    coarse_x_init = cp.broadcast_to(cp.asarray(coarse_grid_xs), (Nc, Nc))
    coarse_y_init = cp.broadcast_to(cp.asarray(coarse_grid_ys), (Nc, Nc))
    coarse_x_offt = cp.zeros((Nc, Nc))
    coarse_y_offt = cp.zeros((Nc, Nc))
    coarse_px = cp.zeros((Nc, Nc))
    coarse_py = cp.zeros((Nc, Nc))
    coarse_pz = cp.zeros((Nc, Nc))
    # Each coarse particle stands for coarseness**2 unit-density electrons.
    coarse_m = cp.ones((Nc, Nc)) * ELECTRON_MASS * coarseness**2
    coarse_q = cp.ones((Nc, Nc)) * ELECTRON_CHARGE * coarseness**2

    # Indices for coarse -> fine bilinear interpolation:
    # neighbour indices array, 1D, shared by the x and y directions.
    indices = np.searchsorted(coarse_grid, fine_grid)
    # example:
    # coarse:  [-2., -1., 0., 1., 2.]
    # fine:    [-2.4, -1.8, -1.2, -0.6, 0. , 0.6, 1.2, 1.8, 2.4]
    # indices: [ 0  ,  1  ,  1  ,  2  , 2  , 3  , 4  , 4  , 5  ]
    # There is no coarse particle with index 5 (nor -1), so clip both ways:
    indices_next = np.clip(indices, 0, Nc - 1)      # [0, 1, 1, 2 ... 4, 4, 4]
    indices_prev = np.clip(indices - 1, 0, Nc - 1)  # [0, 0, 0, 1 ... 3, 3, 4]
    # mixed from: [ 0&0 , 0&1 , 0&1 , 1&2 , 1&2 , 2&3 , 3&4 , 3&4, 4&4 ]

    # Interpolation weights from the initial positions: the farther the fine
    # particle is from the closest right coarse particle, the more influence
    # the left one has (and vice versa).
    influence_prev = (coarse_grid[indices_next] - fine_grid) / coarse_step
    influence_next = (fine_grid - coarse_grid[indices_prev]) / coarse_step
    # Fix for boundary cases of missing cornering particles.
    influence_prev[indices_next == 0] = 0       # nothing on the left?
    influence_next[indices_next == 0] = 1       # use the right one only
    influence_next[indices_prev == Nc - 1] = 0  # nothing on the right?
    influence_prev[indices_prev == Nc - 1] = 1  # use the left one only
    # Same arrays are used for interpolating in y-direction.

    # The virtualization formula is thus
    # influence_prev[pi] * influence_prev[pj] * <bottom-left neighbour> +
    # influence_prev[pi] * influence_next[nj] * <top-left neighbour> +
    # influence_next[ni] * influence_prev[pj] * <bottom-right neighbour> +
    # influence_next[ni] * influence_next[nj] * <top-right neighbour>
    # where pi, pj are indices_prev[i], indices_prev[j],
    # ni, nj are indices_next[i], indices_next[j] and
    # i, j are indices of fine virtual particles.
    # This is what is employed inside mix() and deposit_kernel().

    # Values of m, q, px, py, pz should be scaled by 1/(fineness*coarseness)**2

    virt_params = GPUArrays(
        influence_prev=influence_prev, influence_next=influence_next,
        indices_prev=indices_prev, indices_next=indices_next,
        fine_grid=fine_grid,
    )

    return (coarse_x_init, coarse_y_init, coarse_x_offt, coarse_y_offt,
            coarse_px, coarse_py, coarse_pz, coarse_m, coarse_q, virt_params)
# | B - top-left neighbour, indices: pi, nj 583 | # +----> C - bottom-right neighbour, indices: ni, pj 584 | A C # x D - top-right neighbour, indices: ni, nj 585 | See the rest of the deposition and plasma creation for more info. 586 | """ 587 | return (A * coarse[pi, pj] + B * coarse[pi, nj] + 588 | C * coarse[ni, pj] + D * coarse[ni, nj]) 589 | 590 | 591 | @numba.jit(inline=True) 592 | def coarse_to_fine(fi, fj, c_x_offt, c_y_offt, c_m, c_q, c_px, c_py, c_pz, 593 | virtplasma_smallness_factor, fine_grid, 594 | influence_prev, influence_next, indices_prev, indices_next): 595 | """ 596 | Bilinearly interpolate fine plasma properties from four 597 | historically-neighbouring plasma particle property values. 598 | """ 599 | # Calculate the weights of the historically-neighbouring coarse particles 600 | A = influence_prev[fi] * influence_prev[fj] 601 | B = influence_prev[fi] * influence_next[fj] 602 | C = influence_next[fi] * influence_prev[fj] 603 | D = influence_next[fi] * influence_next[fj] 604 | # and retrieve their indices. 
@numba.jit(inline=True)
def coarse_to_fine(fi, fj, c_x_offt, c_y_offt, c_m, c_q, c_px, c_py, c_pz,
                   virtplasma_smallness_factor, fine_grid,
                   influence_prev, influence_next, indices_prev, indices_next):
    """
    Bilinearly interpolate fine plasma properties from four
    historically-neighbouring plasma particle property values.

    Returns (x, y, m, q, px, py, pz) of the fine virtual particle (fi, fj).
    """
    # Calculate the weights of the historically-neighbouring coarse particles
    A = influence_prev[fi] * influence_prev[fj]
    B = influence_prev[fi] * influence_next[fj]
    C = influence_next[fi] * influence_prev[fj]
    D = influence_next[fi] * influence_next[fj]
    # and retrieve their indices.
    pi, ni = indices_prev[fi], indices_next[fi]
    pj, nj = indices_prev[fj], indices_next[fj]

    # Now we're ready to mix the fine particle characteristics
    x_offt = mix(c_x_offt, A, B, C, D, pi, ni, pj, nj)
    y_offt = mix(c_y_offt, A, B, C, D, pi, ni, pj, nj)
    x = fine_grid[fi] + x_offt  # x_fine_init
    y = fine_grid[fj] + y_offt  # y_fine_init

    # The smallness factor scales coarse-particle m, q, p down to the
    # fine (virtual) particle size.
    # TODO: const m and q
    m = virtplasma_smallness_factor * mix(c_m, A, B, C, D, pi, ni, pj, nj)
    q = virtplasma_smallness_factor * mix(c_q, A, B, C, D, pi, ni, pj, nj)

    px = virtplasma_smallness_factor * mix(c_px, A, B, C, D, pi, ni, pj, nj)
    py = virtplasma_smallness_factor * mix(c_py, A, B, C, D, pi, ni, pj, nj)
    pz = virtplasma_smallness_factor * mix(c_pz, A, B, C, D, pi, ni, pj, nj)
    return x, y, m, q, px, py, pz


# Deposition #
@numba.cuda.jit
def deposit_kernel(grid_steps, grid_step_size, virtplasma_smallness_factor,
                   c_x_offt, c_y_offt, c_m, c_q, c_px, c_py, c_pz,  # coarse
                   fine_grid,
                   influence_prev, influence_next, indices_prev, indices_next,
                   out_ro, out_jx, out_jy, out_jz):
    """
    Interpolate coarse plasma into fine plasma and deposit it on the
    charge density and current grids (one CUDA thread per fine particle).
    """
    # Do nothing if our thread does not have a fine particle to deposit.
    fk = numba.cuda.grid(1)
    if fk >= fine_grid.size**2:
        return
    # Decode the 2D fine-particle index from the flat thread index.
    fi, fj = fk // fine_grid.size, fk % fine_grid.size

    # Interpolate fine plasma particle from coarse particle characteristics
    x, y, m, q, px, py, pz = coarse_to_fine(fi, fj, c_x_offt, c_y_offt,
                                            c_m, c_q, c_px, c_py, c_pz,
                                            virtplasma_smallness_factor,
                                            fine_grid,
                                            influence_prev, influence_next,
                                            indices_prev, indices_next)

    # Deposit the resulting fine particle on ro/j grids.
    gamma_m = sqrt(m**2 + px**2 + py**2 + pz**2)
    dro = q / (1 - pz / gamma_m)
    djx = px * (dro / gamma_m)  # dro * v_x
    djy = py * (dro / gamma_m)  # dro * v_y
    djz = pz * (dro / gamma_m)  # dro * v_z

    # TSC2D weights of the 3x3 cell neighbourhood, then atomic deposition.
    i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM = weights(
        x, y, grid_steps, grid_step_size
    )
    deposit9(out_ro, i, j, dro, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    deposit9(out_jx, i, j, djx, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    deposit9(out_jy, i, j, djy, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    deposit9(out_jz, i, j, djz, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
def deposit(config, ro_initial, x_offt, y_offt, m, q, px, py, pz, virt_params):
    """
    Interpolate coarse plasma into fine plasma and deposit it on the
    charge density and current grids.
    This is a convenience wrapper around the ``deposit_kernel`` CUDA kernel.
    """
    virtplasma_smallness_factor = 1 / (config.plasma_coarseness *
                                       config.plasma_fineness)**2
    ro = cp.zeros((config.grid_steps, config.grid_steps))
    jx = cp.zeros((config.grid_steps, config.grid_steps))
    jy = cp.zeros((config.grid_steps, config.grid_steps))
    jz = cp.zeros((config.grid_steps, config.grid_steps))
    # Launch configuration: one thread per fine particle,
    # WARP_SIZE threads per block.
    cfg = int(np.ceil(virt_params.fine_grid.size**2 / WARP_SIZE)), WARP_SIZE
    deposit_kernel[cfg](config.grid_steps, config.grid_step_size,
                        virtplasma_smallness_factor,
                        x_offt, y_offt, m, q, px, py, pz,
                        virt_params.fine_grid,
                        virt_params.influence_prev, virt_params.influence_next,
                        virt_params.indices_prev, virt_params.indices_next,
                        ro, jx, jy, jz)
    # Also add the background ion charge density.
    ro += ro_initial  # Do it last to preserve more float precision
    numba.cuda.synchronize()
    return ro, jx, jy, jz


def initial_deposition(config, x_offt, y_offt, px, py, pz, m, q, virt_params):
    """
    Determine the background ion charge density by depositing the electrons
    with their initial parameters and negating the result.
    """
    # ro_initial=0: we want the pure electron contribution here.
    ro_electrons_initial, _, _, _ = deposit(config, 0, x_offt, y_offt,
                                            m, q, px, py, pz, virt_params)
    return -ro_electrons_initial  # Right on the GPU, huh


# Field interpolation and particle movement (fused) #
@numba.cuda.jit
def move_smart_kernel(xi_step_size, reflect_boundary,
                      grid_step_size, grid_steps,
                      ms, qs,
                      x_init, y_init,
                      prev_x_offt, prev_y_offt,
                      estimated_x_offt, estimated_y_offt,
                      prev_px, prev_py, prev_pz,
                      Ex_avg, Ey_avg, Ez_avg, Bx_avg, By_avg, Bz_avg,
                      new_x_offt, new_y_offt, new_px, new_py, new_pz):
    """
    Update plasma particle coordinates and momenta according to the field
    values interpolated halfway between the previous plasma particle location
    and the best estimation of its next location currently available to us.
    Also reflect the particles from ``+-reflect_boundary``.
    """
    # Do nothing if our thread does not have a coarse particle to move.
    k = numba.cuda.grid(1)
    if k >= ms.size:
        return

    m, q = ms[k], qs[k]

    opx, opy, opz = prev_px[k], prev_py[k], prev_pz[k]
    px, py, pz = opx, opy, opz
    x_offt, y_offt = prev_x_offt[k], prev_y_offt[k]

    # Calculate midstep positions and interpolate the fields in them.
    x_halfstep = x_init[k] + (prev_x_offt[k] + estimated_x_offt[k]) / 2
    y_halfstep = y_init[k] + (prev_y_offt[k] + estimated_y_offt[k]) / 2
    i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM = weights(
        x_halfstep, y_halfstep, grid_steps, grid_step_size
    )
    Ex = interp9(Ex_avg, i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    Ey = interp9(Ey_avg, i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    Ez = interp9(Ez_avg, i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    Bx = interp9(Bx_avg, i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    By = interp9(By_avg, i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)
    Bz = interp9(Bz_avg, i, j, wMP, w0P, wPP, wM0, w00, wP0, wMM, w0M, wPM)

    # Move the particles according to the fields (first half-push).
    gamma_m = sqrt(m**2 + pz**2 + px**2 + py**2)
    vx, vy, vz = px / gamma_m, py / gamma_m, pz / gamma_m
    factor_1 = q * xi_step_size / (1 - pz / gamma_m)
    dpx = factor_1 * (Ex + vy * Bz - vz * By)
    dpy = factor_1 * (Ey - vx * Bz + vz * Bx)
    dpz = factor_1 * (Ez + vx * By - vy * Bx)
    px, py, pz = opx + dpx / 2, opy + dpy / 2, opz + dpz / 2

    # Move the particles according to the fields again using updated momenta.
    gamma_m = sqrt(m**2 + pz**2 + px**2 + py**2)
    vx, vy, vz = px / gamma_m, py / gamma_m, pz / gamma_m
    factor_1 = q * xi_step_size / (1 - pz / gamma_m)
    dpx = factor_1 * (Ex + vy * Bz - vz * By)
    dpy = factor_1 * (Ey - vx * Bz + vz * Bx)
    dpz = factor_1 * (Ez + vx * By - vy * Bx)
    px, py, pz = opx + dpx / 2, opy + dpy / 2, opz + dpz / 2

    # Apply the coordinate and momenta increments
    gamma_m = sqrt(m**2 + pz**2 + px**2 + py**2)

    x_offt += px / (gamma_m - pz) * xi_step_size  # no mixing with x_init
    y_offt += py / (gamma_m - pz) * xi_step_size  # no mixing with y_init

    # The full (not halved) momentum increment is the final one.
    px, py, pz = opx + dpx, opy + dpy, opz + dpz

    # Reflect the particles from `+-reflect_boundary`
    # (mirroring both the position and the transverse momentum component).
    # TODO: avoid branching?
    x = x_init[k] + x_offt
    y = y_init[k] + y_offt
    if x > +reflect_boundary:
        x = +2 * reflect_boundary - x
        x_offt = x - x_init[k]
        px = -px
    if x < -reflect_boundary:
        x = -2 * reflect_boundary - x
        x_offt = x - x_init[k]
        px = -px
    if y > +reflect_boundary:
        y = +2 * reflect_boundary - y
        y_offt = y - y_init[k]
        py = -py
    if y < -reflect_boundary:
        y = -2 * reflect_boundary - y
        y_offt = y - y_init[k]
        py = -py

    # Save the results into the output arrays  # TODO: get rid of that
    new_x_offt[k], new_y_offt[k] = x_offt, y_offt
    new_px[k], new_py[k], new_pz[k] = px, py, pz
805 | """ 806 | x_offt_new = cp.zeros_like(x_prev_offt) 807 | y_offt_new = cp.zeros_like(y_prev_offt) 808 | px_new = cp.zeros_like(px_prev) 809 | py_new = cp.zeros_like(py_prev) 810 | pz_new = cp.zeros_like(pz_prev) 811 | cfg = int(np.ceil(x_init.size / WARP_SIZE)), WARP_SIZE 812 | move_smart_kernel[cfg](config.xi_step_size, config.reflect_boundary, 813 | config.grid_step_size, config.grid_steps, 814 | m.ravel(), q.ravel(), 815 | x_init.ravel(), y_init.ravel(), 816 | x_prev_offt.ravel(), y_prev_offt.ravel(), 817 | estimated_x_offt.ravel(), estimated_y_offt.ravel(), 818 | px_prev.ravel(), py_prev.ravel(), pz_prev.ravel(), 819 | Ex_avg, Ey_avg, Ez_avg, Bx_avg, By_avg, Bz_avg, 820 | x_offt_new.ravel(), y_offt_new.ravel(), 821 | px_new.ravel(), py_new.ravel(), pz_new.ravel()) 822 | numba.cuda.synchronize() 823 | return x_offt_new, y_offt_new, px_new, py_new, pz_new 824 | 825 | 826 | # The scheme of a single step in xi # 827 | 828 | def step(config, const, virt_params, prev, beam_ro): 829 | """ 830 | Calculate the next iteration of plasma evolution and response. 831 | Returns the new state with the following attributes: 832 | ``x_offt``, ``y_offt``, ``px``, ``py``, ``pz``, 833 | ``Ex``, ``Ey``, ``Ez``, ``Bx``, ``By``, ``Bz``, 834 | ``ro``, ``jx``, ``jy``, ``jz``. 835 | Pass the returned value as ``prev`` for the next iteration. 836 | Wrap it in ``GPUArraysView`` if you want transparent conversion 837 | to ``numpy`` arrays. 838 | """ 839 | beam_ro = cp.asarray(beam_ro) # copy the array is on GPU if it's not there 840 | 841 | # Estimate the midpoint particle position without knowing the fields yet 842 | # TODO: use regular pusher and pass zero fields? previous fields? 843 | x_offt, y_offt = move_estimate_wo_fields(config, const.m, 844 | const.x_init, const.y_init, 845 | prev.x_offt, prev.y_offt, 846 | prev.px, prev.py, prev.pz) 847 | 848 | # Interpolate fields in midpoint and move particles with previous fields. 
849 | x_offt, y_offt, px, py, pz = move_smart( 850 | config, const.m, const.q, const.x_init, const.y_init, 851 | prev.x_offt, prev.y_offt, x_offt, y_offt, prev.px, prev.py, prev.pz, 852 | # no halfstep-averaged fields yet 853 | prev.Ex, prev.Ey, prev.Ez, prev.Bx, prev.By, prev.Bz 854 | ) 855 | # Recalculate the plasma density and currents. 856 | ro, jx, jy, jz = deposit( 857 | config, const.ro_initial, x_offt, y_offt, const.m, const.q, px, py, pz, 858 | virt_params 859 | ) 860 | 861 | # Calculate the fields. 862 | ro_in = ro if not config.field_solver_variant_A else (ro + prev.ro) / 2 863 | jz_in = jz if not config.field_solver_variant_A else (jz + prev.jz) / 2 864 | Ex, Ey, Bx, By = calculate_Ex_Ey_Bx_By(config, 865 | prev.Ex, prev.Ey, prev.Bx, prev.By, 866 | # no halfstep-averaged fields yet 867 | beam_ro, ro_in, jx, jy, jz_in, 868 | prev.jx, prev.jy) 869 | if config.field_solver_variant_A: 870 | Ex, Ey = 2 * Ex - prev.Ex, 2 * Ey - prev.Ey 871 | Bx, By = 2 * Bx - prev.Bx, 2 * By - prev.By 872 | 873 | Ez = calculate_Ez(config, jx, jy) 874 | Bz = calculate_Bz(config, jx, jy) 875 | 876 | Ex_avg = (Ex + prev.Ex) / 2 877 | Ey_avg = (Ey + prev.Ey) / 2 878 | Ez_avg = (Ez + prev.Ez) / 2 879 | Bx_avg = (Bx + prev.Bx) / 2 880 | By_avg = (By + prev.By) / 2 881 | Bz_avg = (Bz + prev.Bz) / 2 882 | 883 | # Repeat the previous procedure using averaged fields. 
884 | x_offt, y_offt, px, py, pz = move_smart( 885 | config, const.m, const.q, const.x_init, const.y_init, 886 | prev.x_offt, prev.y_offt, x_offt, y_offt, 887 | prev.px, prev.py, prev.pz, 888 | Ex_avg, Ey_avg, Ez_avg, Bx_avg, By_avg, Bz_avg 889 | ) 890 | ro, jx, jy, jz = deposit(config, const.ro_initial, x_offt, y_offt, 891 | const.m, const.q, px, py, pz, virt_params) 892 | 893 | ro_in = ro if not config.field_solver_variant_A else (ro + prev.ro) / 2 894 | jz_in = jz if not config.field_solver_variant_A else (jz + prev.jz) / 2 895 | Ex, Ey, Bx, By = calculate_Ex_Ey_Bx_By(config, 896 | Ex_avg, Ey_avg, Bx_avg, By_avg, 897 | beam_ro, ro_in, jx, jy, jz_in, 898 | prev.jx, prev.jy) 899 | if config.field_solver_variant_A: 900 | Ex, Ey = 2 * Ex - prev.Ex, 2 * Ey - prev.Ey 901 | Bx, By = 2 * Bx - prev.Bx, 2 * By - prev.By 902 | 903 | Ez = calculate_Ez(config, jx, jy) 904 | Bz = calculate_Bz(config, jx, jy) 905 | 906 | Ex_avg = (Ex + prev.Ex) / 2 907 | Ey_avg = (Ey + prev.Ey) / 2 908 | Ez_avg = (Ez + prev.Ez) / 2 909 | Bx_avg = (Bx + prev.Bx) / 2 910 | By_avg = (By + prev.By) / 2 911 | Bz_avg = (Bz + prev.Bz) / 2 912 | 913 | # Repeat the previous procedure using averaged fields once again. 914 | x_offt, y_offt, px, py, pz = move_smart( 915 | config, const.m, const.q, const.x_init, const.y_init, 916 | prev.x_offt, prev.y_offt, x_offt, y_offt, 917 | prev.px, prev.py, prev.pz, 918 | Ex_avg, Ey_avg, Ez_avg, Bx_avg, By_avg, Bz_avg 919 | ) 920 | ro, jx, jy, jz = deposit(config, const.ro_initial, x_offt, y_offt, 921 | const.m, const.q, px, py, pz, virt_params) 922 | 923 | # TODO: what do we need that roj_new for, jx_prev/jy_prev only? 924 | 925 | # Return the array collection that would serve as `prev` for the next step. 
926 | new_state = GPUArrays(x_offt=x_offt, y_offt=y_offt, px=px, py=py, pz=pz, 927 | Ex=Ex.copy(), Ey=Ey.copy(), Ez=Ez.copy(), 928 | Bx=Bx.copy(), By=By.copy(), Bz=Bz.copy(), 929 | ro=ro, jx=jx, jy=jy, jz=jz) 930 | 931 | return new_state 932 | 933 | 934 | # Array initialization # 935 | 936 | def init(config): 937 | """ 938 | Initialize all the arrays needed for ``step`` and ``config.beam``. 939 | """ 940 | 941 | assert config.grid_steps % 2 == 1 942 | 943 | # virtual particles should not reach the window pre-boundary cells 944 | assert config.reflect_padding_steps > config.plasma_coarseness + 1 945 | # the (costly) alternative is to reflect after plasma virtualization 946 | 947 | config.reflect_boundary = config.grid_step_size * ( 948 | config.grid_steps / 2 - config.reflect_padding_steps 949 | ) 950 | 951 | grid = ((np.arange(config.grid_steps) - config.grid_steps // 2) 952 | * config.grid_step_size) 953 | xs, ys = grid[:, None], grid[None, :] 954 | 955 | x_init, y_init, x_offt, y_offt, px, py, pz, m, q, virt_params = \ 956 | make_plasma(config.grid_steps - config.plasma_padding_steps * 2, 957 | config.grid_step_size, 958 | coarseness=config.plasma_coarseness, 959 | fineness=config.plasma_fineness) 960 | 961 | ro_initial = initial_deposition(config, x_offt, y_offt, 962 | px, py, pz, m, q, virt_params) 963 | 964 | const = GPUArrays(m=m, q=q, x_init=x_init, y_init=y_init, 965 | ro_initial=ro_initial) 966 | 967 | def zeros(): 968 | return cp.zeros((config.grid_steps, config.grid_steps)) 969 | 970 | state = GPUArrays(x_offt=x_offt, y_offt=y_offt, px=px, py=py, pz=pz, 971 | Ex=zeros(), Ey=zeros(), Ez=zeros(), 972 | Bx=zeros(), By=zeros(), Bz=zeros(), 973 | ro=zeros(), jx=zeros(), jy=zeros(), jz=zeros()) 974 | 975 | return xs, ys, const, virt_params, state 976 | 977 | 978 | # Some really sloppy diagnostics # 979 | 980 | max_zn = 0 981 | def diags_ro_zn(config, ro): 982 | global max_zn 983 | 984 | sigma = 0.25 / config.grid_step_size 985 | blurred = 
scipy.ndimage.gaussian_filter(ro, sigma=sigma) 986 | hf = ro - blurred 987 | zn = np.abs(hf).mean() / 4.23045376e-04 988 | max_zn = max(max_zn, zn) 989 | return max_zn 990 | 991 | 992 | def diags_peak_msg(Ez_00_history): 993 | Ez_00_array = np.array(Ez_00_history) 994 | peak_indices = scipy.signal.argrelmax(Ez_00_array)[0] 995 | 996 | if peak_indices.size: 997 | peak_values = Ez_00_array[peak_indices] 998 | rel_deviations_perc = 100 * (peak_values / peak_values[0] - 1) 999 | return (f'{peak_values[-1]:0.4e} ' 1000 | f'{rel_deviations_perc[-1]:+0.2f}%') 1001 | else: 1002 | return '...' 1003 | 1004 | 1005 | def diags_ro_slice(config, xi_i, xi, ro): 1006 | if xi_i % int(1 / config.xi_step_size): 1007 | return 1008 | if not os.path.isdir('transverse'): 1009 | os.mkdir('transverse') 1010 | 1011 | fname = f'ro_{xi:+09.2f}.png' if xi else 'ro_-00000.00.png' 1012 | plt.imsave(os.path.join('transverse', fname), ro.T, 1013 | origin='lower', vmin=-0.1, vmax=0.1, cmap='bwr') 1014 | 1015 | 1016 | def diagnostics(view_state, config, xi_i, Ez_00_history): 1017 | xi = -xi_i * config.xi_step_size 1018 | 1019 | Ez_00 = Ez_00_history[-1] 1020 | peak_report = diags_peak_msg(Ez_00_history) 1021 | 1022 | ro = view_state.ro 1023 | max_zn = diags_ro_zn(config, ro) 1024 | diags_ro_slice(config, xi_i, xi, ro) 1025 | 1026 | print(f'xi={xi:+.4f} {Ez_00:+.4e}|{peak_report}|zn={max_zn:.3f}') 1027 | sys.stdout.flush() 1028 | 1029 | 1030 | # Main loop # 1031 | 1032 | def main(): 1033 | import config 1034 | with cp.cuda.Device(config.gpu_index): 1035 | 1036 | xs, ys, const, virt_params, state = init(config) 1037 | Ez_00_history = [] 1038 | 1039 | for xi_i in range(config.xi_steps): 1040 | beam_ro = config.beam(xi_i, xs, ys) 1041 | 1042 | state = step(config, const, virt_params, state, beam_ro) 1043 | view_state = GPUArraysView(state) 1044 | 1045 | ez = view_state.Ez[config.grid_steps // 2, config.grid_steps // 2] 1046 | Ez_00_history.append(ez) 1047 | 1048 | time_for_diags = xi_i % 
config.diagnostics_each_N_steps == 0 1049 | last_step = xi_i == config.xi_steps - 1 1050 | if time_for_diags or last_step: 1051 | diagnostics(view_state, config, xi_i, Ez_00_history) 1052 | 1053 | 1054 | if __name__ == '__main__': 1055 | main() 1056 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cupy>=5.1 2 | matplotlib>=1.4 3 | numba>=0.41 4 | numpy>=1.8 5 | scipy>=0.14 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = lcode 3 | author = LCODE Team 4 | author-email = team@lcode.info 5 | summary = Particle beam-driven plasma wakefield acceleration simulator 6 | description-file = README.md 7 | home-page = https://lcode.info 8 | license = 'AGPL-v3+' 9 | classifier = 10 | Development Status :: 2 - Pre-Alpha 11 | Environment :: Console 12 | Intended Audience :: Education 13 | Intended Audience :: Science/Research 14 | License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+) 15 | Programming Language :: Cython 16 | Programming Language :: Python :: 3 :: Only 17 | Programming Language :: Python :: 3.5 18 | Topic :: Scientific/Engineering :: Physics 19 | keywords = 20 | PWFA 21 | particle beam 22 | physics 23 | plasma 24 | simulation 25 | wakefield acceleration 26 | 27 | requires-python = >=3.5 28 | setup_requires_dist = 29 | pbr>=1.9 30 | 31 | [entry_points] 32 | console_scripts = 33 | lcode = lcode:main 34 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | A setuptools based setup module for LCODE. 
3 | It uses pbr (http://docs.openstack.org/developer/pbr), 4 | so most of the packaging metadata is moved to 5 | setup.cfg and requirements.txt. 6 | """ 7 | 8 | from setuptools import setup 9 | 10 | 11 | setup( 12 | setup_requires=[ 13 | 'pbr>=1.9', 14 | ], 15 | pbr=True, 16 | ) 17 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? import {} }: 2 | 3 | let 4 | cupy = pkgs.python3Packages.buildPythonPackage rec { 5 | pname = "cupy"; 6 | version = "6.0.0rc1"; 7 | src = pkgs.python3Packages.fetchPypi { 8 | inherit pname version; 9 | sha256 = "0pbw872f4m4jck4z114xdgs084ah5vin4chki9j6b17ybvx9jnxw"; 10 | }; 11 | propagatedBuildInputs = with pkgs; [ 12 | cudatoolkit cudnn linuxPackages.nvidia_x11 nccl 13 | ] ++ (with pkgs.python3Packages; [ 14 | fastrlock numpy six wheel 15 | ]); 16 | doCheck = false; 17 | }; 18 | in 19 | pkgs.mkShell { 20 | buildInputs = with pkgs; [ 21 | (python3.withPackages (ps: with ps; [ 22 | cupy 23 | matplotlib 24 | numba 25 | numpy 26 | scipy 27 | sphinx sphinx_rtd_theme recommonmark 28 | ])) 29 | cloc 30 | python3Packages.flake8 31 | nvtop 32 | ]; 33 | 34 | NUMBA_FORCE_CUDA_CC = "6.1"; 35 | NUMBA_WARNINGS = 1; 36 | NUMBAPRO_NVVM = "${pkgs.cudatoolkit}/nvvm/lib64/libnvvm.so"; 37 | NUMBAPRO_LIBDEVICE = "${pkgs.cudatoolkit}/nvvm/libdevice"; 38 | NUMBAPRO_CUDA_DRIVER = "${pkgs.linuxPackages.nvidia_x11}/lib/libcuda.so"; 39 | 40 | #OMP_NUM_THREADS = 1; 41 | #NUMBA_NUM_THREADS = 1; 42 | #MKL_NUM_THREADS = 1; 43 | } 44 | --------------------------------------------------------------------------------