├── _config.yml ├── docs ├── requirements.txt ├── source │ ├── modules.rst │ ├── Nbody_simulation_9_1.png │ ├── usage │ │ ├── CPU_Time_serial.png │ │ ├── installation.rst │ │ ├── quickstart.md │ │ └── quickstart.rst │ ├── frontend_API.rst │ ├── community.rst │ ├── index.rst │ ├── pytreegrav.rst │ ├── conf.py │ └── Nbody_simulation.rst ├── Makefile └── make.bat ├── src └── pytreegrav │ ├── __init__.py │ ├── kernel.py │ ├── misc.py │ ├── kdtree │ ├── kdtree.py │ └── treewalk.py │ ├── bruteforce.py │ ├── octree.py │ ├── dynamic_tree.py │ ├── frontend.py │ └── treewalk.py ├── requirements.txt ├── images ├── CPU_Time_both.png ├── CPU_Time_parallel.png └── CPU_Time_serial.png ├── pyproject.toml ├── tests ├── __pycache__ │ └── test.cpython-38-pytest-6.2.5.pyc └── tree_test.py ├── .github └── workflows │ └── python-package.yml ├── LICENSE.txt ├── .readthedocs.yaml ├── setup.py ├── examples ├── benchmark.py └── cuda_test.ipynb ├── paper.md ├── README.ipynb ├── README.md └── paper.bib /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme 2 | numba 3 | numpy 4 | scipy 5 | healpy 6 | -------------------------------------------------------------------------------- /src/pytreegrav/__init__.py: -------------------------------------------------------------------------------- 1 | from .octree import * 2 | from .frontend import * 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | healpy 2 | numba 3 | numpy 4 | scipy 5 | pytest 6 | pyerfa>=2.0.1.4 7 | -------------------------------------------------------------------------------- /docs/source/modules.rst: 
-------------------------------------------------------------------------------- 1 | src 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | pytreegrav 8 | -------------------------------------------------------------------------------- /images/CPU_Time_both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/images/CPU_Time_both.png -------------------------------------------------------------------------------- /images/CPU_Time_parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/images/CPU_Time_parallel.png -------------------------------------------------------------------------------- /images/CPU_Time_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/images/CPU_Time_serial.png -------------------------------------------------------------------------------- /docs/source/Nbody_simulation_9_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/docs/source/Nbody_simulation_9_1.png -------------------------------------------------------------------------------- /docs/source/usage/CPU_Time_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/docs/source/usage/CPU_Time_serial.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- 
/docs/source/frontend_API.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================= 3 | 4 | .. automodule:: pytreegrav.frontend 5 | :noindex: 6 | :members: 7 | -------------------------------------------------------------------------------- /tests/__pycache__/test.cpython-38-pytest-6.2.5.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/tests/__pycache__/test.cpython-38-pytest-6.2.5.pyc -------------------------------------------------------------------------------- /docs/source/community.rst: -------------------------------------------------------------------------------- 1 | Feedback, Support, and Contributions 2 | ==================================== 3 | 4 | To contribute to pytreegrav, report an issue, or seek support, please initiate a pull request or issue through the project `project github `_ 5 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: black-action 2 | on: [push, pull_request] 3 | jobs: 4 | linter_name: 5 | name: runner / black formatter 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v4 9 | - uses: rickstaa/action-black@v1 10 | with: 11 | black_args: "-l 119 ." -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. pytreegrav documentation master file, created by 2 | sphinx-quickstart on Mon Nov 22 10:52:56 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to pytreegrav's documentation! 
def test_answer():
    """Integration test: compare the tree solver against brute-force summation.

    Generates a reproducible uniform random particle distribution, evaluates
    acceleration and potential with both ``method="tree"`` and
    ``method="bruteforce"``, and asserts that the RMS errors are within the
    tolerance expected for the default opening angle.
    """
    # generate a reproducible set of points
    np.random.seed(42)
    N = 4 * 10**4
    x = np.random.rand(N, 3)  # positions uniform in the unit cube
    m = np.ones(N) / N  # equal masses, unit total mass
    h = np.repeat(0.01, N)  # uniform softening length

    accel_tree = Accel(x, m, h, method="tree", parallel=True)
    accel_bruteforce = Accel(x, m, h, method="bruteforce", parallel=True)
    phi_tree = Potential(x, m, h, method="tree", parallel=True)
    phi_bruteforce = Potential(x, m, h, method="bruteforce", parallel=True)

    acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1)))  # RMS force error
    print("RMS force error: ", acc_error)
    phi_error = np.std(phi_tree - phi_bruteforce)
    print("RMS potential error: ", phi_error)
    # NOTE: removed a leftover np.save("phi_error.npy", ...) debugging artifact
    # that wrote a file into the working directory on every test run.
    assert acc_error < 0.02
    assert phi_error < 0.02
# Packaging script for pytreegrav: reads the long description from README.md
# and the runtime dependencies from requirements.txt, then hands everything
# to setuptools.setup().
import setuptools, os

# use the README as the PyPI long description
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# collect install requirements from the requirements.txt sitting next to
# this script, if present (one requirement per line)
thelibFolder = os.path.dirname(os.path.realpath(__file__))
requirementPath = thelibFolder + "/requirements.txt"
install_requires = []
if os.path.isfile(requirementPath):
    with open(requirementPath) as f:
        install_requires = f.read().splitlines()

setuptools.setup(
    name="pytreegrav",
    # NOTE(review): this version (1.1.6) disagrees with release = "1.2.1"
    # in docs/source/conf.py — confirm which is current and sync them.
    version="1.1.6",
    author="Mike Grudic",
    author_email="mike.grudich@gmail.com",
    description="Fast approximate gravitational force and potential calculations",
    long_description=long_description,
    long_description_content_type="text/markdown",
    project_urls={
        "Bug Tracker": "https://github.com/mikegrudic/pytreegrav",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # packages live under src/ (src-layout)
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
    python_requires=">=3.6",
    install_requires=install_requires,
)
automodule:: pytreegrav.treewalk 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: pytreegrav 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /src/pytreegrav/kernel.py: -------------------------------------------------------------------------------- 1 | from numba import njit, float64, float32 2 | 3 | 4 | @njit(fastmath=True) # ([float64(float64,float64),float32(float32,float32)]) 5 | def ForceKernel(r, h): 6 | """ 7 | Returns the quantity equivalent to (fraction of mass enclosed)/ r^3 for a cubic-spline mass distribution of compact support radius h. Used to calculate the softened gravitational force. 8 | 9 | Arguments: 10 | r - radius 11 | h - softening 12 | """ 13 | if r > h: 14 | return 1.0 / (r * r * r) 15 | hinv = 1.0 / h 16 | q = r * hinv 17 | if q <= 0.5: 18 | return (10.666666666666666666 + q * q * (-38.4 + 32.0 * q)) * hinv * hinv * hinv 19 | else: 20 | return ( 21 | (21.333333333333 - 48.0 * q + 38.4 * q * q - 10.666666666667 * q * q * q - 0.066666666667 / (q * q * q)) 22 | * hinv 23 | * hinv 24 | * hinv 25 | ) 26 | 27 | 28 | @njit(fastmath=True) # ([float64(float64,float64)]) 29 | def PotentialKernel(r, h): 30 | """ 31 | Returns the equivalent of -1/r for a cubic-spline mass distribution of compact support radius h. Used to calculate the softened gravitational potential. 
@njit(fastmath=True)
def random_rotation(seed):
    """Returns a random rotation matrix reproducibly, given a random seed

    Builds an orthonormal, right-handed basis: a uniformly random x axis,
    a second random direction Gram-Schmidt orthogonalized against it for
    the y axis, and their cross product for the z axis.

    Parameters
    ----------
    seed: int
        Random seed

    Returns
    -------
    rotation_matrix: array_like
        3x3 array of random rotation matrix entries
    """

    rotation_matrix = zeros((3, 3))
    np.random.seed(seed)  # seed the RNG so the matrix is reproducible
    # generate x axis: uniformly random direction on the unit sphere
    costheta = np.random.uniform(-1, 1)
    sintheta = sqrt(max(1 - costheta * costheta, 0))  # max(...) guards against roundoff
    phi = 2 * np.pi * np.random.uniform(0, 1)
    rotation_matrix[0] = sintheta * np.cos(phi), sintheta * np.sin(phi), costheta

    # generate an independent random direction for the y axis and orthogonalize
    costheta = np.random.uniform(-1, 1)
    sintheta = sqrt(max(1 - costheta * costheta, 0))
    phi = 2 * np.pi * np.random.uniform(0, 1)
    rotation_matrix[1] = sintheta * np.cos(phi), sintheta * np.sin(phi), costheta

    # renamed the accumulator from `sum` (shadowed the builtin) to `dot`/`norm_sq`
    dot = 0
    for k in range(3):  # dot product with the x axis
        dot += rotation_matrix[0, k] * rotation_matrix[1, k]
    for k in range(3):  # deproject: subtract the component along x
        rotation_matrix[1, k] -= dot * rotation_matrix[0, k]
    norm_sq = 0
    for k in range(3):  # normalize y to unit length
        norm_sq += rotation_matrix[1, k] * rotation_matrix[1, k]
    norm = sqrt(norm_sq)
    for k in range(3):
        rotation_matrix[1, k] /= norm

    # now z axis is the cross product x × y, completing a right-handed basis
    for i in range(3):
        j, k = (i + 1) % 3, (i + 2) % 3
        rotation_matrix[2, i] = (
            rotation_matrix[0, j] * rotation_matrix[1, k] - rotation_matrix[1, j] * rotation_matrix[0, k]
        )

    return rotation_matrix
37 | templates_path = ["_templates"] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = "sphinx_rtd_theme" 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 55 | html_static_path = ["_static"] 56 | -------------------------------------------------------------------------------- /docs/source/usage/installation.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | Installation 4 | ============ 5 | 6 | The below will help you quickly install pytreegrav. 7 | 8 | Requirements 9 | ------------ 10 | 11 | You will need a working Python 3.x installation; we recommend installing `Anaconda `_ Python version 3.x. 12 | You will also need to install the following packages: 13 | 14 | * numpy 15 | 16 | * numba 17 | 18 | Installing the latest stable release 19 | ------------------------------------ 20 | 21 | Install the latest stable release with 22 | 23 | .. code-block:: bash 24 | 25 | pip install pytreegrav 26 | 27 | This is the preferred way to install pytreegrav as it will 28 | automatically install the necessary requirements and put Pytreegrav 29 | into your :code:`${PYTHONPATH}` environment variable so you can 30 | import it. 
31 | 32 | Install from source 33 | ------------------- 34 | 35 | Alternatively, you can install the latest version directly from the most up-to-date version 36 | of the source-code by cloning/forking the GitHub repository 37 | 38 | .. code-block:: bash 39 | 40 | git clone https://github.com/mikegrudic/pytreegrav.git 41 | 42 | 43 | Once you have the source, you can build pytreegrav (and add it to your environment) 44 | by executing 45 | 46 | .. code-block:: bash 47 | 48 | python setup.py install 49 | 50 | or 51 | 52 | .. code-block:: bash 53 | 54 | pip install -e . 55 | 56 | in the top level directory. The required Python packages will automatically be 57 | installed as well. 58 | 59 | You can test your installation by looking for the pytreegrav 60 | executable built by the installation 61 | 62 | .. code-block:: bash 63 | 64 | which pytreegrav 65 | 66 | and by importing the pytreegrav Python frontend in Python 67 | 68 | .. code-block:: python 69 | 70 | import pytreegrav 71 | 72 | Testing 73 | ------- 74 | 75 | To test that the tree solver is working correctly, run 76 | 77 | .. code-block:: bash 78 | 79 | pytest 80 | 81 | from the root directory of the package. This will run a basic test problem comparing the acceleration and potential from the tree and brute force solvers respectively, and check that the answers are within the expected tolerance. 
82 | -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | # script to run the Plummer sphere benchmark from the JOSS paper: time how long it takes to run acceleration and potential solves for tree and bruteforce methods, and plot the results 2 | 3 | from pytreegrav import * 4 | import numpy as np 5 | from time import time 6 | from matplotlib import pyplot as plt 7 | import palettable 8 | 9 | parallel = True 10 | theta = 0.7 11 | soft = 0.0 12 | N = 2 ** np.arange(6, 28) 13 | t1 = [] 14 | t2 = [] 15 | t3 = [] 16 | t4 = [] 17 | force_error = [] 18 | phi_error = [] 19 | x = np.random.rand(10**1, 3) 20 | m = np.random.rand(10**1) 21 | Accel(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="tree") 22 | Accel(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="bruteforce") 23 | # BruteForceAccel(x,m,np.repeat(soft,len(m))) 24 | Potential(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="tree") 25 | Potential(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="bruteforce") 26 | 27 | fig, ax = plt.subplots(figsize=(4, 4)) 28 | ax.set_prop_cycle("color", palettable.colorbrewer.qualitative.Dark2_4.mpl_colors) 29 | for n in N: 30 | print(n) 31 | x = np.random.rand(n) 32 | r = np.sqrt(x ** (2.0 / 3) * (1 + x ** (2.0 / 3) + x ** (4.0 / 3)) / (1 - x**2)) 33 | phi_exact = -((1 + r**2) ** -0.5) 34 | x = np.random.normal(size=(n, 3)) 35 | x = (x.T * r / np.sum(x**2, axis=1) ** 0.5).T 36 | m = np.repeat(1.0 / n, n) 37 | h = np.ones_like(m) * soft 38 | t = time() 39 | phitree = Potential(x, m, h, parallel=parallel, theta=theta, method="tree") 40 | t = time() - t 41 | t1.append(t) 42 | t = time() 43 | atree = Accel(x, m, h, parallel=parallel, theta=theta, method="tree") 44 | print(atree) 45 | t = time() - t 46 | t2.append(t) 47 | if n < 64**3: 48 | t = time() 49 | 
# KD-tree construction for the Barnes-Hut gravity solver, implemented as a
# numba jitclass so the tree build and walk run at compiled speed.
from numba import int32, deferred_type, optional, float64, boolean, int64, njit, jit, prange, types
from numba.experimental import jitclass
import numpy as np
from numpy import empty, empty_like, zeros, zeros_like, sqrt
from numba.typed import List

# deferred type lets KDNode hold optional references to its own type
node_type = deferred_type()

# attribute type spec required by @jitclass
spec = [
    ("bounds", float64[:, :]),  # (3,2) min/max of the node's bounding box per axis
    ("size", float64),  # longest side length of the bounding box
    ("delta", float64),  # distance from box geometric center to center of mass
    ("points", float64[:, :]),  # particle positions held by this node (freed after split)
    ("masses", float64[:]),  # particle masses held by this node (freed after split)
    ("Npoints", int64),  # number of particles in this node
    ("h", float64),  # maximum softening length among the node's particles
    ("softening", float64[:]),  # per-particle softening lengths (freed after split)
    ("mass", float64),  # total mass of the node
    ("COM", float64[:]),  # (3,) center of mass
    ("IsLeaf", boolean),  # true when the node holds exactly one particle
    ("HasLeft", boolean),
    ("HasRight", boolean),
    ("left", optional(node_type)),
    ("right", optional(node_type)),
]


@jitclass(spec)
class KDNode(object):
    """One node of the KD-tree: stores bounding box, mass moments, and
    (until it is split) the particle data it covers."""

    def __init__(self, points, masses, softening):
        # axis-aligned bounding box of the contained particles
        self.bounds = empty((3, 2))
        self.bounds[0, 0] = points[:, 0].min()
        self.bounds[0, 1] = points[:, 0].max()
        self.bounds[1, 0] = points[:, 1].min()
        self.bounds[1, 1] = points[:, 1].max()
        self.bounds[2, 0] = points[:, 2].min()
        self.bounds[2, 1] = points[:, 2].max()

        self.softening = softening
        self.h = self.softening.max()

        # node "size" is the longest box edge
        self.size = max(
            self.bounds[0, 1] - self.bounds[0, 0],
            self.bounds[1, 1] - self.bounds[1, 0],
            self.bounds[2, 1] - self.bounds[2, 0],
        )
        self.points = points
        self.Npoints = points.shape[0]
        self.masses = masses
        self.mass = np.sum(masses)
        self.delta = 0.0
        if self.Npoints == 1:
            # single particle: the node is a leaf and its COM is the particle
            self.IsLeaf = True
            self.COM = points[0]
        else:
            self.IsLeaf = False
            # mass-weighted center of mass, accumulated per axis
            self.COM = zeros(3)
            for k in range(3):
                for i in range(self.Npoints):
                    self.COM[k] += points[i, k] * masses[i]
                self.COM[k] /= self.mass
                # accumulate squared offset of box center from COM
                # (presumably used to tighten the opening criterion — confirm in treewalk)
                self.delta += (0.5 * (self.bounds[k, 1] + self.bounds[k, 0]) - self.COM[k]) ** 2
            self.delta = sqrt(self.delta)

        # children are attached later by GenerateChildren
        self.HasLeft = False
        self.HasRight = False
        self.left = None
        self.right = None

    def GenerateChildren(self, axis):
        """Split this node along `axis` at the midpoint of its bounding box
        (NOTE: midpoint, not the median despite the variable name), creating
        left/right children and releasing this node's particle arrays.

        Returns 1 if a split was performed, 0 for a leaf."""
        if self.IsLeaf:
            return 0
        x = self.points[:, axis]
        med = (self.bounds[axis, 0] + self.bounds[axis, 1]) / 2
        index = x < med
        if np.any(index):
            self.left = KDNode(self.points[index], self.masses[index], self.softening[index])
            self.HasLeft = True
        index = np.invert(index)
        if np.any(index):
            self.right = KDNode(self.points[index], self.masses[index], self.softening[index])
            self.HasRight = True
        # drop per-particle data now that it lives in the children;
        # 1-element placeholders keep the jitclass attribute types valid
        self.points = empty((1, 1))
        self.masses = empty(1)
        self.softening = empty(1)
        return 1


# resolve the deferred self-referential node type now that KDNode exists
node_type.define(KDNode.class_type.instance_type)


@njit
def ConstructKDTree(x, m, softening):
    """Build a KD-tree over particle positions `x`, masses `m` and softening
    lengths `softening`, returning the root KDNode.

    The tree is built breadth-first, cycling the split axis x->y->z per level,
    until no node can be split further."""
    # duplicate positions would make the midpoint split loop forever on a
    # node that can never separate its particles, so reject them up front
    if len(np.unique(x[:, 0])) < len(x):
        raise Exception(
            "Non-unique particle positions are currently not supported by the tree-building algorithm. Consider perturbing your positions with a bit of noise if you really want to proceed."
        )
    root = KDNode(x, m, softening)
    nodes = [
        root,
    ]
    axis = 0
    divisible_nodes = 1
    count = 0  # index of the first node not yet processed
    while divisible_nodes > 0:
        N = len(nodes)
        divisible_nodes = 0
        for i in range(count, N):  # loop through the nodes we spawned in the previous pass
            count += 1
            if nodes[i].IsLeaf:
                continue
            else:
                generated_children = nodes[i].GenerateChildren(axis)
                divisible_nodes += generated_children
            if nodes[i].HasLeft:
                nodes.append(nodes[i].left)
            if nodes[i].HasRight:
                nodes.append(nodes[i].right)

        axis = (axis + 1) % 3  # cycle the split axis each level
    return root
You can try modifying the IC generator and playing around with the 15 | initial velocity and geometry for extra fun. We also write a function to 16 | evaluate the total energy, which is conserved down to tree-force and 17 | integration errors. 18 | 19 | .. code:: ipython3 20 | 21 | %pylab 22 | from pytreegrav import Accel, Potential 23 | 24 | def GenerateICs(N,seed=42): 25 | np.random.seed(seed) # seed the RNG for reproducibility 26 | pos = np.random.normal(size=(N,3)) # positions of particles 27 | pos -= np.average(pos,axis=0) # put center of mass at the origin 28 | vel = np.zeros_like(pos) # initialize at rest 29 | vel -= np.average(vel,axis=0) # make average velocity 0 30 | softening = np.repeat(0.1,N) # initialize softening to 0.1 31 | masses = np.repeat(1./N,N) # make the system have unit mass 32 | return pos, masses, vel, softening 33 | 34 | def TotalEnergy(pos, masses, vel, softening): 35 | kinetic = 0.5 * np.sum(masses[:,None] * vel**2) 36 | potential = 0.5 * np.sum(masses * Potential(pos,masses,softening,parallel=True)) 37 | return kinetic + potential 38 | 39 | 40 | .. parsed-literal:: 41 | 42 | Using matplotlib backend: MacOSX 43 | Populating the interactive namespace from numpy and matplotlib 44 | 45 | 46 | Stepper function 47 | ---------------- 48 | 49 | Now let’s define the basic timestep for a leapfrog integrator, put in 50 | the Hamiltonian split kick-drift-kick form (e.g. Springel 2005). 51 | 52 | .. code:: ipython3 53 | 54 | def leapfrog_kdk_timestep(dt, pos, masses, softening, vel, accel): 55 | # first a half-step kick 56 | vel[:] = vel + 0.5 * dt * accel # note that you must slice arrays to modify them in place in the function! 57 | # then full-step drift 58 | pos[:] = pos + dt * vel 59 | # then recompute accelerations 60 | accel[:] = Accel(pos,masses,softening,parallel=True) 61 | # then another half-step kick 62 | vel[:] = vel + 0.5 * dt * accel 63 | 64 | Main simulation loop 65 | -------------------- 66 | 67 | .. 
code:: ipython3 68 | 69 | pos, masses, vel, softening = GenerateICs(10000) # initialize initial condition with 10k particles 70 | 71 | accel = Accel(pos,masses,softening,parallel=True) # initialize acceleration 72 | 73 | t = 0 # initial time 74 | Tmax = 50 # final/max time 75 | 76 | energies = [] #energies 77 | r50s = [] #half-mass radii 78 | ts = [] # times 79 | 80 | 81 | while t <= Tmax: # actual simulation loop - this may take a couple minutes to run 82 | r50s.append(np.median(np.sum((pos - np.median(pos,axis=0))**2,axis=1)**0.5)) 83 | energies.append(TotalEnergy(pos,masses,vel,softening)) 84 | ts.append(t) 85 | 86 | dt = 0.03 # adjust this to control integration error 87 | 88 | leapfrog_kdk_timestep(dt, pos, masses, softening, vel, accel) 89 | t += dt 90 | 91 | print("Simulation complete! Relative energy error: %g"%(np.abs((energies[0]-energies[-1])/energies[0]))) 92 | 93 | 94 | .. parsed-literal:: 95 | 96 | Simulation complete! Relative energy error: 0.00161328 97 | 98 | 99 | Analysis 100 | -------- 101 | 102 | Now we can plot the half-mass radius (to get an idea of how the system 103 | pulsates over time) and the total energy (to check for accuracy) as a 104 | function of time 105 | 106 | .. code:: ipython3 107 | 108 | %matplotlib inline 109 | plt.figure(figsize=(4,4),dpi=300) 110 | plt.plot(ts,energies,label="Total Energy") 111 | plt.plot(ts,r50s,label="Half-mass Radius") 112 | plt.xlabel("Time") 113 | plt.legend() 114 | 115 | 116 | 117 | 118 | .. parsed-literal:: 119 | 120 | 121 | 122 | 123 | 124 | 125 | .. 
image:: Nbody_simulation_9_1.png 126 | 127 | -------------------------------------------------------------------------------- /src/pytreegrav/kdtree/treewalk.py: -------------------------------------------------------------------------------- 1 | from numpy import sqrt, empty, zeros, empty_like, zeros_like 2 | from numba import njit, prange 3 | from ..kernel import * 4 | import numpy as np 5 | 6 | 7 | @njit(fastmath=True) 8 | def PotentialWalk(pos, node, phi, softening=0, theta=0.7): 9 | """Returns the gravitational field at position x by performing the Barnes-Hut treewalk using the provided KD-tree node 10 | 11 | Arguments: 12 | pos - (3,) array containing position of interest 13 | node - KD-tree to walk 14 | 15 | Keyword arguments: 16 | g - (3,) array containing initial value of the gravitational field, used when adding up the contributions in recursive calls 17 | softening - softening radius of the particle at which the force is being evaluated - needed if you want the short-range force to be momentum-conserving 18 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0, gives ~1\ 19 | % accuracy) 20 | """ 21 | ## (ABG) NOTE softening is not actually used here... 
22 | dx = node.COM[0] - pos[0] 23 | dy = node.COM[1] - pos[1] 24 | dz = node.COM[2] - pos[2] 25 | r = sqrt(dx * dx + dy * dy + dz * dz) 26 | if node.IsLeaf: 27 | if r > 0: 28 | phi += node.mass * PotentialKernel(r, node.h) 29 | elif r > max(node.size / theta, node.h + node.size): 30 | phi -= node.mass / r 31 | else: 32 | if node.HasLeft: 33 | phi = PotentialWalk(pos, node.left, phi, theta=theta) 34 | if node.HasRight: 35 | phi = PotentialWalk(pos, node.right, phi, theta=theta) 36 | return phi 37 | 38 | 39 | @njit(fastmath=True) 40 | def ForceWalk(pos, node, g, softening=0.0, theta=0.7): 41 | """Returns the gravitational field at position pos by performing the Barnes-Hut treewalk using the provided KD-tree node 42 | 43 | Arguments: 44 | pos - (3,) array containing position of interest 45 | node - KD-tree to walk 46 | 47 | Parameters: 48 | g - (3,) array containing initial value of the gravitational field, used when adding up the contributions in recursive calls 49 | softening - softening radius of the particle at which the force is being evaluated - needed if you want the short-range force to be momentum-conserving 50 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. 
(default 1.0, gives ~1\ 51 | % accuracy) 52 | """ 53 | dx = node.COM[0] - pos[0] 54 | dy = node.COM[1] - pos[1] 55 | dz = node.COM[2] - pos[2] 56 | r = sqrt(dx * dx + dy * dy + dz * dz) 57 | add_accel = False 58 | fac = 0 59 | if r > 0: 60 | if node.IsLeaf: 61 | add_accel = True 62 | if r < max(node.h, softening): 63 | fac = node.mass * ForceKernel(r, max(node.h, softening)) 64 | else: 65 | fac = node.mass / (r * r * r) 66 | elif r > max(node.size / theta + node.delta, max(node.h, softening) + node.size): 67 | add_accel = True 68 | fac = node.mass / (r * r * r) 69 | 70 | if add_accel: 71 | g[0] += dx * fac 72 | g[1] += dy * fac 73 | g[2] += dz * fac 74 | else: 75 | if node.HasLeft: 76 | g = ForceWalk(pos, node.left, g, softening=softening, theta=theta) 77 | if node.HasRight: 78 | g = ForceWalk(pos, node.right, g, softening=softening, theta=theta) 79 | return g 80 | 81 | 82 | @njit(parallel=True, fastmath=True) 83 | def GetPotentialParallel(pos, tree, softening=None, G=1.0, theta=0.7): 84 | if softening is None: 85 | softening = zeros(pos.shape[0]) 86 | result = empty(pos.shape[0]) 87 | for i in prange(pos.shape[0]): 88 | result[i] = G * PotentialWalk(pos[i], tree, 0.0, softening=softening[i], theta=theta) 89 | return result 90 | 91 | 92 | @njit(fastmath=True) 93 | def GetPotential(pos, tree, softening=None, G=1.0, theta=0.7): 94 | if softening is None: 95 | softening = zeros(pos.shape[0]) 96 | result = empty(pos.shape[0]) 97 | for i in range(pos.shape[0]): 98 | result[i] = G * PotentialWalk(pos[i], tree, 0.0, softening=softening[i], theta=theta) 99 | return result 100 | 101 | 102 | @njit(fastmath=True) 103 | def GetAccel(pos, tree, softening=None, G=1.0, theta=0.7): 104 | if softening is None: 105 | softening = zeros(pos.shape[0]) 106 | result = empty(pos.shape) 107 | for i in range(pos.shape[0]): 108 | result[i] = G * ForceWalk(pos[i], tree, zeros(3), softening=softening[i], theta=theta) 109 | return result 110 | 111 | 112 | @njit(parallel=True, fastmath=True) 113 
| def GetAccelParallel(pos, tree, softening, G=1.0, theta=0.7): 114 | if softening is None: 115 | softening = zeros(len(pos), dtype=np.float64) 116 | result = empty(pos.shape) 117 | for i in prange(pos.shape[0]): 118 | result[i] = G * ForceWalk(pos[i], tree, zeros(3), softening=softening[i], theta=theta) 119 | return result 120 | -------------------------------------------------------------------------------- /docs/source/usage/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quickstart 2 | pytreegrav is a package for computing the gravitational potential and/or field of a set of particles. It includes methods for brute-force direction summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher peformance than the equivalent pure Python implementation. 3 | 4 | First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is. 5 | 6 | 7 | ```python 8 | import numpy as np 9 | from pytreegrav import Accel, Potential 10 | ``` 11 | 12 | 13 | ```python 14 | N = 10**5 # number of particles 15 | x = np.random.rand(N,3) # positions randomly sampled in the unit cube 16 | m = np.repeat(1./N,N) # masses - let the system have unit mass 17 | h = np.repeat(0.01,N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions 18 | ``` 19 | 20 | Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential at each particle position: 21 | 22 | 23 | ```python 24 | print(Accel(x,m,h)) 25 | print(Potential(x,m,h)) 26 | ``` 27 | 28 | [[-0.1521787 0.2958852 -0.30109005] 29 | [-0.50678204 -0.37489886 -1.0558666 ] 30 | [-0.24650087 0.95423467 -0.175074 ] 31 | ... 
32 | [ 0.87868472 -1.28332176 -0.22718531] 33 | [-0.41962742 0.32372245 -1.31829084] 34 | [ 2.45127054 0.38292881 0.05820412]] 35 | [-2.35518057 -2.19299372 -2.28494218 ... -2.11783337 -2.1653377 36 | -1.80464695] 37 | 38 | 39 | By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. Let's try both and compare their runtimes: 40 | 41 | 42 | ```python 43 | from time import time 44 | t = time() 45 | # tree gravitational acceleration 46 | accel_tree = Accel(x,m,h,method='tree') 47 | print("Tree accel runtime: %gs"%(time() - t)); t = time() 48 | 49 | accel_bruteforce = Accel(x,m,h,method='bruteforce') 50 | print("Brute force accel runtime: %gs"%(time() - t)); t = time() 51 | 52 | phi_tree = Potential(x,m,h,method='tree') 53 | print("Tree potential runtime: %gs"%(time() - t)); t = time() 54 | 55 | phi_bruteforce = Potential(x,m,h,method='bruteforce') 56 | print("Brute force potential runtime: %gs"%(time() - t)); t = time() 57 | ``` 58 | 59 | Tree accel runtime: 0.927745s 60 | Brute force accel runtime: 44.1175s 61 | Tree potential runtime: 0.802386s 62 | Brute force potential runtime: 20.0234s 63 | 64 | 65 | As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding 10^4. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: 66 | ![Benchmark](./CPU_Time_serial.png) 67 | 68 | 69 | But there's no free lunch here: the tree methods are approximate. 
Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions: 70 | 71 | 72 | ```python 73 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 74 | print("RMS force error: ", acc_error) 75 | phi_error = np.std(phi_tree - phi_bruteforce) 76 | print("RMS potential error: ", phi_error) 77 | ``` 78 | 79 | RMS force error: 0.006739311224338851 80 | RMS potential error: 0.0003888328578588027 81 | 82 | 83 | The above errors are typical for default settings: ~1% force error and ~0.1\% potential error. The error in the tree approximation is controlled by the Barnes-Hut opening angle ``theta``, set to 0.7 by default. Smaller ``theta`` gives higher accuracy, but also runs slower: 84 | 85 | 86 | ```python 87 | thetas = 0.1,0.2,0.4,0.8 # different thetas to try 88 | for theta in thetas: 89 | t = time() 90 | accel_tree = Accel(x,m,h,method='tree',theta=theta) 91 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) 92 | print("theta=%g Runtime: %gs RMS force error: %g"%(theta, time()-t, acc_error)) 93 | ``` 94 | 95 | theta=0.1 Runtime: 63.1738s RMS force error: 3.78978e-05 96 | theta=0.2 Runtime: 14.3356s RMS force error: 0.000258755 97 | theta=0.4 Runtime: 2.91292s RMS force error: 0.00148698 98 | theta=0.8 Runtime: 0.724668s RMS force error: 0.0105937 99 | 100 | 101 | Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. 
This can speed things up considerably, with parallel scaling that will vary with your core and particle number: 102 | 103 | 104 | ```python 105 | from time import time 106 | t = time() 107 | # tree gravitational acceleration 108 | accel_tree = Accel(x,m,h,method='tree',parallel=True) 109 | print("Tree accel runtime in parallel: %gs"%(time() - t)); t = time() 110 | 111 | accel_bruteforce = Accel(x,m,h,method='bruteforce',parallel=True) 112 | print("Brute force accel runtime in parallel: %gs"%(time() - t)); t = time() 113 | 114 | phi_tree = Potential(x,m,h,method='tree',parallel=True) 115 | print("Tree potential runtime in parallel: %gs"%(time() - t)); t = time() 116 | 117 | phi_bruteforce = Potential(x,m,h,method='bruteforce',parallel=True) 118 | print("Brute force potential runtime in parallel: %gs"%(time() - t)); t = time() 119 | ``` 120 | 121 | Tree accel runtime in parallel: 0.222271s 122 | Brute force accel runtime in parallel: 7.25576s 123 | Tree potential runtime in parallel: 0.181393s 124 | Brute force potential runtime in parallel: 5.72611s 125 | 126 | 127 | ## What if I want to evaluate the fields at different points than where the particles are? 128 | 129 | We got you covered. 
The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver): 130 | 131 | 132 | ```python 133 | from pytreegrav import AccelTarget, PotentialTarget 134 | 135 | # generate a separate set of "target" positions where we want to know the potential and field 136 | N_target = 10**4 137 | x_target = np.random.rand(N_target,3) 138 | h_target = np.repeat(0.01,N_target) # optional "target" softening: this sets a floor on the softening length of all forces/potentials computed 139 | 140 | accel_tree = AccelTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 141 | accel_bruteforce = AccelTarget(x_target,x,m,h_source=h,method='bruteforce') 142 | 143 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 144 | print("RMS force error: ", acc_error) 145 | 146 | phi_tree = PotentialTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 147 | phi_bruteforce = PotentialTarget(x_target,x,m,h_target=h_target, h_source=h,method='bruteforce') 148 | 149 | phi_error = np.std(phi_tree - phi_bruteforce) 150 | print("RMS potential error: ", phi_error) 151 | ``` 152 | 153 | RMS force error: 0.006719983300560105 154 | RMS potential error: 0.0003873676304955059 155 | 156 | -------------------------------------------------------------------------------- /docs/source/usage/quickstart.rst: -------------------------------------------------------------------------------- 1 | 2 | Quickstart 3 | ========== 4 | 5 | pytreegrav is a package for computing the gravitational potential and/or field of a set of particles. 
It includes methods for brute-force direct summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher performance than the equivalent pure Python implementation. 6 | 7 | First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is. 8 | 9 | .. code-block:: python 10 | 11 | import numpy as np 12 | from pytreegrav import Accel, Potential 13 | 14 | .. code-block:: python 15 | 16 | N = 10**5 # number of particles 17 | x = np.random.rand(N,3) # positions randomly sampled in the unit cube 18 | m = np.repeat(1./N,N) # masses - let the system have unit mass 19 | h = np.repeat(0.01,N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions 20 | 21 | Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential at each particle position: 22 | 23 | .. code-block:: python 24 | 25 | print(Accel(x,m,h)) 26 | print(Potential(x,m,h)) 27 | 28 | .. code-block:: 29 | 30 | [[-0.1521787 0.2958852 -0.30109005] 31 | [-0.50678204 -0.37489886 -1.0558666 ] 32 | [-0.24650087 0.95423467 -0.175074 ] 33 | ... 34 | [ 0.87868472 -1.28332176 -0.22718531] 35 | [-0.41962742 0.32372245 -1.31829084] 36 | [ 2.45127054 0.38292881 0.05820412]] 37 | [-2.35518057 -2.19299372 -2.28494218 ... -2.11783337 -2.1653377 38 | -1.80464695] 39 | 40 | 41 | 42 | By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. Let's try both and compare their runtimes: 43 | 44 | ..
code-block:: python 45 | 46 | from time import time 47 | t = time() 48 | # tree gravitational acceleration 49 | accel_tree = Accel(x,m,h,method='tree') 50 | print("Tree accel runtime: %gs"%(time() - t)); t = time() 51 | 52 | accel_bruteforce = Accel(x,m,h,method='bruteforce') 53 | print("Brute force accel runtime: %gs"%(time() - t)); t = time() 54 | 55 | phi_tree = Potential(x,m,h,method='tree') 56 | print("Tree potential runtime: %gs"%(time() - t)); t = time() 57 | 58 | phi_bruteforce = Potential(x,m,h,method='bruteforce') 59 | print("Brute force potential runtime: %gs"%(time() - t)); t = time() 60 | 61 | .. code-block:: 62 | 63 | Tree accel runtime: 0.927745s 64 | Brute force accel runtime: 44.1175s 65 | Tree potential runtime: 0.802386s 66 | Brute force potential runtime: 20.0234s 67 | 68 | 69 | 70 | As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding 10^4. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: 71 | 72 | .. image:: ./CPU_Time_serial.png 73 | :target: ./CPU_Time_serial.png 74 | :alt: Benchmark 75 | 76 | 77 | But there's no free lunch here: the tree methods are approximate. Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions: 78 | 79 | .. code-block:: python 80 | 81 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 82 | print("RMS force error: ", acc_error) 83 | phi_error = np.std(phi_tree - phi_bruteforce) 84 | print("RMS potential error: ", phi_error) 85 | 86 | .. code-block:: 87 | 88 | RMS force error: 0.006739311224338851 89 | RMS potential error: 0.0003888328578588027 90 | 91 | 92 | 93 | The above errors are typical for default settings: ~1% force error and ~0.1\% potential error. 
The error in the tree approximation is controlled by the Barnes-Hut opening angle ``theta``\ , set to 0.7 by default. Smaller ``theta`` gives higher accuracy, but also runs slower: 94 | 95 | .. code-block:: python 96 | 97 | thetas = 0.1,0.2,0.4,0.8 # different thetas to try 98 | for theta in thetas: 99 | t = time() 100 | accel_tree = Accel(x,m,h,method='tree',theta=theta) 101 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) 102 | print("theta=%g Runtime: %gs RMS force error: %g"%(theta, time()-t, acc_error)) 103 | 104 | .. code-block:: 105 | 106 | theta=0.1 Runtime: 63.1738s RMS force error: 3.78978e-05 107 | theta=0.2 Runtime: 14.3356s RMS force error: 0.000258755 108 | theta=0.4 Runtime: 2.91292s RMS force error: 0.00148698 109 | theta=0.8 Runtime: 0.724668s RMS force error: 0.0105937 110 | 111 | 112 | 113 | Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. This can speed things up considerably, with parallel scaling that will vary with your core and particle number: 114 | 115 | .. code-block:: python 116 | 117 | from time import time 118 | t = time() 119 | # tree gravitational acceleration 120 | accel_tree = Accel(x,m,h,method='tree',parallel=True) 121 | print("Tree accel runtime in parallel: %gs"%(time() - t)); t = time() 122 | 123 | accel_bruteforce = Accel(x,m,h,method='bruteforce',parallel=True) 124 | print("Brute force accel runtime in parallel: %gs"%(time() - t)); t = time() 125 | 126 | phi_tree = Potential(x,m,h,method='tree',parallel=True) 127 | print("Tree potential runtime in parallel: %gs"%(time() - t)); t = time() 128 | 129 | phi_bruteforce = Potential(x,m,h,method='bruteforce',parallel=True) 130 | print("Brute force potential runtime in parallel: %gs"%(time() - t)); t = time() 131 | 132 | .. 
code-block:: 133 | 134 | Tree accel runtime in parallel: 0.222271s 135 | Brute force accel runtime in parallel: 7.25576s 136 | Tree potential runtime in parallel: 0.181393s 137 | Brute force potential runtime in parallel: 5.72611s 138 | 139 | 140 | 141 | What if I want to evaluate the fields at different points than where the particles are? 142 | --------------------------------------------------------------------------------------- 143 | 144 | We got you covered. The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver): 145 | 146 | .. code-block:: python 147 | 148 | from pytreegrav import AccelTarget, PotentialTarget 149 | 150 | # generate a separate set of "target" positions where we want to know the potential and field 151 | N_target = 10**4 152 | x_target = np.random.rand(N_target,3) 153 | h_target = np.repeat(0.01,N_target) # optional "target" softening: this sets a floor on the softening length of all forces/potentials computed 154 | 155 | accel_tree = AccelTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 156 | accel_bruteforce = AccelTarget(x_target,x,m,h_source=h,method='bruteforce') 157 | 158 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 159 | print("RMS force error: ", acc_error) 160 | 161 | phi_tree = PotentialTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 162 | phi_bruteforce = PotentialTarget(x_target,x,m,h_target=h_target, h_source=h,method='bruteforce') 163 | 164 | phi_error = np.std(phi_tree - phi_bruteforce) 165 | print("RMS potential error: ", phi_error) 166 | 167 | .. 
code-block:: 168 | 169 | RMS force error: 0.006719983300560105 170 | RMS potential error: 0.0003873676304955059 171 | -------------------------------------------------------------------------------- /examples/cuda_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using matplotlib backend: TkAgg\n", 13 | "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", 14 | "Populating the interactive namespace from numpy and matplotlib\n", 15 | "Found 1 CUDA devices\n", 16 | "id 0 b'NVIDIA T1000' [SUPPORTED]\n", 17 | " Compute Capability: 7.5\n", 18 | " PCI Device ID: 0\n", 19 | " PCI Bus ID: 101\n", 20 | " UUID: GPU-b303fbe2-bd8d-69ed-9a8c-01198eed12ed\n", 21 | " Watchdog: Enabled\n", 22 | " FP32/FP64 Performance Ratio: 32\n", 23 | "Summary:\n", 24 | "\t1/1 devices are supported\n" 25 | ] 26 | }, 27 | { 28 | "name": "stderr", 29 | "output_type": "stream", 30 | "text": [ 31 | "/home/mgrudic/.local/lib/python3.9/site-packages/IPython/core/magics/pylab.py:162: UserWarning: pylab import has clobbered these variables: ['sqrt', 'vectorize']\n", 32 | "`%matplotlib` prevents importing * from pylab and numpy\n", 33 | " warn(\"pylab import has clobbered these variables: %s\" % clobbered +\n", 34 | "/home/mgrudic/.local/lib/python3.9/site-packages/numba/cuda/cudadrv/devicearray.py:886: NumbaPerformanceWarning: \u001b[1mHost array used in CUDA kernel will incur copy overhead to/from device.\u001b[0m\n", 35 | " warn(NumbaPerformanceWarning(msg))\n" 36 | ] 37 | }, 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "3.88 s ± 18.9 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "%pylab\n", 48 | "from numba import (\n", 49 | " jit,\n", 50 | " vectorize,\n", 51 | " float32,\n", 52 | " float64,\n", 53 | " cfunc,\n", 54 | " njit,\n", 55 | " prange,\n", 56 | " get_num_threads,\n", 57 | " set_num_threads,\n", 58 | ")\n", 59 | "import numpy as np\n", 60 | "from math import sqrt\n", 61 | "from scipy.special import comb\n", 62 | "from scipy.interpolate import interp2d, RectBivariateSpline\n", 63 | "from numba import cuda\n", 64 | "from numpy import float64, float32, int32, ndarray\n", 65 | "\n", 66 | "cuda.detect()\n", 67 | "\n", 68 | "\n", 69 | "# Controls threads per block and shared memory usage.\n", 70 | "# The computation will be done on blocks of TPBxTPB elements.\n", 71 | "TPB = 16\n", 72 | "\n", 73 | "\n", 74 | "@cuda.jit(\"void(float32[:,:],float32[:])\", fastmath=True)\n", 75 | "def bruteforce_potential(x, phi):\n", 76 | " i, j = cuda.grid(2)\n", 77 | " if i < x.shape[0] and j < x.shape[0] and j < i:\n", 78 | " r = (\n", 79 | " (x[i, 0] - x[j, 0]) * (x[i, 0] - x[j, 0])\n", 80 | " + (x[i, 1] - x[j, 1]) * (x[i, 1] - x[j, 1])\n", 81 | " + (x[i, 2] - x[j, 2]) * (x[i, 2] - x[j, 2])\n", 82 | " )\n", 83 | " dphi = -1 / sqrt(r)\n", 84 | "\n", 85 | " cuda.atomic.add(phi, i, dphi)\n", 86 | " cuda.atomic.add(phi, j, dphi)\n", 87 | "\n", 88 | "\n", 89 | "Np = 4096 * 32\n", 90 | "\n", 91 | "x = np.float32(np.random.rand(Np, 3))\n", 92 | "phi = np.zeros(Np) # cuda.device_array(Np, dtype=np.float32); phi[:] = 0.\n", 93 | "\n", 94 | "\n", 95 | "threadsperblock = (16, 16)\n", 96 | "blockspergrid = (Np // threadsperblock[0], Np // threadsperblock[1]) # int(ceil(Np // threadsperblock))\n", 97 | "\n", 98 | "%timeit bruteforce_potential[blockspergrid,threadsperblock](x,phi)\n", 99 | "\n", 100 | "# @cuda.jit(\"void(float32[:], float32[:,:], float32[:], float32[:,:], float32)\",fastmath=True)\n", 101 | "# def GridSurfaceDensity_core_cuda(f, x2d, h, grid, size):\n", 102 | "# res = 
np.int32(grid.shape[0])\n", 103 | "# dx = np.float32(size / (res - 1))\n", 104 | "\n", 105 | "# # numba provides this function for working out which element you're\n", 106 | "# # supposed to be accessing\n", 107 | "# i = cuda.grid(1)\n", 108 | "# if i hs_sqr:\n", 130 | "# continue\n", 131 | "# r = sqrt(r)\n", 132 | "# q = r * hinv\n", 133 | "# if q <= 0.5:\n", 134 | "# kernel = 1 - 6 * q * q * (1 - q)\n", 135 | "# else: # q <= 1.0:\n", 136 | "# a = 1 - q\n", 137 | "# kernel = 2 * a * a * a\n", 138 | "# cuda.atomic.add(grid, (gx,gy), kernel * mh2)\n", 139 | "# cuda.syncthreads()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "4.55 s ± 87.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "from pytreegrav.bruteforce import Potential_bruteforce_parallel\n", 157 | "\n", 158 | "%timeit Potential_bruteforce_parallel(x,np.ones(Np),np.zeros(Np))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 39, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "array([-1883.2377, -1630.0737, -2068.1045, ..., -1790.0035, -1583.7726,\n", 170 | " -1866.4478], dtype=float32)" 171 | ] 172 | }, 173 | "execution_count": 39, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "phi.copy_to_host()" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "from pytreegrav.bruteforce import Po" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "2023.2.0", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | 
"file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.9.16" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: '``pytreegrav``: A fast Python gravity solver' 3 | tags: 4 | - Python 5 | - physics 6 | - gravity 7 | - simulations 8 | authors: 9 | - name: Michael Y. Grudić 10 | orcid: 0000-0002-1655-5604 11 | affiliation: "1,2" 12 | - name: Alexander B. Gurvich 13 | orcid: 0000-0002-6145-3674 14 | affiliation: 2 15 | affiliations: 16 | - name: NASA Hubble Fellow, Carnegie Observatories 17 | index: 1 18 | - name: Department of Physics & Astronomy and CIERA, Northwestern University 19 | index: 2 20 | date: 9 June 2021 21 | bibliography: paper.bib 22 | --- 23 | 24 | # Summary 25 | 26 | Gravity is important in a wide variety of science problems. In particular, questions in astrophysics nearly all involve gravity, and can have large ($\gg10^4$) numbers of gravitating masses, such as the stars in a cluster or galaxy, or the discrete fluid elements in a hydrodynamics simulation. Often the gravitational field of such a large number of masses can be too computationally expensive to compute by directly summing the contribution of every single element at every point of interest. 27 | 28 | ``pytreegrav`` is a multi-method Python package for computing gravitational fields and potentials. It includes an exact direct-summation ("brute force") solver and a fast, approximate tree-based method that can be orders of magnitude faster than the naïve method. It can compute fields and potentials from arbitrary particle distributions at arbitrary points, with arbitrary softening/smoothing lengths, and is parallelized with OpenMP. 
29 | 30 | # Statement of need 31 | 32 | The problem addressed by ``pytreegrav`` is the following: given an arbitrary set of "source" masses $m_i$ with 3D coordinates $\mathbf{x}_i$, and optionally each having a finite spatial extent $h_i$ (the _softening radius_), one would like to compute the gravitational potential $\Phi$ and/or the gravitational field $\mathbf{g}$ at an arbitrary set of "target" points in space $\mathbf{y}_i$. A common application for this is N-body simulations (wherein $\mathbf{y}_i=\mathbf{x}_i$). It is also often useful for _analyzing_ simulation results after the fact -- $\Phi$ and $\mathbf{g}$ are sometimes not saved in simulation outputs, and even when they are it is often useful to analyze the gravitational interactions between specific _subsets_ of the mass elements in the simulation. Computing $\mathbf{g}$ is also important for generating equilibrium _initial conditions_ for N-body simulations [@makedisk;@galic], and for identifying interesting gravitationally-bound structures such as halos, star clusters, and giant molecular clouds [@rockstar;@grudic2018;@guszejnov2020]. 33 | 34 | Many gravity simulation codes (or multi-physics simulation codes _including_ gravity) have been written that address the problem of gravity computation in a variety of ways for their own internal purposes [@aarseth_nbody;@dehnen]. However, ``pykdgrav`` (the precursor of ``pytreegrav``) was the first Python package to offer a generic, modular, trivially-installable gravity solver that could be easily integrated into any other Python code, using the fast, approximate tree-based @barneshut method to be practical for large particle numbers. 
``pykdgrav`` used a KD-tree implementation accelerated with ``numba`` [@numba] to achieve high performance in the potential/field evaluation; however, the prerequisite tree-building step had relatively high overhead and a very large memory footprint, because the entire dataset was redundantly stored at every level in the tree hierarchy. This made it difficult to scale to various practical research problems, such as analyzing high-resolution galaxy simulations [@fire_pressurebalance]. ``pytreegrav`` is a full refactor of ``pykdgrav`` that addresses these shortcomings with a new octree implementation, with drastically reduced tree-build time and memory footprint, and a more efficient non-recursive tree traversal for field summation. This makes it suitable for post-processing datasets from state-of-the-art astrophysics simulations, with upwards of $10^8$ particles in the region of interest. 35 | 36 | # Methods 37 | 38 | ``pytreegrav`` can compute $\Phi$ and $\mathbf{g}$ using one of two methods: by "brute force" (explicitly summing the field of every particle, which is exact to machine precision), or using the fast, approximate @barneshut tree-based method (which is approximate, but much faster for large particle numbers). In $N$-body problems where the fields at all particle positions must be known, the cost of the brute-force method scales as $\propto N^2$, while the cost of the tree-based method scales less steeply, as $\propto N \log N$. 39 | 40 | ![Wall-clock time per particle running ``pytreegrav`` on a sample of $N$ particles from a @plummer distribution for various $N$.
Test was run on an Intel i9 9900K workstation on a single core (_left_) and in parallel on 16 logical cores (_right_).\label{fig:cputime}](images/CPU_Time_both.png) 41 | 42 | The brute-force methods are often fastest for small ($<10^3$ particle) point sets because they lack the overheads of tree construction and traversal, while the tree-based methods will typically be faster for larger datasets because they reduce the number of floating-point operations required. Both methods are optimized with the ``numba`` LLVM JIT compiler [@numba], and the basic ``Accel`` and ``Potential`` front-end functions will automatically choose the method that is likely to be faster, based on this heuristic crossover point of $10^3$ particles. Both methods can also optionally be parallelized with OpenMP, via the ``numba`` ``@njit(parallel=True)`` interface. 43 | 44 | The implementation of the tree build and tree-based field summation largely follows that of ``GADGET-2`` [@gadget2]. Starting with an initial cube enclosing all particles, particles are inserted into the tree one at a time. Nodes are divided into 8 subnodes until each subnode contains at most one particle. The indices of the 8 subnodes of each node are stored for an initial recursive traversal of the completed tree, but an optimized tree traversal only needs to know the _first_ subnode (if the node is to be refined) and the index of the next branch of the tree (if the field due to the node is summed directly), so these indices are recorded in the initial recursive tree traversal, and the 8 explicit subnode indices are then deleted, saving memory and removing any empty nodes from consideration. Once these "next branch" and "first subnode" indices are known, the tree field summations can be done in a single ``while`` loop with no recursive function calls, which generally improves performance and memory usage.
45 | 46 | The field summation itself uses the @barneshut geometric opening criterion, with improvements suggested by @dubinski: for a node of side length $L$ with centre of mass located at distance $r$ from the target point, its contribution is summed using the monopole approximation (treating the whole node as a point mass) only if $r > L/\Theta + \delta$, where $\Theta=0.7$ by default (giving $\sim 1\%$ RMS error in $\mathbf{g}$), $\delta$ is the distance from the node's geometric center to its center of mass. If the conditions for approximation are not satisfied, the node's subnodes are considered in turn, until the field contribution of all mass within the node is summed. 47 | 48 | ``pytreegrav`` supports gravitational softening by assuming the mass distribution of each particle takes the form of a standard M4 cubic spline kernel, which is zero beyond the softening radius $h$ (outside which the field reduces to that of a point mass). Explicit expressions for this form of the softened gravitational potential and field are given in @gizmo. $h$ is allowed to vary from particle to particle, and when summing the field the larger of the source or the target softening is used (symmetrizing the force between overlapping particles). When softenings are nonzero, the largest softening $h_{\rm max}$ of all particles in a node is stored, and a node is always opened in the field summation if $r < 0.6L + \max\left(h_{\rm target}, h_{\rm max}\right) + \delta$, where $h_{\rm target}$ is the softening of the target particle where the field is being summed. This ensures that any interactions between physically-overlapping particles are summed directly with the softening kernel. 49 | 50 | # Acknowledgements 51 | 52 | We acknowledge code contributions from Ben Keller and Martin Beroiz, and helpful feedback from Elisa Bortolas, Thorsten García, and GitHub user ``herkesg`` during the development of ``pykdgrav``, which were incorporated into ``pytreegrav``. 
53 | 54 | # References 55 | -------------------------------------------------------------------------------- /README.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "pytreegrav is a package for computing the gravitational potential and/or field of a set of particles. It includes methods for brute-force direction summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher peformance than the equivalent pure Python implementation, without writing a single line of C or Cython." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Walkthrough\n", 16 | "First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import numpy as np\n", 26 | "from pytreegrav import Accel, Potential" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "N = 10**5 # number of particles\n", 36 | "x = np.random.rand(N, 3) # positions randomly sampled in the unit cube\n", 37 | "m = np.repeat(1.0 / N, N) # masses - let the system have unit mass\n", 38 | "h = np.repeat(0.01, N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential:" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "print(Accel(x, m, h))\n", 55 | "print(Potential(x, m, h))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. 
Let's try both and compare their runtimes:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "from time import time\n", 72 | "\n", 73 | "t = time()\n", 74 | "# tree gravitational acceleration\n", 75 | "accel_tree = Accel(x, m, h, method=\"tree\")\n", 76 | "print(\"Tree accel runtime: %gs\" % (time() - t))\n", 77 | "t = time()\n", 78 | "\n", 79 | "accel_bruteforce = Accel(x, m, h, method=\"bruteforce\")\n", 80 | "print(\"Brute force accel runtime: %gs\" % (time() - t))\n", 81 | "t = time()\n", 82 | "\n", 83 | "phi_tree = Potential(x, m, h, method=\"tree\")\n", 84 | "print(\"Tree potential runtime: %gs\" % (time() - t))\n", 85 | "t = time()\n", 86 | "\n", 87 | "phi_bruteforce = Potential(x, m, h, method=\"bruteforce\")\n", 88 | "print(\"Brute force potential runtime: %gs\" % (time() - t))\n", 89 | "t = time()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding $10^4$. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: \"Performance\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "But there's no free lunch here: the tree methods are approximate. 
Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1))) # RMS force error\n", 113 | "print(\"RMS force error: \", acc_error)\n", 114 | "phi_error = np.std(phi_tree - phi_bruteforce)\n", 115 | "print(\"RMS potential error: \", phi_error)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "The above errors are typical for default settings: $\\sim 1\\%$ force error and $\\sim 0.1\\%$ potential error. The error in the tree approximation is controlled by the Barnes-Hut opening angle $\\Theta$, set to 0.7 by default. Smaller $\\Theta$ gives higher accuracy, but also runs slower:" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "thetas = 0.1, 0.2, 0.4, 0.8 # different thetas to try\n", 132 | "for theta in thetas:\n", 133 | " t = time()\n", 134 | " accel_tree = Accel(x, m, h, method=\"tree\", theta=theta)\n", 135 | " acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1)))\n", 136 | " print(\"theta=%g Runtime: %gs RMS force error: %g\" % (theta, time() - t, acc_error))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. 
This can speed things up considerably, with parallel scaling that will vary with your core and particle number:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "from time import time\n", 153 | "\n", 154 | "t = time()\n", 155 | "# tree gravitational acceleration\n", 156 | "accel_tree = Accel(x, m, h, method=\"tree\", parallel=True)\n", 157 | "print(\"Tree accel runtime in parallel: %gs\" % (time() - t))\n", 158 | "t = time()\n", 159 | "\n", 160 | "accel_bruteforce = Accel(x, m, h, method=\"bruteforce\", parallel=True)\n", 161 | "print(\"Brute force accel runtime in parallel: %gs\" % (time() - t))\n", 162 | "t = time()\n", 163 | "\n", 164 | "phi_tree = Potential(x, m, h, method=\"tree\", parallel=True)\n", 165 | "print(\"Tree potential runtime in parallel: %gs\" % (time() - t))\n", 166 | "t = time()\n", 167 | "\n", 168 | "phi_bruteforce = Potential(x, m, h, method=\"bruteforce\", parallel=True)\n", 169 | "print(\"Brute force potential runtime in parallel: %gs\" % (time() - t))\n", 170 | "t = time()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "# What if I want to evaluate the fields at different points than where the particles are?\n", 178 | "\n", 179 | "We got you covered. 
The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver):" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "from pytreegrav import AccelTarget, PotentialTarget\n", 189 | "\n", 190 | "# generate a separate set of \"target\" positions where we want to know the potential and field\n", 191 | "N_target = 10**4\n", 192 | "x_target = np.random.rand(N_target, 3)\n", 193 | "h_target = np.repeat(\n", 194 | " 0.01, N_target\n", 195 | ") # optional \"target\" softening: this sets a floor on the softening length of all forces/potentials computed\n", 196 | "\n", 197 | "accel_tree = AccelTarget(\n", 198 | " x_target, x, m, h_target=h_target, h_source=h, method=\"tree\"\n", 199 | ") # we provide the points/masses/softenings we generated before as the \"source\" particles\n", 200 | "accel_bruteforce = AccelTarget(x_target, x, m, h_source=h, method=\"bruteforce\")\n", 201 | "\n", 202 | "acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1))) # RMS force error\n", 203 | "print(\"RMS force error: \", acc_error)\n", 204 | "\n", 205 | "phi_tree = PotentialTarget(\n", 206 | " x_target, x, m, h_target=h_target, h_source=h, method=\"tree\"\n", 207 | ") # we provide the points/masses/softenings we generated before as the \"source\" particles\n", 208 | "phi_bruteforce = PotentialTarget(x_target, x, m, h_target=h_target, h_source=h, method=\"bruteforce\")\n", 209 | "\n", 210 | "phi_error = np.std(phi_tree - phi_bruteforce)\n", 211 | "print(\"RMS potential error: \", phi_error)" 212 | ] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "Python 3", 218 | "language": "python", 219 | "name": "python3" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 
| "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.8.5" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 4 236 | } 237 | -------------------------------------------------------------------------------- /src/pytreegrav/bruteforce.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import sqrt, empty, zeros, empty_like, zeros_like 3 | from numba import njit, prange 4 | from .kernel import * 5 | 6 | 7 | def PotentialTarget_bruteforce(x_target, softening_target, x_source, m_source, softening_source, G=1.0): 8 | """Returns the exact gravitational potential due to a set of particles, at a set of positions that need not be the same as the particle positions. 9 | 10 | Arguments: 11 | x_target -- shape (N,3) array of positions where the potential is to be evaluated 12 | softening_target -- shape (N,) array of minimum softening lengths to be used 13 | x_source -- shape (M,3) array of positions of gravitating particles 14 | m_source -- shape (M,) array of particle masses 15 | softening_source -- shape (M,) array of softening lengths 16 | 17 | Optional arguments: 18 | G -- gravitational constant (default 0.7) 19 | 20 | Returns: 21 | shape (N,) array of potential values 22 | """ 23 | potential = np.zeros(x_target.shape[0]) 24 | dx = np.empty(3) 25 | for i in prange(x_target.shape[0]): 26 | for j in range(x_source.shape[0]): 27 | for k in range(3): 28 | dx[k] = x_target[i, k] - x_source[j, k] 29 | r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) 30 | 31 | h = max(softening_source[j], softening_target[i]) 32 | if r < h: 33 | potential[i] += m_source[j] * PotentialKernel(r, h) 34 | else: 35 | if r > 0: 36 | potential[i] -= m_source[j] / r 37 | return G * potential 38 | 39 | 40 | 
PotentialTarget_bruteforce_parallel = njit(PotentialTarget_bruteforce, fastmath=True, parallel=True) 41 | PotentialTarget_bruteforce = njit(PotentialTarget_bruteforce, fastmath=True) 42 | 43 | 44 | @njit(fastmath=True) 45 | def Potential_bruteforce(x, m, softening, G=1.0): 46 | """Returns the exact mutually-interacting gravitational potential for a set of particles with positions x and masses m, evaluated by brute force. 47 | 48 | Arguments: 49 | x -- shape (N,3) array of particle positions 50 | m -- shape (N,) array of particle masses 51 | softening -- shape (N,) array containing kernel support radii for gravitational softening 52 | 53 | Optional arguments: 54 | G -- gravitational constant (default 1.0) 55 | 56 | Returns: 57 | shape (N,) array containing potential values 58 | """ 59 | potential = zeros_like(m) 60 | dx = zeros(3) 61 | for i in range(x.shape[0]): 62 | for j in range(i + 1, x.shape[0]): 63 | for k in range(3): 64 | dx[k] = x[i, k] - x[j, k] 65 | r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) 66 | h = max(softening[i], softening[j]) 67 | if r < h: 68 | kernel = PotentialKernel(r, h) 69 | potential[j] += m[i] * kernel 70 | potential[i] += m[j] * kernel 71 | elif r > 0: 72 | potential[i] -= m[j] / r 73 | potential[j] -= m[i] / r 74 | return G * potential 75 | 76 | 77 | @njit(fastmath=True, parallel=True) 78 | def Potential_bruteforce_parallel(x, m, softening, G=1.0): 79 | """Returns the exact mutually-interacting gravitational potential for a set of particles with positions x and masses m, evaluated by brute force. 
80 | 81 | Arguments: 82 | x -- shape (N,3) array of particle positions 83 | m -- shape (N,) array of particle masses 84 | softening -- shape (N,) array containing kernel support radii for gravitational softening 85 | 86 | Optional arguments: 87 | G -- gravitational constant (default 1.0) 88 | 89 | Returns: 90 | shape (N,) array containing potential values 91 | """ 92 | potential = zeros_like(m) 93 | for i in prange(x.shape[0]): 94 | dx = zeros(3) 95 | for j in range(x.shape[0]): 96 | if i == j: 97 | continue # neglect self-potential 98 | for k in range(3): 99 | dx[k] = x[i, k] - x[j, k] 100 | r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) 101 | h = max(softening[i], softening[j]) 102 | if r < h: 103 | kernel = PotentialKernel(r, h) 104 | potential[i] += m[j] * kernel 105 | elif r > 0: 106 | potential[i] -= m[j] / r 107 | return G * potential 108 | 109 | 110 | @njit(fastmath=True) 111 | def Accel_bruteforce(x, m, softening, G=1.0): 112 | """Returns the exact mutually-interacting gravitational accelerations of a set of particles. 
113 | 114 | Arguments: 115 | x -- shape (N,3) array of positions where the potential is to be evaluated 116 | m -- shape (N,) array of particle masses 117 | softening -- shape (N,) array of softening lengths 118 | 119 | Optional arguments: 120 | G -- gravitational constant (default 1.0) 121 | 122 | Returns: 123 | shape (N,3) array of gravitational accelerations 124 | """ 125 | if softening is None: 126 | softening = np.zeros_like(m) 127 | accel = zeros_like(x) 128 | dx = zeros(3) 129 | for i in range(x.shape[0]): 130 | for j in range(i + 1, x.shape[0]): 131 | h = max( 132 | softening[i], softening[j] 133 | ) # if there is overlap, we symmetrize the softenings to maintain momentum conservation 134 | r2 = 0 135 | for k in range(3): 136 | dx[k] = x[i, k] - x[j, k] 137 | r2 += dx[k] * dx[k] 138 | if r2 == 0: 139 | continue 140 | r = sqrt(r2) 141 | 142 | if r < h: 143 | kernel = ForceKernel(r, h) 144 | for k in range(3): 145 | accel[j, k] += kernel * m[i] * dx[k] 146 | accel[i, k] -= kernel * m[j] * dx[k] 147 | else: 148 | fac = 1 / (r2 * r) 149 | for k in range(3): 150 | accel[j, k] += m[i] * fac * dx[k] 151 | accel[i, k] -= m[j] * fac * dx[k] 152 | return G * accel 153 | 154 | 155 | @njit(fastmath=True, parallel=True) 156 | def Accel_bruteforce_parallel(x, m, softening, G=1.0): 157 | """Returns the exact mutually-interacting gravitational accelerations of a set of particles. 
158 | 159 | Arguments: 160 | x -- shape (N,3) array of positions where the potential is to be evaluated 161 | m -- shape (N,) array of particle masses 162 | softening -- shape (N,) array of softening lengths 163 | 164 | Optional arguments: 165 | G -- gravitational constant (default 1.0) 166 | 167 | Returns: 168 | shape (N,3) array of gravitational accelerations 169 | """ 170 | if softening is None: 171 | softening = np.zeros_like(m) 172 | accel = zeros_like(x) 173 | for i in prange(x.shape[0]): 174 | dx = zeros(3) 175 | for j in range(x.shape[0]): 176 | if i == j: 177 | continue 178 | h = max( 179 | softening[i], softening[j] 180 | ) # if there is overlap, we symmetrize the softenings to maintain momentum conservation 181 | r2 = 0 182 | for k in range(3): 183 | dx[k] = x[j, k] - x[i, k] 184 | r2 += dx[k] * dx[k] 185 | if r2 == 0: 186 | continue 187 | r = sqrt(r2) 188 | 189 | if r < h: 190 | kernel = ForceKernel(r, h) 191 | for k in range(3): 192 | accel[i, k] += kernel * m[j] * dx[k] 193 | else: 194 | fac = 1 / (r2 * r) 195 | for k in range(3): 196 | accel[i, k] += m[j] * fac * dx[k] 197 | return G * accel 198 | 199 | 200 | def AccelTarget_bruteforce(x_target, softening_target, x_source, m_source, softening_source, G=1.0): 201 | """Returns the gravitational acceleration at a set of target positions, due to a set of source particles. 
202 | 203 | Arguments: 204 | x_target -- shape (N,3) array of positions where the field is to be evaluated 205 | softening_target -- shape (N,) array of minimum softening lengths to be used 206 | x_source -- shape (M,3) array of positions of gravitating particles 207 | m_source -- shape (M,) array of particle masses 208 | softening_source -- shape (M,) array of softening lengths 209 | 210 | Optional arguments: 211 | G -- gravitational constant (default 1.0) 212 | 213 | Returns: 214 | shape (N,3) array of gravitational accelerations 215 | """ 216 | accel = zeros_like(x_target) 217 | for i in prange(x_target.shape[0]): 218 | dx = zeros(3) 219 | for j in range(x_source.shape[0]): 220 | h = max( 221 | softening_target[i], softening_source[j] 222 | ) # if there is overlap, we symmetrize the softenings to maintain momentum conservation 223 | r2 = 0 224 | for k in range(3): 225 | dx[k] = x_source[j, k] - x_target[i, k] 226 | r2 += dx[k] * dx[k] 227 | if r2 == 0: 228 | continue # no force if at the origin 229 | r = sqrt(r2) 230 | 231 | if r < h: 232 | kernel = ForceKernel(r, h) 233 | for k in range(3): 234 | accel[i, k] += kernel * m_source[j] * dx[k] 235 | else: 236 | fac = 1 / (r2 * r) 237 | for k in range(3): 238 | accel[i, k] += m_source[j] * fac * dx[k] 239 | return G * accel 240 | 241 | 242 | AccelTarget_bruteforce_parallel = njit(AccelTarget_bruteforce, fastmath=True, parallel=True) 243 | AccelTarget_bruteforce = njit(AccelTarget_bruteforce, fastmath=True) 244 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI](https://img.shields.io/pypi/v/pytreegrav)](https://pypi.org/project/pytreegrav)[![Documentation Status](https://readthedocs.org/projects/pytreegrav/badge/?version=latest)](https://pytreegrav.readthedocs.io/en/latest/?badge=latest) 2 | 3 | # Introduction 4 | pytreegrav is a package for computing the gravitational potential 
and/or field of a set of particles. It includes methods for brute-force direct summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher performance than the equivalent pure Python implementation, without writing a single line of C or Cython. Full documentation is available [here](http://pytreegrav.readthedocs.io). 5 | 6 | # Installation 7 | 8 | ```pip install pytreegrav``` or clone the repo and run ```python setup.py install``` from the repo directory. 9 | 10 | # Walkthrough 11 | First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is. 12 | 13 | 14 | ```python 15 | import numpy as np 16 | from pytreegrav import Accel, Potential 17 | ``` 18 | 19 | 20 | ```python 21 | N = 10**5 # number of particles 22 | x = np.random.rand(N,3) # positions randomly sampled in the unit cube 23 | m = np.repeat(1./N,N) # masses - let the system have unit mass 24 | h = np.repeat(0.01,N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions 25 | ``` 26 | 27 | Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential at each particle position: 28 | 29 | 30 | ```python 31 | print(Accel(x,m,h)) 32 | print(Potential(x,m,h)) 33 | ``` 34 | 35 | [[-0.1521787 0.2958852 -0.30109005] 36 | [-0.50678204 -0.37489886 -1.0558666 ] 37 | [-0.24650087 0.95423467 -0.175074 ] 38 | ... 39 | [ 0.87868472 -1.28332176 -0.22718531] 40 | [-0.41962742 0.32372245 -1.31829084] 41 | [ 2.45127054 0.38292881 0.05820412]] 42 | [-2.35518057 -2.19299372 -2.28494218 ... -2.11783337 -2.1653377 43 | -1.80464695] 44 | 45 | 46 | By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. 
Let's try both and compare their runtimes: 47 | 48 | 49 | ```python 50 | from time import time 51 | t = time() 52 | # tree gravitational acceleration 53 | accel_tree = Accel(x,m,h,method='tree') 54 | print("Tree accel runtime: %gs"%(time() - t)); t = time() 55 | 56 | accel_bruteforce = Accel(x,m,h,method='bruteforce') 57 | print("Brute force accel runtime: %gs"%(time() - t)); t = time() 58 | 59 | phi_tree = Potential(x,m,h,method='tree') 60 | print("Tree potential runtime: %gs"%(time() - t)); t = time() 61 | 62 | phi_bruteforce = Potential(x,m,h,method='bruteforce') 63 | print("Brute force potential runtime: %gs"%(time() - t)); t = time() 64 | ``` 65 | 66 | Tree accel runtime: 0.927745s 67 | Brute force accel runtime: 44.1175s 68 | Tree potential runtime: 0.802386s 69 | Brute force potential runtime: 20.0234s 70 | 71 | 72 | As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding 10^4. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: 73 | ![Benchmark](images/CPU_Time_serial.png) 74 | 75 | 76 | But there's no free lunch here: the tree methods are approximate. Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions: 77 | 78 | 79 | ```python 80 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 81 | print("RMS force error: ", acc_error) 82 | phi_error = np.std(phi_tree - phi_bruteforce) 83 | print("RMS potential error: ", phi_error) 84 | ``` 85 | 86 | RMS force error: 0.006739311224338851 87 | RMS potential error: 0.0003888328578588027 88 | 89 | 90 | The above errors are typical for default settings: ~1% force error and ~0.1\% potential error. The error in the tree approximation is controlled by the Barnes-Hut opening angle ``theta``, set to 0.7 by default. 
Smaller ``theta`` gives higher accuracy, but also runs slower: 91 | 92 | 93 | ```python 94 | thetas = 0.1,0.2,0.4,0.8 # different thetas to try 95 | for theta in thetas: 96 | t = time() 97 | accel_tree = Accel(x,m,h,method='tree',theta=theta) 98 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) 99 | print("theta=%g Runtime: %gs RMS force error: %g"%(theta, time()-t, acc_error)) 100 | ``` 101 | 102 | theta=0.1 Runtime: 63.1738s RMS force error: 3.78978e-05 103 | theta=0.2 Runtime: 14.3356s RMS force error: 0.000258755 104 | theta=0.4 Runtime: 2.91292s RMS force error: 0.00148698 105 | theta=0.8 Runtime: 0.724668s RMS force error: 0.0105937 106 | 107 | 108 | Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. This can speed things up considerably, with parallel scaling that will vary with your core and particle number: 109 | 110 | 111 | ```python 112 | from time import time 113 | t = time() 114 | # tree gravitational acceleration 115 | accel_tree = Accel(x,m,h,method='tree',parallel=True) 116 | print("Tree accel runtime in parallel: %gs"%(time() - t)); t = time() 117 | 118 | accel_bruteforce = Accel(x,m,h,method='bruteforce',parallel=True) 119 | print("Brute force accel runtime in parallel: %gs"%(time() - t)); t = time() 120 | 121 | phi_tree = Potential(x,m,h,method='tree',parallel=True) 122 | print("Tree potential runtime in parallel: %gs"%(time() - t)); t = time() 123 | 124 | phi_bruteforce = Potential(x,m,h,method='bruteforce',parallel=True) 125 | print("Brute force potential runtime in parallel: %gs"%(time() - t)); t = time() 126 | ``` 127 | 128 | Tree accel runtime in parallel: 0.222271s 129 | Brute force accel runtime in parallel: 7.25576s 130 | Tree potential runtime in parallel: 0.181393s 131 | Brute force potential runtime in parallel: 5.72611s 132 | 133 | 134 | # What if I want to evaluate the fields at different points than where the 
particles are? 135 | 136 | We got you covered. The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver): 137 | 138 | 139 | ```python 140 | from pytreegrav import AccelTarget, PotentialTarget 141 | 142 | # generate a separate set of "target" positions where we want to know the potential and field 143 | N_target = 10**4 144 | x_target = np.random.rand(N_target,3) 145 | h_target = np.repeat(0.01,N_target) # optional "target" softening: this sets a floor on the softening length of all forces/potentials computed 146 | 147 | accel_tree = AccelTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 148 | accel_bruteforce = AccelTarget(x_target,x,m,h_source=h,method='bruteforce') 149 | 150 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 151 | print("RMS force error: ", acc_error) 152 | 153 | phi_tree = PotentialTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 154 | phi_bruteforce = PotentialTarget(x_target,x,m,h_target=h_target, h_source=h,method='bruteforce') 155 | 156 | phi_error = np.std(phi_tree - phi_bruteforce) 157 | print("RMS potential error: ", phi_error) 158 | ``` 159 | 160 | RMS force error: 0.006719983300560105 161 | RMS potential error: 0.0003873676304955059 162 | 163 | # Ray-tracing 164 | 165 | pytreegrav's octree implementation can be used for efficient tree-based searches for ray-tracing of unstructured data. Currently implemented is the method ``ColumnDensity``, which calculates the integral of the density field to infinity along a grid of rays originating at each particle (defaulting to 6 rays). 
For example: 166 | 167 | ```python 168 | columns = ColumnDensity(x, m, h, parallel=True) # shape (N,6) array of column densities in 6 angular bins - this is fastest but least accurate 169 | columns_10 = ColumnDensity(x, m, h, rays=10, parallel=True) # shape (N, 10) array column densities along 10 random rays 170 | columns_random = ColumnDensity(x, m, h, randomize_rays=True, parallel=True) # can randomize the ray grid for each particle so that there are no correlated errors due to the angular discretization 171 | columns_custom = ColumnDensity(x, m, h, rays=np.random.normal(size=(100,3)), parallel=True) # can also pass an arbitrary set of rays for the raygrid; these need not be normalized 172 | κ = 0.02 # example opacity, in code units 173 | σ = m * κ # total cross-section in each particle is product of mass and opacity 174 | 𝛕 = ColumnDensity(x, σ, h, parallel=True) # can pass cross-section instead of mass to get optical depth 175 | 𝛕_eff = -np.log(np.exp(-𝛕.clip(-300,300)).mean(axis=1)) # effective optical depth that would give the same radiation flux from a background; note clipping because overflow is not uncommon here 176 | Σ_eff = 𝛕_eff / κ # effective column density *for this opacity* in code mass/code length^2 177 | NH_eff = Σ_eff X_H / m_p # column density in H nuclei code length^-2 178 | ``` 179 | 180 | # Community 181 | 182 | This code is actively developed and maintained by Mike Grudic. 183 | 184 | If you would like help using pytreegrav, please ask a question on our [Discussions](https://github.com/mikegrudic/pytreegrav/discussions) page. 185 | 186 | If you have found a bug or an issue using pytreegrav, please open an [issue](https://github.com/mikegrudic/pytreegrav/issues). 
187 | -------------------------------------------------------------------------------- /src/pytreegrav/octree.py: -------------------------------------------------------------------------------- 1 | """Implementation of the Octree jitclass""" 2 | 3 | import numpy as np 4 | from numpy import zeros, ones, concatenate 5 | from numba import float64, boolean, int64, njit 6 | from numba.experimental import jitclass 7 | 8 | spec = [ 9 | ("Sizes", float64[:]), # side length of tree nodes 10 | ("Deltas", float64[:]), # distance between COM and geometric center of node 11 | # location of center of mass of node (actually stores _geometric_ center before we do the moments pass) 12 | ("Coordinates", float64[:, :]), 13 | ("Masses", float64[:]), # total mass of node 14 | ("Quadrupoles", float64[:, :, :]), # Quadrupole moment of the node 15 | # Allow us to quickly check if Quadrupole moments exist to keep monopole calculations fast 16 | ("HasQuads", boolean), 17 | ("NumParticles", int64), # number of particles in the tree 18 | ("NumNodes", int64), # number of particles + nodes (i.e. 
mass elements) in the tree
    # individual softenings for particles, _maximum_ softening of inhabitant particles for nodes
    ("Softenings", float64[:]),
    ("NextBranch", int64[:]),
    ("FirstSubnode", int64[:]),
    ("TreewalkIndices", int64[:]),
]


# geometric centers of the 8 octants of a node, in units of the node side length
octant_offsets = 0.25 * np.array(
    [
        [-1, -1, -1],
        [1, -1, -1],
        [-1, 1, -1],
        [1, 1, -1],
        [-1, -1, 1],
        [1, -1, 1],
        [-1, 1, 1],
        [1, 1, 1],
    ]
)


@jitclass(spec)
class Octree:
    """Octree implementation."""

    def __init__(
        self,
        points,
        masses,
        softening,
        morton_order=True,
        quadrupole=False,
        compute_moments=True,
    ):
        # Build the tree from (N,3) points with per-particle masses and softenings.
        # morton_order: re-build the tree with particles sorted in depth-first visit order
        # quadrupole: allocate and later compute quadrupole moments per node
        # compute_moments: fill in COM, total mass, max softening and Delta for each node
        self.NumNodes = 0
        self.TreewalkIndices = -ones(points.shape[0], dtype=np.int64)
        self.HasQuads = quadrupole
        # first provisional treebuild to get the ordering right
        children = self.BuildTree(points, masses, softening)
        # set up the order of the treewalk
        SetupTreewalk(self, self.NumParticles, children)
        self.GetWalkIndices()  # get the Morton ordering of the points

        # if enabled, we rebuild the tree in Morton order (the order that points are visited in the depth-first traversal)
        if morton_order:
            children = self.BuildTree(
                points[self.TreewalkIndices],
                np.take(masses, self.TreewalkIndices),
                np.take(softening, self.TreewalkIndices),
            )  # now re-build the tree with everything in order
            # re-do the treewalk order with the new indices
            SetupTreewalk(self, self.NumParticles, children)

        if compute_moments:
            # compute centers of mass, etc.
            ComputeMoments(self, self.NumParticles, children)

    def BuildTree(self, points, masses, softening):
        # Insert particles one by one, creating internal nodes as needed;
        # returns the (NumNodes, 8) table of child indices (-1 = empty slot).
        # initialize random seed in case of non-unique positions
        np.random.seed(42)

        self.Initialize(len(points), self.NumNodes)

        # set the properties of the root node
        self.Sizes[self.NumParticles] = max(
            points[:, 0].max() - points[:, 0].min(),
            points[:, 1].max() - points[:, 1].min(),
            points[:, 2].max() - points[:, 2].min(),
        )
        for dim in range(3):
            self.Coordinates[self.NumParticles, dim] = 0.5 * (points[:, dim].max() + points[:, dim].min())

        # set values for particles
        self.Coordinates[: self.NumParticles] = points
        self.Masses[: self.NumParticles] = masses
        self.Softenings[: self.NumParticles] = softening
        children = -ones((self.NumNodes, 8), dtype=np.int64)
        new_node_idx = self.NumParticles + 1
        # now we insert particles into the tree one at a time, setting up child pointers and initializing node properties as we go
        for i in range(self.NumParticles):
            pos = points[i]

            no = self.NumParticles  # walk the tree, starting at the root
            while no > -1:
                # first make sure we have enough storage
                while new_node_idx + 1 > self.NumNodes:
                    size_increase = increase_tree_size(self)
                    children = concatenate((children, -ones((size_increase, 8), dtype=np.int64)))

                octant = 0  # the index of the octant that the present point lives in
                for dim in range(3):
                    if pos[dim] > self.Coordinates[no, dim]:
                        octant += 1 << dim
                # check if there is a pre-existing node among the present node's children
                child_candidate = children[no, octant]
                if child_candidate > -1:
                    # it exists, now check if it's a node or a particle
                    if child_candidate < self.NumParticles:
                        # it's a particle - we have to create a new node of index new_node_idx containing the 2 points we've got, and point the pre-existing particle to the new particle
                        # EXCEPTION: if the pre-existing particle is at the same coordinate, we will perturb the position of the new particle slightly and start over
                        # NOTE(review): the multiplicative perturbation below cannot separate
                        # particles whose coincident coordinate is exactly 0.0 — confirm callers
                        # guard against that case
                        same_coord = True
                        for k in range(3):
                            if self.Coordinates[i, k] != self.Coordinates[child_candidate, k]:
                                same_coord = False
                        if same_coord:
                            self.Coordinates[i] *= np.exp(3e-16 * (np.random.rand(3) - 0.5))  # random perturbation
                            points[i] = self.Coordinates[i]
                            no = self.NumParticles  # restart the tree traversal
                            continue
                        # end exception

                        children[no, octant] = new_node_idx
                        # set the center of the new node
                        self.Coordinates[new_node_idx] = self.Coordinates[no] + self.Sizes[no] * octant_offsets[octant]
                        # set the size of the new node
                        self.Sizes[new_node_idx] = self.Sizes[no] / 2
                        new_octant = 0
                        for dim in range(3):
                            if self.Coordinates[child_candidate, dim] > self.Coordinates[new_node_idx, dim]:
                                # get the octant of the new node that pre-existing particle lives in
                                new_octant += 1 << dim
                        # set the pre-existing particle as a child of the new node
                        children[new_node_idx, new_octant] = child_candidate
                        no = new_node_idx
                        new_node_idx += 1
                        continue  # restart the loop looking at the new node
                    else:  # if the child is an existing node, go to that one and start the loop anew
                        no = children[no, octant]
                        continue
                else:  # if the child does not exist, we let this point be that child (inserting it in the tree) and we're done with this point
                    children[no, octant] = i
                    no = -1
        return children

    def GetWalkIndices(self):  # gets the ordering of the particles in the treewalk
        index = 0
        no = self.NumParticles
        while no > -1:
            if no < self.NumParticles:
                self.TreewalkIndices[index] = no
                index += 1
                no = self.NextBranch[no]
            else:
                no = self.FirstSubnode[no]

    def Initialize(self,
Npart, NumNodes): 166 | """Allocate all attribute arrays and initialize""" 167 | self.NumParticles = Npart 168 | # this is the number of elements in the tree, whether nodes or particles. can make this smaller but this has a safety factor 169 | if NumNodes: 170 | self.NumNodes = NumNodes 171 | else: 172 | # initial guess for storage needed; can always increase if needed 173 | self.NumNodes = int(1.5 * Npart + 1) 174 | self.Sizes = zeros(self.NumNodes) 175 | self.Deltas = zeros(self.NumNodes) 176 | self.Masses = zeros(self.NumNodes) 177 | # No need to initialize this beyond zero, all n>0 moments are 0 for a single particle 178 | if self.HasQuads: 179 | self.Quadrupoles = zeros((self.NumNodes, 3, 3)) 180 | self.Softenings = zeros(self.NumNodes) 181 | self.Coordinates = zeros((self.NumNodes, 3)) 182 | self.Deltas = zeros(self.NumNodes) 183 | self.NextBranch = -ones(self.NumNodes, dtype=np.int64) 184 | self.FirstSubnode = -ones(self.NumNodes, dtype=np.int64) 185 | 186 | 187 | @njit 188 | def ComputeMoments(tree, no, children): 189 | """Does a recursive pass through the tree and computes centers of mass, total mass, max softening, and distance between geometric center and COM""" 190 | quad = zeros((3, 3)) 191 | if no < tree.NumParticles: # if this is a particle, just return the properties 192 | return tree.Softenings[no], tree.Masses[no], quad, tree.Coordinates[no] 193 | else: 194 | m = 0 195 | com = zeros(3) 196 | hmax = 0 197 | for c in children[no]: 198 | if c > -1: 199 | hi, mi, quadi, comi = ComputeMoments(tree, c, children) 200 | m += mi 201 | com += mi * comi 202 | hmax = max(hi, hmax) 203 | tree.Masses[no] = m 204 | com = com / m 205 | if tree.HasQuads: 206 | for c in children[no]: 207 | if c > -1: 208 | comi = tree.Coordinates[c] 209 | quadi = tree.Quadrupoles[c] 210 | ri = comi - com 211 | r2 = 0 212 | for k in range(3): 213 | r2 += ri[k] * ri[k] 214 | for k in range(3): 215 | for l in range(3): 216 | quad[k, l] += quadi[k, l] + mi * 3 * ri[k] * ri[l] 217 | if 
k == l: 218 | quad[k, l] -= ( 219 | mi * r2 220 | ) # Calculate the quadrupole moment based on the moments of the subcells 221 | tree.Quadrupoles[no] = quad 222 | delta = 0 223 | for dim in range(3): 224 | dx = com[dim] - tree.Coordinates[no, dim] 225 | delta += dx * dx 226 | tree.Deltas[no] = np.sqrt(delta) 227 | tree.Coordinates[no] = com 228 | tree.Softenings[no] = hmax 229 | return hmax, m, quad, com 230 | 231 | 232 | @njit 233 | def SetupTreewalk(tree, no, children): 234 | if no < tree.NumParticles: 235 | return # leaf nodes are handled from above 236 | last_node = -1 237 | for c in children[no]: 238 | if c < 0: 239 | continue 240 | # if we haven't yet set current node's next node, do so 241 | if tree.FirstSubnode[no] < 0: 242 | tree.FirstSubnode[no] = c 243 | # set this up to point to the next "branch" of the tree to look at if we sum the force for the current branch 244 | if last_node > -1: 245 | tree.NextBranch[last_node] = c 246 | last_node = c 247 | 248 | # need to deal with the last child: must link it up to the sibling of the present node 249 | tree.NextBranch[last_node] = tree.NextBranch[no] 250 | 251 | for c in children[no]: 252 | if c >= tree.NumParticles: # if we have a node, call routine recursively 253 | SetupTreewalk(tree, c, children) 254 | 255 | 256 | @njit 257 | def increase_tree_size(tree, fac=1.2): 258 | """Reallocate the tree data with storage increased by factor fac""" 259 | old_size = tree.NumNodes 260 | size_increase = max(int(old_size * fac + 1) - old_size, 1) 261 | # print("Increasing size of node list by ", size_increase) # by %g" % fac) 262 | 263 | tree.Sizes = concatenate((tree.Sizes, zeros(size_increase))) 264 | tree.Deltas = concatenate((tree.Deltas, zeros(size_increase))) 265 | tree.Masses = concatenate((tree.Masses, zeros(size_increase))) 266 | tree.Softenings = concatenate((tree.Softenings, zeros(size_increase))) 267 | tree.NextBranch = concatenate((tree.NextBranch, -ones(size_increase, dtype=np.int64))) 268 | tree.FirstSubnode 
= concatenate((tree.FirstSubnode, -ones(size_increase, dtype=np.int64))) 269 | tree.Coordinates = concatenate((tree.Coordinates, zeros((size_increase, 3)))) 270 | if tree.HasQuads: 271 | tree.Quadrupoles = concatenate((tree.Quadrupoles, zeros((size_increase, 3, 3)))) 272 | tree.NumNodes += size_increase 273 | 274 | return size_increase 275 | -------------------------------------------------------------------------------- /src/pytreegrav/dynamic_tree.py: -------------------------------------------------------------------------------- 1 | from numba import ( 2 | int32, 3 | deferred_type, 4 | optional, 5 | float64, 6 | boolean, 7 | int64, 8 | njit, 9 | jit, 10 | prange, 11 | types, 12 | ) 13 | from numba.experimental import jitclass 14 | import numpy as np 15 | from numpy import empty, empty_like, zeros, zeros_like, sqrt, ones 16 | 17 | spec = [ 18 | ("Sizes", float64[:]), # side length of tree nodes 19 | ("Deltas", float64[:]), # distance between COM and geometric center of node 20 | ( 21 | "Coordinates", 22 | float64[:, :], 23 | ), # location of center of mass of node (actually stores _geometric_ center before we do the moments pass) 24 | ("Velocities", float64[:, :]), # velocity of the center of mass of node 25 | ("VelocityDisp", float64[:]), # center-of-mass velocity dispersion 26 | ("Masses", float64[:]), # total mass of node 27 | ("Quadrupoles", float64[:, :, :]), # Quadrupole moment of the node 28 | ( 29 | "HasQuads", 30 | boolean, 31 | ), # Allow us to quickly check if Quadrupole moments exist to keep monopole calculations fast 32 | ("NumParticles", int64), # number of particles in the tree 33 | ("NumNodes", int64), # number of particles + nodes (i.e. 
mass elements) in the tree 34 | ( 35 | "Softenings", 36 | float64[:], 37 | ), # individual softenings for particles, _maximum_ softening of inhabitant particles for nodes 38 | ("NextBranch", int64[:]), 39 | ("FirstSubnode", int64[:]), 40 | ("TreewalkIndices", int64[:]), 41 | # ('children',int64[:,:]) # indices of child nodes 42 | ] 43 | 44 | 45 | octant_offsets = 0.25 * np.array( 46 | [ 47 | [-1, -1, -1], 48 | [1, -1, -1], 49 | [-1, 1, -1], 50 | [1, 1, -1], 51 | [-1, -1, 1], 52 | [1, -1, 1], 53 | [-1, 1, 1], 54 | [1, 1, 1], 55 | ] 56 | ) 57 | 58 | 59 | @jitclass(spec) 60 | class DynamicOctree(object): 61 | """Octree implementation that stores node velocities for correlation functions and dynamic updates.""" 62 | 63 | def __init__(self, points, masses, softening, vels, morton_order=True, quadrupole=False): 64 | self.TreewalkIndices = -ones(points.shape[0], dtype=np.int64) 65 | self.HasQuads = quadrupole 66 | children = self.BuildTree( 67 | points, masses, softening, vels 68 | ) # first provisional treebuild to get the ordering right 69 | SetupTreewalk(self, self.NumParticles, children) # set up the order of the treewalk 70 | ComputeMomentsDynamic(self, self.NumParticles, children) # compute centers of mass, etc. 71 | self.GetWalkIndices() # get the Morton ordering of the points 72 | 73 | if ( 74 | morton_order 75 | ): # if enabled, we rebuild the tree in Morton order (the order that points are visited in the depth-first traversal) 76 | children = self.BuildTree( 77 | points[self.TreewalkIndices], 78 | np.take(masses, self.TreewalkIndices), 79 | np.take(softening, self.TreewalkIndices), 80 | vels[self.TreewalkIndices], 81 | ) # now re-build the tree with everything in order 82 | SetupTreewalk(self, self.NumParticles, children) # re-do the treewalk order with the new indices 83 | 84 | ComputeMomentsDynamic(self, self.NumParticles, children) # compute centers of mass, etc. 
85 | 86 | def BuildTree(self, points, masses, softening, vels): 87 | # initialize all attributes 88 | self.NumParticles = points.shape[0] 89 | self.NumNodes = ( 90 | 2 * self.NumParticles 91 | ) # this is the number of elements in the tree, whether nodes or particles. can make this smaller but this has a safety factor 92 | self.Sizes = zeros(self.NumNodes) 93 | self.Deltas = zeros(self.NumNodes) 94 | self.Masses = zeros(self.NumNodes) 95 | if self.HasQuads: 96 | self.Quadrupoles = zeros( 97 | (self.NumNodes, 3, 3) 98 | ) # No need to initialize this beyond zero, all n>0 moments are 0 for a single particle 99 | self.Softenings = zeros(self.NumNodes) 100 | self.Coordinates = zeros((self.NumNodes, 3)) 101 | self.Velocities = zeros((self.NumNodes, 3)) 102 | self.VelocityDisp = zeros(self.NumNodes) 103 | self.Deltas = zeros(self.NumNodes) 104 | self.NextBranch = -ones(self.NumNodes, dtype=np.int64) 105 | self.FirstSubnode = -ones(self.NumNodes, dtype=np.int64) 106 | # self.ParentNode = -ones(self.NumNodes, dtype=np.int64) 107 | 108 | # set the properties of the root node 109 | self.Sizes[self.NumParticles] = max( 110 | points[:, 0].max() - points[:, 0].min(), 111 | points[:, 1].max() - points[:, 1].min(), 112 | points[:, 2].max() - points[:, 2].min(), 113 | ) 114 | for dim in range(3): 115 | self.Coordinates[self.NumParticles, dim] = 0.5 * (points[:, dim].max() + points[:, dim].min()) 116 | 117 | # set values for particles 118 | self.Coordinates[: self.NumParticles] = points 119 | self.Velocities[: self.NumParticles] = vels 120 | self.Masses[: self.NumParticles] = masses 121 | self.Softenings[: self.NumParticles] = softening 122 | children = -ones((self.NumNodes, 8), dtype=np.int64) 123 | new_node_idx = self.NumParticles + 1 124 | 125 | # now we insert particles into the tree one at a time, setting up child pointers and initializing node properties as we go 126 | for i in range(self.NumParticles): 127 | pos = points[i] 128 | 129 | no = self.NumParticles # walk the tree, 
starting at the root 130 | while no > -1: 131 | octant = 0 # the index of the octant that the present point lives in 132 | for dim in range(3): 133 | if pos[dim] > self.Coordinates[no, dim]: 134 | octant += 1 << dim 135 | 136 | # check if there is a pre-existing node among the present node's children 137 | child_candidate = children[no, octant] 138 | if child_candidate > -1: # it exists, now check if it's a node or a particle 139 | if ( 140 | child_candidate < self.NumParticles 141 | ): # it's a particle - we have to create a new node of index new_node_idx containing the 2 points we've got, and point the pre-existing particle to the new particle 142 | # EXCEPTION: if the pre-existing particle is at the same coordinate, we will perturb the position of the new particle slightly and start over 143 | same_coord = True 144 | for k in range(3): 145 | if self.Coordinates[i, k] != self.Coordinates[child_candidate, k]: 146 | same_coord = False 147 | if same_coord: 148 | self.Coordinates[i] *= np.exp(3e-16 * (np.random.rand(3) - 0.5)) # random perturbation 149 | points[i] = self.Coordinates[i] 150 | no = self.NumParticles # restart the tree traversal 151 | continue 152 | # end exception 153 | 154 | children[no, octant] = new_node_idx 155 | self.Coordinates[new_node_idx] = ( 156 | self.Coordinates[no] + self.Sizes[no] * octant_offsets[octant] 157 | ) # set the center of the new node 158 | self.Sizes[new_node_idx] = self.Sizes[no] / 2 # set the size of the new node 159 | new_octant = 0 160 | for dim in range(3): 161 | if self.Coordinates[child_candidate, dim] > self.Coordinates[new_node_idx, dim]: 162 | new_octant += ( 163 | 1 << dim 164 | ) # get the octant of the new node that pre-existing particle lives in 165 | children[new_node_idx, new_octant] = ( 166 | child_candidate # set the pre-existing particle as a child of the new node 167 | ) 168 | no = new_node_idx 169 | new_node_idx += 1 170 | continue # restart the loop looking at the new node 171 | else: # if the child is an 
existing node, go to that one and start the loop anew 172 | no = children[no, octant] 173 | continue 174 | else: # if the child does not exist, we let this point be that child (inserting it in the tree) and we're done with this point 175 | children[no, octant] = i 176 | no = -1 177 | return children 178 | 179 | def ReorderTree(self): 180 | no = self.NumParticles 181 | 182 | def GetWalkIndices(self): # gets the ordering of the particles in the treewalk 183 | index = 0 184 | node_index = 0 185 | no = self.NumParticles 186 | while no > -1: 187 | if no < self.NumParticles: 188 | self.TreewalkIndices[index] = no 189 | index += 1 190 | no = self.NextBranch[no] 191 | else: 192 | no = self.FirstSubnode[no] 193 | 194 | 195 | @njit 196 | def ComputeMomentsDynamic( 197 | tree, no, children 198 | ): # does a recursive pass through the tree and computes centers of mass, total mass, max softening, and distance between geometric center and COM 199 | quad = zeros((3, 3)) 200 | if no < tree.NumParticles: # if this is a particle, just return the properties 201 | return ( 202 | tree.Softenings[no], 203 | tree.Masses[no], 204 | quad, 205 | tree.Coordinates[no], 206 | tree.Velocities[no], 207 | 0, 208 | ) 209 | else: 210 | m = 0 211 | com = zeros(3) 212 | vel = zeros(3) 213 | vdisp = 0 214 | hmax = 0 215 | for c in children[no]: 216 | if c > -1: 217 | hi, mi, quadi, comi, veli, vdispi = ComputeMomentsDynamic(tree, c, children) 218 | m += mi 219 | com += mi * comi 220 | vel += mi * veli 221 | vdisp += mi * vdispi 222 | hmax = max(hi, hmax) 223 | tree.Masses[no] = m 224 | com = com / m 225 | vel = vel / m 226 | # vdisp = vdisp/m 227 | for c in children[no]: 228 | if c > -1: 229 | dv = tree.Velocities[c] - vel 230 | vdisp += tree.Masses[c] * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]) 231 | vdisp = vdisp / m 232 | if tree.HasQuads: 233 | for c in children[no]: 234 | if c > -1: 235 | comi = tree.Coordinates[c] 236 | quadi = tree.Quadrupoles[c] 237 | ri = comi - com 238 | r2 = 0 239 | 
for k in range(3): 240 | r2 += ri[k] * ri[k] 241 | for k in range(3): 242 | for l in range(3): 243 | quad[k, l] += quadi[k, l] + mi * 3 * ri[k] * ri[l] 244 | if k == l: 245 | quad[k, l] -= ( 246 | mi * r2 247 | ) # Calculate the quadrupole moment based on the moments of the subcells 248 | tree.Quadrupoles[no] = quad 249 | delta = 0 250 | for dim in range(3): 251 | dx = com[dim] - tree.Coordinates[no, dim] 252 | delta += dx * dx 253 | tree.Deltas[no] = np.sqrt(delta) 254 | tree.Coordinates[no] = com 255 | tree.Softenings[no] = hmax 256 | tree.Velocities[no] = vel 257 | tree.VelocityDisp[no] = vdisp 258 | return hmax, m, quad, com, vel, vdisp 259 | 260 | 261 | @njit 262 | def SetupTreewalk(tree, no, children): 263 | # print(no) 264 | if no < tree.NumParticles: 265 | return # leaf nodes are handled from above 266 | last_node = -1 267 | last_child = -1 268 | for c in children[no]: 269 | if c < 0: 270 | continue 271 | # tree.ParentNode[c] = no 272 | if tree.FirstSubnode[no] < 0: 273 | tree.FirstSubnode[no] = c # if we haven't yet set current node's next node, do so 274 | 275 | if last_node > -1: 276 | tree.NextBranch[last_node] = ( 277 | c # set this up to point to the next "branch" of the tree to look at if we sum the force for the current branch 278 | ) 279 | last_node = c 280 | 281 | # need to deal with the last child: must link it up to the sibling of the present node 282 | tree.NextBranch[last_node] = tree.NextBranch[no] 283 | 284 | for c in children[no]: 285 | if c >= tree.NumParticles: # if we have a node, call routine recursively 286 | SetupTreewalk(tree, c, children) 287 | -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{numba, 2 | author = {Lam, Siu Kwan and Pitrou, Antoine and Seibert, Stanley}, 3 | title = {Numba: A LLVM-Based Python JIT Compiler}, 4 | year = {2015}, 5 | isbn = {9781450340052}, 6 | publisher = 
{Association for Computing Machinery}, 7 | address = {New York, NY, USA}, 8 | url = {https://doi.org/10.1145/2833157.2833162}, 9 | doi = {10.1145/2833157.2833162}, 10 | abstract = {Dynamic, interpreted languages, like Python, are attractive for domain-experts and scientists experimenting with new ideas. However, the performance of the interpreter is often a barrier when scaling to larger data sets. This paper presents a just-in-time compiler for Python that focuses in scientific and array-oriented computing. Starting with the simple syntax of Python, Numba compiles a subset of the language into efficient machine code that is comparable in performance to a traditional compiled language. In addition, we share our experience in building a JIT compiler using LLVM[1].}, 11 | booktitle = {Proceedings of the Second Workshop on the LLVM Compiler Infrastructure in HPC}, 12 | articleno = {7}, 13 | numpages = {6}, 14 | keywords = {compiler, Python, LLVM}, 15 | location = {Austin, Texas}, 16 | series = {LLVM '15} 17 | } 18 | 19 | 20 | 21 | @ARTICLE{dubinski, 22 | author = {{Dubinski}, John}, 23 | title = "{A parallel tree code}", 24 | journal = {New Astronomy}, 25 | keywords = {Astrophysics}, 26 | year = 1996, 27 | month = oct, 28 | volume = {1}, 29 | number = {2}, 30 | pages = {133-147}, 31 | doi = {10.1016/S1384-1076(96)00009-7}, 32 | archivePrefix = {arXiv}, 33 | eprint = {astro-ph/9603097}, 34 | primaryClass = {astro-ph}, 35 | adsurl = {https://ui.adsabs.harvard.edu/abs/1996NewA....1..133D}, 36 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 37 | } 38 | 39 | 40 | @ARTICLE{fire_pressurebalance, 41 | author = {{Gurvich}, Alexander B. and {Faucher-Gigu{\`e}re}, Claude-Andr{\'e} and {Richings}, Alexander J. and {Hopkins}, Philip F. and {Grudi{\'c}}, Michael Y. and {Hafen}, Zachary and {Wellons}, Sarah and {Stern}, Jonathan and {Quataert}, Eliot and {Chan}, T.~K. and {Orr}, Matthew E. 
and {Kere{\v{s}}}, Du{\v{s}}an and {Wetzel}, Andrew and {Hayward}, Christopher C. and {Loebman}, Sarah R. and {Murray}, Norman}, 42 | title = "{Pressure balance in the multiphase ISM of cosmologically simulated disc galaxies}", 43 | journal = {Monthly Notices of the Royal Astronomical Society}, 44 | keywords = {galaxies: evolution, galaxies: formation, galaxies: ISM, galaxies: star formation, cosmology: theory, Astrophysics - Astrophysics of Galaxies}, 45 | year = 2020, 46 | month = nov, 47 | volume = {498}, 48 | number = {3}, 49 | pages = {3664-3683}, 50 | doi = {10.1093/mnras/staa2578}, 51 | archivePrefix = {arXiv}, 52 | eprint = {2005.12916}, 53 | primaryClass = {astro-ph.GA}, 54 | adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.498.3664G}, 55 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 56 | } 57 | 58 | @ARTICLE{salmonwarren, 59 | author = {{Salmon}, John K. and {Warren}, Michael S.}, 60 | title = "{Skeletons from the Treecode Closet}", 61 | journal = {Journal of Computational Physics}, 62 | keywords = {Celestial Mechanics, Error Analysis, Many Body Problem, Multipolar Fields, Trees (Mathematics), Field Theory (Physics), Gravitational Fields, Root-Mean-Square Errors, Statistical Mechanics, Thermodynamics and Statistical Physics}, 63 | year = 1994, 64 | month = mar, 65 | volume = {111}, 66 | number = {1}, 67 | pages = {136-155}, 68 | doi = {10.1006/jcph.1994.1050}, 69 | adsurl = {https://ui.adsabs.harvard.edu/abs/1994JCoPh.111..136S}, 70 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 71 | } 72 | 73 | 74 | 75 | @ARTICLE{plummer, 76 | author = {{Plummer}, H.~C.}, 77 | title = "{On the problem of distribution in globular star clusters}", 78 | journal = {Monthly Notices of the Royal Astronomical Society}, 79 | year = 1911, 80 | month = mar, 81 | volume = {71}, 82 | pages = {460-470}, 83 | doi = {10.1093/mnras/71.5.460}, 84 | adsurl = {https://ui.adsabs.harvard.edu/abs/1911MNRAS..71..460P}, 85 | adsnote = {Provided by the 
SAO/NASA Astrophysics Data System} 86 | } 87 | 88 | 89 | @ARTICLE{rockstar, 90 | author = {{Behroozi}, Peter S. and {Wechsler}, Risa H. and {Wu}, Hao-Yi}, 91 | title = "{The ROCKSTAR Phase-space Temporal Halo Finder and the Velocity Offsets of Cluster Cores}", 92 | journal = {The Astrophysical Journal}, 93 | keywords = {dark matter, methods: numerical, Astrophysics - Cosmology and Extragalactic Astrophysics, Astrophysics - Instrumentation and Methods for Astrophysics}, 94 | year = 2013, 95 | month = jan, 96 | volume = {762}, 97 | number = {2}, 98 | eid = {109}, 99 | pages = {109}, 100 | doi = {10.1088/0004-637X/762/2/109}, 101 | archivePrefix = {arXiv}, 102 | eprint = {1110.4372}, 103 | primaryClass = {astro-ph.CO}, 104 | adsurl = {https://ui.adsabs.harvard.edu/abs/2013ApJ...762..109B}, 105 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 106 | } 107 | 108 | 109 | 110 | @ARTICLE{ grudic2018, 111 | author = {{Grudi{\'c}}, Michael Y. and {Guszejnov}, D{\'a}vid and 112 | {Hopkins}, Philip F. and {Lamberts}, Astrid and 113 | {Boylan-Kolchin}, Michael and {Murray}, Norman and {Schmitz}, Denise}, 114 | title = "{From the top down and back up again: star cluster structure from hierarchical star formation}", 115 | journal = {Monthly Notices of the Royal Astronomical Society}, 116 | keywords = {stars: formation, galaxies: star clusters: general, galaxies: star formation, Astrophysics - Astrophysics of Galaxies}, 117 | year = 2018, 118 | month = nov, 119 | volume = {481}, 120 | number = {1}, 121 | pages = {688-702}, 122 | doi = {10.1093/mnras/sty2303}, 123 | archivePrefix = {arXiv}, 124 | eprint = {1708.09065}, 125 | primaryClass = {astro-ph.GA}, 126 | adsurl = {https://ui.adsabs.harvard.edu/abs/2018MNRAS.481..688G}, 127 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 128 | } 129 | 130 | 131 | @ARTICLE{guszejnov2020, 132 | author = {{Guszejnov}, D{\'a}vid and {Grudi{\'c}}, Michael Y. and 133 | {Offner}, Stella S.~R. 
and {Boylan-Kolchin}, Michael and
{Faucher-Gigu{\`e}re}, Claude-Andr{\'e} and {Wetzel}, Andrew and
{Benincasa}, Samantha M. and {Loebman}, Sarah},
title = "{Evolution of giant molecular clouds across cosmic time}",
journal = {Monthly Notices of the Royal Astronomical Society},
keywords = {turbulence, stars: formation, ISM: clouds, galaxies: ISM, galaxies: star formation, cosmology: theory, Astrophysics - Astrophysics of Galaxies},
year = "2020",
month = "Feb",
volume = {492},
number = {1},
pages = {488-502},
doi = {10.1093/mnras/stz3527},
archivePrefix = {arXiv},
eprint = {1910.01163},
primaryClass = {astro-ph.GA},
adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.492..488G},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}


@ARTICLE{makedisk,
author = {{Springel}, Volker and {White}, Simon D.~M.},
title = "{Tidal tails in cold dark matter cosmologies}",
journal = {Monthly Notices of the Royal Astronomical Society},
keywords = {Astrophysics},
year = 1999,
month = jul,
volume = {307},
number = {1},
pages = {162-178},
doi = {10.1046/j.1365-8711.1999.02613.x},
archivePrefix = {arXiv},
eprint = {astro-ph/9807320},
primaryClass = {astro-ph},
adsurl = {https://ui.adsabs.harvard.edu/abs/1999MNRAS.307..162S},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}



@Article{ gizmo,
author = {{Hopkins}, P.~F.},
title = "{A new class of accurate, mesh-free hydrodynamic
simulation methods}",
journal = {Monthly Notices of the Royal Astronomical Society},
archiveprefix = "arXiv",
eprint = {1409.7395},
keywords = {hydrodynamics, instabilities, turbulence, methods:
numerical, cosmology: theory},
year = 2015,
month = jun,
volume = 450,
pages =
{53-110}, 186 | doi = {10.1093/mnras/stv195}, 187 | adsurl = {http://adsabs.harvard.edu/abs/2015MNRAS.450...53H}, 188 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 189 | } 190 | 191 | 192 | @ARTICLE{gadget2, 193 | author = {{Springel}, V.}, 194 | title = "{The cosmological simulation code GADGET-2}", 195 | journal = {Monthly Notices of the Royal Astronomical Society}, 196 | eprint = {astro-ph/0505010}, 197 | keywords = {methods: numerical, galaxies: interactions, dark matter}, 198 | year = 2005, 199 | month = dec, 200 | volume = 364, 201 | pages = {1105-1134}, 202 | doi = {10.1111/j.1365-2966.2005.09655.x}, 203 | adsurl = {http://adsabs.harvard.edu/abs/2005MNRAS.364.1105S}, 204 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 205 | } 206 | 207 | @ARTICLE{galic, 208 | author = {{Yurin}, Denis and {Springel}, Volker}, 209 | title = "{An iterative method for the construction of N-body galaxy models in collisionless equilibrium}", 210 | journal = {Monthly Notices of the Royal Astronomical Society}, 211 | keywords = {methods: numerical, stars: kinematics and dynamics, galaxies: haloes, galaxies: kinematics and dynamics, galaxies: structure, Astrophysics - Cosmology and Nongalactic Astrophysics}, 212 | year = 2014, 213 | month = oct, 214 | volume = {444}, 215 | number = {1}, 216 | pages = {62-79}, 217 | doi = {10.1093/mnras/stu1421}, 218 | archivePrefix = {arXiv}, 219 | eprint = {1402.1623}, 220 | primaryClass = {astro-ph.CO}, 221 | adsurl = {https://ui.adsabs.harvard.edu/abs/2014MNRAS.444...62Y}, 222 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 223 | } 224 | 225 | @ARTICLE{fire2, 226 | author = {{Hopkins}, Philip F. and {Wetzel}, Andrew and 227 | {Kere{\v{s}}}, Du{\v{s}}an and {Faucher-Gigu{\`e}re}, Claude-Andr{\'e} and 228 | {Quataert}, Eliot and {Boylan-Kolchin}, Michael and {Murray}, Norman and 229 | {Hayward}, Christopher C. 
and {Garrison-Kimmel}, Shea and 230 | {Hummels}, Cameron and {Feldmann}, Robert and {Torrey}, Paul and 231 | {Ma}, Xiangcheng and {Angl{\'e}s-Alc{\'a}zar}, Daniel and 232 | {Su}, Kung-Yi and {Orr}, Matthew and {Schmitz}, Denise and 233 | {Escala}, Ivanna and {Sanderson}, Robyn and {Grudi{\'c}}, Michael Y. and 234 | {Hafen}, Zachary and {Kim}, Ji-Hoon and {Fitts}, Alex and 235 | {Bullock}, James S. and {Wheeler}, Coral and {Chan}, T.~K. and 236 | {Elbert}, Oliver D. and {Narayanan}, Desika}, 237 | title = "{FIRE-2 simulations: physics versus numerics in galaxy formation}", 238 | journal = {Monthly Notices of the Royal Astronomical Society}, 239 | keywords = {methods: numerical, stars: formation, galaxies: active, galaxies: evolution, galaxies: formation, cosmology: theory, Astrophysics - Astrophysics of Galaxies, Astrophysics - Cosmology and Nongalactic Astrophysics, Astrophysics - Instrumentation and Methods for Astrophysics}, 240 | year = "2018", 241 | month = "Oct", 242 | volume = {480}, 243 | pages = {800-863}, 244 | doi = {10.1093/mnras/sty1690}, 245 | archivePrefix = {arXiv}, 246 | eprint = {1702.06148}, 247 | primaryClass = {astro-ph.GA}, 248 | adsurl = {https://ui.adsabs.harvard.edu/\#abs/2018MNRAS.480..800H}, 249 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 250 | } 251 | 252 | @ARTICLE{dehnen, 253 | author = {{Dehnen}, W. 
and {Read}, J.~I.}, 254 | title = "{N-body simulations of gravitational dynamics}", 255 | journal = {European Physical Journal Plus}, 256 | keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Physics - Computational Physics}, 257 | year = 2011, 258 | month = may, 259 | volume = {126}, 260 | eid = {55}, 261 | pages = {55}, 262 | doi = {10.1140/epjp/i2011-11055-3}, 263 | archivePrefix = {arXiv}, 264 | eprint = {1105.1082}, 265 | primaryClass = {astro-ph.IM}, 266 | adsurl = {https://ui.adsabs.harvard.edu/abs/2011EPJP..126...55D}, 267 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 268 | } 269 | 270 | @BOOK{aarseth_nbody, 271 | author = {{Aarseth}, Sverre J.}, 272 | title = "{Gravitational N-Body Simulations}", 273 | year = 2003, 274 | adsurl = {https://ui.adsabs.harvard.edu/abs/2003gnbs.book.....A}, 275 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 276 | } 277 | @ARTICLE{barneshut, 278 | author = {{Barnes}, Josh and {Hut}, Piet}, 279 | title = "{A hierarchical O(N log N) force-calculation algorithm}", 280 | journal = {Nature}, 281 | keywords = {Computational Astrophysics, Many Body Problem, Numerical Integration, Stellar Motions, Algorithms, Hierarchies, Physics (General)}, 282 | year = 1986, 283 | month = dec, 284 | volume = {324}, 285 | number = {6096}, 286 | pages = {446-449}, 287 | doi = {10.1038/324446a0}, 288 | adsurl = {https://ui.adsabs.harvard.edu/abs/1986Natur.324..446B}, 289 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 290 | } -------------------------------------------------------------------------------- /src/pytreegrav/frontend.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import warnings 3 | from numpy import zeros_like, zeros 4 | from .kernel import * 5 | from .octree import * 6 | from .dynamic_tree import * 7 | from .treewalk import * 8 | from .bruteforce import * 9 | from .misc import * 10 | 11 | 12 | def 
def valueTestMethod(method):
    """Validate the ``method`` argument accepted by the front-end summation routines.

    Parameters
    ----------
    method: str
        Summation method name; must be one of 'adaptive', 'bruteforce', or 'tree'

    Raises
    ------
    TypeError
        If method is not a string.
    ValueError
        If method is a string but not one of the supported method names.
    """
    methods = ["adaptive", "bruteforce", "tree"]

    ## check if method is a str - isinstance (rather than exact type comparison)
    ## also accepts str subclasses
    if not isinstance(method, str):
        raise TypeError("Invalid method type %s, must be str" % type(method))

    ## check if method is a valid method
    if method not in methods:
        raise ValueError("Invalid method %s. Must be one of: %s" % (method, str(methods)))


def warn_if_nonunique_positions(pos, softening=None):
    """Checks whether a potential/field calculation will return undefined values
    and warns the user if so.

    NOTE: the check is per-coordinate: a repeated value in any single coordinate
    column triggers the warning, so it is conservative and can warn even when
    the full 3D positions are all distinct.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    softening: array_like or None, optional
        shape (N,) array of softening lengths; if any are positive, the warning
        notes that softening determines the answer for overlapping particles
    """

    unique_positions = True
    for i in range(pos.shape[1]):
        # short-circuit on the first coordinate column with duplicates
        if np.unique(pos[:, i]).size < pos.shape[0]:
            unique_positions = False
            break

    if unique_positions:
        return

    if softening is not None:
        if np.any(softening > 0):
            warnings.warn(
                "Warning: Particle positions are non-unique. Softening will \
                determine the answer for overlapping particles."
            )
            return

    warnings.warn(
        "Warning: Particle positions are non-unique. The answer will be singular \
        or garbage for overlapping particles."
    )
    return
def ConstructTree(
    pos,
    m=None,
    softening=None,
    quadrupole=False,
    vel=None,
    compute_moments=True,
    morton_order=True,
):
    """Builds a tree containing particle data, for subsequent potential/field evaluation

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like or None, optional
        shape (N,) array of particle masses - if None then zeros will be used (e.g. if all you need the tree for is spatial algorithms)
    softening: array_like or None, optional
        shape (N,) array of particle softening lengths - these give the radius of compact support of the M4 cubic spline mass distribution of each particle
    quadrupole: bool, optional
        Whether to store quadrupole moments (default False)
    vel: array_like or None, optional
        shape (N,3) array of particle velocities - if provided, a DynamicOctree storing node velocities is built instead of a static Octree (default None)
    compute_moments: bool, optional
        Whether to compute node mass moments at build time; forced to False when no masses are provided (default True)
    morton_order: bool, optional
        Whether to Morton-order the particle data for better treewalk locality; only forwarded to the static Octree (default True)

    Returns
    -------
    tree: octree
        Octree instance built from particle data

    Raises
    ------
    ValueError
        If any position, mass, or softening value is non-finite.
    """

    warn_if_nonunique_positions(pos, softening)

    if m is None:
        m = zeros(len(pos))
        compute_moments = False  # no masses, so mass moments would be meaningless
    if softening is None:
        softening = zeros_like(m)
    # NaN/inf input would make the treebuild loop forever - fail fast with an
    # informative exception instead of the former print + bare `raise` (which
    # surfaced as an opaque RuntimeError)
    if not (np.all(np.isfinite(pos)) and np.all(np.isfinite(m)) and np.all(np.isfinite(softening))):
        raise ValueError("Invalid input detected - aborting treebuild to avoid going into an infinite loop!")

    if vel is None:
        return Octree(
            pos,
            m,
            softening,
            quadrupole=quadrupole,
            compute_moments=compute_moments,
            morton_order=morton_order,
        )
    else:
        # NOTE(review): compute_moments and morton_order are not forwarded to
        # DynamicOctree - confirm that is intentional
        return DynamicOctree(pos, m, softening, vel, quadrupole=quadrupole)
def Potential(
    pos,
    m,
    softening=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational potential calculation

    Computes the gravitational potential sourced by particles with positions
    ``pos`` and masses ``m``, evaluated at those same positions, using either
    brute-force or tree-based summation.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    softening: None or array_like, optional
        shape (N,) array of kernel support radii for gravitational softening (radius of compact support of the M4 cubic spline mass distribution); defaults to all zeros
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle controlling force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate (default 0.7, gives ~1% accuracy)
    tree: Octree, optional
        pre-generated Octree to reuse; may contain any set of particles, not necessarily the particles at pos (default None)
    return_tree: bool, optional
        if True, also return the tree used, for future reuse (default False)
    parallel: bool, optional
        if True, parallelize the force summation over all available cores (default False)
    method: str, optional
        summation method: 'adaptive', 'tree', or 'bruteforce' (default 'adaptive' tries to pick the faster choice)
    quadrupole: bool, optional
        whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    phi: array_like
        shape (N,) array of potentials at the particle positions
    """

    # validate the requested summation method up front
    valueTestMethod(method)

    if softening is None:
        softening = np.zeros_like(m)

    # resolve 'adaptive' to a concrete method: tree summation wins for large N
    if method == "adaptive":
        method = "tree" if len(pos) > 1000 else "bruteforce"

    if method == "bruteforce":
        summator = Potential_bruteforce_parallel if parallel else Potential_bruteforce
        pot = summator(pos, m, softening, G=G)
        if return_tree:
            tree = None
    else:
        if tree is None:
            # build the tree if the caller didn't supply one
            tree = ConstructTree(
                np.float64(pos),
                np.float64(m),
                np.float64(softening),
                quadrupole=quadrupole,
            )
        walk_order = tree.TreewalkIndices

        # evaluate in treewalk order for cache-friendly memory access...
        pos_ordered = np.take(pos, walk_order, axis=0)
        soft_ordered = np.take(softening, walk_order)

        walker = PotentialTarget_tree_parallel if parallel else PotentialTarget_tree
        pot = walker(pos_ordered, soft_ordered, tree, theta=theta, G=G, quadrupole=quadrupole)

        # ...then undo the permutation so the output matches the input ordering
        pot = np.take(pot, walk_order.argsort())

    return (pot, tree) if return_tree else pot
def PotentialTarget(
    pos_target,
    pos_source,
    m_source,
    softening_target=None,
    softening_source=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational potential calculation for general N+M body case

    Returns the gravitational potential for a set of M particles with positions x_source and masses m_source, at the positions of a set of N particles that need not be the same.

    Parameters
    ----------
    pos_target: array_like
        shape (N,3) array of target particle positions where you want to know the potential
    pos_source: array_like
        shape (M,3) array of source particle positions (positions of particles sourcing the gravitational field) - may be None if a pre-built tree is supplied
    m_source: array_like
        shape (M,) array of source particle masses - may be None if a pre-built tree is supplied
    softening_target: array_like or None, optional
        shape (N,) array of target particle softening radii - these give the radius of compact support of the M4 cubic spline mass distribution
    softening_source: array_like or None, optional
        shape (M,) array of source particle radii - these give the radius of compact support of the M4 cubic spline mass distribution
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7, gives ~1% accuracy)
    parallel: bool, optional
        If True, will parallelize the force summation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        return the tree used for future use (default False)
    method: str, optional
        Which summation method to use: 'adaptive', 'tree', or 'bruteforce' (default adaptive tries to pick the faster choice)
    quadrupole: bool, optional
        Whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    phi: array_like
        shape (N,) array of potentials at the target positions
    """

    ## test if method is correct, otherwise raise a ValueError
    valueTestMethod(method)

    ## allow user to pass in tree without passing in source pos and m
    ## but catch if they don't pass in the tree.
    if tree is None and (pos_source is None or m_source is None):
        raise ValueError("Must pass either pos_source & m_source or source tree.")

    if softening_target is None:
        softening_target = zeros(len(pos_target))
    if softening_source is None and pos_source is not None:
        softening_source = zeros(len(pos_source))

    # figure out which method to use: tree wins for large N*M workloads, and is
    # forced when only a pre-built tree (no pos_source) was supplied
    if method == "adaptive":
        if pos_source is None or len(pos_target) * len(pos_source) > 10**6:
            method = "tree"
        else:
            method = "bruteforce"

    if method == "bruteforce":  # we're using brute force
        if parallel:
            phi = PotentialTarget_bruteforce_parallel(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        else:
            phi = PotentialTarget_bruteforce(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        if return_tree:  # no tree was used, so there is none to hand back
            tree = None
    else:  # we're using the tree algorithm
        if tree is None:
            tree = ConstructTree(
                np.float64(pos_source),
                np.float64(m_source),
                np.float64(softening_source),
                quadrupole=quadrupole,
            )  # build the tree if needed
        # NOTE: unlike Potential(), targets are not re-sorted into treewalk
        # order here - targets and sources are independent particle sets
        if parallel:
            phi = PotentialTarget_tree_parallel(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )
        else:
            phi = PotentialTarget_tree(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )

    if return_tree:
        return phi, tree
    else:
        return phi
def Accel(
    pos,
    m,
    softening=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational acceleration calculation

    Computes the gravitational acceleration sourced by particles with positions
    ``pos`` and masses ``m``, evaluated at those same positions, using either
    brute-force or tree-based summation.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    softening: None or array_like, optional
        shape (N,) array of kernel support radii for gravitational softening (radius of compact support of the M4 cubic spline mass distribution); defaults to all zeros
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle controlling force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate (default 0.7, gives ~1% accuracy)
    tree: Octree, optional
        pre-generated Octree to reuse; may contain any set of particles, not necessarily the particles at pos (default None)
    return_tree: bool, optional
        if True, also return the tree used, for future reuse (default False)
    parallel: bool, optional
        if True, parallelize the force summation over all available cores (default False)
    method: str, optional
        summation method: 'adaptive', 'tree', or 'bruteforce' (default 'adaptive' tries to pick the faster choice)
    quadrupole: bool, optional
        whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    g: array_like
        shape (N,3) array of acceleration vectors at the particle positions
    """

    # validate the requested summation method up front
    valueTestMethod(method)

    if softening is None:
        softening = np.zeros_like(m)

    # resolve 'adaptive' to a concrete method: tree summation wins for large N
    if method == "adaptive":
        method = "tree" if len(pos) > 1000 else "bruteforce"

    if method == "bruteforce":
        summator = Accel_bruteforce_parallel if parallel else Accel_bruteforce
        accel = summator(pos, m, softening, G=G)
        if return_tree:
            tree = None
    else:
        if tree is None:
            # build the tree if the caller didn't supply one
            tree = ConstructTree(
                np.float64(pos),
                np.float64(m),
                np.float64(softening),
                quadrupole=quadrupole,
            )
        walk_order = tree.TreewalkIndices

        # evaluate in treewalk order for cache-friendly memory access...
        pos_ordered = np.take(pos, walk_order, axis=0)
        soft_ordered = np.take(softening, walk_order)

        walker = AccelTarget_tree_parallel if parallel else AccelTarget_tree
        accel = walker(pos_ordered, soft_ordered, tree, theta=theta, G=G, quadrupole=quadrupole)

        # ...then undo the permutation so rows match the input ordering
        accel = np.take(accel, walk_order.argsort(), axis=0)

    return (accel, tree) if return_tree else accel
def AccelTarget(
    pos_target,
    pos_source,
    m_source,
    softening_target=None,
    softening_source=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational acceleration calculation for general N+M body case

    Returns the gravitational acceleration for a set of M particles with positions x_source and masses m_source, at the positions of a set of N particles that need not be the same.

    Parameters
    ----------
    pos_target: array_like
        shape (N,3) array of target particle positions where you want to know the acceleration
    pos_source: array_like
        shape (M,3) array of source particle positions (positions of particles sourcing the gravitational field) - may be None if a pre-built tree is supplied
    m_source: array_like
        shape (M,) array of source particle masses - may be None if a pre-built tree is supplied
    softening_target: array_like or None, optional
        shape (N,) array of target particle softening radii - these give the radius of compact support of the M4 cubic spline mass distribution
    softening_source: array_like or None, optional
        shape (M,) array of source particle radii - these give the radius of compact support of the M4 cubic spline mass distribution
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7, gives ~1% accuracy)
    parallel: bool, optional
        If True, will parallelize the force summation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        return the tree used for future use (default False)
    method: str, optional
        Which summation method to use: 'adaptive', 'tree', or 'bruteforce' (default adaptive tries to pick the faster choice)
    quadrupole: bool, optional
        Whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    g: array_like
        shape (N,3) array of accelerations at the target positions
    """

    ## test if method is correct, otherwise raise a ValueError
    valueTestMethod(method)

    ## allow user to pass in tree without passing in source pos and m
    ## but catch if they don't pass in the tree.
    if tree is None and (pos_source is None or m_source is None):
        raise ValueError("Must pass either pos_source & m_source or source tree.")

    if softening_target is None:
        softening_target = zeros(len(pos_target))
    if softening_source is None and pos_source is not None:
        softening_source = zeros(len(pos_source))

    # figure out which method to use: tree wins for large N*M workloads, and is
    # forced when only a pre-built tree (no pos_source) was supplied
    if method == "adaptive":
        if pos_source is None or len(pos_target) * len(pos_source) > 10**6:
            method = "tree"
        else:
            method = "bruteforce"

    if method == "bruteforce":  # we're using brute force
        if parallel:
            g = AccelTarget_bruteforce_parallel(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        else:
            g = AccelTarget_bruteforce(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        if return_tree:  # no tree was used, so there is none to hand back
            tree = None
    else:  # we're using the tree algorithm
        if tree is None:
            tree = ConstructTree(
                np.float64(pos_source),
                np.float64(m_source),
                np.float64(softening_source),
                quadrupole=quadrupole,
            )  # build the tree if needed
        if parallel:
            g = AccelTarget_tree_parallel(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )
        else:
            g = AccelTarget_tree(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )

    if return_tree:
        return g, tree
    else:
        return g
def DensityCorrFunc(
    pos,
    m,
    rbins=None,
    max_bin_size_ratio=100,
    theta=1.0,
    tree=None,
    return_tree=False,
    parallel=False,
    boxsize=0,
    weighted_binning=False,
):
    """Computes the average amount of mass in radial bin [r,r+dr] around a point, provided a set of radial bins.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    rbins: array_like or None, optional
        1D array of radial bin edges - if None will use heuristics to determine sensible bins. Otherwise MUST BE LOGARITHMICALLY SPACED (default None)
    max_bin_size_ratio: float, optional
        controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width (default 100)
    theta: float, optional
        cell opening angle used to control accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0)
    parallel: bool, optional
        If True, will parallelize the correlation function computation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        if True will return the generated or used tree for future use (default False)
    boxsize: float, optional
        finite periodic box size, if periodic boundary conditions are to be used (default 0)
    weighted_binning: bool, optional
        (experimental) if True will distribute mass among radial bins with a weighted kernel (default False)

    Returns
    -------
    rbins: array_like
        array containing radial bin edges
    mbins: array_like
        array containing mean mass in radial bins, averaged over all points
    """

    if rbins is None:
        # heuristic bins: log-spaced from the 11th-smallest distance to the
        # median position out to the largest, with ~N^(1/3) bin edges
        # NOTE(review): r[10] assumes at least 11 particles - fewer will IndexError
        r = np.sort(np.sqrt(np.sum((pos - np.median(pos, axis=0)) ** 2, axis=1)))
        rbins = 10 ** np.linspace(np.log10(r[10]), np.log10(r[-1]), int(len(r) ** (1.0 / 3)))

    if tree is None:
        softening = np.zeros_like(m)
        tree = ConstructTree(np.float64(pos), np.float64(m), np.float64(softening))  # build the tree if needed
    idx = tree.TreewalkIndices

    # sort by the order they appear in the treewalk to improve access pattern efficiency
    pos_sorted = np.take(pos, idx, axis=0)

    if parallel:
        mbins = DensityCorrFunc_tree_parallel(
            pos_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )
    else:
        mbins = DensityCorrFunc_tree(
            pos_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )

    if return_tree:
        return rbins, mbins, tree
    else:
        return rbins, mbins
def VelocityCorrFunc(
    pos,
    m,
    v,
    rbins=None,
    max_bin_size_ratio=100,
    theta=1.0,
    tree=None,
    return_tree=False,
    parallel=False,
    boxsize=0,
    weighted_binning=False,
):
    """Computes the weighted average product v(x).v(x+r), for a vector field v, in radial bins

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    v: array_like
        shape (N,3) of vector quantity (e.g. velocity, magnetic field, etc)
    rbins: array_like or None, optional
        1D array of radial bin edges - if None will use heuristics to determine sensible bins. Otherwise MUST BE LOGARITHMICALLY SPACED (default None)
    max_bin_size_ratio: float, optional
        controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width (default 100)
    theta: float, optional
        cell opening angle used to control accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0)
    parallel: bool, optional
        If True, will parallelize the correlation function computation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        if True will return the generated or used tree for future use (default False)
    boxsize: float, optional
        finite periodic box size, if periodic boundary conditions are to be used (default 0)
    weighted_binning: bool, optional
        (experimental) if True will distribute mass among radial bins with a weighted kernel (default False)

    Returns
    -------
    rbins: array_like
        array containing radial bin edges
    corr: array_like
        array containing correlation function values in radial bins
    """

    if rbins is None:
        # heuristic bins: log-spaced from the 11th-smallest distance to the
        # median position out to the largest, with ~N^(1/3) bin edges
        # NOTE(review): r[10] assumes at least 11 particles - fewer will IndexError
        r = np.sort(np.sqrt(np.sum((pos - np.median(pos, axis=0)) ** 2, axis=1)))
        rbins = 10 ** np.linspace(np.log10(r[10]), np.log10(r[-1]), int(len(r) ** (1.0 / 3)))

    if tree is None:
        softening = np.zeros_like(m)
        # vel=v builds a DynamicOctree that stores node velocities
        tree = ConstructTree(np.float64(pos), np.float64(m), np.float64(softening), vel=v)  # build the tree if needed
    idx = tree.TreewalkIndices

    # sort by the order they appear in the treewalk to improve access pattern efficiency
    pos_sorted = np.take(pos, idx, axis=0)
    v_sorted = np.take(v, idx, axis=0)
    wt_sorted = np.take(m, idx, axis=0)  # masses are the statistical weights
    if parallel:
        corr = VelocityCorrFunc_tree_parallel(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )
    else:
        corr = VelocityCorrFunc_tree(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )

    if return_tree:
        return rbins, corr, tree
    else:
        return rbins, corr
def VelocityStructFunc(
    pos,
    m,
    v,
    rbins=None,
    max_bin_size_ratio=100,
    theta=1.0,
    tree=None,
    return_tree=False,
    parallel=False,
    boxsize=0,
    weighted_binning=False,
):
    """Computes the structure function for a vector field: the average value of |v(x)-v(x+r)|^2, in radial bins for r

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    v: array_like
        shape (N,3) of vector quantity (e.g. velocity, magnetic field, etc)
    rbins: array_like or None, optional
        1D array of radial bin edges - if None will use heuristics to determine sensible bins. Otherwise MUST BE LOGARITHMICALLY SPACED (default None)
    max_bin_size_ratio: float, optional
        controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width (default 100)
    theta: float, optional
        cell opening angle used to control accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0)
    parallel: bool, optional
        If True, will parallelize the correlation function computation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        if True will return the generated or used tree for future use (default False)
    boxsize: float, optional
        finite periodic box size, if periodic boundary conditions are to be used (default 0)
    weighted_binning: bool, optional
        (experimental) if True will distribute mass among radial bins with a weighted kernel (default False)

    Returns
    -------
    rbins: array_like
        array containing radial bin edges
    Sv: array_like
        array containing structure function values
    """

    if rbins is None:
        # heuristic bins: log-spaced from the 11th-smallest distance to the
        # median position out to the largest, with ~N^(1/3) bin edges
        # NOTE(review): r[10] assumes at least 11 particles - fewer will IndexError
        r = np.sort(np.sqrt(np.sum((pos - np.median(pos, axis=0)) ** 2, axis=1)))
        rbins = 10 ** np.linspace(np.log10(r[10]), np.log10(r[-1]), int(len(r) ** (1.0 / 3)))

    if tree is None:
        softening = np.zeros_like(m)
        # vel=v builds a DynamicOctree that stores node velocities
        tree = ConstructTree(np.float64(pos), np.float64(m), np.float64(softening), vel=v)  # build the tree if needed
    idx = tree.TreewalkIndices

    # sort by the order they appear in the treewalk to improve access pattern efficiency
    pos_sorted = np.take(pos, idx, axis=0)
    v_sorted = np.take(v, idx, axis=0)
    wt_sorted = np.take(m, idx, axis=0)  # masses are the statistical weights
    if parallel:
        Sv = VelocityStructFunc_tree_parallel(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )
    else:
        Sv = VelocityStructFunc_tree(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )

    if return_tree:
        return rbins, Sv, tree
    else:
        return rbins, Sv
def ColumnDensity(
    pos,
    m,
    radii,
    rays=None,
    randomize_rays=False,
    healpix=False,
    tree=None,
    theta=0.5,
    return_tree=False,
    parallel=False,
):
    """Ray-traced or angle-binned column density calculation.

    Returns an estimate of the column density from the position of each particle
    integrated to infinity, assuming the particles are represented by uniform spheres. Note
    that optical depth can be obtained by supplying "sigma = opacity * mass" in
    place of mass, useful in situations where opacity is highly variable.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    radii: array_like
        shape (N,) array containing particle radii of the uniform spheres that
        we use to model the particles' mass distribution
    rays: optional
        Which ray directions to raytrace the columns.
        None: use the angular-binned column density method with 6 bins on the sky
        OPTION 2: Integer number: use this many rays, with 6 using the standard
        6-ray grid and other numbers sampling random directions
        OPTION 3: Give a (N_rays,3) array of vectors specifying the
        directions, which will be automatically normalized.
    healpix: int, optional
        Use healpix ray grid with specified resolution level NSIDE; if nonzero
        this overrides any rays argument
    randomize_rays: bool, optional
        Randomize the orientation of the ray-grid *for each particle*
    parallel: bool, optional
        If True, will parallelize the column density over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles,
        not necessarily the target particles at pos (default None)
    theta: float, optional
        Opening angle for beam-traced angular bin estimator
    return_tree: bool, optional
        return the tree used for future use (default False)

    Returns
    -------
    columns: array_like
        shape (N,N_rays) float array of column densities from particle
        centers integrated along the rays

    Raises
    ------
    ValueError
        If a rays array is not 2D or its rows are not 3D vectors.
    TypeError
        If rays is neither None, an integer, nor a NumPy array.
    """

    if tree is None:
        tree = ConstructTree(
            np.float64(pos),
            np.float64(m),
            np.float64(radii),
        )  # build the tree if needed
    idx = tree.TreewalkIndices
    pos_sorted = np.take(pos, idx, axis=0)  # treewalk order for access efficiency

    if isinstance(rays, (int, np.integer)):  # accepts Python and NumPy integers
        if rays == 6:
            rays = np.vstack([np.eye(3), -np.eye(3)])  # 6-ray grid
        else:
            # generate a random grid of ray directions
            rays = np.random.normal(size=(rays, 3))  # normalize later
    elif isinstance(rays, np.ndarray):
        # check that the shape is correct
        if not len(rays.shape) == 2:
            raise ValueError("rays array argument must be 2D.")
        elif rays.shape[1] != 3:
            raise ValueError("rays array argument is not an array of 3D vectors.")
        # copy as float64 so we don't overwrite the caller's array and so the
        # in-place normalization below also works for integer input arrays
        rays = np.array(rays, dtype=np.float64)
    elif rays is not None:
        raise TypeError("rays argument type is not supported")

    if healpix:
        # NOTE(review): hp (healpy) is expected to be provided via the module's
        # star imports - confirm it is available in this environment
        nside = healpix
        npix = hp.nside2npix(nside)
        rays = np.array(hp.pix2vec(nside, np.arange(npix))).T

    if rays is not None:
        rays /= np.sqrt((rays * rays).sum(1))[:, None]  # normalize the ray vectors

    if parallel:
        columns = ColumnDensity_tree_parallel(pos_sorted, tree, rays, randomize_rays=randomize_rays, theta=theta)
    else:
        columns = ColumnDensity_tree(pos_sorted, tree, rays, randomize_rays=randomize_rays, theta=theta)
    if np.any(np.isnan(columns)):
        warnings.warn("WARNING some column densities are NaN!")
    # reorder results back to the input particle ordering
    columns = np.take(columns, idx.argsort(), axis=0)

    if return_tree:
        return columns, tree
    else:
        return columns
from numpy import sqrt, empty, zeros, empty_like, zeros_like, dot, fabs
from numba import njit, prange, get_num_threads, set_parallel_chunksize, int64, float64
from math import copysign
from .kernel import *
from .misc import *
import numpy as np
from scipy.spatial.transform import Rotation as R


@njit(fastmath=True)
def acceptance_criterion(r: float, h: float, size: float, delta: float, theta: float) -> bool:
    """Decide whether a node's multipole approximation may be accepted.

    The node must be farther than both the opening-angle distance and the
    softening-safety distance from the evaluation point.
    """
    opening_distance = size / theta + delta
    softening_distance = h + size * 0.6 + delta
    return r > opening_distance and r > softening_distance


@njit([int64(float64[:])], fastmath=True)
def angular_bin(dx):
    """Map direction dx to one of 6 sky bins (+x,-x,+y,-y,+z,-z) by its
    dominant component; used by the binned column density estimator."""
    ax, ay, az = fabs(dx[0]), fabs(dx[1]), fabs(dx[2])
    if ax > ay and ax > az:
        axis = 0
        positive = dx[0] > 0
    elif ay > az:
        axis = 1
        positive = dx[1] > 0
    else:
        axis = 2
        positive = dx[2] > 0
    # even bins are the + directions, odd bins the - directions
    if positive:
        return 2 * axis
    return 2 * axis + 1


@njit(fastmath=True)
def NearestImage(x, boxsize):
    """Wrap a 1D separation x onto the nearest periodic image for a cubic box."""
    if abs(x) <= boxsize / 2:
        return x
    return -copysign(boxsize - abs(x), x)
@njit(fastmath=True)
def PotentialWalk(pos, tree, softening=0, no=-1, theta=0.7):
    """Returns the gravitational potential at position pos by performing the
    Barnes-Hut treewalk using the provided octree instance (monopole order).

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure

    Keyword arguments:
    softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7 gives ~1% accuracy)

    Returns:
    phi - gravitational potential at pos (negative; G=1 units)
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index
    phi = 0
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    while no > -1:  # NextBranch == -1 marks the end of the treewalk ordering
        # separation vector and distance from pos to node/particle `no`
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r += dx[k] * dx[k]
        r = sqrt(r)
        h = max(tree.Softenings[no], softening)  # symmetrized softening

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            if r > 0:  # by default we neglect the self-potential
                if r < h:
                    # softened interaction: kernel-weighted potential
                    phi += tree.Masses[no] * PotentialKernel(r, h)
                else:
                    # plain Newtonian point-mass potential
                    phi -= tree.Masses[no] / r
            no = tree.NextBranch[no]
        elif acceptance_criterion(
            r, h, tree.Sizes[no], tree.Deltas[no], theta
        ):  # if we satisfy the criteria for accepting the monopole
            phi -= tree.Masses[no] / r
            no = tree.NextBranch[no]
        else:  # open the node
            no = tree.FirstSubnode[no]

    return phi
@njit(fastmath=True)
def PotentialWalk_quad(pos, tree, softening=0, no=-1, theta=0.7):
    """Returns the gravitational potential at position pos by performing the
    Barnes-Hut treewalk using the provided octree instance. Uses the
    quadrupole expansion for accepted nodes, improving accuracy at fixed theta.

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure

    Keyword arguments:
    softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7 gives ~1% accuracy)

    Returns:
    phi - gravitational potential at pos (negative; G=1 units)
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index
    phi = 0
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r += dx[k] * dx[k]
        r = sqrt(r)
        h = max(tree.Softenings[no], softening)  # symmetrized softening

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            if r > 0:  # by default we neglect the self-potential
                if r < h:
                    phi += tree.Masses[no] * PotentialKernel(r, h)
                else:
                    phi -= tree.Masses[no] / r
            no = tree.NextBranch[no]
        elif acceptance_criterion(r, h, tree.Sizes[no], tree.Deltas[no], theta):
            # if we satisfy the criteria for accepting the monopole
            phi -= tree.Masses[no] / r
            # quadrupole correction: -(1/2) dx^T Q dx / r^5
            quad = tree.Quadrupoles[no]
            r5inv = 1 / (r * r * r * r * r)
            for k in range(3):
                for l in range(3):
                    phi -= 0.5 * dx[k] * quad[k, l] * dx[l] * r5inv
            no = tree.NextBranch[no]
        else:  # open the node
            no = tree.FirstSubnode[no]

    return phi
Arguments: 137 | pos - (3,) array containing position of interest 138 | tree - octree instance storing the tree structure 139 | Keyword arguments: 140 | softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential 141 | no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization 142 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7 gives ~1% accuracy) 143 | """ 144 | if no < 0: 145 | no = tree.NumParticles # we default to the top-level node index 146 | g = zeros(3, dtype=np.float64) 147 | dx = np.empty(3, dtype=np.float64) 148 | 149 | while no > -1: # loop until we get to the end of the tree 150 | r2 = 0 151 | for k in range(3): 152 | dx[k] = tree.Coordinates[no, k] - pos[k] 153 | r2 += dx[k] * dx[k] 154 | r = sqrt(r2) 155 | h = max(tree.Softenings[no], softening) 156 | 157 | sum_field = False 158 | 159 | if no < tree.NumParticles: # if we're looking at a leaf/particle 160 | if r > 0: # no self-force 161 | if r < h: # within the softening radius 162 | # fac stores the quantity M( -1: # loop until we get to the end of the tree 201 | r2 = 0 202 | for k in range(3): 203 | dx[k] = tree.Coordinates[no, k] - pos[k] 204 | r2 += dx[k] * dx[k] 205 | r = sqrt(r2) 206 | h = max(tree.Softenings[no], softening) 207 | 208 | if no < tree.NumParticles: # if we're looking at a leaf/particle 209 | if r > 0: # no self-force 210 | if r < h: # within the softening radius 211 | # fac stores the quantity M( rbins[r_idx]): 306 | mbin[r_idx] += tree.Masses[no] * quantity 307 | else: 308 | min_bin = int((np.log10((r - h) / rbins[0]) / np.log10(rbins[1] / rbins[0]))) 309 | max_bin = min(int(np.log10((r + h) / rbins[0]) / np.log10(rbins[1] / rbins[0]) + 1), Nbins) 310 | total_wt = 0 311 | for i in 
range(min_bin, max_bin): # range(min_bin,max_bin): # first the prepass to get the total weight 312 | # (r > rbins[i] and r < rbins[i+1]) or dr < 0.5*tree.Sizes[no]: 313 | i1, i2 = max(r - h, rbins[i]), min(r + h, rbins[i + 1]) 314 | overlap = i2 - i1 315 | if overlap > 0: # if there's overlap 316 | reff = 0.5 * (i1 + i2) # sqrt(rbins[i]*rbins[i+1]) 317 | dr = fabs(r - reff) 318 | wt = max(0, 1 - dr * dr / (h * h)) * overlap 319 | total_wt += wt 320 | 321 | for i in range( 322 | min_bin, max_bin 323 | ): # range(min_bin,max_bin): # then distribute according to the normalized weighting 324 | i1, i2 = max(r - h, rbins[i]), min(r + h, rbins[i + 1]) 325 | overlap = i2 - i1 326 | if overlap > 0: # if there's overlap 327 | reff = 0.5 * (i1 + i2) # sqrt(rbins[i]*rbins[i+1]) 328 | dr = fabs(r - reff) 329 | wt = max(0, 1 - dr * dr / (h * h)) * overlap / total_wt 330 | mbin[i] += wt * tree.Masses[no] * quantity 331 | 332 | 333 | @njit(fastmath=True) 334 | def DensityCorrWalk( 335 | pos, 336 | tree, 337 | rbins, 338 | max_bin_size_ratio=100, 339 | theta=0.7, 340 | no=-1, 341 | boxsize=0, 342 | weighted_binning=False, 343 | ): 344 | """Returns the gravitational potential at position x by performing the Barnes-Hut treewalk using the provided octree instance 345 | 346 | Arguments: 347 | pos - (3,) array containing position of interest 348 | tree - octree object storing the tree structure 349 | 350 | Keyword arguments: 351 | softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential 352 | no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization 353 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. 
@njit(fastmath=True)
def DensityCorrWalk(
    pos,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    no=-1,
    boxsize=0,
    weighted_binning=False,
):
    """Accumulates the total mass in logarithmic radial bins centered on pos,
    by performing a Barnes-Hut-style treewalk of the provided octree instance.
    This is the per-point kernel of the density correlation function.

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure
    rbins - 1D array of radial bin edges (assumed logarithmically spaced)

    Keyword arguments:
    max_bin_size_ratio - nodes are opened until their size is below this factor times the local bin width
    theta - cell opening angle used to control accuracy; smaller is slower but more accurate
    no - index of the top-level node to start from - defaults to the global top-level node
    boxsize - periodic box size; 0 means non-periodic
    weighted_binning - if True, spread each accepted node's mass over the bins it overlaps

    Returns:
    mbin - (len(rbins)-1,) array of total mass per radial bin
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    Nbins = rbins.shape[0] - 1
    mbin = zeros(Nbins)
    counts = zeros(Nbins)
    rmin = rbins[0]
    rmax = rbins[-1]
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    # precompute log-bin bookkeeping: bin index = Nbins*(log10 r - log10 rmin)/dlogr
    logr_min = np.log10(rmin)
    logr_max = np.log10(rmax)
    dlogr = logr_max - logr_min

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            if boxsize > 0:
                dx[k] = NearestImage(dx[k], boxsize)  # periodic wrap
            r += dx[k] * dx[k]

        r = sqrt(r)
        within_bounds = (r > rmin) and (r < rmax)
        if within_bounds:
            logr = np.log10(r)
            r_idx = int(Nbins * (logr - logr_min) / dlogr)
            if no < tree.NumParticles:
                # leaf: deposit its mass directly in the bin it falls in
                mbin[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            elif (
                r
                > max(
                    tree.Sizes[no] / theta + tree.Deltas[no],
                    tree.Sizes[no] * 0.6 + tree.Deltas[no],
                )
            ) and (tree.Sizes[no] < max_bin_size_ratio * (rbins[r_idx + 1] - rbins[r_idx])):
                # node accepted: small enough relative to the local bin width
                if weighted_binning:
                    # spread the node's mass over all bins it overlaps
                    do_weighted_binning(tree, no, rbins, mbin, r, r_idx, 1)
                else:
                    # jitter the deposit radius within the node size to reduce
                    # binning artifacts from lumping the node at its center
                    rnew = r + (np.random.rand() - 0.5) * tree.Sizes[no]
                    r_idx = int(Nbins * (np.log10(rnew) - logr_min) / dlogr)
                    mbin[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]  # open the node
        else:
            # out of radial range: still must decide whether the node could
            # overlap the binned range, otherwise skip it entirely
            if no < tree.NumParticles:
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ):
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]
    return mbin
def DensityCorrFunc_tree(
    pos,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    boxsize=0,
    weighted_binning=False,
):
    """Returns the average mass in radial bins surrounding the given points.

    Arguments:
    pos -- shape (N,3) array of particle positions
    tree -- Octree instance containing the positions, masses, and softenings of the source particles
    rbins -- 1D array of radial bin edges

    Optional arguments:
    max_bin_size_ratio -- controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width
    theta -- cell opening angle used to control accuracy
    boxsize -- periodic box size; 0 means non-periodic
    weighted_binning -- if True, spread each accepted node's mass over the bins it overlaps

    Returns:
    mbins -- array containing the mean mass per bin, averaged over all N points
    """
    Nthreads = get_num_threads()
    # one accumulator row per thread to avoid write contention under prange
    mbin = zeros((Nthreads, rbins.shape[0] - 1))
    # break into chunks for parallelization
    for chunk in prange(Nthreads):
        # strided assignment of points to threads
        for i in range(chunk, pos.shape[0], Nthreads):
            dmbin = DensityCorrWalk(
                pos[i],
                tree,
                rbins,
                max_bin_size_ratio=max_bin_size_ratio,
                theta=theta,
                boxsize=boxsize,
                weighted_binning=weighted_binning,
            )
            for j in range(mbin.shape[1]):
                mbin[chunk, j] += dmbin[j]
    # reduce over threads and average over points
    return mbin.sum(0) / pos.shape[0]


# JIT this function and its parallel version
DensityCorrFunc_tree_parallel = njit(DensityCorrFunc_tree, fastmath=True, parallel=True)
DensityCorrFunc_tree = njit(DensityCorrFunc_tree, fastmath=True)
@njit(fastmath=True)
def VelocityCorrWalk(
    pos,
    vel,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    no=-1,
    boxsize=0,
    weighted_binning=False,
):
    """Accumulates mass-weighted velocity dot products (and the weights) in
    logarithmic radial bins around one point, by walking the octree. This is
    the per-point kernel of the velocity correlation function.

    Arguments:
    pos - (3,) array containing position of interest
    vel - (3,) array containing velocity of point of interest
    tree - octree object storing the tree structure
    rbins - 1D array of radial bin edges (assumed logarithmically spaced)

    Keyword arguments:
    max_bin_size_ratio - nodes are opened until their size is below this factor times the local bin width
    theta - cell opening angle used to control accuracy
    no - index of the top-level node to start from - defaults to the global top-level node
    boxsize - periodic box size; 0 means non-periodic
    weighted_binning - if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    wtsums, binsums - per-bin mass weights and mass-weighted v.v' sums
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    Nbins = rbins.shape[0] - 1
    binsums = zeros(Nbins)
    wtsums = zeros(Nbins)
    # counts = zeros(Nbins)
    rmin = rbins[0]
    rmax = rbins[-1]
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    # log-bin bookkeeping
    logr_min = np.log10(rmin)
    logr_max = np.log10(rmax)
    dlogr = logr_max - logr_min

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            if boxsize > 0:
                dx[k] = NearestImage(dx[k], boxsize)  # periodic wrap
            r += dx[k] * dx[k]
        r = sqrt(r)
        # theta = min(1,theta * np.exp(0.5*np.random.normal())) # if we randomize the opening criteria a bit we'll get fewer binning artifacts
        within_bounds = (r > rmin) and (r < rmax)
        if within_bounds:
            logr = np.log10(r)
            r_idx = int(Nbins * (logr - logr_min) / dlogr)
            if no < tree.NumParticles:
                # leaf: mass-weighted dot product with the target velocity
                vprod = 0
                for k in range(3):
                    vprod += vel[k] * tree.Velocities[no][k] * tree.Masses[no]
                binsums[r_idx] += vprod
                wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ) and tree.Sizes[no] < max_bin_size_ratio * (rbins[r_idx + 1] - rbins[r_idx]):
                # node accepted: use its center-of-mass velocity
                vprod = 0
                for k in range(3):
                    vprod += vel[k] * tree.Velocities[no][k]
                if weighted_binning:
                    # spread contribution and weight over overlapped bins
                    do_weighted_binning(tree, no, rbins, binsums, r, r_idx, vprod)
                    do_weighted_binning(tree, no, rbins, wtsums, r, r_idx, 1)
                else:
                    # jitter the deposit radius within the node size to reduce
                    # binning artifacts
                    rnew = r + (np.random.rand() - 0.5) * tree.Sizes[no]
                    r_idx = int(Nbins * (np.log10(rnew) - logr_min) / dlogr)
                    binsums[r_idx] += vprod * tree.Masses[no]
                    wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]  # open the node
        else:
            # out of radial range: skip leaves/accepted nodes, open the rest
            if no < tree.NumParticles:
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ):
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]
    return wtsums, binsums
def VelocityCorrFunc_tree(
    pos,
    vel,
    weight,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    boxsize=0,
    weighted_binning=False,
):
    """Returns the weighted velocity correlation function in radial bins,
    averaged over the given points.

    Arguments:
    pos -- shape (N,3) array of particle positions
    vel -- shape (N,3) array of particle velocities
    weight -- shape (N,) array of per-particle statistical weights
    tree -- Octree instance containing the positions, masses, softenings, and velocities of the source particles
    rbins -- 1D array of radial bin edges

    Optional arguments:
    max_bin_size_ratio -- controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width
    theta -- cell opening angle used to control accuracy
    boxsize -- periodic box size; 0 means non-periodic
    weighted_binning -- if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    array of the weighted mean v.v' per radial bin
    """
    Nthreads = get_num_threads()
    # one accumulator row per thread to avoid write contention under prange
    mbin = zeros((Nthreads, rbins.shape[0] - 1))
    wtsum = zeros_like(mbin)
    # break into chunks for parallelization
    for chunk in prange(Nthreads):
        # strided assignment of points to threads
        for i in range(chunk, pos.shape[0], Nthreads):
            dwtsum, dmbin = VelocityCorrWalk(
                pos[i],
                vel[i],
                tree,
                rbins,
                max_bin_size_ratio=max_bin_size_ratio,
                theta=theta,
                boxsize=boxsize,
                weighted_binning=weighted_binning,
            )
            for j in range(mbin.shape[1]):
                mbin[chunk, j] += dmbin[j] * weight[i]
                wtsum[chunk, j] += weight[i] * dwtsum[j]
    # reduce over threads and normalize by the accumulated weights
    return mbin.sum(0) / wtsum.sum(0)


# JIT this function and its parallel version
VelocityCorrFunc_tree_parallel = njit(VelocityCorrFunc_tree, fastmath=True, parallel=True)
VelocityCorrFunc_tree = njit(VelocityCorrFunc_tree, fastmath=True)
@njit(fastmath=True)
def VelocityStructWalk(
    pos,
    vel,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    no=-1,
    boxsize=0,
    weighted_binning=False,
):
    """Accumulates mass-weighted squared velocity differences (and the weights)
    in logarithmic radial bins around one point, by walking the octree. This is
    the per-point kernel of the velocity structure function.

    Arguments:
    pos - (3,) array containing position of interest
    vel - (3,) array containing velocity of point of interest
    tree - octree object storing the tree structure
    rbins - 1D array of radial bin edges (assumed logarithmically spaced)

    Keyword arguments:
    max_bin_size_ratio - nodes are opened until their size is below this factor times the local bin width
    theta - cell opening angle used to control accuracy
    no - index of the top-level node to start from - defaults to the global top-level node
    boxsize - periodic box size; 0 means non-periodic
    weighted_binning - if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    wtsums, binsums - per-bin mass weights and mass-weighted |v-v'|^2 sums
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    Nbins = rbins.shape[0] - 1
    binsums = zeros(Nbins)
    wtsums = zeros(Nbins)
    rmin = rbins[0]
    rmax = rbins[-1]
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    # log-bin bookkeeping
    logr_min = np.log10(rmin)
    logr_max = np.log10(rmax)
    dlogr = logr_max - logr_min

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            if boxsize > 0:
                dx[k] = NearestImage(dx[k], boxsize)  # periodic wrap
            r += dx[k] * dx[k]
        r = sqrt(r)

        # theta = min(1,theta * np.exp(0.5*np.random.normal())) # if we randomize the opening criteria a bit we'll get fewer binning artifacts
        within_bounds = (r > rmin) and (r < rmax)
        if within_bounds:
            logr = np.log10(r)
            r_idx = int(Nbins * (logr - logr_min) / dlogr)
            if no < tree.NumParticles:
                # leaf: mass-weighted squared velocity difference
                vprod = 0
                for k in range(3):
                    vprod += (vel[k] - tree.Velocities[no][k]) * (vel[k] - tree.Velocities[no][k]) * tree.Masses[no]
                binsums[r_idx] += vprod
                wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ) and (tree.Sizes[no] < max_bin_size_ratio * (rbins[r_idx + 1] - rbins[r_idx])):
                # node accepted: difference with center-of-mass velocity, plus
                # the node's internal velocity dispersion
                vprod = 0
                for k in range(3):
                    vprod += (vel[k] - tree.Velocities[no][k]) * (vel[k] - tree.Velocities[no][k])
                vprod += tree.VelocityDisp[no]
                if weighted_binning:
                    # spread contribution and weight over overlapped bins
                    do_weighted_binning(tree, no, rbins, binsums, r, r_idx, vprod)
                    do_weighted_binning(tree, no, rbins, wtsums, r, r_idx, 1)
                else:
                    # jitter the deposit radius within the node size to reduce
                    # binning artifacts
                    rnew = r + (np.random.rand() - 0.5) * tree.Sizes[no]
                    r_idx = int(Nbins * (np.log10(rnew) - logr_min) / dlogr)
                    binsums[r_idx] += vprod * tree.Masses[no]
                    wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]  # open the node
        else:
            # out of radial range: skip leaves/accepted nodes, open the rest
            if no < tree.NumParticles:
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ):
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]
    return wtsums, binsums
def VelocityStructFunc_tree(
    pos,
    vel,
    weight,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    boxsize=0,
    weighted_binning=False,
):
    """Returns the weighted velocity structure function in radial bins,
    averaged over the given points.

    Arguments:
    pos -- shape (N,3) array of particle positions
    vel -- shape (N,3) array of particle velocities
    weight -- shape (N,) array of per-particle statistical weights
    tree -- Octree instance containing the positions, masses, softenings, and velocities of the source particles
    rbins -- 1D array of radial bin edges

    Optional arguments:
    max_bin_size_ratio -- controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width
    theta -- cell opening angle used to control accuracy
    boxsize -- periodic box size; 0 means non-periodic
    weighted_binning -- if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    array of the weighted mean |v-v'|^2 per radial bin
    """

    Nthreads = get_num_threads()
    # one accumulator row per thread to avoid write contention under prange
    mbin = zeros((Nthreads, rbins.shape[0] - 1))
    wtsum = zeros_like(mbin)
    # break into chunks for parallelization
    for chunk in prange(Nthreads):
        # strided assignment of points to threads
        for i in range(chunk, pos.shape[0], Nthreads):
            dwtsum, dmbin = VelocityStructWalk(
                pos[i],
                vel[i],
                tree,
                rbins,
                max_bin_size_ratio=max_bin_size_ratio,
                theta=theta,
                boxsize=boxsize,
                weighted_binning=weighted_binning,
            )
            for j in range(mbin.shape[1]):
                mbin[chunk, j] += dmbin[j] * weight[i]
                wtsum[chunk, j] += weight[i] * dwtsum[j]
    # reduce over threads and normalize by the accumulated weights
    return mbin.sum(0) / wtsum.sum(0)


# JIT this function and its parallel version
VelocityStructFunc_tree_parallel = njit(VelocityStructFunc_tree, fastmath=True, parallel=True)
VelocityStructFunc_tree = njit(VelocityStructFunc_tree, fastmath=True)
@njit(fastmath=True)
def ColumnDensityWalk_multiray(pos, rays, tree, no=-1):
    """Returns the integrated column density to infinity from pos, in the directions given by the rays argument

    Arguments:
    pos - (3,) array containing position of interest
    rays - (N_rays, 3) array of unit vectors
    tree - octree object storing the tree structure

    Returns:
    columns - (N_rays,) array of column densities along directions given by rays

    Keyword arguments:
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    N_rays = rays.shape[0]
    columns = np.zeros(N_rays)
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    z_ray = np.zeros(N_rays)  # distance along each ray to the point of closest approach

    fac_density = 3 / (4 * np.pi)  # uniform-sphere density prefactor

    while no > -1:
        # separation and squared distance to node/particle `no`
        r2 = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r2 += dx[k] * dx[k]
        r = sqrt(r2)
        # project the separation onto each ray direction
        for i in range(N_rays):
            z_ray[i] = rays[i, 0] * dx[0] + rays[i, 1] * dx[1] + rays[i, 2] * dx[2]
        h_no = tree.Softenings[no]
        h_no_inv = 1.0 / h_no
        h = h_no  # max(h_no,softening)

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            # add the particle's column if it's in the right direction
            fac = fac_density * tree.Masses[no] * h_no_inv * h_no_inv  # assumes uniform sphere geometry
            for i in range(N_rays):
                # squared impact parameter of the ray w.r.t. the particle
                r_proj = r2 - z_ray[i] * z_ray[i]
                if r_proj < 0:
                    continue  # guard against negative values from rounding
                r_proj = sqrt(r2 - z_ray[i] * z_ray[i])
                q = r_proj * h_no_inv  # impact parameter in units of the particle radius
                if r_proj < h_no:
                    if r > h_no:  # not overlapping the target point - integrate the whole cell
                        if z_ray[i] < 0:
                            continue  # not on the ray
                        # full chord length through the uniform sphere
                        columns[i] += fac * 2 * sqrt(1 - q * q)
                    else:  # overlapping, so need to integrate only a portion of the cell - this case includes the self-shielding if the point is in the tree!
                        dz = z_ray[i] * h_no_inv
                        columns[i] += fac * (dz + sqrt(1 - q * q))

            no = tree.NextBranch[no]

        else:  # we have a node, need to check if it intersects a ray
            node_intersects_ray = False
            # sqrt(3)/2 * size bounds the node's circumscribing sphere
            R_eff = (
                tree.Sizes[no] * 0.8660254037844386 + tree.Deltas[no]
            )  # effective search radius from center of mass
            for i in range(N_rays):
                if r < h + R_eff:  # if node contains the origin then it must intersect all rays
                    node_intersects_ray = True
                    break
                elif (z_ray[i] > 0) and (
                    (r2 - z_ray[i] * z_ray[i]) < (tree.Softenings[no] + R_eff) * (tree.Softenings[no] + R_eff)
                ):  # if perpendicular distance is less than node effective size
                    node_intersects_ray = True
                    break

            if node_intersects_ray:
                no = tree.FirstSubnode[no]  # open the node
            else:
                no = tree.NextBranch[no]  # no intersection with any ray, so go to next node

    return columns
@njit(fastmath=True)
def ColumnDensityWalk_singleray(pos, ray, tree, no=-1):
    """Returns the integrated column density to infinity from pos, along the single direction given by ray

    Arguments:
    pos - (3,) array containing position of interest
    ray - (3,) array with the unit vector of the ray
    tree - octree object storing the tree structure

    Returns:
    column - scalar column density along the ray direction

    Keyword arguments:
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    column = 0
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    z_ray = 0  # distance along the ray to the point of closest approach
    fac_density = 3 / (4 * np.pi)  # uniform-sphere density prefactor

    while no > -1:
        # separation and squared distance to node/particle `no`
        r2 = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r2 += dx[k] * dx[k]
        r = sqrt(r2)
        z_ray = ray[0] * dx[0] + ray[1] * dx[1] + ray[2] * dx[2]
        if r2 - z_ray * z_ray < 0:
            # negative squared impact parameter can only arise from rounding;
            # skip to avoid a NaN from sqrt below
            no = tree.NextBranch[no]
            continue
        h_no = tree.Softenings[no]
        h_no_inv = 1.0 / h_no
        h = h_no  # max(h_no,softening)

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            # add the particle's column if it's in the right direction
            fac = fac_density * tree.Masses[no] * h_no_inv * h_no_inv
            # assumes uniform sphere geometry
            r_proj = sqrt(r2 - z_ray * z_ray)  # impact parameter of the ray
            q = r_proj * h_no_inv  # impact parameter in units of the particle radius
            if r_proj < h_no:
                if r > h_no:  # not overlapping the target point - integrate the whole cell
                    if z_ray > 0:
                        # full chord length through the uniform sphere
                        column += fac * 2 * sqrt(1 - q * q)
                else:  # overlapping, so need to integrate only a portion of the cell - this case includes the self-shielding if the point is in the tree!
                    dz = z_ray * h_no_inv
                    column += fac * (dz + sqrt(1 - q * q))
            no = tree.NextBranch[no]

        else:  # we have a node, need to check if it intersects a ray
            node_intersects_ray = False
            # sqrt(3)/2 * size bounds the node's circumscribing sphere
            R_eff = tree.Sizes[no] * 0.8660254037844386 + tree.Deltas[no]
            # effective search radius from center of mass
            if r < h + R_eff:
                # if node contains the origin then it must intersect all rays
                node_intersects_ray = True
            elif (z_ray > 0) and (
                (r2 - z_ray * z_ray) < (tree.Softenings[no] + R_eff) * (tree.Softenings[no] + R_eff)
            ):  # if perpendicular distance is less than node effective size
                node_intersects_ray = True

            if node_intersects_ray:
                no = tree.FirstSubnode[no]  # open the node
            else:  # no intersection with the ray, so go to next node
                no = tree.NextBranch[no]
    return column
@njit(fastmath=True)
def ColumnDensityWalk_binned(pos, tree, theta=0.5, no=-1):
    """Returns the average column density integrated to infinity from pos, in
    each of 6 equal angular bins covering the sphere (+/-x, +/-y, +/-z).

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure

    Returns:
    columns - shape (6,) array of average column densities in the 6 equal bins on the sphere

    Keyword arguments:
    theta - cell opening angle used to control accuracy
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    n_bins = 6
    column = np.zeros(n_bins)
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    angular_bin_size = (4 * np.pi) / n_bins  # solid angle per bin

    while no > -1:
        r2 = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r2 += dx[k] * dx[k]

        h_no = tree.Softenings[no]
        h = h_no
        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            # add the particle's column if it's in the right direction
            bin = angular_bin(dx)
            if r2 > h * h:
                # distant particle: all mass lands in one angular bin
                col_bin = tree.Masses[no] / r2 / angular_bin_size
            else:  # interpolate between full overlap case and no overlap
                col0 = tree.Masses[no] * (3 / (4 * np.pi * h * h))
                fac = sqrt(r2) / h  # 0 to 1 when there is overlap
                col_bin = col0 * fac * 2
                # remainder is distributed isotropically over all bins
                col_isotropic = (1 - fac) * col0
                for k in range(n_bins):
                    column[k] += col_isotropic
            column[bin] += col_bin
            no = tree.NextBranch[no]
        elif acceptance_criterion(
            sqrt(r2), h, tree.Sizes[no], tree.Deltas[no], theta
        ):  # we can put the whole node in a bin
            column[angular_bin(dx)] += tree.Masses[no] / r2 / angular_bin_size
            no = tree.NextBranch[no]
        else:
            no = tree.FirstSubnode[no]  # open the node
    return column
def ColumnDensity_tree(pos_target, tree, rays=None, randomize_rays=False, theta=0.7):
    """Returns the column density integrated to infinity from pos_target along rays, given the mass distribution in an Octree

    Parameters
    ----------
    pos_target: array_like
        shape (N,3) array of target particle positions where you want to know the potential.
    tree: Octree
        Octree instance initialized with the positions, masses, and softenings of the source particles.
    rays: array_like, optional
        Shape (N_rays,3) array of ray direction unit vectors. If None then we
        instead compute average column densities in a 6-bin tesselation of the sphere.
    randomize_rays: bool, optional
        Randomly orients the ray grid for each particle.
    theta: float, optional
        Opening angle used by the angular-binned estimator (default 0.7).

    Returns
    -------
    result: array_like
        shape (N, N_rays) (or (N, 6) when rays is None) array of column densities
    """
    set_parallel_chunksize(10000)

    if rays is None:  # do angular-binned column density
        result = empty((pos_target.shape[0], 6))
        for i in prange(pos_target.shape[0]):
            result[i] = ColumnDensityWalk_binned(pos_target[i], tree, theta)
    elif randomize_rays:
        # use the multi-ray treewalk; more efficient
        result = empty((pos_target.shape[0], len(rays)))
        for i in prange(pos_target.shape[0]):
            # per-particle random rotation of the whole ray grid
            rays_random = rays @ random_rotation(i)
            result[i] = ColumnDensityWalk_multiray(pos_target[i], rays_random, tree)
    else:
        result = empty((pos_target.shape[0], len(rays)))
        for i in range(rays.shape[0]):
            # outer loop over rays - empirically better access pattern
            for j in prange(pos_target.shape[0]):
                result[j, i] = ColumnDensityWalk_singleray(pos_target[j], rays[i], tree)
    return result


# JIT this function and its parallel version
ColumnDensity_tree_parallel = njit(ColumnDensity_tree, fastmath=True, parallel=True)
ColumnDensity_tree = njit(ColumnDensity_tree, fastmath=True)