├── _config.yml ├── docs ├── requirements.txt ├── source │ ├── modules.rst │ ├── Nbody_simulation_9_1.png │ ├── usage │ │ ├── CPU_Time_serial.png │ │ ├── installation.rst │ │ ├── quickstart.md │ │ └── quickstart.rst │ ├── frontend_API.rst │ ├── community.rst │ ├── index.rst │ ├── pytreegrav.rst │ ├── conf.py │ └── Nbody_simulation.rst ├── Makefile └── make.bat ├── src └── pytreegrav │ ├── __init__.py │ ├── kernel.py │ ├── misc.py │ ├── kdtree │ ├── kdtree.py │ └── treewalk.py │ ├── bruteforce.py │ ├── octree.py │ ├── dynamic_tree.py │ ├── frontend.py │ └── treewalk.py ├── requirements.txt ├── images ├── CPU_Time_both.png ├── CPU_Time_parallel.png └── CPU_Time_serial.png ├── pyproject.toml ├── tests ├── __pycache__ │ └── test.cpython-38-pytest-6.2.5.pyc └── tree_test.py ├── .github └── workflows │ └── python-package.yml ├── LICENSE.txt ├── .readthedocs.yaml ├── setup.py ├── examples ├── benchmark.py └── cuda_test.ipynb ├── paper.md ├── README.ipynb ├── README.md └── paper.bib /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme 2 | numba 3 | numpy 4 | scipy 5 | healpy 6 | -------------------------------------------------------------------------------- /src/pytreegrav/__init__.py: -------------------------------------------------------------------------------- 1 | from .octree import * 2 | from .frontend import * 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | healpy 2 | numba 3 | numpy 4 | scipy 5 | pytest 6 | pyerfa>=2.0.1.4 7 | -------------------------------------------------------------------------------- /docs/source/modules.rst: 
-------------------------------------------------------------------------------- 1 | src 2 | === 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | pytreegrav 8 | -------------------------------------------------------------------------------- /images/CPU_Time_both.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/images/CPU_Time_both.png -------------------------------------------------------------------------------- /images/CPU_Time_parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/images/CPU_Time_parallel.png -------------------------------------------------------------------------------- /images/CPU_Time_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/images/CPU_Time_serial.png -------------------------------------------------------------------------------- /docs/source/Nbody_simulation_9_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/docs/source/Nbody_simulation_9_1.png -------------------------------------------------------------------------------- /docs/source/usage/CPU_Time_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/docs/source/usage/CPU_Time_serial.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- 
/docs/source/frontend_API.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================= 3 | 4 | .. automodule:: pytreegrav.frontend 5 | :noindex: 6 | :members: 7 | -------------------------------------------------------------------------------- /tests/__pycache__/test.cpython-38-pytest-6.2.5.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mikegrudic/pytreegrav/HEAD/tests/__pycache__/test.cpython-38-pytest-6.2.5.pyc -------------------------------------------------------------------------------- /docs/source/community.rst: -------------------------------------------------------------------------------- 1 | Feedback, Support, and Contributions 2 | ==================================== 3 | 4 | To contribute to pytreegrav, report an issue, or seek support, please initiate a pull request or issue through the project `project github `_ 5 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: black-action 2 | on: [push, pull_request] 3 | jobs: 4 | linter_name: 5 | name: runner / black formatter 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v4 9 | - uses: rickstaa/action-black@v1 10 | with: 11 | black_args: "-l 119 ." -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. pytreegrav documentation master file, created by 2 | sphinx-quickstart on Mon Nov 22 10:52:56 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to pytreegrav's documentation! 
def test_answer():
    """Integration test: compare the tree solver against brute-force summation.

    Generates a reproducible uniform random particle distribution, evaluates
    acceleration and potential with both ``method="tree"`` and
    ``method="bruteforce"``, and asserts that the RMS errors are within the
    tolerance expected for the default opening angle.
    """
    # generate a reproducible set of points
    np.random.seed(42)
    N = 4 * 10**4
    x = np.random.rand(N, 3)  # positions uniform in the unit cube
    m = np.ones(N) / N  # equal masses, unit total mass
    h = np.repeat(0.01, N)  # uniform softening length

    accel_tree = Accel(x, m, h, method="tree", parallel=True)
    accel_bruteforce = Accel(x, m, h, method="bruteforce", parallel=True)
    phi_tree = Potential(x, m, h, method="tree", parallel=True)
    phi_bruteforce = Potential(x, m, h, method="bruteforce", parallel=True)

    acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1)))  # RMS force error
    print("RMS force error: ", acc_error)
    phi_error = np.std(phi_tree - phi_bruteforce)
    print("RMS potential error: ", phi_error)
    # NOTE: removed a leftover np.save("phi_error.npy", ...) debugging artifact
    # that wrote a file into the working directory on every test run.
    assert acc_error < 0.02
    assert phi_error < 0.02
# Packaging script for pytreegrav: reads the long description from README.md
# and the runtime dependencies from requirements.txt, then hands everything
# to setuptools.setup().
import setuptools, os

# use the README as the PyPI long description
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# collect install requirements from the requirements.txt sitting next to
# this script, if present (one requirement per line)
thelibFolder = os.path.dirname(os.path.realpath(__file__))
requirementPath = thelibFolder + "/requirements.txt"
install_requires = []
if os.path.isfile(requirementPath):
    with open(requirementPath) as f:
        install_requires = f.read().splitlines()

setuptools.setup(
    name="pytreegrav",
    # NOTE(review): this version (1.1.6) disagrees with release = "1.2.1"
    # in docs/source/conf.py — confirm which is current and sync them.
    version="1.1.6",
    author="Mike Grudic",
    author_email="mike.grudich@gmail.com",
    description="Fast approximate gravitational force and potential calculations",
    long_description=long_description,
    long_description_content_type="text/markdown",
    project_urls={
        "Bug Tracker": "https://github.com/mikegrudic/pytreegrav",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # packages live under src/ (src-layout)
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
    python_requires=">=3.6",
    install_requires=install_requires,
)
automodule:: pytreegrav.treewalk 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: pytreegrav 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /src/pytreegrav/kernel.py: -------------------------------------------------------------------------------- 1 | from numba import njit, float64, float32 2 | 3 | 4 | @njit(fastmath=True) # ([float64(float64,float64),float32(float32,float32)]) 5 | def ForceKernel(r, h): 6 | """ 7 | Returns the quantity equivalent to (fraction of mass enclosed)/ r^3 for a cubic-spline mass distribution of compact support radius h. Used to calculate the softened gravitational force. 8 | 9 | Arguments: 10 | r - radius 11 | h - softening 12 | """ 13 | if r > h: 14 | return 1.0 / (r * r * r) 15 | hinv = 1.0 / h 16 | q = r * hinv 17 | if q <= 0.5: 18 | return (10.666666666666666666 + q * q * (-38.4 + 32.0 * q)) * hinv * hinv * hinv 19 | else: 20 | return ( 21 | (21.333333333333 - 48.0 * q + 38.4 * q * q - 10.666666666667 * q * q * q - 0.066666666667 / (q * q * q)) 22 | * hinv 23 | * hinv 24 | * hinv 25 | ) 26 | 27 | 28 | @njit(fastmath=True) # ([float64(float64,float64)]) 29 | def PotentialKernel(r, h): 30 | """ 31 | Returns the equivalent of -1/r for a cubic-spline mass distribution of compact support radius h. Used to calculate the softened gravitational potential. 
@njit(fastmath=True)
def random_rotation(seed):
    """Returns a random rotation matrix reproducibly, given a random seed

    Builds an orthonormal, right-handed basis: a uniformly random x axis,
    a second random direction Gram-Schmidt orthogonalized against it for
    the y axis, and their cross product for the z axis.

    Parameters
    ----------
    seed: int
        Random seed

    Returns
    -------
    rotation_matrix: array_like
        3x3 array of random rotation matrix entries
    """

    rotation_matrix = zeros((3, 3))
    np.random.seed(seed)  # seed the RNG so the matrix is reproducible
    # generate x axis: uniformly random direction on the unit sphere
    costheta = np.random.uniform(-1, 1)
    sintheta = sqrt(max(1 - costheta * costheta, 0))  # max(...) guards against roundoff
    phi = 2 * np.pi * np.random.uniform(0, 1)
    rotation_matrix[0] = sintheta * np.cos(phi), sintheta * np.sin(phi), costheta

    # generate an independent random direction for the y axis and orthogonalize
    costheta = np.random.uniform(-1, 1)
    sintheta = sqrt(max(1 - costheta * costheta, 0))
    phi = 2 * np.pi * np.random.uniform(0, 1)
    rotation_matrix[1] = sintheta * np.cos(phi), sintheta * np.sin(phi), costheta

    # renamed the accumulator from `sum` (shadowed the builtin) to `dot`/`norm_sq`
    dot = 0
    for k in range(3):  # dot product with the x axis
        dot += rotation_matrix[0, k] * rotation_matrix[1, k]
    for k in range(3):  # deproject: subtract the component along x
        rotation_matrix[1, k] -= dot * rotation_matrix[0, k]
    norm_sq = 0
    for k in range(3):  # normalize y to unit length
        norm_sq += rotation_matrix[1, k] * rotation_matrix[1, k]
    norm = sqrt(norm_sq)
    for k in range(3):
        rotation_matrix[1, k] /= norm

    # now z axis is the cross product x × y, completing a right-handed basis
    for i in range(3):
        j, k = (i + 1) % 3, (i + 2) % 3
        rotation_matrix[2, i] = (
            rotation_matrix[0, j] * rotation_matrix[1, k] - rotation_matrix[1, j] * rotation_matrix[0, k]
        )

    return rotation_matrix
37 | templates_path = ["_templates"] 38 | 39 | # List of patterns, relative to source directory, that match files and 40 | # directories to ignore when looking for source files. 41 | # This pattern also affects html_static_path and html_extra_path. 42 | exclude_patterns = [] 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = "sphinx_rtd_theme" 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 55 | html_static_path = ["_static"] 56 | -------------------------------------------------------------------------------- /docs/source/usage/installation.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | Installation 4 | ============ 5 | 6 | The below will help you quickly install pytreegrav. 7 | 8 | Requirements 9 | ------------ 10 | 11 | You will need a working Python 3.x installation; we recommend installing `Anaconda `_ Python version 3.x. 12 | You will also need to install the following packages: 13 | 14 | * numpy 15 | 16 | * numba 17 | 18 | Installing the latest stable release 19 | ------------------------------------ 20 | 21 | Install the latest stable release with 22 | 23 | .. code-block:: bash 24 | 25 | pip install pytreegrav 26 | 27 | This is the preferred way to install pytreegrav as it will 28 | automatically install the necessary requirements and put Pytreegrav 29 | into your :code:`${PYTHONPATH}` environment variable so you can 30 | import it. 
31 | 32 | Install from source 33 | ------------------- 34 | 35 | Alternatively, you can install the latest version directly from the most up-to-date version 36 | of the source-code by cloning/forking the GitHub repository 37 | 38 | .. code-block:: bash 39 | 40 | git clone https://github.com/mikegrudic/pytreegrav.git 41 | 42 | 43 | Once you have the source, you can build pytreegrav (and add it to your environment) 44 | by executing 45 | 46 | .. code-block:: bash 47 | 48 | python setup.py install 49 | 50 | or 51 | 52 | .. code-block:: bash 53 | 54 | pip install -e . 55 | 56 | in the top level directory. The required Python packages will automatically be 57 | installed as well. 58 | 59 | You can test your installation by looking for the pytreegrav 60 | executable built by the installation 61 | 62 | .. code-block:: bash 63 | 64 | which pytreegrav 65 | 66 | and by importing the pytreegrav Python frontend in Python 67 | 68 | .. code-block:: python 69 | 70 | import pytreegrav 71 | 72 | Testing 73 | ------- 74 | 75 | To test that the tree solver is working correctly, run 76 | 77 | .. code-block:: bash 78 | 79 | pytest 80 | 81 | from the root directory of the package. This will run a basic test problem comparing the acceleration and potential from the tree and brute force solvers respectively, and check that the answers are within the expected tolerance. 
82 | -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | # script to run the Plummer sphere benchmark from the JOSS paper: time how long it takes to run acceleration and potential solves for tree and bruteforce methods, and plot the results 2 | 3 | from pytreegrav import * 4 | import numpy as np 5 | from time import time 6 | from matplotlib import pyplot as plt 7 | import palettable 8 | 9 | parallel = True 10 | theta = 0.7 11 | soft = 0.0 12 | N = 2 ** np.arange(6, 28) 13 | t1 = [] 14 | t2 = [] 15 | t3 = [] 16 | t4 = [] 17 | force_error = [] 18 | phi_error = [] 19 | x = np.random.rand(10**1, 3) 20 | m = np.random.rand(10**1) 21 | Accel(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="tree") 22 | Accel(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="bruteforce") 23 | # BruteForceAccel(x,m,np.repeat(soft,len(m))) 24 | Potential(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="tree") 25 | Potential(x, m, np.repeat(soft, len(m)), parallel=parallel, theta=theta, method="bruteforce") 26 | 27 | fig, ax = plt.subplots(figsize=(4, 4)) 28 | ax.set_prop_cycle("color", palettable.colorbrewer.qualitative.Dark2_4.mpl_colors) 29 | for n in N: 30 | print(n) 31 | x = np.random.rand(n) 32 | r = np.sqrt(x ** (2.0 / 3) * (1 + x ** (2.0 / 3) + x ** (4.0 / 3)) / (1 - x**2)) 33 | phi_exact = -((1 + r**2) ** -0.5) 34 | x = np.random.normal(size=(n, 3)) 35 | x = (x.T * r / np.sum(x**2, axis=1) ** 0.5).T 36 | m = np.repeat(1.0 / n, n) 37 | h = np.ones_like(m) * soft 38 | t = time() 39 | phitree = Potential(x, m, h, parallel=parallel, theta=theta, method="tree") 40 | t = time() - t 41 | t1.append(t) 42 | t = time() 43 | atree = Accel(x, m, h, parallel=parallel, theta=theta, method="tree") 44 | print(atree) 45 | t = time() - t 46 | t2.append(t) 47 | if n < 64**3: 48 | t = time() 49 | 
# KD-tree construction for the Barnes-Hut gravity solver, implemented as a
# numba jitclass so the tree build and walk run at compiled speed.
from numba import int32, deferred_type, optional, float64, boolean, int64, njit, jit, prange, types
from numba.experimental import jitclass
import numpy as np
from numpy import empty, empty_like, zeros, zeros_like, sqrt
from numba.typed import List

# deferred type lets KDNode hold optional references to its own type
node_type = deferred_type()

# attribute type spec required by @jitclass
spec = [
    ("bounds", float64[:, :]),  # (3,2) min/max of the node's bounding box per axis
    ("size", float64),  # longest side length of the bounding box
    ("delta", float64),  # distance from box geometric center to center of mass
    ("points", float64[:, :]),  # particle positions held by this node (freed after split)
    ("masses", float64[:]),  # particle masses held by this node (freed after split)
    ("Npoints", int64),  # number of particles in this node
    ("h", float64),  # maximum softening length among the node's particles
    ("softening", float64[:]),  # per-particle softening lengths (freed after split)
    ("mass", float64),  # total mass of the node
    ("COM", float64[:]),  # (3,) center of mass
    ("IsLeaf", boolean),  # true when the node holds exactly one particle
    ("HasLeft", boolean),
    ("HasRight", boolean),
    ("left", optional(node_type)),
    ("right", optional(node_type)),
]


@jitclass(spec)
class KDNode(object):
    """One node of the KD-tree: stores bounding box, mass moments, and
    (until it is split) the particle data it covers."""

    def __init__(self, points, masses, softening):
        # axis-aligned bounding box of the contained particles
        self.bounds = empty((3, 2))
        self.bounds[0, 0] = points[:, 0].min()
        self.bounds[0, 1] = points[:, 0].max()
        self.bounds[1, 0] = points[:, 1].min()
        self.bounds[1, 1] = points[:, 1].max()
        self.bounds[2, 0] = points[:, 2].min()
        self.bounds[2, 1] = points[:, 2].max()

        self.softening = softening
        self.h = self.softening.max()

        # node "size" is the longest box edge
        self.size = max(
            self.bounds[0, 1] - self.bounds[0, 0],
            self.bounds[1, 1] - self.bounds[1, 0],
            self.bounds[2, 1] - self.bounds[2, 0],
        )
        self.points = points
        self.Npoints = points.shape[0]
        self.masses = masses
        self.mass = np.sum(masses)
        self.delta = 0.0
        if self.Npoints == 1:
            # single particle: the node is a leaf and its COM is the particle
            self.IsLeaf = True
            self.COM = points[0]
        else:
            self.IsLeaf = False
            # mass-weighted center of mass, accumulated per axis
            self.COM = zeros(3)
            for k in range(3):
                for i in range(self.Npoints):
                    self.COM[k] += points[i, k] * masses[i]
                self.COM[k] /= self.mass
                # accumulate squared offset of box center from COM
                # (presumably used to tighten the opening criterion — confirm in treewalk)
                self.delta += (0.5 * (self.bounds[k, 1] + self.bounds[k, 0]) - self.COM[k]) ** 2
            self.delta = sqrt(self.delta)

        # children are attached later by GenerateChildren
        self.HasLeft = False
        self.HasRight = False
        self.left = None
        self.right = None

    def GenerateChildren(self, axis):
        """Split this node along `axis` at the midpoint of its bounding box
        (NOTE: midpoint, not the median despite the variable name), creating
        left/right children and releasing this node's particle arrays.

        Returns 1 if a split was performed, 0 for a leaf."""
        if self.IsLeaf:
            return 0
        x = self.points[:, axis]
        med = (self.bounds[axis, 0] + self.bounds[axis, 1]) / 2
        index = x < med
        if np.any(index):
            self.left = KDNode(self.points[index], self.masses[index], self.softening[index])
            self.HasLeft = True
        index = np.invert(index)
        if np.any(index):
            self.right = KDNode(self.points[index], self.masses[index], self.softening[index])
            self.HasRight = True
        # drop per-particle data now that it lives in the children;
        # 1-element placeholders keep the jitclass attribute types valid
        self.points = empty((1, 1))
        self.masses = empty(1)
        self.softening = empty(1)
        return 1


# resolve the deferred self-referential node type now that KDNode exists
node_type.define(KDNode.class_type.instance_type)


@njit
def ConstructKDTree(x, m, softening):
    """Build a KD-tree over particle positions `x`, masses `m` and softening
    lengths `softening`, returning the root KDNode.

    The tree is built breadth-first, cycling the split axis x->y->z per level,
    until no node can be split further."""
    # duplicate positions would make the midpoint split loop forever on a
    # node that can never separate its particles, so reject them up front
    if len(np.unique(x[:, 0])) < len(x):
        raise Exception(
            "Non-unique particle positions are currently not supported by the tree-building algorithm. Consider perturbing your positions with a bit of noise if you really want to proceed."
        )
    root = KDNode(x, m, softening)
    nodes = [
        root,
    ]
    axis = 0
    divisible_nodes = 1
    count = 0  # index of the first node not yet processed
    while divisible_nodes > 0:
        N = len(nodes)
        divisible_nodes = 0
        for i in range(count, N):  # loop through the nodes we spawned in the previous pass
            count += 1
            if nodes[i].IsLeaf:
                continue
            else:
                generated_children = nodes[i].GenerateChildren(axis)
                divisible_nodes += generated_children
            if nodes[i].HasLeft:
                nodes.append(nodes[i].left)
            if nodes[i].HasRight:
                nodes.append(nodes[i].right)

        axis = (axis + 1) % 3  # cycle the split axis each level
    return root
You can try modifying the IC generator and playing around with the 15 | initial velocity and geometry for extra fun. We also write a function to 16 | evaluate the total energy, which is conserved down to tree-force and 17 | integration errors. 18 | 19 | .. code:: ipython3 20 | 21 | %pylab 22 | from pytreegrav import Accel, Potential 23 | 24 | def GenerateICs(N,seed=42): 25 | np.random.seed(seed) # seed the RNG for reproducibility 26 | pos = np.random.normal(size=(N,3)) # positions of particles 27 | pos -= np.average(pos,axis=0) # put center of mass at the origin 28 | vel = np.zeros_like(pos) # initialize at rest 29 | vel -= np.average(vel,axis=0) # make average velocity 0 30 | softening = np.repeat(0.1,N) # initialize softening to 0.1 31 | masses = np.repeat(1./N,N) # make the system have unit mass 32 | return pos, masses, vel, softening 33 | 34 | def TotalEnergy(pos, masses, vel, softening): 35 | kinetic = 0.5 * np.sum(masses[:,None] * vel**2) 36 | potential = 0.5 * np.sum(masses * Potential(pos,masses,softening,parallel=True)) 37 | return kinetic + potential 38 | 39 | 40 | .. parsed-literal:: 41 | 42 | Using matplotlib backend: MacOSX 43 | Populating the interactive namespace from numpy and matplotlib 44 | 45 | 46 | Stepper function 47 | ---------------- 48 | 49 | Now let’s define the basic timestep for a leapfrog integrator, put in 50 | the Hamiltonian split kick-drift-kick form (e.g. Springel 2005). 51 | 52 | .. code:: ipython3 53 | 54 | def leapfrog_kdk_timestep(dt, pos, masses, softening, vel, accel): 55 | # first a half-step kick 56 | vel[:] = vel + 0.5 * dt * accel # note that you must slice arrays to modify them in place in the function! 57 | # then full-step drift 58 | pos[:] = pos + dt * vel 59 | # then recompute accelerations 60 | accel[:] = Accel(pos,masses,softening,parallel=True) 61 | # then another half-step kick 62 | vel[:] = vel + 0.5 * dt * accel 63 | 64 | Main simulation loop 65 | -------------------- 66 | 67 | .. 
code:: ipython3 68 | 69 | pos, masses, vel, softening = GenerateICs(10000) # initialize initial condition with 10k particles 70 | 71 | accel = Accel(pos,masses,softening,parallel=True) # initialize acceleration 72 | 73 | t = 0 # initial time 74 | Tmax = 50 # final/max time 75 | 76 | energies = [] #energies 77 | r50s = [] #half-mass radii 78 | ts = [] # times 79 | 80 | 81 | while t <= Tmax: # actual simulation loop - this may take a couple minutes to run 82 | r50s.append(np.median(np.sum((pos - np.median(pos,axis=0))**2,axis=1)**0.5)) 83 | energies.append(TotalEnergy(pos,masses,vel,softening)) 84 | ts.append(t) 85 | 86 | dt = 0.03 # adjust this to control integration error 87 | 88 | leapfrog_kdk_timestep(dt, pos, masses, softening, vel, accel) 89 | t += dt 90 | 91 | print("Simulation complete! Relative energy error: %g"%(np.abs((energies[0]-energies[-1])/energies[0]))) 92 | 93 | 94 | .. parsed-literal:: 95 | 96 | Simulation complete! Relative energy error: 0.00161328 97 | 98 | 99 | Analysis 100 | -------- 101 | 102 | Now we can plot the half-mass radius (to get an idea of how the system 103 | pulsates over time) and the total energy (to check for accuracy) as a 104 | function of time 105 | 106 | .. code:: ipython3 107 | 108 | %matplotlib inline 109 | plt.figure(figsize=(4,4),dpi=300) 110 | plt.plot(ts,energies,label="Total Energy") 111 | plt.plot(ts,r50s,label="Half-mass Radius") 112 | plt.xlabel("Time") 113 | plt.legend() 114 | 115 | 116 | 117 | 118 | .. parsed-literal:: 119 | 120 | 121 | 122 | 123 | 124 | 125 | .. 
image:: Nbody_simulation_9_1.png 126 | 127 | -------------------------------------------------------------------------------- /src/pytreegrav/kdtree/treewalk.py: -------------------------------------------------------------------------------- 1 | from numpy import sqrt, empty, zeros, empty_like, zeros_like 2 | from numba import njit, prange 3 | from ..kernel import * 4 | import numpy as np 5 | 6 | 7 | @njit(fastmath=True) 8 | def PotentialWalk(pos, node, phi, softening=0, theta=0.7): 9 | """Returns the gravitational field at position x by performing the Barnes-Hut treewalk using the provided KD-tree node 10 | 11 | Arguments: 12 | pos - (3,) array containing position of interest 13 | node - KD-tree to walk 14 | 15 | Keyword arguments: 16 | g - (3,) array containing initial value of the gravitational field, used when adding up the contributions in recursive calls 17 | softening - softening radius of the particle at which the force is being evaluated - needed if you want the short-range force to be momentum-conserving 18 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0, gives ~1\ 19 | % accuracy) 20 | """ 21 | ## (ABG) NOTE softening is not actually used here... 
22 | dx = node.COM[0] - pos[0] 23 | dy = node.COM[1] - pos[1] 24 | dz = node.COM[2] - pos[2] 25 | r = sqrt(dx * dx + dy * dy + dz * dz) 26 | if node.IsLeaf: 27 | if r > 0: 28 | phi += node.mass * PotentialKernel(r, node.h) 29 | elif r > max(node.size / theta, node.h + node.size): 30 | phi -= node.mass / r 31 | else: 32 | if node.HasLeft: 33 | phi = PotentialWalk(pos, node.left, phi, theta=theta) 34 | if node.HasRight: 35 | phi = PotentialWalk(pos, node.right, phi, theta=theta) 36 | return phi 37 | 38 | 39 | @njit(fastmath=True) 40 | def ForceWalk(pos, node, g, softening=0.0, theta=0.7): 41 | """Returns the gravitational field at position pos by performing the Barnes-Hut treewalk using the provided KD-tree node 42 | 43 | Arguments: 44 | pos - (3,) array containing position of interest 45 | node - KD-tree to walk 46 | 47 | Parameters: 48 | g - (3,) array containing initial value of the gravitational field, used when adding up the contributions in recursive calls 49 | softening - softening radius of the particle at which the force is being evaluated - needed if you want the short-range force to be momentum-conserving 50 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. 
(default 1.0, gives ~1\ 51 | % accuracy) 52 | """ 53 | dx = node.COM[0] - pos[0] 54 | dy = node.COM[1] - pos[1] 55 | dz = node.COM[2] - pos[2] 56 | r = sqrt(dx * dx + dy * dy + dz * dz) 57 | add_accel = False 58 | fac = 0 59 | if r > 0: 60 | if node.IsLeaf: 61 | add_accel = True 62 | if r < max(node.h, softening): 63 | fac = node.mass * ForceKernel(r, max(node.h, softening)) 64 | else: 65 | fac = node.mass / (r * r * r) 66 | elif r > max(node.size / theta + node.delta, max(node.h, softening) + node.size): 67 | add_accel = True 68 | fac = node.mass / (r * r * r) 69 | 70 | if add_accel: 71 | g[0] += dx * fac 72 | g[1] += dy * fac 73 | g[2] += dz * fac 74 | else: 75 | if node.HasLeft: 76 | g = ForceWalk(pos, node.left, g, softening=softening, theta=theta) 77 | if node.HasRight: 78 | g = ForceWalk(pos, node.right, g, softening=softening, theta=theta) 79 | return g 80 | 81 | 82 | @njit(parallel=True, fastmath=True) 83 | def GetPotentialParallel(pos, tree, softening=None, G=1.0, theta=0.7): 84 | if softening is None: 85 | softening = zeros(pos.shape[0]) 86 | result = empty(pos.shape[0]) 87 | for i in prange(pos.shape[0]): 88 | result[i] = G * PotentialWalk(pos[i], tree, 0.0, softening=softening[i], theta=theta) 89 | return result 90 | 91 | 92 | @njit(fastmath=True) 93 | def GetPotential(pos, tree, softening=None, G=1.0, theta=0.7): 94 | if softening is None: 95 | softening = zeros(pos.shape[0]) 96 | result = empty(pos.shape[0]) 97 | for i in range(pos.shape[0]): 98 | result[i] = G * PotentialWalk(pos[i], tree, 0.0, softening=softening[i], theta=theta) 99 | return result 100 | 101 | 102 | @njit(fastmath=True) 103 | def GetAccel(pos, tree, softening=None, G=1.0, theta=0.7): 104 | if softening is None: 105 | softening = zeros(pos.shape[0]) 106 | result = empty(pos.shape) 107 | for i in range(pos.shape[0]): 108 | result[i] = G * ForceWalk(pos[i], tree, zeros(3), softening=softening[i], theta=theta) 109 | return result 110 | 111 | 112 | @njit(parallel=True, fastmath=True) 113 
| def GetAccelParallel(pos, tree, softening, G=1.0, theta=0.7): 114 | if softening is None: 115 | softening = zeros(len(pos), dtype=np.float64) 116 | result = empty(pos.shape) 117 | for i in prange(pos.shape[0]): 118 | result[i] = G * ForceWalk(pos[i], tree, zeros(3), softening=softening[i], theta=theta) 119 | return result 120 | -------------------------------------------------------------------------------- /docs/source/usage/quickstart.md: -------------------------------------------------------------------------------- 1 | # Quickstart 2 | pytreegrav is a package for computing the gravitational potential and/or field of a set of particles. It includes methods for brute-force direction summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher peformance than the equivalent pure Python implementation. 3 | 4 | First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is. 5 | 6 | 7 | ```python 8 | import numpy as np 9 | from pytreegrav import Accel, Potential 10 | ``` 11 | 12 | 13 | ```python 14 | N = 10**5 # number of particles 15 | x = np.random.rand(N,3) # positions randomly sampled in the unit cube 16 | m = np.repeat(1./N,N) # masses - let the system have unit mass 17 | h = np.repeat(0.01,N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions 18 | ``` 19 | 20 | Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential at each particle position: 21 | 22 | 23 | ```python 24 | print(Accel(x,m,h)) 25 | print(Potential(x,m,h)) 26 | ``` 27 | 28 | [[-0.1521787 0.2958852 -0.30109005] 29 | [-0.50678204 -0.37489886 -1.0558666 ] 30 | [-0.24650087 0.95423467 -0.175074 ] 31 | ... 
32 | [ 0.87868472 -1.28332176 -0.22718531] 33 | [-0.41962742 0.32372245 -1.31829084] 34 | [ 2.45127054 0.38292881 0.05820412]] 35 | [-2.35518057 -2.19299372 -2.28494218 ... -2.11783337 -2.1653377 36 | -1.80464695] 37 | 38 | 39 | By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. Let's try both and compare their runtimes: 40 | 41 | 42 | ```python 43 | from time import time 44 | t = time() 45 | # tree gravitational acceleration 46 | accel_tree = Accel(x,m,h,method='tree') 47 | print("Tree accel runtime: %gs"%(time() - t)); t = time() 48 | 49 | accel_bruteforce = Accel(x,m,h,method='bruteforce') 50 | print("Brute force accel runtime: %gs"%(time() - t)); t = time() 51 | 52 | phi_tree = Potential(x,m,h,method='tree') 53 | print("Tree potential runtime: %gs"%(time() - t)); t = time() 54 | 55 | phi_bruteforce = Potential(x,m,h,method='bruteforce') 56 | print("Brute force potential runtime: %gs"%(time() - t)); t = time() 57 | ``` 58 | 59 | Tree accel runtime: 0.927745s 60 | Brute force accel runtime: 44.1175s 61 | Tree potential runtime: 0.802386s 62 | Brute force potential runtime: 20.0234s 63 | 64 | 65 | As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding 10^4. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: 66 | ![Benchmark](./CPU_Time_serial.png) 67 | 68 | 69 | But there's no free lunch here: the tree methods are approximate. 
Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions: 70 | 71 | 72 | ```python 73 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 74 | print("RMS force error: ", acc_error) 75 | phi_error = np.std(phi_tree - phi_bruteforce) 76 | print("RMS potential error: ", phi_error) 77 | ``` 78 | 79 | RMS force error: 0.006739311224338851 80 | RMS potential error: 0.0003888328578588027 81 | 82 | 83 | The above errors are typical for default settings: ~1% force error and ~0.1\% potential error. The error in the tree approximation is controlled by the Barnes-Hut opening angle ``theta``, set to 0.7 by default. Smaller ``theta`` gives higher accuracy, but also runs slower: 84 | 85 | 86 | ```python 87 | thetas = 0.1,0.2,0.4,0.8 # different thetas to try 88 | for theta in thetas: 89 | t = time() 90 | accel_tree = Accel(x,m,h,method='tree',theta=theta) 91 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) 92 | print("theta=%g Runtime: %gs RMS force error: %g"%(theta, time()-t, acc_error)) 93 | ``` 94 | 95 | theta=0.1 Runtime: 63.1738s RMS force error: 3.78978e-05 96 | theta=0.2 Runtime: 14.3356s RMS force error: 0.000258755 97 | theta=0.4 Runtime: 2.91292s RMS force error: 0.00148698 98 | theta=0.8 Runtime: 0.724668s RMS force error: 0.0105937 99 | 100 | 101 | Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. 
This can speed things up considerably, with parallel scaling that will vary with your core and particle number: 102 | 103 | 104 | ```python 105 | from time import time 106 | t = time() 107 | # tree gravitational acceleration 108 | accel_tree = Accel(x,m,h,method='tree',parallel=True) 109 | print("Tree accel runtime in parallel: %gs"%(time() - t)); t = time() 110 | 111 | accel_bruteforce = Accel(x,m,h,method='bruteforce',parallel=True) 112 | print("Brute force accel runtime in parallel: %gs"%(time() - t)); t = time() 113 | 114 | phi_tree = Potential(x,m,h,method='tree',parallel=True) 115 | print("Tree potential runtime in parallel: %gs"%(time() - t)); t = time() 116 | 117 | phi_bruteforce = Potential(x,m,h,method='bruteforce',parallel=True) 118 | print("Brute force potential runtime in parallel: %gs"%(time() - t)); t = time() 119 | ``` 120 | 121 | Tree accel runtime in parallel: 0.222271s 122 | Brute force accel runtime in parallel: 7.25576s 123 | Tree potential runtime in parallel: 0.181393s 124 | Brute force potential runtime in parallel: 5.72611s 125 | 126 | 127 | ## What if I want to evaluate the fields at different points than where the particles are? 128 | 129 | We got you covered. 
The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver): 130 | 131 | 132 | ```python 133 | from pytreegrav import AccelTarget, PotentialTarget 134 | 135 | # generate a separate set of "target" positions where we want to know the potential and field 136 | N_target = 10**4 137 | x_target = np.random.rand(N_target,3) 138 | h_target = np.repeat(0.01,N_target) # optional "target" softening: this sets a floor on the softening length of all forces/potentials computed 139 | 140 | accel_tree = AccelTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 141 | accel_bruteforce = AccelTarget(x_target,x,m,h_source=h,method='bruteforce') 142 | 143 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 144 | print("RMS force error: ", acc_error) 145 | 146 | phi_tree = PotentialTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 147 | phi_bruteforce = PotentialTarget(x_target,x,m,h_target=h_target, h_source=h,method='bruteforce') 148 | 149 | phi_error = np.std(phi_tree - phi_bruteforce) 150 | print("RMS potential error: ", phi_error) 151 | ``` 152 | 153 | RMS force error: 0.006719983300560105 154 | RMS potential error: 0.0003873676304955059 155 | 156 | -------------------------------------------------------------------------------- /docs/source/usage/quickstart.rst: -------------------------------------------------------------------------------- 1 | 2 | Quickstart 3 | ========== 4 | 5 | pytreegrav is a package for computing the gravitational potential and/or field of a set of particles. 
It includes methods for brute-force direct summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher performance than the equivalent pure Python implementation. 6 | 7 | First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is. 8 | 9 | .. code-block:: python 10 | 11 | import numpy as np 12 | from pytreegrav import Accel, Potential 13 | 14 | .. code-block:: python 15 | 16 | N = 10**5 # number of particles 17 | x = np.random.rand(N,3) # positions randomly sampled in the unit cube 18 | m = np.repeat(1./N,N) # masses - let the system have unit mass 19 | h = np.repeat(0.01,N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions 20 | 21 | Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential at each particle position: 22 | 23 | .. code-block:: python 24 | 25 | print(Accel(x,m,h)) 26 | print(Potential(x,m,h)) 27 | 28 | .. code-block:: 29 | 30 | [[-0.1521787 0.2958852 -0.30109005] 31 | [-0.50678204 -0.37489886 -1.0558666 ] 32 | [-0.24650087 0.95423467 -0.175074 ] 33 | ... 34 | [ 0.87868472 -1.28332176 -0.22718531] 35 | [-0.41962742 0.32372245 -1.31829084] 36 | [ 2.45127054 0.38292881 0.05820412]] 37 | [-2.35518057 -2.19299372 -2.28494218 ... -2.11783337 -2.1653377 38 | -1.80464695] 39 | 40 | 41 | 42 | By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. Let's try both and compare their runtimes: 43 | 44 | ..
code-block:: python 45 | 46 | from time import time 47 | t = time() 48 | # tree gravitational acceleration 49 | accel_tree = Accel(x,m,h,method='tree') 50 | print("Tree accel runtime: %gs"%(time() - t)); t = time() 51 | 52 | accel_bruteforce = Accel(x,m,h,method='bruteforce') 53 | print("Brute force accel runtime: %gs"%(time() - t)); t = time() 54 | 55 | phi_tree = Potential(x,m,h,method='tree') 56 | print("Tree potential runtime: %gs"%(time() - t)); t = time() 57 | 58 | phi_bruteforce = Potential(x,m,h,method='bruteforce') 59 | print("Brute force potential runtime: %gs"%(time() - t)); t = time() 60 | 61 | .. code-block:: 62 | 63 | Tree accel runtime: 0.927745s 64 | Brute force accel runtime: 44.1175s 65 | Tree potential runtime: 0.802386s 66 | Brute force potential runtime: 20.0234s 67 | 68 | 69 | 70 | As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding 10^4. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: 71 | 72 | .. image:: ./CPU_Time_serial.png 73 | :target: ./CPU_Time_serial.png 74 | :alt: Benchmark 75 | 76 | 77 | But there's no free lunch here: the tree methods are approximate. Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions: 78 | 79 | .. code-block:: python 80 | 81 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 82 | print("RMS force error: ", acc_error) 83 | phi_error = np.std(phi_tree - phi_bruteforce) 84 | print("RMS potential error: ", phi_error) 85 | 86 | .. code-block:: 87 | 88 | RMS force error: 0.006739311224338851 89 | RMS potential error: 0.0003888328578588027 90 | 91 | 92 | 93 | The above errors are typical for default settings: ~1% force error and ~0.1\% potential error. 
The error in the tree approximation is controlled by the Barnes-Hut opening angle ``theta``\ , set to 0.7 by default. Smaller ``theta`` gives higher accuracy, but also runs slower: 94 | 95 | .. code-block:: python 96 | 97 | thetas = 0.1,0.2,0.4,0.8 # different thetas to try 98 | for theta in thetas: 99 | t = time() 100 | accel_tree = Accel(x,m,h,method='tree',theta=theta) 101 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) 102 | print("theta=%g Runtime: %gs RMS force error: %g"%(theta, time()-t, acc_error)) 103 | 104 | .. code-block:: 105 | 106 | theta=0.1 Runtime: 63.1738s RMS force error: 3.78978e-05 107 | theta=0.2 Runtime: 14.3356s RMS force error: 0.000258755 108 | theta=0.4 Runtime: 2.91292s RMS force error: 0.00148698 109 | theta=0.8 Runtime: 0.724668s RMS force error: 0.0105937 110 | 111 | 112 | 113 | Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. This can speed things up considerably, with parallel scaling that will vary with your core and particle number: 114 | 115 | .. code-block:: python 116 | 117 | from time import time 118 | t = time() 119 | # tree gravitational acceleration 120 | accel_tree = Accel(x,m,h,method='tree',parallel=True) 121 | print("Tree accel runtime in parallel: %gs"%(time() - t)); t = time() 122 | 123 | accel_bruteforce = Accel(x,m,h,method='bruteforce',parallel=True) 124 | print("Brute force accel runtime in parallel: %gs"%(time() - t)); t = time() 125 | 126 | phi_tree = Potential(x,m,h,method='tree',parallel=True) 127 | print("Tree potential runtime in parallel: %gs"%(time() - t)); t = time() 128 | 129 | phi_bruteforce = Potential(x,m,h,method='bruteforce',parallel=True) 130 | print("Brute force potential runtime in parallel: %gs"%(time() - t)); t = time() 131 | 132 | .. 
code-block:: 133 | 134 | Tree accel runtime in parallel: 0.222271s 135 | Brute force accel runtime in parallel: 7.25576s 136 | Tree potential runtime in parallel: 0.181393s 137 | Brute force potential runtime in parallel: 5.72611s 138 | 139 | 140 | 141 | What if I want to evaluate the fields at different points than where the particles are? 142 | --------------------------------------------------------------------------------------- 143 | 144 | We got you covered. The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver): 145 | 146 | .. code-block:: python 147 | 148 | from pytreegrav import AccelTarget, PotentialTarget 149 | 150 | # generate a separate set of "target" positions where we want to know the potential and field 151 | N_target = 10**4 152 | x_target = np.random.rand(N_target,3) 153 | h_target = np.repeat(0.01,N_target) # optional "target" softening: this sets a floor on the softening length of all forces/potentials computed 154 | 155 | accel_tree = AccelTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 156 | accel_bruteforce = AccelTarget(x_target,x,m,h_source=h,method='bruteforce') 157 | 158 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 159 | print("RMS force error: ", acc_error) 160 | 161 | phi_tree = PotentialTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 162 | phi_bruteforce = PotentialTarget(x_target,x,m,h_target=h_target, h_source=h,method='bruteforce') 163 | 164 | phi_error = np.std(phi_tree - phi_bruteforce) 165 | print("RMS potential error: ", phi_error) 166 | 167 | .. 
code-block:: 168 | 169 | RMS force error: 0.006719983300560105 170 | RMS potential error: 0.0003873676304955059 171 | -------------------------------------------------------------------------------- /examples/cuda_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using matplotlib backend: TkAgg\n", 13 | "%pylab is deprecated, use %matplotlib inline and import the required libraries.\n", 14 | "Populating the interactive namespace from numpy and matplotlib\n", 15 | "Found 1 CUDA devices\n", 16 | "id 0 b'NVIDIA T1000' [SUPPORTED]\n", 17 | " Compute Capability: 7.5\n", 18 | " PCI Device ID: 0\n", 19 | " PCI Bus ID: 101\n", 20 | " UUID: GPU-b303fbe2-bd8d-69ed-9a8c-01198eed12ed\n", 21 | " Watchdog: Enabled\n", 22 | " FP32/FP64 Performance Ratio: 32\n", 23 | "Summary:\n", 24 | "\t1/1 devices are supported\n" 25 | ] 26 | }, 27 | { 28 | "name": "stderr", 29 | "output_type": "stream", 30 | "text": [ 31 | "/home/mgrudic/.local/lib/python3.9/site-packages/IPython/core/magics/pylab.py:162: UserWarning: pylab import has clobbered these variables: ['sqrt', 'vectorize']\n", 32 | "`%matplotlib` prevents importing * from pylab and numpy\n", 33 | " warn(\"pylab import has clobbered these variables: %s\" % clobbered +\n", 34 | "/home/mgrudic/.local/lib/python3.9/site-packages/numba/cuda/cudadrv/devicearray.py:886: NumbaPerformanceWarning: \u001b[1mHost array used in CUDA kernel will incur copy overhead to/from device.\u001b[0m\n", 35 | " warn(NumbaPerformanceWarning(msg))\n" 36 | ] 37 | }, 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "3.88 s ± 18.9 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "%pylab\n", 48 | "from numba import (\n", 49 | " jit,\n", 50 | " vectorize,\n", 51 | " float32,\n", 52 | " float64,\n", 53 | " cfunc,\n", 54 | " njit,\n", 55 | " prange,\n", 56 | " get_num_threads,\n", 57 | " set_num_threads,\n", 58 | ")\n", 59 | "import numpy as np\n", 60 | "from math import sqrt\n", 61 | "from scipy.special import comb\n", 62 | "from scipy.interpolate import interp2d, RectBivariateSpline\n", 63 | "from numba import cuda\n", 64 | "from numpy import float64, float32, int32, ndarray\n", 65 | "\n", 66 | "cuda.detect()\n", 67 | "\n", 68 | "\n", 69 | "# Controls threads per block and shared memory usage.\n", 70 | "# The computation will be done on blocks of TPBxTPB elements.\n", 71 | "TPB = 16\n", 72 | "\n", 73 | "\n", 74 | "@cuda.jit(\"void(float32[:,:],float32[:])\", fastmath=True)\n", 75 | "def bruteforce_potential(x, phi):\n", 76 | " i, j = cuda.grid(2)\n", 77 | " if i < x.shape[0] and j < x.shape[0] and j < i:\n", 78 | " r = (\n", 79 | " (x[i, 0] - x[j, 0]) * (x[i, 0] - x[j, 0])\n", 80 | " + (x[i, 1] - x[j, 1]) * (x[i, 1] - x[j, 1])\n", 81 | " + (x[i, 2] - x[j, 2]) * (x[i, 2] - x[j, 2])\n", 82 | " )\n", 83 | " dphi = -1 / sqrt(r)\n", 84 | "\n", 85 | " cuda.atomic.add(phi, i, dphi)\n", 86 | " cuda.atomic.add(phi, j, dphi)\n", 87 | "\n", 88 | "\n", 89 | "Np = 4096 * 32\n", 90 | "\n", 91 | "x = np.float32(np.random.rand(Np, 3))\n", 92 | "phi = np.zeros(Np) # cuda.device_array(Np, dtype=np.float32); phi[:] = 0.\n", 93 | "\n", 94 | "\n", 95 | "threadsperblock = (16, 16)\n", 96 | "blockspergrid = (Np // threadsperblock[0], Np // threadsperblock[1]) # int(ceil(Np // threadsperblock))\n", 97 | "\n", 98 | "%timeit bruteforce_potential[blockspergrid,threadsperblock](x,phi)\n", 99 | "\n", 100 | "# @cuda.jit(\"void(float32[:], float32[:,:], float32[:], float32[:,:], float32)\",fastmath=True)\n", 101 | "# def GridSurfaceDensity_core_cuda(f, x2d, h, grid, size):\n", 102 | "# res = 
np.int32(grid.shape[0])\n", 103 | "# dx = np.float32(size / (res - 1))\n", 104 | "\n", 105 | "# # numba provides this function for working out which element you're\n", 106 | "# # supposed to be accessing\n", 107 | "# i = cuda.grid(1)\n", 108 | "# if i hs_sqr:\n", 130 | "# continue\n", 131 | "# r = sqrt(r)\n", 132 | "# q = r * hinv\n", 133 | "# if q <= 0.5:\n", 134 | "# kernel = 1 - 6 * q * q * (1 - q)\n", 135 | "# else: # q <= 1.0:\n", 136 | "# a = 1 - q\n", 137 | "# kernel = 2 * a * a * a\n", 138 | "# cuda.atomic.add(grid, (gx,gy), kernel * mh2)\n", 139 | "# cuda.syncthreads()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "4.55 s ± 87.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "from pytreegrav.bruteforce import Potential_bruteforce_parallel\n", 157 | "\n", 158 | "%timeit Potential_bruteforce_parallel(x,np.ones(Np),np.zeros(Np))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 39, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "array([-1883.2377, -1630.0737, -2068.1045, ..., -1790.0035, -1583.7726,\n", 170 | " -1866.4478], dtype=float32)" 171 | ] 172 | }, 173 | "execution_count": 39, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "phi.copy_to_host()" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "from pytreegrav.bruteforce import Po" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "2023.2.0", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | 
"file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.9.16" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: '``pytreegrav``: A fast Python gravity solver' 3 | tags: 4 | - Python 5 | - physics 6 | - gravity 7 | - simulations 8 | authors: 9 | - name: Michael Y. Grudić 10 | orcid: 0000-0002-1655-5604 11 | affiliation: "1,2" 12 | - name: Alexander B. Gurvich 13 | orcid: 0000-0002-6145-3674 14 | affiliation: 2 15 | affiliations: 16 | - name: NASA Hubble Fellow, Carnegie Observatories 17 | index: 1 18 | - name: Department of Physics & Astronomy and CIERA, Northwestern University 19 | index: 2 20 | date: 9 June 2021 21 | bibliography: paper.bib 22 | --- 23 | 24 | # Summary 25 | 26 | Gravity is important in a wide variety of science problems. In particular, questions in astrophysics nearly all involve gravity, and can have large ($\gg10^4$) numbers of gravitating masses, such as the stars in a cluster or galaxy, or the discrete fluid elements in a hydrodynamics simulation. Often the gravitational field of such a large number of masses can be too computationally expensive to compute by directly summing the contribution of every single element at every point of interest. 27 | 28 | ``pytreegrav`` is a multi-method Python package for computing gravitational fields and potentials. It includes an exact direct-summation ("brute force") solver and a fast, approximate tree-based method that can be orders of magnitude faster than the naïve method. It can compute fields and potentials from arbitrary particle distributions at arbitrary points, with arbitrary softening/smoothing lengths, and is parallelized with OpenMP. 
29 | 30 | # Statement of need 31 | 32 | The problem addressed by ``pytreegrav`` is the following: given an arbitrary set of "source" masses $m_i$ with 3D coordinates $\mathbf{x}_i$, and optionally each having a finite spatial extent $h_i$ (the _softening radius_), one would like to compute the gravitational potential $\Phi$ and/or the gravitational field $\mathbf{g}$ at an arbitrary set of "target" points in space $\mathbf{y}_i$. A common application for this is N-body simulations (wherein $\mathbf{y}_i=\mathbf{x}_i$). It is also often useful for _analyzing_ simulation results after the fact -- $\Phi$ and $\mathbf{g}$ are sometimes not saved in simulation outputs, and even when they are it is often useful to analyze the gravitational interactions between specific _subsets_ of the mass elements in the simulation. Computing $\mathbf{g}$ is also important for generating equilibrium _initial conditions_ for N-body simulations [@makedisk;@galic], and for identifying interesting gravitationally-bound structures such as halos, star clusters, and giant molecular clouds [@rockstar;@grudic2018;@guszejnov2020]. 33 | 34 | Many gravity simulation codes (or multi-physics simulation codes _including_ gravity) have been written that address the problem of gravity computation in a variety of ways for their own internal purposes [@aarseth_nbody;@dehnen]. However, ``pykdgrav`` (the precursor of ``pytreegrav``) was the first Python package to offer a generic, modular, trivially-installable gravity solver that could be easily integrated into any other Python code, using the fast, approximate tree-based @barneshut method to be practical for large particle numbers. 
``pykdgrav`` used a KD-tree implementation accelerated with ``numba`` [@numba] to achieve high performance in the potential/field evaluation; however, the prerequisite tree-building step had relatively high overhead and a very large memory footprint, because the entire dataset was redundantly stored at every level in the tree hierarchy. This made it difficult to scale to various practical research problems, such as analyzing high-resolution galaxy simulations [@fire_pressurebalance]. ``pytreegrav`` is a full refactor of ``pykdgrav`` that addresses these shortcomings with a new octree implementation, with drastically reduced tree-build time and memory footprint, and a more efficient non-recursive tree traversal for field summation. This makes it suitable for post-processing datasets from state-of-the-art astrophysics simulations, with upwards of $10^8$ particles in the region of interest. 35 | 36 | # Methods 37 | 38 | ``pytreegrav`` can compute $\Phi$ and $\mathbf{g}$ using one of two methods: by "brute force" (explicitly summing the field of every particle, which is exact to machine precision), or using the fast, approximate @barneshut tree-based method (which is approximate, but much faster for large particle numbers). In $N$-body problems where the fields at all particle positions must be known, the cost of the brute-force method scales as $\propto N^2$, while the cost of the tree-based method scales less steeply, as $\propto N \log N$. 39 | 40 | ![Wall-clock time per particle running ``pytreegrav`` on a sample of $N$ particles from a @plummer distribution for various $N$.
Test was run on an Intel i9 9900K workstation on a single core (_left_) and in parallel on 16 logical cores (_right_).\label{fig:cputime}](images/CPU_Time_both.png) 41 | 42 | The brute-force methods are often fastest for small ($<10^3$ particle) point sets because they lack the overheads of tree construction and traversal, while the tree-based methods will typically be faster for larger datasets because they reduce the number of floating-point operations required. Both methods are optimized with the ``numba`` LLVM JIT compiler [@numba], and the basic ``Accel`` and ``Potential`` front-end functions will automatically choose the method that is likely to be faster, based on this heuristic crossover point of $10^3$ particles. Both methods can also optionally be parallelized with OpenMP, via the ``numba`` ``@njit(parallel=True)`` interface. 43 | 44 | The implementation of the tree build and tree-based field summation largely follows that of ``GADGET-2`` [@gadget2]. Starting with an initial cube enclosing all particles, particles are inserted into the tree one at a time. Nodes are divided into 8 subnodes until each subnode contains at most one particle. The indices of the 8 subnodes of each node are stored for an initial recursive traversal of the completed tree, but an optimized tree traversal only needs to know the _first_ subnode (if the node is to be refined) and the index of the next branch of the tree (if the field due to the node is summed directly), so these indices are recorded in the initial recursive tree traversal, and the 8 explicit subnode indices are then deleted, saving memory and removing any empty nodes from consideration. Once these "next branch" and "first subnode" indices are known, the tree field summations can be done in a single ``while`` loop with no recursive function calls, which generally improves performance and memory usage.
45 | 46 | The field summation itself uses the @barneshut geometric opening criterion, with improvements suggested by @dubinski: for a node of side length $L$ with centre of mass located at distance $r$ from the target point, its contribution is summed using the monopole approximation (treating the whole node as a point mass) only if $r > L/\Theta + \delta$, where $\Theta=0.7$ by default (giving $\sim 1\%$ RMS error in $\mathbf{g}$), $\delta$ is the distance from the node's geometric center to its center of mass. If the conditions for approximation are not satisfied, the node's subnodes are considered in turn, until the field contribution of all mass within the node is summed. 47 | 48 | ``pytreegrav`` supports gravitational softening by assuming the mass distribution of each particle takes the form of a standard M4 cubic spline kernel, which is zero beyond the softening radius $h$ (outside which the field reduces to that of a point mass). Explicit expressions for this form of the softened gravitational potential and field are given in @gizmo. $h$ is allowed to vary from particle to particle, and when summing the field the larger of the source or the target softening is used (symmetrizing the force between overlapping particles). When softenings are nonzero, the largest softening $h_{\rm max}$ of all particles in a node is stored, and a node is always opened in the field summation if $r < 0.6L + \max\left(h_{\rm target}, h_{\rm max}\right) + \delta$, where $h_{\rm target}$ is the softening of the target particle where the field is being summed. This ensures that any interactions between physically-overlapping particles are summed directly with the softening kernel. 49 | 50 | # Acknowledgements 51 | 52 | We acknowledge code contributions from Ben Keller and Martin Beroiz, and helpful feedback from Elisa Bortolas, Thorsten García, and GitHub user ``herkesg`` during the development of ``pykdgrav``, which were incorporated into ``pytreegrav``. 
53 | 54 | # References 55 | -------------------------------------------------------------------------------- /README.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "pytreegrav is a package for computing the gravitational potential and/or field of a set of particles. It includes methods for brute-force direction summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher peformance than the equivalent pure Python implementation, without writing a single line of C or Cython." 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "# Walkthrough\n", 16 | "First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import numpy as np\n", 26 | "from pytreegrav import Accel, Potential" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "N = 10**5 # number of particles\n", 36 | "x = np.random.rand(N, 3) # positions randomly sampled in the unit cube\n", 37 | "m = np.repeat(1.0 / N, N) # masses - let the system have unit mass\n", 38 | "h = np.repeat(0.01, N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential:" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "print(Accel(x, m, h))\n", 55 | "print(Potential(x, m, h))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. 
Let's try both and compare their runtimes:" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "from time import time\n", 72 | "\n", 73 | "t = time()\n", 74 | "# tree gravitational acceleration\n", 75 | "accel_tree = Accel(x, m, h, method=\"tree\")\n", 76 | "print(\"Tree accel runtime: %gs\" % (time() - t))\n", 77 | "t = time()\n", 78 | "\n", 79 | "accel_bruteforce = Accel(x, m, h, method=\"bruteforce\")\n", 80 | "print(\"Brute force accel runtime: %gs\" % (time() - t))\n", 81 | "t = time()\n", 82 | "\n", 83 | "phi_tree = Potential(x, m, h, method=\"tree\")\n", 84 | "print(\"Tree potential runtime: %gs\" % (time() - t))\n", 85 | "t = time()\n", 86 | "\n", 87 | "phi_bruteforce = Potential(x, m, h, method=\"bruteforce\")\n", 88 | "print(\"Brute force potential runtime: %gs\" % (time() - t))\n", 89 | "t = time()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding $10^4$. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: \"Performance\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "But there's no free lunch here: the tree methods are approximate. 
Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1))) # RMS force error\n", 113 | "print(\"RMS force error: \", acc_error)\n", 114 | "phi_error = np.std(phi_tree - phi_bruteforce)\n", 115 | "print(\"RMS potential error: \", phi_error)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "The above errors are typical for default settings: $\\sim 1\\%$ force error and $\\sim 0.1\\%$ potential error. The error in the tree approximation is controlled by the Barnes-Hut opening angle $\\Theta$, set to 0.7 by default. Smaller $\\Theta$ gives higher accuracy, but also runs slower:" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "thetas = 0.1, 0.2, 0.4, 0.8 # different thetas to try\n", 132 | "for theta in thetas:\n", 133 | " t = time()\n", 134 | " accel_tree = Accel(x, m, h, method=\"tree\", theta=theta)\n", 135 | " acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1)))\n", 136 | " print(\"theta=%g Runtime: %gs RMS force error: %g\" % (theta, time() - t, acc_error))" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. 
This can speed things up considerably, with parallel scaling that will vary with your core and particle number:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "from time import time\n", 153 | "\n", 154 | "t = time()\n", 155 | "# tree gravitational acceleration\n", 156 | "accel_tree = Accel(x, m, h, method=\"tree\", parallel=True)\n", 157 | "print(\"Tree accel runtime in parallel: %gs\" % (time() - t))\n", 158 | "t = time()\n", 159 | "\n", 160 | "accel_bruteforce = Accel(x, m, h, method=\"bruteforce\", parallel=True)\n", 161 | "print(\"Brute force accel runtime in parallel: %gs\" % (time() - t))\n", 162 | "t = time()\n", 163 | "\n", 164 | "phi_tree = Potential(x, m, h, method=\"tree\", parallel=True)\n", 165 | "print(\"Tree potential runtime in parallel: %gs\" % (time() - t))\n", 166 | "t = time()\n", 167 | "\n", 168 | "phi_bruteforce = Potential(x, m, h, method=\"bruteforce\", parallel=True)\n", 169 | "print(\"Brute force potential runtime in parallel: %gs\" % (time() - t))\n", 170 | "t = time()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "# What if I want to evaluate the fields at different points than where the particles are?\n", 178 | "\n", 179 | "We got you covered. 
The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver):" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "from pytreegrav import AccelTarget, PotentialTarget\n", 189 | "\n", 190 | "# generate a separate set of \"target\" positions where we want to know the potential and field\n", 191 | "N_target = 10**4\n", 192 | "x_target = np.random.rand(N_target, 3)\n", 193 | "h_target = np.repeat(\n", 194 | " 0.01, N_target\n", 195 | ") # optional \"target\" softening: this sets a floor on the softening length of all forces/potentials computed\n", 196 | "\n", 197 | "accel_tree = AccelTarget(\n", 198 | " x_target, x, m, h_target=h_target, h_source=h, method=\"tree\"\n", 199 | ") # we provide the points/masses/softenings we generated before as the \"source\" particles\n", 200 | "accel_bruteforce = AccelTarget(x_target, x, m, h_source=h, method=\"bruteforce\")\n", 201 | "\n", 202 | "acc_error = np.sqrt(np.mean(np.sum((accel_tree - accel_bruteforce) ** 2, axis=1))) # RMS force error\n", 203 | "print(\"RMS force error: \", acc_error)\n", 204 | "\n", 205 | "phi_tree = PotentialTarget(\n", 206 | " x_target, x, m, h_target=h_target, h_source=h, method=\"tree\"\n", 207 | ") # we provide the points/masses/softenings we generated before as the \"source\" particles\n", 208 | "phi_bruteforce = PotentialTarget(x_target, x, m, h_target=h_target, h_source=h, method=\"bruteforce\")\n", 209 | "\n", 210 | "phi_error = np.std(phi_tree - phi_bruteforce)\n", 211 | "print(\"RMS potential error: \", phi_error)" 212 | ] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "Python 3", 218 | "language": "python", 219 | "name": "python3" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 
| "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.8.5" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 4 236 | } 237 | -------------------------------------------------------------------------------- /src/pytreegrav/bruteforce.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy import sqrt, empty, zeros, empty_like, zeros_like 3 | from numba import njit, prange 4 | from .kernel import * 5 | 6 | 7 | def PotentialTarget_bruteforce(x_target, softening_target, x_source, m_source, softening_source, G=1.0): 8 | """Returns the exact gravitational potential due to a set of particles, at a set of positions that need not be the same as the particle positions. 9 | 10 | Arguments: 11 | x_target -- shape (N,3) array of positions where the potential is to be evaluated 12 | softening_target -- shape (N,) array of minimum softening lengths to be used 13 | x_source -- shape (M,3) array of positions of gravitating particles 14 | m_source -- shape (M,) array of particle masses 15 | softening_source -- shape (M,) array of softening lengths 16 | 17 | Optional arguments: 18 | G -- gravitational constant (default 0.7) 19 | 20 | Returns: 21 | shape (N,) array of potential values 22 | """ 23 | potential = np.zeros(x_target.shape[0]) 24 | dx = np.empty(3) 25 | for i in prange(x_target.shape[0]): 26 | for j in range(x_source.shape[0]): 27 | for k in range(3): 28 | dx[k] = x_target[i, k] - x_source[j, k] 29 | r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) 30 | 31 | h = max(softening_source[j], softening_target[i]) 32 | if r < h: 33 | potential[i] += m_source[j] * PotentialKernel(r, h) 34 | else: 35 | if r > 0: 36 | potential[i] -= m_source[j] / r 37 | return G * potential 38 | 39 | 40 | 
PotentialTarget_bruteforce_parallel = njit(PotentialTarget_bruteforce, fastmath=True, parallel=True) 41 | PotentialTarget_bruteforce = njit(PotentialTarget_bruteforce, fastmath=True) 42 | 43 | 44 | @njit(fastmath=True) 45 | def Potential_bruteforce(x, m, softening, G=1.0): 46 | """Returns the exact mutually-interacting gravitational potential for a set of particles with positions x and masses m, evaluated by brute force. 47 | 48 | Arguments: 49 | x -- shape (N,3) array of particle positions 50 | m -- shape (N,) array of particle masses 51 | softening -- shape (N,) array containing kernel support radii for gravitational softening 52 | 53 | Optional arguments: 54 | G -- gravitational constant (default 1.0) 55 | 56 | Returns: 57 | shape (N,) array containing potential values 58 | """ 59 | potential = zeros_like(m) 60 | dx = zeros(3) 61 | for i in range(x.shape[0]): 62 | for j in range(i + 1, x.shape[0]): 63 | for k in range(3): 64 | dx[k] = x[i, k] - x[j, k] 65 | r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) 66 | h = max(softening[i], softening[j]) 67 | if r < h: 68 | kernel = PotentialKernel(r, h) 69 | potential[j] += m[i] * kernel 70 | potential[i] += m[j] * kernel 71 | elif r > 0: 72 | potential[i] -= m[j] / r 73 | potential[j] -= m[i] / r 74 | return G * potential 75 | 76 | 77 | @njit(fastmath=True, parallel=True) 78 | def Potential_bruteforce_parallel(x, m, softening, G=1.0): 79 | """Returns the exact mutually-interacting gravitational potential for a set of particles with positions x and masses m, evaluated by brute force. 
80 | 81 | Arguments: 82 | x -- shape (N,3) array of particle positions 83 | m -- shape (N,) array of particle masses 84 | softening -- shape (N,) array containing kernel support radii for gravitational softening 85 | 86 | Optional arguments: 87 | G -- gravitational constant (default 1.0) 88 | 89 | Returns: 90 | shape (N,) array containing potential values 91 | """ 92 | potential = zeros_like(m) 93 | for i in prange(x.shape[0]): 94 | dx = zeros(3) 95 | for j in range(x.shape[0]): 96 | if i == j: 97 | continue # neglect self-potential 98 | for k in range(3): 99 | dx[k] = x[i, k] - x[j, k] 100 | r = sqrt(dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) 101 | h = max(softening[i], softening[j]) 102 | if r < h: 103 | kernel = PotentialKernel(r, h) 104 | potential[i] += m[j] * kernel 105 | elif r > 0: 106 | potential[i] -= m[j] / r 107 | return G * potential 108 | 109 | 110 | @njit(fastmath=True) 111 | def Accel_bruteforce(x, m, softening, G=1.0): 112 | """Returns the exact mutually-interacting gravitational accelerations of a set of particles. 
113 | 114 | Arguments: 115 | x -- shape (N,3) array of positions where the potential is to be evaluated 116 | m -- shape (N,) array of particle masses 117 | softening -- shape (N,) array of softening lengths 118 | 119 | Optional arguments: 120 | G -- gravitational constant (default 1.0) 121 | 122 | Returns: 123 | shape (N,3) array of gravitational accelerations 124 | """ 125 | if softening is None: 126 | softening = np.zeros_like(m) 127 | accel = zeros_like(x) 128 | dx = zeros(3) 129 | for i in range(x.shape[0]): 130 | for j in range(i + 1, x.shape[0]): 131 | h = max( 132 | softening[i], softening[j] 133 | ) # if there is overlap, we symmetrize the softenings to maintain momentum conservation 134 | r2 = 0 135 | for k in range(3): 136 | dx[k] = x[i, k] - x[j, k] 137 | r2 += dx[k] * dx[k] 138 | if r2 == 0: 139 | continue 140 | r = sqrt(r2) 141 | 142 | if r < h: 143 | kernel = ForceKernel(r, h) 144 | for k in range(3): 145 | accel[j, k] += kernel * m[i] * dx[k] 146 | accel[i, k] -= kernel * m[j] * dx[k] 147 | else: 148 | fac = 1 / (r2 * r) 149 | for k in range(3): 150 | accel[j, k] += m[i] * fac * dx[k] 151 | accel[i, k] -= m[j] * fac * dx[k] 152 | return G * accel 153 | 154 | 155 | @njit(fastmath=True, parallel=True) 156 | def Accel_bruteforce_parallel(x, m, softening, G=1.0): 157 | """Returns the exact mutually-interacting gravitational accelerations of a set of particles. 
158 | 159 | Arguments: 160 | x -- shape (N,3) array of positions where the potential is to be evaluated 161 | m -- shape (N,) array of particle masses 162 | softening -- shape (N,) array of softening lengths 163 | 164 | Optional arguments: 165 | G -- gravitational constant (default 1.0) 166 | 167 | Returns: 168 | shape (N,3) array of gravitational accelerations 169 | """ 170 | if softening is None: 171 | softening = np.zeros_like(m) 172 | accel = zeros_like(x) 173 | for i in prange(x.shape[0]): 174 | dx = zeros(3) 175 | for j in range(x.shape[0]): 176 | if i == j: 177 | continue 178 | h = max( 179 | softening[i], softening[j] 180 | ) # if there is overlap, we symmetrize the softenings to maintain momentum conservation 181 | r2 = 0 182 | for k in range(3): 183 | dx[k] = x[j, k] - x[i, k] 184 | r2 += dx[k] * dx[k] 185 | if r2 == 0: 186 | continue 187 | r = sqrt(r2) 188 | 189 | if r < h: 190 | kernel = ForceKernel(r, h) 191 | for k in range(3): 192 | accel[i, k] += kernel * m[j] * dx[k] 193 | else: 194 | fac = 1 / (r2 * r) 195 | for k in range(3): 196 | accel[i, k] += m[j] * fac * dx[k] 197 | return G * accel 198 | 199 | 200 | def AccelTarget_bruteforce(x_target, softening_target, x_source, m_source, softening_source, G=1.0): 201 | """Returns the gravitational acceleration at a set of target positions, due to a set of source particles. 
202 | 203 | Arguments: 204 | x_target -- shape (N,3) array of positions where the field is to be evaluated 205 | softening_target -- shape (N,) array of minimum softening lengths to be used 206 | x_source -- shape (M,3) array of positions of gravitating particles 207 | m_source -- shape (M,) array of particle masses 208 | softening_source -- shape (M,) array of softening lengths 209 | 210 | Optional arguments: 211 | G -- gravitational constant (default 1.0) 212 | 213 | Returns: 214 | shape (N,3) array of gravitational accelerations 215 | """ 216 | accel = zeros_like(x_target) 217 | for i in prange(x_target.shape[0]): 218 | dx = zeros(3) 219 | for j in range(x_source.shape[0]): 220 | h = max( 221 | softening_target[i], softening_source[j] 222 | ) # if there is overlap, we symmetrize the softenings to maintain momentum conservation 223 | r2 = 0 224 | for k in range(3): 225 | dx[k] = x_source[j, k] - x_target[i, k] 226 | r2 += dx[k] * dx[k] 227 | if r2 == 0: 228 | continue # no force if at the origin 229 | r = sqrt(r2) 230 | 231 | if r < h: 232 | kernel = ForceKernel(r, h) 233 | for k in range(3): 234 | accel[i, k] += kernel * m_source[j] * dx[k] 235 | else: 236 | fac = 1 / (r2 * r) 237 | for k in range(3): 238 | accel[i, k] += m_source[j] * fac * dx[k] 239 | return G * accel 240 | 241 | 242 | AccelTarget_bruteforce_parallel = njit(AccelTarget_bruteforce, fastmath=True, parallel=True) 243 | AccelTarget_bruteforce = njit(AccelTarget_bruteforce, fastmath=True) 244 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI](https://img.shields.io/pypi/v/pytreegrav)](https://pypi.org/project/pytreegrav)[![Documentation Status](https://readthedocs.org/projects/pytreegrav/badge/?version=latest)](https://pytreegrav.readthedocs.io/en/latest/?badge=latest) 2 | 3 | # Introduction 4 | pytreegrav is a package for computing the gravitational potential 
and/or field of a set of particles. It includes methods for brute-force direct summation and for the fast, approximate Barnes-Hut treecode method. For the Barnes-Hut method we implement an oct-tree as a numba jitclass to achieve much higher performance than the equivalent pure Python implementation, without writing a single line of C or Cython. Full documentation is available [here](http://pytreegrav.readthedocs.io). 5 | 6 | # Installation 7 | 8 | ```pip install pytreegrav``` or clone the repo and run ```python setup.py install``` from the repo directory. 9 | 10 | # Walkthrough 11 | First let's import the stuff we want and generate some particle positions and masses - these would be your particle data for whatever your problem is. 12 | 13 | 14 | ```python 15 | import numpy as np 16 | from pytreegrav import Accel, Potential 17 | ``` 18 | 19 | 20 | ```python 21 | N = 10**5 # number of particles 22 | x = np.random.rand(N,3) # positions randomly sampled in the unit cube 23 | m = np.repeat(1./N,N) # masses - let the system have unit mass 24 | h = np.repeat(0.01,N) # softening radii - these are optional, assumed 0 if not provided to the frontend functions 25 | ``` 26 | 27 | Now we can use the ``Accel`` and ``Potential`` functions to compute the gravitational field and potential at each particle position: 28 | 29 | 30 | ```python 31 | print(Accel(x,m,h)) 32 | print(Potential(x,m,h)) 33 | ``` 34 | 35 | [[-0.1521787 0.2958852 -0.30109005] 36 | [-0.50678204 -0.37489886 -1.0558666 ] 37 | [-0.24650087 0.95423467 -0.175074 ] 38 | ... 39 | [ 0.87868472 -1.28332176 -0.22718531] 40 | [-0.41962742 0.32372245 -1.31829084] 41 | [ 2.45127054 0.38292881 0.05820412]] 42 | [-2.35518057 -2.19299372 -2.28494218 ... -2.11783337 -2.1653377 43 | -1.80464695] 44 | 45 | 46 | By default, pytreegrav will try to make the optimal choice between brute-force and tree methods for speed, but we can also force it to use one method or another. 
Let's try both and compare their runtimes: 47 | 48 | 49 | ```python 50 | from time import time 51 | t = time() 52 | # tree gravitational acceleration 53 | accel_tree = Accel(x,m,h,method='tree') 54 | print("Tree accel runtime: %gs"%(time() - t)); t = time() 55 | 56 | accel_bruteforce = Accel(x,m,h,method='bruteforce') 57 | print("Brute force accel runtime: %gs"%(time() - t)); t = time() 58 | 59 | phi_tree = Potential(x,m,h,method='tree') 60 | print("Tree potential runtime: %gs"%(time() - t)); t = time() 61 | 62 | phi_bruteforce = Potential(x,m,h,method='bruteforce') 63 | print("Brute force potential runtime: %gs"%(time() - t)); t = time() 64 | ``` 65 | 66 | Tree accel runtime: 0.927745s 67 | Brute force accel runtime: 44.1175s 68 | Tree potential runtime: 0.802386s 69 | Brute force potential runtime: 20.0234s 70 | 71 | 72 | As you can see, the tree-based methods can be much faster than the brute-force methods, especially for particle counts exceeding 10^4. Here's an example of how much faster the treecode is when run on a Plummer sphere with a variable number of particles, on a single core of an Intel i9 9900k workstation: 73 | ![Benchmark](images/CPU_Time_serial.png) 74 | 75 | 76 | But there's no free lunch here: the tree methods are approximate. Let's quantify the RMS errors of the stuff we just computed, compared to the exact brute-force solutions: 77 | 78 | 79 | ```python 80 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 81 | print("RMS force error: ", acc_error) 82 | phi_error = np.std(phi_tree - phi_bruteforce) 83 | print("RMS potential error: ", phi_error) 84 | ``` 85 | 86 | RMS force error: 0.006739311224338851 87 | RMS potential error: 0.0003888328578588027 88 | 89 | 90 | The above errors are typical for default settings: ~1% force error and ~0.1\% potential error. The error in the tree approximation is controlled by the Barnes-Hut opening angle ``theta``, set to 0.7 by default. 
Smaller ``theta`` gives higher accuracy, but also runs slower: 91 | 92 | 93 | ```python 94 | thetas = 0.1,0.2,0.4,0.8 # different thetas to try 95 | for theta in thetas: 96 | t = time() 97 | accel_tree = Accel(x,m,h,method='tree',theta=theta) 98 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) 99 | print("theta=%g Runtime: %gs RMS force error: %g"%(theta, time()-t, acc_error)) 100 | ``` 101 | 102 | theta=0.1 Runtime: 63.1738s RMS force error: 3.78978e-05 103 | theta=0.2 Runtime: 14.3356s RMS force error: 0.000258755 104 | theta=0.4 Runtime: 2.91292s RMS force error: 0.00148698 105 | theta=0.8 Runtime: 0.724668s RMS force error: 0.0105937 106 | 107 | 108 | Both brute-force and tree-based calculations can be parallelized across all available logical cores via OpenMP, by specifying ``parallel=True``. This can speed things up considerably, with parallel scaling that will vary with your core and particle number: 109 | 110 | 111 | ```python 112 | from time import time 113 | t = time() 114 | # tree gravitational acceleration 115 | accel_tree = Accel(x,m,h,method='tree',parallel=True) 116 | print("Tree accel runtime in parallel: %gs"%(time() - t)); t = time() 117 | 118 | accel_bruteforce = Accel(x,m,h,method='bruteforce',parallel=True) 119 | print("Brute force accel runtime in parallel: %gs"%(time() - t)); t = time() 120 | 121 | phi_tree = Potential(x,m,h,method='tree',parallel=True) 122 | print("Tree potential runtime in parallel: %gs"%(time() - t)); t = time() 123 | 124 | phi_bruteforce = Potential(x,m,h,method='bruteforce',parallel=True) 125 | print("Brute force potential runtime in parallel: %gs"%(time() - t)); t = time() 126 | ``` 127 | 128 | Tree accel runtime in parallel: 0.222271s 129 | Brute force accel runtime in parallel: 7.25576s 130 | Tree potential runtime in parallel: 0.181393s 131 | Brute force potential runtime in parallel: 5.72611s 132 | 133 | 134 | # What if I want to evaluate the fields at different points than where the 
particles are? 135 | 136 | We got you covered. The ``Target`` methods do exactly this: you specify separate sets of points for the particle positions and the field evaluation, and everything otherwise works exactly the same (including optional parallelization and choice of solver): 137 | 138 | 139 | ```python 140 | from pytreegrav import AccelTarget, PotentialTarget 141 | 142 | # generate a separate set of "target" positions where we want to know the potential and field 143 | N_target = 10**4 144 | x_target = np.random.rand(N_target,3) 145 | h_target = np.repeat(0.01,N_target) # optional "target" softening: this sets a floor on the softening length of all forces/potentials computed 146 | 147 | accel_tree = AccelTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 148 | accel_bruteforce = AccelTarget(x_target,x,m,h_source=h,method='bruteforce') 149 | 150 | acc_error = np.sqrt(np.mean(np.sum((accel_tree-accel_bruteforce)**2,axis=1))) # RMS force error 151 | print("RMS force error: ", acc_error) 152 | 153 | phi_tree = PotentialTarget(x_target, x,m, h_target=h_target, h_source=h,method='tree') # we provide the points/masses/softenings we generated before as the "source" particles 154 | phi_bruteforce = PotentialTarget(x_target,x,m,h_target=h_target, h_source=h,method='bruteforce') 155 | 156 | phi_error = np.std(phi_tree - phi_bruteforce) 157 | print("RMS potential error: ", phi_error) 158 | ``` 159 | 160 | RMS force error: 0.006719983300560105 161 | RMS potential error: 0.0003873676304955059 162 | 163 | # Ray-tracing 164 | 165 | pytreegrav's octree implementation can be used for efficient tree-based searches for ray-tracing of unstructured data. Currently implemented is the method ``ColumnDensity``, which calculates the integral of the density field to infinity along a grid of rays originating at each particle (defaulting to 6 rays). 
For example: 166 | 167 | ```python 168 | columns = ColumnDensity(x, m, h, parallel=True) # shape (N,6) array of column densities in 6 angular bins - this is fastest but least accurate 169 | columns_10 = ColumnDensity(x, m, h, rays=10, parallel=True) # shape (N, 10) array column densities along 10 random rays 170 | columns_random = ColumnDensity(x, m, h, randomize_rays=True, parallel=True) # can randomize the ray grid for each particle so that there are no correlated errors due to the angular discretization 171 | columns_custom = ColumnDensity(x, m, h, rays=np.random.normal(size=(100,3)), parallel=True) # can also pass an arbitrary set of rays for the raygrid; these need not be normalized 172 | κ = 0.02 # example opacity, in code units 173 | σ = m * κ # total cross-section in each particle is product of mass and opacity 174 | 𝛕 = ColumnDensity(x, σ, h, parallel=True) # can pass cross-section instead of mass to get optical depth 175 | 𝛕_eff = -np.log(np.exp(-𝛕.clip(-300,300)).mean(axis=1)) # effective optical depth that would give the same radiation flux from a background; note clipping because overflow is not uncommon here 176 | Σ_eff = 𝛕_eff / κ # effective column density *for this opacity* in code mass/code length^2 177 | NH_eff = Σ_eff X_H / m_p # column density in H nuclei code length^-2 178 | ``` 179 | 180 | # Community 181 | 182 | This code is actively developed and maintained by Mike Grudic. 183 | 184 | If you would like help using pytreegrav, please ask a question on our [Discussions](https://github.com/mikegrudic/pytreegrav/discussions) page. 185 | 186 | If you have found a bug or an issue using pytreegrav, please open an [issue](https://github.com/mikegrudic/pytreegrav/issues). 
187 | -------------------------------------------------------------------------------- /src/pytreegrav/octree.py: -------------------------------------------------------------------------------- 1 | """Implementation of the Octree jitclass""" 2 | 3 | import numpy as np 4 | from numpy import zeros, ones, concatenate 5 | from numba import float64, boolean, int64, njit 6 | from numba.experimental import jitclass 7 | 8 | spec = [ 9 | ("Sizes", float64[:]), # side length of tree nodes 10 | ("Deltas", float64[:]), # distance between COM and geometric center of node 11 | # location of center of mass of node (actually stores _geometric_ center before we do the moments pass) 12 | ("Coordinates", float64[:, :]), 13 | ("Masses", float64[:]), # total mass of node 14 | ("Quadrupoles", float64[:, :, :]), # Quadrupole moment of the node 15 | # Allow us to quickly check if Quadrupole moments exist to keep monopole calculations fast 16 | ("HasQuads", boolean), 17 | ("NumParticles", int64), # number of particles in the tree 18 | ("NumNodes", int64), # number of particles + nodes (i.e. 
mass elements) in the tree
    # individual softenings for particles, _maximum_ softening of inhabitant particles for nodes
    ("Softenings", float64[:]),
    ("NextBranch", int64[:]),
    ("FirstSubnode", int64[:]),
    ("TreewalkIndices", int64[:]),
]


# geometric centers of the 8 octants of a node, in units of the node side length
octant_offsets = 0.25 * np.array(
    [
        [-1, -1, -1],
        [1, -1, -1],
        [-1, 1, -1],
        [1, 1, -1],
        [-1, -1, 1],
        [1, -1, 1],
        [-1, 1, 1],
        [1, 1, 1],
    ]
)


@jitclass(spec)
class Octree:
    """Octree implementation."""

    def __init__(
        self,
        points,
        masses,
        softening,
        morton_order=True,
        quadrupole=False,
        compute_moments=True,
    ):
        # Build the tree from (N,3) points with per-particle masses and softenings.
        # morton_order: re-build the tree with particles sorted in depth-first visit order
        # quadrupole: allocate and later compute quadrupole moments per node
        # compute_moments: fill in COM, total mass, max softening and Delta for each node
        self.NumNodes = 0
        self.TreewalkIndices = -ones(points.shape[0], dtype=np.int64)
        self.HasQuads = quadrupole
        # first provisional treebuild to get the ordering right
        children = self.BuildTree(points, masses, softening)
        # set up the order of the treewalk
        SetupTreewalk(self, self.NumParticles, children)
        self.GetWalkIndices()  # get the Morton ordering of the points

        # if enabled, we rebuild the tree in Morton order (the order that points are visited in the depth-first traversal)
        if morton_order:
            children = self.BuildTree(
                points[self.TreewalkIndices],
                np.take(masses, self.TreewalkIndices),
                np.take(softening, self.TreewalkIndices),
            )  # now re-build the tree with everything in order
            # re-do the treewalk order with the new indices
            SetupTreewalk(self, self.NumParticles, children)

        if compute_moments:
            # compute centers of mass, etc.
            ComputeMoments(self, self.NumParticles, children)

    def BuildTree(self, points, masses, softening):
        # Insert particles one by one, creating internal nodes as needed;
        # returns the (NumNodes, 8) table of child indices (-1 = empty slot).
        # initialize random seed in case of non-unique positions
        np.random.seed(42)

        self.Initialize(len(points), self.NumNodes)

        # set the properties of the root node
        self.Sizes[self.NumParticles] = max(
            points[:, 0].max() - points[:, 0].min(),
            points[:, 1].max() - points[:, 1].min(),
            points[:, 2].max() - points[:, 2].min(),
        )
        for dim in range(3):
            self.Coordinates[self.NumParticles, dim] = 0.5 * (points[:, dim].max() + points[:, dim].min())

        # set values for particles
        self.Coordinates[: self.NumParticles] = points
        self.Masses[: self.NumParticles] = masses
        self.Softenings[: self.NumParticles] = softening
        children = -ones((self.NumNodes, 8), dtype=np.int64)
        new_node_idx = self.NumParticles + 1
        # now we insert particles into the tree one at a time, setting up child pointers and initializing node properties as we go
        for i in range(self.NumParticles):
            pos = points[i]

            no = self.NumParticles  # walk the tree, starting at the root
            while no > -1:
                # first make sure we have enough storage
                while new_node_idx + 1 > self.NumNodes:
                    size_increase = increase_tree_size(self)
                    children = concatenate((children, -ones((size_increase, 8), dtype=np.int64)))

                octant = 0  # the index of the octant that the present point lives in
                for dim in range(3):
                    if pos[dim] > self.Coordinates[no, dim]:
                        octant += 1 << dim
                # check if there is a pre-existing node among the present node's children
                child_candidate = children[no, octant]
                if child_candidate > -1:
                    # it exists, now check if it's a node or a particle
                    if child_candidate < self.NumParticles:
                        # it's a particle - we have to create a new node of index new_node_idx containing the 2 points we've got, and point the pre-existing particle to the new particle
                        # EXCEPTION: if the pre-existing particle is at the same coordinate, we will perturb the position of the new particle slightly and start over
                        # NOTE(review): the multiplicative perturbation below cannot separate
                        # particles whose coincident coordinate is exactly 0.0 — confirm callers
                        # guard against that case
                        same_coord = True
                        for k in range(3):
                            if self.Coordinates[i, k] != self.Coordinates[child_candidate, k]:
                                same_coord = False
                        if same_coord:
                            self.Coordinates[i] *= np.exp(3e-16 * (np.random.rand(3) - 0.5))  # random perturbation
                            points[i] = self.Coordinates[i]
                            no = self.NumParticles  # restart the tree traversal
                            continue
                        # end exception

                        children[no, octant] = new_node_idx
                        # set the center of the new node
                        self.Coordinates[new_node_idx] = self.Coordinates[no] + self.Sizes[no] * octant_offsets[octant]
                        # set the size of the new node
                        self.Sizes[new_node_idx] = self.Sizes[no] / 2
                        new_octant = 0
                        for dim in range(3):
                            if self.Coordinates[child_candidate, dim] > self.Coordinates[new_node_idx, dim]:
                                # get the octant of the new node that pre-existing particle lives in
                                new_octant += 1 << dim
                        # set the pre-existing particle as a child of the new node
                        children[new_node_idx, new_octant] = child_candidate
                        no = new_node_idx
                        new_node_idx += 1
                        continue  # restart the loop looking at the new node
                    else:  # if the child is an existing node, go to that one and start the loop anew
                        no = children[no, octant]
                        continue
                else:  # if the child does not exist, we let this point be that child (inserting it in the tree) and we're done with this point
                    children[no, octant] = i
                    no = -1
        return children

    def GetWalkIndices(self):  # gets the ordering of the particles in the treewalk
        index = 0
        no = self.NumParticles
        while no > -1:
            if no < self.NumParticles:
                self.TreewalkIndices[index] = no
                index += 1
                no = self.NextBranch[no]
            else:
                no = self.FirstSubnode[no]

    def Initialize(self,
Npart, NumNodes): 166 | """Allocate all attribute arrays and initialize""" 167 | self.NumParticles = Npart 168 | # this is the number of elements in the tree, whether nodes or particles. can make this smaller but this has a safety factor 169 | if NumNodes: 170 | self.NumNodes = NumNodes 171 | else: 172 | # initial guess for storage needed; can always increase if needed 173 | self.NumNodes = int(1.5 * Npart + 1) 174 | self.Sizes = zeros(self.NumNodes) 175 | self.Deltas = zeros(self.NumNodes) 176 | self.Masses = zeros(self.NumNodes) 177 | # No need to initialize this beyond zero, all n>0 moments are 0 for a single particle 178 | if self.HasQuads: 179 | self.Quadrupoles = zeros((self.NumNodes, 3, 3)) 180 | self.Softenings = zeros(self.NumNodes) 181 | self.Coordinates = zeros((self.NumNodes, 3)) 182 | self.Deltas = zeros(self.NumNodes) 183 | self.NextBranch = -ones(self.NumNodes, dtype=np.int64) 184 | self.FirstSubnode = -ones(self.NumNodes, dtype=np.int64) 185 | 186 | 187 | @njit 188 | def ComputeMoments(tree, no, children): 189 | """Does a recursive pass through the tree and computes centers of mass, total mass, max softening, and distance between geometric center and COM""" 190 | quad = zeros((3, 3)) 191 | if no < tree.NumParticles: # if this is a particle, just return the properties 192 | return tree.Softenings[no], tree.Masses[no], quad, tree.Coordinates[no] 193 | else: 194 | m = 0 195 | com = zeros(3) 196 | hmax = 0 197 | for c in children[no]: 198 | if c > -1: 199 | hi, mi, quadi, comi = ComputeMoments(tree, c, children) 200 | m += mi 201 | com += mi * comi 202 | hmax = max(hi, hmax) 203 | tree.Masses[no] = m 204 | com = com / m 205 | if tree.HasQuads: 206 | for c in children[no]: 207 | if c > -1: 208 | comi = tree.Coordinates[c] 209 | quadi = tree.Quadrupoles[c] 210 | ri = comi - com 211 | r2 = 0 212 | for k in range(3): 213 | r2 += ri[k] * ri[k] 214 | for k in range(3): 215 | for l in range(3): 216 | quad[k, l] += quadi[k, l] + mi * 3 * ri[k] * ri[l] 217 | if 
k == l: 218 | quad[k, l] -= ( 219 | mi * r2 220 | ) # Calculate the quadrupole moment based on the moments of the subcells 221 | tree.Quadrupoles[no] = quad 222 | delta = 0 223 | for dim in range(3): 224 | dx = com[dim] - tree.Coordinates[no, dim] 225 | delta += dx * dx 226 | tree.Deltas[no] = np.sqrt(delta) 227 | tree.Coordinates[no] = com 228 | tree.Softenings[no] = hmax 229 | return hmax, m, quad, com 230 | 231 | 232 | @njit 233 | def SetupTreewalk(tree, no, children): 234 | if no < tree.NumParticles: 235 | return # leaf nodes are handled from above 236 | last_node = -1 237 | for c in children[no]: 238 | if c < 0: 239 | continue 240 | # if we haven't yet set current node's next node, do so 241 | if tree.FirstSubnode[no] < 0: 242 | tree.FirstSubnode[no] = c 243 | # set this up to point to the next "branch" of the tree to look at if we sum the force for the current branch 244 | if last_node > -1: 245 | tree.NextBranch[last_node] = c 246 | last_node = c 247 | 248 | # need to deal with the last child: must link it up to the sibling of the present node 249 | tree.NextBranch[last_node] = tree.NextBranch[no] 250 | 251 | for c in children[no]: 252 | if c >= tree.NumParticles: # if we have a node, call routine recursively 253 | SetupTreewalk(tree, c, children) 254 | 255 | 256 | @njit 257 | def increase_tree_size(tree, fac=1.2): 258 | """Reallocate the tree data with storage increased by factor fac""" 259 | old_size = tree.NumNodes 260 | size_increase = max(int(old_size * fac + 1) - old_size, 1) 261 | # print("Increasing size of node list by ", size_increase) # by %g" % fac) 262 | 263 | tree.Sizes = concatenate((tree.Sizes, zeros(size_increase))) 264 | tree.Deltas = concatenate((tree.Deltas, zeros(size_increase))) 265 | tree.Masses = concatenate((tree.Masses, zeros(size_increase))) 266 | tree.Softenings = concatenate((tree.Softenings, zeros(size_increase))) 267 | tree.NextBranch = concatenate((tree.NextBranch, -ones(size_increase, dtype=np.int64))) 268 | tree.FirstSubnode 
= concatenate((tree.FirstSubnode, -ones(size_increase, dtype=np.int64))) 269 | tree.Coordinates = concatenate((tree.Coordinates, zeros((size_increase, 3)))) 270 | if tree.HasQuads: 271 | tree.Quadrupoles = concatenate((tree.Quadrupoles, zeros((size_increase, 3, 3)))) 272 | tree.NumNodes += size_increase 273 | 274 | return size_increase 275 | -------------------------------------------------------------------------------- /src/pytreegrav/dynamic_tree.py: -------------------------------------------------------------------------------- 1 | from numba import ( 2 | int32, 3 | deferred_type, 4 | optional, 5 | float64, 6 | boolean, 7 | int64, 8 | njit, 9 | jit, 10 | prange, 11 | types, 12 | ) 13 | from numba.experimental import jitclass 14 | import numpy as np 15 | from numpy import empty, empty_like, zeros, zeros_like, sqrt, ones 16 | 17 | spec = [ 18 | ("Sizes", float64[:]), # side length of tree nodes 19 | ("Deltas", float64[:]), # distance between COM and geometric center of node 20 | ( 21 | "Coordinates", 22 | float64[:, :], 23 | ), # location of center of mass of node (actually stores _geometric_ center before we do the moments pass) 24 | ("Velocities", float64[:, :]), # velocity of the center of mass of node 25 | ("VelocityDisp", float64[:]), # center-of-mass velocity dispersion 26 | ("Masses", float64[:]), # total mass of node 27 | ("Quadrupoles", float64[:, :, :]), # Quadrupole moment of the node 28 | ( 29 | "HasQuads", 30 | boolean, 31 | ), # Allow us to quickly check if Quadrupole moments exist to keep monopole calculations fast 32 | ("NumParticles", int64), # number of particles in the tree 33 | ("NumNodes", int64), # number of particles + nodes (i.e. 
mass elements) in the tree 34 | ( 35 | "Softenings", 36 | float64[:], 37 | ), # individual softenings for particles, _maximum_ softening of inhabitant particles for nodes 38 | ("NextBranch", int64[:]), 39 | ("FirstSubnode", int64[:]), 40 | ("TreewalkIndices", int64[:]), 41 | # ('children',int64[:,:]) # indices of child nodes 42 | ] 43 | 44 | 45 | octant_offsets = 0.25 * np.array( 46 | [ 47 | [-1, -1, -1], 48 | [1, -1, -1], 49 | [-1, 1, -1], 50 | [1, 1, -1], 51 | [-1, -1, 1], 52 | [1, -1, 1], 53 | [-1, 1, 1], 54 | [1, 1, 1], 55 | ] 56 | ) 57 | 58 | 59 | @jitclass(spec) 60 | class DynamicOctree(object): 61 | """Octree implementation that stores node velocities for correlation functions and dynamic updates.""" 62 | 63 | def __init__(self, points, masses, softening, vels, morton_order=True, quadrupole=False): 64 | self.TreewalkIndices = -ones(points.shape[0], dtype=np.int64) 65 | self.HasQuads = quadrupole 66 | children = self.BuildTree( 67 | points, masses, softening, vels 68 | ) # first provisional treebuild to get the ordering right 69 | SetupTreewalk(self, self.NumParticles, children) # set up the order of the treewalk 70 | ComputeMomentsDynamic(self, self.NumParticles, children) # compute centers of mass, etc. 71 | self.GetWalkIndices() # get the Morton ordering of the points 72 | 73 | if ( 74 | morton_order 75 | ): # if enabled, we rebuild the tree in Morton order (the order that points are visited in the depth-first traversal) 76 | children = self.BuildTree( 77 | points[self.TreewalkIndices], 78 | np.take(masses, self.TreewalkIndices), 79 | np.take(softening, self.TreewalkIndices), 80 | vels[self.TreewalkIndices], 81 | ) # now re-build the tree with everything in order 82 | SetupTreewalk(self, self.NumParticles, children) # re-do the treewalk order with the new indices 83 | 84 | ComputeMomentsDynamic(self, self.NumParticles, children) # compute centers of mass, etc. 
85 | 86 | def BuildTree(self, points, masses, softening, vels): 87 | # initialize all attributes 88 | self.NumParticles = points.shape[0] 89 | self.NumNodes = ( 90 | 2 * self.NumParticles 91 | ) # this is the number of elements in the tree, whether nodes or particles. can make this smaller but this has a safety factor 92 | self.Sizes = zeros(self.NumNodes) 93 | self.Deltas = zeros(self.NumNodes) 94 | self.Masses = zeros(self.NumNodes) 95 | if self.HasQuads: 96 | self.Quadrupoles = zeros( 97 | (self.NumNodes, 3, 3) 98 | ) # No need to initialize this beyond zero, all n>0 moments are 0 for a single particle 99 | self.Softenings = zeros(self.NumNodes) 100 | self.Coordinates = zeros((self.NumNodes, 3)) 101 | self.Velocities = zeros((self.NumNodes, 3)) 102 | self.VelocityDisp = zeros(self.NumNodes) 103 | self.Deltas = zeros(self.NumNodes) 104 | self.NextBranch = -ones(self.NumNodes, dtype=np.int64) 105 | self.FirstSubnode = -ones(self.NumNodes, dtype=np.int64) 106 | # self.ParentNode = -ones(self.NumNodes, dtype=np.int64) 107 | 108 | # set the properties of the root node 109 | self.Sizes[self.NumParticles] = max( 110 | points[:, 0].max() - points[:, 0].min(), 111 | points[:, 1].max() - points[:, 1].min(), 112 | points[:, 2].max() - points[:, 2].min(), 113 | ) 114 | for dim in range(3): 115 | self.Coordinates[self.NumParticles, dim] = 0.5 * (points[:, dim].max() + points[:, dim].min()) 116 | 117 | # set values for particles 118 | self.Coordinates[: self.NumParticles] = points 119 | self.Velocities[: self.NumParticles] = vels 120 | self.Masses[: self.NumParticles] = masses 121 | self.Softenings[: self.NumParticles] = softening 122 | children = -ones((self.NumNodes, 8), dtype=np.int64) 123 | new_node_idx = self.NumParticles + 1 124 | 125 | # now we insert particles into the tree one at a time, setting up child pointers and initializing node properties as we go 126 | for i in range(self.NumParticles): 127 | pos = points[i] 128 | 129 | no = self.NumParticles # walk the tree, 
starting at the root 130 | while no > -1: 131 | octant = 0 # the index of the octant that the present point lives in 132 | for dim in range(3): 133 | if pos[dim] > self.Coordinates[no, dim]: 134 | octant += 1 << dim 135 | 136 | # check if there is a pre-existing node among the present node's children 137 | child_candidate = children[no, octant] 138 | if child_candidate > -1: # it exists, now check if it's a node or a particle 139 | if ( 140 | child_candidate < self.NumParticles 141 | ): # it's a particle - we have to create a new node of index new_node_idx containing the 2 points we've got, and point the pre-existing particle to the new particle 142 | # EXCEPTION: if the pre-existing particle is at the same coordinate, we will perturb the position of the new particle slightly and start over 143 | same_coord = True 144 | for k in range(3): 145 | if self.Coordinates[i, k] != self.Coordinates[child_candidate, k]: 146 | same_coord = False 147 | if same_coord: 148 | self.Coordinates[i] *= np.exp(3e-16 * (np.random.rand(3) - 0.5)) # random perturbation 149 | points[i] = self.Coordinates[i] 150 | no = self.NumParticles # restart the tree traversal 151 | continue 152 | # end exception 153 | 154 | children[no, octant] = new_node_idx 155 | self.Coordinates[new_node_idx] = ( 156 | self.Coordinates[no] + self.Sizes[no] * octant_offsets[octant] 157 | ) # set the center of the new node 158 | self.Sizes[new_node_idx] = self.Sizes[no] / 2 # set the size of the new node 159 | new_octant = 0 160 | for dim in range(3): 161 | if self.Coordinates[child_candidate, dim] > self.Coordinates[new_node_idx, dim]: 162 | new_octant += ( 163 | 1 << dim 164 | ) # get the octant of the new node that pre-existing particle lives in 165 | children[new_node_idx, new_octant] = ( 166 | child_candidate # set the pre-existing particle as a child of the new node 167 | ) 168 | no = new_node_idx 169 | new_node_idx += 1 170 | continue # restart the loop looking at the new node 171 | else: # if the child is an 
existing node, go to that one and start the loop anew 172 | no = children[no, octant] 173 | continue 174 | else: # if the child does not exist, we let this point be that child (inserting it in the tree) and we're done with this point 175 | children[no, octant] = i 176 | no = -1 177 | return children 178 | 179 | def ReorderTree(self): 180 | no = self.NumParticles 181 | 182 | def GetWalkIndices(self): # gets the ordering of the particles in the treewalk 183 | index = 0 184 | node_index = 0 185 | no = self.NumParticles 186 | while no > -1: 187 | if no < self.NumParticles: 188 | self.TreewalkIndices[index] = no 189 | index += 1 190 | no = self.NextBranch[no] 191 | else: 192 | no = self.FirstSubnode[no] 193 | 194 | 195 | @njit 196 | def ComputeMomentsDynamic( 197 | tree, no, children 198 | ): # does a recursive pass through the tree and computes centers of mass, total mass, max softening, and distance between geometric center and COM 199 | quad = zeros((3, 3)) 200 | if no < tree.NumParticles: # if this is a particle, just return the properties 201 | return ( 202 | tree.Softenings[no], 203 | tree.Masses[no], 204 | quad, 205 | tree.Coordinates[no], 206 | tree.Velocities[no], 207 | 0, 208 | ) 209 | else: 210 | m = 0 211 | com = zeros(3) 212 | vel = zeros(3) 213 | vdisp = 0 214 | hmax = 0 215 | for c in children[no]: 216 | if c > -1: 217 | hi, mi, quadi, comi, veli, vdispi = ComputeMomentsDynamic(tree, c, children) 218 | m += mi 219 | com += mi * comi 220 | vel += mi * veli 221 | vdisp += mi * vdispi 222 | hmax = max(hi, hmax) 223 | tree.Masses[no] = m 224 | com = com / m 225 | vel = vel / m 226 | # vdisp = vdisp/m 227 | for c in children[no]: 228 | if c > -1: 229 | dv = tree.Velocities[c] - vel 230 | vdisp += tree.Masses[c] * (dv[0] * dv[0] + dv[1] * dv[1] + dv[2] * dv[2]) 231 | vdisp = vdisp / m 232 | if tree.HasQuads: 233 | for c in children[no]: 234 | if c > -1: 235 | comi = tree.Coordinates[c] 236 | quadi = tree.Quadrupoles[c] 237 | ri = comi - com 238 | r2 = 0 239 | 
for k in range(3): 240 | r2 += ri[k] * ri[k] 241 | for k in range(3): 242 | for l in range(3): 243 | quad[k, l] += quadi[k, l] + mi * 3 * ri[k] * ri[l] 244 | if k == l: 245 | quad[k, l] -= ( 246 | mi * r2 247 | ) # Calculate the quadrupole moment based on the moments of the subcells 248 | tree.Quadrupoles[no] = quad 249 | delta = 0 250 | for dim in range(3): 251 | dx = com[dim] - tree.Coordinates[no, dim] 252 | delta += dx * dx 253 | tree.Deltas[no] = np.sqrt(delta) 254 | tree.Coordinates[no] = com 255 | tree.Softenings[no] = hmax 256 | tree.Velocities[no] = vel 257 | tree.VelocityDisp[no] = vdisp 258 | return hmax, m, quad, com, vel, vdisp 259 | 260 | 261 | @njit 262 | def SetupTreewalk(tree, no, children): 263 | # print(no) 264 | if no < tree.NumParticles: 265 | return # leaf nodes are handled from above 266 | last_node = -1 267 | last_child = -1 268 | for c in children[no]: 269 | if c < 0: 270 | continue 271 | # tree.ParentNode[c] = no 272 | if tree.FirstSubnode[no] < 0: 273 | tree.FirstSubnode[no] = c # if we haven't yet set current node's next node, do so 274 | 275 | if last_node > -1: 276 | tree.NextBranch[last_node] = ( 277 | c # set this up to point to the next "branch" of the tree to look at if we sum the force for the current branch 278 | ) 279 | last_node = c 280 | 281 | # need to deal with the last child: must link it up to the sibling of the present node 282 | tree.NextBranch[last_node] = tree.NextBranch[no] 283 | 284 | for c in children[no]: 285 | if c >= tree.NumParticles: # if we have a node, call routine recursively 286 | SetupTreewalk(tree, c, children) 287 | -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{numba, 2 | author = {Lam, Siu Kwan and Pitrou, Antoine and Seibert, Stanley}, 3 | title = {Numba: A LLVM-Based Python JIT Compiler}, 4 | year = {2015}, 5 | isbn = {9781450340052}, 6 | publisher = 
{Association for Computing Machinery}, 7 | address = {New York, NY, USA}, 8 | url = {https://doi.org/10.1145/2833157.2833162}, 9 | doi = {10.1145/2833157.2833162}, 10 | abstract = {Dynamic, interpreted languages, like Python, are attractive for domain-experts and scientists experimenting with new ideas. However, the performance of the interpreter is often a barrier when scaling to larger data sets. This paper presents a just-in-time compiler for Python that focuses in scientific and array-oriented computing. Starting with the simple syntax of Python, Numba compiles a subset of the language into efficient machine code that is comparable in performance to a traditional compiled language. In addition, we share our experience in building a JIT compiler using LLVM[1].}, 11 | booktitle = {Proceedings of the Second Workshop on the LLVM Compiler Infrastructure in HPC}, 12 | articleno = {7}, 13 | numpages = {6}, 14 | keywords = {compiler, Python, LLVM}, 15 | location = {Austin, Texas}, 16 | series = {LLVM '15} 17 | } 18 | 19 | 20 | 21 | @ARTICLE{dubinski, 22 | author = {{Dubinski}, John}, 23 | title = "{A parallel tree code}", 24 | journal = {New Astronomy}, 25 | keywords = {Astrophysics}, 26 | year = 1996, 27 | month = oct, 28 | volume = {1}, 29 | number = {2}, 30 | pages = {133-147}, 31 | doi = {10.1016/S1384-1076(96)00009-7}, 32 | archivePrefix = {arXiv}, 33 | eprint = {astro-ph/9603097}, 34 | primaryClass = {astro-ph}, 35 | adsurl = {https://ui.adsabs.harvard.edu/abs/1996NewA....1..133D}, 36 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 37 | } 38 | 39 | 40 | @ARTICLE{fire_pressurebalance, 41 | author = {{Gurvich}, Alexander B. and {Faucher-Gigu{\`e}re}, Claude-Andr{\'e} and {Richings}, Alexander J. and {Hopkins}, Philip F. and {Grudi{\'c}}, Michael Y. and {Hafen}, Zachary and {Wellons}, Sarah and {Stern}, Jonathan and {Quataert}, Eliot and {Chan}, T.~K. and {Orr}, Matthew E. 
and {Kere{\v{s}}}, Du{\v{s}}an and {Wetzel}, Andrew and {Hayward}, Christopher C. and {Loebman}, Sarah R. and {Murray}, Norman}, 42 | title = "{Pressure balance in the multiphase ISM of cosmologically simulated disc galaxies}", 43 | journal = {Monthly Notices of the Royal Astronomical Society}, 44 | keywords = {galaxies: evolution, galaxies: formation, galaxies: ISM, galaxies: star formation, cosmology: theory, Astrophysics - Astrophysics of Galaxies}, 45 | year = 2020, 46 | month = nov, 47 | volume = {498}, 48 | number = {3}, 49 | pages = {3664-3683}, 50 | doi = {10.1093/mnras/staa2578}, 51 | archivePrefix = {arXiv}, 52 | eprint = {2005.12916}, 53 | primaryClass = {astro-ph.GA}, 54 | adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.498.3664G}, 55 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 56 | } 57 | 58 | @ARTICLE{salmonwarren, 59 | author = {{Salmon}, John K. and {Warren}, Michael S.}, 60 | title = "{Skeletons from the Treecode Closet}", 61 | journal = {Journal of Computational Physics}, 62 | keywords = {Celestial Mechanics, Error Analysis, Many Body Problem, Multipolar Fields, Trees (Mathematics), Field Theory (Physics), Gravitational Fields, Root-Mean-Square Errors, Statistical Mechanics, Thermodynamics and Statistical Physics}, 63 | year = 1994, 64 | month = mar, 65 | volume = {111}, 66 | number = {1}, 67 | pages = {136-155}, 68 | doi = {10.1006/jcph.1994.1050}, 69 | adsurl = {https://ui.adsabs.harvard.edu/abs/1994JCoPh.111..136S}, 70 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 71 | } 72 | 73 | 74 | 75 | @ARTICLE{plummer, 76 | author = {{Plummer}, H.~C.}, 77 | title = "{On the problem of distribution in globular star clusters}", 78 | journal = {Monthly Notices of the Royal Astronomical Society}, 79 | year = 1911, 80 | month = mar, 81 | volume = {71}, 82 | pages = {460-470}, 83 | doi = {10.1093/mnras/71.5.460}, 84 | adsurl = {https://ui.adsabs.harvard.edu/abs/1911MNRAS..71..460P}, 85 | adsnote = {Provided by the 
SAO/NASA Astrophysics Data System} 86 | } 87 | 88 | 89 | @ARTICLE{rockstar, 90 | author = {{Behroozi}, Peter S. and {Wechsler}, Risa H. and {Wu}, Hao-Yi}, 91 | title = "{The ROCKSTAR Phase-space Temporal Halo Finder and the Velocity Offsets of Cluster Cores}", 92 | journal = {The Astrophysical Journal}, 93 | keywords = {dark matter, methods: numerical, Astrophysics - Cosmology and Extragalactic Astrophysics, Astrophysics - Instrumentation and Methods for Astrophysics}, 94 | year = 2013, 95 | month = jan, 96 | volume = {762}, 97 | number = {2}, 98 | eid = {109}, 99 | pages = {109}, 100 | doi = {10.1088/0004-637X/762/2/109}, 101 | archivePrefix = {arXiv}, 102 | eprint = {1110.4372}, 103 | primaryClass = {astro-ph.CO}, 104 | adsurl = {https://ui.adsabs.harvard.edu/abs/2013ApJ...762..109B}, 105 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 106 | } 107 | 108 | 109 | 110 | @ARTICLE{ grudic2018, 111 | author = {{Grudi{\'c}}, Michael Y. and {Guszejnov}, D{\'a}vid and 112 | {Hopkins}, Philip F. and {Lamberts}, Astrid and 113 | {Boylan-Kolchin}, Michael and {Murray}, Norman and {Schmitz}, Denise}, 114 | title = "{From the top down and back up again: star cluster structure from hierarchical star formation}", 115 | journal = {Monthly Notices of the Royal Astronomical Society}, 116 | keywords = {stars: formation, galaxies: star clusters: general, galaxies: star formation, Astrophysics - Astrophysics of Galaxies}, 117 | year = 2018, 118 | month = nov, 119 | volume = {481}, 120 | number = {1}, 121 | pages = {688-702}, 122 | doi = {10.1093/mnras/sty2303}, 123 | archivePrefix = {arXiv}, 124 | eprint = {1708.09065}, 125 | primaryClass = {astro-ph.GA}, 126 | adsurl = {https://ui.adsabs.harvard.edu/abs/2018MNRAS.481..688G}, 127 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 128 | } 129 | 130 | 131 | @ARTICLE{guszejnov2020, 132 | author = {{Guszejnov}, D{\'a}vid and {Grudi{\'c}}, Michael Y. and 133 | {Offner}, Stella S.~R. 
and {Boylan-Kolchin}, Michael and
{Faucher-Gigu{\`e}re}, Claude-Andr{\'e} and {Wetzel}, Andrew and
{Benincasa}, Samantha M. and {Loebman}, Sarah},
title = "{Evolution of giant molecular clouds across cosmic time}",
journal = {Monthly Notices of the Royal Astronomical Society},
keywords = {turbulence, stars: formation, ISM: clouds, galaxies: ISM, galaxies: star formation, cosmology: theory, Astrophysics - Astrophysics of Galaxies},
year = "2020",
month = "Feb",
volume = {492},
number = {1},
pages = {488-502},
doi = {10.1093/mnras/stz3527},
archivePrefix = {arXiv},
eprint = {1910.01163},
primaryClass = {astro-ph.GA},
adsurl = {https://ui.adsabs.harvard.edu/abs/2020MNRAS.492..488G},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}


@ARTICLE{makedisk,
author = {{Springel}, Volker and {White}, Simon D.~M.},
title = "{Tidal tails in cold dark matter cosmologies}",
journal = {Monthly Notices of the Royal Astronomical Society},
keywords = {Astrophysics},
year = 1999,
month = jul,
volume = {307},
number = {1},
pages = {162-178},
doi = {10.1046/j.1365-8711.1999.02613.x},
archivePrefix = {arXiv},
eprint = {astro-ph/9807320},
primaryClass = {astro-ph},
adsurl = {https://ui.adsabs.harvard.edu/abs/1999MNRAS.307..162S},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}



@Article{ gizmo,
author = {{Hopkins}, P.~F.},
title = "{A new class of accurate, mesh-free hydrodynamic
simulation methods}",
journal = {Monthly Notices of the Royal Astronomical Society},
archiveprefix = "arXiv",
eprint = {1409.7395},
keywords = {hydrodynamics, instabilities, turbulence, methods:
numerical, cosmology: theory},
year = 2015,
month = jun,
volume = 450,
pages =
{53-110}, 186 | doi = {10.1093/mnras/stv195}, 187 | adsurl = {http://adsabs.harvard.edu/abs/2015MNRAS.450...53H}, 188 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 189 | } 190 | 191 | 192 | @ARTICLE{gadget2, 193 | author = {{Springel}, V.}, 194 | title = "{The cosmological simulation code GADGET-2}", 195 | journal = {Monthly Notices of the Royal Astronomical Society}, 196 | eprint = {astro-ph/0505010}, 197 | keywords = {methods: numerical, galaxies: interactions, dark matter}, 198 | year = 2005, 199 | month = dec, 200 | volume = 364, 201 | pages = {1105-1134}, 202 | doi = {10.1111/j.1365-2966.2005.09655.x}, 203 | adsurl = {http://adsabs.harvard.edu/abs/2005MNRAS.364.1105S}, 204 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 205 | } 206 | 207 | @ARTICLE{galic, 208 | author = {{Yurin}, Denis and {Springel}, Volker}, 209 | title = "{An iterative method for the construction of N-body galaxy models in collisionless equilibrium}", 210 | journal = {Monthly Notices of the Royal Astronomical Society}, 211 | keywords = {methods: numerical, stars: kinematics and dynamics, galaxies: haloes, galaxies: kinematics and dynamics, galaxies: structure, Astrophysics - Cosmology and Nongalactic Astrophysics}, 212 | year = 2014, 213 | month = oct, 214 | volume = {444}, 215 | number = {1}, 216 | pages = {62-79}, 217 | doi = {10.1093/mnras/stu1421}, 218 | archivePrefix = {arXiv}, 219 | eprint = {1402.1623}, 220 | primaryClass = {astro-ph.CO}, 221 | adsurl = {https://ui.adsabs.harvard.edu/abs/2014MNRAS.444...62Y}, 222 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 223 | } 224 | 225 | @ARTICLE{fire2, 226 | author = {{Hopkins}, Philip F. and {Wetzel}, Andrew and 227 | {Kere{\v{s}}}, Du{\v{s}}an and {Faucher-Gigu{\`e}re}, Claude-Andr{\'e} and 228 | {Quataert}, Eliot and {Boylan-Kolchin}, Michael and {Murray}, Norman and 229 | {Hayward}, Christopher C. 
and {Garrison-Kimmel}, Shea and 230 | {Hummels}, Cameron and {Feldmann}, Robert and {Torrey}, Paul and 231 | {Ma}, Xiangcheng and {Angl{\'e}s-Alc{\'a}zar}, Daniel and 232 | {Su}, Kung-Yi and {Orr}, Matthew and {Schmitz}, Denise and 233 | {Escala}, Ivanna and {Sanderson}, Robyn and {Grudi{\'c}}, Michael Y. and 234 | {Hafen}, Zachary and {Kim}, Ji-Hoon and {Fitts}, Alex and 235 | {Bullock}, James S. and {Wheeler}, Coral and {Chan}, T.~K. and 236 | {Elbert}, Oliver D. and {Narayanan}, Desika}, 237 | title = "{FIRE-2 simulations: physics versus numerics in galaxy formation}", 238 | journal = {Monthly Notices of the Royal Astronomical Society}, 239 | keywords = {methods: numerical, stars: formation, galaxies: active, galaxies: evolution, galaxies: formation, cosmology: theory, Astrophysics - Astrophysics of Galaxies, Astrophysics - Cosmology and Nongalactic Astrophysics, Astrophysics - Instrumentation and Methods for Astrophysics}, 240 | year = "2018", 241 | month = "Oct", 242 | volume = {480}, 243 | pages = {800-863}, 244 | doi = {10.1093/mnras/sty1690}, 245 | archivePrefix = {arXiv}, 246 | eprint = {1702.06148}, 247 | primaryClass = {astro-ph.GA}, 248 | adsurl = {https://ui.adsabs.harvard.edu/\#abs/2018MNRAS.480..800H}, 249 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 250 | } 251 | 252 | @ARTICLE{dehnen, 253 | author = {{Dehnen}, W. 
and {Read}, J.~I.}, 254 | title = "{N-body simulations of gravitational dynamics}", 255 | journal = {European Physical Journal Plus}, 256 | keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Physics - Computational Physics}, 257 | year = 2011, 258 | month = may, 259 | volume = {126}, 260 | eid = {55}, 261 | pages = {55}, 262 | doi = {10.1140/epjp/i2011-11055-3}, 263 | archivePrefix = {arXiv}, 264 | eprint = {1105.1082}, 265 | primaryClass = {astro-ph.IM}, 266 | adsurl = {https://ui.adsabs.harvard.edu/abs/2011EPJP..126...55D}, 267 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 268 | } 269 | 270 | @BOOK{aarseth_nbody, 271 | author = {{Aarseth}, Sverre J.}, 272 | title = "{Gravitational N-Body Simulations}", 273 | year = 2003, 274 | adsurl = {https://ui.adsabs.harvard.edu/abs/2003gnbs.book.....A}, 275 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 276 | } 277 | @ARTICLE{barneshut, 278 | author = {{Barnes}, Josh and {Hut}, Piet}, 279 | title = "{A hierarchical O(N log N) force-calculation algorithm}", 280 | journal = {Nature}, 281 | keywords = {Computational Astrophysics, Many Body Problem, Numerical Integration, Stellar Motions, Algorithms, Hierarchies, Physics (General)}, 282 | year = 1986, 283 | month = dec, 284 | volume = {324}, 285 | number = {6096}, 286 | pages = {446-449}, 287 | doi = {10.1038/324446a0}, 288 | adsurl = {https://ui.adsabs.harvard.edu/abs/1986Natur.324..446B}, 289 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 290 | } -------------------------------------------------------------------------------- /src/pytreegrav/frontend.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import warnings 3 | from numpy import zeros_like, zeros 4 | from .kernel import * 5 | from .octree import * 6 | from .dynamic_tree import * 7 | from .treewalk import * 8 | from .bruteforce import * 9 | from .misc import * 10 | 11 | 12 | def 
def valueTestMethod(method):
    """Validate the ``method`` argument accepted by the front-end summation routines.

    Parameters
    ----------
    method: str
        Summation method name; must be one of 'adaptive', 'bruteforce', or 'tree'

    Raises
    ------
    TypeError
        If method is not a string.
    ValueError
        If method is a string but not one of the supported method names.
    """
    methods = ["adaptive", "bruteforce", "tree"]

    ## check if method is a str - isinstance (rather than exact type comparison)
    ## also accepts str subclasses
    if not isinstance(method, str):
        raise TypeError("Invalid method type %s, must be str" % type(method))

    ## check if method is a valid method
    if method not in methods:
        raise ValueError("Invalid method %s. Must be one of: %s" % (method, str(methods)))


def warn_if_nonunique_positions(pos, softening=None):
    """Checks whether a potential/field calculation will return undefined values
    and warns the user if so.

    NOTE: the check is per-coordinate: a repeated value in any single coordinate
    column triggers the warning, so it is conservative and can warn even when
    the full 3D positions are all distinct.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    softening: array_like or None, optional
        shape (N,) array of softening lengths; if any are positive, the warning
        notes that softening determines the answer for overlapping particles
    """

    unique_positions = True
    for i in range(pos.shape[1]):
        # short-circuit on the first coordinate column with duplicates
        if np.unique(pos[:, i]).size < pos.shape[0]:
            unique_positions = False
            break

    if unique_positions:
        return

    if softening is not None:
        if np.any(softening > 0):
            warnings.warn(
                "Warning: Particle positions are non-unique. Softening will \
                determine the answer for overlapping particles."
            )
            return

    warnings.warn(
        "Warning: Particle positions are non-unique. The answer will be singular \
        or garbage for overlapping particles."
    )
    return
def ConstructTree(
    pos,
    m=None,
    softening=None,
    quadrupole=False,
    vel=None,
    compute_moments=True,
    morton_order=True,
):
    """Builds a tree containing particle data, for subsequent potential/field evaluation

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like or None, optional
        shape (N,) array of particle masses - if None then zeros will be used (e.g. if all you need the tree for is spatial algorithms)
    softening: array_like or None, optional
        shape (N,) array of particle softening lengths - these give the radius of compact support of the M4 cubic spline mass distribution of each particle
    quadrupole: bool, optional
        Whether to store quadrupole moments (default False)
    vel: array_like or None, optional
        shape (N,3) array of particle velocities - if provided, a DynamicOctree storing node velocities is built instead of a static Octree (default None)
    compute_moments: bool, optional
        Whether to compute node mass moments at build time; forced to False when no masses are provided (default True)
    morton_order: bool, optional
        Whether to Morton-order the particle data for better treewalk locality; only forwarded to the static Octree (default True)

    Returns
    -------
    tree: octree
        Octree instance built from particle data

    Raises
    ------
    ValueError
        If any position, mass, or softening value is non-finite.
    """

    warn_if_nonunique_positions(pos, softening)

    if m is None:
        m = zeros(len(pos))
        compute_moments = False  # no masses, so mass moments would be meaningless
    if softening is None:
        softening = zeros_like(m)
    # NaN/inf input would make the treebuild loop forever - fail fast with an
    # informative exception instead of the former print + bare `raise` (which
    # surfaced as an opaque RuntimeError)
    if not (np.all(np.isfinite(pos)) and np.all(np.isfinite(m)) and np.all(np.isfinite(softening))):
        raise ValueError("Invalid input detected - aborting treebuild to avoid going into an infinite loop!")

    if vel is None:
        return Octree(
            pos,
            m,
            softening,
            quadrupole=quadrupole,
            compute_moments=compute_moments,
            morton_order=morton_order,
        )
    else:
        # NOTE(review): compute_moments and morton_order are not forwarded to
        # DynamicOctree - confirm that is intentional
        return DynamicOctree(pos, m, softening, vel, quadrupole=quadrupole)
def Potential(
    pos,
    m,
    softening=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational potential calculation

    Computes the gravitational potential sourced by particles with positions
    ``pos`` and masses ``m``, evaluated at those same positions, using either
    brute-force or tree-based summation.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    softening: None or array_like, optional
        shape (N,) array of kernel support radii for gravitational softening (radius of compact support of the M4 cubic spline mass distribution); defaults to all zeros
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle controlling force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate (default 0.7, gives ~1% accuracy)
    tree: Octree, optional
        pre-generated Octree to reuse; may contain any set of particles, not necessarily the particles at pos (default None)
    return_tree: bool, optional
        if True, also return the tree used, for future reuse (default False)
    parallel: bool, optional
        if True, parallelize the force summation over all available cores (default False)
    method: str, optional
        summation method: 'adaptive', 'tree', or 'bruteforce' (default 'adaptive' tries to pick the faster choice)
    quadrupole: bool, optional
        whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    phi: array_like
        shape (N,) array of potentials at the particle positions
    """

    # validate the requested summation method up front
    valueTestMethod(method)

    if softening is None:
        softening = np.zeros_like(m)

    # resolve 'adaptive' to a concrete method: tree summation wins for large N
    if method == "adaptive":
        method = "tree" if len(pos) > 1000 else "bruteforce"

    if method == "bruteforce":
        summator = Potential_bruteforce_parallel if parallel else Potential_bruteforce
        pot = summator(pos, m, softening, G=G)
        if return_tree:
            tree = None
    else:
        if tree is None:
            # build the tree if the caller didn't supply one
            tree = ConstructTree(
                np.float64(pos),
                np.float64(m),
                np.float64(softening),
                quadrupole=quadrupole,
            )
        walk_order = tree.TreewalkIndices

        # evaluate in treewalk order for cache-friendly memory access...
        pos_ordered = np.take(pos, walk_order, axis=0)
        soft_ordered = np.take(softening, walk_order)

        walker = PotentialTarget_tree_parallel if parallel else PotentialTarget_tree
        pot = walker(pos_ordered, soft_ordered, tree, theta=theta, G=G, quadrupole=quadrupole)

        # ...then undo the permutation so the output matches the input ordering
        pot = np.take(pot, walk_order.argsort())

    return (pot, tree) if return_tree else pot
def PotentialTarget(
    pos_target,
    pos_source,
    m_source,
    softening_target=None,
    softening_source=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational potential calculation for general N+M body case

    Returns the gravitational potential for a set of M particles with positions x_source and masses m_source, at the positions of a set of N particles that need not be the same.

    Parameters
    ----------
    pos_target: array_like
        shape (N,3) array of target particle positions where you want to know the potential
    pos_source: array_like
        shape (M,3) array of source particle positions (positions of particles sourcing the gravitational field) - may be None if a pre-built tree is supplied
    m_source: array_like
        shape (M,) array of source particle masses - may be None if a pre-built tree is supplied
    softening_target: array_like or None, optional
        shape (N,) array of target particle softening radii - these give the radius of compact support of the M4 cubic spline mass distribution
    softening_source: array_like or None, optional
        shape (M,) array of source particle radii - these give the radius of compact support of the M4 cubic spline mass distribution
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7, gives ~1% accuracy)
    parallel: bool, optional
        If True, will parallelize the force summation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        return the tree used for future use (default False)
    method: str, optional
        Which summation method to use: 'adaptive', 'tree', or 'bruteforce' (default adaptive tries to pick the faster choice)
    quadrupole: bool, optional
        Whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    phi: array_like
        shape (N,) array of potentials at the target positions
    """

    ## test if method is correct, otherwise raise a ValueError
    valueTestMethod(method)

    ## allow user to pass in tree without passing in source pos and m
    ## but catch if they don't pass in the tree.
    if tree is None and (pos_source is None or m_source is None):
        raise ValueError("Must pass either pos_source & m_source or source tree.")

    if softening_target is None:
        softening_target = zeros(len(pos_target))
    if softening_source is None and pos_source is not None:
        softening_source = zeros(len(pos_source))

    # figure out which method to use: tree wins for large N*M workloads, and is
    # forced when only a pre-built tree (no pos_source) was supplied
    if method == "adaptive":
        if pos_source is None or len(pos_target) * len(pos_source) > 10**6:
            method = "tree"
        else:
            method = "bruteforce"

    if method == "bruteforce":  # we're using brute force
        if parallel:
            phi = PotentialTarget_bruteforce_parallel(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        else:
            phi = PotentialTarget_bruteforce(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        if return_tree:  # no tree was used, so there is none to hand back
            tree = None
    else:  # we're using the tree algorithm
        if tree is None:
            tree = ConstructTree(
                np.float64(pos_source),
                np.float64(m_source),
                np.float64(softening_source),
                quadrupole=quadrupole,
            )  # build the tree if needed
        # NOTE: unlike Potential(), targets are not re-sorted into treewalk
        # order here - targets and sources are independent particle sets
        if parallel:
            phi = PotentialTarget_tree_parallel(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )
        else:
            phi = PotentialTarget_tree(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )

    if return_tree:
        return phi, tree
    else:
        return phi
def Accel(
    pos,
    m,
    softening=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational acceleration calculation

    Computes the gravitational acceleration sourced by particles with positions
    ``pos`` and masses ``m``, evaluated at those same positions, using either
    brute-force or tree-based summation.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    softening: None or array_like, optional
        shape (N,) array of kernel support radii for gravitational softening (radius of compact support of the M4 cubic spline mass distribution); defaults to all zeros
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle controlling force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate (default 0.7, gives ~1% accuracy)
    tree: Octree, optional
        pre-generated Octree to reuse; may contain any set of particles, not necessarily the particles at pos (default None)
    return_tree: bool, optional
        if True, also return the tree used, for future reuse (default False)
    parallel: bool, optional
        if True, parallelize the force summation over all available cores (default False)
    method: str, optional
        summation method: 'adaptive', 'tree', or 'bruteforce' (default 'adaptive' tries to pick the faster choice)
    quadrupole: bool, optional
        whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    g: array_like
        shape (N,3) array of acceleration vectors at the particle positions
    """

    # validate the requested summation method up front
    valueTestMethod(method)

    if softening is None:
        softening = np.zeros_like(m)

    # resolve 'adaptive' to a concrete method: tree summation wins for large N
    if method == "adaptive":
        method = "tree" if len(pos) > 1000 else "bruteforce"

    if method == "bruteforce":
        summator = Accel_bruteforce_parallel if parallel else Accel_bruteforce
        accel = summator(pos, m, softening, G=G)
        if return_tree:
            tree = None
    else:
        if tree is None:
            # build the tree if the caller didn't supply one
            tree = ConstructTree(
                np.float64(pos),
                np.float64(m),
                np.float64(softening),
                quadrupole=quadrupole,
            )
        walk_order = tree.TreewalkIndices

        # evaluate in treewalk order for cache-friendly memory access...
        pos_ordered = np.take(pos, walk_order, axis=0)
        soft_ordered = np.take(softening, walk_order)

        walker = AccelTarget_tree_parallel if parallel else AccelTarget_tree
        accel = walker(pos_ordered, soft_ordered, tree, theta=theta, G=G, quadrupole=quadrupole)

        # ...then undo the permutation so rows match the input ordering
        accel = np.take(accel, walk_order.argsort(), axis=0)

    return (accel, tree) if return_tree else accel
def AccelTarget(
    pos_target,
    pos_source,
    m_source,
    softening_target=None,
    softening_source=None,
    G=1.0,
    theta=0.7,
    tree=None,
    return_tree=False,
    parallel=False,
    method="adaptive",
    quadrupole=False,
):
    """Gravitational acceleration calculation for general N+M body case

    Returns the gravitational acceleration for a set of M particles with positions x_source and masses m_source, at the positions of a set of N particles that need not be the same.

    Parameters
    ----------
    pos_target: array_like
        shape (N,3) array of target particle positions where you want to know the acceleration
    pos_source: array_like
        shape (M,3) array of source particle positions (positions of particles sourcing the gravitational field) - may be None if a pre-built tree is supplied
    m_source: array_like
        shape (M,) array of source particle masses - may be None if a pre-built tree is supplied
    softening_target: array_like or None, optional
        shape (N,) array of target particle softening radii - these give the radius of compact support of the M4 cubic spline mass distribution
    softening_source: array_like or None, optional
        shape (M,) array of source particle radii - these give the radius of compact support of the M4 cubic spline mass distribution
    G: float, optional
        gravitational constant (default 1.0)
    theta: float, optional
        cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7, gives ~1% accuracy)
    parallel: bool, optional
        If True, will parallelize the force summation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        return the tree used for future use (default False)
    method: str, optional
        Which summation method to use: 'adaptive', 'tree', or 'bruteforce' (default adaptive tries to pick the faster choice)
    quadrupole: bool, optional
        Whether to use quadrupole moments in tree summation (default False)

    Returns
    -------
    g: array_like
        shape (N,3) array of accelerations at the target positions
    """

    ## test if method is correct, otherwise raise a ValueError
    valueTestMethod(method)

    ## allow user to pass in tree without passing in source pos and m
    ## but catch if they don't pass in the tree.
    if tree is None and (pos_source is None or m_source is None):
        raise ValueError("Must pass either pos_source & m_source or source tree.")

    if softening_target is None:
        softening_target = zeros(len(pos_target))
    if softening_source is None and pos_source is not None:
        softening_source = zeros(len(pos_source))

    # figure out which method to use: tree wins for large N*M workloads, and is
    # forced when only a pre-built tree (no pos_source) was supplied
    if method == "adaptive":
        if pos_source is None or len(pos_target) * len(pos_source) > 10**6:
            method = "tree"
        else:
            method = "bruteforce"

    if method == "bruteforce":  # we're using brute force
        if parallel:
            g = AccelTarget_bruteforce_parallel(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        else:
            g = AccelTarget_bruteforce(
                pos_target,
                softening_target,
                pos_source,
                m_source,
                softening_source,
                G=G,
            )
        if return_tree:  # no tree was used, so there is none to hand back
            tree = None
    else:  # we're using the tree algorithm
        if tree is None:
            tree = ConstructTree(
                np.float64(pos_source),
                np.float64(m_source),
                np.float64(softening_source),
                quadrupole=quadrupole,
            )  # build the tree if needed
        if parallel:
            g = AccelTarget_tree_parallel(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )
        else:
            g = AccelTarget_tree(
                pos_target,
                softening_target,
                tree,
                theta=theta,
                G=G,
                quadrupole=quadrupole,
            )

    if return_tree:
        return g, tree
    else:
        return g
def DensityCorrFunc(
    pos,
    m,
    rbins=None,
    max_bin_size_ratio=100,
    theta=1.0,
    tree=None,
    return_tree=False,
    parallel=False,
    boxsize=0,
    weighted_binning=False,
):
    """Computes the average amount of mass in radial bin [r,r+dr] around a point, provided a set of radial bins.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    rbins: array_like or None, optional
        1D array of radial bin edges - if None will use heuristics to determine sensible bins. Otherwise MUST BE LOGARITHMICALLY SPACED (default None)
    max_bin_size_ratio: float, optional
        controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width (default 100)
    theta: float, optional
        cell opening angle used to control accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0)
    parallel: bool, optional
        If True, will parallelize the correlation function computation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        if True will return the generated or used tree for future use (default False)
    boxsize: float, optional
        finite periodic box size, if periodic boundary conditions are to be used (default 0)
    weighted_binning: bool, optional
        (experimental) if True will distribute mass among radial bins with a weighted kernel (default False)

    Returns
    -------
    rbins: array_like
        array containing radial bin edges
    mbins: array_like
        array containing mean mass in radial bins, averaged over all points
    """

    if rbins is None:
        # heuristic bins: log-spaced from the 11th-smallest distance to the
        # median position out to the largest, with ~N^(1/3) bin edges
        # NOTE(review): r[10] assumes at least 11 particles - fewer will IndexError
        r = np.sort(np.sqrt(np.sum((pos - np.median(pos, axis=0)) ** 2, axis=1)))
        rbins = 10 ** np.linspace(np.log10(r[10]), np.log10(r[-1]), int(len(r) ** (1.0 / 3)))

    if tree is None:
        softening = np.zeros_like(m)
        tree = ConstructTree(np.float64(pos), np.float64(m), np.float64(softening))  # build the tree if needed
    idx = tree.TreewalkIndices

    # sort by the order they appear in the treewalk to improve access pattern efficiency
    pos_sorted = np.take(pos, idx, axis=0)

    if parallel:
        mbins = DensityCorrFunc_tree_parallel(
            pos_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )
    else:
        mbins = DensityCorrFunc_tree(
            pos_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )

    if return_tree:
        return rbins, mbins, tree
    else:
        return rbins, mbins
def VelocityCorrFunc(
    pos,
    m,
    v,
    rbins=None,
    max_bin_size_ratio=100,
    theta=1.0,
    tree=None,
    return_tree=False,
    parallel=False,
    boxsize=0,
    weighted_binning=False,
):
    """Computes the weighted average product v(x).v(x+r), for a vector field v, in radial bins

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    v: array_like
        shape (N,3) of vector quantity (e.g. velocity, magnetic field, etc)
    rbins: array_like or None, optional
        1D array of radial bin edges - if None will use heuristics to determine sensible bins. Otherwise MUST BE LOGARITHMICALLY SPACED (default None)
    max_bin_size_ratio: float, optional
        controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width (default 100)
    theta: float, optional
        cell opening angle used to control accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0)
    parallel: bool, optional
        If True, will parallelize the correlation function computation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        if True will return the generated or used tree for future use (default False)
    boxsize: float, optional
        finite periodic box size, if periodic boundary conditions are to be used (default 0)
    weighted_binning: bool, optional
        (experimental) if True will distribute mass among radial bins with a weighted kernel (default False)

    Returns
    -------
    rbins: array_like
        array containing radial bin edges
    corr: array_like
        array containing correlation function values in radial bins
    """

    if rbins is None:
        # heuristic bins: log-spaced from the 11th-smallest distance to the
        # median position out to the largest, with ~N^(1/3) bin edges
        # NOTE(review): r[10] assumes at least 11 particles - fewer will IndexError
        r = np.sort(np.sqrt(np.sum((pos - np.median(pos, axis=0)) ** 2, axis=1)))
        rbins = 10 ** np.linspace(np.log10(r[10]), np.log10(r[-1]), int(len(r) ** (1.0 / 3)))

    if tree is None:
        softening = np.zeros_like(m)
        # vel=v builds a DynamicOctree that stores node velocities
        tree = ConstructTree(np.float64(pos), np.float64(m), np.float64(softening), vel=v)  # build the tree if needed
    idx = tree.TreewalkIndices

    # sort by the order they appear in the treewalk to improve access pattern efficiency
    pos_sorted = np.take(pos, idx, axis=0)
    v_sorted = np.take(v, idx, axis=0)
    wt_sorted = np.take(m, idx, axis=0)  # masses are the statistical weights
    if parallel:
        corr = VelocityCorrFunc_tree_parallel(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )
    else:
        corr = VelocityCorrFunc_tree(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )

    if return_tree:
        return rbins, corr, tree
    else:
        return rbins, corr
def VelocityStructFunc(
    pos,
    m,
    v,
    rbins=None,
    max_bin_size_ratio=100,
    theta=1.0,
    tree=None,
    return_tree=False,
    parallel=False,
    boxsize=0,
    weighted_binning=False,
):
    """Computes the structure function for a vector field: the average value of |v(x)-v(x+r)|^2, in radial bins for r

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    v: array_like
        shape (N,3) of vector quantity (e.g. velocity, magnetic field, etc)
    rbins: array_like or None, optional
        1D array of radial bin edges - if None will use heuristics to determine sensible bins. Otherwise MUST BE LOGARITHMICALLY SPACED (default None)
    max_bin_size_ratio: float, optional
        controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width (default 100)
    theta: float, optional
        cell opening angle used to control accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 1.0)
    parallel: bool, optional
        If True, will parallelize the correlation function computation over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles, not necessarily the target particles at pos (default None)
    return_tree: bool, optional
        if True will return the generated or used tree for future use (default False)
    boxsize: float, optional
        finite periodic box size, if periodic boundary conditions are to be used (default 0)
    weighted_binning: bool, optional
        (experimental) if True will distribute mass among radial bins with a weighted kernel (default False)

    Returns
    -------
    rbins: array_like
        array containing radial bin edges
    Sv: array_like
        array containing structure function values
    """

    if rbins is None:
        # heuristic bins: log-spaced from the 11th-smallest distance to the
        # median position out to the largest, with ~N^(1/3) bin edges
        # NOTE(review): r[10] assumes at least 11 particles - fewer will IndexError
        r = np.sort(np.sqrt(np.sum((pos - np.median(pos, axis=0)) ** 2, axis=1)))
        rbins = 10 ** np.linspace(np.log10(r[10]), np.log10(r[-1]), int(len(r) ** (1.0 / 3)))

    if tree is None:
        softening = np.zeros_like(m)
        # vel=v builds a DynamicOctree that stores node velocities
        tree = ConstructTree(np.float64(pos), np.float64(m), np.float64(softening), vel=v)  # build the tree if needed
    idx = tree.TreewalkIndices

    # sort by the order they appear in the treewalk to improve access pattern efficiency
    pos_sorted = np.take(pos, idx, axis=0)
    v_sorted = np.take(v, idx, axis=0)
    wt_sorted = np.take(m, idx, axis=0)  # masses are the statistical weights
    if parallel:
        Sv = VelocityStructFunc_tree_parallel(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )
    else:
        Sv = VelocityStructFunc_tree(
            pos_sorted,
            v_sorted,
            wt_sorted,
            tree,
            rbins,
            max_bin_size_ratio=max_bin_size_ratio,
            theta=theta,
            boxsize=boxsize,
            weighted_binning=weighted_binning,
        )

    if return_tree:
        return rbins, Sv, tree
    else:
        return rbins, Sv
def ColumnDensity(
    pos,
    m,
    radii,
    rays=None,
    randomize_rays=False,
    healpix=False,
    tree=None,
    theta=0.5,
    return_tree=False,
    parallel=False,
):
    """Ray-traced or angle-binned column density calculation.

    Returns an estimate of the column density from the position of each particle
    integrated to infinity, assuming the particles are represented by uniform spheres. Note
    that optical depth can be obtained by supplying "sigma = opacity * mass" in
    place of mass, useful in situations where opacity is highly variable.

    Parameters
    ----------
    pos: array_like
        shape (N,3) array of particle positions
    m: array_like
        shape (N,) array of particle masses
    radii: array_like
        shape (N,) array containing particle radii of the uniform spheres that
        we use to model the particles' mass distribution
    rays: optional
        Which ray directions to raytrace the columns.
        None: use the angular-binned column density method with 6 bins on the sky
        OPTION 2: Integer number: use this many rays, with 6 using the standard
        6-ray grid and other numbers sampling random directions
        OPTION 3: Give a (N_rays,3) array of vectors specifying the
        directions, which will be automatically normalized.
    healpix: int, optional
        Use healpix ray grid with specified resolution level NSIDE; if nonzero
        this overrides any rays argument
    randomize_rays: bool, optional
        Randomize the orientation of the ray-grid *for each particle*
    parallel: bool, optional
        If True, will parallelize the column density over all available cores. (default False)
    tree: Octree, optional
        optional pre-generated Octree: this can contain any set of particles,
        not necessarily the target particles at pos (default None)
    theta: float, optional
        Opening angle for beam-traced angular bin estimator
    return_tree: bool, optional
        return the tree used for future use (default False)

    Returns
    -------
    columns: array_like
        shape (N,N_rays) float array of column densities from particle
        centers integrated along the rays

    Raises
    ------
    ValueError
        If a rays array is not 2D or its rows are not 3D vectors.
    TypeError
        If rays is neither None, an integer, nor a NumPy array.
    """

    if tree is None:
        tree = ConstructTree(
            np.float64(pos),
            np.float64(m),
            np.float64(radii),
        )  # build the tree if needed
    idx = tree.TreewalkIndices
    pos_sorted = np.take(pos, idx, axis=0)  # treewalk order for access efficiency

    if isinstance(rays, (int, np.integer)):  # accepts Python and NumPy integers
        if rays == 6:
            rays = np.vstack([np.eye(3), -np.eye(3)])  # 6-ray grid
        else:
            # generate a random grid of ray directions
            rays = np.random.normal(size=(rays, 3))  # normalize later
    elif isinstance(rays, np.ndarray):
        # check that the shape is correct
        if not len(rays.shape) == 2:
            raise ValueError("rays array argument must be 2D.")
        elif rays.shape[1] != 3:
            raise ValueError("rays array argument is not an array of 3D vectors.")
        # copy as float64 so we don't overwrite the caller's array and so the
        # in-place normalization below also works for integer input arrays
        rays = np.array(rays, dtype=np.float64)
    elif rays is not None:
        raise TypeError("rays argument type is not supported")

    if healpix:
        # NOTE(review): hp (healpy) is expected to be provided via the module's
        # star imports - confirm it is available in this environment
        nside = healpix
        npix = hp.nside2npix(nside)
        rays = np.array(hp.pix2vec(nside, np.arange(npix))).T

    if rays is not None:
        rays /= np.sqrt((rays * rays).sum(1))[:, None]  # normalize the ray vectors

    if parallel:
        columns = ColumnDensity_tree_parallel(pos_sorted, tree, rays, randomize_rays=randomize_rays, theta=theta)
    else:
        columns = ColumnDensity_tree(pos_sorted, tree, rays, randomize_rays=randomize_rays, theta=theta)
    if np.any(np.isnan(columns)):
        warnings.warn("WARNING some column densities are NaN!")
    # reorder results back to the input particle ordering
    columns = np.take(columns, idx.argsort(), axis=0)

    if return_tree:
        return columns, tree
    else:
        return columns
from numpy import sqrt, empty, zeros, empty_like, zeros_like, dot, fabs
from numba import njit, prange, get_num_threads, set_parallel_chunksize, int64, float64
from math import copysign
from .kernel import *
from .misc import *
import numpy as np
from scipy.spatial.transform import Rotation as R


@njit(fastmath=True)
def acceptance_criterion(r: float, h: float, size: float, delta: float, theta: float) -> bool:
    """Decide whether a node's multipole approximation may be accepted.

    The node must be farther than both the opening-angle distance and the
    softening-safety distance from the evaluation point.
    """
    opening_distance = size / theta + delta
    softening_distance = h + size * 0.6 + delta
    return r > opening_distance and r > softening_distance


@njit([int64(float64[:])], fastmath=True)
def angular_bin(dx):
    """Map direction dx to one of 6 sky bins (+x,-x,+y,-y,+z,-z) by its
    dominant component; used by the binned column density estimator."""
    ax, ay, az = fabs(dx[0]), fabs(dx[1]), fabs(dx[2])
    if ax > ay and ax > az:
        axis = 0
        positive = dx[0] > 0
    elif ay > az:
        axis = 1
        positive = dx[1] > 0
    else:
        axis = 2
        positive = dx[2] > 0
    # even bins are the + directions, odd bins the - directions
    if positive:
        return 2 * axis
    return 2 * axis + 1


@njit(fastmath=True)
def NearestImage(x, boxsize):
    """Wrap a 1D separation x onto the nearest periodic image for a cubic box."""
    if abs(x) <= boxsize / 2:
        return x
    return -copysign(boxsize - abs(x), x)
@njit(fastmath=True)
def PotentialWalk(pos, tree, softening=0, no=-1, theta=0.7):
    """Returns the gravitational potential at position pos by performing the
    Barnes-Hut treewalk using the provided octree instance (monopole order).

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure

    Keyword arguments:
    softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7 gives ~1% accuracy)

    Returns:
    phi - gravitational potential at pos (negative; G=1 units)
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index
    phi = 0
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    while no > -1:  # NextBranch == -1 marks the end of the treewalk ordering
        # separation vector and distance from pos to node/particle `no`
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r += dx[k] * dx[k]
        r = sqrt(r)
        h = max(tree.Softenings[no], softening)  # symmetrized softening

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            if r > 0:  # by default we neglect the self-potential
                if r < h:
                    # softened interaction: kernel-weighted potential
                    phi += tree.Masses[no] * PotentialKernel(r, h)
                else:
                    # plain Newtonian point-mass potential
                    phi -= tree.Masses[no] / r
            no = tree.NextBranch[no]
        elif acceptance_criterion(
            r, h, tree.Sizes[no], tree.Deltas[no], theta
        ):  # if we satisfy the criteria for accepting the monopole
            phi -= tree.Masses[no] / r
            no = tree.NextBranch[no]
        else:  # open the node
            no = tree.FirstSubnode[no]

    return phi
@njit(fastmath=True)
def PotentialWalk_quad(pos, tree, softening=0, no=-1, theta=0.7):
    """Returns the gravitational potential at position pos by performing the
    Barnes-Hut treewalk using the provided octree instance. Uses the
    quadrupole expansion for accepted nodes, improving accuracy at fixed theta.

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure

    Keyword arguments:
    softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7 gives ~1% accuracy)

    Returns:
    phi - gravitational potential at pos (negative; G=1 units)
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index
    phi = 0
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r += dx[k] * dx[k]
        r = sqrt(r)
        h = max(tree.Softenings[no], softening)  # symmetrized softening

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            if r > 0:  # by default we neglect the self-potential
                if r < h:
                    phi += tree.Masses[no] * PotentialKernel(r, h)
                else:
                    phi -= tree.Masses[no] / r
            no = tree.NextBranch[no]
        elif acceptance_criterion(r, h, tree.Sizes[no], tree.Deltas[no], theta):
            # if we satisfy the criteria for accepting the monopole
            phi -= tree.Masses[no] / r
            # quadrupole correction: -(1/2) dx^T Q dx / r^5
            quad = tree.Quadrupoles[no]
            r5inv = 1 / (r * r * r * r * r)
            for k in range(3):
                for l in range(3):
                    phi -= 0.5 * dx[k] * quad[k, l] * dx[l] * r5inv
            no = tree.NextBranch[no]
        else:  # open the node
            no = tree.FirstSubnode[no]

    return phi
Arguments: 137 | pos - (3,) array containing position of interest 138 | tree - octree instance storing the tree structure 139 | Keyword arguments: 140 | softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential 141 | no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization 142 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. (default 0.7 gives ~1% accuracy) 143 | """ 144 | if no < 0: 145 | no = tree.NumParticles # we default to the top-level node index 146 | g = zeros(3, dtype=np.float64) 147 | dx = np.empty(3, dtype=np.float64) 148 | 149 | while no > -1: # loop until we get to the end of the tree 150 | r2 = 0 151 | for k in range(3): 152 | dx[k] = tree.Coordinates[no, k] - pos[k] 153 | r2 += dx[k] * dx[k] 154 | r = sqrt(r2) 155 | h = max(tree.Softenings[no], softening) 156 | 157 | sum_field = False 158 | 159 | if no < tree.NumParticles: # if we're looking at a leaf/particle 160 | if r > 0: # no self-force 161 | if r < h: # within the softening radius 162 | # fac stores the quantity M( -1: # loop until we get to the end of the tree 201 | r2 = 0 202 | for k in range(3): 203 | dx[k] = tree.Coordinates[no, k] - pos[k] 204 | r2 += dx[k] * dx[k] 205 | r = sqrt(r2) 206 | h = max(tree.Softenings[no], softening) 207 | 208 | if no < tree.NumParticles: # if we're looking at a leaf/particle 209 | if r > 0: # no self-force 210 | if r < h: # within the softening radius 211 | # fac stores the quantity M( rbins[r_idx]): 306 | mbin[r_idx] += tree.Masses[no] * quantity 307 | else: 308 | min_bin = int((np.log10((r - h) / rbins[0]) / np.log10(rbins[1] / rbins[0]))) 309 | max_bin = min(int(np.log10((r + h) / rbins[0]) / np.log10(rbins[1] / rbins[0]) + 1), Nbins) 310 | total_wt = 0 311 | for i in 
range(min_bin, max_bin): # range(min_bin,max_bin): # first the prepass to get the total weight 312 | # (r > rbins[i] and r < rbins[i+1]) or dr < 0.5*tree.Sizes[no]: 313 | i1, i2 = max(r - h, rbins[i]), min(r + h, rbins[i + 1]) 314 | overlap = i2 - i1 315 | if overlap > 0: # if there's overlap 316 | reff = 0.5 * (i1 + i2) # sqrt(rbins[i]*rbins[i+1]) 317 | dr = fabs(r - reff) 318 | wt = max(0, 1 - dr * dr / (h * h)) * overlap 319 | total_wt += wt 320 | 321 | for i in range( 322 | min_bin, max_bin 323 | ): # range(min_bin,max_bin): # then distribute according to the normalized weighting 324 | i1, i2 = max(r - h, rbins[i]), min(r + h, rbins[i + 1]) 325 | overlap = i2 - i1 326 | if overlap > 0: # if there's overlap 327 | reff = 0.5 * (i1 + i2) # sqrt(rbins[i]*rbins[i+1]) 328 | dr = fabs(r - reff) 329 | wt = max(0, 1 - dr * dr / (h * h)) * overlap / total_wt 330 | mbin[i] += wt * tree.Masses[no] * quantity 331 | 332 | 333 | @njit(fastmath=True) 334 | def DensityCorrWalk( 335 | pos, 336 | tree, 337 | rbins, 338 | max_bin_size_ratio=100, 339 | theta=0.7, 340 | no=-1, 341 | boxsize=0, 342 | weighted_binning=False, 343 | ): 344 | """Returns the gravitational potential at position x by performing the Barnes-Hut treewalk using the provided octree instance 345 | 346 | Arguments: 347 | pos - (3,) array containing position of interest 348 | tree - octree object storing the tree structure 349 | 350 | Keyword arguments: 351 | softening - softening radius of the particle at which the force is being evaluated - we use the greater of the target and source softenings when evaluating the softened potential 352 | no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization 353 | theta - cell opening angle used to control force accuracy; smaller is slower (runtime ~ theta^-3) but more accurate. 
@njit(fastmath=True)
def DensityCorrWalk(
    pos,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    no=-1,
    boxsize=0,
    weighted_binning=False,
):
    """Accumulates the total mass in logarithmic radial bins centered on pos,
    by performing a Barnes-Hut-style treewalk of the provided octree instance.
    This is the per-point kernel of the density correlation function.

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure
    rbins - 1D array of radial bin edges (assumed logarithmically spaced)

    Keyword arguments:
    max_bin_size_ratio - nodes are opened until their size is below this factor times the local bin width
    theta - cell opening angle used to control accuracy; smaller is slower but more accurate
    no - index of the top-level node to start from - defaults to the global top-level node
    boxsize - periodic box size; 0 means non-periodic
    weighted_binning - if True, spread each accepted node's mass over the bins it overlaps

    Returns:
    mbin - (len(rbins)-1,) array of total mass per radial bin
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    Nbins = rbins.shape[0] - 1
    mbin = zeros(Nbins)
    counts = zeros(Nbins)
    rmin = rbins[0]
    rmax = rbins[-1]
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    # precompute log-bin bookkeeping: bin index = Nbins*(log10 r - log10 rmin)/dlogr
    logr_min = np.log10(rmin)
    logr_max = np.log10(rmax)
    dlogr = logr_max - logr_min

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            if boxsize > 0:
                dx[k] = NearestImage(dx[k], boxsize)  # periodic wrap
            r += dx[k] * dx[k]

        r = sqrt(r)
        within_bounds = (r > rmin) and (r < rmax)
        if within_bounds:
            logr = np.log10(r)
            r_idx = int(Nbins * (logr - logr_min) / dlogr)
            if no < tree.NumParticles:
                # leaf: deposit its mass directly in the bin it falls in
                mbin[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            elif (
                r
                > max(
                    tree.Sizes[no] / theta + tree.Deltas[no],
                    tree.Sizes[no] * 0.6 + tree.Deltas[no],
                )
            ) and (tree.Sizes[no] < max_bin_size_ratio * (rbins[r_idx + 1] - rbins[r_idx])):
                # node accepted: small enough relative to the local bin width
                if weighted_binning:
                    # spread the node's mass over all bins it overlaps
                    do_weighted_binning(tree, no, rbins, mbin, r, r_idx, 1)
                else:
                    # jitter the deposit radius within the node size to reduce
                    # binning artifacts from lumping the node at its center
                    rnew = r + (np.random.rand() - 0.5) * tree.Sizes[no]
                    r_idx = int(Nbins * (np.log10(rnew) - logr_min) / dlogr)
                    mbin[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]  # open the node
        else:
            # out of radial range: still must decide whether the node could
            # overlap the binned range, otherwise skip it entirely
            if no < tree.NumParticles:
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ):
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]
    return mbin
def DensityCorrFunc_tree(
    pos,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    boxsize=0,
    weighted_binning=False,
):
    """Returns the average mass in radial bins surrounding the given points.

    Arguments:
    pos -- shape (N,3) array of particle positions
    tree -- Octree instance containing the positions, masses, and softenings of the source particles
    rbins -- 1D array of radial bin edges

    Optional arguments:
    max_bin_size_ratio -- controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width
    theta -- cell opening angle used to control accuracy
    boxsize -- periodic box size; 0 means non-periodic
    weighted_binning -- if True, spread each accepted node's mass over the bins it overlaps

    Returns:
    mbins -- array containing the mean mass per bin, averaged over all N points
    """
    Nthreads = get_num_threads()
    # one accumulator row per thread to avoid write contention under prange
    mbin = zeros((Nthreads, rbins.shape[0] - 1))
    # break into chunks for parallelization
    for chunk in prange(Nthreads):
        # strided assignment of points to threads
        for i in range(chunk, pos.shape[0], Nthreads):
            dmbin = DensityCorrWalk(
                pos[i],
                tree,
                rbins,
                max_bin_size_ratio=max_bin_size_ratio,
                theta=theta,
                boxsize=boxsize,
                weighted_binning=weighted_binning,
            )
            for j in range(mbin.shape[1]):
                mbin[chunk, j] += dmbin[j]
    # reduce over threads and average over points
    return mbin.sum(0) / pos.shape[0]


# JIT this function and its parallel version
DensityCorrFunc_tree_parallel = njit(DensityCorrFunc_tree, fastmath=True, parallel=True)
DensityCorrFunc_tree = njit(DensityCorrFunc_tree, fastmath=True)
@njit(fastmath=True)
def VelocityCorrWalk(
    pos,
    vel,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    no=-1,
    boxsize=0,
    weighted_binning=False,
):
    """Accumulates mass-weighted velocity dot products (and the weights) in
    logarithmic radial bins around one point, by walking the octree. This is
    the per-point kernel of the velocity correlation function.

    Arguments:
    pos - (3,) array containing position of interest
    vel - (3,) array containing velocity of point of interest
    tree - octree object storing the tree structure
    rbins - 1D array of radial bin edges (assumed logarithmically spaced)

    Keyword arguments:
    max_bin_size_ratio - nodes are opened until their size is below this factor times the local bin width
    theta - cell opening angle used to control accuracy
    no - index of the top-level node to start from - defaults to the global top-level node
    boxsize - periodic box size; 0 means non-periodic
    weighted_binning - if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    wtsums, binsums - per-bin mass weights and mass-weighted v.v' sums
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    Nbins = rbins.shape[0] - 1
    binsums = zeros(Nbins)
    wtsums = zeros(Nbins)
    # counts = zeros(Nbins)
    rmin = rbins[0]
    rmax = rbins[-1]
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector

    # log-bin bookkeeping
    logr_min = np.log10(rmin)
    logr_max = np.log10(rmax)
    dlogr = logr_max - logr_min

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            if boxsize > 0:
                dx[k] = NearestImage(dx[k], boxsize)  # periodic wrap
            r += dx[k] * dx[k]
        r = sqrt(r)
        # theta = min(1,theta * np.exp(0.5*np.random.normal())) # if we randomize the opening criteria a bit we'll get fewer binning artifacts
        within_bounds = (r > rmin) and (r < rmax)
        if within_bounds:
            logr = np.log10(r)
            r_idx = int(Nbins * (logr - logr_min) / dlogr)
            if no < tree.NumParticles:
                # leaf: mass-weighted dot product with the target velocity
                vprod = 0
                for k in range(3):
                    vprod += vel[k] * tree.Velocities[no][k] * tree.Masses[no]
                binsums[r_idx] += vprod
                wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ) and tree.Sizes[no] < max_bin_size_ratio * (rbins[r_idx + 1] - rbins[r_idx]):
                # node accepted: use its center-of-mass velocity
                vprod = 0
                for k in range(3):
                    vprod += vel[k] * tree.Velocities[no][k]
                if weighted_binning:
                    # spread contribution and weight over overlapped bins
                    do_weighted_binning(tree, no, rbins, binsums, r, r_idx, vprod)
                    do_weighted_binning(tree, no, rbins, wtsums, r, r_idx, 1)
                else:
                    # jitter the deposit radius within the node size to reduce
                    # binning artifacts
                    rnew = r + (np.random.rand() - 0.5) * tree.Sizes[no]
                    r_idx = int(Nbins * (np.log10(rnew) - logr_min) / dlogr)
                    binsums[r_idx] += vprod * tree.Masses[no]
                    wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]  # open the node
        else:
            # out of radial range: skip leaves/accepted nodes, open the rest
            if no < tree.NumParticles:
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ):
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]
    return wtsums, binsums
def VelocityCorrFunc_tree(
    pos,
    vel,
    weight,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    boxsize=0,
    weighted_binning=False,
):
    """Returns the weighted velocity correlation function in radial bins,
    averaged over the given points.

    Arguments:
    pos -- shape (N,3) array of particle positions
    vel -- shape (N,3) array of particle velocities
    weight -- shape (N,) array of per-particle statistical weights
    tree -- Octree instance containing the positions, masses, softenings, and velocities of the source particles
    rbins -- 1D array of radial bin edges

    Optional arguments:
    max_bin_size_ratio -- controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width
    theta -- cell opening angle used to control accuracy
    boxsize -- periodic box size; 0 means non-periodic
    weighted_binning -- if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    array of the weighted mean v.v' per radial bin
    """
    Nthreads = get_num_threads()
    # one accumulator row per thread to avoid write contention under prange
    mbin = zeros((Nthreads, rbins.shape[0] - 1))
    wtsum = zeros_like(mbin)
    # break into chunks for parallelization
    for chunk in prange(Nthreads):
        # strided assignment of points to threads
        for i in range(chunk, pos.shape[0], Nthreads):
            dwtsum, dmbin = VelocityCorrWalk(
                pos[i],
                vel[i],
                tree,
                rbins,
                max_bin_size_ratio=max_bin_size_ratio,
                theta=theta,
                boxsize=boxsize,
                weighted_binning=weighted_binning,
            )
            for j in range(mbin.shape[1]):
                mbin[chunk, j] += dmbin[j] * weight[i]
                wtsum[chunk, j] += weight[i] * dwtsum[j]
    # reduce over threads and normalize by the accumulated weights
    return mbin.sum(0) / wtsum.sum(0)


# JIT this function and its parallel version
VelocityCorrFunc_tree_parallel = njit(VelocityCorrFunc_tree, fastmath=True, parallel=True)
VelocityCorrFunc_tree = njit(VelocityCorrFunc_tree, fastmath=True)
@njit(fastmath=True)
def VelocityStructWalk(
    pos,
    vel,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    no=-1,
    boxsize=0,
    weighted_binning=False,
):
    """Accumulates mass-weighted squared velocity differences (and the weights)
    in logarithmic radial bins around one point, by walking the octree. This is
    the per-point kernel of the velocity structure function.

    Arguments:
    pos - (3,) array containing position of interest
    vel - (3,) array containing velocity of point of interest
    tree - octree object storing the tree structure
    rbins - 1D array of radial bin edges (assumed logarithmically spaced)

    Keyword arguments:
    max_bin_size_ratio - nodes are opened until their size is below this factor times the local bin width
    theta - cell opening angle used to control accuracy
    no - index of the top-level node to start from - defaults to the global top-level node
    boxsize - periodic box size; 0 means non-periodic
    weighted_binning - if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    wtsums, binsums - per-bin mass weights and mass-weighted |v-v'|^2 sums
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    Nbins = rbins.shape[0] - 1
    binsums = zeros(Nbins)
    wtsums = zeros(Nbins)
    rmin = rbins[0]
    rmax = rbins[-1]
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    # log-bin bookkeeping
    logr_min = np.log10(rmin)
    logr_max = np.log10(rmax)
    dlogr = logr_max - logr_min

    while no > -1:
        r = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            if boxsize > 0:
                dx[k] = NearestImage(dx[k], boxsize)  # periodic wrap
            r += dx[k] * dx[k]
        r = sqrt(r)

        # theta = min(1,theta * np.exp(0.5*np.random.normal())) # if we randomize the opening criteria a bit we'll get fewer binning artifacts
        within_bounds = (r > rmin) and (r < rmax)
        if within_bounds:
            logr = np.log10(r)
            r_idx = int(Nbins * (logr - logr_min) / dlogr)
            if no < tree.NumParticles:
                # leaf: mass-weighted squared velocity difference
                vprod = 0
                for k in range(3):
                    vprod += (vel[k] - tree.Velocities[no][k]) * (vel[k] - tree.Velocities[no][k]) * tree.Masses[no]
                binsums[r_idx] += vprod
                wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ) and (tree.Sizes[no] < max_bin_size_ratio * (rbins[r_idx + 1] - rbins[r_idx])):
                # node accepted: difference with center-of-mass velocity, plus
                # the node's internal velocity dispersion
                vprod = 0
                for k in range(3):
                    vprod += (vel[k] - tree.Velocities[no][k]) * (vel[k] - tree.Velocities[no][k])
                vprod += tree.VelocityDisp[no]
                if weighted_binning:
                    # spread contribution and weight over overlapped bins
                    do_weighted_binning(tree, no, rbins, binsums, r, r_idx, vprod)
                    do_weighted_binning(tree, no, rbins, wtsums, r, r_idx, 1)
                else:
                    # jitter the deposit radius within the node size to reduce
                    # binning artifacts
                    rnew = r + (np.random.rand() - 0.5) * tree.Sizes[no]
                    r_idx = int(Nbins * (np.log10(rnew) - logr_min) / dlogr)
                    binsums[r_idx] += vprod * tree.Masses[no]
                    wtsums[r_idx] += tree.Masses[no]
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]  # open the node
        else:
            # out of radial range: skip leaves/accepted nodes, open the rest
            if no < tree.NumParticles:
                no = tree.NextBranch[no]
            elif r > max(
                tree.Sizes[no] / theta + tree.Deltas[no],
                tree.Sizes[no] * 0.6 + tree.Deltas[no],
            ):
                no = tree.NextBranch[no]
            else:
                no = tree.FirstSubnode[no]
    return wtsums, binsums
def VelocityStructFunc_tree(
    pos,
    vel,
    weight,
    tree,
    rbins,
    max_bin_size_ratio=100,
    theta=0.7,
    boxsize=0,
    weighted_binning=False,
):
    """Returns the weighted velocity structure function in radial bins,
    averaged over the given points.

    Arguments:
    pos -- shape (N,3) array of particle positions
    vel -- shape (N,3) array of particle velocities
    weight -- shape (N,) array of per-particle statistical weights
    tree -- Octree instance containing the positions, masses, softenings, and velocities of the source particles
    rbins -- 1D array of radial bin edges

    Optional arguments:
    max_bin_size_ratio -- controls the accuracy of the binning - tree nodes are subdivided until their side length is at most this factor * the radial bin width
    theta -- cell opening angle used to control accuracy
    boxsize -- periodic box size; 0 means non-periodic
    weighted_binning -- if True, spread each accepted node's contribution over the bins it overlaps

    Returns:
    array of the weighted mean |v-v'|^2 per radial bin
    """

    Nthreads = get_num_threads()
    # one accumulator row per thread to avoid write contention under prange
    mbin = zeros((Nthreads, rbins.shape[0] - 1))
    wtsum = zeros_like(mbin)
    # break into chunks for parallelization
    for chunk in prange(Nthreads):
        # strided assignment of points to threads
        for i in range(chunk, pos.shape[0], Nthreads):
            dwtsum, dmbin = VelocityStructWalk(
                pos[i],
                vel[i],
                tree,
                rbins,
                max_bin_size_ratio=max_bin_size_ratio,
                theta=theta,
                boxsize=boxsize,
                weighted_binning=weighted_binning,
            )
            for j in range(mbin.shape[1]):
                mbin[chunk, j] += dmbin[j] * weight[i]
                wtsum[chunk, j] += weight[i] * dwtsum[j]
    # reduce over threads and normalize by the accumulated weights
    return mbin.sum(0) / wtsum.sum(0)


# JIT this function and its parallel version
VelocityStructFunc_tree_parallel = njit(VelocityStructFunc_tree, fastmath=True, parallel=True)
VelocityStructFunc_tree = njit(VelocityStructFunc_tree, fastmath=True)
@njit(fastmath=True)
def ColumnDensityWalk_multiray(pos, rays, tree, no=-1):
    """Returns the integrated column density to infinity from pos, in the directions given by the rays argument

    Arguments:
    pos - (3,) array containing position of interest
    rays - (N_rays, 3) array of unit vectors
    tree - octree object storing the tree structure

    Returns:
    columns - (N_rays,) array of column densities along directions given by rays

    Keyword arguments:
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    N_rays = rays.shape[0]
    columns = np.zeros(N_rays)
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    z_ray = np.zeros(N_rays)  # distance along each ray to the point of closest approach

    fac_density = 3 / (4 * np.pi)  # uniform-sphere density prefactor

    while no > -1:
        # separation and squared distance to node/particle `no`
        r2 = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r2 += dx[k] * dx[k]
        r = sqrt(r2)
        # project the separation onto each ray direction
        for i in range(N_rays):
            z_ray[i] = rays[i, 0] * dx[0] + rays[i, 1] * dx[1] + rays[i, 2] * dx[2]
        h_no = tree.Softenings[no]
        h_no_inv = 1.0 / h_no
        h = h_no  # max(h_no,softening)

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            # add the particle's column if it's in the right direction
            fac = fac_density * tree.Masses[no] * h_no_inv * h_no_inv  # assumes uniform sphere geometry
            for i in range(N_rays):
                # squared impact parameter of the ray w.r.t. the particle
                r_proj = r2 - z_ray[i] * z_ray[i]
                if r_proj < 0:
                    continue  # guard against negative values from rounding
                r_proj = sqrt(r2 - z_ray[i] * z_ray[i])
                q = r_proj * h_no_inv  # impact parameter in units of the particle radius
                if r_proj < h_no:
                    if r > h_no:  # not overlapping the target point - integrate the whole cell
                        if z_ray[i] < 0:
                            continue  # not on the ray
                        # full chord length through the uniform sphere
                        columns[i] += fac * 2 * sqrt(1 - q * q)
                    else:  # overlapping, so need to integrate only a portion of the cell - this case includes the self-shielding if the point is in the tree!
                        dz = z_ray[i] * h_no_inv
                        columns[i] += fac * (dz + sqrt(1 - q * q))

            no = tree.NextBranch[no]

        else:  # we have a node, need to check if it intersects a ray
            node_intersects_ray = False
            # sqrt(3)/2 * size bounds the node's circumscribing sphere
            R_eff = (
                tree.Sizes[no] * 0.8660254037844386 + tree.Deltas[no]
            )  # effective search radius from center of mass
            for i in range(N_rays):
                if r < h + R_eff:  # if node contains the origin then it must intersect all rays
                    node_intersects_ray = True
                    break
                elif (z_ray[i] > 0) and (
                    (r2 - z_ray[i] * z_ray[i]) < (tree.Softenings[no] + R_eff) * (tree.Softenings[no] + R_eff)
                ):  # if perpendicular distance is less than node effective size
                    node_intersects_ray = True
                    break

            if node_intersects_ray:
                no = tree.FirstSubnode[no]  # open the node
            else:
                no = tree.NextBranch[no]  # no intersection with any ray, so go to next node

    return columns
@njit(fastmath=True)
def ColumnDensityWalk_singleray(pos, ray, tree, no=-1):
    """Returns the integrated column density to infinity from pos, along the single direction given by ray

    Arguments:
    pos - (3,) array containing position of interest
    ray - (3,) array with the unit vector of the ray
    tree - octree object storing the tree structure

    Returns:
    column - scalar column density along the ray direction

    Keyword arguments:
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    column = 0
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    z_ray = 0  # distance along the ray to the point of closest approach
    fac_density = 3 / (4 * np.pi)  # uniform-sphere density prefactor

    while no > -1:
        # separation and squared distance to node/particle `no`
        r2 = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r2 += dx[k] * dx[k]
        r = sqrt(r2)
        z_ray = ray[0] * dx[0] + ray[1] * dx[1] + ray[2] * dx[2]
        if r2 - z_ray * z_ray < 0:
            # negative squared impact parameter can only arise from rounding;
            # skip to avoid a NaN from sqrt below
            no = tree.NextBranch[no]
            continue
        h_no = tree.Softenings[no]
        h_no_inv = 1.0 / h_no
        h = h_no  # max(h_no,softening)

        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            # add the particle's column if it's in the right direction
            fac = fac_density * tree.Masses[no] * h_no_inv * h_no_inv
            # assumes uniform sphere geometry
            r_proj = sqrt(r2 - z_ray * z_ray)  # impact parameter of the ray
            q = r_proj * h_no_inv  # impact parameter in units of the particle radius
            if r_proj < h_no:
                if r > h_no:  # not overlapping the target point - integrate the whole cell
                    if z_ray > 0:
                        # full chord length through the uniform sphere
                        column += fac * 2 * sqrt(1 - q * q)
                else:  # overlapping, so need to integrate only a portion of the cell - this case includes the self-shielding if the point is in the tree!
                    dz = z_ray * h_no_inv
                    column += fac * (dz + sqrt(1 - q * q))
            no = tree.NextBranch[no]

        else:  # we have a node, need to check if it intersects a ray
            node_intersects_ray = False
            # sqrt(3)/2 * size bounds the node's circumscribing sphere
            R_eff = tree.Sizes[no] * 0.8660254037844386 + tree.Deltas[no]
            # effective search radius from center of mass
            if r < h + R_eff:
                # if node contains the origin then it must intersect all rays
                node_intersects_ray = True
            elif (z_ray > 0) and (
                (r2 - z_ray * z_ray) < (tree.Softenings[no] + R_eff) * (tree.Softenings[no] + R_eff)
            ):  # if perpendicular distance is less than node effective size
                node_intersects_ray = True

            if node_intersects_ray:
                no = tree.FirstSubnode[no]  # open the node
            else:  # no intersection with the ray, so go to next node
                no = tree.NextBranch[no]
    return column
@njit(fastmath=True)
def ColumnDensityWalk_binned(pos, tree, theta=0.5, no=-1):
    """Returns the average column density integrated to infinity from pos, in
    each of 6 equal angular bins covering the sphere (+/-x, +/-y, +/-z).

    Arguments:
    pos - (3,) array containing position of interest
    tree - octree object storing the tree structure

    Returns:
    columns - shape (6,) array of average column densities in the 6 equal bins on the sphere

    Keyword arguments:
    theta - cell opening angle used to control accuracy
    no - index of the top-level node whose field is being summed - defaults to the global top-level node, can use a subnode in principle for e.g. parallelization
    """
    if no < 0:
        no = tree.NumParticles  # we default to the top-level node index

    n_bins = 6
    column = np.zeros(n_bins)
    dx = np.empty(3, dtype=np.float64)  # scratch separation vector
    angular_bin_size = (4 * np.pi) / n_bins  # solid angle per bin

    while no > -1:
        r2 = 0
        for k in range(3):
            dx[k] = tree.Coordinates[no, k] - pos[k]
            r2 += dx[k] * dx[k]

        h_no = tree.Softenings[no]
        h = h_no
        if no < tree.NumParticles:  # if we're looking at a leaf/particle
            # add the particle's column if it's in the right direction
            bin = angular_bin(dx)
            if r2 > h * h:
                # distant particle: all mass lands in one angular bin
                col_bin = tree.Masses[no] / r2 / angular_bin_size
            else:  # interpolate between full overlap case and no overlap
                col0 = tree.Masses[no] * (3 / (4 * np.pi * h * h))
                fac = sqrt(r2) / h  # 0 to 1 when there is overlap
                col_bin = col0 * fac * 2
                # remainder is distributed isotropically over all bins
                col_isotropic = (1 - fac) * col0
                for k in range(n_bins):
                    column[k] += col_isotropic
            column[bin] += col_bin
            no = tree.NextBranch[no]
        elif acceptance_criterion(
            sqrt(r2), h, tree.Sizes[no], tree.Deltas[no], theta
        ):  # we can put the whole node in a bin
            column[angular_bin(dx)] += tree.Masses[no] / r2 / angular_bin_size
            no = tree.NextBranch[no]
        else:
            no = tree.FirstSubnode[no]  # open the node
    return column
def ColumnDensity_tree(pos_target, tree, rays=None, randomize_rays=False, theta=0.7):
    """Returns the column density integrated to infinity from pos_target along rays, given the mass distribution in an Octree

    Parameters
    ----------
    pos_target: array_like
        shape (N,3) array of target particle positions where you want to know the potential.
    tree: Octree
        Octree instance initialized with the positions, masses, and softenings of the source particles.
    rays: array_like, optional
        Shape (N_rays,3) array of ray direction unit vectors. If None then we
        instead compute average column densities in a 6-bin tesselation of the sphere.
    randomize_rays: bool, optional
        Randomly orients the ray grid for each particle.
    theta: float, optional
        Opening angle used by the angular-binned estimator (default 0.7).

    Returns
    -------
    result: array_like
        shape (N, N_rays) (or (N, 6) when rays is None) array of column densities
    """
    set_parallel_chunksize(10000)

    if rays is None:  # do angular-binned column density
        result = empty((pos_target.shape[0], 6))
        for i in prange(pos_target.shape[0]):
            result[i] = ColumnDensityWalk_binned(pos_target[i], tree, theta)
    elif randomize_rays:
        # use the multi-ray treewalk; more efficient
        result = empty((pos_target.shape[0], len(rays)))
        for i in prange(pos_target.shape[0]):
            # per-particle random rotation of the whole ray grid
            rays_random = rays @ random_rotation(i)
            result[i] = ColumnDensityWalk_multiray(pos_target[i], rays_random, tree)
    else:
        result = empty((pos_target.shape[0], len(rays)))
        for i in range(rays.shape[0]):
            # outer loop over rays - empirically better access pattern
            for j in prange(pos_target.shape[0]):
                result[j, i] = ColumnDensityWalk_singleray(pos_target[j], rays[i], tree)
    return result


# JIT this function and its parallel version
ColumnDensity_tree_parallel = njit(ColumnDensity_tree, fastmath=True, parallel=True)
ColumnDensity_tree = njit(ColumnDensity_tree, fastmath=True)