├── .coveragerc ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ └── tests.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── AUTHORS.rst ├── CMakeLists.txt ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── .gitignore ├── Makefile ├── _static │ ├── js │ │ └── analytics.js │ ├── notebooks │ │ ├── advanced-kernels.ipynb │ │ ├── bayesopt.ipynb │ │ ├── first.ipynb │ │ ├── hyper.ipynb │ │ ├── mixture.ipynb │ │ ├── model.ipynb │ │ ├── new-kernel.ipynb │ │ └── scaling.ipynb │ └── quickstart │ │ └── conditional.png ├── conf.py ├── hodlr.ipynb ├── index.rst ├── requirements.txt ├── tutorials │ ├── bayesopt.rst │ ├── bayesopt_files │ │ ├── bayesopt_4_0.png │ │ └── bayesopt_6_1.png │ ├── first.rst │ ├── first_files │ │ ├── first_12_0.png │ │ ├── first_4_0.png │ │ └── first_6_0.png │ ├── hyper.rst │ ├── hyper_files │ │ ├── hyper_12_0.png │ │ ├── hyper_18_0.png │ │ ├── hyper_18_1.png │ │ └── hyper_4_0.png │ ├── index.rst │ ├── mixture.rst │ ├── mixture_files │ │ ├── mixture_4_0.png │ │ └── mixture_8_0.png │ ├── model.rst │ ├── model_files │ │ ├── model_13_0.png │ │ ├── model_15_0.png │ │ ├── model_22_0.png │ │ ├── model_23_0.png │ │ └── model_5_0.png │ ├── new-kernel.rst │ ├── new-kernel_files │ │ ├── new-kernel_10_0.png │ │ └── new-kernel_8_0.png │ ├── scaling.rst │ ├── scaling_files │ │ └── scaling_16_0.png │ └── tutorial_rst.tpl └── user │ ├── gp.rst │ ├── index.rst │ ├── kernels.rst.template │ ├── modeling.rst │ ├── quickstart.rst │ └── solvers.rst ├── generate_kernels.py ├── kernels ├── Constant.yml ├── Cosine.yml ├── DotProduct.yml ├── Empty.yml ├── Exp.yml ├── ExpSine2.yml ├── ExpSquared.yml ├── Linear.yml ├── LocalGaussian.yml ├── Matern32.yml ├── Matern52.yml ├── MaternGeneral.yml.example ├── Polynomial.yml └── RationalQuadratic.yml ├── paper ├── .gitignore ├── Makefile ├── aasjournal.bst ├── aastex61.cls ├── george.bib └── ms.tex ├── pyproject.toml ├── scripts └── compile_kernels.py ├── src └── george │ ├── __init__.py │ ├── gp.py │ ├── include │ └── george │ │ ├── exceptions.h │ │ ├── george.h │ │ ├── hodlr.h │ │ ├── kernels.h │ │ ├── metrics.h │ │ ├── parser.h │ │ └── subspace.h │ ├── kernel_interface.cpp │ ├── kernels.py │ ├── metrics.py │ ├── modeling.py │ ├── solvers │ ├── __init__.py │ ├── _hodlr.cpp │ ├── basic.py │ ├── hodlr.py │ └── trivial.py │ └── utils.py ├── templates ├── kernels.h ├── kernels.py └── parser.h └── tests ├── test_gp.py ├── test_kernels.py ├── test_metrics.py ├── test_modeling.py ├── test_pickle.py ├── test_solvers.py └── test_tutorial.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | docs/* 4 | *__init__* 5 | relative_files = True 6 | 7 | [report] 8 | exclude_lines = 9 | pragma: no cover 10 | raise NotImplementedError 11 | raise ImportError 12 | except ImportError 13 | def __len__ 14 | def __repr__ 15 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | vendor/* linguist-vendored 2 | paper/* linguist-documentation 3 | *.h linguist-language=C++ 4 | *.ipynb filter=nbstripout 5 | *.ipynb diff=ipynb 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | 
-------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | branches: 5 | - main 6 | tags: 7 | - "*" 8 | pull_request: 9 | branches: 10 | - main 11 | workflow_dispatch: 12 | inputs: 13 | prerelease: 14 | description: "Run a pre-release, testing the build" 15 | required: false 16 | type: boolean 17 | default: false 18 | 19 | jobs: 20 | tests: 21 | name: "py${{ matrix.python-version }} / ${{ matrix.os }}" 22 | runs-on: ${{ matrix.os }} 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | python-version: ["3.9", "3.10", "3.11"] 27 | os: [ubuntu-latest] 28 | include: 29 | - python-version: "3.10" 30 | os: macos-latest 31 | - python-version: "3.10" 32 | os: windows-latest 33 | steps: 34 | - name: Clone the repo 35 | uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | submodules: recursive 39 | - name: Set up Python ${{ matrix.python-version }} 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: ${{ matrix.python-version }} 43 | - name: Install dependencies 44 | run: | 45 | python -m pip install -U pip pytest 46 | python -m pip install . 47 | - name: Run tests 48 | run: python -m pytest -v tests 49 | 50 | build_wheels: 51 | runs-on: ${{ matrix.os }} 52 | strategy: 53 | matrix: 54 | os: 55 | - "ubuntu-22.04" 56 | - "macos-14" 57 | - "windows-latest" 58 | steps: 59 | - uses: actions/checkout@v4 60 | with: 61 | submodules: true 62 | fetch-depth: 0 63 | - uses: pypa/cibuildwheel@v2.23.3 64 | - uses: actions/upload-artifact@v4 65 | with: 66 | name: binary-${{ matrix.os }} 67 | path: ./wheelhouse/*.whl 68 | 69 | build_sdist: 70 | runs-on: ubuntu-latest 71 | steps: 72 | - uses: actions/checkout@v4 73 | with: 74 | submodules: true 75 | fetch-depth: 0 76 | - uses: actions/setup-python@v5 77 | name: Install Python 78 | with: 79 | python-version: "3.10" 80 | - name: Install dependencies 81 | run: | 82 | python -m pip install -U pip 83 | python -m pip install -U build twine 84 | - name: Build sdist 85 | run: python -m build --sdist . 
86 | - name: Check the sdist 87 | run: python -m twine check dist/*.tar.gz 88 | - name: Check sdist build 89 | run: | 90 | python -m pip install dist/*.tar.gz 91 | python -c "import george" 92 | - uses: actions/upload-artifact@v4 93 | with: 94 | name: sdist 95 | path: dist/*.tar.gz 96 | 97 | upload_pypi: 98 | environment: 99 | name: pypi 100 | url: https://pypi.org/p/george 101 | permissions: 102 | id-token: write 103 | needs: [build_wheels, build_sdist] 104 | runs-on: ubuntu-latest 105 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') 106 | steps: 107 | - uses: actions/download-artifact@v4 108 | with: 109 | path: dist 110 | merge-multiple: true 111 | - uses: pypa/gh-action-pypi-publish@v1.12.4 112 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | *.o 4 | *.so 5 | *.png 6 | *.gif 7 | dist 8 | *.egg-info 9 | .syntastic_c_config 10 | .syntastic_cpp_config 11 | build 12 | *.cpp 13 | .coverage 14 | src/george/george_version.py 15 | kernels/MyLocalGaussian.yml 16 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/eigen"] 2 | path = vendor/eigen 3 | url = https://gitlab.com/libeigen/eigen.git 4 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | submodules: 4 | include: all 5 | 6 | build: 7 | os: ubuntu-20.04 8 | tools: 9 | python: "3.10" 10 | 11 | python: 12 | install: 13 | - method: pip 14 | path: . 15 | - requirements: docs/requirements.txt 16 | 17 | sphinx: 18 | builder: dirhtml 19 | configuration: docs/conf.py 20 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | George is being developed by `Dan Foreman-Mackey (@dfm) 2 | `_ with many contributions from: 3 | 4 | - `Ruth Angus (@RuthAngus) `_ 5 | - `Jonah Bernhard (@jbernhard) `_ 6 | - `Miguel de Val-Borro (@migueldvb) `_ 7 | - `Gregory Hitz (@hitzg) `_ 8 | - `Stephan Hoyer (@shoyer) `_ 9 | - `Simon Walker (@mindriot101) `_ 10 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15...3.26) 2 | project(${SKBUILD_PROJECT_NAME} LANGUAGES CXX) 3 | 4 | set(PYBIND11_NEWPYTHON ON) 5 | find_package(pybind11 CONFIG REQUIRED) 6 | 7 | include_directories( 8 | "src/george/include" 9 | "vendor/eigen" 10 | ) 11 | 12 | pybind11_add_module(kernel_interface "src/george/kernel_interface.cpp") 13 | install(TARGETS kernel_interface LIBRARY DESTINATION .) 14 | 15 | pybind11_add_module(_hodlr "src/george/solvers/_hodlr.cpp") 16 | install(TARGETS _hodlr LIBRARY DESTINATION ./solvers) 17 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. 
:changelog: 2 | 3 | 0.4.0 (2021-02-15) 4 | ++++++++++++++++++ 5 | 6 | - Updated packaging and installation issues 7 | - Updated CI and Python version testing 8 | 9 | 0.3.1 (2018-01-08) 10 | ++++++++++++++++++ 11 | 12 | - Fixed issue where george would not compile with GCC<4.9 because of a 13 | compiler bug 14 | - Fixed bug when re-using HODLR factorizations 15 | 16 | 0.3.0 (2017-07-12) 17 | ++++++++++++++++++ 18 | 19 | - New modeling protocol for parameter fitting 20 | - Rewritten HODLR algorithm - more stable and better performance 21 | - New interface for "easily" implementing new kernel functions 22 | 23 | pre-0.3 24 | +++++++ 25 | 26 | - This project has had a long and tumultuous history 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2021 Daniel Foreman-Mackey and contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | 2 | graft vendor/eigen/Eigen 3 | 4 | exclude .* 5 | prune .github 6 | prune docs 7 | prune paper 8 | prune kernels 9 | prune templates 10 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | George 2 | ====== 3 | 4 | **Fast and flexible Gaussian Process regression in Python.** 5 | 6 | .. image:: https://img.shields.io/badge/GitHub-dfm%2Fgeorge-blue.svg?style=flat 7 | :target: https://github.com/dfm/george 8 | .. image:: http://img.shields.io/badge/license-MIT-blue.svg?style=flat 9 | :target: https://github.com/dfm/george/blob/main/LICENSE 10 | .. image:: https://github.com/dfm/george/workflows/Tests/badge.svg?style=flat 11 | :target: https://github.com/dfm/george/actions?query=workflow%3ATests 12 | .. image:: https://coveralls.io/repos/github/dfm/george/badge.svg?branch=main&style=flat 13 | :target: https://coveralls.io/github/dfm/george?branch=main 14 | .. image:: https://readthedocs.org/projects/george/badge/?version=latest 15 | :target: http://george.readthedocs.io/en/latest/?badge=latest 16 | 17 | Read the documentation at: `george.readthedocs.io `_. 
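If you just want a quick feel for the API before heading to the documentation, here is a minimal sketch based on the quickstart tutorial; the synthetic data and the choice of an ``ExpSquaredKernel`` are purely illustrative.

.. code-block:: python

    import numpy as np
    import george
    from george import kernels

    # Simulate some noisy observations of a smooth function.
    x = 10 * np.sort(np.random.rand(15))
    yerr = 0.2 * np.ones_like(x)
    y = np.sin(x) + yerr * np.random.randn(len(x))

    # Build a GP with a squared-exponential kernel and condition it on the data.
    gp = george.GP(np.var(y) * kernels.ExpSquaredKernel(0.5))
    gp.compute(x, yerr)

    # Predictive mean and variance on a fine grid.
    x_pred = np.linspace(0, 10, 500)
    pred, pred_var = gp.predict(y, x_pred, return_var=True)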
18 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | !*.png 3 | user/kernels.rst 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | default: dirhtml 18 | TUTORIALS = tutorials/hyper.rst tutorials/model.rst tutorials/new-kernel.rst \ 19 | tutorials/first.rst tutorials/scaling.rst tutorials/mixture.rst \ 20 | tutorials/bayesopt.rst 21 | 22 | tutorials/%.rst: _static/notebooks/%.ipynb 23 | jupyter nbconvert --template tutorials/tutorial_rst --to rst $< --output-dir tutorials 24 | 25 | .PHONY: clean 26 | clean: 27 | rm -rf $(BUILDDIR)/* $(TUTORIALS) 28 | 29 | .PHONY: html 30 | html: $(TUTORIALS) 31 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 32 | @echo 33 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 34 | 35 | .PHONY: dirhtml 36 | dirhtml: $(TUTORIALS) 37 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 38 | @echo 39 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 40 | 41 | .PHONY: singlehtml 42 | singlehtml: $(TUTORIALS) 43 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 44 | @echo 45 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
46 | -------------------------------------------------------------------------------- /docs/_static/js/analytics.js: -------------------------------------------------------------------------------- 1 | (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ 2 | (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), 3 | m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) 4 | })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); 5 | 6 | ga('create', 'UA-22909046-1', 'auto'); 7 | ga('require', 'displayfeatures'); 8 | ga('send', 'pageview'); 9 | -------------------------------------------------------------------------------- /docs/_static/quickstart/conditional.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/_static/quickstart/conditional.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import glob 5 | import yaml 6 | import george 7 | 8 | # Inject the kernel docs 9 | d = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | with open(os.path.join(d, "docs", "user", "kernels.rst.template"), "r") as f: 11 | TEMPLATE = f.read() 12 | 13 | fns = glob.glob(os.path.join(d, "kernels", "*.yml")) 14 | if len(fns): 15 | specs = [] 16 | for i, fn in enumerate(fns): 17 | with open(fn, "r") as f: 18 | specs.append(yaml.safe_load(f.read())) 19 | tokens = [] 20 | for spec in specs: 21 | if spec["stationary"]: 22 | tokens += [ 23 | ".. autoclass:: george.kernels.{0}".format(spec["name"]) 24 | ] 25 | TEMPLATE = TEMPLATE.replace("STATIONARYKERNELS", "\n".join(tokens)) 26 | tokens = [] 27 | for spec in specs: 28 | if not spec["stationary"]: 29 | tokens += [ 30 | ".. autoclass:: george.kernels.{0}".format(spec["name"]) 31 | ] 32 | TEMPLATE = TEMPLATE.replace("OTHERKERNELS", "\n".join(tokens)) 33 | 34 | with open(os.path.join(d, "docs", "user", "kernels.rst"), "w") as f: 35 | f.write(TEMPLATE) 36 | 37 | extensions = [ 38 | "sphinx.ext.autodoc", 39 | "sphinx.ext.napoleon", 40 | "sphinx.ext.mathjax", 41 | ] 42 | templates_path = ["_templates"] 43 | source_suffix = ".rst" 44 | master_doc = "index" 45 | 46 | # General information about the project. 47 | project = "george" 48 | copyright = "2012-2023 Dan Foreman-Mackey" 49 | 50 | version = george.__version__ 51 | release = george.__version__ 52 | 53 | exclude_patterns = ["_build"] 54 | pygments_style = "sphinx" 55 | 56 | # Readthedocs. 57 | html_theme = "pydata_sphinx_theme" 58 | html_title = "george" 59 | htmp_theme_options = dict( 60 | analytics_id="analytics_id", 61 | ) 62 | # html_context = dict( 63 | # display_github=True, 64 | # github_user="dfm", 65 | # github_repo="george", 66 | # github_version="main", 67 | # conf_py_path="/docs/", 68 | # ) 69 | html_static_path = ["_static"] 70 | # html_show_sourcelink = False 71 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | George 2 | ====== 3 | 4 | George is a fast and flexible Python library for Gaussian Process (GP) 5 | Regression. 
A full introduction to the theory of Gaussian Processes is beyond 6 | the scope of this documentation but the best resource is available for free 7 | online: `Rasmussen & Williams (2006) `_. 8 | 9 | Unlike some other GP implementations, george is focused on efficiently 10 | evaluating the marginalized likelihood of a dataset under a GP prior, even as 11 | this dataset gets Big™. As you'll see in these pages of documentation, the 12 | module exposes quite a few other features but it is designed to be used 13 | alongside your favorite `non-linear optimization 14 | `_ 15 | or `posterior inference `_ library for the best results. 16 | 17 | George is being actively developed in `a public repository on GitHub 18 | `_ so if you have any trouble, `open an issue 19 | `_ there. 20 | 21 | .. image:: https://img.shields.io/badge/GitHub-dfm%2Fgeorge-blue.svg?style=flat 22 | :target: https://github.com/dfm/george 23 | .. image:: http://img.shields.io/badge/license-MIT-blue.svg?style=flat 24 | :target: https://github.com/dfm/george/blob/main/LICENSE 25 | .. image:: https://github.com/dfm/george/workflows/Tests/badge.svg?style=flat 26 | :target: https://github.com/dfm/george/actions?query=workflow%3ATests 27 | .. image:: https://coveralls.io/repos/github/dfm/george/badge.svg?branch=main&style=flat 28 | :target: https://coveralls.io/github/dfm/george?branch=main 29 | 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | 34 | user/index 35 | tutorials/index 36 | 37 | 38 | Contributors 39 | ------------ 40 | 41 | .. include:: ../AUTHORS.rst 42 | 43 | 44 | License & Attribution 45 | --------------------- 46 | 47 | Copyright 2012-2023 Daniel Foreman-Mackey and contributors. 48 | 49 | George is being developed by `Dan Foreman-Mackey `_ in a 50 | `public GitHub repository `_. 51 | The source code is made available under the terms of the MIT license. 52 | 53 | If you make use of this code, please cite `the paper which is in 54 | IEEE Transactions on Pattern Analysis and Machine Intelligence 55 | `_: 56 | 57 | .. code-block:: tex 58 | 59 | @ARTICLE{2015ITPAM..38..252A, 60 | author = {{Ambikasaran}, Sivaram and {Foreman-Mackey}, Daniel and {Greengard}, Leslie and {Hogg}, David W. and {O'Neil}, Michael}, 61 | title = "{Fast Direct Methods for Gaussian Processes}", 62 | journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, 63 | keywords = {Mathematics - Numerical Analysis, Astrophysics - Instrumentation and Methods for Astrophysics, Mathematics - Statistics Theory, Mathematics - Numerical Analysis, Astrophysics - Instrumentation and Methods for Astrophysics, Mathematics - Statistics Theory}, 64 | year = 2015, 65 | month = jun, 66 | volume = {38}, 67 | pages = {252}, 68 | doi = {10.1109/TPAMI.2015.2448083}, 69 | archivePrefix = {arXiv}, 70 | eprint = {1403.6015}, 71 | primaryClass = {math.NA}, 72 | adsurl = {https://ui.adsabs.harvard.edu/abs/2015ITPAM..38..252A}, 73 | adsnote = {Provided by the SAO/NASA Astrophysics Data System} 74 | } 75 | 76 | 77 | Changelog 78 | --------- 79 | 80 | .. include:: ../HISTORY.rst 81 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | pydata-sphinx-theme 3 | -------------------------------------------------------------------------------- /docs/tutorials/bayesopt.rst: -------------------------------------------------------------------------------- 1 | .. 
note:: This tutorial was generated from an IPython notebook that can be 2 | downloaded `here <../../_static/notebooks/bayesopt.ipynb>`_. 3 | 4 | .. _bayesopt: 5 | 6 | 7 | Bayesian optimization 8 | ===================== 9 | 10 | This notebook was made with the following version of george: 11 | 12 | .. code:: python 13 | 14 | import george 15 | george.__version__ 16 | 17 | 18 | 19 | 20 | .. parsed-literal:: 21 | 22 | '0.3.1' 23 | 24 | 25 | 26 | In this tutorial, we'll show a very simple example of implementing 27 | "Bayesian optimization" using george. Now's not the time to get into a 28 | discussion of the issues with the name given to these methods, but I 29 | think that the "Bayesian" part of the title comes from the fact that the 30 | method relies on the (prior) assumption that the objective function is 31 | smooth. The basic idea is that you can reduce the number of function 32 | evaluations needed to minimize a black-box function by using a GP as a 33 | surrogate model. This can be huge if evaluating your model is 34 | computationally expensive. I think that the classic reference is `Jones 35 | et al. (1998) `__ and the 36 | example here will look a bit like their section 4.1. 37 | 38 | First, we'll define the scalar objective, parametrized by :math:`\theta`, 39 | that we want to minimize in the range :math:`-5 \le \theta \le 5`. 40 | 41 | .. code:: python 42 | 43 | import numpy as np 44 | import matplotlib.pyplot as plt 45 | 46 | def objective(theta): 47 | return -0.5 * np.exp(-0.5*(theta - 2)**2) - 0.5 * np.exp(-0.5 * (theta + 2.1)**2 / 5) + 0.3 48 | 49 | t = np.linspace(-5, 5, 5000) 50 | 51 | plt.figure(figsize=(8, 5)) 52 | plt.plot(t, objective(t)) 53 | plt.ylim(-0.37, 0.37) 54 | plt.xlim(-5, 5) 55 | plt.xlabel("$\\theta$") 56 | plt.ylabel("objective"); 57 | 58 | 59 | 60 | .. image:: bayesopt_files/bayesopt_4_0.png 61 | 62 | 63 | Now, for the "Bayesian" optimization, the basic procedure that we'll 64 | follow is: 65 | 66 | 1. Start by evaluating the model at a set of points. In this case, we'll 67 | start with a uniform grid in :math:`\theta`. 68 | 2. Fit a GP (optimize the hyperparameters) to the set of training 69 | points. 70 | 3. Find the input coordinate that maximizes the "expected improvement" 71 | (see Section 4 of Jones+ 1998). For simplicity, we simply use a grid 72 | search to maximize this, but this should probably be a numerical 73 | optimization in any real application of this method. 74 | 4. At this new coordinate, run the model and add this as a new training 75 | point. 76 | 5. Return to step 2 until converged. We'll judge convergence using 77 | relative changes in the location of the minimum. 78 | 79 | .. 
code:: python 80 | 81 | from george import kernels 82 | from scipy.special import erf 83 | from scipy.optimize import minimize 84 | 85 | N_init = 4 86 | train_theta = np.linspace(-5, 5, N_init + 1)[1:] 87 | train_theta -= 0.5 * (train_theta[1] - train_theta[0]) 88 | train_f = objective(train_theta) 89 | 90 | gp = george.GP(np.var(train_f) * kernels.Matern52Kernel(3.0), 91 | fit_mean=True) 92 | gp.compute(train_theta) 93 | 94 | def nll(params): 95 | gp.set_parameter_vector(params) 96 | g = gp.grad_log_likelihood(train_f, quiet=True) 97 | return -gp.log_likelihood(train_f, quiet=True), -g 98 | 99 | fig, axes = plt.subplots(2, 2, figsize=(8, 6)) 100 | j = 0 101 | old_min = None 102 | converged = False 103 | 104 | for i in range(1000): 105 | # Update the GP parameters 106 | soln = minimize(nll, gp.get_parameter_vector(), jac=True) 107 | 108 | # Compute the acquisition function 109 | mu, var = gp.predict(train_f, t, return_var=True) 110 | std = np.sqrt(var) 111 | f_min = np.min(train_f) 112 | chi = (f_min - mu) / std 113 | Phi = 0.5 * (1.0 + erf(chi / np.sqrt(2))) 114 | phi = np.exp(-0.5 * chi**2) / np.sqrt(2*np.pi*var) 115 | A_ei = (f_min - mu) * Phi + var * phi 116 | A_max = t[np.argmax(A_ei)] 117 | 118 | # Add a new point 119 | train_theta = np.append(train_theta, A_max) 120 | train_f = np.append(train_f, objective(train_theta[-1])) 121 | gp.compute(train_theta) 122 | 123 | # Estimate the minimum - I'm sure that there's a better way! 124 | i_min = np.argmin(mu) 125 | sl = slice(max(0, i_min - 1), min(len(t), i_min + 2)) 126 | ts = t[sl] 127 | D = np.vander(np.arange(len(ts)).astype(float)) 128 | w = np.linalg.solve(D, mu[sl]) 129 | minimum = ts[0] + (ts[1] - ts[0]) * np.roots(np.polyder(w[::-1])) 130 | 131 | # Check convergence 132 | if i > 0 and np.abs((old_min - minimum) / minimum) < 1e-5: 133 | converged = True 134 | old_min = float(minimum[0]) 135 | 136 | # Make the plots 137 | if converged or i in [0, 1, 2]: 138 | ax = axes.flat[j] 139 | j += 1 140 | ax.plot(t, objective(t)) 141 | ax.plot(t, mu, "k") 142 | ax.plot(train_theta[:-1], train_f[:-1], "or") 143 | ax.plot(train_theta[-1], train_f[-1], "og") 144 | ax.fill_between(t, mu+std, mu-std, color="k", alpha=0.1) 145 | if i <= 3: 146 | ax2 = ax.twinx() 147 | ax2.plot(t, A_ei, "g", lw=0.75) 148 | ax2.set_yticks([]) 149 | ax.axvline(old_min, color="k", lw=0.75) 150 | ax.set_ylim(-0.37, 0.37) 151 | ax.set_xlim(-5, 5) 152 | ax.set_yticklabels([]) 153 | ax.annotate("step {0}; {1:.3f}".format(i+1, old_min), xy=(0, 1), 154 | xycoords="axes fraction", ha="left", va="top", 155 | xytext=(5, -5), textcoords="offset points", 156 | fontsize=14) 157 | 158 | if converged: 159 | break 160 | 161 | plt.tight_layout() 162 | 163 | print("{0} model evaluations".format(len(train_f))) 164 | 165 | 166 | .. parsed-literal:: 167 | 168 | 10 model evaluations 169 | 170 | 171 | 172 | .. image:: bayesopt_files/bayesopt_6_1.png 173 | 174 | 175 | There's a lot going on in these plots. Each panel shows the results 176 | after a certain iteration (indicated in the top left corner of the 177 | panel). In each panel: 178 | 179 | 1. The blue line is the true objective function. 180 | 2. The black line and gray contours indicate the current estimate of the 181 | objective using the GP model. 182 | 3. The green line is the expected improvement. 183 | 4. The red points are the training set. 184 | 5. The green point is the new point that was added at this step. 185 | 6. The vertical black line is the current estimate of the location 186 | minimum. 
This is also indicated in the top left corner of the panel. 187 | 188 | As you can see, only 10 model evaluations (including the original 189 | training set) were needed to converge to the correct minimum. In this 190 | simple example, there are certainly other methods that could have easily 191 | been used to minimize this function, but you can imagine that this 192 | method could be useful for cases where ``objective`` is very expensive 193 | to compute. 194 | 195 | -------------------------------------------------------------------------------- /docs/tutorials/bayesopt_files/bayesopt_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/bayesopt_files/bayesopt_4_0.png -------------------------------------------------------------------------------- /docs/tutorials/bayesopt_files/bayesopt_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/bayesopt_files/bayesopt_6_1.png -------------------------------------------------------------------------------- /docs/tutorials/first.rst: -------------------------------------------------------------------------------- 1 | .. note:: This tutorial was generated from an IPython notebook that can be 2 | downloaded `here <../../_static/notebooks/first.ipynb>`_. 3 | 4 | .. _first: 5 | 6 | 7 | A gentle introduction to Gaussian Process Regression 8 | ==================================================== 9 | 10 | This notebook was made with the following version of george: 11 | 12 | .. code:: python 13 | 14 | import george 15 | george.__version__ 16 | 17 | 18 | 19 | 20 | .. parsed-literal:: 21 | 22 | '0.3.1' 23 | 24 | 25 | 26 | We'll start by generating some fake data (from a sinusoidal model) with 27 | error bars: 28 | 29 | .. code:: python 30 | 31 | import numpy as np 32 | import matplotlib.pyplot as pl 33 | 34 | np.random.seed(1234) 35 | x = 10 * np.sort(np.random.rand(15)) 36 | yerr = 0.2 * np.ones_like(x) 37 | y = np.sin(x) + yerr * np.random.randn(len(x)) 38 | 39 | pl.errorbar(x, y, yerr=yerr, fmt=".k", capsize=0) 40 | pl.xlim(0, 10) 41 | pl.ylim(-1.45, 1.45) 42 | pl.xlabel("x") 43 | pl.ylabel("y"); 44 | 45 | 46 | 47 | .. image:: first_files/first_4_0.png 48 | 49 | 50 | Now, we'll choose a kernel (covariance) function to model these data, 51 | assume a zero mean model, and predict the function values across the 52 | full range. The full kernel specification language is documented 53 | `here <../../user/kernels/>`__ but here's an example for this dataset: 54 | 55 | .. code:: python 56 | 57 | from george import kernels 58 | 59 | kernel = np.var(y) * kernels.ExpSquaredKernel(0.5) 60 | gp = george.GP(kernel) 61 | gp.compute(x, yerr) 62 | 63 | x_pred = np.linspace(0, 10, 500) 64 | pred, pred_var = gp.predict(y, x_pred, return_var=True) 65 | 66 | pl.fill_between(x_pred, pred - np.sqrt(pred_var), pred + np.sqrt(pred_var), 67 | color="k", alpha=0.2) 68 | pl.plot(x_pred, pred, "k", lw=1.5, alpha=0.5) 69 | pl.errorbar(x, y, yerr=yerr, fmt=".k", capsize=0) 70 | pl.plot(x_pred, np.sin(x_pred), "--g") 71 | pl.xlim(0, 10) 72 | pl.ylim(-1.45, 1.45) 73 | pl.xlabel("x") 74 | pl.ylabel("y"); 75 | 76 | 77 | 78 | .. image:: first_files/first_6_0.png 79 | 80 | 81 | The ``gp`` model provides a handler for computing the marginalized 82 | likelihood of the data under this model: 83 | 84 | .. 
code:: python 85 | 86 | print("Initial ln-likelihood: {0:.2f}".format(gp.log_likelihood(y))) 87 | 88 | 89 | .. parsed-literal:: 90 | 91 | Initial ln-likelihood: -11.82 92 | 93 | 94 | So we can use this—combined with scipy's 95 | `minimize `__ 96 | function—to fit for the maximum likelihood parameters: 97 | 98 | .. code:: python 99 | 100 | from scipy.optimize import minimize 101 | 102 | def neg_ln_like(p): 103 | gp.set_parameter_vector(p) 104 | return -gp.log_likelihood(y) 105 | 106 | def grad_neg_ln_like(p): 107 | gp.set_parameter_vector(p) 108 | return -gp.grad_log_likelihood(y) 109 | 110 | result = minimize(neg_ln_like, gp.get_parameter_vector(), jac=grad_neg_ln_like) 111 | print(result) 112 | 113 | gp.set_parameter_vector(result.x) 114 | print("\nFinal ln-likelihood: {0:.2f}".format(gp.log_likelihood(y))) 115 | 116 | 117 | .. parsed-literal:: 118 | 119 | fun: 9.225282556043894 120 | hess_inv: array([[ 0.52320809, 0.30041273], 121 | [ 0.30041273, 0.40708074]]) 122 | jac: array([ -5.07047669e-06, 2.56077806e-06]) 123 | message: 'Optimization terminated successfully.' 124 | nfev: 10 125 | nit: 8 126 | njev: 10 127 | status: 0 128 | success: True 129 | x: array([-0.48730733, 0.60407551]) 130 | 131 | Final ln-likelihood: -9.23 132 | 133 | 134 | And plot the maximum likelihood model: 135 | 136 | .. code:: python 137 | 138 | pred, pred_var = gp.predict(y, x_pred, return_var=True) 139 | 140 | pl.fill_between(x_pred, pred - np.sqrt(pred_var), pred + np.sqrt(pred_var), 141 | color="k", alpha=0.2) 142 | pl.plot(x_pred, pred, "k", lw=1.5, alpha=0.5) 143 | pl.errorbar(x, y, yerr=yerr, fmt=".k", capsize=0) 144 | pl.plot(x_pred, np.sin(x_pred), "--g") 145 | pl.xlim(0, 10) 146 | pl.ylim(-1.45, 1.45) 147 | pl.xlabel("x") 148 | pl.ylabel("y"); 149 | 150 | 151 | 152 | .. image:: first_files/first_12_0.png 153 | 154 | 155 | And there you have it! Read on to see what else you can do with george 156 | or just dive right into your own problem. 157 | 158 | Finally, don't forget `Rasmussen & 159 | Williams `__, the reference for 160 | everything Gaussian Process. 161 | 162 | -------------------------------------------------------------------------------- /docs/tutorials/first_files/first_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/first_files/first_12_0.png -------------------------------------------------------------------------------- /docs/tutorials/first_files/first_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/first_files/first_4_0.png -------------------------------------------------------------------------------- /docs/tutorials/first_files/first_6_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/first_files/first_6_0.png -------------------------------------------------------------------------------- /docs/tutorials/hyper.rst: -------------------------------------------------------------------------------- 1 | .. note:: This tutorial was generated from an IPython notebook that can be 2 | downloaded `here <../../_static/notebooks/hyper.ipynb>`_. 3 | 4 | .. 
_hyper: 5 | 6 | 7 | Hyperparameter optimization 8 | =========================== 9 | 10 | This notebook was made with the following version of george: 11 | 12 | .. code:: python 13 | 14 | import george 15 | george.__version__ 16 | 17 | 18 | 19 | 20 | .. parsed-literal:: 21 | 22 | '0.3.1' 23 | 24 | 25 | 26 | In this tutorial, we’ll reproduce the analysis for Figure 5.6 in 27 | `Chapter 5 of Rasmussen & Williams 28 | (R&W) `__. The 29 | data are measurements of the atmospheric CO2 concentration made at Mauna 30 | Loa, Hawaii (Keeling & Whorf 2004). The dataset is said to be available 31 | online but I couldn’t seem to download it from the original source. 32 | Luckily the `statsmodels `__ 33 | package `includes a 34 | copy `__ 35 | that we can load as follows: 36 | 37 | .. code:: python 38 | 39 | import numpy as np 40 | import matplotlib.pyplot as pl 41 | from statsmodels.datasets import co2 42 | 43 | data = co2.load_pandas().data 44 | t = 2000 + (np.array(data.index.to_julian_date()) - 2451545.0) / 365.25 45 | y = np.array(data.co2) 46 | m = np.isfinite(t) & np.isfinite(y) & (t < 1996) 47 | t, y = t[m][::4], y[m][::4] 48 | 49 | pl.plot(t, y, ".k") 50 | pl.xlim(t.min(), t.max()) 51 | pl.xlabel("year") 52 | pl.ylabel("CO$_2$ in ppm"); 53 | 54 | 55 | 56 | .. image:: hyper_files/hyper_4_0.png 57 | 58 | 59 | In this figure, you can see that there is periodic (or quasi-periodic) 60 | signal with a year-long period superimposed on a long term trend. We 61 | will follow R&W and model these effects non-parametrically using a 62 | complicated covariance function. The covariance function that we’ll use 63 | is: 64 | 65 | .. math:: k(r) = k_1(r) + k_2(r) + k_3(r) + k_4(r) 66 | 67 | where 68 | 69 | .. math:: 70 | 71 | 72 | \begin{eqnarray} 73 | k_1(r) &=& \theta_1^2 \, \exp \left(-\frac{r^2}{2\,\theta_2} \right) \\ 74 | k_2(r) &=& \theta_3^2 \, \exp \left(-\frac{r^2}{2\,\theta_4} 75 | -\theta_5\,\sin^2\left( 76 | \frac{\pi\,r}{\theta_6}\right) 77 | \right) \\ 78 | k_3(r) &=& \theta_7^2 \, \left [ 1 + \frac{r^2}{2\,\theta_8\,\theta_9} 79 | \right ]^{-\theta_8} \\ 80 | k_4(r) &=& \theta_{10}^2 \, \exp \left(-\frac{r^2}{2\,\theta_{11}} \right) 81 | + \theta_{12}^2\,\delta_{ij} 82 | \end{eqnarray} 83 | 84 | We can implement this kernel in George as follows (we'll use the R&W 85 | results as the hyperparameters for now): 86 | 87 | .. code:: python 88 | 89 | from george import kernels 90 | 91 | k1 = 66**2 * kernels.ExpSquaredKernel(metric=67**2) 92 | k2 = 2.4**2 * kernels.ExpSquaredKernel(90**2) * kernels.ExpSine2Kernel(gamma=2/1.3**2, log_period=0.0) 93 | k3 = 0.66**2 * kernels.RationalQuadraticKernel(log_alpha=np.log(0.78), metric=1.2**2) 94 | k4 = 0.18**2 * kernels.ExpSquaredKernel(1.6**2) 95 | kernel = k1 + k2 + k3 + k4 96 | 97 | Optimization 98 | ------------ 99 | 100 | If we want to find the "best-fit" hyperparameters, we should *optimize* 101 | an objective function. The two standard functions (as described in 102 | Chapter 5 of R&W) are the marginalized ln-likelihood and the cross 103 | validation likelihood. George implements the former in the 104 | ``GP.lnlikelihood`` function and the gradient with respect to the 105 | hyperparameters in the ``GP.grad_lnlikelihood`` function: 106 | 107 | .. code:: python 108 | 109 | import george 110 | gp = george.GP(kernel, mean=np.mean(y), fit_mean=True, 111 | white_noise=np.log(0.19**2), fit_white_noise=True) 112 | gp.compute(t) 113 | print(gp.log_likelihood(y)) 114 | print(gp.grad_log_likelihood(y)) 115 | 116 | 117 | .. 
parsed-literal:: 118 | 119 | -608.938634447 120 | [ 1.20325990e-02 5.65011826e+02 4.94824823e-02 -1.20640197e+00 121 | 3.33835864e+00 2.43343814e-01 7.10415081e+00 -5.38311429e+03 122 | 1.96801995e+01 -2.05383850e+01 -5.36816015e+01 -1.32619973e-02 123 | -6.14334711e-03] 124 | 125 | 126 | We'll use a gradient based optimization routine from SciPy to fit this 127 | model as follows: 128 | 129 | .. code:: python 130 | 131 | import scipy.optimize as op 132 | 133 | # Define the objective function (negative log-likelihood in this case). 134 | def nll(p): 135 | gp.set_parameter_vector(p) 136 | ll = gp.log_likelihood(y, quiet=True) 137 | return -ll if np.isfinite(ll) else 1e25 138 | 139 | # And the gradient of the objective function. 140 | def grad_nll(p): 141 | gp.set_parameter_vector(p) 142 | return -gp.grad_log_likelihood(y, quiet=True) 143 | 144 | # You need to compute the GP once before starting the optimization. 145 | gp.compute(t) 146 | 147 | # Print the initial ln-likelihood. 148 | print(gp.log_likelihood(y)) 149 | 150 | # Run the optimization routine. 151 | p0 = gp.get_parameter_vector() 152 | results = op.minimize(nll, p0, jac=grad_nll, method="L-BFGS-B") 153 | 154 | # Update the kernel and print the final log-likelihood. 155 | gp.set_parameter_vector(results.x) 156 | print(gp.log_likelihood(y)) 157 | 158 | 159 | .. parsed-literal:: 160 | 161 | -608.938634447 162 | -299.16486543 163 | 164 | 165 | **Warning:** *An optimization code something like this should work on 166 | most problems but the results can be very sensitive to your choice of 167 | initialization and algorithm. If the results are nonsense, try choosing 168 | a better initial guess or try a different value of the ``method`` 169 | parameter in ``op.minimize``.* 170 | 171 | We can plot our prediction of the CO2 concentration into the future 172 | using our optimized Gaussian process model by running: 173 | 174 | .. code:: python 175 | 176 | x = np.linspace(max(t), 2025, 2000) 177 | mu, var = gp.predict(y, x, return_var=True) 178 | std = np.sqrt(var) 179 | 180 | pl.plot(t, y, ".k") 181 | pl.fill_between(x, mu+std, mu-std, color="g", alpha=0.5) 182 | 183 | pl.xlim(t.min(), 2025) 184 | pl.xlabel("year") 185 | pl.ylabel("CO$_2$ in ppm"); 186 | 187 | 188 | 189 | .. image:: hyper_files/hyper_12_0.png 190 | 191 | 192 | Sampling & Marginalization 193 | -------------------------- 194 | 195 | The prediction made in the previous section take into account 196 | uncertainties due to the fact that a Gaussian process is stochastic but 197 | it doesn’t take into account any uncertainties in the values of the 198 | hyperparameters. This won’t matter if the hyperparameters are very well 199 | constrained by the data but in this case, many of the parameters are 200 | actually poorly constrained. To take this effect into account, we can 201 | apply prior probability functions to the hyperparameters and marginalize 202 | using Markov chain Monte Carlo (MCMC). To do this, we’ll use the 203 | `emcee `__ package. 204 | 205 | First, we define the probabilistic model: 206 | 207 | .. code:: python 208 | 209 | def lnprob(p): 210 | # Trivial uniform prior. 211 | if np.any((-100 > p[1:]) + (p[1:] > 100)): 212 | return -np.inf 213 | 214 | # Update the kernel and compute the lnlikelihood. 215 | gp.set_parameter_vector(p) 216 | return gp.lnlikelihood(y, quiet=True) 217 | 218 | In this function, we’ve applied a prior on every parameter that is 219 | uniform between -100 and 100 for every parameter. 
In real life, you 220 | should probably use something more intelligent but this will work for 221 | this problem. The quiet argument in the call to ``GP.lnlikelihood()`` 222 | means that that function will return ``-numpy.inf`` if the kernel is 223 | invalid or if there are any linear algebra errors (otherwise it would 224 | raise an exception). 225 | 226 | Then, we run the sampler (this will probably take a while to run if you 227 | want to repeat this analysis): 228 | 229 | .. code:: python 230 | 231 | import emcee 232 | 233 | gp.compute(t) 234 | 235 | # Set up the sampler. 236 | nwalkers, ndim = 36, len(gp) 237 | sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob) 238 | 239 | # Initialize the walkers. 240 | p0 = gp.get_parameter_vector() + 1e-4 * np.random.randn(nwalkers, ndim) 241 | 242 | print("Running burn-in") 243 | p0, _, _ = sampler.run_mcmc(p0, 200) 244 | 245 | print("Running production chain") 246 | sampler.run_mcmc(p0, 200); 247 | 248 | 249 | .. parsed-literal:: 250 | 251 | Running burn-in 252 | Running production chain 253 | 254 | 255 | After this run, you can plot 50 samples from the marginalized predictive 256 | probability distribution: 257 | 258 | .. code:: python 259 | 260 | x = np.linspace(max(t), 2025, 250) 261 | for i in range(50): 262 | # Choose a random walker and step. 263 | w = np.random.randint(sampler.chain.shape[0]) 264 | n = np.random.randint(sampler.chain.shape[1]) 265 | gp.set_parameter_vector(sampler.chain[w, n]) 266 | 267 | # Plot a single sample. 268 | pl.plot(x, gp.sample_conditional(y, x), "g", alpha=0.1) 269 | 270 | pl.plot(t, y, ".k") 271 | 272 | pl.xlim(t.min(), 2025) 273 | pl.xlabel("year") 274 | pl.ylabel("CO$_2$ in ppm"); 275 | 276 | 277 | .. parsed-literal:: 278 | 279 | /Users/dforeman/research/projects/george/george/utils.py:30: RuntimeWarning: covariance is not positive-semidefinite. 280 | samples = np.random.multivariate_normal(mean, matrix, N) 281 | 282 | 283 | 284 | .. image:: hyper_files/hyper_18_1.png 285 | 286 | 287 | Comparing this to the same figure in the previous section, you’ll notice 288 | that the error bars on the prediction are now substantially larger than 289 | before. This is because we are now considering all the predictions that 290 | are consistent with the data, not just the “best” prediction. In 291 | general, even though it requires much more computation, it is more 292 | conservative (and honest) to take all these sources of uncertainty into 293 | account. 
294 | 295 | -------------------------------------------------------------------------------- /docs/tutorials/hyper_files/hyper_12_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/hyper_files/hyper_12_0.png -------------------------------------------------------------------------------- /docs/tutorials/hyper_files/hyper_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/hyper_files/hyper_18_0.png -------------------------------------------------------------------------------- /docs/tutorials/hyper_files/hyper_18_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/hyper_files/hyper_18_1.png -------------------------------------------------------------------------------- /docs/tutorials/hyper_files/hyper_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/hyper_files/hyper_4_0.png -------------------------------------------------------------------------------- /docs/tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | first 8 | model 9 | hyper 10 | scaling 11 | new-kernel 12 | mixture 13 | bayesopt 14 | 15 | -------------------------------------------------------------------------------- /docs/tutorials/mixture.rst: -------------------------------------------------------------------------------- 1 | .. note:: This tutorial was generated from an IPython notebook that can be 2 | downloaded `here <../../_static/notebooks/mixture.ipynb>`_. 3 | 4 | .. _mixture: 5 | 6 | 7 | Mixtures of GPs 8 | =============== 9 | 10 | This notebook was made with the following version of george: 11 | 12 | .. code:: python 13 | 14 | import george 15 | george.__version__ 16 | 17 | 18 | 19 | 20 | .. parsed-literal:: 21 | 22 | '0.3.1' 23 | 24 | 25 | 26 | It can be useful to model a dataset using a mixture of GPs. For example, 27 | the data might have both systematic effects and a physical signal that 28 | can be modeled using a GP. I know of a few examples where this method 29 | has been used in the context of time series analysis for the discovery 30 | of transiting exoplanets (for example, `Aigrain et al. 31 | 2016 `__ and `Luger et al. 32 | 2016 `__), but I'm sure that these 33 | aren't the earliest references. The idea is pretty simple: if your model 34 | is a mixture of two GPs (with covariance matrices :math:`K_1` and 35 | :math:`K_2` respectively), this is equivalent to a single GP where the 36 | kernel is the sum of two kernels, one for each component 37 | (:math:`K = K_1 + K_2`). In this case, the equation for the predictive 38 | mean conditioned on a dataset :math:`\boldsymbol{y}` is 39 | 40 | .. math:: 41 | 42 | 43 | \boldsymbol{\mu} = (K_1 + K_2)\,(K_1 + K_2 + N)^{-1} \, \boldsymbol{y} 44 | 45 | where :math:`N` is the (possibly diagonal) matrix describing the 46 | measurement uncertainties. It turns out that the equation for computing 47 | the predictive mean for component 1 is simply 48 | 49 | ..
math:: 50 | 51 | 52 | \boldsymbol{\mu}_1 = K_1\,(K_1 + K_2 + N)^{-1} \, \boldsymbol{y} 53 | 54 | and the equivalent expression can be written for component 2. 55 | 56 | This can be implemented in george using the new ``kernel`` keyword 57 | argument in the ``predict`` method. To demonstrate this, let's start by 58 | generating a synthetic dataset. Component 1 is a systematic signal that 59 | depends on two input parameters (:math:`t` and :math:`\theta` following 60 | Aigrain) and component 2 is a quasiperiodic oscillation that is the 61 | target of our analysis. 62 | 63 | .. code:: python 64 | 65 | import numpy as np 66 | import matplotlib.pyplot as plt 67 | 68 | from george import kernels 69 | 70 | np.random.seed(42) 71 | N = 256 72 | t = np.sort(np.random.uniform(0, 10, N)) 73 | theta = np.random.uniform(-np.pi, np.pi, N) 74 | X = np.vstack((t, theta)).T 75 | yerr = np.random.uniform(0.05, 0.25, N) 76 | 77 | kernel1 = 2.0 * kernels.Matern32Kernel([5.0, 0.5], ndim=2) 78 | kernel2 = 2.0 * kernels.ExpSine2Kernel(gamma=10.0, log_period=np.log(5.), ndim=2, axes=0) 79 | kernel2 *= kernels.ExpSquaredKernel([15.0], ndim=2, axes=0) 80 | kernel = kernel1 + kernel2 81 | 82 | gp = george.GP(kernel) 83 | y = gp.sample(X) 84 | y += yerr * np.random.randn(N) 85 | gp.compute(X, yerr) 86 | 87 | plt.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0) 88 | plt.ylim(-6.5, 6.5) 89 | plt.xlim(0, 10) 90 | plt.xlabel("t") 91 | plt.ylabel("y"); 92 | 93 | 94 | 95 | .. image:: mixture_files/mixture_4_0.png 96 | 97 | 98 | The physical (oscillatory) component is not obvious in this dataset 99 | because it is swamped by the systematics. Now, we'll find the maximum 100 | likelihood hyperparameters by numerically minimizing the negative 101 | log-likelihood function. 102 | 103 | .. code:: python 104 | 105 | from scipy.optimize import minimize 106 | 107 | def nll(params): 108 | gp.set_parameter_vector(params) 109 | l = gp.log_likelihood(y, quiet=True) 110 | g = gp.grad_log_likelihood(y, quiet=True) 111 | return -l, -g 112 | 113 | params = gp.get_parameter_vector() 114 | params += 0.05*np.random.randn(len(params)) 115 | soln = minimize(nll, params, jac=True) 116 | gp.set_parameter_vector(soln.x) 117 | print(soln.success, soln.x) 118 | 119 | 120 | .. parsed-literal:: 121 | 122 | True [ 0.38315985 1.88867446 -0.35673864 -0.33250025 11.98452994 123 | 1.59429178 2.11371316] 124 | 125 | 126 | Now let's use the trick from above to compute the prediction of 127 | component 1 and remove it to see the periodic signal. 128 | 129 | .. code:: python 130 | 131 | # Compute the predictive means - note the "kernel" argument 132 | mu1 = gp.predict(y, X, return_cov=False, kernel=kernel1) 133 | mu2 = gp.predict(y, X, return_cov=False, kernel=kernel2) 134 | 135 | plt.plot(t, y, ".k", mec="none", alpha=0.3) 136 | plt.plot(t, y - mu1, ".k") 137 | plt.plot(t, mu2) 138 | 139 | plt.ylim(-6.5, 6.5) 140 | plt.xlim(0, 10) 141 | plt.xlabel("t") 142 | plt.ylabel("y"); 143 | 144 | 145 | 146 | .. image:: mixture_files/mixture_8_0.png 147 | 148 | 149 | In this plot, the original dataset is plotted in light gray points and 150 | the "de-trended" data with component 1 removed is plotted as black 151 | points. The prediction of the GP model for component 2 is shown as a 152 | blue line. 
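As a quick consistency check on the decomposition described above, you can verify numerically that the two component means add up to the predictive mean of the full kernel; this sketch reuses the ``gp``, ``y``, ``X``, ``mu1``, and ``mu2`` objects defined earlier.

.. code:: python

    # Since K = K_1 + K_2, the full predictive mean should equal mu1 + mu2
    # up to floating point error.
    mu_full = gp.predict(y, X, return_cov=False)
    print(np.allclose(mu1 + mu2, mu_full))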
153 | 154 | -------------------------------------------------------------------------------- /docs/tutorials/mixture_files/mixture_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/mixture_files/mixture_4_0.png -------------------------------------------------------------------------------- /docs/tutorials/mixture_files/mixture_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/mixture_files/mixture_8_0.png -------------------------------------------------------------------------------- /docs/tutorials/model.rst: -------------------------------------------------------------------------------- 1 | .. note:: This tutorial was generated from an IPython notebook that can be 2 | downloaded `here <../../_static/notebooks/model.ipynb>`_. 3 | 4 | .. _model: 5 | 6 | 7 | Model fitting with correlated noise 8 | =================================== 9 | 10 | This notebook was made with the following version of george: 11 | 12 | .. code:: python 13 | 14 | import george 15 | george.__version__ 16 | 17 | 18 | 19 | 20 | .. parsed-literal:: 21 | 22 | '0.3.1' 23 | 24 | 25 | 26 | In this example, we’re going to simulate a common data analysis 27 | situation where our dataset exhibits unknown correlations in the noise. 28 | When taking data, it is often possible to estimate the independent 29 | measurement uncertainty on a single point (due to, for example, Poisson 30 | counting statistics) but there are often residual systematics that 31 | correlate data points. The effect of this correlated noise can often be 32 | hard to estimate but ignoring it can introduce substantial biases into 33 | your inferences. In the following sections, we will consider a synthetic 34 | dataset with correlated noise and a simple non-linear model. We will 35 | start by fitting the model assuming that the noise is uncorrelated and 36 | then improve on this model by modeling the covariance structure in the 37 | data using a Gaussian process. 38 | 39 | A Simple Mean Model 40 | ------------------- 41 | 42 | The model that we’ll fit in this demo is a single Gaussian feature with 43 | three parameters: amplitude :math:`\alpha`, location :math:`\ell`, and 44 | width :math:`\sigma^2`. I’ve chosen this model because it is the 45 | simplest non-linear model that I could think of, and it is qualitatively 46 | similar to a few problems in astronomy (fitting spectral features, 47 | measuring transit times, etc.). 48 | 49 | Simulated Dataset 50 | ----------------- 51 | 52 | Let's start by simulating a dataset of 50 points with known correlated 53 | noise. In fact, this example is somewhat artificial since the data were 54 | drawn from a Gaussian process but in everything that follows, we’ll use 55 | a different kernel function for our inferences in an attempt to make the 56 | situation slightly more realistic. A known white variance was also added 57 | to each data point. 58 | 59 | Using the parameters 60 | 61 | .. math:: \alpha = −1 \quad, \quad\quad \ell = 0.1 \quad, \quad\quad \sigma^2 = 0.4 \quad. 62 | 63 | the resulting dataset is: 64 | 65 | ..
code:: python 66 | 67 | from george.modeling import Model 68 | 69 | class Model(Model): 70 | parameter_names = ("amp", "location", "log_sigma2") 71 | 72 | def get_value(self, t): 73 | return self.amp * np.exp(-0.5*(t.flatten()-self.location)**2 * np.exp(-self.log_sigma2)) 74 | 75 | .. code:: python 76 | 77 | import numpy as np 78 | import matplotlib.pyplot as pl 79 | from george import kernels 80 | 81 | np.random.seed(1234) 82 | 83 | def generate_data(params, N, rng=(-5, 5)): 84 | gp = george.GP(0.1 * kernels.ExpSquaredKernel(3.3)) 85 | t = rng[0] + np.diff(rng) * np.sort(np.random.rand(N)) 86 | y = gp.sample(t) 87 | y += Model(**params).get_value(t) 88 | yerr = 0.05 + 0.05 * np.random.rand(N) 89 | y += yerr * np.random.randn(N) 90 | return t, y, yerr 91 | 92 | truth = dict(amp=-1.0, location=0.1, log_sigma2=np.log(0.4)) 93 | t, y, yerr = generate_data(truth, 50) 94 | 95 | pl.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0) 96 | pl.ylabel(r"$y$") 97 | pl.xlabel(r"$t$") 98 | pl.xlim(-5, 5) 99 | pl.title("simulated data"); 100 | 101 | 102 | 103 | .. image:: model_files/model_5_0.png 104 | 105 | 106 | Assuming White Noise 107 | -------------------- 108 | 109 | Let's start by doing the standard thing and assuming that the noise is 110 | uncorrelated. In this case, the ln-likelihood function of the data 111 | :math:`\{y_n\}` given the parameters :math:`\theta` is 112 | 113 | .. math:: 114 | 115 | 116 | \ln p(\{y_n\}\,|\,\{t_n\},\,\{\sigma_n^2\},\,\theta) = 117 | -\frac{1}{2}\,\sum_{n=1}^N \frac{[y_n - f_\theta(t_n)]^2}{\sigma_n^2} 118 | + A 119 | 120 | where :math:`A` doesn't depend on :math:`\theta` so it is irrelevant for 121 | our purposes and :math:`f_\theta(t)` is our model function. 122 | 123 | It is clear that there is some sort of systematic trend in the data and 124 | we don't want to ignore that so we'll simultaneously model a linear 125 | trend and the Gaussian feature described in the previous section. 126 | Therefore, our model is 127 | 128 | .. math:: 129 | 130 | 131 | f_\theta (t) = m\,t + b + 132 | \alpha\,\exp\left(-\frac{[t-\ell]^2}{2\,\sigma^2} \right) 133 | 134 | where :math:`\theta` is the 5-dimensional parameter vector 135 | 136 | .. math:: 137 | 138 | 139 | \theta = \{ m,\,b,\,\alpha,\,\ell,\,\sigma^2 \} \quad. 140 | 141 | The following code snippet is a simple implementation of this model in 142 | Python. 143 | 144 | .. code:: python 145 | 146 | class PolynomialModel(Model): 147 | parameter_names = ("m", "b", "amp", "location", "log_sigma2") 148 | 149 | def get_value(self, t): 150 | t = t.flatten() 151 | return (t * self.m + self.b + 152 | self.amp * np.exp(-0.5*(t-self.location)**2*np.exp(-self.log_sigma2))) 153 | 154 | To fit this model using MCMC (using `emcee `__), we 155 | need to first choose priors—in this case we’ll just use a simple uniform 156 | prior on each parameter—and then combine these with our likelihood 157 | function to compute the ln-probability (up to a normalization constant). 158 | In code, this will be: 159 | 160 | .. code:: python 161 | 162 | model = george.GP(mean=PolynomialModel(m=0, b=0, amp=-1, location=0.1, log_sigma2=np.log(0.4))) 163 | model.compute(t, yerr) 164 | 165 | def lnprob(p): 166 | model.set_parameter_vector(p) 167 | return model.log_likelihood(y, quiet=True) + model.log_prior() 168 | 169 | Now that we have our model implemented, we’ll initialize the walkers and 170 | run both a burn-in and production chain: 171 | 172 | .. 
code:: python 173 | 174 | import emcee 175 | 176 | initial = model.get_parameter_vector() 177 | ndim, nwalkers = len(initial), 32 178 | p0 = initial + 1e-8 * np.random.randn(nwalkers, ndim) 179 | sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob) 180 | 181 | print("Running burn-in...") 182 | p0, _, _ = sampler.run_mcmc(p0, 500) 183 | sampler.reset() 184 | 185 | print("Running production...") 186 | sampler.run_mcmc(p0, 1000); 187 | 188 | 189 | .. parsed-literal:: 190 | 191 | Running burn-in... 192 | Running production... 193 | 194 | 195 | After running the chain, we can plot the predicted results. It is often 196 | useful to plot the results on top of the data as well. To do this, we 197 | can over plot 24 posterior samples on top of the data: 198 | 199 | .. code:: python 200 | 201 | # Plot the data. 202 | pl.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0) 203 | 204 | # The positions where the prediction should be computed. 205 | x = np.linspace(-5, 5, 500) 206 | 207 | # Plot 24 posterior samples. 208 | samples = sampler.flatchain 209 | for s in samples[np.random.randint(len(samples), size=24)]: 210 | model.set_parameter_vector(s) 211 | pl.plot(x, model.mean.get_value(x), color="#4682b4", alpha=0.3) 212 | 213 | pl.ylabel(r"$y$") 214 | pl.xlabel(r"$t$") 215 | pl.xlim(-5, 5) 216 | pl.title("fit assuming uncorrelated noise"); 217 | 218 | 219 | 220 | .. image:: model_files/model_13_0.png 221 | 222 | 223 | In this figure, the data are shown as black points with error bars and 224 | the posterior samples are shown as translucent blue lines. These results 225 | seem, at face value, pretty satisfying. But, since we know the true 226 | model parameters that were used to simulate the data, we can assess our 227 | original assumption of uncorrelated noise. To do this, we’ll plot all 228 | the projections of our posterior samples using 229 | `corner.py `__ and over plot the true 230 | values: 231 | 232 | .. code:: python 233 | 234 | import corner 235 | 236 | tri_cols = ["amp", "location", "log_sigma2"] 237 | tri_labels = [r"$\alpha$", r"$\ell$", r"$\ln\sigma^2$"] 238 | tri_truths = [truth[k] for k in tri_cols] 239 | tri_range = [(-2, -0.01), (-3, -0.5), (-1, 1)] 240 | names = model.get_parameter_names() 241 | inds = np.array([names.index("mean:"+k) for k in tri_cols]) 242 | corner.corner(sampler.flatchain[:, inds], truths=tri_truths, labels=tri_labels); 243 | 244 | 245 | 246 | .. image:: model_files/model_15_0.png 247 | 248 | 249 | In this figure, the blue lines are the true values used to simulate the 250 | data and the black contours and histograms show the posterior 251 | constraints. The constraints on the amplitude :math:`\alpha` and the 252 | width :math:`\sigma^2` are consistent with the truth but the location of 253 | the feature :math:`\ell` is *almost completely inconsistent with the 254 | truth!* This would matter a lot if we were trying to precisely measure 255 | radial velocities or transit times. 256 | 257 | Modeling the Noise 258 | ------------------ 259 | 260 | **Note:** A full discussion of the theory of Gaussian processes is 261 | beyond the scope of this demo—you should probably check out `Rasmussen & 262 | Williams (2006) `__—but I'll try 263 | to give a quick qualitative motivation for our model. 264 | 265 | In this section, instead of assuming that the noise is white, we'll 266 | generalize the likelihood function to include covariances between data 267 | points. 
To do this, let's start by re-writing the likelihood function 268 | from the previous section as a matrix equation (if you squint, you'll be 269 | able to work out that we haven't changed it at all): 270 | 271 | .. math:: 272 | 273 | 274 | \ln p(\{y_n\}\,|\,\{t_n\},\,\{\sigma_n^2\},\,\theta) = 275 | -\frac{1}{2}\,\boldsymbol{r}^\mathrm{T}\,K^{-1}\,\boldsymbol{r} 276 | -\frac{1}{2}\,\ln\det K - \frac{N}{2}\,\ln 2\pi 277 | 278 | where 279 | 280 | .. math:: 281 | 282 | 283 | \boldsymbol{r} = \left ( \begin{array}{c} 284 | y_1 - f_\theta(t_1) \\ 285 | y_2 - f_\theta(t_2) \\ 286 | \vdots \\ 287 | y_N - f_\theta(t_N) \\ 288 | \end{array}\right) 289 | 290 | is the residual vector and 291 | 292 | .. math:: 293 | 294 | 295 | K = \left ( \begin{array}{cccc} 296 | \sigma_1^2 & 0 & & 0 \\ 297 | 0 & \sigma_2^2 & & 0 \\ 298 | & & \ddots & \\ 299 | 0 & 0 & & \sigma_N^2 \\ 300 | \end{array}\right) 301 | 302 | is the :math:`N \times N` data covariance matrix (where :math:``N`` is 303 | the number of data points). 304 | 305 | The fact that :math:`K` is diagonal is the result of our earlier 306 | assumption that the noise was white. If we want to relax this 307 | assumption, we just need to start populating the off-diagonal elements 308 | of this covariance matrix. If we wanted to make every off-diagonal 309 | element of the matrix a free parameter, there would be too many 310 | parameters to actually do any inference. Instead, we can simply *model* 311 | the elements of this array as 312 | 313 | .. math:: 314 | 315 | 316 | K_{ij} = \sigma_i^2\,\delta_{ij} + k(t_i,\,t_j) 317 | 318 | where :math:`\delta_{ij}` is the 319 | `Kronecker\_delta `__ and 320 | :math:`k(\cdot,\,\cdot)` is a covariance function that we get to choose. 321 | `Chapter 4 `__ of 322 | Rasmussen & Williams discusses various choices for :math:`k` but for 323 | this demo, we'll just use the `Matérn-3/2 324 | function `__: 325 | 326 | .. math:: 327 | 328 | 329 | k(r) = a^2 \, \left( 1+\frac{\sqrt{3}\,r}{\tau} \right)\, 330 | \exp \left (-\frac{\sqrt{3}\,r}{\tau} \right ) 331 | 332 | where :math:`r = |t_i - t_j|`, and :math:`a^2` and :math:`\tau` are the 333 | parameters of the model. 334 | 335 | The Final Fit 336 | ------------- 337 | 338 | Now we could go ahead and implement the ln-likelihood function that we 339 | came up with in the previous section but that's what George is for, 340 | after all! To implement the model from the previous section using 341 | George, we can write the following ln-likelihood function in Python: 342 | 343 | .. code:: python 344 | 345 | kwargs = dict(**truth) 346 | kwargs["bounds"] = dict(location=(-2, 2)) 347 | mean_model = Model(**kwargs) 348 | gp = george.GP(np.var(y) * kernels.Matern32Kernel(10.0), mean=mean_model) 349 | gp.compute(t, yerr) 350 | 351 | def lnprob2(p): 352 | gp.set_parameter_vector(p) 353 | return gp.log_likelihood(y, quiet=True) + gp.log_prior() 354 | 355 | As before, let’s run MCMC on this model: 356 | 357 | .. 
code:: python 358 | 359 | initial = gp.get_parameter_vector() 360 | ndim, nwalkers = len(initial), 32 361 | sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob2) 362 | 363 | print("Running first burn-in...") 364 | p0 = initial + 1e-8 * np.random.randn(nwalkers, ndim) 365 | p0, lp, _ = sampler.run_mcmc(p0, 2000) 366 | 367 | print("Running second burn-in...") 368 | p0 = p0[np.argmax(lp)] + 1e-8 * np.random.randn(nwalkers, ndim) 369 | sampler.reset() 370 | p0, _, _ = sampler.run_mcmc(p0, 2000) 371 | sampler.reset() 372 | 373 | print("Running production...") 374 | sampler.run_mcmc(p0, 2000); 375 | 376 | 377 | .. parsed-literal:: 378 | 379 | Running first burn-in... 380 | Running second burn-in... 381 | Running production... 382 | 383 | 384 | You’ll notice that this time I’ve run two burn-in phases. Before the 385 | second burn-in, I re-sample the positions of the walkers in a tiny ball 386 | around the position of the best walker in the previous run. I found that 387 | this re-sampling step was useful because otherwise some of the walkers 388 | started in a bad part of parameter space and took a while to converge to 389 | something reasonable. 390 | 391 | The plotting code for the results for this model is similar to the code 392 | in the previous section. First, we can plot the posterior samples on top 393 | of the data: 394 | 395 | .. code:: python 396 | 397 | # Plot the data. 398 | pl.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0) 399 | 400 | # The positions where the prediction should be computed. 401 | x = np.linspace(-5, 5, 500) 402 | 403 | # Plot 24 posterior samples. 404 | samples = sampler.flatchain 405 | for s in samples[np.random.randint(len(samples), size=24)]: 406 | gp.set_parameter_vector(s) 407 | mu = gp.sample_conditional(y, x) 408 | pl.plot(x, mu, color="#4682b4", alpha=0.3) 409 | 410 | pl.ylabel(r"$y$") 411 | pl.xlabel(r"$t$") 412 | pl.xlim(-5, 5) 413 | pl.title("fit with GP noise model"); 414 | 415 | 416 | 417 | .. image:: model_files/model_22_0.png 418 | 419 | 420 | .. code:: python 421 | 422 | names = gp.get_parameter_names() 423 | inds = np.array([names.index("mean:"+k) for k in tri_cols]) 424 | corner.corner(sampler.flatchain[:, inds], truths=tri_truths, labels=tri_labels); 425 | 426 | 427 | 428 | .. image:: model_files/model_23_0.png 429 | 430 | 431 | It is clear from this figure that the constraints obtained when modeling 432 | the noise are less precise (the error bars are larger) but more accurate 433 | (and honest). 
434 | 435 | -------------------------------------------------------------------------------- /docs/tutorials/model_files/model_13_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/model_files/model_13_0.png -------------------------------------------------------------------------------- /docs/tutorials/model_files/model_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/model_files/model_15_0.png -------------------------------------------------------------------------------- /docs/tutorials/model_files/model_22_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/model_files/model_22_0.png -------------------------------------------------------------------------------- /docs/tutorials/model_files/model_23_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/model_files/model_23_0.png -------------------------------------------------------------------------------- /docs/tutorials/model_files/model_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/model_files/model_5_0.png -------------------------------------------------------------------------------- /docs/tutorials/new-kernel.rst: -------------------------------------------------------------------------------- 1 | .. note:: This tutorial was generated from an IPython notebook that can be 2 | downloaded `here <../../_static/notebooks/new-kernel.ipynb>`_. 3 | 4 | .. _new-kernel: 5 | 6 | 7 | Implementing new kernels 8 | ======================== 9 | 10 | This notebook was made with the following version of george: 11 | 12 | .. code:: python 13 | 14 | import george 15 | george.__version__ 16 | 17 | 18 | 19 | 20 | .. parsed-literal:: 21 | 22 | '0.3.1' 23 | 24 | 25 | 26 | All the kernels used by george must be implemented in C++ with Python 27 | bindings. This means that you need to recompile the code anytime you 28 | want to add a new kernel. This is an unavoidable PITA but—as of version 29 | 1.0—george comes with a kernel specification language that is designed 30 | to make this process as painless as possible. 31 | 32 | To follow this tutorial, you'll need the development version of george. 33 | You should `follow the instructions here <../quickstart/>`__ to get that 34 | set up and once you can build the code, let's start implementing a new 35 | kernel. 36 | 37 | The kernel function 38 | ------------------- 39 | 40 | In this tutorial, we will work through the implementation of a kernel 41 | that wasn't available in early versions of the code: the 42 | ``LocalGaussianKernel``. This kernel has been used by `Ian 43 | Czekala `__ in his stellar spectrum fitting 44 | algorithm `Starfish `__. 45 | 46 | This kernel is not stationary and its value is given by the following 47 | equation: 48 | 49 | .. 
math:: 50 | 51 | k(x_i,\,x_j) = \exp \left( 52 | -\frac{(x_i - x_0)^2 + (x_j - x_0)^2}{2\,w} 53 | \right) 54 | 55 | where the parameters :math:`x_0` and :math:`w` are the location and 56 | width of the Gaussian respectively. We're actually going to parameterize 57 | this kernel using :math:`\ln w` instead of :math:`w` because it must be 58 | strictly positive. 59 | 60 | In our implementation, we'll also need the derivatives of this function 61 | with respect to the hyperparameters so let's list those now: 62 | 63 | .. math:: 64 | 65 | \frac{\mathrm{d}k(x_i,\,x_j)}{\mathrm{d}x_0} = \exp \left( 66 | -\frac{(x_i - x_0)^2 + (x_j - x_0)^2}{2\,w} 67 | \right) \, \frac{x_i + x_j - 2\,x_0}{w} 68 | 69 | and 70 | 71 | .. math:: 72 | 73 | \frac{\mathrm{d}k(x_i,\,x_j)}{\mathrm{d}\ln w} = \exp \left( 74 | -\frac{(x_i - x_0)^2 + (x_j - x_0)^2}{2\,w} 75 | \right) \, \frac{(x_i - x_0)^2 + (x_j - x_0)^2}{2\,w} \quad. 76 | 77 | Kernel specification 78 | -------------------- 79 | 80 | In the root directory of your development version of george, there 81 | should be a directory called ``kernels``. In this directory, create a 82 | new file called ``MyLocalGaussian.yml`` and edit it to have the 83 | following contents: 84 | 85 | :: 86 | 87 | name: MyLocalGaussianKernel 88 | doc: You should always document your code. 89 | stationary: false 90 | params: [x0, log_w] 91 | 92 | reparams: 93 | inv_2w: return 0.5 * exp(-log_w); 94 | 95 | value: | 96 | double d1 = x1 - x0, d2 = x2 - x0; 97 | return exp(-(d1*d1 + d2*d2) * inv_2w); 98 | 99 | grad: 100 | x0: | 101 | double d1 = x1 - x0, d2 = x2 - x0; 102 | return 2 * exp(-(d1*d1 + d2*d2) * inv_2w) * inv_2w * (d1 + d2); 103 | log_w: | 104 | double d1 = x1 - x0, d2 = x2 - x0, 105 | arg = (d1*d1 + d2*d2) * inv_2w; 106 | return exp(-arg) * arg; 107 | x1: | 108 | double d1 = x1 - x0, d2 = x2 - x0; 109 | return -2.0 * exp(-(d1*d1 + d2*d2) * inv_2w) * d1 * inv_2w; 110 | x2: | 111 | double d1 = x1 - x0, d2 = x2 - x0; 112 | return -2.0 * exp(-(d1*d1 + d2*d2) * inv_2w) * d2 * inv_2w; 113 | 114 | This file is written in a markup language called YAML and there are a 115 | lot of online resources for the details of the syntax but let's go 116 | through it line-by-line now to explain what's going on. 117 | 118 | 1. The ``name`` field is the name that you want the Python class to 119 | have. The convention is to have it end in ``Kernel`` but I guess you 120 | can call it more-or-less anything. 121 | 122 | 2. The ``doc`` field let's you write a docstring for the class. This is 123 | always a good idea and you can look at the existing kernels for 124 | inspiration. 125 | 126 | 3. This kernel is not stationary and you specify that using the 127 | ``stationary`` field. 128 | 129 | 4. ``params`` lists the "natural" parameters of the kernel. The 130 | derivatives should be computed with respect to these parameters. 131 | 132 | 5. It is often useful (for speed) to pre-compute a reparameterized form 133 | of the parameters. In this case, we don't want to make too many calls 134 | to the ``exp`` function so we'll pre-compute :math:`(2\,w)^{-1}`. To 135 | do this, we add an entry to the ``reparams`` dictionary with raw C++ 136 | code for a function that returns the reparameterization. This 137 | function will take the natural parameters as input so you can use 138 | them directly by name. 139 | 140 | 6. The ``value`` entry gives the raw C++ code for evaluating the kernel 141 | function at input ``double``\ s ``x1`` and ``x2``. 
This function will 142 | take the parameters and the reparameterizations as inputs so you can 143 | use them by name. 144 | 145 | 7. Finally, the ``grad`` dictionary gives the raw C++ code for computing 146 | the gradient as a function of each parameter. 147 | 148 | After you save this file and recompile george, you should now have 149 | access to this kernel as follows: 150 | 151 | .. code:: python 152 | 153 | import numpy as np 154 | from george import kernels 155 | 156 | kernel = 5 * kernels.MyLocalGaussianKernel(x0=0.0, log_w=np.log(0.2)) 157 | kernel += 5 * kernels.Matern32Kernel(100.0) 158 | 159 | Whenever you implement a new kernel, you should numerically test that 160 | you've implemented the gradients correctly. The ``Kernel`` 161 | implementation includes a function for doing exactly that and here's how 162 | you would call it: 163 | 164 | .. code:: python 165 | 166 | x = np.linspace(-10, 10, 500) 167 | kernel.test_gradient(np.atleast_2d(x).T) 168 | 169 | If our implementation was wrong, this would have raised an exception so 170 | this looks pretty promising! 171 | 172 | Now, we can plot the covariance matrix given by this kernel as follows: 173 | 174 | .. code:: python 175 | 176 | import matplotlib.pyplot as pl 177 | 178 | k = kernel.get_value(np.atleast_2d(x).T) 179 | pl.figure(figsize=(6, 6)) 180 | pl.imshow(k, cmap="gray", interpolation="nearest") 181 | pl.gca().set_xticklabels([]) 182 | pl.gca().set_yticklabels([]); 183 | 184 | 185 | 186 | .. image:: new-kernel_files/new-kernel_8_0.png 187 | 188 | 189 | From this covariance function, we can sample some representative 190 | functions: 191 | 192 | .. code:: python 193 | 194 | np.random.seed(123) 195 | gp = george.GP(kernel) 196 | gp.compute(x) 197 | y = gp.sample(size=10) 198 | 199 | pl.plot(x, y.T, "g", lw=1.5, alpha=0.5) 200 | pl.xlim(-5, 5); 201 | 202 | 203 | 204 | .. image:: new-kernel_files/new-kernel_10_0.png 205 | 206 | 207 | George already includes an implementation of this kernel (called the 208 | ``LocalGaussianKernel``) so we'll finish here but when you implement 209 | your own favorite kernel, you should now open a pull request to include 210 | the kernel in the released version of george. 211 | 212 | -------------------------------------------------------------------------------- /docs/tutorials/new-kernel_files/new-kernel_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/new-kernel_files/new-kernel_10_0.png -------------------------------------------------------------------------------- /docs/tutorials/new-kernel_files/new-kernel_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/new-kernel_files/new-kernel_8_0.png -------------------------------------------------------------------------------- /docs/tutorials/scaling.rst: -------------------------------------------------------------------------------- 1 | .. note:: This tutorial was generated from an IPython notebook that can be 2 | downloaded `here <../../_static/notebooks/scaling.ipynb>`_. 3 | 4 | .. _scaling: 5 | 6 | 7 | Scaling Gaussian Processes to big datasets 8 | ========================================== 9 | 10 | This notebook was made with the following version of george: 11 | 12 | .. code:: python 13 | 14 | import george 15 | george.__version__ 16 | 17 | 18 | 19 | 20 | .. 
parsed-literal:: 21 | 22 | '0.3.1' 23 | 24 | 25 | 26 | One of the biggest technical challenges faced when using Gaussian 27 | Processes to model big datasets is that the computational cost naïvely 28 | scales as :math:`\mathcal{O}(N^3)` where :math:`N` is the number of 29 | points in you dataset. This cost can be prohibitive even for moderately 30 | sized datasets. There are a lot of methods for making these types of 31 | problems tractable by exploiting structure or making approximations. 32 | George comes equipped with one approximate method with controllable 33 | precision that works well with one-dimensional inputs (time series, for 34 | example). The method comes from `this 35 | paper `__ and it can help speed up 36 | many—but not all—Gaussian Process models. 37 | 38 | To demonstrate this method, in this tutorial, we'll benchmark the two 39 | Gaussian Process "solvers" included with george. For comparison, we'll 40 | also measure the computational cost of the same operations using the 41 | popular `GPy library `__ and the 42 | `new scikit-learn 43 | interface `__. 44 | Note that GPy is designed a Gaussian Process toolkit and it comes with a 45 | huge number state-of-the-art algorithms for the application of Gaussian 46 | Processes and it is not meant for efficiently computing marginalized 47 | likelihoods so the comparison isn't totally fair. 48 | 49 | As usual, we'll start by generating a large fake dataset: 50 | 51 | .. code:: python 52 | 53 | import numpy as np 54 | import matplotlib.pyplot as pl 55 | 56 | np.random.seed(1234) 57 | x = np.sort(np.random.uniform(0, 10, 50000)) 58 | yerr = 0.1 * np.ones_like(x) 59 | y = np.sin(x) 60 | 61 | The standard method for computing the marginalized likelihood of this 62 | dataset under a GP model is: 63 | 64 | .. code:: python 65 | 66 | from george import kernels 67 | kernel = np.var(y) * kernels.ExpSquaredKernel(1.0) 68 | 69 | gp_basic = george.GP(kernel) 70 | gp_basic.compute(x[:100], yerr[:100]) 71 | print(gp_basic.log_likelihood(y[:100])) 72 | 73 | 74 | .. parsed-literal:: 75 | 76 | 133.946394912 77 | 78 | 79 | When using only 100 data points, this computation is very fast but we 80 | could also use the approximate solver as follows: 81 | 82 | .. code:: python 83 | 84 | gp_hodlr = george.GP(kernel, solver=george.HODLRSolver, seed=42) 85 | gp_hodlr.compute(x[:100], yerr[:100]) 86 | print(gp_hodlr.log_likelihood(y[:100])) 87 | 88 | 89 | .. parsed-literal:: 90 | 91 | 133.946394912 92 | 93 | 94 | The new scikit-learn interface is quite similar (you'll need to install 95 | a recent version of scikit-learn to execute this cell): 96 | 97 | .. code:: python 98 | 99 | import sklearn 100 | print("sklearn version: {0}".format(sklearn.__version__)) 101 | from sklearn.gaussian_process.kernels import RBF 102 | from sklearn.gaussian_process import GaussianProcessRegressor 103 | 104 | kernel_skl = np.var(y) * RBF(length_scale=1.0) 105 | gp_skl = GaussianProcessRegressor(kernel_skl, 106 | alpha=yerr[:100]**2, 107 | optimizer=None, 108 | copy_X_train=False) 109 | gp_skl.fit(x[:100, None], y[:100]) 110 | print(gp_skl.log_marginal_likelihood(kernel_skl.theta)) 111 | 112 | 113 | .. parsed-literal:: 114 | 115 | sklearn version: 0.19.1 116 | 133.946394918 117 | 118 | 119 | To implement this same model in GPy, you would do something like (I've 120 | never been able to get the heteroscedastic regression to work in GPy): 121 | 122 | .. 
code:: python 123 | 124 | import GPy 125 | 126 | print("GPy version: {0}".format(GPy.__version__)) 127 | kernel_gpy = GPy.kern.RBF(input_dim=1, variance=np.var(y), lengthscale=1.) 128 | gp_gpy = GPy.models.GPRegression(x[:100, None], y[:100, None], kernel_gpy) 129 | gp_gpy['.*Gaussian_noise'] = yerr[0]**2 130 | print(gp_gpy.log_likelihood()) 131 | 132 | 133 | .. parsed-literal:: 134 | 135 | GPy version: 1.8.4 136 | 133.946345613 137 | 138 | 139 | Now that we have working implementations of this model using all of the 140 | different methods and modules, let's run a benchmark to look at the 141 | computational cost and scaling of each option. The code here doesn't 142 | matter too much but we'll compute the best-of-"K" runtime for each 143 | method where "K" depends on how long I'm willing to wait. This cell 144 | takes a few minutes to run. 145 | 146 | .. code:: python 147 | 148 | import time 149 | 150 | ns = np.array([50, 100, 200, 500, 1000, 5000, 10000, 50000], dtype=int) 151 | t_basic = np.nan + np.zeros(len(ns)) 152 | t_hodlr = np.nan + np.zeros(len(ns)) 153 | t_gpy = np.nan + np.zeros(len(ns)) 154 | t_skl = np.nan + np.zeros(len(ns)) 155 | for i, n in enumerate(ns): 156 | # Time the HODLR solver. 157 | best = np.inf 158 | for _ in range(100000 // n): 159 | strt = time.time() 160 | gp_hodlr.compute(x[:n], yerr[:n]) 161 | gp_hodlr.log_likelihood(y[:n]) 162 | dt = time.time() - strt 163 | if dt < best: 164 | best = dt 165 | t_hodlr[i] = best 166 | 167 | # Time the basic solver. 168 | best = np.inf 169 | for _ in range(10000 // n): 170 | strt = time.time() 171 | gp_basic.compute(x[:n], yerr[:n]) 172 | gp_basic.log_likelihood(y[:n]) 173 | dt = time.time() - strt 174 | if dt < best: 175 | best = dt 176 | t_basic[i] = best 177 | 178 | # Compare to the proposed scikit-learn interface. 179 | best = np.inf 180 | if n <= 10000: 181 | gp_skl = GaussianProcessRegressor(kernel_skl, 182 | alpha=yerr[:n]**2, 183 | optimizer=None, 184 | copy_X_train=False) 185 | gp_skl.fit(x[:n, None], y[:n]) 186 | for _ in range(10000 // n): 187 | strt = time.time() 188 | gp_skl.log_marginal_likelihood(kernel_skl.theta) 189 | dt = time.time() - strt 190 | if dt < best: 191 | best = dt 192 | t_skl[i] = best 193 | 194 | # Compare to GPy. 195 | best = np.inf 196 | for _ in range(5000 // n): 197 | kernel_gpy = GPy.kern.RBF(input_dim=1, variance=np.var(y), lengthscale=1.) 198 | strt = time.time() 199 | gp_gpy = GPy.models.GPRegression(x[:n, None], y[:n, None], kernel_gpy) 200 | gp_gpy['.*Gaussian_noise'] = yerr[0]**2 201 | gp_gpy.log_likelihood() 202 | dt = time.time() - strt 203 | if dt < best: 204 | best = dt 205 | t_gpy[i] = best 206 | 207 | Finally, here are the results of the benchmark plotted on a logarithmic 208 | scale: 209 | 210 | .. code:: python 211 | 212 | pl.loglog(ns, t_gpy, "-o", label="GPy") 213 | pl.loglog(ns, t_skl, "-o", label="sklearn") 214 | pl.loglog(ns, t_basic, "-o", label="basic") 215 | pl.loglog(ns, t_hodlr, "-o", label="HODLR") 216 | pl.xlim(30, 80000) 217 | pl.ylim(1.1e-4, 50.) 218 | pl.xlabel("number of datapoints") 219 | pl.ylabel("time [seconds]") 220 | pl.legend(loc=2, fontsize=16); 221 | 222 | 223 | 224 | .. image:: scaling_files/scaling_16_0.png 225 | 226 | 227 | The sklearn and basic solver perform similarly with george being 228 | consistently slightly faster. This is not surprising because they both 229 | use LAPACK (via numpy/scipy) to naïvely compute the likelihood. 
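A quick way to check this scaling quantitatively (this check isn't part of
the original benchmark) is to fit a power law to the basic solver's timings
for the larger datasets:

.. code:: python

    # Rough estimate of the scaling exponent: slope of log(t) vs. log(N) for
    # the basic solver, using only the larger datasets where the O(N^3) term
    # should dominate.
    m = np.isfinite(t_basic) & (ns >= 1000)
    slope = np.polyfit(np.log(ns[m]), np.log(t_basic[m]), 1)[0]
    print("basic solver scales roughly as N^{0:.1f}".format(slope))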
GPy is 230 | consistently slower (probably because of Python overheads) even for 231 | small datasets but, like I mentioned previously, this wasn't really what 232 | GPy was designed to do and it comes with a lot of other features. For 233 | large datasets (:math:`N \gtrsim 1000`), the ``HODLRSolver`` really 234 | shines. In practice, this gain is less significant for multidimensional 235 | inputs and some other kernels but for reasonably well-behaved 236 | time-series models, it might solve all of your problems! 237 | 238 | -------------------------------------------------------------------------------- /docs/tutorials/scaling_files/scaling_16_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfm/george/8191c6b101889e5c0dc552dbc358e57a0a1a9961/docs/tutorials/scaling_files/scaling_16_0.png -------------------------------------------------------------------------------- /docs/tutorials/tutorial_rst.tpl: -------------------------------------------------------------------------------- 1 | {%- extends 'display_priority.tpl' -%} 2 | 3 | {% block header %} 4 | .. note:: This tutorial was generated from an IPython notebook that can be 5 | downloaded `here <../../_static/notebooks/{{ resources.metadata.name }}.ipynb>`_. 6 | 7 | .. _{{resources.metadata.name}}: 8 | {% endblock %} 9 | 10 | {% block in_prompt %} 11 | {% endblock in_prompt %} 12 | 13 | {% block output_prompt %} 14 | {% endblock output_prompt %} 15 | 16 | {% block input %} 17 | {%- if cell.source.strip() and not cell.source.startswith("%") -%} 18 | .. code:: python 19 | 20 | {{ cell.source | indent}} 21 | {% endif -%} 22 | {% endblock input %} 23 | 24 | {% block error %} 25 | :: 26 | 27 | {{ super() }} 28 | {% endblock error %} 29 | 30 | {% block traceback_line %} 31 | {{ line | indent | strip_ansi }} 32 | {% endblock traceback_line %} 33 | 34 | {% block execute_result %} 35 | {% block data_priority scoped %} 36 | {{ super() }} 37 | {% endblock %} 38 | {% endblock execute_result %} 39 | 40 | {% block stream %} 41 | .. parsed-literal:: 42 | 43 | {{ output.text | indent }} 44 | {% endblock stream %} 45 | 46 | {% block data_svg %} 47 | .. image:: {{ output.metadata.filenames['image/svg+xml'] | urlencode }} 48 | {% endblock data_svg %} 49 | 50 | {% block data_png %} 51 | .. image:: {{ output.metadata.filenames['image/png'] | urlencode }} 52 | {% endblock data_png %} 53 | 54 | {% block data_jpg %} 55 | .. image:: {{ output.metadata.filenames['image/jpeg'] | urlencode }} 56 | {% endblock data_jpg %} 57 | 58 | {% block data_latex %} 59 | .. math:: 60 | 61 | {{ output.data['text/latex'] | strip_dollars | indent }} 62 | {% endblock data_latex %} 63 | 64 | {% block data_text scoped %} 65 | .. parsed-literal:: 66 | 67 | {{ output.data['text/plain'] | indent }} 68 | {% endblock data_text %} 69 | 70 | {% block data_html scoped %} 71 | .. 
raw:: html 72 | 73 | {{ output.data['text/html'] | indent }} 74 | {% endblock data_html %} 75 | 76 | {% block markdowncell scoped %} 77 | {{ cell.source | markdown2rst }} 78 | {% endblock markdowncell %} 79 | 80 | {%- block rawcell scoped -%} 81 | {%- if cell.metadata.get('raw_mimetype', '').lower() in resources.get('raw_mimetypes', ['']) %} 82 | {{cell.source}} 83 | {% endif -%} 84 | {%- endblock rawcell -%} 85 | 86 | {% block headingcell scoped %} 87 | {{ ("#" * cell.level + cell.source) | replace('\n', ' ') | markdown2rst }} 88 | {% endblock headingcell %} 89 | 90 | {% block unknowncell scoped %} 91 | unknown type {{cell.type}} 92 | {% endblock unknowncell %} 93 | -------------------------------------------------------------------------------- /docs/user/gp.rst: -------------------------------------------------------------------------------- 1 | .. module:: george 2 | 3 | .. _gp: 4 | 5 | The GP object 6 | ============= 7 | 8 | The core element of George is the :class:`GP` object. 9 | All of the available methods and properties are documented here: 10 | 11 | .. autoclass:: george.GP 12 | :inherited-members: 13 | -------------------------------------------------------------------------------- /docs/user/index.rst: -------------------------------------------------------------------------------- 1 | User Guide 2 | ========== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | quickstart 8 | kernels 9 | gp 10 | solvers 11 | modeling 12 | 13 | -------------------------------------------------------------------------------- /docs/user/kernels.rst.template: -------------------------------------------------------------------------------- 1 | .. module:: george.kernels 2 | 3 | .. _kernels: 4 | 5 | Kernels 6 | ======= 7 | 8 | George comes equipped with a suite of standard covariance functions or 9 | kernels that can be combined to build more complex models. 10 | The standard kernels fall into the following categories: 11 | 12 | 1. :ref:`stationary-kernels` — functions that depend only on the radial 13 | distance between points in some user-defined metric, and 14 | 2. :ref:`non-stationary-kernels` — functions that depend on the value of the 15 | input coordinates themselves. 16 | 17 | :ref:`combining-kernels` describes how to combine kernels to build more 18 | sophisticated models and :ref:`new-kernels` explains how you would go about 19 | incorporating a custom kernel. 20 | 21 | 22 | Common parameters 23 | ----------------- 24 | 25 | Every kernel accepts the two keyword arguments ``ndim`` and ``axes``. By 26 | default, kernels are only one dimensional so you must specify the ``ndim`` 27 | argument if you want the kernel to work with higher dimensional inputs. 28 | By default, higher dimensional kernels are applied to every dimension but you 29 | can restrict the evaluation to a subspace using the ``axes`` argument. 30 | For example, if you have a 3 dimensional input space but you want one of the 31 | kernels to only act in the first dimension, you would do the following: 32 | 33 | .. code-block:: python 34 | 35 | from george import kernels 36 | kernel = 10.0 * kernels.Matern32Kernel(1.0, ndim=3, axes=0) 37 | 38 | Similarly, if you wanted the kernel to act on only the second and third 39 | dimensions, you could do something like: 40 | 41 | .. code-block:: python 42 | 43 | kernel = 10.0 * kernels.ExpSquaredKernel([1.0, 0.5], ndim=3, axes=[1, 2]) 44 | 45 | Finally, all of the stationary kernels can be "blocked". This means that the 46 | kernel will only be applied within some parameter range. 
In practice, the 47 | covariance matrix will have a block diagonal structure. To use this feature, 48 | you use the ``block`` keyword argument: 49 | 50 | .. code-block:: python 51 | 52 | kernel = 10.0 * kernels.ExpSquaredKernel(1.0, block=(-1.0, 1.0)) 53 | 54 | # or... 55 | kernel = kernels.ExpSquaredKernel([1.0, 1.5], ndim=2, 56 | block=[(-1.0, 1.0), (0.5, 1.5)]) 57 | 58 | 59 | .. _implementation: 60 | 61 | Implementation details & modeling interface 62 | ------------------------------------------- 63 | 64 | It's worth understanding how these kernels are implemented. 65 | Most of the hard work is done at a low level (in C++) and the Python is only a 66 | thin wrapper to this functionality. 67 | This makes the code fast and consistent across interfaces but it also means 68 | that it isn't currently possible to implement new kernel functions without 69 | recompiling the code. 70 | Almost every kernel has hyperparameters that you can set to control its 71 | behavior and these are controlled using the :ref:`modeling-protocol`. 72 | 73 | .. code-block:: python 74 | 75 | k = 2.0 * kernels.Matern32Kernel(5.0) 76 | 77 | print(k.get_parameter_names()) 78 | # ['k1:log_constant', 'k2:metric:log_M_0_0'] 79 | 80 | print(k.get_parameter_vector()) 81 | # [ 0.69314718 1.60943791] 82 | 83 | You'll notice that, in this case, the parameter vector is the logarithm of 84 | the parameters given when building the kernel. 85 | This will be the case for any strictly positive parameters because it is 86 | always better to fit in the logarithm of these types of parameters. 87 | You probably also noticed that the parameters have names. 88 | This opens up a few interesting features. 89 | For example, if you want to change any of the parameters, you can do it as 90 | follows: 91 | 92 | .. code-block:: python 93 | 94 | import numpy as np 95 | 96 | k["k1:log_constant"] = np.log(10.0) 97 | print(k.get_parameter_vector()) 98 | # [ 2.30258509 1.60943791] 99 | 100 | # ... or: 101 | k[0] = np.log(2.0) 102 | print(k.get_parameter_vector()) 103 | # [ 0.69314718 1.60943791] 104 | 105 | Finally, if you want to update the entire vector, you can use the 106 | :func:`set_vector` method: 107 | 108 | .. code-block:: python 109 | 110 | k.set_parameter_vector(k.get_parameter_vector() + np.random.randn(2)) 111 | 112 | Another feature common to the kernels is that you can "freeze" and "thaw" 113 | parameters by name. 114 | For example, let's say that you want to keep the amplitude of your kernel 115 | fixed and fit for only the scale length: 116 | 117 | .. code-block:: python 118 | 119 | k = 2.0 * kernels.Matern32Kernel(5.0) 120 | k.freeze_parameter("k1:log_constant") 121 | 122 | print(k.get_parameter_names()) 123 | # ['k2:metric:log_M_0_0'] 124 | 125 | print(k.get_parameter_vector()) 126 | # [ 1.60943791] 127 | 128 | Bringing a parameter back into the fold is as easy as 129 | 130 | .. code-block:: python 131 | 132 | k.thaw_parameter("k1:log_constant") 133 | 134 | print(k.get_parameter_names()) 135 | # ['k1:log_constant', 'k2:log_M_0_0'] 136 | 137 | print(k.get_vector()) 138 | # [ 0.69314718 1.60943791] 139 | 140 | 141 | .. _stationary-kernels: 142 | 143 | Stationary kernels 144 | ------------------ 145 | 146 | Stationary kernels are a class of functions that depend on the input 147 | coordinates :math:`\mathbf{x}_i` and :math:`\mathbf{x}_j` through their 148 | squared distance under some metric :math:`C`: 149 | 150 | .. 
math:: 151 | 152 | r^2 = (\mathbf{x}_i - \mathbf{x}_j)^\mathrm{T}\,C^{-1}\, 153 | (\mathbf{x}_i - \mathbf{x}_j) 154 | 155 | The currently supported metrics are: 156 | 157 | 1. "isotropic" — the scale length is equal in all dimensions, 158 | 2. "axis-aligned" — there is a different scale length in each dimension, and 159 | 3. "general" — arbitrary covariances between dimensions are allowed. 160 | 161 | The "isotropic" and "axis-aligned" metrics are parameterized by the logarithms 162 | of their scale lengths. 163 | For example: 164 | 165 | .. code-block:: python 166 | 167 | from george.metrics import Metric 168 | m = Metric(2.0, ndim=2) 169 | print(m.get_parameter_vector()) 170 | # [ 0.69314718] 171 | 172 | gives a two-dimensional isotropic metric with 173 | 174 | .. math:: 175 | 176 | C = \left(\begin{array}{cc} 2 & 0 \\ 0 & 2 \end{array}\right) 177 | 178 | and 179 | 180 | .. code-block:: python 181 | 182 | m = Metric([2.0, 4.0], ndim=2) 183 | print(m.get_parameter_vector()) 184 | # [ 0.69314718 1.38629436] 185 | 186 | specifies the following matrix 187 | 188 | .. math:: 189 | 190 | C = \left(\begin{array}{cc} 2 & 0 \\ 0 & 4 \end{array}\right) \quad. 191 | 192 | .. note:: Another way to define the isotropic metric is that it scales the 193 | square of the distance between points such that the following equality 194 | holds for a kernel evaluated at two points a distance :math:`r`: apart: 195 | :math:`k(r^2;\,\textrm{metric}=\lambda) = 196 | k(r^2 / \lambda;\,\mathrm{metric}=1)`. 197 | 198 | In the "general" case, the matrix is parameterized by the elements of the 199 | Cholesky decomposition :math:`C = L\,L^\mathrm{T}` with logarithms along the 200 | diagonal. 201 | For example: 202 | 203 | .. code-block:: python 204 | 205 | m = Metric([[2.0, 0.1], [0.1, 4.0]], ndim=2) 206 | print(m.get_parameter_vector()) 207 | # [ 0.34657359 0.07071068 0.69252179] 208 | 209 | All the stationary kernels take the ``metric`` specification as a keyword 210 | argument: 211 | 212 | .. code-block:: python 213 | 214 | k = kernels.ExpSquaredKernel(metric=[[5.0, 0.1], [0.1, 4.0]], ndim=2) 215 | print(k.get_parameter_vector()) 216 | # [ 0.80471896 0.04472136 0.69289712] 217 | 218 | The currently available stationary kernels are: 219 | 220 | STATIONARYKERNELS 221 | 222 | 223 | .. _non-stationary-kernels: 224 | 225 | Non-stationary kernels 226 | ---------------------- 227 | 228 | Non-stationary kernels are specified by a (symmetric) function of the input 229 | coordinates themselves. 230 | They are applied identically to every axis so the ``axes`` keyword argument 231 | will probably come in handy. 232 | 233 | For example, to implement a quasi-periodic kernel with a three-dimensional 234 | input space where you only want to apply the periodicity along the first 235 | (e.g. time) dimension, you would use something like: 236 | 237 | .. code-block:: python 238 | 239 | k = kernels.ExpSine2Kernel(gamma=0.1, log_period=5.0, ndim=3, axes=0) 240 | k *= 10.0 * kernels.ExpSquaredKernel(metric=5.0, ndim=3, axes=0) 241 | k += 4.0 * kernels.Matern32Kernel(metric=4.0, ndim=3, axes=[1, 2]) 242 | 243 | The currently available non-stationary kernels are: 244 | 245 | OTHERKERNELS 246 | 247 | .. _combining-kernels: 248 | 249 | Combining kernels 250 | ----------------- 251 | 252 | More complicated kernels can be constructed by algebraically combining the 253 | basic kernels listed in the previous sections. 254 | In particular, all the kernels support addition and multiplication. 
255 | For example, an exponential-squared kernel with a non-trivial variance can be 256 | constructed as follows: 257 | 258 | .. code-block:: python 259 | 260 | from george import kernels 261 | kernel = 1e-3 * kernels.ExpSquaredKernel(3.4) 262 | 263 | This is equivalent to: 264 | 265 | .. code-block:: python 266 | 267 | from math import log 268 | kernel = kernels.Product(kernels.ConstantKernel(log_constant=log(1e-3)), 269 | kernels.ExpSquaredKernel(3.4)) 270 | 271 | As demonstrated in :ref:`hyper`, a mixture of kernels can be implemented with 272 | addition: 273 | 274 | .. code-block:: python 275 | 276 | k1 = 1e-3 * kernels.ExpSquaredKernel(3.4) 277 | k2 = 1e-4 * kernels.Matern32Kernel(14.53) 278 | kernel = k1 + k2 279 | 280 | 281 | .. _new-kernels: 282 | 283 | Implementing new kernels 284 | ------------------------ 285 | 286 | As mentioned previously, because of technical limitations, new kernels can 287 | only be implemented by re-compiling george. 288 | See :ref:`new-kernel` for a detailed example of implementing a new kernel. 289 | -------------------------------------------------------------------------------- /docs/user/modeling.rst: -------------------------------------------------------------------------------- 1 | .. module:: george.modeling 2 | 3 | .. _modeling-protocol: 4 | 5 | Modeling Protocol 6 | ================= 7 | 8 | This module provides some infrastructure that makes it easy to implement 9 | abstract "models" to be used within the george framework. Many of the 10 | methods are probably more generally applicable but the implementation 11 | constraints can be simplified since we're just concerned about supporting the 12 | needs of george. 13 | 14 | The basic premise is that a :class:`Model` is an object that has an ordered set 15 | of named parameters. These parameters are assumed to be continuous but they can 16 | have bounds. There is also the concept of an "active set" of parameters that 17 | are being varied in a fit procedure. The other parameters are "frozen" to a 18 | particular value. Frozen parameters can be "thawed" to be returned to the 19 | active set. 20 | 21 | There isn't a formal requirement for the "value" interface that a 22 | :class:`Model` subclass should implement but in some cases, a model will be 23 | expected to implement a ``get_value`` method that returns the "value" of the 24 | model (this can mean many different things but we'll motivate this with an 25 | example below) for the current setting of the parameters. 26 | 27 | Since these models will be used in the context of Bayesian parameter estimation 28 | each model also implements a :func:`Model.log_prior` method that computes the 29 | log of the prior probability of the current setting of the model parameters. 30 | 31 | The full interface is described in detail below and the tutorials demonstrate 32 | the basic usage of the protocol. 33 | 34 | .. autoclass:: george.modeling.Model 35 | :inherited-members: 36 | 37 | .. autoclass:: george.modeling.ModelSet 38 | :members: 39 | 40 | .. autoclass:: george.modeling.ConstantModel 41 | :members: 42 | 43 | .. autoclass:: george.modeling.CallableModel 44 | :members: 45 | -------------------------------------------------------------------------------- /docs/user/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. 
_quickstart: 2 | 3 | Getting started 4 | =============== 5 | 6 | Installation 7 | ------------ 8 | 9 | The core implementation of george is written in C++ so this will need to be 10 | compiled to be called from Python. The easiest way for a new user to do this 11 | will be by following the directions in the :ref:`using-conda` section below. 12 | 13 | .. _using-conda: 14 | 15 | Using conda 16 | +++++++++++ 17 | 18 | The easiest way to install george is using `conda 19 | `_ (via `conda-forge 20 | `_) with the following command: 21 | 22 | .. code-block:: bash 23 | 24 | conda install -c conda-forge george 25 | 26 | 27 | Using pip 28 | +++++++++ 29 | 30 | George can also be installed using `pip `_: 31 | 32 | .. code-block:: bash 33 | 34 | python -m pip install george 35 | 36 | .. _source: 37 | 38 | From Source 39 | +++++++++++ 40 | 41 | The source code for george can be downloaded `from GitHub 42 | `_ by running 43 | 44 | .. code-block:: bash 45 | 46 | git clone --recursive https://github.com/dfm/george.git 47 | 48 | 49 | .. _python-deps: 50 | 51 | **Dependencies** 52 | 53 | You'll need a Python installation and I recommend `conda 54 | `_ if you don't already have your own opinions. 55 | 56 | After installing Python, the following dependencies are required to build and 57 | run george: 58 | 59 | 1. `NumPy `_, 60 | 2. `SciPy `_, and 61 | 3. `pybind11 `_. 62 | 63 | If you're using conda, you can install all of the dependencies with the 64 | following command: 65 | 66 | .. code-block:: bash 67 | 68 | conda install -c conda-forge numpy scipy pybind11 69 | 70 | **Building** 71 | 72 | After installing the dependencies, you can build george by running: 73 | 74 | .. code-block:: bash 75 | 76 | python -m pip install -e . 77 | 78 | in the root directory of the source tree. 79 | 80 | Testing 81 | ------- 82 | 83 | To run the unit tests, install `pytest `_ and then 84 | execute: 85 | 86 | .. code-block:: bash 87 | 88 | python -m pytest -v tests 89 | 90 | All of the tests should (of course) pass. 91 | If any of the tests don't pass and if you can't sort out why, `open an issue 92 | on GitHub `_. 93 | 94 | Examples 95 | -------- 96 | 97 | Take a look at :ref:`first` to get started and then check out the other 98 | tutorials for some more advanced usage examples. 99 | -------------------------------------------------------------------------------- /docs/user/solvers.rst: -------------------------------------------------------------------------------- 1 | .. module:: george 2 | 3 | .. _solvers: 4 | 5 | Solvers 6 | ======= 7 | 8 | There are currently two different GP solvers included with George using 9 | different libraries for doing linear algebra. 10 | Both of the solvers implement the same API and should (up to some tolerance) 11 | give the same answers on the same datasets. 12 | A solver is just a class that takes a :class:`Kernel` and that exposes 3 13 | methods: 14 | 15 | 1. ``compute`` --- to compute and factorize the kernel matrix, 16 | 2. ``apply_inverse`` --- to left-multiply the input by the covariance matrix 17 | :math:`C^{-1}\,b` (actually implemented by solving the system 18 | :math:`C\,x = b`), and 19 | 3. ``apply_sqrt`` --- to apply the (Cholesky) square root of the covariance. 20 | 21 | The solvers also provide the properties ``computed`` and ``log_determinant``. 
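To make this interface concrete, here is a rough sketch of what calling a
solver directly might look like. This example is not part of the original
documentation: it assumes that :class:`BasicSolver` is constructed from a
kernel and that its ``compute`` method accepts the input coordinates (passed
here as a 2-d array) and their uncertainties; in practice you will usually
let the :class:`GP` object manage the solver for you.

.. code-block:: python

    import numpy as np
    import george
    from george import kernels

    # Some fake data.
    x = np.sort(np.random.uniform(0, 10, 50))
    yerr = 0.1 * np.ones_like(x)
    y = np.sin(x)

    kernel = np.var(y) * kernels.ExpSquaredKernel(1.0)
    solver = george.BasicSolver(kernel)

    # Factorize the covariance matrix C = K + diag(yerr**2) ...
    solver.compute(np.atleast_2d(x).T, yerr)

    # ... then solve C alpha = y and inspect the solver's state.
    alpha = solver.apply_inverse(y)
    print(solver.computed, solver.log_determinant)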
22 | 23 | The simplest solver provided by George (:class:`BasicSolver`) uses `scipy's 24 | Cholesky implementation 25 | `_ 26 | and the second implementation (:class:`HODLRSolver`) uses `Sivaram 27 | Amambikasaran's HODLR algorithm `_ to do the 28 | linear algebra in :math:`\mathcal{O}(N\,\log^2 N)` instead of 29 | :math:`\mathcal{O}(N^3)`. 30 | 31 | By default, George uses the :class:`BasicSolver` but the :class:`HODLRSolver` 32 | can be used as follows: 33 | 34 | .. code-block:: python 35 | 36 | import george 37 | kernel = ... 38 | gp = george.GP(kernel, solver=george.HODLRSolver) 39 | 40 | The :class:`HODLRSolver` is probably best for most one-dimensional problems 41 | and some large multi-dimensional problems but it doesn't (in general) scale 42 | well with the number of input dimensions. 43 | In practice, it's worth trying both solvers on your specific problem to see 44 | which runs faster. 45 | 46 | 47 | Basic Solver 48 | ------------ 49 | 50 | .. autoclass:: george.BasicSolver 51 | :inherited-members: 52 | 53 | 54 | HODLR Solver 55 | ------------ 56 | 57 | .. autoclass:: george.HODLRSolver 58 | :inherited-members: 59 | -------------------------------------------------------------------------------- /generate_kernels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import os 5 | import glob 6 | import yaml 7 | from jinja2 import Template 8 | 9 | 10 | def compile_kernels(fns): 11 | template_dir = "templates" 12 | output_dir = os.path.join("src", "george") 13 | 14 | with open(os.path.join(template_dir, "parser.h")) as f: 15 | PARSER_TEMPLATE = Template(f.read()) 16 | with open(os.path.join(template_dir, "kernels.h")) as f: 17 | CPP_TEMPLATE = Template(f.read()) 18 | with open(os.path.join(template_dir, "kernels.py")) as f: 19 | PYTHON_TEMPLATE = Template(f.read()) 20 | 21 | specs = [] 22 | for i, fn in enumerate(fns): 23 | with open(fn, "r") as f: 24 | spec = yaml.load(f.read(), Loader=yaml.FullLoader) 25 | print("Found kernel '{0}'".format(spec["name"])) 26 | spec["index"] = i 27 | spec["reparams"] = spec.get("reparams", {}) 28 | specs.append(spec) 29 | print("Found {0} kernel specifications".format(len(specs))) 30 | 31 | fn = os.path.join(output_dir, "include", "george", "parser.h") 32 | with open(fn, "w") as f: 33 | print("Saving parser to '{0}'".format(fn)) 34 | f.write(PARSER_TEMPLATE.render(specs=specs)) 35 | fn = os.path.join(output_dir, "include", "george", "kernels.h") 36 | with open(fn, "w") as f: 37 | print("Saving C++ kernels to '{0}'".format(fn)) 38 | f.write(CPP_TEMPLATE.render(specs=specs)) 39 | fn = os.path.join(output_dir, "kernels.py") 40 | with open(fn, "w") as f: 41 | print("Saving Python kernels to '{0}'".format(fn)) 42 | f.write(PYTHON_TEMPLATE.render(specs=specs)) 43 | 44 | 45 | if __name__ == "__main__": 46 | # If the kernel specifications are included (development mode) re-compile 47 | # them first. 48 | kernel_specs = glob.glob(os.path.join("kernels", "*.yml")) 49 | compile_kernels(kernel_specs) 50 | -------------------------------------------------------------------------------- /kernels/Constant.yml: -------------------------------------------------------------------------------- 1 | name: ConstantKernel 2 | stationary: false 3 | params: [log_constant] 4 | 5 | doc: | 6 | This kernel returns the constant 7 | 8 | .. math:: 9 | 10 | k(\mathbf{x}_i,\,\mathbf{x}_j) = c 11 | 12 | where :math:`c` is a parameter. 
13 | 14 | :param log_constant: 15 | The log of :math:`c` in the above equation. 16 | 17 | reparams: 18 | constant: 19 | return exp(log_constant); 20 | 21 | value: | 22 | return constant; 23 | 24 | grad: 25 | log_constant: | 26 | return constant; 27 | 28 | x1: | 29 | return 0.0; 30 | 31 | x2: | 32 | return 0.0; 33 | -------------------------------------------------------------------------------- /kernels/Cosine.yml: -------------------------------------------------------------------------------- 1 | name: CosineKernel 2 | stationary: false 3 | params: [log_period] 4 | 5 | doc: | 6 | The simplest periodic kernel. This 7 | 8 | .. math:: 9 | k(\mathbf{x}_i,\,\mathbf{x}_j) = \cos\left( 10 | \frac{2\,\pi\,|x_i - x_j|}{P} \right) 11 | 12 | where the parameter :math:`P` is the period of the oscillation. This 13 | kernel should probably always be multiplied be a stationary kernel 14 | (e.g. :class:`ExpSquaredKernel`) to allow quasi-periodic variations. 15 | 16 | :param log_period: 17 | The period of the oscillation. 18 | 19 | reparams: 20 | factor: 21 | return 2 * M_PI * exp(-log_period); 22 | 23 | value: | 24 | return cos((x1 - x2) * factor); 25 | 26 | grad: 27 | log_period: | 28 | double r = factor * (x1 - x2); 29 | return r * sin(r); 30 | 31 | x1: | 32 | return -factor*sin(factor * (x1-x2)); 33 | 34 | x2: | 35 | return factor*sin(factor * (x1-x2)); 36 | 37 | -------------------------------------------------------------------------------- /kernels/DotProduct.yml: -------------------------------------------------------------------------------- 1 | name: DotProductKernel 2 | stationary: false 3 | 4 | doc: | 5 | The dot product kernel 6 | 7 | .. math:: 8 | 9 | k(\mathbf{x}_i,\,\mathbf{x}_j) = \mathbf{x}_i \cdot \mathbf{x}_j 10 | 11 | with no parameters. 12 | 13 | value: return x1 * x2; 14 | 15 | grad: 16 | x1: return x2; 17 | x2: return x1; 18 | -------------------------------------------------------------------------------- /kernels/Empty.yml: -------------------------------------------------------------------------------- 1 | name: EmptyKernel 2 | stationary: false 3 | doc: This kernel is a no-op 4 | value: | 5 | return 0.0; 6 | grad: 7 | x1: return 0.0; 8 | x2: return 0.0; 9 | -------------------------------------------------------------------------------- /kernels/Exp.yml: -------------------------------------------------------------------------------- 1 | name: ExpKernel 2 | stationary: true 3 | 4 | doc: | 5 | The exponential kernel is a stationary kernel where the value 6 | at a given radius :math:`r^2` is given by: 7 | 8 | .. math:: 9 | 10 | k(r^2) = \exp \left ( -\sqrt{r^2} \right ) 11 | 12 | value: return exp(-sqrt(r2)); 13 | 14 | grad: 15 | r2: | 16 | if (r2 < DBL_EPSILON) return 0.0; 17 | double r = sqrt(r2); 18 | return -0.5 * exp(-r) / r; 19 | -------------------------------------------------------------------------------- /kernels/ExpSine2.yml: -------------------------------------------------------------------------------- 1 | name: ExpSine2Kernel 2 | stationary: false 3 | params: [gamma, log_period] 4 | 5 | doc: | 6 | The exp-sine-squared kernel is a commonly used periodic kernel. Unlike 7 | the :class:`CosineKernel`, this kernel never has negative covariance 8 | which might be useful for your problem. Here's the equation: 9 | 10 | .. math:: 11 | k(\mathbf{x}_i,\,\mathbf{x}_j) = 12 | \exp \left( -\Gamma\,\sin^2\left[ 13 | \frac{\pi}{P}\,\left|x_i-x_j\right| 14 | \right] \right) 15 | 16 | :param gamma: 17 | The scale :math:`\Gamma` of the correlations. 
18 | 19 | :param log_period: 20 | The log of the period :math:`P` of the oscillation (in the same units 21 | as :math:`\mathbf{x}`). 22 | 23 | reparams: 24 | factor: 25 | return M_PI * exp(-log_period); 26 | 27 | value: | 28 | double s = sin((x1 - x2) * factor); 29 | return exp(-gamma * s * s); 30 | 31 | grad: 32 | gamma: | 33 | double s = sin((x1 - x2) * factor), s2 = s * s; 34 | return -s2 * exp(-gamma * s2); 35 | log_period: | 36 | double arg = (x1 - x2) * factor, 37 | s = sin(arg), c = cos(arg), 38 | A = exp(-gamma * s * s); 39 | return 2 * gamma * arg * c * s * A; 40 | x1: | 41 | double d = x1 - x2; 42 | double s = sin(d * factor); 43 | return -exp(-gamma * s * s) * factor * gamma * sin(2.0 * factor * d); 44 | x2: | 45 | double d = x1 - x2; 46 | double s = sin(d * factor); 47 | return exp(-gamma * s * s) * factor * gamma * sin(2.0 * factor * d); 48 | 49 | -------------------------------------------------------------------------------- /kernels/ExpSquared.yml: -------------------------------------------------------------------------------- 1 | name: ExpSquaredKernel 2 | stationary: true 3 | 4 | doc: | 5 | The exponential-squared kernel is a stationary kernel where the value 6 | at a given radius :math:`r^2` is given by: 7 | 8 | .. math:: 9 | 10 | k(r^2) = \exp \left ( -\frac{r^2}{2} \right ) 11 | 12 | value: return exp(-0.5 * r2); 13 | 14 | grad: 15 | r2: return -0.5 * exp(-0.5 * r2); 16 | -------------------------------------------------------------------------------- /kernels/Linear.yml: -------------------------------------------------------------------------------- 1 | name: LinearKernel 2 | stationary: false 3 | constants: [{name: order, type: double}] 4 | params: [log_gamma2] 5 | 6 | doc: | 7 | The linear regression kernel 8 | 9 | .. math:: 10 | 11 | k(\mathbf{x}_i,\,\mathbf{x}_j) = 12 | \frac{(\mathbf{x}_i \cdot \mathbf{x}_j)^P}{\gamma^2} 13 | 14 | :param order: 15 | The power :math:`P`. This parameter is a *constant*; it is not 16 | included in the parameter vector. 17 | 18 | :param log_gamma2: 19 | The scale factor :math:`\gamma^2`. 20 | 21 | reparams: 22 | inv_gamma2: 23 | return exp(-log_gamma2); 24 | 25 | value: | 26 | if (order == 0.0) return inv_gamma2; 27 | return pow(x1 * x2, order) * inv_gamma2; 28 | 29 | grad: 30 | log_gamma2: | 31 | if (order == 0.0) return -inv_gamma2; 32 | return -pow(x1 * x2, order) * inv_gamma2; 33 | 34 | x1: | 35 | if (order == 0.0) return 0.0; 36 | return x2 * order * pow(x1 * x2, order - 1.0) * inv_gamma2; 37 | 38 | x2: | 39 | if (order == 0.0) return 0.0; 40 | return x1 * order * pow(x1 * x2, order - 1.0) * inv_gamma2; 41 | -------------------------------------------------------------------------------- /kernels/LocalGaussian.yml: -------------------------------------------------------------------------------- 1 | name: LocalGaussianKernel 2 | stationary: false 3 | params: [location, log_width] 4 | 5 | doc: | 6 | A local Gaussian kernel. 7 | 8 | .. math:: 9 | k(\mathbf{x}_i,\,\mathbf{x}_j) = \exp\left( 10 | -\frac{(x_i - x_0)^2 + (x_j - x_0)^2}{2\,w} \right)) 11 | 12 | :param location: 13 | The location :math:`x_0` of the Gaussian. 14 | 15 | :param log_width: 16 | The (squared) width :math:`w` of the Gaussian. 
17 | 18 | reparams: 19 | inv_2w: return 0.5 * exp(-log_width); 20 | 21 | value: | 22 | double d1 = x1 - location, d2 = x2 - location; 23 | return exp(-(d1*d1 + d2*d2) * inv_2w); 24 | 25 | grad: 26 | location: | 27 | double d1 = x1 - location, d2 = x2 - location; 28 | return 2 * exp(-(d1*d1 + d2*d2) * inv_2w) * inv_2w * (d1 + d2); 29 | log_width: | 30 | double d1 = x1 - location, d2 = x2 - location, 31 | arg = (d1*d1 + d2*d2) * inv_2w; 32 | return exp(-arg) * arg; 33 | x1: | 34 | double d1 = x1 - location, d2 = x2 - location; 35 | return -2.0 * exp(-(d1*d1 + d2*d2) * inv_2w) * d1 * inv_2w; 36 | x2: | 37 | double d1 = x1 - location, d2 = x2 - location; 38 | return -2.0 * exp(-(d1*d1 + d2*d2) * inv_2w) * d2 * inv_2w; 39 | -------------------------------------------------------------------------------- /kernels/Matern32.yml: -------------------------------------------------------------------------------- 1 | name: Matern32Kernel 2 | stationary: true 3 | 4 | doc: | 5 | The Matern-3/2 kernel is stationary kernel where the value at a 6 | given radius :math:`r^2` is given by: 7 | 8 | .. math:: 9 | 10 | k(r^2) = \left( 1+\sqrt{3\,r^2} \right)\, 11 | \exp \left (-\sqrt{3\,r^2} \right ) 12 | 13 | value: | 14 | double r = sqrt(3.0 * r2); 15 | return (1.0 + r) * exp(-r); 16 | 17 | grad: 18 | r2: | 19 | double r = sqrt(3.0 * r2); 20 | return -3.0 * 0.5 * exp(-r); 21 | -------------------------------------------------------------------------------- /kernels/Matern52.yml: -------------------------------------------------------------------------------- 1 | name: Matern52Kernel 2 | stationary: true 3 | 4 | doc: | 5 | The Matern-5/2 kernel is stationary kernel where the value at a 6 | given radius :math:`r^2` is given by: 7 | 8 | .. math:: 9 | 10 | k(r^2) = \left( 1+\sqrt{5\,r^2}+ \frac{5\,r^2}{3} \right)\, 11 | \exp \left (-\sqrt{5\,r^2} \right ) 12 | 13 | value: | 14 | double r = sqrt(5.0 * r2); 15 | return (1 + r + 5.0 * r2 / 3.0) * exp(-r); 16 | 17 | grad: 18 | r2: | 19 | double r = sqrt(5.0 * r2); 20 | return -5 * (1 + r) * exp(-r) / 6.0; 21 | -------------------------------------------------------------------------------- /kernels/MaternGeneral.yml.example: -------------------------------------------------------------------------------- 1 | name: MaternGeneralKernel 2 | stationary: true 3 | constants: [{name: nu, type: double}] 4 | includes: [ 5 | , 6 | 7 | ] 8 | 9 | doc: | 10 | The Matern kernel is stationary kernel where the value at a 11 | given radius :math:`r^2` is given by: 12 | 13 | .. 
math:: 14 | 15 | k(r^2) = \frac{1}{\Gamma(\nu)\,2^{\nu-1}}\, 16 | \left( 2\,\nu\,r^2 \right)^{\nu/2}\, 17 | K_\nu \left (\sqrt{2\,\nu\,r^2} \right ) 18 | 19 | reparams: 20 | factor: 21 | return 1.0 / (boost::math::tgamma(nu) * pow(2, nu-1.0)); 22 | 23 | value: | 24 | double K_nu, r = sqrt(2.0 * nu * r2); 25 | if (std::abs(r) < DBL_EPSILON) return 1.0; 26 | K_nu = boost::math::cyl_bessel_k(nu, r); 27 | return pow(r, nu) * K_nu * factor; 28 | 29 | grad: 30 | r2: | 31 | // http://www.wolframalpha.com/input/?i=D%5Bx%5Enu+*+K%5Bnu%2C+x%5D%2C+x%5D 32 | double K_nu, K_nu_pm_1, drdr2, r = sqrt(2.0 * nu * r2); 33 | if (std::abs(r) < DBL_EPSILON) return 0.0; 34 | K_nu = boost::math::cyl_bessel_k(nu, r); 35 | K_nu_pm_1 = boost::math::cyl_bessel_k(nu+1.0, r) + boost::math::cyl_bessel_k(nu-1.0, r); 36 | drdr2 = 0.5 * sqrt(2.0 * nu / r2); 37 | return -0.5 * drdr2 * pow(r, nu-1.0) * (r*K_nu_pm_1-2*nu*K_nu) * factor; 38 | -------------------------------------------------------------------------------- /kernels/Polynomial.yml: -------------------------------------------------------------------------------- 1 | name: PolynomialKernel 2 | stationary: false 3 | constants: [{name: order, type: double}] 4 | params: [log_sigma2] 5 | 6 | doc: | 7 | A polynomial kernel 8 | 9 | .. math:: 10 | 11 | k(\mathbf{x}_i,\,\mathbf{x}_j) = 12 | (\mathbf{x}_i \cdot \mathbf{x}_j + \sigma^2)^P 13 | 14 | :param order: 15 | The power :math:`P`. This parameter is a *constant*; it is not 16 | included in the parameter vector. 17 | 18 | :param log_sigma2: 19 | The log of the variance :math:`\sigma^2 > 0`. 20 | 21 | reparams: 22 | sigma2: 23 | return exp(log_sigma2); 24 | 25 | value: | 26 | if (order == 0.0) return 1.0; 27 | return pow(x1 * x2 + sigma2, order); 28 | 29 | grad: 30 | log_sigma2: | 31 | if (order == 0.0) return 0.0; 32 | return sigma2 * pow(x1 * x2 + sigma2, order-1.0) * order; 33 | x1: | 34 | if (order == 0.0) return 0.0; 35 | return x2 * order * pow(x1 * x2 + sigma2, order-1.0); 36 | x2: | 37 | if (order == 0.0) return 0.0; 38 | return x1 * order * pow(x1 * x2 + sigma2, order-1.0); 39 | -------------------------------------------------------------------------------- /kernels/RationalQuadratic.yml: -------------------------------------------------------------------------------- 1 | name: RationalQuadraticKernel 2 | stationary: true 3 | params: [log_alpha] 4 | 5 | doc: | 6 | This is equivalent to a "scale mixture" of :class:`ExpSquaredKernel` 7 | kernels with different scale lengths drawn from a gamma distribution. 8 | See R&W for more info but here's the equation: 9 | 10 | .. math:: 11 | k(r^2) = \left[1 + \frac{r^2}{2\,\alpha} \right]^{-\alpha} 12 | 13 | :param log_alpha: 14 | The log of the Gamma distribution parameter :math:`\alpha`.
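
The "scale mixture" remark can be made concrete: as :math:`\alpha \to \infty` the rational quadratic value above converges to the exponential-squared kernel. A small numpy sketch (illustrative only, not part of the package) that reproduces the compiled `value` expression and shows the limit:

    import numpy as np

    def rational_quadratic(r2, alpha):
        # Same expression as the value block: pow(1 + 0.5 * r2 / alpha, -alpha)
        return (1.0 + 0.5 * r2 / alpha) ** (-alpha)

    r2 = np.linspace(0.0, 4.0, 9)
    exp_squared = np.exp(-0.5 * r2)
    for alpha in (1.0, 10.0, 1000.0):
        err = np.max(np.abs(rational_quadratic(r2, alpha) - exp_squared))
        print("alpha = {0:7.1f}  max difference = {1:.2e}".format(alpha, err))
    # The difference shrinks roughly like 1/alpha, consistent with the
    # gamma-mixture interpretation cited from R&W.
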
15 | 16 | reparams: 17 | alpha: 18 | return exp(log_alpha); 19 | 20 | value: | 21 | return pow(1 + 0.5 * r2 / alpha, -alpha); 22 | 23 | grad: 24 | r2: | 25 | return -0.5 * pow(1 + 0.5 * r2 / alpha, -alpha-1); 26 | log_alpha: | 27 | double t1 = 1.0 + 0.5 * r2 / alpha, 28 | t2 = 2.0 * alpha * t1; 29 | return alpha * pow(t1, -alpha) * (r2 / t2 - log(t1)); 30 | -------------------------------------------------------------------------------- /paper/.gitignore: -------------------------------------------------------------------------------- 1 | *.aux 2 | *.brf 3 | *.log 4 | *.out 5 | *.pdf 6 | *.bbl 7 | *.blg 8 | *.dvi 9 | *.ps 10 | *.synctex.gz 11 | vc.tex 12 | old 13 | -------------------------------------------------------------------------------- /paper/Makefile: -------------------------------------------------------------------------------- 1 | RM = rm -rf 2 | TMP_SUFFS = pdf aux bbl blg log dvi ps eps out 3 | NAME = ms 4 | 5 | all: ${NAME}.pdf 6 | 7 | vc.tex: ../.git/logs/HEAD 8 | echo "%%% This file is generated by the Makefile." > vc.tex 9 | git log -1 --date=short --format="format:\\newcommand{\\githash}{%h}\\newcommand{\\gitdate}{%ad}\\newcommand{\\gitauthor}{%an}" >> vc.tex 10 | 11 | ${NAME}.pdf: ${NAME}.tex *.bib vc.tex 12 | tectonic ${NAME}.tex 13 | 14 | clean: 15 | ${RM} $(foreach suff, ${TMP_SUFFS}, ${NAME}.${suff}) 16 | ${RM} msNotes.bib 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "george" 3 | description = "Blazingly fast Gaussian Processes for regression." 4 | readme = "README.rst" 5 | authors = [{ name = "Daniel Foreman-Mackey", email = "foreman.mackey@gmail.com" }] 6 | requires-python = ">=3.9" 7 | license = { file = "LICENSE" } 8 | urls = { Homepage = "https://github.com/dfm/george" } 9 | dependencies = ["numpy", "scipy"] 10 | dynamic = ["version"] 11 | 12 | [build-system] 13 | requires = ["scikit-build-core", "pybind11"] 14 | build-backend = "scikit_build_core.build" 15 | 16 | [tool.scikit-build] 17 | metadata.version.provider = "scikit_build_core.metadata.setuptools_scm" 18 | sdist.include = [ 19 | "src/george/george_version.py", 20 | "src/george/kernel_interface.cpp", 21 | "src/george/solvers/_hodlr.cpp", 22 | ] 23 | wheel.install-dir = "george" 24 | minimum-version = "0.5" 25 | build-dir = "build/{wheel_tag}" 26 | 27 | [tool.setuptools_scm] 28 | write_to = "src/george/george_version.py" 29 | 30 | [tool.cibuildwheel] 31 | skip = "pp* *-win32 *-musllinux_* *-manylinux_i686" 32 | -------------------------------------------------------------------------------- /scripts/compile_kernels.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import yaml 4 | from jinja2 import Template 5 | 6 | 7 | def compile_kernels(fns): 8 | template_dir = "templates" 9 | output_dir = os.path.join("src", "george") 10 | 11 | with open(os.path.join(template_dir, "parser.h")) as f: 12 | PARSER_TEMPLATE = Template(f.read()) 13 | with open(os.path.join(template_dir, "kernels.h")) as f: 14 | CPP_TEMPLATE = Template(f.read()) 15 | with open(os.path.join(template_dir, "kernels.py")) as f: 16 | PYTHON_TEMPLATE = Template(f.read()) 17 | 18 | specs = [] 19 | for i, fn in enumerate(fns): 20 | with open(fn, "r") as f: 21 | spec = yaml.load(f.read(), Loader=yaml.FullLoader) 22 | print("Found kernel '{0}'".format(spec["name"])) 23 | spec["index"] = i 24 | spec["reparams"] = spec.get("reparams", 
{}) 25 | specs.append(spec) 26 | print("Found {0} kernel specifications".format(len(specs))) 27 | 28 | fn = os.path.join(output_dir, "include", "george", "parser.h") 29 | with open(fn, "w") as f: 30 | print("Saving parser to '{0}'".format(fn)) 31 | f.write(PARSER_TEMPLATE.render(specs=specs)) 32 | fn = os.path.join(output_dir, "include", "george", "kernels.h") 33 | with open(fn, "w") as f: 34 | print("Saving C++ kernels to '{0}'".format(fn)) 35 | f.write(CPP_TEMPLATE.render(specs=specs)) 36 | fn = os.path.join(output_dir, "kernels.py") 37 | with open(fn, "w") as f: 38 | print("Saving Python kernels to '{0}'".format(fn)) 39 | f.write(PYTHON_TEMPLATE.render(specs=specs)) 40 | -------------------------------------------------------------------------------- /src/george/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __all__ = [ 4 | "__version__", 5 | "kernels", 6 | "GP", 7 | "Metric", 8 | "TrivialSolver", 9 | "BasicSolver", 10 | "HODLRSolver", 11 | ] 12 | 13 | from .george_version import version as __version__ 14 | 15 | from . import kernels 16 | from .gp import GP 17 | from .metrics import Metric 18 | from .solvers import TrivialSolver, BasicSolver, HODLRSolver 19 | -------------------------------------------------------------------------------- /src/george/include/george/exceptions.h: -------------------------------------------------------------------------------- 1 | #ifndef _GEORGE_EXCEPTIONS_H_ 2 | #define _GEORGE_EXCEPTIONS_H_ 3 | 4 | #include 5 | 6 | namespace george { 7 | 8 | struct dimension_mismatch : public std::exception { 9 | const char * what () const throw () { 10 | return "dimension mismatch"; 11 | } 12 | }; 13 | 14 | struct not_computed : public std::exception { 15 | const char * what () const throw () { 16 | return "you must call 'compute' first"; 17 | } 18 | }; 19 | 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/george/include/george/george.h: -------------------------------------------------------------------------------- 1 | #ifndef _GEORGE_H_ 2 | #define _GEORGE_H_ 3 | 4 | #include "george/exceptions.h" 5 | #include "george/kernels.h" 6 | #include "george/hodlr.h" 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/george/include/george/hodlr.h: -------------------------------------------------------------------------------- 1 | #ifndef _GEORGE_HODLR_H_ 2 | #define _GEORGE_HODLR_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | namespace george { 11 | namespace hodlr { 12 | 13 | template 14 | class Node { 15 | private: 16 | const Eigen::VectorXd& diag_; 17 | KernelType* kernel_; 18 | Node* parent_; 19 | std::vector*> children_; 20 | int start_, size_, direction_, rank_; 21 | bool is_leaf_; 22 | std::vector U_, V_; 23 | Eigen::FullPivLU lu_; 24 | Eigen::LDLT ldlt_; 25 | double log_det_; 26 | 27 | public: 28 | 29 | Node (const Eigen::VectorXd& diag, 30 | KernelType* kernel, 31 | int start, 32 | int size, 33 | int min_size, 34 | double tol, 35 | std::mt19937& random, 36 | int direction = 0, 37 | Node* parent = NULL) 38 | : diag_(diag) 39 | , kernel_(kernel) 40 | , parent_(parent) 41 | , children_(2) 42 | , start_(start) 43 | , size_(size) 44 | , direction_(direction) 45 | , U_(2) 46 | , V_(2) 47 | { 48 | int half = size_ / 2; 49 | if (half >= min_size) { 50 | is_leaf_ = false; 51 | 52 | // Low-rank approximation 53 | rank_ = low_rank_approx(start+half, 
size-half, start, half, tol, random, U_[1], V_[0]); 54 | U_[0] = V_[0]; 55 | V_[1] = U_[1]; 56 | 57 | // Build the children 58 | children_[0] = new Node( 59 | diag_, kernel_, start_, half, min_size, tol, random, 0, this); 60 | children_[1] = new Node( 61 | diag_, kernel_, start_+half, size_-half, min_size, tol, random, 1, this); 62 | 63 | } else { 64 | is_leaf_ = true; 65 | } 66 | }; 67 | 68 | ~Node () { 69 | if (!is_leaf_) { 70 | delete children_[0]; 71 | delete children_[1]; 72 | } 73 | }; 74 | 75 | void compute () { 76 | log_det_ = 0.0; 77 | if (!is_leaf_) { 78 | children_[0]->compute(); 79 | children_[1]->compute(); 80 | log_det_ = children_[0]->log_det_ + children_[1]->log_det_; 81 | } 82 | 83 | // Compute a factorize the inner matrix S 84 | factorize(); 85 | 86 | // Compute the determinant 87 | if (is_leaf_) { 88 | Eigen::VectorXd diag = ldlt_.vectorD(); 89 | for (int n = 0; n < diag.rows(); ++n) log_det_ += log(std::abs(diag(n))); 90 | } else { 91 | Eigen::MatrixXd lu = lu_.matrixLU(); 92 | for (int n = 0; n < lu.rows(); ++n) log_det_ += log(std::abs(lu(n, n))); 93 | } 94 | 95 | Node* node = parent_; 96 | int start = start_, ind = direction_; 97 | while (node) { 98 | apply_inverse(node->U_[ind], start); 99 | start = node->start_; 100 | ind = node->direction_; 101 | node = node->parent_; 102 | } 103 | }; 104 | 105 | double log_determinant () const { return log_det_; }; 106 | 107 | template 108 | void solve (Eigen::MatrixBase& x) const { 109 | if (!is_leaf_) { 110 | children_[0]->solve(x); 111 | children_[1]->solve(x); 112 | } 113 | apply_inverse(x, 0); 114 | }; 115 | 116 | Eigen::VectorXd dot_solve (Eigen::MatrixXd& x) const { 117 | Eigen::MatrixXd b = x; 118 | solve(b); 119 | return x.transpose() * b; 120 | }; 121 | 122 | Eigen::MatrixXd get_exact_matrix () const { 123 | Eigen::MatrixXd K(size_, size_); 124 | for (int n = 0; n < size_; ++n) { 125 | K(n, n) = diag_(start_ + n) + kernel_->get_value(start_ + n, start_ + n); 126 | for (int m = n+1; m < size_; ++m) { 127 | double value = kernel_->get_value(start_ + m, start_ + n); 128 | K(m, n) = value; 129 | K(n, m) = value; 130 | } 131 | } 132 | return K; 133 | }; 134 | 135 | private: 136 | int low_rank_approx (int start_row, 137 | int n_rows, 138 | int start_col, 139 | int n_cols, 140 | double tol, 141 | std::mt19937& random, 142 | Eigen::MatrixXd& U_out, 143 | Eigen::MatrixXd& V_out) const 144 | { 145 | 146 | // Allocate all the memory that we'll need. 
147 | int max_rank = std::min(n_rows, n_cols); 148 | Eigen::MatrixXd U(n_rows, max_rank), 149 | V(n_cols, max_rank); 150 | 151 | // Setup 152 | int rank = 0; 153 | double norm = 0.0, tol2 = tol * tol; 154 | std::vector index(n_rows); 155 | for (int n = 0; n < n_rows; ++n) index[n] = n; 156 | 157 | while (1) { 158 | int i, j, k; 159 | do { 160 | // If we run out of rows to try, just return the trivial factorization 161 | if (index.empty()) { 162 | U_out.resize(n_rows, max_rank); 163 | V_out.resize(n_cols, max_rank); 164 | if (n_cols <= n_rows) { 165 | V_out.setIdentity(); 166 | for (int m = 0; m < n_cols; ++m) 167 | for (int n = 0; n < n_rows; ++n) 168 | U_out(n, m) = kernel_->get_value(start_row + n, start_col + m); 169 | } else { 170 | U_out.setIdentity(); 171 | for (int n = 0; n < n_rows; ++n) 172 | for (int m = 0; m < n_cols; ++m) 173 | V_out(m, n) = kernel_->get_value(start_row + n, start_col + m); 174 | } 175 | return max_rank; 176 | } 177 | 178 | // Choose a random row 179 | std::uniform_int_distribution uniform_dist(0, index.size()-1); 180 | k = uniform_dist(random); 181 | i = index[k]; 182 | index[k] = index.back(); 183 | index.pop_back(); 184 | 185 | // Compute the residual and choose the pivot 186 | for (int n = 0; n < n_cols; ++n) 187 | V(n, rank) = kernel_->get_value(start_row + i, start_col + n); 188 | V.col(rank) -= U.row(i).head(rank) * V.block(0, 0, n_cols, rank).transpose(); 189 | V.col(rank).cwiseAbs().maxCoeff(&j); 190 | 191 | } while (std::abs(V(j, rank)) < 1e-14); 192 | 193 | // Normalize 194 | V.col(rank) /= V(j, rank); 195 | 196 | // Compute the U factorization 197 | for (int n = 0; n < n_rows; ++n) 198 | U(n, rank) = kernel_->get_value(start_row + n, start_col + j); 199 | U.col(rank) -= V.row(j).head(rank) * U.block(0, 0, n_rows, rank).transpose(); 200 | 201 | // Update the rank 202 | rank++; 203 | if (rank >= max_rank) break; 204 | 205 | // Only update if this is a substantial change 206 | double rowcol_norm = U.col(rank-1).squaredNorm() * V.col(rank-1).squaredNorm(); 207 | if (rowcol_norm < tol2 * norm) break; 208 | 209 | // Update the estimate of the norm 210 | norm += rowcol_norm; 211 | if (rank > 1) { 212 | norm += 2.0 * (U.block(0, 0, n_rows, rank-1).transpose() * U.col(rank-1)).cwiseAbs().maxCoeff(); 213 | norm += 2.0 * (V.block(0, 0, n_cols, rank-1).transpose() * V.col(rank-1)).cwiseAbs().maxCoeff(); 214 | } 215 | } 216 | 217 | U_out = U.block(0, 0, n_rows, rank); 218 | V_out = V.block(0, 0, n_cols, rank); 219 | 220 | return rank; 221 | }; 222 | 223 | void factorize () { 224 | Eigen::MatrixXd S; 225 | if (is_leaf_) { 226 | S = get_exact_matrix(); 227 | ldlt_.compute(S); 228 | } else { 229 | S.resize(2*rank_, 2*rank_); 230 | S.setIdentity(); 231 | S.block(0, rank_, rank_, rank_) = V_[1].transpose() * U_[1]; 232 | S.block(rank_, 0, rank_, rank_) = V_[0].transpose() * U_[0]; 233 | lu_.compute(S); 234 | } 235 | }; 236 | 237 | template 238 | void apply_inverse (Eigen::MatrixBase& x, int start) const { 239 | int nrhs = x.cols(); 240 | start = start_ - start; 241 | if (is_leaf_) { 242 | x.block(start, 0, size_, nrhs) = ldlt_.solve(x.block(start, 0, size_, nrhs)); 243 | return; 244 | } 245 | 246 | int s1 = size_ / 2, s2 = size_ - s1; 247 | Eigen::MatrixXd temp(2*rank_, nrhs); 248 | temp.block(0, 0, rank_, nrhs) = V_[1].transpose() * x.block(start+s1, 0, s2, nrhs); 249 | temp.block(rank_, 0, rank_, nrhs) = V_[0].transpose() * x.block(start, 0, s1, nrhs); 250 | temp = lu_.solve(temp); 251 | 252 | x.block(start, 0, s1, nrhs) -= U_[0] * temp.block(0, 0, rank_, nrhs); 
253 | x.block(start+s1, 0, s2, nrhs) -= U_[1] * temp.block(rank_, 0, rank_, nrhs); 254 | }; 255 | 256 | }; 257 | 258 | } // namespace hodlr 259 | } // namespace george 260 | 261 | #endif 262 | -------------------------------------------------------------------------------- /src/george/include/george/metrics.h: -------------------------------------------------------------------------------- 1 | #ifndef _GEORGE_METRICS_H_ 2 | #define _GEORGE_METRICS_H_ 3 | 4 | #include 5 | #include 6 | #include "george/subspace.h" 7 | 8 | #include 9 | 10 | namespace george { 11 | namespace metrics { 12 | 13 | // 14 | // This is an abstract metric base class. The subclasses have all the good 15 | // stuff. 16 | // 17 | class Metric { 18 | public: 19 | Metric (size_t ndim, size_t naxes, size_t size) 20 | : updated_(true) 21 | , vector_(size) 22 | , subspace_(ndim, naxes) {}; 23 | virtual ~Metric () {}; 24 | 25 | // Return the distance between two vectors. 26 | virtual double value (const double* x1, const double* x2) { 27 | return 0.0; 28 | }; 29 | 30 | // Return the gradient of `value` with respect to the parameter vector. 31 | virtual double gradient (const double* x1, const double* x2, double* grad) { 32 | return 0.0; 33 | }; 34 | 35 | virtual void x1_gradient (const double* x1, const double* x2, double* grad) {}; 36 | 37 | virtual void x2_gradient (const double* x1, const double* x2, double* grad) { 38 | this->x1_gradient(x1, x2, grad); 39 | //for (size_t i = 0; i < this->subspace_.get_ndim(); ++i) { 40 | // grad[i] *= -1.0; 41 | //} 42 | }; 43 | 44 | // Parameter vector specification. 45 | size_t size () const { return this->vector_.size(); }; 46 | virtual void set_parameter (size_t i, double value) { 47 | this->updated_ = true; 48 | this->vector_[i] = exp(-value); 49 | }; 50 | virtual double get_parameter (size_t i) const { 51 | return -log(this->vector_[i]); 52 | }; 53 | 54 | // Axes specification. 
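
The metric classes that follow all compute the same quantity, a squared Mahalanobis-style distance :math:`r^2 = (\mathbf{x}_1 - \mathbf{x}_2)^\mathrm{T} M^{-1} (\mathbf{x}_1 - \mathbf{x}_2)`; `GeneralMetric` stores the Cholesky factor of :math:`M` in packed form (with inverted diagonal) so that the custom forward substitution further down does the work. A dense numpy restatement of that identity (illustrative only; the values and the dense storage are not what the C++ uses):

    import numpy as np

    M = np.array([[2.0, 0.3], [0.3, 1.5]])   # example metric matrix
    L = np.linalg.cholesky(M)                # M = L L^T
    x1, x2 = np.array([0.5, -1.0]), np.array([1.2, 0.4])

    z = np.linalg.solve(L, x1 - x2)          # forward substitution, as in _custom_forward_sub
    r2 = z @ z                               # squared distance in the whitened space
    assert np.isclose(r2, (x1 - x2) @ np.linalg.solve(M, x1 - x2))
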
55 | void set_axis (size_t i, size_t value) { 56 | this->subspace_.set_axis(i, value); 57 | }; 58 | size_t get_axis (size_t i) const { 59 | return this->subspace_.get_axis(i); 60 | }; 61 | size_t get_ndim () const { 62 | return this->subspace_.get_ndim(); 63 | }; 64 | 65 | protected: 66 | bool updated_; 67 | std::vector vector_; 68 | george::subspace::Subspace subspace_; 69 | }; 70 | 71 | class IsotropicMetric : public Metric { 72 | public: 73 | 74 | IsotropicMetric (size_t ndim, size_t naxes) 75 | : Metric(ndim, naxes, 1) {}; 76 | double value (const double* x1, const double* x2) { 77 | size_t i, j; 78 | double d, r2 = 0.0; 79 | for (i = 0; i < this->subspace_.get_naxes(); ++i) { 80 | j = this->subspace_.get_axis(i); 81 | d = x1[j] - x2[j]; 82 | r2 += d*d; 83 | } 84 | return r2 * this->vector_[0]; 85 | }; 86 | 87 | double gradient (const double* x1, const double* x2, double* grad) { 88 | double r2 = this->value(x1, x2); 89 | grad[0] = -r2; 90 | return r2; 91 | }; 92 | 93 | void x1_gradient (const double* x1, const double* x2, double* grad) { 94 | size_t i, j; 95 | for (i = 0; i < this->subspace_.get_naxes(); ++i) { 96 | j = this->subspace_.get_axis(i); 97 | grad[j] = this->vector_[0] * (x1[j] - x2[j]); 98 | } 99 | }; 100 | }; 101 | 102 | class AxisAlignedMetric : public Metric { 103 | public: 104 | 105 | AxisAlignedMetric (size_t ndim, size_t naxes) 106 | : Metric(ndim, naxes, naxes) {}; 107 | 108 | double value (const double* x1, const double* x2) { 109 | size_t i, j; 110 | double d, r2 = 0.0; 111 | for (i = 0; i < this->subspace_.get_naxes(); ++i) { 112 | j = this->subspace_.get_axis(i); 113 | d = x1[j] - x2[j]; 114 | r2 += d * d * this->vector_[i]; 115 | } 116 | return r2; 117 | }; 118 | 119 | double gradient (const double* x1, const double* x2, double* grad) { 120 | size_t i, j; 121 | double d, r2 = 0.0; 122 | for (i = 0; i < this->subspace_.get_naxes(); ++i) { 123 | j = this->subspace_.get_axis(i); 124 | d = x1[j] - x2[j]; 125 | d = d * d * this->vector_[i]; 126 | r2 += d; 127 | grad[i] = -d; 128 | } 129 | return r2; 130 | }; 131 | 132 | void x1_gradient (const double* x1, const double* x2, double* grad) { 133 | size_t i, j; 134 | for (i = 0; i < this->subspace_.get_naxes(); ++i) { 135 | j = this->subspace_.get_axis(i); 136 | grad[j] = this->vector_[i] * (x1[j] - x2[j]); 137 | } 138 | }; 139 | }; 140 | 141 | // 142 | // Warning: Herein lie custom Cholesky functions. Use at your own risk! 143 | // 144 | inline void _custom_forward_sub (size_t n, double* L, double* b) { 145 | size_t i, j, k; 146 | for (i = 0, k = 0; i < n; ++i) { 147 | for (j = 0; j < i; ++j, ++k) 148 | b[i] -= L[k] * b[j]; 149 | b[i] *= L[k++]; // The inverse has already been taken along the diagonal. 150 | } 151 | } 152 | 153 | inline void _custom_backward_sub (size_t n, double* L, double* b) { 154 | long long i, j; 155 | size_t k, k0 = (n + 1) * n / 2; 156 | for (i = n - 1; i >= 0; --i) { 157 | k = k0 - n + i; 158 | for (j = n-1; j > i; --j) { 159 | b[i] -= L[k] * b[j]; 160 | k -= j; 161 | } 162 | b[i] *= L[k]; // The inverse has already been taken along the diagonal. 
163 | } 164 | } 165 | 166 | class GeneralMetric : public Metric { 167 | public: 168 | GeneralMetric (size_t ndim, size_t naxes) 169 | : Metric(ndim, naxes, naxes*(naxes+1)/2) {}; 170 | 171 | void set_parameter (size_t i, double value) { 172 | size_t j, d; 173 | this->updated_ = true; 174 | for (j = 0, d = 2; j <= i; j += d, ++d) { 175 | if (i == j) { 176 | this->vector_[i] = exp(-value); 177 | return; 178 | } 179 | } 180 | this->vector_[i] = value; 181 | }; 182 | double get_parameter (size_t i) const { 183 | size_t j, d; 184 | for (j = 0, d = 2; j <= i; j += d, ++d) 185 | if (i == j) 186 | return -log(this->vector_[i]); 187 | return this->vector_[i]; 188 | }; 189 | 190 | double value (const double* x1, const double* x2) { 191 | size_t i, j, n = this->subspace_.get_naxes(); 192 | double r2; 193 | std::vector r(n); 194 | for (i = 0; i < n; ++i) { 195 | j = this->subspace_.get_axis(i); 196 | r[i] = x1[j] - x2[j]; 197 | } 198 | 199 | _custom_forward_sub(n, &(this->vector_[0]), &(r[0])); 200 | 201 | r2 = 0.0; 202 | for (i = 0; i < n; ++i) r2 += r[i] * r[i]; 203 | return r2; 204 | }; 205 | 206 | // For a matrix A, dA^-1/dt = -A^-1 dA/dt A^-1. In this case, we want 207 | // d(r A^-1 r)/dA = -(A^-1 r)^T (A^-1 r). 208 | double gradient (const double* x1, const double* x2, double* grad) { 209 | size_t i, j, k, n = this->subspace_.get_naxes(); 210 | double r2; 211 | std::vector r(n), Lir(n); 212 | for (i = 0; i < n; ++i) { 213 | j = this->subspace_.get_axis(i); 214 | r[i] = x1[j] - x2[j]; 215 | } 216 | 217 | // Compute L^{-1} r and save it. 218 | _custom_forward_sub(n, &(this->vector_[0]), &(r[0])); 219 | for (i = 0; i < n; ++i) Lir[i] = r[i]; 220 | 221 | // Compute K^{-1} r. 222 | _custom_backward_sub(n, &(this->vector_[0]), &(r[0])); 223 | 224 | // Compute the gradient. 225 | for (i = 0, k = 0; i < n; ++i) { 226 | grad[k] = -2 * r[i] * Lir[i] * exp(this->vector_[k]); 227 | k++; 228 | for (j = i+1; j < n; ++j) 229 | grad[k++] = -2 * r[j] * Lir[i]; 230 | } 231 | 232 | // Compute the distance. 
233 | r2 = 0.0; 234 | for (i = 0; i < n; ++i) r2 += Lir[i] * Lir[i]; 235 | return r2; 236 | }; 237 | 238 | void x1_gradient (const double* x1, const double* x2, double* grad) { 239 | size_t i, j, n = this->subspace_.get_naxes(); 240 | std::vector r(n); 241 | for (i = 0; i < n; ++i) { 242 | j = this->subspace_.get_axis(i); 243 | r[i] = x1[j] - x2[j]; 244 | } 245 | 246 | _custom_forward_sub(n, &(this->vector_[0]), &(r[0])); 247 | 248 | for (i = 0; i < n; ++i) { 249 | j = this->subspace_.get_axis(i); 250 | grad[j] = r[i]; 251 | } 252 | }; 253 | }; 254 | 255 | }; // namespace metrics 256 | }; // namespace george 257 | 258 | #endif 259 | -------------------------------------------------------------------------------- /src/george/include/george/subspace.h: -------------------------------------------------------------------------------- 1 | #ifndef _GEORGE_SUBSPACE_H_ 2 | #define _GEORGE_SUBSPACE_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace george { 8 | namespace subspace { 9 | 10 | class Subspace { 11 | public: 12 | Subspace (size_t ndim, size_t naxes) 13 | : ndim_(ndim) 14 | , naxes_(naxes) 15 | , axes_(naxes) {}; 16 | 17 | size_t get_ndim () const { return ndim_; }; 18 | size_t get_naxes () const { return naxes_; }; 19 | size_t get_axis (size_t i) const { return axes_[i]; }; 20 | void set_axis (size_t i, size_t value) { axes_[i] = value; }; 21 | 22 | private: 23 | size_t ndim_, naxes_; 24 | std::vector axes_; 25 | }; 26 | 27 | }; // namespace subspace 28 | }; // namespace george 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/george/kernel_interface.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "george/parser.h" 5 | #include "george/kernels.h" 6 | #include "george/exceptions.h" 7 | 8 | namespace py = pybind11; 9 | 10 | class KernelInterface { 11 | public: 12 | KernelInterface (py::object kernel_spec) : kernel_spec_(kernel_spec) { 13 | kernel_ = george::parse_kernel_spec(kernel_spec_); 14 | }; 15 | ~KernelInterface () { 16 | delete kernel_; 17 | }; 18 | size_t ndim () const { return kernel_->get_ndim(); }; 19 | size_t size () const { return kernel_->size(); }; 20 | double value (const double* x1, const double* x2) const { return kernel_->value(x1, x2); }; 21 | void gradient (const double* x1, const double* x2, const unsigned* which, double* grad) const { 22 | return kernel_->gradient(x1, x2, which, grad); 23 | }; 24 | void x1_gradient (const double* x1, const double* x2, double* grad) const { 25 | return kernel_->x1_gradient(x1, x2, grad); 26 | }; 27 | void x2_gradient (const double* x1, const double* x2, double* grad) const { 28 | return kernel_->x2_gradient(x1, x2, grad); 29 | }; 30 | py::object kernel_spec () const { return kernel_spec_; }; 31 | 32 | private: 33 | py::object kernel_spec_; 34 | george::kernels::Kernel* kernel_; 35 | }; 36 | 37 | 38 | PYBIND11_MODULE(kernel_interface, m) { 39 | 40 | m.doc() = R"delim( 41 | Docs... 
42 | )delim"; 43 | 44 | py::class_ interface(m, "KernelInterface"); 45 | interface.def(py::init()); 46 | 47 | interface.def("value_general", [](KernelInterface& self, py::array_t x1, py::array_t x2) { 48 | auto x1p = x1.unchecked<2>(); 49 | auto x2p = x2.unchecked<2>(); 50 | size_t n1 = x1p.shape(0), n2 = x2p.shape(0); 51 | if (x1p.shape(1) != py::ssize_t(self.ndim()) || x2p.shape(1) != py::ssize_t(self.ndim())) throw george::dimension_mismatch(); 52 | py::array_t result({n1, n2}); 53 | auto resultp = result.mutable_unchecked<2>(); 54 | for (size_t i = 0; i < n1; ++i) { 55 | for (size_t j = 0; j < n2; ++j) { 56 | resultp(i, j) = self.value(&(x1p(i, 0)), &(x2p(j, 0))); 57 | } 58 | } 59 | return result; 60 | }); 61 | 62 | interface.def("value_symmetric", [](KernelInterface& self, py::array_t x) { 63 | auto xp = x.unchecked<2>(); 64 | size_t n = xp.shape(0); 65 | if (xp.shape(1) != py::ssize_t(self.ndim())) throw george::dimension_mismatch(); 66 | py::array_t result({n, n}); 67 | auto resultp = result.mutable_unchecked<2>(); 68 | for (size_t i = 0; i < n; ++i) { 69 | resultp(i, i) = self.value(&(xp(i, 0)), &(xp(i, 0))); 70 | for (size_t j = i+1; j < n; ++j) { 71 | double value = self.value(&(xp(i, 0)), &(xp(j, 0))); 72 | resultp(i, j) = value; 73 | resultp(j, i) = value; 74 | } 75 | } 76 | return result; 77 | }); 78 | 79 | interface.def("value_diagonal", [](KernelInterface& self, py::array_t x1, py::array_t x2) { 80 | auto x1p = x1.unchecked<2>(); 81 | auto x2p = x2.unchecked<2>(); 82 | size_t n = x1p.shape(0); 83 | if (py::ssize_t(n) != x2p.shape(0) || x1p.shape(1) != py::ssize_t(self.ndim()) || x2p.shape(1) != py::ssize_t(self.ndim())) throw george::dimension_mismatch(); 84 | py::array_t result(n); 85 | auto resultp = result.mutable_unchecked<1>(); 86 | for (size_t i = 0; i < n; ++i) { 87 | resultp(i) = self.value(&(x1p(i, 0)), &(x2p(i, 0))); 88 | } 89 | return result; 90 | }); 91 | 92 | interface.def("gradient_general", [](KernelInterface& self, py::array_t which, py::array_t x1, py::array_t x2) { 93 | auto x1p = x1.unchecked<2>(); 94 | auto x2p = x2.unchecked<2>(); 95 | size_t n1 = x1p.shape(0), n2 = x2p.shape(0), size = self.size(); 96 | if (x1p.shape(1) != py::ssize_t(self.ndim()) || x2p.shape(1) != py::ssize_t(self.ndim())) throw george::dimension_mismatch(); 97 | py::array_t result({n1, n2, size}); 98 | auto resultp = result.mutable_unchecked<3>(); 99 | auto w = which.unchecked<1>(); 100 | unsigned* wp = (unsigned*)&(w(0)); 101 | for (size_t i = 0; i < n1; ++i) { 102 | for (size_t j = 0; j < n2; ++j) { 103 | self.gradient(&(x1p(i, 0)), &(x2p(j, 0)), wp, &(resultp(i, j, 0))); 104 | } 105 | } 106 | return result; 107 | }); 108 | 109 | interface.def("gradient_symmetric", [](KernelInterface& self, py::array_t which, py::array_t x) { 110 | auto xp = x.unchecked<2>(); 111 | size_t n = xp.shape(0), size = self.size(); 112 | if (xp.shape(1) != py::ssize_t(self.ndim())) throw george::dimension_mismatch(); 113 | py::array_t result({n, n, size}); 114 | auto resultp = result.mutable_unchecked<3>(); 115 | auto w = which.unchecked<1>(); 116 | unsigned* wp = (unsigned*)&(w(0)); 117 | for (size_t i = 0; i < n; ++i) { 118 | self.gradient(&(xp(i, 0)), &(xp(i, 0)), wp, &(resultp(i, i, 0))); 119 | for (size_t j = i+1; j < n; ++j) { 120 | self.gradient(&(xp(i, 0)), &(xp(j, 0)), wp, &(resultp(i, j, 0))); 121 | for (size_t k = 0; k < size; ++k) resultp(j, i, k) = resultp(i, j, k); 122 | } 123 | } 124 | return result; 125 | }); 126 | 127 | interface.def("x1_gradient_general", [](KernelInterface& self, 
py::array_t x1, py::array_t x2) { 128 | auto x1p = x1.unchecked<2>(); 129 | auto x2p = x2.unchecked<2>(); 130 | size_t n1 = x1p.shape(0), n2 = x2p.shape(0), ndim = self.ndim(); 131 | if (x1p.shape(1) != py::ssize_t(ndim) || x2p.shape(1) != py::ssize_t(ndim)) throw george::dimension_mismatch(); 132 | py::array_t result({n1, n2, ndim}); 133 | auto resultp = result.mutable_unchecked<3>(); 134 | for (size_t i = 0; i < n1; ++i) { 135 | for (size_t j = 0; j < n2; ++j) { 136 | for (size_t k = 0; k < ndim; ++k) resultp(i, j, k) = 0.0; 137 | self.x1_gradient(&(x1p(i, 0)), &(x2p(j, 0)), &(resultp(i, j, 0))); 138 | } 139 | } 140 | return result; 141 | }); 142 | 143 | interface.def("x2_gradient_general", [](KernelInterface& self, py::array_t x1, py::array_t x2) { 144 | auto x1p = x1.unchecked<2>(); 145 | auto x2p = x2.unchecked<2>(); 146 | size_t n1 = x1p.shape(0), n2 = x2p.shape(0), ndim = self.ndim(); 147 | if (x1p.shape(1) != py::ssize_t(ndim) || x2p.shape(1) != py::ssize_t(ndim)) throw george::dimension_mismatch(); 148 | py::array_t result({n1, n2, ndim}); 149 | auto resultp = result.mutable_unchecked<3>(); 150 | for (size_t i = 0; i < n1; ++i) { 151 | for (size_t j = 0; j < n2; ++j) { 152 | for (size_t k = 0; k < ndim; ++k) resultp(i, j, k) = 0.0; 153 | self.x2_gradient(&(x1p(i, 0)), &(x2p(j, 0)), &(resultp(i, j, 0))); 154 | } 155 | } 156 | return result; 157 | }); 158 | 159 | interface.def(py::pickle( 160 | [](const KernelInterface& self) { 161 | return py::make_tuple(self.kernel_spec()); 162 | }, 163 | [](py::tuple t) { 164 | if (t.size() != 1) throw std::runtime_error("Invalid state!"); 165 | return new KernelInterface(t[0]); 166 | } 167 | )); 168 | 169 | 170 | //interface.def("__getstate__", [](const KernelInterface& self) { 171 | // return std::make_tuple(self.kernel_spec()); 172 | //}); 173 | 174 | //interface.def("__setstate__", [](KernelInterface& self, py::tuple t) { 175 | // if (t.size() != 1) throw std::runtime_error("Invalid state!"); 176 | // new (&self) KernelInterface(t[0]); 177 | //}); 178 | } 179 | -------------------------------------------------------------------------------- /src/george/metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = ["Metric", "Subspace"] 6 | 7 | import numpy as np 8 | from scipy.linalg import cho_factor 9 | 10 | from .modeling import Model 11 | 12 | 13 | class Subspace(object): 14 | 15 | def __init__(self, ndim, axes=None): 16 | self.ndim = int(ndim) 17 | if axes is None: 18 | axes = np.arange(self.ndim) 19 | self.axes = np.atleast_1d(axes).astype(int) 20 | if np.any(self.axes >= self.ndim): 21 | raise ValueError("invalid axis for {0} dimensional metric" 22 | .format(self.ndim)) 23 | 24 | 25 | class Metric(Model): 26 | 27 | def __init__(self, 28 | metric, 29 | bounds=None, 30 | ndim=None, 31 | axes=None, 32 | lower=True): 33 | if isinstance(metric, Metric): 34 | self.metric_type = metric.metric_type 35 | self.parameter_names = metric.parameter_names 36 | self.unfrozen_mask = metric.unfrozen_mask 37 | self.set_parameter_vector( 38 | metric.get_parameter_vector(include_frozen=True), 39 | include_frozen=True) 40 | self.parameter_bounds = metric.parameter_bounds 41 | self.ndim = metric.ndim 42 | self.axes = metric.axes 43 | return 44 | 45 | if ndim is None: 46 | raise ValueError("missing required parameter 'ndim'") 47 | 48 | # Conform with the modeling protocol. 
49 | parameter_names = [] 50 | parameters = [] 51 | 52 | # Save the number of dimensions. 53 | subspace = Subspace(ndim, axes=axes) 54 | self.ndim = subspace.ndim 55 | self.axes = subspace.axes 56 | 57 | # See if the parameter is a scalar. 58 | try: 59 | metric = float(metric) 60 | 61 | except TypeError: 62 | metric = np.atleast_1d(metric) 63 | 64 | # If the metric is a vector, it is meant to be axis aligned. 65 | if len(metric.shape) == 1: 66 | self.metric_type = 1 67 | if len(metric) != len(self.axes): 68 | raise ValueError("dimension mismatch") 69 | if np.any(metric <= 0.0): 70 | raise ValueError("invalid (negative) metric") 71 | for i, v in enumerate(metric): 72 | parameter_names.append("log_M_{0}_{0}".format(i)) 73 | parameters.append(np.log(v)) 74 | elif len(metric.shape) == 2: 75 | self.metric_type = 2 76 | if metric.shape[0] != metric.shape[1]: 77 | raise ValueError("metric must be square") 78 | if len(metric) != len(self.axes): 79 | raise ValueError("dimension mismatch") 80 | 81 | # Compute the Cholesky factorization and log the diagonal. 82 | params = cho_factor(metric, lower=True)[0] 83 | i = np.diag_indices_from(params) 84 | params[i] = np.log(params[i]) 85 | params = params[np.tril_indices_from(params)] 86 | 87 | # Save the parameter vector. 88 | k = 0 89 | for i in range(len(self.axes)): 90 | parameter_names.append("log_L_{0}_{0}".format(i)) 91 | parameters.append(params[k]) 92 | k += 1 93 | for j in range(i+1, len(self.axes)): 94 | parameter_names.append("L_{0}_{1}".format(i, j)) 95 | parameters.append(params[k]) 96 | k += 1 97 | else: 98 | raise ValueError("invalid metric dimensions") 99 | 100 | else: 101 | self.metric_type = 0 102 | parameter_names.append("log_M_0_0") 103 | parameters.append(np.log(metric)) 104 | 105 | self.parameter_names = tuple(parameter_names) 106 | kwargs = dict(zip(parameter_names, parameters)) 107 | if bounds is not None: 108 | kwargs["bounds"] = bounds 109 | super(Metric, self).__init__(**kwargs) 110 | 111 | def to_matrix(self): 112 | vector = self.get_parameter_vector(include_frozen=True) 113 | if self.metric_type == 0: 114 | return np.exp(vector) * np.eye(len(self.axes)) 115 | elif self.metric_type == 1: 116 | return np.diag(np.exp(vector)) 117 | else: 118 | n = len(self.axes) 119 | L = np.zeros((n, n)) 120 | L[np.tril_indices_from(L)] = vector 121 | i = np.diag_indices_from(L) 122 | L[i] = np.exp(L[i]) 123 | return np.dot(L, L.T) 124 | 125 | def __repr__(self): 126 | vector = self.get_parameter_vector(include_frozen=True) 127 | if self.metric_type == 0: 128 | params = ["{0}".format(float(np.exp(vector)))] 129 | elif self.metric_type == 1: 130 | params = ["{0}".format(repr(np.exp(vector)))] 131 | elif self.metric_type == 2: 132 | params = ["{0}".format(repr(self.to_matrix().tolist()))] 133 | params += ["ndim={0}".format(self.ndim), 134 | "axes={0}".format(repr(self.axes))] 135 | params += ["bounds={0}".format([ 136 | (None if a is None else np.exp(a), 137 | None if b is None else np.exp(b)) 138 | for a, b in self.get_parameter_bounds(include_frozen=True) 139 | ])] 140 | return "Metric({0})".format(", ".join(params)) 141 | -------------------------------------------------------------------------------- /src/george/solvers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __all__ = ["TrivialSolver", "BasicSolver", "HODLRSolver"] 4 | 5 | from .trivial import TrivialSolver 6 | from .basic import BasicSolver 7 | from .hodlr import HODLRSolver 8 | 
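
The three solvers exported here are interchangeable back ends for `george.GP`; the compiled HODLR solver defined next trades a little accuracy for much better scaling. A usage sketch, assuming the public API shown in the package tutorials (synthetic data, values chosen only for illustration):

    import numpy as np
    import george
    from george import kernels

    # Synthetic 1-D data set
    np.random.seed(1234)
    x = np.sort(np.random.uniform(0, 10, 500))
    yerr = 0.1 * np.ones_like(x)
    y = np.sin(x) + yerr * np.random.randn(len(x))

    kernel = np.var(y) * kernels.ExpSquaredKernel(1.0)

    gp_basic = george.GP(kernel)                              # exact BasicSolver by default
    gp_basic.compute(x, yerr)

    gp_hodlr = george.GP(kernel, solver=george.HODLRSolver)   # approximate O(N log^2 N) solver
    gp_hodlr.compute(x, yerr)

    print(gp_basic.log_likelihood(y), gp_hodlr.log_likelihood(y))
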
-------------------------------------------------------------------------------- /src/george/solvers/_hodlr.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "george/hodlr.h" 6 | #include "george/kernels.h" 7 | #include "george/parser.h" 8 | #include "george/exceptions.h" 9 | 10 | namespace py = pybind11; 11 | 12 | using RowMatrixXd = Eigen::Matrix; 13 | 14 | class SolverMatrix { 15 | public: 16 | SolverMatrix (george::kernels::Kernel* kernel) 17 | : kernel_(kernel) {}; 18 | void set_input_coordinates (RowMatrixXd x) { 19 | if (size_t(x.cols()) != kernel_->get_ndim()) { 20 | throw george::dimension_mismatch(); 21 | } 22 | t_ = x; 23 | }; 24 | double get_value (const int i, const int j) { 25 | if (i < 0 || i >= t_.rows() || j < 0 || j >= t_.rows()) { 26 | throw std::out_of_range("attempting to index outside of the dimension of the input coordinates"); 27 | } 28 | return kernel_->value(t_.row(i).data(), t_.row(j).data()); 29 | }; 30 | 31 | private: 32 | george::kernels::Kernel* kernel_; 33 | RowMatrixXd t_; 34 | }; 35 | 36 | class Solver { 37 | public: 38 | 39 | Solver () { 40 | solver_ = NULL; 41 | kernel_ = NULL; 42 | matrix_ = NULL; 43 | computed_ = 0; 44 | }; 45 | ~Solver () { 46 | if (solver_ != NULL) delete solver_; 47 | if (matrix_ != NULL) delete matrix_; 48 | if (kernel_ != NULL) delete kernel_; 49 | }; 50 | 51 | int get_status () const { return 0; }; 52 | int get_computed () const { return computed_; }; 53 | double log_determinant () const { return log_det_; }; 54 | 55 | int compute ( 56 | const py::object& kernel_spec, 57 | const py::array_t& x, 58 | const py::array_t& yerr, 59 | int min_size = 100, double tol = 0.1, int seed = 0 60 | ) { 61 | computed_ = 0; 62 | kernel_ = george::parse_kernel_spec(kernel_spec); 63 | matrix_ = new SolverMatrix(kernel_); 64 | 65 | // Random number generator for reproducibility 66 | std::random_device r; 67 | std::mt19937 random(r()); 68 | random.seed(seed); 69 | 70 | // Extract the data from the numpy arrays 71 | py::detail::unchecked_reference x_p = x.unchecked<2>(); 72 | py::detail::unchecked_reference yerr_p = yerr.unchecked<1>(); 73 | size_t n = x_p.shape(0), ndim = x_p.shape(1); 74 | RowMatrixXd X(n, ndim); 75 | Eigen::VectorXd diag(n); 76 | for (size_t i = 0; i < n; ++i) { 77 | diag(i) = yerr_p(i) * yerr_p(i); 78 | for (size_t j = 0; j < ndim; ++j) X(i, j) = x_p(i, j); 79 | } 80 | 81 | matrix_->set_input_coordinates(X); 82 | 83 | // Set up the solver object. 84 | if (solver_ != NULL) delete solver_; 85 | solver_ = new george::hodlr::Node ( 86 | diag, matrix_, 0, n, min_size, tol, random); 87 | solver_->compute(); 88 | log_det_ = solver_->log_determinant(); 89 | 90 | // Update the bookkeeping flags. 91 | computed_ = 1; 92 | size_ = n; 93 | return 0; 94 | }; 95 | 96 | template 97 | void apply_inverse (Eigen::MatrixBase& x) { 98 | if (!computed_) throw george::not_computed(); 99 | solver_->solve(x); 100 | }; 101 | 102 | int size () const { return size_; }; 103 | 104 | private: 105 | double log_det_; 106 | int size_; 107 | int computed_; 108 | 109 | george::kernels::Kernel* kernel_; 110 | SolverMatrix* matrix_; 111 | george::hodlr::Node* solver_; 112 | }; 113 | 114 | 115 | PYBIND11_MODULE(_hodlr, m) { 116 | py::class_ solver(m, "HODLRSolver", R"delim( 117 | A solver using `Sivaram Amambikasaran's HODLR algorithm 118 | `_ to approximately solve the GP linear 119 | algebra in :math:`\mathcal{O}(N\,\log^2 N)`. 
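
The "GP linear algebra" in question is the marginalized-likelihood evaluation: the solver exposes exactly the two expensive pieces, :math:`y^\mathrm{T} K^{-1} y` (via ``dot_solve``) and :math:`\ln |K|` (via ``log_determinant``), both bound further down in this module. A sketch of how a caller would combine them (hypothetical helper, not part of the bindings):

    import numpy as np

    def gp_log_likelihood(dot_solve_result, log_det, n):
        # ln L = -0.5 * (y^T K^-1 y + ln|K| + n * ln(2 pi))
        return -0.5 * (dot_solve_result + log_det + n * np.log(2 * np.pi))
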
120 | 121 | )delim"); 122 | solver.def(py::init()); 123 | solver.def_property_readonly("computed", &Solver::get_computed); 124 | solver.def_property_readonly("log_determinant", &Solver::log_determinant); 125 | solver.def("compute", &Solver::compute, R"delim( 126 | Compute and factorize the covariance matrix. 127 | 128 | Args: 129 | kernel (george.kernels.Kernel): A subclass of :class:`Kernel` specifying 130 | the kernel function. 131 | x (ndarray[nsamples, ndim]): The independent coordinates of the data 132 | points. 133 | yerr (ndarray[nsamples]): The Gaussian uncertainties on the data points at 134 | coordinates ``x``. These values will be added in quadrature to the 135 | diagonal of the covariance matrix. 136 | min_size (Optional[int]): The block size where the solver switches to a 137 | general direct factorization algorithm. This can be tuned for platform 138 | and problem specific performance and accuracy. As a general rule, 139 | larger values will be more accurate and slower, but there is some 140 | overhead for very small values, so we recommend choosing values in the 141 | hundreds. (default: ``100``) 142 | tol (Optional[float]): The precision tolerance for the low-rank 143 | approximation. This value is used as an approximate limit on the 144 | Frobenius norm between the low-rank approximation and the true matrix 145 | when reconstructing the off-diagonal blocks. Smaller values of ``tol`` 146 | will generally give more accurate results with higher computational 147 | cost. (default: ``0.1``) 148 | seed (Optional[int]): The low-rank approximation method within the HODLR 149 | algorithm is not deterministic and, without a fixed seed, the method 150 | can give different results for the same matrix. Therefore, we require 151 | that the user provide a seed for the random number generator. 152 | (default: ``42``, obviously) 153 | )delim", 154 | py::arg("kernel_spec"), py::arg("x"), py::arg("yerr"), py::arg("min_size") = 100, py::arg("tol") = 0.1, py::arg("seed") = 42 155 | ); 156 | solver.def("apply_inverse", [](Solver& self, Eigen::MatrixXd& x, bool in_place = false){ 157 | if (in_place) { 158 | self.apply_inverse(x); 159 | return x; 160 | } 161 | Eigen::MatrixXd alpha = x; 162 | self.apply_inverse(alpha); 163 | return alpha; 164 | }, py::arg("x"), py::arg("in_place") = false, R"delim( 165 | Apply the inverse of the covariance matrix to the input by solving 166 | 167 | .. math:: 168 | 169 | K\,x = y 170 | 171 | Args: 172 | y (ndarray[nsamples] or ndadrray[nsamples, nrhs]): The vector or matrix 173 | :math:`y`. 174 | in_place (Optional[bool]): Should the data in ``y`` be overwritten with 175 | the result :math:`x`? (default: ``False``) 176 | )delim"); 177 | 178 | solver.def("dot_solve", [](Solver& self, const Eigen::VectorXd& x){ 179 | Eigen::VectorXd alpha = x; 180 | self.apply_inverse(alpha); 181 | return double(x.transpose() * alpha); 182 | }, R"delim( 183 | Compute the inner product of a vector with the inverse of the covariance 184 | matrix applied to itself: 185 | 186 | .. math:: 187 | 188 | y\,K^{-1}\,y 189 | 190 | Args: 191 | y (ndarray[nsamples]): The vector :math:`y`. 192 | )delim"); 193 | 194 | solver.def("get_inverse", [](Solver& self){ 195 | int n = self.size(); 196 | Eigen::MatrixXd eye(n, n); 197 | eye.setIdentity(); 198 | self.apply_inverse(eye); 199 | return eye; 200 | }, R"delim( 201 | Get the dense inverse covariance matrix. This is used for computing gradients, 202 | but it is not recommended in general. 
203 | )delim"); 204 | } 205 | -------------------------------------------------------------------------------- /src/george/solvers/basic.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = ["BasicSolver"] 6 | 7 | import numpy as np 8 | from scipy.linalg import cholesky, cho_solve 9 | 10 | 11 | class BasicSolver(object): 12 | """ 13 | This is the most basic solver built using :func:`scipy.linalg.cholesky`. 14 | 15 | kernel (george.kernels.Kernel): A subclass of :class:`Kernel` specifying 16 | the kernel function. 17 | 18 | """ 19 | 20 | def __init__(self, kernel): 21 | self.kernel = kernel 22 | self._computed = False 23 | self._log_det = None 24 | 25 | @property 26 | def computed(self): 27 | """ 28 | A flag indicating whether or not the covariance matrix was computed 29 | and factorized (using the :func:`compute` method). 30 | 31 | """ 32 | return self._computed 33 | 34 | @computed.setter 35 | def computed(self, v): 36 | self._computed = v 37 | 38 | @property 39 | def log_determinant(self): 40 | """ 41 | The log-determinant of the covariance matrix. This will only be 42 | non-``None`` after calling the :func:`compute` method. 43 | 44 | """ 45 | return self._log_det 46 | 47 | @log_determinant.setter 48 | def log_determinant(self, v): 49 | self._log_det = v 50 | 51 | def compute(self, x, yerr): 52 | """ 53 | Compute and factorize the covariance matrix. 54 | 55 | Args: 56 | x (ndarray[nsamples, ndim]): The independent coordinates of the 57 | data points. 58 | yerr (ndarray[nsamples] or float): The Gaussian uncertainties on 59 | the data points at coordinates ``x``. These values will be 60 | added in quadrature to the diagonal of the covariance matrix. 61 | 62 | """ 63 | # Compute the kernel matrix. 64 | K = self.kernel.get_value(x) 65 | K[np.diag_indices_from(K)] += yerr ** 2 66 | 67 | # Factor the matrix and compute the log-determinant. 68 | self._factor = (cholesky(K, overwrite_a=True, lower=False), False) 69 | self.log_determinant = 2 * np.sum(np.log(np.diag(self._factor[0]))) 70 | self.computed = True 71 | 72 | def apply_inverse(self, y, in_place=False): 73 | r""" 74 | Apply the inverse of the covariance matrix to the input by solving 75 | 76 | .. math:: 77 | 78 | K\,x = y 79 | 80 | Args: 81 | y (ndarray[nsamples] or ndadrray[nsamples, nrhs]): The vector or 82 | matrix :math:`y`. 83 | in_place (Optional[bool]): Should the data in ``y`` be overwritten 84 | with the result :math:`x`? (default: ``False``) 85 | 86 | """ 87 | return cho_solve(self._factor, y, overwrite_b=in_place) 88 | 89 | def dot_solve(self, y): 90 | r""" 91 | Compute the inner product of a vector with the inverse of the 92 | covariance matrix applied to itself: 93 | 94 | .. math:: 95 | 96 | y\,K^{-1}\,y 97 | 98 | Args: 99 | y (ndarray[nsamples]): The vector :math:`y`. 100 | 101 | """ 102 | return np.dot(y.T, cho_solve(self._factor, y)) 103 | 104 | def apply_sqrt(self, r): 105 | """ 106 | Apply the Cholesky square root of the covariance matrix to the input 107 | vector or matrix. 108 | 109 | Args: 110 | r (ndarray[nsamples] or ndarray[nsamples, nrhs]: The input vector 111 | or matrix. 112 | 113 | """ 114 | return np.dot(r, self._factor[0]) 115 | 116 | def get_inverse(self): 117 | """ 118 | Get the dense inverse covariance matrix. This is used for computing 119 | gradients, but it is not recommended in general. 
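
As the docstrings note, the preferred pattern is to apply :math:`K^{-1}` to vectors rather than to materialize the dense inverse; `get_inverse` exists mainly for the gradient bookkeeping. A small sketch with the pure-Python `BasicSolver` above (synthetic inputs, illustrative only):

    import numpy as np
    from george import kernels
    from george.solvers import BasicSolver

    x = np.linspace(0, 10, 200)[:, None]
    yerr = 0.1 * np.ones(200)
    y = np.sin(x[:, 0])

    solver = BasicSolver(kernels.ExpSquaredKernel(1.0))
    solver.compute(x, yerr)

    alpha = solver.apply_inverse(y)   # solved from the stored Cholesky factor
    K_inv = solver.get_inverse()      # dense N x N matrix; avoid for large N
    print(np.max(np.abs(K_inv @ y - alpha)))
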
120 | """ 121 | return self.apply_inverse(np.eye(len(self._factor[0])), in_place=True) 122 | -------------------------------------------------------------------------------- /src/george/solvers/hodlr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = ["HODLRSolver"] 6 | 7 | import numpy as np 8 | 9 | from .basic import BasicSolver 10 | from ._hodlr import HODLRSolver as HODLRSolverInterface 11 | 12 | 13 | class HODLRSolver(BasicSolver): 14 | r""" 15 | A solver using `Sivaram Amambikasaran's HODLR algorithm 16 | `_ to approximately solve the GP linear 17 | algebra in :math:`\mathcal{O}(N\,\log^2 N)`. 18 | 19 | :param kernel: 20 | An instance of a subclass of :class:`kernels.Kernel`. 21 | :param min_size: (optional[int]) 22 | The block size where the solver switches to a general direct 23 | factorization algorithm. This can be tuned for platform and 24 | problem specific performance and accuracy. As a general rule, 25 | larger values will be more accurate and slower, but there is some 26 | overhead for very small values, so we recommend choosing values in the 27 | hundreds. (default: ``100``) 28 | :param tol: (optional[float]) 29 | The precision tolerance for the low-rank approximation. 30 | This value is used as an approximate limit on the Frobenius norm 31 | between the low-rank approximation and the true matrix 32 | when reconstructing the off-diagonal blocks. Smaller values of ``tol`` 33 | will generally give more accurate results with higher computational 34 | cost. (default: ``0.1``) 35 | :param seed: (optional[int]) 36 | The low-rank approximation method within the HODLR algorithm 37 | is not deterministic and, without a fixed seed, the method 38 | can give different results for the same matrix. Therefore, we require 39 | that the user provide a seed for the random number generator. 
40 | (default: ``42``) 41 | """ 42 | 43 | def __init__(self, kernel, min_size=100, tol=0.1, seed=42): 44 | self.min_size = min_size 45 | self.tol = tol 46 | self.seed = seed 47 | super(HODLRSolver, self).__init__(kernel) 48 | 49 | def compute(self, x, yerr): 50 | self.solver = HODLRSolverInterface() 51 | self.solver.compute(self.kernel, x, yerr, 52 | self.min_size, self.tol, self.seed) 53 | self._log_det = self.solver.log_determinant 54 | self.computed = self.solver.computed 55 | 56 | def apply_inverse(self, y, in_place=False): 57 | return self.solver.apply_inverse(y, in_place=in_place) 58 | 59 | def dot_solve(self, y): 60 | return self.solver.dot_solve(y) 61 | 62 | def apply_sqrt(self, r): 63 | raise NotImplementedError("apply_sqrt is not implemented for the " 64 | "HODLRSolver") 65 | 66 | def get_inverse(self): 67 | return self.solver.get_inverse() 68 | 69 | def __getstate__(self): 70 | state = self.__dict__.copy() 71 | state["_computed"] = False 72 | del state["solver"] 73 | return state 74 | 75 | def __setstate__(self, state): 76 | self.__dict__.update(state) 77 | -------------------------------------------------------------------------------- /src/george/solvers/trivial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = ["TrivialSolver"] 6 | 7 | import numpy as np 8 | from ..kernels import EmptyKernel 9 | 10 | 11 | class TrivialSolver(object): 12 | 13 | def __init__(self, kernel=None): 14 | if (kernel is not None and 15 | kernel.kernel_type != EmptyKernel.kernel_type): 16 | raise ValueError("the trivial solver doesn't work with a kernel") 17 | self.computed = False 18 | self.log_determinant = None 19 | 20 | def compute(self, x, yerr): 21 | self._ivar = 1.0 / yerr ** 2 22 | self.log_determinant = 2 * np.sum(np.log(yerr)) 23 | self.computed = True 24 | 25 | def apply_inverse(self, y, in_place=False): 26 | if not in_place: 27 | y = np.array(y) 28 | y[:] *= self._ivar 29 | return y 30 | 31 | def dot_solve(self, y): 32 | return np.sum(y**2 * self._ivar) 33 | 34 | def apply_sqrt(self, r): 35 | return r * np.sqrt(self._ivar) 36 | -------------------------------------------------------------------------------- /src/george/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = ["multivariate_gaussian_samples", "nd_sort_samples"] 6 | 7 | import numpy as np 8 | from scipy.spatial import cKDTree 9 | 10 | 11 | def multivariate_gaussian_samples(matrix, N, mean=None): 12 | """ 13 | Generate samples from a multidimensional Gaussian with a given covariance. 14 | 15 | :param matrix: ``(k, k)`` 16 | The covariance matrix. 17 | 18 | :param N: 19 | The number of samples to generate. 20 | 21 | :param mean: ``(k,)`` (optional) 22 | The mean of the Gaussian. Assumed to be zero if not given. 23 | 24 | :returns samples: ``(k,)`` or ``(N, k)`` 25 | Samples from the given multivariate normal. 26 | 27 | """ 28 | if mean is None: 29 | mean = np.zeros(len(matrix)) 30 | samples = np.random.multivariate_normal(mean, matrix, N) 31 | if N == 1: 32 | return samples[0] 33 | return samples 34 | 35 | 36 | def nd_sort_samples(samples): 37 | """ 38 | Sort an N-dimensional list of samples using a KDTree. 39 | 40 | :param samples: ``(nsamples, ndim)`` 41 | The list of samples. This must be a two-dimensional array. 
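
Note the `__getstate__`/`__setstate__` pair on the Python `HODLRSolver` above: the compiled factorization is deliberately dropped when pickling, so a round-tripped GP has to be recomputed before it is used again. A sketch of that behaviour, assuming pickling round-trips as exercised in the test suite (synthetic data, illustrative only):

    import pickle
    import numpy as np
    import george
    from george import kernels

    np.random.seed(42)
    x = np.sort(np.random.uniform(0, 10, 100))
    yerr = 0.1 * np.ones_like(x)
    y = np.sin(x)

    gp = george.GP(kernels.ExpSquaredKernel(1.0), solver=george.HODLRSolver)
    gp.compute(x, yerr)

    clone = pickle.loads(pickle.dumps(gp))
    clone.compute(x, yerr)    # the HODLR factorization was not pickled; recompute first
    print(gp.log_likelihood(y), clone.log_likelihood(y))
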
42 | 43 | :returns i: ``(nsamples,)`` 44 | The list of indices into the original array that return the correctly 45 | sorted version. 46 | 47 | """ 48 | # Check the shape of the sample list. 49 | assert len(samples.shape) == 2 50 | 51 | # Build a KD-tree on the samples. 52 | tree = cKDTree(samples) 53 | 54 | # Compute the distances. 55 | d, i = tree.query(samples[0], k=len(samples)) 56 | return i 57 | 58 | 59 | def numerical_gradient(f, x, dx=1.234e-6): 60 | g = np.empty_like(x, dtype=float) 61 | for i in range(len(g)): 62 | x[i] += dx 63 | fp = f(x) 64 | x[i] -= 2*dx 65 | fm = f(x) 66 | x[i] += dx 67 | g[i] = 0.5 * (fp - fm) / dx 68 | return g 69 | 70 | 71 | def check_gradient(obj, *args, **kwargs): 72 | eps = kwargs.pop("eps", 1.23e-5) 73 | 74 | grad0 = obj.get_gradient(*args, **kwargs) 75 | vector = obj.get_parameter_vector() 76 | for i, v in enumerate(vector): 77 | # Compute the centered finite difference approximation to the gradient. 78 | vector[i] = v + eps 79 | obj.set_parameter_vector(vector) 80 | p = obj.get_value(*args, **kwargs) 81 | 82 | vector[i] = v - eps 83 | obj.set_parameter_vector(vector) 84 | m = obj.get_value(*args, **kwargs) 85 | 86 | vector[i] = v 87 | obj.set_parameter_vector(vector) 88 | 89 | grad = 0.5 * (p - m) / eps 90 | assert np.allclose(grad0[i], grad), \ 91 | "grad computation failed for '{0}' ({1})" \ 92 | .format(obj.get_parameter_names()[i], i) 93 | -------------------------------------------------------------------------------- /templates/kernels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = [ 6 | "Kernel", "Sum", "Product", 7 | {%- for spec in specs %} 8 | "{{ spec.name }}", 9 | {%- endfor %} 10 | ] 11 | 12 | import numpy as np 13 | 14 | from .modeling import Model, ModelSet 15 | from .metrics import Metric, Subspace 16 | from .kernel_interface import KernelInterface 17 | 18 | 19 | class Kernel(ModelSet): 20 | """ 21 | The abstract kernel type. Every kernel implemented in George should be 22 | a subclass of this object. 23 | 24 | """ 25 | 26 | is_kernel = True 27 | kernel_type = -1 28 | 29 | # This function deals with weird behavior when performing arithmetic 30 | # operations with numpy scalars. 
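
The abstract `Kernel` class that starts here wires the generated specs to the compiled `KernelInterface`, and because every parameter gradient is hand-written it also provides finite-difference checkers (`test_gradient`, `test_x1_gradient`, `test_x2_gradient`, defined further down in this template). A typical spot-check, assuming the compiled `george` package is importable (inputs chosen only for illustration):

    import numpy as np
    from george import kernels

    np.random.seed(42)
    x = np.random.uniform(0, 10, (25, 1))

    kernel = 0.5 * kernels.Matern52Kernel(metric=2.0)
    kernel.test_gradient(x)      # raises AssertionError if an analytic parameter gradient disagrees
    kernel.test_x1_gradient(x)   # same idea for the gradients with respect to the inputs
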
31 | def __array_wrap__(self, array, context=None): 32 | if context is None: 33 | raise TypeError("Invalid operation") 34 | ufunc, args, _ = context 35 | if ufunc.__name__ == "multiply": 36 | return float(args[0]) * args[1] 37 | elif ufunc.__name__ == "add": 38 | return float(args[0]) + args[1] 39 | raise TypeError("Invalid operation") 40 | __array_priority__ = np.inf 41 | 42 | def __getstate__(self): 43 | odict = self.__dict__.copy() 44 | odict["_kernel"] = None 45 | return odict 46 | 47 | # We must overload the ModelSet attribute getter to pass the requests 48 | # to the "BaseKernel" 49 | def __getattr__(self, name): 50 | if "models" in self.__dict__: 51 | if name in self.models: 52 | return self.models[name] 53 | if None in self.models: 54 | return getattr(self.models[None], name) 55 | raise AttributeError(name) 56 | 57 | @property 58 | def kernel(self): 59 | return KernelInterface(self) 60 | 61 | def __repr__(self): 62 | kernel = self.models[None] 63 | params = ["{0}={1}".format(k, getattr(kernel, k)) 64 | for k in kernel.parameter_names] 65 | if self.stationary: 66 | params += ["metric={0}".format(repr(self.metric)), 67 | "block={0}".format(repr(self.block))] 68 | else: 69 | params += ["ndim={0}".format(self.ndim), 70 | "axes={0}".format(repr(self.axes))] 71 | return "{0}({1})".format(self.__class__.__name__, ", ".join(params)) 72 | 73 | def __add__(self, b): 74 | if not hasattr(b, "is_kernel"): 75 | return Sum(ConstantKernel(log_constant=np.log(float(b)/self.ndim), 76 | ndim=self.ndim), self) 77 | return Sum(self, b) 78 | 79 | def __radd__(self, b): 80 | return self.__add__(b) 81 | 82 | def __mul__(self, b): 83 | if not hasattr(b, "is_kernel"): 84 | log_constant = np.log(float(b)/self.ndim) 85 | return Product(ConstantKernel(log_constant=log_constant, 86 | ndim=self.ndim), self) 87 | return Product(self, b) 88 | 89 | def __rmul__(self, b): 90 | return self.__mul__(b) 91 | 92 | def get_value(self, x1, x2=None, diag=False): 93 | x1 = np.ascontiguousarray(x1, dtype=np.float64) 94 | if x2 is None: 95 | if diag: 96 | return self.kernel.value_diagonal(x1, x1) 97 | else: 98 | return self.kernel.value_symmetric(x1) 99 | x2 = np.ascontiguousarray(x2, dtype=np.float64) 100 | if diag: 101 | return self.kernel.value_diagonal(x1, x2) 102 | else: 103 | return self.kernel.value_general(x1, x2) 104 | 105 | def get_gradient(self, x1, x2=None, include_frozen=False): 106 | mask = ( 107 | np.ones(self.full_size, dtype=bool) 108 | if include_frozen else self.unfrozen_mask 109 | ) 110 | which = mask.astype(np.uint32) 111 | x1 = np.ascontiguousarray(x1, dtype=np.float64) 112 | if x2 is None: 113 | g = self.kernel.gradient_symmetric(which, x1) 114 | else: 115 | x2 = np.ascontiguousarray(x2, dtype=np.float64) 116 | g = self.kernel.gradient_general(which, x1, x2) 117 | return g[:, :, mask] 118 | 119 | def get_x1_gradient(self, x1, x2=None): 120 | x1 = np.ascontiguousarray(x1, dtype=np.float64) 121 | if x2 is None: 122 | x2 = x1 123 | else: 124 | x2 = np.ascontiguousarray(x2, dtype=np.float64) 125 | return self.kernel.x1_gradient_general(x1, x2) 126 | 127 | def get_x2_gradient(self, x1, x2=None): 128 | x1 = np.ascontiguousarray(x1, dtype=np.float64) 129 | if x2 is None: 130 | x2 = x1 131 | else: 132 | x2 = np.ascontiguousarray(x2, dtype=np.float64) 133 | return self.kernel.x2_gradient_general(x1, x2) 134 | 135 | def test_gradient(self, x1, x2=None, eps=1.32e-6, **kwargs): 136 | vector = self.get_parameter_vector() 137 | g0 = self.get_gradient(x1, x2=x2) 138 | 139 | for i, v in enumerate(vector): 140 | 
vector[i] = v + eps 141 | self.set_parameter_vector(vector) 142 | kp = self.get_value(x1, x2=x2) 143 | 144 | vector[i] = v - eps 145 | self.set_parameter_vector(vector) 146 | km = self.get_value(x1, x2=x2) 147 | 148 | vector[i] = v 149 | self.set_parameter_vector(vector) 150 | 151 | grad = 0.5 * (kp - km) / eps 152 | assert np.allclose(g0[:, :, i], grad, **kwargs), \ 153 | "incorrect gradient for parameter '{0}' ({1})" \ 154 | .format(self.get_parameter_names()[i], i) 155 | 156 | def test_x1_gradient(self, x1, x2=None, eps=1.32e-6, **kwargs): 157 | kwargs["atol"] = kwargs.get("atol", 0.5 * eps) 158 | g0 = self.get_x1_gradient(x1, x2=x2) 159 | if x2 is None: 160 | x2 = np.array(x1) 161 | for i in range(len(x1)): 162 | for k in range(self.ndim): 163 | x1[i, k] += eps 164 | kp = self.get_value(x1, x2=x2) 165 | 166 | x1[i, k] -= 2*eps 167 | km = self.get_value(x1, x2=x2) 168 | 169 | x1[i, k] += eps 170 | 171 | grad = 0.5 * (kp - km) / eps 172 | assert np.allclose(g0[i, :, k], grad[i], **kwargs) 173 | 174 | def test_x2_gradient(self, x1, x2=None, eps=1.32e-6, **kwargs): 175 | kwargs["atol"] = kwargs.get("atol", 0.5 * eps) 176 | g0 = self.get_x2_gradient(x1, x2=x2) 177 | if x2 is None: 178 | x2 = np.array(x1) 179 | for i in range(len(x2)): 180 | for k in range(self.ndim): 181 | x2[i, k] += eps 182 | kp = self.get_value(x1, x2=x2) 183 | 184 | x2[i, k] -= 2*eps 185 | km = self.get_value(x1, x2=x2) 186 | 187 | x2[i, k] += eps 188 | 189 | grad = 0.5 * (kp - km) / eps 190 | assert np.allclose(g0[:, i, k], grad[:, i], **kwargs) 191 | 192 | 193 | class _operator(Kernel): 194 | is_kernel = False 195 | kernel_type = -1 196 | operator_type = -1 197 | 198 | def __init__(self, k1, k2): 199 | if k1.ndim != k2.ndim: 200 | raise ValueError("Dimension mismatch") 201 | self.ndim = k1.ndim 202 | self._dirty = True 203 | super(_operator, self).__init__([("k1", k1), ("k2", k2)]) 204 | 205 | @property 206 | def k1(self): 207 | return self.models["k1"] 208 | 209 | @property 210 | def k2(self): 211 | return self.models["k2"] 212 | 213 | @property 214 | def dirty(self): 215 | return self._dirty or self.k1.dirty or self.k2.dirty 216 | 217 | @dirty.setter 218 | def dirty(self, v): 219 | self._dirty = v 220 | self.k1.dirty = False 221 | self.k2.dirty = False 222 | 223 | 224 | class Sum(_operator): 225 | is_kernel = False 226 | operator_type = 0 227 | 228 | def __repr__(self): 229 | return "{0} + {1}".format(self.k1, self.k2) 230 | 231 | 232 | class Product(_operator): 233 | is_kernel = False 234 | operator_type = 1 235 | 236 | def __repr__(self): 237 | return "{0} * {1}".format(self.k1, self.k2) 238 | 239 | {% for spec in specs %} 240 | class Base{{ spec.name }} (Model): 241 | parameter_names = ({% for p in spec.params -%}"{{ p }}", {% endfor %}) 242 | 243 | 244 | class {{ spec.name }} (Kernel): 245 | r""" 246 | {{ spec.doc | indent(4) }} 247 | 248 | """ 249 | 250 | kernel_type = {{ spec.index }} 251 | stationary = {{ spec.stationary }} 252 | 253 | def __init__(self, 254 | {% for p in spec.params %}{{ p }}=None, 255 | {% endfor -%} 256 | {% for con in spec.constants %}{{ con.name }}=None, 257 | {% endfor -%} 258 | {% if spec.stationary -%} 259 | metric=None, 260 | metric_bounds=None, 261 | lower=True, 262 | block=None, 263 | {% endif -%} 264 | bounds=None, 265 | ndim=1, 266 | axes=None): 267 | {% for con in spec.constants %} 268 | if {{ con.name }} is None: 269 | raise ValueError("missing required parameter '{{ con.name }}'") 270 | self.{{ con.name }} = {{ con.name }} 271 | {% endfor %} 272 | {% if spec.stationary -%} 273 
| if metric is None: 274 | raise ValueError("missing required parameter 'metric'") 275 | metric = Metric(metric, bounds=metric_bounds, ndim=ndim, 276 | axes=axes, lower=lower) 277 | self.ndim = metric.ndim 278 | self.axes = metric.axes 279 | self.block = block 280 | {%- else -%} 281 | self.subspace = Subspace(ndim, axes=axes) 282 | self.ndim = self.subspace.ndim 283 | self.axes = self.subspace.axes 284 | {%- endif %} 285 | 286 | kwargs = dict({% for p in spec.params -%}{{ p }}={{ p }}, {% endfor -%}) 287 | if bounds is not None: 288 | kwargs["bounds"] = bounds 289 | base = Base{{ spec.name }}(**kwargs) 290 | super({{ spec.name }}, self).__init__([ 291 | (None, base), {% if spec.stationary -%}("metric", metric){%- endif %} 292 | ]) 293 | 294 | # Common setup. 295 | self.dirty = True 296 | {% if spec.stationary %} 297 | @property 298 | def block(self): 299 | if not self.blocked: 300 | return None 301 | return list(zip(self.min_block, self.max_block)) 302 | 303 | @block.setter 304 | def block(self, block): 305 | if block is None: 306 | self.blocked = False 307 | self.min_block = -np.inf + np.zeros(len(self.axes)) 308 | self.max_block = np.inf + np.zeros(len(self.axes)) 309 | return 310 | 311 | block = np.atleast_2d(block) 312 | if block.shape != (len(self.axes), 2): 313 | raise ValueError("dimension mismatch in block specification") 314 | self.blocked = True 315 | self.min_block, self.max_block = map(np.array, zip(*block)) 316 | {% endif %} 317 | {% endfor %} 318 | -------------------------------------------------------------------------------- /templates/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef _GEORGE_PARSER_H_ 2 | #define _GEORGE_PARSER_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "george/kernels.h" 8 | #include "george/exceptions.h" 9 | 10 | namespace george { 11 | 12 | namespace py = pybind11; 13 | 14 | kernels::Kernel* parse_kernel_spec (const py::object& kernel_spec) { 15 | 16 | if (!py::hasattr(kernel_spec, "is_kernel")) throw std::invalid_argument("invalid kernel"); 17 | 18 | // Deal with operators first 19 | bool is_kernel = py::bool_(kernel_spec.attr("is_kernel")); 20 | if (!is_kernel) { 21 | kernels::Kernel *k1, *k2; 22 | py::object spec1 = kernel_spec.attr("k1"), 23 | spec2 = kernel_spec.attr("k2"); 24 | k1 = parse_kernel_spec(spec1); 25 | k2 = parse_kernel_spec(spec2); 26 | if (k1->get_ndim() != k2->get_ndim()) throw dimension_mismatch(); 27 | size_t op = py::int_(kernel_spec.attr("operator_type")); 28 | if (op == 0) { 29 | return new kernels::Sum(k1, k2); 30 | } else if (op == 1) { 31 | return new kernels::Product(k1, k2); 32 | } else { 33 | throw std::invalid_argument("unrecognized operator"); 34 | } 35 | } 36 | 37 | 38 | kernels::Kernel* kernel; 39 | size_t kernel_type = py::int_(kernel_spec.attr("kernel_type")); 40 | switch (kernel_type) { 41 | {% for spec in specs %} 42 | case {{ spec.index }}: { 43 | {% if spec.stationary %} 44 | py::object metric = kernel_spec.attr("metric"); 45 | size_t metric_type = py::int_(metric.attr("metric_type")); 46 | size_t ndim = py::int_(metric.attr("ndim")); 47 | py::list axes = py::list(metric.attr("axes")); 48 | bool blocked = py::bool_(kernel_spec.attr("blocked")); 49 | py::array_t min_block = py::array_t(kernel_spec.attr("min_block")); 50 | py::array_t max_block = py::array_t(kernel_spec.attr("max_block")); 51 | 52 | // Select the correct template based on the metric type 53 | if (metric_type == 0) { 54 | kernel = new kernels::{{ spec.name }} ( 55 | {% for param in 
spec.params %} 56 | py::float_(kernel_spec.attr("{{ param }}")), 57 | {%- endfor %} 58 | {% for con in spec.constants %} 59 | py::float_(kernel_spec.attr("{{ con.name }}")), 60 | {%- endfor %} 61 | blocked, 62 | (double*)&(min_block.unchecked<1>()(0)), 63 | (double*)&(max_block.unchecked<1>()(0)), 64 | ndim, 65 | py::len(axes) 66 | ); 67 | } else if (metric_type == 1) { 68 | kernel = new kernels::{{ spec.name }} ( 69 | {% for param in spec.params %} 70 | py::float_(kernel_spec.attr("{{ param }}")), 71 | {%- endfor %} 72 | {% for con in spec.constants %} 73 | py::float_(kernel_spec.attr("{{ con.name }}")), 74 | {%- endfor %} 75 | blocked, 76 | (double*)&(min_block.unchecked<1>()(0)), 77 | (double*)&(max_block.unchecked<1>()(0)), 78 | ndim, 79 | py::len(axes) 80 | ); 81 | } else if (metric_type == 2) { 82 | kernel = new kernels::{{ spec.name }} ( 83 | {% for param in spec.params %} 84 | py::float_(kernel_spec.attr("{{ param }}")), 85 | {%- endfor %} 86 | {% for con in spec.constants %} 87 | py::float_(kernel_spec.attr("{{ con.name }}")), 88 | {%- endfor %} 89 | blocked, 90 | (double*)&(min_block.unchecked<1>()(0)), 91 | (double*)&(max_block.unchecked<1>()(0)), 92 | ndim, 93 | py::len(axes) 94 | ); 95 | } else { 96 | throw std::invalid_argument("unrecognized metric"); 97 | } 98 | 99 | // Get the parameters 100 | py::function f = py::function(metric.attr("get_parameter_vector")); 101 | py::array_t vector = py::array_t(f(true)); 102 | auto data = vector.unchecked<1>(); 103 | for (py::ssize_t i = 0; i < data.shape(0); ++i) { 104 | kernel->set_metric_parameter(i, data(i)); 105 | } 106 | 107 | {% else %} 108 | size_t ndim = py::int_(kernel_spec.attr("ndim")); 109 | py::list axes = py::list(kernel_spec.attr("axes")); 110 | kernel = new kernels::{{ spec.name }} ( 111 | {% for param in spec.params %} 112 | py::float_(kernel_spec.attr("{{ param }}")), 113 | {%- endfor %} 114 | {% for con in spec.constants %} 115 | py::float_(kernel_spec.attr("{{ con.name }}")), 116 | {%- endfor %} 117 | ndim, 118 | py::len(axes) 119 | ); 120 | {% endif %} 121 | 122 | for (size_t i = 0; i < py::len(axes); ++i) { 123 | kernel->set_axis(i, py::int_(axes[py::int_(i)])); 124 | } 125 | 126 | break; } 127 | {% endfor %} 128 | default: 129 | throw std::invalid_argument("unrecognized kernel"); 130 | } 131 | 132 | return kernel; 133 | } 134 | 135 | } 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /tests/test_gp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = [ 6 | "test_gradient", "test_prediction", "test_repeated_prediction_cache", 7 | "test_apply_inverse", 8 | ] 9 | 10 | import pytest 11 | import numpy as np 12 | from itertools import product 13 | 14 | from george import kernels, GP, BasicSolver, HODLRSolver 15 | 16 | @pytest.mark.parametrize("solver,white_noise", 17 | product([BasicSolver, HODLRSolver], [None, 0.1])) 18 | def test_gradient(solver, white_noise, seed=123, N=305, ndim=3, eps=1.32e-3): 19 | np.random.seed(seed) 20 | 21 | # Set up the solver. 22 | kernel = 1.0 * kernels.ExpSquaredKernel(0.5, ndim=ndim) 23 | kwargs = dict() 24 | if white_noise is not None: 25 | kwargs = dict(white_noise=white_noise, fit_white_noise=True) 26 | if solver == HODLRSolver: 27 | kwargs["tol"] = 1e-8 28 | gp = GP(kernel, solver=solver, **kwargs) 29 | 30 | # Sample some data. 
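    # The samples are sorted along the first coordinate before gp.compute is
    # called; this ordering is assumed (not guaranteed here) to keep nearby
    # points adjacent so the HODLR solver's low-rank approximation stays
    # accurate enough for the finite-difference check below.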
31 | x = np.random.rand(N, ndim) 32 | x = x[np.argsort(x[:, 0])] 33 | y = gp.sample(x) 34 | gp.compute(x, yerr=0.1) 35 | 36 | # Compute the initial gradient. 37 | grad0 = gp.grad_log_likelihood(y) 38 | vector = gp.get_parameter_vector() 39 | 40 | for i, v in enumerate(vector): 41 | # Compute the centered finite difference approximation to the gradient. 42 | vector[i] = v + eps 43 | gp.set_parameter_vector(vector) 44 | lp = gp.lnlikelihood(y) 45 | 46 | vector[i] = v - eps 47 | gp.set_parameter_vector(vector) 48 | lm = gp.lnlikelihood(y) 49 | 50 | vector[i] = v 51 | gp.set_parameter_vector(vector) 52 | 53 | grad = 0.5 * (lp - lm) / eps 54 | assert np.abs(grad - grad0[i]) < 5 * eps, \ 55 | "Gradient computation failed in dimension {0} ({1})\n{2}" \ 56 | .format(i, solver.__name__, np.abs(grad - grad0[i])) 57 | 58 | 59 | @pytest.mark.parametrize("solver", [BasicSolver, HODLRSolver]) 60 | def test_prediction(solver, seed=42): 61 | """Basic sanity checks for GP regression.""" 62 | 63 | np.random.seed(seed) 64 | 65 | kernel = kernels.ExpSquaredKernel(1.0) 66 | kwargs = dict() 67 | if solver == HODLRSolver: 68 | kwargs["tol"] = 1e-8 69 | gp = GP(kernel, solver=solver, white_noise=0.0, **kwargs) 70 | 71 | x0 = np.linspace(-10, 10, 500) 72 | x = np.sort(np.random.uniform(-10, 10, 300)) 73 | gp.compute(x) 74 | 75 | y = np.sin(x) 76 | mu, cov = gp.predict(y, x0) 77 | 78 | Kstar = gp.get_matrix(x0, x) 79 | K = gp.get_matrix(x) 80 | K[np.diag_indices_from(K)] += 1.0 81 | mu0 = np.dot(Kstar, np.linalg.solve(K, y)) 82 | print(np.abs(mu - mu0).max()) 83 | assert np.allclose(mu, mu0) 84 | 85 | 86 | def test_repeated_prediction_cache(): 87 | kernel = kernels.ExpSquaredKernel(1.0) 88 | gp = GP(kernel) 89 | 90 | x = np.array((-1, 0, 1)) 91 | gp.compute(x) 92 | 93 | t = np.array((-.5, .3, 1.2)) 94 | 95 | y = x/x.std() 96 | mu0, mu1 = (gp.predict(y, t, return_cov=False) for _ in range(2)) 97 | assert np.array_equal(mu0, mu1), \ 98 | "Identical training data must give identical predictions " \ 99 | "(problem with GP cache)." 100 | 101 | y2 = 2*y 102 | mu2 = gp.predict(y2, t, return_cov=False) 103 | assert not np.array_equal(mu0, mu2), \ 104 | "Different training data must give different predictions " \ 105 | "(problem with GP cache)." 106 | 107 | a0 = gp._alpha 108 | gp.kernel[0] += 0.1 109 | gp.recompute() 110 | gp._compute_alpha(y2, True) 111 | a1 = gp._alpha 112 | assert not np.allclose(a0, a1), \ 113 | "Different kernel parameters must give different alphas " \ 114 | "(problem with GP cache)." 115 | 116 | mu, cov = gp.predict(y2, t) 117 | _, var = gp.predict(y2, t, return_var=True) 118 | assert np.allclose(np.diag(cov), var), \ 119 | "The predictive variance must be equal to the diagonal of the " \ 120 | "predictive covariance." 121 | 122 | 123 | @pytest.mark.parametrize("solver", [BasicSolver, HODLRSolver]) 124 | def test_apply_inverse(solver, seed=1234, N=201, yerr=0.1): 125 | np.random.seed(seed) 126 | 127 | # Set up the solver. 128 | kernel = 1.0 * kernels.ExpSquaredKernel(0.5) 129 | kwargs = dict() 130 | if solver == HODLRSolver: 131 | kwargs["tol"] = 1e-10 132 | gp = GP(kernel, solver=solver, **kwargs) 133 | 134 | # Sample some data. 
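    # The check below builds the full noisy covariance K + diag(yerr**2)
    # explicitly and verifies that solver.apply_inverse agrees with
    # np.linalg.solve for a single right-hand side and for a matrix of
    # sampled right-hand sides.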
135 | x = np.sort(np.random.rand(N)) 136 | y = gp.sample(x) 137 | gp.compute(x, yerr=yerr) 138 | 139 | K = gp.get_matrix(x) 140 | K[np.diag_indices_from(K)] += yerr**2 141 | 142 | b1 = np.linalg.solve(K, y) 143 | b2 = gp.apply_inverse(y) 144 | assert np.allclose(b1, b2) 145 | 146 | y = gp.sample(x, size=5).T 147 | b1 = np.linalg.solve(K, y) 148 | b2 = gp.apply_inverse(y) 149 | assert np.allclose(b1, b2) 150 | 151 | 152 | @pytest.mark.parametrize("solver", [BasicSolver, HODLRSolver]) 153 | def test_predict_single(solver, seed=1234, N=201, yerr=0.1): 154 | np.random.seed(seed) 155 | 156 | # Set up the solver. 157 | kernel = 1.0 * kernels.ExpSquaredKernel(0.5) 158 | kwargs = dict() 159 | if solver == HODLRSolver: 160 | kwargs["tol"] = 1e-8 161 | gp = GP(kernel, solver=solver, **kwargs) 162 | 163 | x = np.sort(np.random.rand(N)) 164 | y = gp.sample(x) 165 | gp.compute(x, yerr=yerr) 166 | mu0, var0 = gp.predict(y, [0.0], return_var=True) 167 | mu, var = gp.predict(y, [0.0, 1.0], return_var=True) 168 | _, cov = gp.predict(y, [0.0, 1.0]) 169 | assert np.allclose(mu0, mu[0]) 170 | assert np.allclose(var0, var[0]) 171 | assert np.allclose(var0, cov[0, 0]) 172 | -------------------------------------------------------------------------------- /tests/test_kernels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | import pytest 6 | import numpy as np 7 | 8 | from george import kernels, GP 9 | 10 | 11 | def test_dtype(seed=123): 12 | np.random.seed(seed) 13 | kernel = 0.1 * kernels.ExpSquaredKernel(1.5) 14 | kernel.pars = [1, 2] 15 | gp = GP(kernel) 16 | x = np.random.rand(100) 17 | gp.compute(x, 1e-2) 18 | 19 | kernels_to_test = [ 20 | kernels.ConstantKernel(log_constant=0.1), 21 | kernels.ConstantKernel(log_constant=10.0, ndim=2), 22 | kernels.ConstantKernel(log_constant=5.0, ndim=5), 23 | 24 | kernels.DotProductKernel(), 25 | kernels.DotProductKernel(ndim=2), 26 | kernels.DotProductKernel(ndim=5, axes=0), 27 | 28 | kernels.CosineKernel(log_period=1.0), 29 | kernels.CosineKernel(log_period=0.5, ndim=2), 30 | kernels.CosineKernel(log_period=0.5, ndim=2, axes=1), 31 | kernels.CosineKernel(log_period=0.75, ndim=5, axes=[2, 3]), 32 | 33 | kernels.ExpSine2Kernel(gamma=0.4, log_period=1.0), 34 | kernels.ExpSine2Kernel(gamma=12., log_period=0.5, ndim=2), 35 | kernels.ExpSine2Kernel(gamma=17., log_period=0.5, ndim=2, axes=1), 36 | kernels.ExpSine2Kernel(gamma=13.7, log_period=-0.75, ndim=5, axes=[2, 3]), 37 | kernels.ExpSine2Kernel(gamma=-0.7, log_period=0.75, ndim=5, axes=[2, 3]), 38 | kernels.ExpSine2Kernel(gamma=-10, log_period=0.75), 39 | 40 | kernels.LocalGaussianKernel(log_width=0.5, location=1.0), 41 | kernels.LocalGaussianKernel(log_width=0.1, location=0.5, ndim=2), 42 | kernels.LocalGaussianKernel(log_width=1.5, location=-0.5, ndim=2, axes=1), 43 | kernels.LocalGaussianKernel(log_width=2.0, location=0.75, ndim=5, 44 | axes=[2, 3]), 45 | 46 | kernels.LinearKernel(order=0, log_gamma2=0.0), 47 | kernels.LinearKernel(order=2, log_gamma2=0.0), 48 | kernels.LinearKernel(order=2, log_gamma2=0.0), 49 | kernels.LinearKernel(order=5, log_gamma2=1.0, ndim=2), 50 | kernels.LinearKernel(order=3, log_gamma2=-1.0, ndim=5, axes=2), 51 | kernels.LinearKernel(order=0, log_gamma2=0.0) + 52 | kernels.LinearKernel(order=1, log_gamma2=-1.0) + 53 | kernels.LinearKernel(order=2, log_gamma2=-2.0), 54 | 55 | kernels.PolynomialKernel(order=0, log_sigma2=-10.0), 56 | kernels.PolynomialKernel(order=2, 
log_sigma2=-10.0), 57 | kernels.PolynomialKernel(order=2, log_sigma2=0.0), 58 | kernels.PolynomialKernel(order=5, log_sigma2=1.0, ndim=2), 59 | kernels.PolynomialKernel(order=3, log_sigma2=-1.0, ndim=5, axes=2), 60 | 61 | 12. * kernels.ExpSine2Kernel(gamma=0.4, log_period=1.0, ndim=5), 62 | 12. * kernels.ExpSquaredKernel(0.4, ndim=3) + 0.1, 63 | ] 64 | 65 | @pytest.mark.parametrize("kernel", kernels_to_test) 66 | def test_kernel(kernel, N=20, seed=123, eps=1.32e-6): 67 | np.random.seed(seed) 68 | t1 = np.random.randn(N, kernel.ndim) 69 | kernel.test_gradient(t1, eps=eps) 70 | kernel.test_gradient(t1, t1[:1], eps=eps) 71 | 72 | 73 | @pytest.mark.parametrize("kernel", kernels_to_test) 74 | def test_x_gradient_kernel(kernel, N=20, seed=123, eps=1.32e-6): 75 | np.random.seed(seed) 76 | t1 = np.random.randn(N, kernel.ndim) 77 | kernel.test_x1_gradient(t1, eps=eps) 78 | kernel.test_x1_gradient(t1, np.array(t1[:1]), eps=eps) 79 | kernel.test_x2_gradient(t1, eps=eps) 80 | kernel.test_x2_gradient(np.array(t1[:1]), t1, eps=eps) 81 | 82 | 83 | stationary_kernels = [ 84 | (kernels.ExpKernel, {}), 85 | (kernels.ExpSquaredKernel, {}), 86 | (kernels.Matern32Kernel, {}), 87 | (kernels.Matern52Kernel, {}), 88 | (kernels.RationalQuadraticKernel, dict(log_alpha=np.log(1.0))), 89 | (kernels.RationalQuadraticKernel, dict(log_alpha=np.log(0.1))), 90 | (kernels.RationalQuadraticKernel, dict(log_alpha=np.log(10.0))), 91 | ] 92 | 93 | @pytest.mark.parametrize("kernel_type,kwargs", stationary_kernels) 94 | def test_stationary(kernel_type, kwargs): 95 | def build_kernel(metric, **more): 96 | kws = dict(kwargs, **more) 97 | return kernel_type(metric=metric, **kws) 98 | 99 | kernel = build_kernel(0.1) 100 | test_kernel(kernel) 101 | test_x_gradient_kernel(kernel) 102 | 103 | kernel = build_kernel(1.0) 104 | test_kernel(kernel) 105 | test_x_gradient_kernel(kernel) 106 | 107 | kernel = build_kernel(10.0) 108 | test_kernel(kernel) 109 | test_x_gradient_kernel(kernel) 110 | 111 | kernel = build_kernel([1.0, 0.1, 10.0], ndim=3) 112 | test_kernel(kernel) 113 | test_x_gradient_kernel(kernel) 114 | 115 | kernel = build_kernel(1.0, ndim=3) 116 | test_kernel(kernel) 117 | test_x_gradient_kernel(kernel) 118 | 119 | with pytest.raises(ValueError): 120 | kernel = build_kernel([1.0, 0.1, 10.0, 500], ndim=3) 121 | 122 | kernel = build_kernel(1.0, ndim=3, axes=2) 123 | test_kernel(kernel) 124 | test_x_gradient_kernel(kernel) 125 | 126 | kernel = build_kernel(1.0, ndim=3, axes=2, block=(-0.1, 0.1)) 127 | test_kernel(kernel) 128 | test_x_gradient_kernel(kernel) 129 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = [ 6 | ] 7 | 8 | import pytest 9 | import numpy as np 10 | 11 | from george import kernels, GP 12 | from george.solvers import hodlr 13 | 14 | @pytest.mark.xfail 15 | def test_custom_cholesky(seed=1234, ndim=5): 16 | np.random.seed(seed) 17 | 18 | # Build the matrix. 19 | L1 = np.random.randn(ndim, ndim) 20 | L1[np.diag_indices(ndim)] = np.exp(L1[np.diag_indices(ndim)]) 21 | L1[np.triu_indices(ndim, 1)] = 0.0 22 | 23 | for L in (L1, np.eye(ndim)): 24 | A = np.dot(L, L.T) 25 | b = np.random.randn(ndim) 26 | 27 | Lvec = np.array(L) 28 | Lvec[np.diag_indices(ndim)] = 1. 
/ Lvec[np.diag_indices(ndim)] 29 | 30 | Lb1 = np.linalg.solve(L, b) 31 | Lb2 = hodlr.custom_forward_sub(Lvec[np.tril_indices(ndim)], 32 | np.array(b)) 33 | assert np.allclose(Lb1, Lb2) 34 | 35 | Ainvb1 = np.linalg.solve(A, b) 36 | Ainvb2 = hodlr.custom_backward_sub(Lvec[np.tril_indices(ndim)], Lb2) 37 | assert np.allclose(Ainvb1, Ainvb2) 38 | 39 | 40 | def _general_metric(metric, N=100, ndim=3): 41 | kernel = 0.1 * kernels.ExpSquaredKernel(metric, ndim=ndim) 42 | 43 | x = np.random.rand(N, ndim) 44 | M0 = kernel.get_value(x) 45 | 46 | gp = GP(kernel) 47 | M1 = gp.get_matrix(x) 48 | assert np.allclose(M0, M1) 49 | 50 | # Compute the expected matrix. 51 | M2 = np.empty((N, N)) 52 | for i in range(N): 53 | for j in range(N): 54 | r = x[i] - x[j] 55 | r2 = np.dot(r, np.linalg.solve(metric, r)) 56 | M2[i, j] = 0.1 * np.exp(-0.5*r2) 57 | 58 | if not np.allclose(M0, M2): 59 | print(M0) 60 | print() 61 | print(M2) 62 | print() 63 | print(M0 - M2) 64 | print() 65 | print(M0 / M2) 66 | 67 | L = np.linalg.cholesky(metric) 68 | i = N - 1 69 | j = N - 2 70 | r = x[j] - x[i] 71 | print(x[i], x[j]) 72 | print("r = ", r) 73 | print("L.r = ", np.dot(L, r)) 74 | assert np.allclose(M0, M2) 75 | 76 | 77 | def test_general_metric(seed=1234, N=2, ndim=3): 78 | np.random.seed(seed) 79 | 80 | _general_metric(np.eye(ndim), N=N, ndim=ndim) 81 | 82 | L = np.random.randn(ndim, ndim) 83 | L[np.diag_indices(ndim)] = np.exp(L[np.diag_indices(ndim)]) 84 | L[np.triu_indices(ndim, 1)] = 0.0 85 | metric = np.dot(L, L.T) 86 | _general_metric(metric, N=N, ndim=ndim) 87 | 88 | 89 | def test_axis_algined_metric(seed=1234, N=100, ndim=3): 90 | np.random.seed(seed) 91 | 92 | kernel = 0.1 * kernels.ExpSquaredKernel(np.ones(ndim), ndim=ndim) 93 | 94 | x = np.random.rand(N, ndim) 95 | M0 = kernel.get_value(x) 96 | 97 | gp = GP(kernel) 98 | M1 = gp.get_matrix(x) 99 | assert np.allclose(M0, M1) 100 | 101 | # Compute the expected matrix. 102 | M2 = 0.1*np.exp(-0.5*np.sum((x[None, :, :] - x[:, None, :])**2, axis=-1)) 103 | assert np.allclose(M0, M2) 104 | -------------------------------------------------------------------------------- /tests/test_modeling.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = [ 6 | "test_constant_mean", 7 | "test_callable_mean", 8 | "test_gp_mean", 9 | "test_gp_white_noise", 10 | "test_gp_callable_mean", 11 | "test_parameters", 12 | "test_bounds", 13 | ] 14 | 15 | import pytest 16 | import numpy as np 17 | 18 | from george import GP, kernels 19 | from george.utils import check_gradient 20 | from george.modeling import Model, ConstantModel, CallableModel 21 | 22 | 23 | def test_constant_mean(): 24 | m = ConstantModel(5.0) 25 | check_gradient(m, np.zeros(4)) 26 | 27 | 28 | def test_callable_mean(): 29 | m = CallableModel(lambda x: 5.0 * x) 30 | check_gradient(m, np.zeros(4)) 31 | 32 | 33 | def test_gp_mean(N=50, seed=1234): 34 | np.random.seed(seed) 35 | x = np.random.uniform(0, 5) 36 | y = 5 + np.sin(x) 37 | gp = GP(10. * kernels.ExpSquaredKernel(1.3), 38 | mean=5.0, fit_mean=True) 39 | gp.compute(x) 40 | check_gradient(gp, y) 41 | 42 | 43 | def test_gp_callable_mean(N=50, seed=1234): 44 | np.random.seed(seed) 45 | x = np.random.uniform(0, 5) 46 | y = 5 + np.sin(x) 47 | mean = CallableModel(lambda x: 5.0*x) 48 | gp = GP(10. 
* kernels.ExpSquaredKernel(1.3), mean=mean) 49 | gp.compute(x) 50 | check_gradient(gp, y) 51 | 52 | 53 | def test_gp_white_noise(N=50, seed=1234): 54 | np.random.seed(seed) 55 | x = np.random.uniform(0, 5) 56 | y = 5 + np.sin(x) 57 | gp = GP(10. * kernels.ExpSquaredKernel(1.3), 58 | mean=5.0, fit_mean=True, 59 | white_noise=0.1, fit_white_noise=True) 60 | gp.compute(x) 61 | check_gradient(gp, y) 62 | 63 | 64 | class LinearWhiteNoise(Model): 65 | parameter_names = ("m", "b") 66 | 67 | def get_value(self, x): 68 | return self.m * x + self.b 69 | 70 | @Model.parameter_sort 71 | def compute_gradient(self, x): 72 | return dict(m=x, b=np.ones(len(x))) 73 | 74 | 75 | def test_gp_callable_white_noise(N=50, seed=1234): 76 | np.random.seed(seed) 77 | x = np.random.uniform(0, 5) 78 | y = 5 + np.sin(x) 79 | gp = GP(10. * kernels.ExpSquaredKernel(1.3), mean=5.0, 80 | white_noise=LinearWhiteNoise(-6, 0.01), 81 | fit_white_noise=True) 82 | gp.compute(x) 83 | check_gradient(gp, y) 84 | 85 | gp.freeze_parameter("white_noise:m") 86 | check_gradient(gp, y) 87 | 88 | 89 | def test_parameters(): 90 | kernel = 10 * kernels.ExpSquaredKernel(1.0) 91 | kernel += 0.5 * kernels.RationalQuadraticKernel(log_alpha=0.1, metric=5.0) 92 | gp = GP(kernel, white_noise=LinearWhiteNoise(1.0, 0.1)) 93 | 94 | n = len(gp.get_parameter_vector()) 95 | assert n == len(gp.get_parameter_names()) 96 | assert n - 2 == len(kernel.get_parameter_names()) 97 | 98 | gp.freeze_parameter(gp.get_parameter_names()[0]) 99 | assert n - 1 == len(gp.get_parameter_names()) 100 | assert n - 1 == len(gp.get_parameter_vector()) 101 | 102 | gp.freeze_all_parameters() 103 | assert len(gp.get_parameter_names()) == 0 104 | assert len(gp.get_parameter_vector()) == 0 105 | 106 | gp.kernel.thaw_all_parameters() 107 | gp.white_noise.thaw_all_parameters() 108 | assert n == len(gp.get_parameter_vector()) 109 | assert n == len(gp.get_parameter_names()) 110 | 111 | assert np.allclose(kernel[0], np.log(10.)) 112 | 113 | 114 | def test_bounds(): 115 | kernel = 10 * kernels.ExpSquaredKernel(1.0, metric_bounds=[(None, 4.0)]) 116 | kernel += 0.5 * kernels.RationalQuadraticKernel(log_alpha=0.1, metric=5.0) 117 | gp = GP(kernel, white_noise=LinearWhiteNoise(1.0, 0.1)) 118 | 119 | # Test bounds length. 120 | assert len(gp.get_parameter_bounds()) == len(gp.get_parameter_vector()) 121 | gp.freeze_all_parameters() 122 | gp.thaw_parameter("white_noise:m") 123 | assert len(gp.get_parameter_bounds()) == len(gp.get_parameter_vector()) 124 | 125 | # Test invalid bounds specification. 
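    # Bounds are expected as one (lower, upper) pair per parameter, so a
    # flat specification like [10.0] should be rejected with a ValueError.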
126 | with pytest.raises(ValueError): 127 | kernels.ExpSine2Kernel(gamma=0.1, log_period=5.0, bounds=[10.0]) 128 | -------------------------------------------------------------------------------- /tests/test_pickle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | __all__ = [ 6 | "test_pickle", "test_pickle", 7 | ] 8 | 9 | import sys 10 | import pytest 11 | import pickle 12 | import numpy as np 13 | 14 | from george import GP, kernels, BasicSolver, HODLRSolver 15 | 16 | 17 | def _fake_compute(arg, *args, **kwargs): 18 | assert 0, "Unpickled GP shouldn't need to be computed" 19 | 20 | 21 | @pytest.mark.skipif(sys.version_info < (3, 0), reason="requires python3") 22 | @pytest.mark.parametrize("solver,success", [(BasicSolver, True), 23 | (HODLRSolver, False)]) 24 | def test_pickle(solver, success, N=50, seed=123): 25 | np.random.seed(seed) 26 | kernel = 0.1 * kernels.ExpSquaredKernel(1.5) 27 | kernel.pars = [1, 2] 28 | gp = GP(kernel, solver=solver) 29 | x = np.random.rand(100) 30 | gp.compute(x, 1e-2) 31 | 32 | s = pickle.dumps(gp, -1) 33 | gp = pickle.loads(s) 34 | if success: 35 | gp.compute = _fake_compute 36 | gp.lnlikelihood(np.sin(x)) 37 | -------------------------------------------------------------------------------- /tests/test_solvers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function 4 | 5 | __all__ = ["test_trivial_solver", "test_basic_solver", "test_hodlr_solver"] 6 | 7 | import numpy as np 8 | 9 | import george 10 | from george.utils import nd_sort_samples 11 | from george import kernels 12 | from george import TrivialSolver, BasicSolver, HODLRSolver 13 | 14 | 15 | def test_trivial_solver(N=300, seed=1234): 16 | # Sample some data. 17 | np.random.seed(seed) 18 | x = np.random.randn(N, 3) 19 | yerr = 1e-3 * np.ones(N) 20 | y = np.sin(np.sum(x, axis=1)) 21 | 22 | solver = TrivialSolver() 23 | solver.compute(x, yerr) 24 | 25 | assert np.allclose(solver.log_determinant, 2*np.sum(np.log(yerr))) 26 | assert np.allclose(solver.apply_inverse(y), y / yerr**2) 27 | 28 | 29 | def _test_solver(Solver, N=300, seed=1234, **kwargs): 30 | # Set up the solver. 31 | kernel = 1.0 * kernels.ExpSquaredKernel(1.0) 32 | solver = Solver(kernel, **kwargs) 33 | 34 | # Sample some data. 35 | np.random.seed(seed) 36 | x = np.atleast_2d(np.sort(10*np.random.randn(N))).T 37 | yerr = np.ones(N) 38 | solver.compute(x, yerr) 39 | 40 | # Build the matrix. 41 | K = kernel.get_value(x) 42 | K[np.diag_indices_from(K)] += yerr ** 2 43 | 44 | # Check the determinant. 45 | sgn, lndet = np.linalg.slogdet(K) 46 | assert sgn == 1.0, "Invalid determinant" 47 | assert np.allclose(solver.log_determinant, lndet), "Incorrect determinant" 48 | 49 | y = np.sin(x[:, 0]) 50 | b0 = np.linalg.solve(K, y) 51 | b = solver.apply_inverse(y).flatten() 52 | assert np.allclose(b, b0) 53 | 54 | # Check the inverse. 
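    # Applying the solver's inverse to the noisy covariance matrix itself
    # should recover (approximately) the identity matrix.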
55 | assert np.allclose(solver.apply_inverse(K), np.eye(N)), "Incorrect inverse" 56 | 57 | def test_basic_solver(**kwargs): 58 | _test_solver(BasicSolver, **kwargs) 59 | 60 | 61 | def test_hodlr_solver(**kwargs): 62 | _test_solver(HODLRSolver, tol=1e-10, **kwargs) 63 | 64 | def test_strange_hodlr_bug(): 65 | np.random.seed(1234) 66 | x = np.sort(np.random.uniform(0, 10, 50000)) 67 | yerr = 0.1 * np.ones_like(x) 68 | y = np.sin(x) 69 | 70 | kernel = np.var(y) * kernels.ExpSquaredKernel(1.0) 71 | 72 | gp_hodlr = george.GP(kernel, solver=HODLRSolver, seed=42) 73 | n = 200 74 | gp_hodlr.compute(x[:n], yerr[:n]) 75 | gp_hodlr.log_likelihood(y[:n]) 76 | -------------------------------------------------------------------------------- /tests/test_tutorial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import division, print_function, absolute_import 4 | 5 | __all__ = ["test_tutorial"] 6 | 7 | import numpy as np 8 | 9 | from george import kernels, GP, BasicSolver, HODLRSolver 10 | 11 | 12 | def test_tutorial(): 13 | def model(params, t): 14 | _, _, amp, loc, sig2 = params 15 | return amp * np.exp(-0.5 * (t - loc) ** 2 / sig2) 16 | 17 | def lnlike(p, t, y, yerr, solver=BasicSolver): 18 | a, tau = np.exp(p[:2]) 19 | gp = GP(a * kernels.Matern32Kernel(tau) + 0.001, solver=solver) 20 | gp.compute(t, yerr) 21 | return gp.lnlikelihood(y - model(p, t)) 22 | 23 | def lnprior(p): 24 | lna, lntau, amp, loc, sig2 = p 25 | if (-5 < lna < 5 and -5 < lntau < 5 and -10 < amp < 10 and 26 | -5 < loc < 5 and 0 < sig2 < 3): 27 | return 0.0 28 | return -np.inf 29 | 30 | def lnprob(p, x, y, yerr, **kwargs): 31 | lp = lnprior(p) 32 | return lp + lnlike(p, x, y, yerr, **kwargs) \ 33 | if np.isfinite(lp) else -np.inf 34 | 35 | np.random.seed(1234) 36 | x = np.sort(np.random.rand(50)) 37 | yerr = 0.05 + 0.01 * np.random.rand(len(x)) 38 | y = np.sin(x) + yerr * np.random.randn(len(x)) 39 | p = [0, 0, -1.0, 0.1, 0.4] 40 | assert np.isfinite(lnprob(p, x, y, yerr)), "Incorrect result" 41 | assert np.allclose(lnprob(p, x, y, yerr), 42 | lnprob(p, x, y, yerr, solver=HODLRSolver)), \ 43 | "Inconsistent results" 44 | --------------------------------------------------------------------------------