├── .coveragerc
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.yml
    │   ├── config.yml
    │   └── feature_request.yml
    ├── codecov.yml
    ├── dependabot.yml
    └── workflows
    │   ├── CI.yml
    │   ├── CI_Windows.yml
    │   ├── CI_apptainer.yml
    │   ├── CI_conda_forge.yml
    │   ├── CI_docker.yml
    │   ├── CI_docker_large_nightly.yml
    │   ├── CI_large_nightly.yml
    │   ├── CI_mac.yml
    │   ├── codeql-analysis.yml
    │   ├── docker_deploy.yml
    │   ├── docs.yml
    │   ├── pypi_deploy.yml
    │   ├── update_backend.yml
    │   └── update_backend_version.py
├── .gitignore
├── .pre-commit-config.yaml
├── Apptainer.def
├── CITATION.md
├── CONTRIBUTORS.md
├── Dockerfile
├── LICENSE
├── README.md
├── benchmarks
    ├── README.md
    ├── benchmark.sh
    ├── hyperparamopt.py
    ├── print_best_model.py
    └── space.py
├── docs
    ├── .gitignore
    ├── README.md
    ├── _api.md
    ├── all_contributors
    │   ├── .all-contributorsrc
    │   ├── .gitignore
    │   ├── package.json
    │   ├── run_all_contrib.sh
    │   └── yarn.lock
    ├── api-advanced.md
    ├── assets
    │   ├── 87712EA9B4B3CB1B.png
    │   ├── favicon.png
    │   ├── pysr_logo.svg
    │   └── pysr_logo_reduced.svg
    ├── backend.md
    ├── examples.md
    ├── gen_docs.sh
    ├── gen_param_docs.py
    ├── generate_papers.py
    ├── images
    │   ├── Planar_relation.png
    │   ├── SyReg_GasConc.png
    │   ├── Y_Mgal_Simba.png
    │   ├── back_to_formula.png
    │   ├── cloud_cover.jpg
    │   ├── economic_theory_gravity.png
    │   ├── electronnegativity_introduction.jpg
    │   ├── example_plot.png
    │   ├── hi_mass.png
    │   ├── hod_importances.png
    │   ├── hyperbolic_volume.png
    │   ├── illustris_example.png
    │   ├── jet_background_diagram.jpg
    │   ├── kidger_thesis.png
    │   └── rediscovering_gravity.png
    ├── js
    │   └── mathjax.js
    ├── operators.md
    ├── options.md
    ├── papers.yml
    ├── requirements.txt
    ├── stylesheets
    │   ├── extra.css
    │   └── papers_header.txt
    └── tuning.md
├── environment.yml
├── example.py
├── examples
    └── pysr_demo.ipynb
├── mkdocs.yml
├── mypy.ini
├── pyproject.toml
├── pysr
    ├── .gitignore
    ├── __init__.py
    ├── __main__.py
    ├── _cli
    │   ├── __init__.py
    │   └── main.py
    ├── denoising.py
    ├── deprecated.py
    ├── export.py
    ├── export_jax.py
    ├── export_latex.py
    ├── export_numpy.py
    ├── export_sympy.py
    ├── export_torch.py
    ├── expression_specs.py
    ├── feature_selection.py
    ├── julia_extensions.py
    ├── julia_helpers.py
    ├── julia_import.py
    ├── julia_registry_helpers.py
    ├── juliapkg.json
    ├── logger_specs.py
    ├── param_groupings.yml
    ├── sklearn_monkeypatch.py
    ├── sr.py
    ├── test
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── generate_dev_juliapkg.py
    │   ├── nb_sanitize.cfg
    │   ├── params.py
    │   ├── test_cli.py
    │   ├── test_dev.py
    │   ├── test_dev_pysr.dockerfile
    │   ├── test_jax.py
    │   ├── test_main.py
    │   ├── test_nb.ipynb
    │   ├── test_startup.py
    │   └── test_torch.py
    └── utils.py
└── setup.py


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |     */test/*
4 | source = pysr
5 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
 1 | name: Bug report
 2 | description: File a bug report
 3 | title: "[BUG]: "
 4 | labels: ["bug"]
 5 | assignees:
 6 |   - MilesCranmer
 7 | 
 8 | body:
 9 |   - type: markdown
10 |     attributes:
11 |       value: |
12 |         Thanks for taking the time to fill out this bug report!
13 |   - type: textarea
14 |     id: what-happened
15 |     attributes:
16 |       label: What happened?
17 |       description: Also tell us, what did you expect to happen?
18 |       placeholder: Tell us what you see!
19 |       value: "A bug happened!"
20 |     validations:
21 |       required: true
22 |   - type: input
23 |     id: version
24 |     attributes:
25 |       label: Version
26 |       description: What version of PySR are you running? `python3 -c 'import pysr; print(pysr.__version__)'`. (You might want to check the latest version, in case the issue has already been fixed.)
27 |     validations:
28 |       required: true
29 |   - type: dropdown
30 |     id: os
31 |     attributes:
32 |       label: Operating System
33 |       description: What operating system do you see the issue on?
34 |       multiple: true
35 |       options:
36 |         - Windows
37 |         - macOS
38 |         - Linux
39 |   - type: dropdown
40 |     id: pkg-manager
41 |     attributes:
42 |       label: Package Manager
43 |       description: What package manager are you using to install PySR?
44 |       options:
45 |         - pip
46 |         - Conda
47 |         - Other (specify below)
48 |   - type: dropdown
49 |     id: display
50 |     attributes:
51 |       label: Interface
52 |       description: How are you running PySR?
53 |       options:
54 |         - Jupyter Notebook
55 |         - IPython Terminal
56 |         - Script (i.e., `python my_script.py`)
57 |         - Google Colab
58 |         - Other (specify below)
59 |     validations:
60 |       required: true
61 |   - type: textarea
62 |     id: logs
63 |     attributes:
64 |       label: Relevant log output
65 |       description: Please copy and paste any log output or error messages. This will be automatically formatted into code, so no need for backticks.
66 |       render: shell
67 |   - type: textarea
68 |     id: extra
69 |     attributes:
70 |       label: Extra Info
71 |       description: Please tell us any other information that you think might help. For example, what are your PySR settings? What dataset are you running on? If possible, please share a minimal code example that produces the error.
72 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 |   - name: PySR Forums
4 |     url: https://github.com/MilesCranmer/PySR/discussions
5 |     about: Please ask and answer questions about how to use PySR here.
6 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: Feature request
 2 | description: Suggest an idea for this project
 3 | title: "[Feature]: "
 4 | labels: ["enhancement"]
 5 | 
 6 | body:
 7 |   - type: markdown
 8 |     attributes:
 9 |       value: |
10 |         Thanks for taking the time to suggest a feature for PySR! Your interest in the project helps improve the software for everyone 🚀
11 |   - type: textarea
12 |     id: feature
13 |     attributes:
14 |       label: Feature Request
15 |       description: Describe your desired feature request here! The more details the better.
16 |     validations:
17 |       required: true
18 |   - type: markdown
19 |     attributes:
20 |       value: |
21 |         Be sure to check out the [PySR forums](https://github.com/MilesCranmer/PySR/discussions) to chat with other users about PySR use-cases!
22 | 


--------------------------------------------------------------------------------
/.github/codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 |   status:
3 |     patch:
4 |       default:
5 |         informational: true
6 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 2 | 
 3 | version: 2
 4 | updates:
 5 | 
 6 |   - package-ecosystem: "pip" # See documentation for possible values
 7 |     directory: "/" # Location of package manifests
 8 |     schedule:
 9 |       interval: "daily"
10 | 
11 |   - package-ecosystem: "github-actions"
12 |     # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
13 |     directory: "/"
14 |     schedule:
15 |       # Check for updates to GitHub Actions every weekday
16 |       interval: "daily"
17 | 


--------------------------------------------------------------------------------
/.github/workflows/CI.yml:
--------------------------------------------------------------------------------
  1 | name: Linux
  2 | 
  3 | on:
  4 |   push:
  5 |     branches:
  6 |       - '**'
  7 |     paths:
  8 |       - '**'
  9 |     tags:
 10 |       - 'v*.*.*'
 11 |   pull_request:
 12 |     branches:
 13 |       - 'master'
 14 |     paths:
 15 |       - '**'
 16 | 
 17 | permissions:
 18 |   contents: write
 19 | 
 20 | jobs:
 21 |   test:
 22 |     runs-on: ${{ matrix.os }}
 23 |     timeout-minutes: 60
 24 |     env:
 25 |       COVERAGE_PROCESS_START: "${{ github.workspace }}/.coveragerc"
 26 |     defaults:
 27 |       run:
 28 |         shell: bash
 29 |     strategy:
 30 |       matrix:
 31 |         julia-version: ['1']
 32 |         python-version: ['3.13']
 33 |         os: [ubuntu-latest]
 34 |         test-id: [main]
 35 |         include:
 36 |           - julia-version: '1.10'
 37 |             python-version: '3.10'
 38 |             os: ubuntu-latest
 39 |             test-id: include
 40 |           - julia-version: '1'
 41 |             python-version: '3.13'
 42 |             os: ubuntu-latest
 43 |             test-id: include
 44 |           - julia-version: '1'
 45 |             python-version: '3.8'
 46 |             os: ubuntu-latest
 47 |             test-id: include
 48 | 
 49 |     steps:
 50 |       - uses: actions/checkout@v4
 51 |       - name: "Set up Julia"
 52 |         uses: julia-actions/setup-julia@v2
 53 |         with:
 54 |           version: ${{ matrix.julia-version }}
 55 |       - name: "Cache Julia"
 56 |         uses: julia-actions/cache@v2
 57 |         with:
 58 |           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
 59 |           cache-packages: false
 60 |       - name: "Set up Python"
 61 |         uses: actions/setup-python@v5
 62 |         with:
 63 |           python-version: ${{ matrix.python-version }}
 64 |           cache: pip
 65 |       - name: "Install PySR"
 66 |         run: |
 67 |             python -m pip install --upgrade pip
 68 |             pip install '.[dev]'
 69 |             python -c 'import pysr'
 70 |       - name: "Assert Julia version"
 71 |         if: ${{ matrix.julia-version != '1'}}
 72 |         run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
 73 |       - name: "Set up coverage for subprocesses"
 74 |         run: echo 'import coverage; coverage.process_startup()' > "${{ github.workspace }}/sitecustomize.py"
 75 |       - name: "Run tests"
 76 |         run: coverage run -m pysr test main,cli,startup
 77 |       - name: "Run JAX tests"
 78 |         run: coverage run --append -m pysr test jax
 79 |         if: ${{ matrix.test-id == 'main' }}
 80 |       - name: "Run Torch tests"
 81 |         run: coverage run --append -m pysr test torch
 82 |         if: ${{ matrix.test-id == 'main' }}
 83 |       - name: "Build coverage report"
 84 |         run: coverage xml
 85 |       - name: "Upload results to Codecov"
 86 |         uses: codecov/codecov-action@v5
 87 |         with:
 88 |           token: ${{ secrets.CODECOV_TOKEN }}
 89 |           slug: MilesCranmer/PySR
 90 | 
 91 |   dev_install:
 92 |     runs-on: ${{ matrix.os }}
 93 |     strategy:
 94 |       matrix:
 95 |         os: ['ubuntu-latest']
 96 |         python-version: ['3.13']
 97 |         julia-version: ['1']
 98 |         include:
 99 |           - os: ubuntu-latest
100 |             python-version: '3.10'
101 |             julia-version: '1.10'
102 |     steps:
103 |       - uses: actions/checkout@v4
104 |       - uses: actions/setup-python@v5
105 |       - name: "Install PySR"
106 |         run: |
107 |             python -m pip install --upgrade pip
108 |             pip install '.[dev]'
109 |       - name: "Run development test"
110 |         run: PYSR_TEST_JULIA_VERSION=${{ matrix.julia-version }} PYSR_TEST_PYTHON_VERSION=${{ matrix.python-version }} python -m pysr test dev
111 | 
112 |   conda_test:
113 |     runs-on: ${{ matrix.os }}
114 |     defaults:
115 |       run:
116 |         shell: bash -l {0}
117 |     strategy:
118 |       matrix:
119 |         python-version: ['3.13']
120 |         os: ['ubuntu-latest']
121 | 
122 |     steps:
123 |       - uses: actions/checkout@v4
124 |       - name: "Cache conda"
125 |         uses: actions/cache@v4
126 |         env:
127 |           CACHE_NUMBER: 0
128 |         with:
129 |           path: ~/conda_pkgs_dir
130 |           key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }}
131 |       - name: "Set up Conda"
132 |         uses: conda-incubator/setup-miniconda@v3
133 |         with:
134 |           miniforge-variant: Miniforge3
135 |           miniforge-version: latest
136 |           auto-activate-base: true
137 |           python-version: ${{ matrix.python-version }}
138 |           activate-environment: pysr-test
139 |           environment-file: environment.yml
140 |       - name: "Cache Julia"
141 |         uses: julia-actions/cache@v2
142 |         with:
143 |           cache-name: ${{ matrix.os }}-conda-${{ matrix.python-version }}
144 |           cache-packages: false
145 |       - name: "Install PySR"
146 |         run: |
147 |             python3 -m pip install .
148 |             python3 -c 'import pysr'
149 |       - name: "Run tests"
150 |         run: cd /tmp && python -m pysr test main
151 | 
152 |   types:
153 |     name: Check types
154 |     runs-on: ubuntu-latest
155 |     defaults:
156 |       run:
157 |         shell: bash -l {0}
158 |     strategy:
159 |       matrix:
160 |         python-version:
161 |           - '3.13'
162 |           - '3.10'
163 |         os: ['ubuntu-latest']
164 | 
165 |     steps:
166 |       - uses: actions/checkout@v4
167 |       - name: "Set up Python"
168 |         uses: actions/setup-python@v5
169 |         with:
170 |           python-version: ${{ matrix.python-version }}
171 |           cache: pip
172 |       - name: "Install PySR and all dependencies"
173 |         run: |
174 |             python -m pip install --upgrade pip
175 |             pip install '.[dev]'
176 |       - name: "Run mypy"
177 |         run: python -m mypy --install-types --non-interactive pysr
178 |         if: ${{ matrix.python-version != '3.10' }}
179 |       - name: "Run compatible mypy"
180 |         run: python -m mypy --ignore-missing-imports pysr
181 |         if: ${{ matrix.python-version == '3.10' }}
182 | 
183 |   beartype:
184 |     name: Test with beartype
185 |     runs-on: ubuntu-latest
186 |     defaults:
187 |       run:
188 |         shell: bash -l {0}
189 |     env:
190 |       PYSR_USE_BEARTYPE: "1"
191 |     strategy:
192 |       matrix:
193 |         python-version: ['3.13']
194 | 
195 |     steps:
196 |       - uses: actions/checkout@v4
197 |       - name: "Set up Python"
198 |         uses: actions/setup-python@v5
199 |         with:
200 |           python-version: ${{ matrix.python-version }}
201 |           cache: pip
202 |       - name: "Install PySR and all dependencies"
203 |         run: |
204 |             python -m pip install --upgrade pip
205 |             pip install '.[dev]'
206 |       - name: "Run tests"
207 |         run: python -m pysr test main,jax,torch
208 | 
209 |   wheel_test:
210 |     name: Test from wheel
211 |     runs-on: ubuntu-latest
212 |     strategy:
213 |       matrix:
214 |         python-version: ['3.13']
215 |         julia-version: ['1']
216 |     defaults:
217 |       run:
218 |         shell: bash -l {0}
219 | 
220 |     steps:
221 |       - uses: actions/checkout@v4
222 |       - uses: actions/setup-python@v5
223 |         with:
224 |           python-version: ${{ matrix.python-version }}
225 |           cache: pip
226 |       - uses: julia-actions/setup-julia@v2
227 |         with:
228 |           version: ${{ matrix.julia-version }}
229 | 
230 |       - name: "Build wheel"
231 |         run: |
232 |           python -m pip install --upgrade pip build virtualenv
233 |           python -m build --wheel
234 |           mkdir -p /tmp/artifacts
235 |           mv dist/*.whl /tmp/artifacts/
236 | 
237 |       - name: "Install wheel in venv & run smoke test"
238 |         run: |
239 |           mkdir -p /tmp/wheeltest
240 |           cd /tmp/wheeltest
241 |           python -m virtualenv .venv
242 |           source .venv/bin/activate
243 |           pip install /tmp/artifacts/*.whl
244 |           python -c "import pysr; pysr.PySRRegressor(niterations=1).fit([[1]], [1])"
245 | 


--------------------------------------------------------------------------------
/.github/workflows/CI_Windows.yml:
--------------------------------------------------------------------------------
 1 | name: Windows
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'master'
 7 |     paths:
 8 |       - '**'
 9 |     tags:
10 |       - 'v*.*.*'
11 |   pull_request:
12 |     branches:
13 |       - 'master'
14 |     paths:
15 |       - '**'
16 | 
17 | jobs:
18 |   test:
19 |     runs-on: ${{ matrix.os }}
20 |     timeout-minutes: 60
21 |     defaults:
22 |       run:
23 |         shell: bash
24 |     strategy:
25 |       matrix:
26 |         julia-version: ['1']
27 |         python-version: ['3.13']
28 |         os: [windows-latest]
29 | 
30 |     steps:
31 |       - uses: actions/checkout@v4
32 |       - name: "Set up Julia"
33 |         uses: julia-actions/setup-julia@v2
34 |         with:
35 |           version: ${{ matrix.julia-version }}
36 |       - name: "Cache Julia"
37 |         uses: julia-actions/cache@v2
38 |         with:
39 |           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
40 |           cache-packages: false
41 |       - name: "Set up Python"
42 |         uses: actions/setup-python@v5
43 |         with:
44 |           python-version: ${{ matrix.python-version }}
45 |           cache: pip
46 |       - name: "Install PySR"
47 |         run: |
48 |             python -m pip install --upgrade pip
49 |             pip install '.[dev]'
50 |             python -c 'import pysr'
51 |       - name: "Run tests"
52 |         run: |
53 |             python -m pysr test main,cli,startup
54 |       - name: "Install Torch"
55 |         run: pip install torch # (optional import)
56 |       - name: "Run Torch tests"
57 |         run: python -m pysr test torch
58 | 


--------------------------------------------------------------------------------
/.github/workflows/CI_apptainer.yml:
--------------------------------------------------------------------------------
 1 | name: Apptainer
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'master'
 7 |     paths:
 8 |       - '**'
 9 |     tags:
10 |       - 'v*.*.*'
11 |   pull_request:
12 |     branches:
13 |       - 'master'
14 |     paths:
15 |       - '**'
16 | 
17 | jobs:
18 |   test:
19 |     runs-on: ${{ matrix.os }}
20 |     timeout-minutes: 60
21 |     defaults:
22 |       run:
23 |         shell: bash
24 |     strategy:
25 |       matrix:
26 |         os: [ubuntu-latest]
27 | 
28 |     steps:
29 |       - uses: actions/checkout@v4
30 |       - uses: eWaterCycle/setup-apptainer@v2
31 |         with:
32 |             apptainer-version: 1.3.0
33 |       - name: Build apptainer
34 |         run: sudo apptainer build --notest pysr.sif Apptainer.def
35 |       - name: Test apptainer
36 |         run: |
37 |           TMPDIR=$(mktemp -d)
38 |           cp pysr.sif $TMPDIR
39 |           cd $TMPDIR
40 |           sudo apptainer test ./pysr.sif
41 | 


--------------------------------------------------------------------------------
/.github/workflows/CI_conda_forge.yml:
--------------------------------------------------------------------------------
 1 | name: conda-forge
 2 | 
 3 | # This CI tries the conda-forge version of PySR
 4 | 
 5 | on:
 6 |   schedule:
 7 |     # Run at the 0th minute of the 10th hour (UTC).
 8 |     # This means the job will run at 5am EST.
 9 |     - cron: "0 10 * * *"
10 |     # This will automatically run on master branch only.
11 |   workflow_dispatch:
12 | 
13 | jobs:
14 |   conda_test:
15 |     runs-on: ${{ matrix.os }}
16 |     timeout-minutes: 60
17 |     defaults:
18 |       run:
19 |         shell: bash -el {0}
20 |     strategy:
21 |       fail-fast: false
22 |       matrix:
23 |         python-version: ['3.10', '3']
24 |         os: ['ubuntu-latest', 'windows-latest', 'macos-latest']
25 | 
26 |     steps:
27 |       - name: "Set up Conda"
28 |         uses: conda-incubator/setup-miniconda@v3
29 |         with:
30 |           miniforge-variant: Miniforge3
31 |           miniforge-version: latest
32 |           auto-activate-base: true
33 |           python-version: ${{ matrix.python-version }}
34 |           activate-environment: pysr-test
35 |       - name: "Install pysr"
36 |         run: |
37 |           conda install -y pysr
38 |           python -c "import pysr"
39 |           echo "Finished."
40 |       - name: "Run tests"
41 |         run: |
42 |           echo "Running tests"
43 |           pip install pytest nbval
44 |           python -m pysr test main,startup
45 |           echo "Finished."
46 | 


--------------------------------------------------------------------------------
/.github/workflows/CI_docker.yml:
--------------------------------------------------------------------------------
 1 | name: Docker
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'master'
 7 |     paths:
 8 |       - '**'
 9 |     tags:
10 |       - 'v*.*.*'
11 |   pull_request:
12 |     branches:
13 |       - 'master'
14 |     paths:
15 |       - '**'
16 | 
17 | jobs:
18 |   test:
19 |     runs-on: ${{ matrix.os }}
20 |     timeout-minutes: 60
21 |     defaults:
22 |       run:
23 |         shell: bash
24 |     strategy:
25 |       matrix:
26 |         os: [ubuntu-latest]
27 |         arch: ['linux/amd64']
28 | 
29 |     steps:
30 |       - uses: actions/checkout@v4
31 |       - name: Build docker
32 |         run: docker build --platform=${{ matrix.arch }} -t pysr .
33 |       - name: Test docker
34 |         run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
35 | 


--------------------------------------------------------------------------------
/.github/workflows/CI_docker_large_nightly.yml:
--------------------------------------------------------------------------------
 1 | name: Docker_Large_Nightly
 2 | 
 3 | on:
 4 |   schedule:
 5 |     # Run at the 0th minute of the 10th hour (UTC).
 6 |     # This means the job will run at 5am EST.
 7 |     - cron: "0 10 * * *"
 8 |     # This will automatically run on master branch only.
 9 |   workflow_dispatch:
10 | 
11 | jobs:
12 |   test:
13 |     runs-on: ${{ matrix.os }}
14 |     continue-on-error: ${{ matrix.arch == 'linux/arm64' }}
15 |     defaults:
16 |       run:
17 |         shell: bash
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         julia-version: ['1.10', '1']
22 |         python-version: ['3.10', '3.13']
23 |         os: [ubuntu-latest]
24 |         arch: ['linux/amd64', 'linux/arm64']
25 | 
26 | 
27 |     steps:
28 |       - uses: actions/checkout@v4
29 |       - name: Set up QEMU
30 |         uses: docker/setup-qemu-action@v3
31 |         with:
32 |           platforms: all
33 |       - name: Build docker
34 |         run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} --build-arg PYVERSION=${{ matrix.python-version }} .
35 |       - name: Test docker
36 |         run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup'
37 | 


--------------------------------------------------------------------------------
/.github/workflows/CI_large_nightly.yml:
--------------------------------------------------------------------------------
 1 | name: large_nightly
 2 | 
 3 | # This CI only runs once per day, but tries
 4 | # many different configurations.
 5 | 
 6 | on:
 7 |   schedule:
 8 |     # Run at the 0th minute of the 10th hour (UTC).
 9 |     # This means the job will run at 5am EST.
10 |     - cron: "0 10 * * *"
11 |     # This will automatically run on master branch only.
12 |   workflow_dispatch:
13 | 
14 | jobs:
15 |   test:
16 |     runs-on: ${{ matrix.os }}
17 |     timeout-minutes: 60
18 |     # Windows jobs are flaky:
19 |     continue-on-error: ${{ matrix.os == 'windows-latest' }}
20 |     defaults:
21 |       run:
22 |         shell: bash
23 |     strategy:
24 |       fail-fast: false
25 |       matrix:
26 |         julia-version: ['1.10', '1']
27 |         python-version: ['3.10', '3.13']
28 |         os: [ubuntu-latest, macos-latest, windows-latest]
29 | 
30 |     steps:
31 |       - uses: actions/checkout@v4
32 |       - name: "Set up Julia"
33 |         uses: julia-actions/setup-julia@v2
34 |         with:
35 |           version: ${{ matrix.julia-version }}
36 |       - name: "Set up Python"
37 |         uses: actions/setup-python@v5
38 |         with:
39 |           python-version: ${{ matrix.python-version }}
40 |       - name: "Install PySR"
41 |         run: |
42 |             python -m pip install --upgrade pip
43 |             pip install '.[dev]'
44 |             python -c 'import pysr'
45 |       - name: "Assert Julia version"
46 |         if: ${{ matrix.julia-version != '1'}}
47 |         run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor"
48 |       - name: "Run tests"
49 |         run: python -m pysr test main,cli,startup
50 | 


--------------------------------------------------------------------------------
/.github/workflows/CI_mac.yml:
--------------------------------------------------------------------------------
 1 | name: macOS
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'master'
 7 |     paths:
 8 |       - '**'
 9 |     tags:
10 |       - 'v*.*.*'
11 |   pull_request:
12 |     branches:
13 |       - 'master'
14 |     paths:
15 |       - '**'
16 | 
17 | jobs:
18 |   test:
19 |     runs-on: ${{ matrix.os }}
20 |     timeout-minutes: 60
21 |     defaults:
22 |       run:
23 |         shell: bash
24 |     strategy:
25 |       matrix:
26 |         julia-version: ['1']
27 |         python-version: ['3.13']
28 |         os: [macos-latest]
29 | 
30 |     steps:
31 |       - uses: actions/checkout@v4
32 |       - name: "Set up Julia"
33 |         uses: julia-actions/setup-julia@v2
34 |         with:
35 |           version: ${{ matrix.julia-version }}
36 |       - name: "Cache Julia"
37 |         uses: julia-actions/cache@v2
38 |         with:
39 |           cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }}
40 |           cache-packages: false
41 |       - name: "Set up Python"
42 |         uses: actions/setup-python@v5
43 |         with:
44 |           python-version: ${{ matrix.python-version }}
45 |           cache: pip
46 |       - name: "Install PySR"
47 |         run: |
48 |             python -m pip install --upgrade pip
49 |             pip install '.[dev]'
50 |             python -c 'import pysr'
51 |       - name: "Run tests"
52 |         run: |
53 |             python -m pysr test main,cli,startup
54 |       - name: "Run JAX tests"
55 |         run: python -m pysr test jax
56 |       - name: "Run Torch tests"
57 |         run: python -m pysr test torch
58 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
 1 | name: "CodeQL"
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "master" ]
 6 |   pull_request:
 7 |     # The branches below must be a subset of the branches above
 8 |     branches: [ "master" ]
 9 |   schedule:
10 |     - cron: '28 17 * * 1'
11 | 
12 | jobs:
13 |   analyze:
14 |     name: Analyze
15 |     runs-on: ubuntu-latest
16 |     permissions:
17 |       actions: read
18 |       contents: read
19 |       security-events: write
20 | 
21 |     strategy:
22 |       fail-fast: false
23 |       matrix:
24 |         language: [ 'python' ]
25 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
26 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
27 | 
28 |     steps:
29 |     - name: Checkout repository
30 |       uses: actions/checkout@v4
31 | 
32 |     # Initializes the CodeQL tools for scanning.
33 |     - name: Initialize CodeQL
34 |       uses: github/codeql-action/init@v3
35 |       with:
36 |         languages: ${{ matrix.language }}
37 |         # If you wish to specify custom queries, you can do so here or in a config file.
38 |         # By default, queries listed here will override any specified in a config file.
39 |         # Prefix the list here with "+" to use these queries and those in the config file.
40 | 
41 |         # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
42 |         # queries: security-extended,security-and-quality
43 | 
44 | 
45 |     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
46 |     # If this step fails, then you should remove it and run the build manually (see below)
47 |     - name: Autobuild
48 |       uses: github/codeql-action/autobuild@v3
49 | 
50 |     # ℹ️ Command-line programs to run using the OS shell.
51 |     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
52 | 
53 |     #   If the Autobuild fails above, remove it and uncomment the following three lines.
54 |     #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
55 | 
56 |     # - run: |
57 |     #   echo "Run, Build Application using script"
58 |     #   ./location_of_script_within_repo/buildscript.sh
59 | 
60 |     - name: Perform CodeQL Analysis
61 |       uses: github/codeql-action/analyze@v3
62 | 


--------------------------------------------------------------------------------
/.github/workflows/docker_deploy.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy Docker
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: "0 10 * * *"
 6 |   push:
 7 |     branches:
 8 |       - "**"
 9 |     tags:
10 |       - "v*.*.*"
11 |   workflow_dispatch:
12 | 
13 | 
14 | jobs:
15 |   docker:
16 |     runs-on: ${{ matrix.os }}
17 |     strategy:
18 |       matrix:
19 |         os: [ubuntu-latest]
20 |         arch: [linux/amd64]
21 |         python-version: [3.12.3]
22 |         julia-version: [1.10.3]
23 |     steps:
24 |       - name: Checkout
25 |         uses: actions/checkout@v4
26 |       - name: Login to Docker Hub
27 |         uses: docker/login-action@v3
28 |         if: github.event_name != 'pull_request'
29 |         with:
30 |           username: ${{ secrets.DOCKERHUB_USERNAME }}
31 |           password: ${{ secrets.DOCKERHUB_TOKEN }}
32 |       - name: Login to GitHub registry
33 |         uses: docker/login-action@v3
34 |         if: github.event_name != 'pull_request'
35 |         with:
36 |           registry: ghcr.io
37 |           username: ${{ github.repository_owner }}
38 |           password: ${{ secrets.GITHUB_TOKEN }}
39 |       - name: Docker meta
40 |         id: meta
41 |         uses: docker/metadata-action@v5
42 |         with:
43 |           #  List of Docker images to use as base name for tags
44 |           images: |
45 |             mcranmer/pysr
46 |             ghcr.io/${{ github.repository }}
47 |           # generate Docker tags based on the following events/attributes
48 |           tags: |
49 |             type=schedule
50 |             type=ref,event=branch
51 |             type=ref,event=pr
52 |             type=semver,pattern={{version}}
53 |             type=semver,pattern={{major}}.{{minor}}
54 |             type=semver,pattern={{major}}
55 |             type=sha
56 |             type=raw,value=latest,enable={{is_default_branch}}
57 |       - name: Set up QEMU
58 |         uses: docker/setup-qemu-action@v3
59 |       - name: Set up Docker Buildx
60 |         uses: docker/setup-buildx-action@v3
61 |       - name: Build and push
62 |         uses: docker/build-push-action@v6
63 |         with:
64 |           context: .
65 |           platforms: ${{ matrix.arch }}
66 |           push: ${{ github.event_name != 'pull_request' }}
67 |           tags: ${{ steps.meta.outputs.tags }}
68 |           labels: ${{ steps.meta.outputs.labels }}
69 |           cache-from: type=registry,ref=mcranmer/pysr:buildcache
70 |           cache-to: type=registry,ref=mcranmer/pysr:buildcache,mode=max
71 |           build-args: |
72 |             PYVERSION=${{ matrix.python-version }}
73 |             JLVERSION=${{ matrix.julia-version }}
74 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: docs
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - 'master'
 7 |     paths:
 8 |       - 'pysr/**'
 9 |       - '.github/workflows/docs.yml'
10 |       - 'docs/**'
11 |       - 'README.md'
12 |       - 'mkdocs.yml'
13 |   workflow_dispatch:
14 | 
15 | jobs:
16 |   test:
17 |     runs-on: ubuntu-latest
18 |     defaults:
19 |       run:
20 |         shell: bash
21 | 
22 |     steps:
23 |       - uses: actions/checkout@v4
24 |       - name: "Set up Python"
25 |         uses: actions/setup-python@v5
26 |         with:
27 |           python-version: 3.13
28 |           cache: pip
29 |       - name: "Install packages for docs building"
30 |         run: pip install -r docs/requirements.txt
31 |       - name: "Install PySR"
32 |         run: pip install . && python -c 'import pysr'
33 |       - name: "Build API docs"
34 |         run: cd docs && ./gen_docs.sh
35 |       - name: "Deploy documentation to primary repository"
36 |         run: mkdocs gh-deploy --force
37 |       - name: "Deploy documentation to secondary repository"
38 |         env:
39 |           DEPLOY_KEY: ${{ secrets.DAMTP_DEPLOY_KEY }}
40 |         run: |
41 |           # Set up SSH key for authentication
42 |           mkdir -p ~/.ssh
43 |           echo "$DEPLOY_KEY" > ~/.ssh/id_rsa
44 |           chmod 600 ~/.ssh/id_rsa
45 |           ssh-keyscan github.com >> ~/.ssh/known_hosts
46 | 
47 |           git checkout gh-pages
48 |           git remote add secondary git@github.com:ai-damtp-cam-ac-uk/pysr.git
49 |           git push secondary gh-pages --force
50 | 


--------------------------------------------------------------------------------
/.github/workflows/pypi_deploy.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy PyPI
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - 'v*.*.*'
 6 |   workflow_dispatch:
 7 | 
 8 | jobs:
 9 |   pypi:
10 |     runs-on: ubuntu-latest
11 |     environment:
12 |       name: pypi
13 |       url: https://pypi.org/p/pysr
14 |     permissions:
15 |       id-token: write
16 |     steps:
17 |       - name: "Checkout"
18 |         uses: actions/checkout@v4
19 |       - name: "Set up Python"
20 |         uses: actions/setup-python@v5
21 |         with:
22 |           python-version: 3.10.8
23 |       - name: "Install building tools"
24 |         run: pip install build
25 |       - name: "Build package"
26 |         run: python -m build
27 |       - name: "Publish distribution 📦 to Test PyPI"
28 |         uses: pypa/gh-action-pypi-publish@release/v1
29 |         with:
30 |           password: ${{ secrets.TEST_PYPI_API_TOKEN }}
31 |           repository-url: https://test.pypi.org/legacy/
32 |           skip-existing: true
33 |           verbose: true
34 |       - name: "Publish distribution 📦 to PyPI"
35 |         uses: pypa/gh-action-pypi-publish@release/v1
36 |         with:
37 |           password: ${{ secrets.PYPI_API_TOKEN }}
38 |           verbose: true
39 | 


--------------------------------------------------------------------------------
/.github/workflows/update_backend.yml:
--------------------------------------------------------------------------------
 1 | name: PySR backend update
 2 | on:
 3 |   schedule:
 4 |     - cron: '00 00 * * *'
 5 |   workflow_dispatch:
 6 | jobs:
 7 |   update_compat:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v4
11 |       - uses: actions/setup-python@v5
12 |         with:
13 |           python-version: 3.13
14 |           cache: pip
15 | 
16 |       - name: "Install dependencies"
17 |         run: |
18 |           python -m pip install --upgrade pip
19 |           pip install tomlkit
20 | 
21 |       - name: "Get SymbolicRegression.jl latest version"
22 |         id: get-latest
23 |         run: |
24 |           cd $(mktemp -d)
25 |           git clone https://github.com/MilesCranmer/SymbolicRegression.jl
26 |           cd SymbolicRegression.jl
27 |           echo "version=$(git describe --tags --match='v*' --abbrev=0 | sed 's/^v//')" >> $GITHUB_OUTPUT
28 | 
29 |       - name: "Update SymbolicRegression.jl version in PySR"
30 |         run: |
31 |           python .github/workflows/update_backend_version.py ${{ steps.get-latest.outputs.version }}
32 | 
33 |       - name: "Restore changes if no diff to `pysr/juliapkg.json`"
34 |         run: |
35 |           if git diff --quiet pysr/juliapkg.json; then
36 |             echo "No changes to pysr/juliapkg.json. Restoring changes."
37 |             git restore pyproject.toml
38 |           fi
39 | 
40 |       - name: "Create PR if necessary"
41 |         uses: peter-evans/create-pull-request@v7
42 |         with:
43 |           title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}"
44 |           body: |
45 |             This PR was automatically generated by the GitHub Action `.github/workflows/update-backend.yml`
46 | 
47 |             It updates the backend version to v${{ steps.get-latest.outputs.version }}. For a full description of the changes, see the backend changelog: [v${{ steps.get-latest.outputs.version }}](https://github.com/MilesCranmer/SymbolicRegression.jl/releases/tag/v${{ steps.get-latest.outputs.version }}).
48 |           delete-branch: true
49 |           commit-message: "Update backend version to v${{ steps.get-latest.outputs.version }}"
50 |           add-paths: |
51 |             pyproject.toml
52 |             pysr/juliapkg.json
53 | 


--------------------------------------------------------------------------------
/.github/workflows/update_backend_version.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import sys
 3 | from pathlib import Path
 4 | 
 5 | import tomlkit
 6 | 
 7 | new_backend_version = sys.argv[1]
 8 | 
 9 | assert not new_backend_version.startswith("v"), "Version should not start with 'v'"
10 | 
11 | pyproject_toml = Path(__file__).parent / ".." / ".." / "pyproject.toml"
12 | juliapkg_json = Path(__file__).parent / ".." / ".." / "pysr" / "juliapkg.json"
13 | 
14 | with open(pyproject_toml) as toml_file:
15 |     pyproject_data = tomlkit.parse(toml_file.read())
16 | 
17 | with open(juliapkg_json) as f:
18 |     juliapkg_data = json.load(f)
19 | 
20 | major, minor, patch, *dev = pyproject_data["project"]["version"].split(".")
21 | pyproject_data["project"]["version"] = f"{major}.{minor}.{int(patch)+1}"
22 | 
23 | juliapkg_data["packages"]["SymbolicRegression"]["version"] = f"~{new_backend_version}"
24 | 
25 | with open(pyproject_toml, "w") as toml_file:
26 |     toml_file.write(tomlkit.dumps(pyproject_data))
27 | 
28 | with open(juliapkg_json, "w") as f:
29 |     json.dump(juliapkg_data, f, indent=4)
30 |     # Ensure ends with newline
31 |     f.write("\n")
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .dataset*.jl
 2 | .hyperparams*.jl
 3 | *.csv
 4 | *.csv.out*
 5 | *.bkup
 6 | *.pkl
 7 | performance*txt
 8 | *.out
 9 | trials*
10 | **/__pycache__
11 | build
12 | dist
13 | *.vs/*
14 | *.pyproj
15 | *.sln
16 | pysr/.vs/
17 | pysr.egg-info
18 | Manifest.toml
19 | workflow
20 | docs/index.md
21 | site
22 | **/.DS_Store
23 | **/*.code-workspace
24 | **/*.tar.gz
25 | venv
26 | requirements-dev.lock
27 | requirements.lock
28 | outputs
29 | .mypy_cache
30 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   # General linting
 3 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 4 |     rev: v5.0.0
 5 |     hooks:
 6 |       - id: trailing-whitespace
 7 |       - id: end-of-file-fixer
 8 |       - id: check-yaml
 9 |       - id: check-added-large-files
10 |   # General formatting
11 |   - repo: https://github.com/psf/black
12 |     rev: 25.1.0
13 |     hooks:
14 |       - id: black
15 |       - id: black-jupyter
16 |         exclude: pysr/test/test_nb.ipynb
17 |   # Stripping notebooks
18 |   - repo: https://github.com/kynan/nbstripout
19 |     rev: 0.8.1
20 |     hooks:
21 |       - id: nbstripout
22 |         exclude: pysr/test/test_nb.ipynb
23 |   # Unused imports
24 |   - repo: https://github.com/hadialqattan/pycln
25 |     rev: "v2.5.0"
26 |     hooks:
27 |       - id: pycln
28 |   # Sorted imports
29 |   - repo: https://github.com/PyCQA/isort
30 |     rev: "6.0.0"
31 |     hooks:
32 |       - id: isort
33 |         additional_dependencies: [toml]
34 | 


--------------------------------------------------------------------------------
/Apptainer.def:
--------------------------------------------------------------------------------
 1 | # Build an Apptainer SIF file containing a working copy of PySR and its prereqs
 2 | Bootstrap: docker
 3 | From: julia:1.11.1-bullseye
 4 | Stage: jl
 5 | 
 6 | Bootstrap: docker
 7 | From: python:3.12.6-bullseye
 8 | Stage: runtime
 9 | 
10 | %environment
11 | 	# Use the container Julia binary
12 | 	export PATH="/usr/local/julia/bin:$PATH"
13 | 
14 | 	# Create a stacked environment for additional Julia packages
15 | 	export JULIA_DEPOT_PATH="$HOME/.pysr:/pysr/depot:$JULIA_DEPOT_PATH"
16 | 	export JULIA_LOAD_PATH="$HOME/.pysr:/pysr:$JULIA_LOAD_PATH"
17 | 
18 | %files from jl
19 | 	/usr/local/julia /usr/local/julia
20 | 
21 | %files
22 | 	./pyproject.toml /pysr/pyproject.toml
23 | 	./LICENSE /pysr/LICENSE
24 | 	./README.md /pysr/README.md
25 | 	./pysr /pysr/pysr
26 | 
27 | %post
28 | 	# Ensure we don't use the local pysr commands:
29 | 	cd $(mktemp -d)
30 | 
31 | 	export PATH="/usr/local/julia/bin:$PATH"
32 | 
33 | 	# Install IPython and other useful libraries:
34 | 	pip3 install --no-cache-dir ipython matplotlib pytest nbval
35 | 	# Install PySR and requirements:
36 | 	pip3 install --no-cache-dir /pysr
37 | 
38 | 	# Put the Julia dependencies in /pysr/depot
39 | 	mkdir /pysr/depot
40 | 	export JULIA_DEPOT_PATH="/pysr/depot"
41 | 
42 | 	# And set a specific environment for Julia dependencies
43 | 	mkdir /pysr/env
44 | 	export PYTHON_JULIAPKG_PROJECT="/pysr/env"
45 | 
46 | 	# Pull in all the Julia dependencies
47 | 	python3 -c 'import pysr; pysr.load_all_packages()'
48 | 
49 | %test
50 | 	python3 -m pysr test main,cli,startup
51 | 
52 | %runscript
53 | 	# Start ipython when the container is executed
54 | 	[ ! -d $HOME/.pysr ] && mkdir $HOME/.pysr
55 | 	PYTHONPATH=/pysr ipython
56 | 


--------------------------------------------------------------------------------
/CITATION.md:
--------------------------------------------------------------------------------
 1 | # Citing
 2 | 
 3 | To cite PySR or SymbolicRegression.jl, please use the following BibTeX entry:
 4 | 
 5 | ```bibtex
 6 | @misc{cranmerInterpretableMachineLearning2023,
 7 |     title = {Interpretable {Machine} {Learning} for {Science} with {PySR} and {SymbolicRegression}.jl},
 8 |     url = {http://arxiv.org/abs/2305.01582},
 9 |     doi = {10.48550/arXiv.2305.01582},
10 |     urldate = {2023-07-17},
11 |     publisher = {arXiv},
12 |     author = {Cranmer, Miles},
13 |     month = may,
14 |     year = {2023},
15 |     note = {arXiv:2305.01582 [astro-ph, physics:physics]},
16 |     keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Computer Science - Symbolic Computation, Physics - Data Analysis, Statistics and Probability},
17 | }
18 | ```
19 | 
20 | To cite symbolic distillation of neural networks, the following BibTeX entry can be used:
21 | 
22 | ```bibtex
23 | @article{cranmerDiscovering2020,
24 |     title={Discovering Symbolic Models from Deep Learning with Inductive Biases},
25 |     author={Miles Cranmer and Alvaro Sanchez-Gonzalez and Peter Battaglia and Rui Xu and Kyle Cranmer and David Spergel and Shirley Ho},
26 |     journal={NeurIPS 2020},
27 |     year={2020},
28 |     eprint={2006.11287},
29 |     archivePrefix={arXiv},
30 |     primaryClass={cs.LG}
31 | }
32 | ```
33 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # This builds a dockerfile containing a working copy of PySR
 2 | # with all pre-requisites installed.
 3 | 
 4 | ARG JLVERSION=1.11.1
 5 | ARG PYVERSION=3.12.6
 6 | ARG BASE_IMAGE=bullseye
 7 | 
 8 | FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
 9 | FROM python:${PYVERSION}-${BASE_IMAGE}
10 | 
11 | # Merge Julia image:
12 | COPY --from=jl /usr/local/julia /usr/local/julia
13 | ENV PATH="/usr/local/julia/bin:${PATH}"
14 | 
15 | # Install IPython and other useful libraries:
16 | RUN pip install --no-cache-dir ipython matplotlib
17 | 
18 | WORKDIR /pysr
19 | 
20 | # Install PySR:
21 | # We do a minimal copy so it doesn't need to rerun at every file change:
22 | ADD ./pyproject.toml /pysr/pyproject.toml
23 | ADD ./LICENSE /pysr/LICENSE
24 | ADD ./README.md /pysr/README.md
25 | ADD ./pysr /pysr/pysr
26 | RUN pip3 install --no-cache-dir .
27 | 
28 | # Install Julia pre-requisites:
29 | RUN python3 -c 'import pysr; pysr.load_all_packages()'
30 | 
31 | # metainformation
32 | LABEL org.opencontainers.image.authors = "Miles Cranmer"
33 | LABEL org.opencontainers.image.source = "https://github.com/MilesCranmer/PySR"
34 | LABEL org.opencontainers.image.licenses = "Apache License 2.0"
35 | 
36 | CMD ["ipython"]
37 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2020 Miles Cranmer
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/benchmarks/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmark 1
 2 | 
 3 | The following benchmarks were ran with this command on a node on CCA's BNL cluster (40-cores). At no time was the node fully busy. The tags were put into the file `tags.txt`, and the `benchmark.sh` was copied to the root folder. This is the command used:
 4 | 
 5 | ```bash
 6 | for x in $(cat tags.txt); do sleep 120 && git checkout $x &> /dev/null && nohup ./benchmark.sh > performance_v3_$x.txt &; done
 7 | ```
 8 | with this API call in `benchmark.sh`
 9 | ```python
10 | eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000)
11 | ```
12 | 
13 | 
14 | Version | Cycles/second
15 | --- | ---
16 | v0.3.2 | 37526
17 | v0.3.3 | 38400
18 | v0.3.4 | 28700
19 | v0.3.5 | 32700
20 | v0.3.6 | 25900
21 | v0.3.7 | 26600
22 | v0.3.8 | 7470
23 | v0.3.9 | 6760
24 | v0.3.10 |
25 | v0.3.11 | 19500
26 | v0.3.12 | 19000
27 | v0.3.13 | 15200
28 | v0.3.14 | 14700
29 | v0.3.15 | 42000
30 | v0.3.23 | 64000
31 | 
32 | v0.3.10 was frozen.
33 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark.sh:
--------------------------------------------------------------------------------
 1 | python setup.py install > /dev/null && python -c '
 2 | import pkg_resources
 3 | version = pkg_resources.get_distribution("pysr").version
 4 | version = [int(elem) for elem in version.split(".")]
 5 | import numpy as np
 6 | from pysr import pysr
 7 | X=np.random.randn(100, 2)*5
 8 | y=2*np.sin((X[:, 0]+X[:, 1]))*np.exp(X[:, 1]/3)
 9 | if version[1] >= 3 and version[2] >= 20:
10 |     eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000, maxdepth=6, fast_cycle=True, batching=True, batch_size=50)
11 | elif version[1] >= 3 and version[2] >= 17:
12 |     eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000, maxdepth=6, fast_cycle=True)
13 | elif version[1] >= 3 and version[2] >= 16:
14 |     eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000, maxdepth=6)
15 | elif version[1] >= 3 and version[2] >= 2:
16 |     eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000)
17 | else:
18 |     eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, threads=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000)
19 | ' 2>&1 | grep 'per second' | tail -n 1 | vims '%s/ //g' -l 'df:'
20 | 


--------------------------------------------------------------------------------
/benchmarks/hyperparamopt.py:
--------------------------------------------------------------------------------
  1 | """Start a hyperoptimization from a single node"""
  2 | 
  3 | import pickle as pkl
  4 | import sys
  5 | 
  6 | import hyperopt
  7 | import numpy as np
  8 | from hyperopt import Trials, fmin, hp, tpe
  9 | from hyperopt.fmin import generate_trials_to_calculate
 10 | from space import *
 11 | 
 12 | from pysr import PySRRegressor
 13 | 
 14 | # Change the following code to your file
 15 | ################################################################################
 16 | TRIALS_FOLDER = "trials2"
 17 | NUMBER_TRIALS_PER_RUN = 1
 18 | timeout_in_minutes = 10
 19 | start_from_init_vals = False
 20 | 
 21 | # Test run to compile everything:
 22 | julia_project = None
 23 | procs = 4
 24 | model = PySRRegressor(
 25 |     binary_operators=binary_operators,
 26 |     unary_operators=unary_operators,
 27 |     timeout_in_seconds=30,
 28 |     julia_project=julia_project,
 29 |     procs=procs,
 30 |     update=False,
 31 |     temp_equation_file=True,
 32 | )
 33 | model.fit(np.random.randn(100, 3), np.random.randn(100))
 34 | 
 35 | 
 36 | def run_trial(args):
 37 |     """Evaluate the model loss using the hyperparams in args
 38 | 
 39 |     :args: A dictionary containing all hyperparameters
 40 |     :returns: Dict with status and loss from cross-validation
 41 | 
 42 |     """
 43 |     # The arguments which are integers:
 44 |     integer_args = [
 45 |         "populations",
 46 |         "niterations",
 47 |         "ncyclesperiteration",
 48 |         "population_size",
 49 |         "topn",
 50 |         "maxsize",
 51 |         "optimizer_nrestarts",
 52 |         "optimizer_iterations",
 53 |     ]
 54 |     # Set these to int types:
 55 |     for k, v in args.items():
 56 |         if k in integer_args:
 57 |             args[k] = int(v)
 58 | 
 59 |     # Duplicate this argument:
 60 |     args["tournament_selection_n"] = args["topn"]
 61 | 
 62 |     # Invalid hyperparams:
 63 |     invalid = args["population_size"] < args["topn"]
 64 |     if invalid:
 65 |         return dict(status="fail", loss=float("inf"))
 66 | 
 67 |     args["timeout_in_seconds"] = timeout_in_minutes * 60
 68 |     args["julia_project"] = julia_project
 69 |     args["procs"] = procs
 70 |     args["update"] = False
 71 |     args["temp_equation_file"] = True
 72 | 
 73 |     print(f"Running trial with args: {args}")
 74 | 
 75 |     # Create the dataset:
 76 |     ntrials = 3
 77 |     losses = []
 78 | 
 79 |     # Old datasets:
 80 |     eval_str = [
 81 |         "np.cos(2.3 * X[:, 0]) * np.sin(2.3 * X[:, 0] * X[:, 1] * X[:, 2]) - 10.0",
 82 |         "(np.exp(X[:, 3]*0.3) + 3)/(np.exp(X[:, 1]*0.2) + np.cos(X[:, 0]) + 1.1)",
 83 |         # "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
 84 |         # "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
 85 |         # "X[:, 0] * np.sin(2*np.pi * (X[:, 1] * X[:, 2] - X[:, 3] / X[:, 4])) + 3.0",
 86 |     ]
 87 | 
 88 |     for expression in eval_str:
 89 |         expression_losses = []
 90 |         for i in range(ntrials):
 91 |             rstate = np.random.RandomState(i)
 92 |             X = 3 * rstate.randn(200, 5)
 93 |             y = eval(expression)
 94 | 
 95 |             # Normalize y so that losses are fair:
 96 |             y = (y - np.average(y)) / np.std(y)
 97 | 
 98 |             # Create the model:
 99 |             model = PySRRegressor(**args)
100 | 
101 |             # Run the model:
102 |             try:
103 |                 model.fit(X, y)
104 |             except RuntimeError:
105 |                 return dict(status="fail", loss=float("inf"))
106 | 
107 |             # Compute loss:
108 |             cur_loss = float(model.get_best()["loss"])
109 |             expression_losses.append(cur_loss)
110 | 
111 |         losses.append(np.median(expression_losses))
112 | 
113 |     loss = np.average(losses)
114 |     print(f"Finished with {loss}", str(args))
115 | 
116 |     return dict(status="ok", loss=loss)
117 | 
118 | 
119 | rand_between = lambda lo, hi: (np.random.rand() * (hi - lo) + lo)
120 | 
121 | init_vals = [
122 |     dict(
123 |         model_selection=0,  # 0 means first choice
124 |         binary_operators=0,
125 |         unary_operators=0,
126 |         populations=100.0,
127 |         niterations=0,
128 |         ncyclesperiteration=rand_between(50, 150),
129 |         alpha=rand_between(0.05, 0.2),
130 |         annealing=0,
131 |         #     fraction_replaced=0.01,
132 |         fraction_replaced=0.01,
133 |         #     fraction_replaced_hof=0.005,
134 |         fraction_replaced_hof=0.005,
135 |         #     population_size=100,
136 |         population_size=rand_between(50, 200),
137 |         #     parsimony=1e-4,
138 |         parsimony=1e-4,
139 |         #     topn=10,
140 |         topn=10.0,
141 |         #     weight_add_node=1,
142 |         weight_add_node=1.0,
143 |         #     weight_insert_node=3,
144 |         weight_insert_node=3.0,
145 |         #     weight_delete_node=3,
146 |         weight_delete_node=3.0,
147 |         #     weight_do_nothing=1,
148 |         weight_do_nothing=1.0,
149 |         #     weight_mutate_constant=10,
150 |         weight_mutate_constant=10.0,
151 |         #     weight_mutate_operator=1,
152 |         weight_mutate_operator=1.0,
153 |         #     weight_swap_operands=1,
154 |         weight_swap_operands=1.0,
155 |         #     weight_randomize=1,
156 |         weight_randomize=1.0,
157 |         #     weight_simplify=0.002,
158 |         weight_simplify=0,  # One of these is fixed.
159 |         # crossover_probability=0.01
160 |         crossover_probability=0.01,
161 |         #     perturbation_factor=1.0,
162 |         perturbation_factor=1.0,
163 |         #     maxsize=20,
164 |         maxsize=0,
165 |         #     warmup_maxsize_by=0.0,
166 |         warmup_maxsize_by=0.0,
167 |         #     use_frequency=True,
168 |         use_frequency=1,
169 |         #     optimizer_nrestarts=3,
170 |         optimizer_nrestarts=3.0,
171 |         #     optimize_probability=1.0,
172 |         optimize_probability=1.0,
173 |         #     optimizer_iterations=10,
174 |         optimizer_iterations=10.0,
175 |         #     tournament_selection_p=1.0,
176 |         tournament_selection_p=rand_between(0.9, 0.999),
177 |     )
178 | ]
179 | 
180 | ################################################################################
181 | 
182 | 
183 | def merge_trials(trials1, trials2_slice):
184 |     """Merge two hyperopt trials objects
185 | 
186 |     :trials1: The primary trials object
187 |     :trials2_slice: A slice of the trials object to be merged,
188 |         obtained with, e.g., trials2.trials[:10]
189 |     :returns: The merged trials object
190 | 
191 |     """
192 |     max_tid = 0
193 |     if len(trials1.trials) > 0:
194 |         max_tid = max([trial["tid"] for trial in trials1.trials])
195 | 
196 |     for trial in trials2_slice:
197 |         tid = trial["tid"] + max_tid + 2
198 |         local_hyperopt_trial = Trials().new_trial_docs(
199 |             tids=[None], specs=[None], results=[None], miscs=[None]
200 |         )
201 |         local_hyperopt_trial[0] = trial
202 |         local_hyperopt_trial[0]["tid"] = tid
203 |         local_hyperopt_trial[0]["misc"]["tid"] = tid
204 |         for key in local_hyperopt_trial[0]["misc"]["idxs"].keys():
205 |             local_hyperopt_trial[0]["misc"]["idxs"][key] = [tid]
206 |         trials1.insert_trial_docs(local_hyperopt_trial)
207 |         trials1.refresh()
208 |     return trials1
209 | 
210 | 
211 | import glob
212 | 
213 | path = TRIALS_FOLDER + "/*.pkl"
214 | n_prior_trials = len(list(glob.glob(path)))
215 | 
216 | loaded_fnames = []
217 | if start_from_init_vals:
218 |     trials = generate_trials_to_calculate(init_vals)
219 |     i = 0
220 | else:
221 |     trials = Trials()
222 |     i = 1
223 | 
224 | n = NUMBER_TRIALS_PER_RUN
225 | 
226 | # Run new hyperparameter trials until killed
227 | while True:
228 |     np.random.seed()
229 | 
230 |     # Load up all runs:
231 | 
232 |     if i > 0:
233 |         for fname in glob.glob(path):
234 |             if fname in loaded_fnames:
235 |                 continue
236 | 
237 |             trials_obj = pkl.load(open(fname, "rb"))
238 |             n_trials = trials_obj["n"]
239 |             trials_obj = trials_obj["trials"]
240 |             if len(loaded_fnames) == 0:
241 |                 trials = trials_obj
242 |             else:
243 |                 print("Merging trials")
244 |                 trials = merge_trials(trials, trials_obj.trials[-n_trials:])
245 | 
246 |             loaded_fnames.append(fname)
247 | 
248 |         print("Loaded trials", len(loaded_fnames))
249 |         if len(loaded_fnames) == 0:
250 |             trials = Trials()
251 | 
252 |         try:
253 |             best = fmin(
254 |                 run_trial,
255 |                 space=space,
256 |                 algo=tpe.suggest,
257 |                 max_evals=n + len(trials.trials),
258 |                 trials=trials,
259 |                 verbose=1,
260 |                 rstate=np.random.RandomState(np.random.randint(1, 10**6)),
261 |             )
262 |         except hyperopt.exceptions.AllTrialsFailed:
263 |             continue
264 |     else:
265 |         best = fmin(
266 |             run_trial,
267 |             space=space,
268 |             algo=tpe.suggest,
269 |             max_evals=1,
270 |             trials=trials,
271 |             points_to_evaluate=init_vals,
272 |         )
273 | 
274 |     print("current best", best)
275 |     hyperopt_trial = Trials()
276 | 
277 |     # Merge with empty trials dataset:
278 |     if i == 0:
279 |         save_trials = merge_trials(hyperopt_trial, trials.trials)
280 |     else:
281 |         save_trials = merge_trials(hyperopt_trial, trials.trials[-n:])
282 | 
283 |     new_fname = TRIALS_FOLDER + "/" + str(np.random.randint(0, sys.maxsize)) + ".pkl"
284 |     pkl.dump({"trials": save_trials, "n": n}, open(new_fname, "wb"))
285 |     loaded_fnames.append(new_fname)
286 | 
287 |     i += 1
288 | 


--------------------------------------------------------------------------------
/benchmarks/print_best_model.py:
--------------------------------------------------------------------------------
  1 | """Print the best model parameters and loss"""
  2 | 
  3 | import pickle as pkl
  4 | from pprint import PrettyPrinter
  5 | 
  6 | import hyperopt
  7 | import numpy as np
  8 | from hyperopt import Trials, fmin, hp, tpe
  9 | from space import space
 10 | 
 11 | # Change the following code to your file
 12 | ################################################################################
 13 | # TODO: Declare a folder to hold all trials objects
 14 | TRIALS_FOLDER = "trials2"
 15 | ################################################################################
 16 | 
 17 | 
 18 | def merge_trials(trials1, trials2_slice):
 19 |     """Merge two hyperopt trials objects
 20 | 
 21 |     :trials1: The primary trials object
 22 |     :trials2_slice: A slice of the trials object to be merged,
 23 |         obtained with, e.g., trials2.trials[:10]
 24 |     :returns: The merged trials object
 25 | 
 26 |     """
 27 |     max_tid = 0
 28 |     if len(trials1.trials) > 0:
 29 |         max_tid = max([trial["tid"] for trial in trials1.trials])
 30 | 
 31 |     for trial in trials2_slice:
 32 |         tid = trial["tid"] + max_tid + 1
 33 |         hyperopt_trial = Trials().new_trial_docs(
 34 |             tids=[None], specs=[None], results=[None], miscs=[None]
 35 |         )
 36 |         hyperopt_trial[0] = trial
 37 |         hyperopt_trial[0]["tid"] = tid
 38 |         hyperopt_trial[0]["misc"]["tid"] = tid
 39 |         for key in hyperopt_trial[0]["misc"]["idxs"].keys():
 40 |             hyperopt_trial[0]["misc"]["idxs"][key] = [tid]
 41 |         trials1.insert_trial_docs(hyperopt_trial)
 42 |         trials1.refresh()
 43 |     return trials1
 44 | 
 45 | 
 46 | np.random.seed()
 47 | 
 48 | # Load up all runs:
 49 | import glob
 50 | 
 51 | path = TRIALS_FOLDER + "/*.pkl"
 52 | files = 0
 53 | for fname in glob.glob(path):
 54 |     trials_obj = pkl.load(open(fname, "rb"))
 55 |     n_trials = trials_obj["n"]
 56 |     trials_obj = trials_obj["trials"]
 57 |     if files == 0:
 58 |         trials = trials_obj
 59 |     else:
 60 |         trials = merge_trials(trials, trials_obj.trials[-n_trials:])
 61 |     files += 1
 62 | 
 63 | 
 64 | print(files, "trials merged")
 65 | 
 66 | 
 67 | best_loss = np.inf
 68 | best_trial = None
 69 | try:
 70 |     trials
 71 | except NameError:
 72 |     raise NameError("No trials loaded. Be sure to set the right folder")
 73 | 
 74 | # for trial in trials:
 75 | # if trial['result']['status'] == 'ok':
 76 | # loss = trial['result']['loss']
 77 | # if loss < best_loss:
 78 | # best_loss = loss
 79 | # best_trial = trial
 80 | 
 81 | # print(best_loss, best_trial['misc']['vals'])
 82 | # trials = sorted(trials, key=lambda x: (x['result']['loss'] if trials['result']['status'] == 'ok' else float('inf')))
 83 | 
 84 | clean_trials = []
 85 | for trial in trials:
 86 |     clean_trials.append((trial["result"]["loss"], trial["misc"]["vals"]))
 87 | 
 88 | clean_trials = sorted(clean_trials, key=lambda x: x[0])
 89 | 
 90 | pp = PrettyPrinter(indent=4)
 91 | 
 92 | for trial in clean_trials:
 93 |     loss, params = trial
 94 |     for k, value in params.items():
 95 |         value = value[0]
 96 |         if isinstance(value, int):
 97 |             possible_args = space[k].pos_args[1:]
 98 |             try:
 99 |                 value = possible_args[value].obj
100 |             except AttributeError:
101 |                 value = [arg.obj for arg in possible_args[value].pos_args]
102 | 
103 |         params[k] = value
104 | 
105 |     pp.pprint({"loss": loss, "params": params})
106 | 


--------------------------------------------------------------------------------
/benchmarks/space.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from hyperopt import Trials, fmin, hp, tpe
 3 | 
 4 | binary_operators = ["*", "/", "+", "-"]
 5 | unary_operators = ["sin", "cos", "exp", "log"]
 6 | 
 7 | space = dict(
 8 |     #     model_selection="best",
 9 |     model_selection=hp.choice("model_selection", ["accuracy"]),
10 |     #     binary_operators=None,
11 |     binary_operators=hp.choice("binary_operators", [binary_operators]),
12 |     #     unary_operators=None,
13 |     unary_operators=hp.choice("unary_operators", [unary_operators]),
14 |     #     populations=100,
15 |     populations=hp.qloguniform("populations", np.log(10), np.log(1000), 1),
16 |     #     niterations=4,
17 |     niterations=hp.choice(
18 |         "niterations", [10000]
19 |     ),  # We will quit automatically based on a clock.
20 |     #     ncyclesperiteration=100,
21 |     ncyclesperiteration=hp.qloguniform(
22 |         "ncyclesperiteration", np.log(10), np.log(5000), 1
23 |     ),
24 |     #     alpha=0.1,
25 |     alpha=hp.loguniform("alpha", np.log(0.0001), np.log(1000)),
26 |     #     annealing=False,
27 |     annealing=hp.choice("annealing", [False, True]),
28 |     #     fraction_replaced=0.01,
29 |     fraction_replaced=hp.loguniform("fraction_replaced", np.log(0.0001), np.log(0.5)),
30 |     #     fraction_replaced_hof=0.005,
31 |     fraction_replaced_hof=hp.loguniform(
32 |         "fraction_replaced_hof", np.log(0.0001), np.log(0.5)
33 |     ),
34 |     #     population_size=100,
35 |     population_size=hp.qloguniform("population_size", np.log(20), np.log(1000), 1),
36 |     #     parsimony=1e-4,
37 |     parsimony=hp.loguniform("parsimony", np.log(0.0001), np.log(0.5)),
38 |     #     topn=10,
39 |     topn=hp.qloguniform("topn", np.log(2), np.log(50), 1),
40 |     #     weight_add_node=1,
41 |     weight_add_node=hp.loguniform("weight_add_node", np.log(0.0001), np.log(100)),
42 |     #     weight_insert_node=3,
43 |     weight_insert_node=hp.loguniform("weight_insert_node", np.log(0.0001), np.log(100)),
44 |     #     weight_delete_node=3,
45 |     weight_delete_node=hp.loguniform("weight_delete_node", np.log(0.0001), np.log(100)),
46 |     #     weight_do_nothing=1,
47 |     weight_do_nothing=hp.loguniform("weight_do_nothing", np.log(0.0001), np.log(100)),
48 |     #     weight_mutate_constant=10,
49 |     weight_mutate_constant=hp.loguniform(
50 |         "weight_mutate_constant", np.log(0.0001), np.log(100)
51 |     ),
52 |     #     weight_mutate_operator=1,
53 |     weight_mutate_operator=hp.loguniform(
54 |         "weight_mutate_operator", np.log(0.0001), np.log(100)
55 |     ),
56 |     #     weight_swap_operands=1,
57 |     weight_swap_operands=hp.loguniform(
58 |         "weight_swap_operands", np.log(0.0001), np.log(100)
59 |     ),
60 |     #     weight_randomize=1,
61 |     weight_randomize=hp.loguniform("weight_randomize", np.log(0.0001), np.log(100)),
62 |     #     weight_simplify=0.002,
63 |     weight_simplify=hp.choice("weight_simplify", [0.002]),  # One of these is fixed.
64 |     #     crossover_probability=0.01,
65 |     crossover_probability=hp.loguniform(
66 |         "crossover_probability", np.log(0.00001), np.log(0.2)
67 |     ),
68 |     #     perturbation_factor=1.0,
69 |     perturbation_factor=hp.loguniform(
70 |         "perturbation_factor", np.log(0.0001), np.log(100)
71 |     ),
72 |     #     maxsize=20,
73 |     maxsize=hp.choice("maxsize", [30]),
74 |     #     warmup_maxsize_by=0.0,
75 |     warmup_maxsize_by=hp.uniform("warmup_maxsize_by", 0.0, 0.5),
76 |     #     use_frequency=True,
77 |     use_frequency=hp.choice("use_frequency", [True, False]),
78 |     #     optimizer_nrestarts=3,
79 |     optimizer_nrestarts=hp.quniform("optimizer_nrestarts", 1, 10, 1),
80 |     #     optimize_probability=1.0,
81 |     optimize_probability=hp.uniform("optimize_probability", 0.0, 1.0),
82 |     #     optimizer_iterations=10,
83 |     optimizer_iterations=hp.quniform("optimizer_iterations", 1, 10, 1),
84 |     #     tournament_selection_p=1.0,
85 |     tournament_selection_p=hp.uniform("tournament_selection_p", 0.0, 1.0),
86 | )
87 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | api.md
3 | index.md.bak
4 | papers.md
5 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # PySR Documentation
2 | 
3 | ## Building locally
4 | 
5 | 1. In the base directory, run `pip install -r docs/requirements.txt`.
6 | 2. Install PySR in editable mode: `pip install -e .`.
7 | 3. Build doc source with `cd docs && ./gen_docs.sh && cd ..`.
8 | 4. Create and serve docs with mkdocs: `mkdocs serve -w pysr`.
9 | 


--------------------------------------------------------------------------------
/docs/_api.md:
--------------------------------------------------------------------------------
  1 | # API Reference
  2 | 
  3 | `PySRRegressor` has many options for controlling a symbolic regression search.
  4 | Let's look at them below.
  5 | 
  6 | PARAMSKEY
  7 | 
  8 | ## PySRRegressor Functions
  9 | 
 10 | ::: pysr.PySRRegressor.fit
 11 |     options:
 12 |         show_root_heading: true
 13 |         heading_level: 3
 14 |         show_root_full_path: false
 15 | 
 16 | ::: pysr.PySRRegressor.predict
 17 |     options:
 18 |         show_root_heading: true
 19 |         heading_level: 3
 20 |         show_root_full_path: false
 21 | 
 22 | ::: pysr.PySRRegressor.from_file
 23 |     options:
 24 |         show_root_heading: true
 25 |         heading_level: 3
 26 |         show_root_full_path: false
 27 | 
 28 | ::: pysr.PySRRegressor.sympy
 29 |     options:
 30 |         show_root_heading: true
 31 |         heading_level: 3
 32 |         show_root_full_path: false
 33 | 
 34 | ::: pysr.PySRRegressor.latex
 35 |     options:
 36 |         show_root_heading: true
 37 |         heading_level: 3
 38 |         show_root_full_path: false
 39 | 
 40 | ::: pysr.PySRRegressor.pytorch
 41 |     options:
 42 |         show_root_heading: true
 43 |         heading_level: 3
 44 |         show_root_full_path: false
 45 | 
 46 | ::: pysr.PySRRegressor.jax
 47 |     options:
 48 |         show_root_heading: true
 49 |         heading_level: 3
 50 |         show_root_full_path: false
 51 | 
 52 | ::: pysr.PySRRegressor.latex_table
 53 |     options:
 54 |         show_root_heading: true
 55 |         heading_level: 3
 56 |         show_root_full_path: false
 57 | 
 58 | ::: pysr.PySRRegressor.refresh
 59 |     options:
 60 |         show_root_heading: true
 61 |         heading_level: 3
 62 |         show_root_full_path: false
 63 | 
 64 | ## Expression Specifications
 65 | 
 66 | ::: pysr.ExpressionSpec
 67 |     options:
 68 |         show_root_heading: true
 69 |         heading_level: 3
 70 |         show_root_full_path: false
 71 | 
 72 | ::: pysr.TemplateExpressionSpec
 73 |     options:
 74 |         show_root_heading: true
 75 |         heading_level: 3
 76 |         show_root_full_path: false
 77 | 
 78 | ::: pysr.ParametricExpressionSpec
 79 |     options:
 80 |         show_root_heading: true
 81 |         heading_level: 3
 82 |         show_root_full_path: false
 83 | 
 84 | ::: pysr.AbstractExpressionSpec
 85 |     options:
 86 |         show_root_heading: true
 87 |         heading_level: 3
 88 |         show_root_full_path: false
 89 | 
 90 | ## Logger Specifications
 91 | 
 92 | ::: pysr.TensorBoardLoggerSpec
 93 |     options:
 94 |         show_root_heading: true
 95 |         heading_level: 3
 96 |         show_root_full_path: false
 97 | 
 98 | ::: pysr.AbstractLoggerSpec
 99 |     options:
100 |         show_root_heading: true
101 |         heading_level: 3
102 |         show_root_full_path: false
103 | 


--------------------------------------------------------------------------------
/docs/all_contributors/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | 


--------------------------------------------------------------------------------
/docs/all_contributors/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "PySR",
 3 |   "version": "1.0.0",
 4 |   "main": "index.js",
 5 |   "repository": "git@github.com:MilesCranmer/PySR.git",
 6 |   "author": "MilesCranmer <miles.cranmer@gmail.com>",
 7 |   "license": "Apache-2.0",
 8 |   "devDependencies": {
 9 |     "all-contributors-cli": "6.25.1"
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/docs/all_contributors/run_all_contrib.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | yarn install --frozen-lockfile
4 | yarn all-contributors $@
5 | 


--------------------------------------------------------------------------------
/docs/api-advanced.md:
--------------------------------------------------------------------------------
 1 | # Internal Reference
 2 | 
 3 | ## Julia Interface
 4 | 
 5 | ::: pysr.julia_helpers
 6 |     options:
 7 |         members:
 8 |             - init_julia
 9 |             - install
10 |         heading_level: 3
11 | 
12 | ## Exporting to LaTeX
13 | 
14 | ::: pysr.export_latex
15 |     options:
16 |         members:
17 |             - to_latex
18 |             - generate_single_table
19 |             - generate_multiple_tables
20 |             - generate_table_environment
21 |         heading_level: 3
22 | 
23 | ## Exporting to JAX
24 | 
25 | ::: pysr.export_jax
26 |     options:
27 |         members:
28 |             - sympy2jax
29 |             - sympy2jaxtext
30 |         heading_level: 3
31 | 
32 | ## Exporting to PyTorch
33 | 
34 | ::: pysr.export_torch
35 |     options:
36 |         members:
37 |             - sympy2torch
38 |         heading_level: 3
39 | 


--------------------------------------------------------------------------------
/docs/assets/87712EA9B4B3CB1B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/assets/87712EA9B4B3CB1B.png


--------------------------------------------------------------------------------
/docs/assets/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/assets/favicon.png


--------------------------------------------------------------------------------
/docs/assets/pysr_logo.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 788.96 788.96"><defs><style>.cls-1,.cls-5{fill:#c13345;}.cls-2{fill:#fff;}.cls-3,.cls-4{fill:none;stroke:#c13345;stroke-linecap:round;stroke-linejoin:round;}.cls-3{stroke-width:9.89px;}.cls-4{stroke-width:9.8px;}.cls-5{font-size:149.62px;font-family:Roboto-Bold, Roboto;font-weight:700;}.cls-6{letter-spacing:0.01em;}</style></defs><g id="Layer_6" data-name="Layer 6"><rect class="cls-1" width="788.96" height="788.96"/></g><g id="Hex"><path class="cls-2" d="M376.92,68.38,120.85,216.22a35.12,35.12,0,0,0-17.56,30.42V542.33a35.11,35.11,0,0,0,17.56,30.41L376.92,720.58a35.09,35.09,0,0,0,35.12,0L668.12,572.74a35.12,35.12,0,0,0,17.55-30.41V246.64a35.14,35.14,0,0,0-17.55-30.42L412,68.38A35.14,35.14,0,0,0,376.92,68.38Z" transform="translate(0 0)"/></g><g id="Head"><path class="cls-3" d="M353.73,416.44l.21-24.56L313,397.7l-7.8-54L279.89,341l23.64-57.74-.17-3.13a93.63,93.63,0,1,1,150.23,74.6v59.46" transform="translate(0 0)"/><circle class="cls-3" cx="394.48" cy="277.55" r="14.14"/><circle class="cls-3" cx="394.48" cy="272.06" r="43.59"/><line class="cls-3" x1="394.48" y1="291.69" x2="394.48" y2="315.65"/><polyline class="cls-3" points="413.95 330.62 413.95 319.64 375.68 319.64 375.68 330.62"/><path class="cls-4" d="M414,330.62a19.14,19.14,0,1,1-38.27,0" transform="translate(0 0)"/><line class="cls-3" x1="413.95" y1="319.64" x2="413.95" y2="311.94"/><line class="cls-3" x1="375.68" y1="322.61" x2="375.68" y2="314.9"/></g><g id="PySR"><text class="cls-5" transform="translate(214.4 561.87)"><tspan class="cls-6">P</tspan><tspan x="97.6" y="0">ySR</tspan></text></g></svg>
2 | 


--------------------------------------------------------------------------------
/docs/assets/pysr_logo_reduced.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 26.0.3, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 609 685" style="enable-background:new 0 0 609 685;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#FFFFFF;}
 7 | 	.st1{fill:none;stroke:#C13345;stroke-width:9.89;stroke-linecap:round;stroke-linejoin:round;}
 8 | 	.st2{fill:none;stroke:#C13345;stroke-width:9.8;stroke-linecap:round;stroke-linejoin:round;}
 9 | 	.st3{fill:#C13345;}
10 | 	.st4{font-family:'Roboto-Bold';}
11 | 	.st5{font-size:149.62px;}
12 | 	.st6{letter-spacing:1;}
13 | </style>
14 | <g id="Hex">
15 | 
16 | 		<image style="overflow:visible;opacity:0.75;" width="613" height="693" xlink:href="87712EA9B4B3CB1B.png"  transform="matrix(1 0 0 1 -1.96 -3.9966)">
17 | 	</image>
18 | 	<g>
19 | 		<g>
20 | 			<path class="st0" d="M287,16.4L30.9,164.3c-10.9,6.3-17.6,17.9-17.6,30.4v295.7c0,12.5,6.7,24.1,17.6,30.4L287,668.6
21 | 				c10.9,6.3,24.3,6.3,35.1,0l256.1-147.8c10.9-6.3,17.6-17.9,17.5-30.4V194.7c0-12.5-6.7-24.1-17.5-30.4L322,16.4
22 | 				C311.2,10.2,297.8,10.2,287,16.4z"/>
23 | 		</g>
24 | 	</g>
25 | </g>
26 | <g id="Head">
27 | 	<path class="st1" d="M263.8,364.5l0.2-24.6l-40.9,5.8l-7.8-54l-25.3-2.7l23.6-57.7l-0.2-3.1c0-51.7,41.9-93.6,93.6-93.6
28 | 		c51.7,0,93.6,41.9,93.6,93.6c0,29.3-13.7,56.9-37,74.6v59.5"/>
29 | 	<circle class="st1" cx="304.5" cy="225.6" r="14.1"/>
30 | 	<circle class="st1" cx="304.5" cy="220.1" r="43.6"/>
31 | 	<line class="st1" x1="304.5" y1="239.7" x2="304.5" y2="263.7"/>
32 | 	<polyline class="st1" points="324,278.7 324,267.7 285.7,267.7 285.7,278.7 	"/>
33 | 	<path class="st2" d="M324,278.7c0.2,10.6-8.1,19.3-18.7,19.6s-19.3-8.1-19.6-18.7c0-0.3,0-0.6,0-0.9"/>
34 | 	<line class="st1" x1="324" y1="267.7" x2="324" y2="260"/>
35 | 	<line class="st1" x1="285.7" y1="270.7" x2="285.7" y2="262.9"/>
36 | </g>
37 | <g id="PySR">
38 | 	<text transform="matrix(1 0 0 1 124.44 509.9133)" class="st3 st4 st5 st6">P</text>
39 | 	<text transform="matrix(1 0 0 1 222.04 509.9133)" class="st3 st4 st5">ySR</text>
40 | </g>
41 | </svg>
42 | 


--------------------------------------------------------------------------------
/docs/backend.md:
--------------------------------------------------------------------------------
 1 | # Customization
 2 | 
 3 | If you have explored the [options](options.md) and [PySRRegressor reference](api.md), and still haven't figured out how to specify a constraint or objective required for your problem, you might consider editing the backend.
 4 | The backend of PySR is written as a pure Julia package under the name [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl).
 5 | This package is accessed with [`juliacall`](https://github.com/JuliaPy/PythonCall.jl), which allows us to transfer objects back and forth between the Python and Julia runtimes.
 6 | 
 7 | PySR gives you access to everything in SymbolicRegression.jl, but there are some specific use-cases which require modifications to the backend itself.
 8 | Generally you can do this as follows:
 9 | 
10 | ## 1. Check out the source code
11 | 
12 | Clone a copy of the backend as well as PySR:
13 | 
14 | ```bash
15 | git clone https://github.com/MilesCranmer/SymbolicRegression.jl
16 | git clone https://github.com/MilesCranmer/PySR
17 | ```
18 | 
19 | You may wish to check out the specific versions, which you can do with:
20 | 
21 | ```bash
22 | cd PySR
23 | git checkout <version>
24 | 
25 | # You can see the current backend version in `pysr/juliapkg.json`
26 | cd ../SymbolicRegression.jl
27 | git checkout <backend_version>
28 | ```
29 | 
30 | ## 2. Edit the source to your requirements
31 | 
32 | The main search code can be found in `src/SymbolicRegression.jl`.
33 | 
34 | Here are some tips:
35 | 
36 | -  The documentation for the backend is given [here](https://ai.damtp.cam.ac.uk/symbolicregression/dev/).
37 | - Throughout the package, you will often see template functions which typically use a symbol `T` (such as in the string `where {T<:Real}`). Here, `T` is simply the datatype of the input data and stored constants, such as `Float32` or `Float64`. Writing functions in this way lets us write functions generic to types, while still having access to the specific type specified at compilation time.
38 | - Expressions are stored as binary trees, using the `Node{T}` type, described [here](https://ai.damtp.cam.ac.uk/symbolicregression/dev/types/#SymbolicRegression.CoreModule.EquationModule.Node).
39 | - For reference, the main loop itself is found in the `equation_search` function inside [`src/SymbolicRegression.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl).
40 | - Parts of the code which are typically edited by users include:
41 |     - [`src/CheckConstraints.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/CheckConstraints.jl), particularly the function `check_constraints`. This function checks whether a given expression satisfies constraints, such as having a complexity lower than `maxsize`, and whether it contains any forbidden nestings of functions.
42 |         - Note that all expressions, *even intermediate expressions*, must comply with constraints. Therefore, make sure that evolution can still reach your desired expression (with one mutation at a time), before setting a hard constraint. In other cases you might want to instead put in the loss function.
43 |     - [`src/Options.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/Options.jl), as well as the struct definition in [`src/OptionsStruct.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl). This file specifies all the options used in the search: an instance of `Options` is typically available throughout every function in `SymbolicRegression.jl`. If you add new functionality to the backend, and wish to make it parameterizable (including from PySR), you should specify it in the options.
44 | 
45 | ## 3. Let PySR use the modified backend
46 | 
47 | Once you have made your changes, you should edit the `pysr/juliapkg.json` file
48 | in the PySR repository to point to this local copy.
49 | Do this by removing the `"version"` key and adding a `"dev"` and `"path"` key:
50 | 
51 | ```json
52 |     ...
53 |     "packages": {
54 |         "SymbolicRegression": {
55 |             "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
56 |             "dev": true,
57 |             "path": "/path/to/SymbolicRegression.jl"
58 |         },
59 |     ...
60 | ```
61 | 
62 | You can then install PySR with this modified backend by running:
63 | 
64 | ```bash
65 | cd PySR
66 | pip install .
67 | ```
68 | 
69 | For more information on `juliapkg.json`, see [`pyjuliapkg`](https://github.com/JuliaPy/pyjuliapkg).
70 | 
71 | ## Additional notes
72 | 
73 | If you get comfortable enough with the backend, you might consider using the Julia package directly: the API is given on the [SymbolicRegression.jl documentation](https://ai.damtp.cam.ac.uk/symbolicregression/dev/).
74 | 
75 | If you make a change that you think could be useful to other users, don't hesitate to open a pull request on either the PySR or SymbolicRegression.jl repositories! Contributions are very appreciated.
76 | 


--------------------------------------------------------------------------------
/docs/gen_docs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | 
 3 | # Generate home page using README.md:
 4 | echo '<div style="width:100%;height:0px;position:relative;padding-bottom:56.250%;"><iframe src="https://streamable.com/e/ncvqhy" frameborder="0" width="100%" height="100%" allowfullscreen style="width:100%;height:100%;position:absolute;left:0px;top:0px;overflow:hidden;"></iframe></div>' >index.md
 5 | cat ../README.md | grep -v 'user-images' | grep -E -v '\<.*div.*\>' >>index.md
 6 | 
 7 | # Transform "### Test status" to "**Test status**":
 8 | sed -i.bak 's/\#\#\# Test status/**Test status**/g' index.md
 9 | # Change '# ' to '## ':
10 | sed -i.bak '10,$s/^\# /## /g' index.md
11 | 
12 | # Create papers.md
13 | python generate_papers.py
14 | 
15 | # Copy _api.md up to but not including PARAMSKEY and all tex after:
16 | cat _api.md | sed -n '1,/PARAMSKEY/p' | sed '$d' >api.md
17 | python gen_param_docs.py >> api.md
18 | # Copy _api.md after PARAMSKEY:
19 | cat _api.md | sed -n '/PARAMSKEY/,$p' | sed '1d' >>api.md
20 | 


--------------------------------------------------------------------------------
/docs/gen_param_docs.py:
--------------------------------------------------------------------------------
 1 | # Load YAML file param_groupings.yml:
 2 | import re
 3 | import sys
 4 | 
 5 | from docstring_parser import parse
 6 | from yaml import safe_load
 7 | 
 8 | sys.path.append("..")
 9 | 
10 | 
11 | from pysr import PySRRegressor
12 | 
13 | found_params = []
14 | 
15 | 
16 | def str_param_groups(param_groupings, params, cur_heading=2):
17 |     global found_params
18 |     # Recursively print the parameter descriptions, defaults,
19 |     # with headings from the param groupings dict.
20 |     if isinstance(param_groupings, list):
21 |         return "\n\n".join(
22 |             str_param_groups(param, params, cur_heading) for param in param_groupings
23 |         )
24 |     elif isinstance(param_groupings, dict):
25 |         for heading, param_grouping in param_groupings.items():
26 |             return (
27 |                 f"{'#' * cur_heading} {heading}"
28 |                 + "\n\n"
29 |                 + str_param_groups(param_grouping, params, cur_heading + 1)
30 |             )
31 |     elif isinstance(param_groupings, str):
32 |         found_params.append(param_groupings)
33 | 
34 |         default_value = re.search(
35 |             r"Default is `(.*)`", params[param_groupings].description
36 |         )
37 |         clean_desc = re.sub(r"Default is .*", "", params[param_groupings].description)
38 |         # Prepend every line with 4 spaces:
39 |         clean_desc = "\n".join("    " + line for line in clean_desc.splitlines())
40 |         return (
41 |             f"  - **`{param_groupings}`**"
42 |             + "\n\n"
43 |             + clean_desc
44 |             + (
45 |                 "\n\n    " + f"*Default:* `{default_value.group(1)}`"
46 |                 if default_value
47 |                 else ""
48 |             )
49 |         )
50 |     else:
51 |         raise TypeError(f"Unexpected type {type(param_groupings)}")
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     # This is the path to the param_groupings.yml file
56 |     # relative to the current file.
57 |     path = "../pysr/param_groupings.yml"
58 |     with open(path, "r") as f:
59 |         param_groupings = safe_load(f)
60 | 
61 |     # This is basically a dict of lists and dicts.
62 | 
63 |     # Let's load in the parameter descriptions from the docstring of PySRRegressor:
64 |     raw_params = parse(PySRRegressor.__doc__).params
65 |     params = {
66 |         param.arg_name: param
67 |         for param in raw_params
68 |         if param.arg_name[-1] != "_" and param.arg_name != "**kwargs"
69 |     }
70 | 
71 |     output = str_param_groups(param_groupings, params, cur_heading=3)
72 |     assert len(set(found_params) ^ set(params.keys())) == 0
73 |     print("## PySRRegressor Parameters")
74 |     print(output)
75 | 


--------------------------------------------------------------------------------
/docs/generate_papers.py:
--------------------------------------------------------------------------------
 1 | """This script generates the papers.md file from the papers.yml file."""
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | import yaml
 6 | 
 7 | data_file = "papers.yml"
 8 | papers_header = Path("stylesheets") / "papers_header.txt"
 9 | output_file = "papers.md"
10 | 
11 | # Load YAML file:
12 | with open(data_file, "r") as stream:
13 |     papers = yaml.load(stream, Loader=yaml.SafeLoader)["papers"]
14 | 
15 | # Load header:
16 | with open(papers_header, "r") as stream:
17 |     header = stream.read()
18 | 
19 | with open(output_file, "w") as f:
20 |     f.write(header)
21 | 
22 |     # First, we sort the papers by date.
23 |     # This is in the format of "2022-03-15"
24 |     papers = sorted(papers, key=lambda paper: paper["date"], reverse=True)
25 | 
26 |     snippets = []
27 |     for paper in papers:
28 |         title = paper["title"]
29 |         authors = (
30 |             ", ".join(paper["authors"]).replace("(", "<sup>").replace(")", "</sup>")
31 |         )
32 |         affiliations = ", ".join(
33 |             f"<sup>{num}</sup>{affil}" for num, affil in paper["affiliations"].items()
34 |         )
35 |         link = paper["link"]
36 |         abstract = paper["abstract"]
37 |         image_file = paper["image"]
38 | 
39 |         if image_file.startswith("http"):
40 |             absolute_image_file = image_file
41 |         else:
42 |             absolute_image_file = f"images/{image_file}"
43 | 
44 |         # Begin:
45 |         paper_snippet = f"""
46 | 
47 | <figure markdown>
48 | ![]({absolute_image_file}){{ width="500"}}
49 | <figcaption>
50 | <!-- Large font: -->
51 | <h2>
52 | <a href="{link}">{title}</a>
53 | </h2>
54 | </figcaption>
55 | </figure>
56 | 
57 | <center>
58 | {authors}
59 | 
60 | <small>{affiliations}</small>
61 | </center>
62 | 
63 | **Abstract:** {abstract}\n\n
64 | """
65 |         snippets.append(paper_snippet)
66 | 
67 |     f.write("\n\n---\n\n".join(snippets))
68 | 


--------------------------------------------------------------------------------
/docs/images/Planar_relation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/Planar_relation.png


--------------------------------------------------------------------------------
/docs/images/SyReg_GasConc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/SyReg_GasConc.png


--------------------------------------------------------------------------------
/docs/images/Y_Mgal_Simba.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/Y_Mgal_Simba.png


--------------------------------------------------------------------------------
/docs/images/back_to_formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/back_to_formula.png


--------------------------------------------------------------------------------
/docs/images/cloud_cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/cloud_cover.jpg


--------------------------------------------------------------------------------
/docs/images/economic_theory_gravity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/economic_theory_gravity.png


--------------------------------------------------------------------------------
/docs/images/electronnegativity_introduction.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/electronnegativity_introduction.jpg


--------------------------------------------------------------------------------
/docs/images/example_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/example_plot.png


--------------------------------------------------------------------------------
/docs/images/hi_mass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/hi_mass.png


--------------------------------------------------------------------------------
/docs/images/hod_importances.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/hod_importances.png


--------------------------------------------------------------------------------
/docs/images/hyperbolic_volume.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/hyperbolic_volume.png


--------------------------------------------------------------------------------
/docs/images/illustris_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/illustris_example.png


--------------------------------------------------------------------------------
/docs/images/jet_background_diagram.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/jet_background_diagram.jpg


--------------------------------------------------------------------------------
/docs/images/kidger_thesis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/kidger_thesis.png


--------------------------------------------------------------------------------
/docs/images/rediscovering_gravity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/rediscovering_gravity.png


--------------------------------------------------------------------------------
/docs/js/mathjax.js:
--------------------------------------------------------------------------------
 1 | window.MathJax = {
 2 |     tex: {
 3 |         inlineMath: [["\\(", "\\)"]],
 4 |         displayMath: [["\\[", "\\]"]],
 5 |         processEscapes: true,
 6 |         processEnvironments: true
 7 |     },
 8 |     options: {
 9 |         ignoreHtmlClass: ".*|",
10 |         processHtmlClass: "arithmatex"
11 |     }
12 | };
13 | 
14 | document$.subscribe(() => {
15 |     MathJax.typesetPromise()
16 | })
17 | 


--------------------------------------------------------------------------------
/docs/operators.md:
--------------------------------------------------------------------------------
 1 | # Operators
 2 | 
 3 | ## Pre-defined
 4 | 
 5 | First, note that pretty much any valid Julia function which
 6 | takes one or two scalars as input, and returns on scalar as output,
 7 | is likely to be a valid operator[^1].
 8 | A selection of these and other valid operators are stated below.
 9 | 
10 | Also, note that it's a good idea to not use too many operators, since
11 | it can exponentially increase the search space.
12 | 
13 | **Binary Operators**
14 | 
15 | | Arithmetic    | Comparison | Logic    |
16 | |--------------|------------|----------|
17 | | `+`          | `max`      | `logical_or`[^2] |
18 | | `-`          | `min`      | `logical_and`[^3]|
19 | | `*`          | `>`[^4]    |                 |
20 | | `/`          | `>=`       |                 |
21 | | `^`          | `<`        |                 |
22 | |              | `<=`       |                 |
23 | |              | `cond`[^5] |                 |
24 | |              | `mod`      |                 |
25 | 
26 | **Unary Operators**
27 | 
28 | | Basic      | Exp/Log    | Trig      | Hyperbolic | Special   | Rounding   |
29 | |------------|------------|-----------|------------|-----------|------------|
30 | | `neg`      | `exp`      | `sin`     | `sinh`     | `erf`     | `round`    |
31 | | `square`   | `log`      | `cos`     | `cosh`     | `erfc`    | `floor`    |
32 | | `cube`     | `log10`    | `tan`     | `tanh`     | `gamma`   | `ceil`     |
33 | | `cbrt`     | `log2`     | `asin`    | `asinh`    | `relu`    |            |
34 | | `sqrt`     | `log1p`    | `acos`    | `acosh`    | `sinc`    |            |
35 | | `abs`      |            | `atan`    | `atanh`    |           |            |
36 | | `sign`     |            |           |            |           |            |
37 | | `inv`      |            |           |            |           |            |
38 | 
39 | 
40 | ## Custom
41 | 
42 | Instead of passing a predefined operator as a string,
43 | you can just define a custom function as Julia code. For example:
44 | 
45 | ```python
46 |     PySRRegressor(
47 |         ...,
48 |         unary_operators=["myfunction(x) = x^2"],
49 |         binary_operators=["myotherfunction(x, y) = x^2*y"],
50 |         extra_sympy_mappings={
51 |             "myfunction": lambda x: x**2,
52 |             "myotherfunction": lambda x, y: x**2 * y,
53 |         },
54 |     )
55 | ```
56 | 
57 | 
58 | Make sure that it works with
59 | `Float32` as a datatype (for default precision, or `Float64` if you set `precision=64`). That means you need to write `1.5f3`
60 | instead of `1.5e3`, if you write any constant numbers, or simply convert a result to `Float64(...)`.
61 | 
62 | PySR expects that operators not throw an error for any input value over the entire real line from `-3.4e38` to `+3.4e38`.
63 | Thus, for invalid inputs, such as negative numbers to a `sqrt` function, you may simply return a `NaN` of the same type as the input. For example,
64 | 
65 | ```julia
66 | my_sqrt(x) = x >= 0 ? sqrt(x) : convert(typeof(x), NaN)
67 | ```
68 | 
69 | would be a valid operator. The genetic algorithm
70 | will preferentially selection expressions which avoid
71 | any invalid values over the training dataset.
72 | 
73 | 
74 | <!-- Footnote for 1: -->
75 | <!-- (Will say "However, you may need to define a `extra_sympy_mapping`":) -->
76 | 
77 | [^1]: However, you will need to define a sympy equivalent in `extra_sympy_mapping` if you want to use a function not in the above list.
78 | [^2]: `logical_or` is equivalent to `(x, y) -> (x > 0 || y > 0) ? 1 : 0`
79 | [^3]: `logical_and` is equivalent to `(x, y) -> (x > 0 && y > 0) ? 1 : 0`
80 | [^4]: `>` is equivalent to `(x, y) -> x > y ? 1 : 0`
81 | [^5]: `cond` is equivalent to `(x, y) -> x > 0 ? y : 0`
82 | 


--------------------------------------------------------------------------------
/docs/options.md:
--------------------------------------------------------------------------------
  1 | # Features and Options
  2 | 
  3 | Some configurable features and options in `PySR` which you
  4 | may find useful include:
  5 | 
  6 | - [Selecting from the accuracy-complexity curve](#model-selection)
  7 | - [Operators](#operators)
  8 | - [Number of outer search iterations](#iterations)
  9 | - [Number of inner search iterations](#cycles-per-iteration)
 10 | - [Multi-processing](#processors)
 11 | - [Populations](#populations)
 12 | - [Data weighting](#weighted-data)
 13 | - [Max complexity and depth](#max-size)
 14 | - [Mini-batching](#batching)
 15 | - [Variable names](#variable-names)
 16 | - [Constraining use of operators](#constraining-use-of-operators)
 17 | - [Custom complexities](#custom-complexity)
 18 | - [LaTeX and SymPy](#latex-and-sympy)
 19 | - [Exporting to numpy, pytorch, and jax](#exporting-to-numpy-pytorch-and-jax)
 20 | - [Loss functions](#loss)
 21 | - [Model loading](#model-loading)
 22 | 
 23 | These are described below.
 24 | Also check out the [tuning page](tuning.md) for workflow tips.
 25 | 
 26 | The program will output a pandas DataFrame containing the equations
 27 | to `PySRRegressor.equations` containing the loss value
 28 | and complexity.
 29 | 
 30 | It will also dump to a csv
 31 | at the end of every iteration,
 32 | which is `.hall_of_fame_{date_time}.csv` by default.
 33 | It also prints the equations to stdout.
 34 | 
 35 | ## Model selection
 36 | 
 37 | By default, `PySRRegressor` uses `model_selection='best'`
 38 | which selects an equation from `PySRRegressor.equations_` using
 39 | a combination of accuracy and complexity.
 40 | You can also select `model_selection='accuracy'`.
 41 | 
 42 | By printing a model (i.e., `print(model)`), you can see
 43 | the equation selection with the arrow shown in the `pick` column.
 44 | 
 45 | ## Operators
 46 | 
 47 | A list of operators can be found on the [operators page](operators.md).
 48 | One can define custom operators in Julia by passing a string:
 49 | 
 50 | ```python
 51 | PySRRegressor(niterations=100,
 52 |     binary_operators=["mult", "plus", "special(x, y) = x^2 + y"],
 53 |     extra_sympy_mappings={'special': lambda x, y: x**2 + y},
 54 |     unary_operators=["cos"])
 55 | ```
 56 | 
 57 | Now, the symbolic regression code can search using this `special` function
 58 | that squares its left argument and adds it to its right. Make sure
 59 | all passed functions are valid Julia code, and take one (unary)
 60 | or two (binary) float32 scalars as input, and output a float32. This means if you
 61 | write any real constants in your operator, like `2.5`, you have to write them
 62 | instead as `2.5f0`, which defines it as `Float32`.
 63 | Operators are automatically vectorized.
 64 | 
 65 | One should also define `extra_sympy_mappings`,
 66 | so that the SymPy code can understand the output equation from Julia,
 67 | when constructing a useable function. This step is optional, but
 68 | is necessary for the `lambda_format` to work.
 69 | 
 70 | ## Iterations
 71 | 
 72 | This is the total number of generations that `pysr` will run for.
 73 | I usually set this to a large number, and exit when I am satisfied
 74 | with the equations.
 75 | 
 76 | ## Cycles per iteration
 77 | 
 78 | Each cycle considers every 10-equation subsample (re-sampled for each individual 10,
 79 | unless `fast_cycle` is set in which case the subsamples are separate groups of equations)
 80 | a single time, producing one mutated equation for each.
 81 | The parameter `ncycles_per_iteration` defines how many times this
 82 | occurs before the equations are compared to the hall of fame,
 83 | and new equations are migrated from the hall of fame, or from other populations.
 84 | It also controls how slowly annealing occurs. You may find that increasing
 85 | `ncycles_per_iteration` results in a higher cycles-per-second, as the head
 86 | worker needs to reduce and distribute new equations less often, and also increases
 87 | diversity. But at the same
 88 | time, a smaller number it might be that migrating equations from the hall of fame helps
 89 | each population stay closer to the best current equations.
 90 | 
 91 | ## Processors
 92 | 
 93 | One can adjust the number of workers used by Julia with the
 94 | `procs` option. You should set this equal to the number of cores
 95 | you want `pysr` to use.
 96 | 
 97 | ## Populations
 98 | 
 99 | By default, `populations=15`, but you can set a different
100 | number of populations with this option.
101 | More populations may increase
102 | the diversity of equations discovered, though will take longer to train.
103 | However, it is usually more efficient to have `populations>procs`,
104 | as there are multiple populations running
105 | on each core.
106 | 
107 | ## Weighted data
108 | 
109 | Here, we assign weights to each row of data
110 | using inverse uncertainty squared. We also use 10 processes for the search
111 | instead of the default.
112 | 
113 | ```python
114 | sigma = ...
115 | weights = 1/sigma**2
116 | 
117 | model = PySRRegressor(procs=10)
118 | model.fit(X, y, weights=weights)
119 | ```
120 | 
121 | ## Max size
122 | 
123 | `maxsize` controls the maximum size of equation (number of operators,
124 | constants, variables). `maxdepth` is by default not used, but can be set
125 | to control the maximum depth of an equation. These will make processing
126 | faster, as longer equations take longer to test.
127 | 
128 | One can warm up the maxsize from a small number to encourage
129 | PySR to start simple, by using the `warmupMaxsize` argument.
130 | This specifies that maxsize increases every `warmupMaxsize`.
131 | 
132 | ## Batching
133 | 
134 | One can turn on mini-batching, with the `batching` flag,
135 | and control the batch size with `batch_size`. This will make
136 | evolution faster for large datasets. Equations are still evaluated
137 | on the entire dataset at the end of each iteration to compare to the hall
138 | of fame, but only on a random subset during mutations and annealing.
139 | 
140 | ## Variable Names
141 | 
142 | You can pass a list of strings naming each column of `X` with
143 | `variable_names`. Alternatively, you can pass `X` as a pandas dataframe
144 | and the columns will be used as variable names. Make sure only
145 | alphabetical characters and `_` are used in these names.
146 | 
147 | ## Constraining use of operators
148 | 
149 | One can limit the complexity of specific operators with the `constraints` parameter.
150 | There is a "maxsize" parameter to PySR, but there is also an operator-level
151 | "constraints" parameter. One supplies a dict, like so:
152 | 
153 | ```python
154 | constraints={'pow': (-1, 1), 'mult': (3, 3), 'cos': 5}
155 | ```
156 | 
157 | What this says is that: a power law $x^y$ can have an expression of arbitrary (-1) complexity in the x, but only complexity 1 (e.g., a constant or variable) in the y. So $(x_0 + 3)^{5.5}$ is allowed, but $5.5^{x_0 + 3}$ is not.
158 | I find this helps a lot for getting more interpretable equations.
159 | The other terms say that each multiplication can only have sub-expressions
160 | of up to complexity 3 (e.g., $5.0 + x_2$) in each side, and cosine can only operate on
161 | expressions of complexity 5 (e.g., $5.0 + x_2 exp(x_3)$).
162 | 
163 | ## Custom complexity
164 | 
165 | By default, all operators, constants, and instances of variables
166 | have a complexity of 1. The sum of the complexities of all terms
167 | is the total complexity of an expression.
168 | You may change this by configuring the options:
169 | 
170 | - `complexity_of_operators` - pass a dictionary of `<str>: <int>` pairs
171 |   to change the complexity of each operator. If an operator is not
172 |   specified, it will have the default complexity of 1.
173 | - `complexity_of_constants` - supplying an integer will make all constants
174 |   have that complexity.
175 | - `complexity_of_variables` - supplying an integer will make all variables
176 |   have that complexity.
177 | 
178 | ## LaTeX and SymPy
179 | 
180 | After running `model.fit(...)`, you can look at
181 | `model.equations` which is a pandas dataframe.
182 | The `sympy_format` column gives sympy equations,
183 | and the `lambda_format` gives callable functions.
184 | You can optionally pass a pandas dataframe to the callable function,
185 | if you called `.fit` on a pandas dataframe as well.
186 | 
187 | There are also some helper functions for doing this quickly.
188 | 
189 | - `model.latex()` will generate a TeX formatted output of your equation.
190 |   - `model.latex_table(indices=[2, 5, 8])` will generate a formatted LaTeX table including all the specified equations.
191 | - `model.sympy()` will return the SymPy representation.
192 | - `model.jax()` will return a callable JAX function combined with parameters (see below)
193 | - `model.pytorch()` will return a PyTorch model (see below).
194 | 
195 | ## Exporting to numpy, pytorch, and jax
196 | 
197 | By default, the dataframe of equations will contain columns
198 | with the identifier `lambda_format`.
199 | These are simple functions which correspond to the equation, but executed
200 | with numpy functions.
201 | You can pass your `X` matrix to these functions
202 | just as you did to the `model.fit` call. Thus, this allows
203 | you to numerically evaluate the equations over different output.
204 | 
205 | Calling `model.predict` will execute the `lambda_format` of
206 | the best equation, and return the result. If you selected
207 | `model_selection="best"`, this will use an equation that combines
208 | accuracy with simplicity. For `model_selection="accuracy"`, this will just
209 | look at accuracy.
210 | 
211 | One can do the same thing for PyTorch, which uses code
212 | from [sympytorch](https://github.com/patrick-kidger/sympytorch),
213 | and for JAX, which uses code from
214 | [sympy2jax](https://github.com/MilesCranmer/sympy2jax).
215 | 
216 | Calling `model.pytorch()` will return
217 | a PyTorch module which runs the equation, using PyTorch functions,
218 | over `X` (as a PyTorch tensor). This is differentiable, and the
219 | parameters of this PyTorch module correspond to the learned parameters
220 | in the equation, and are trainable.
221 | 
222 | ```python
223 | torch_model = model.pytorch()
224 | torch_model(X)
225 | ```
226 | 
227 | **Warning: If you are using custom operators, you must define `extra_torch_mappings` or `extra_jax_mappings` (both are `dict` of callables) to provide an equivalent definition of the functions.** (At any time you can set these parameters or any others with `model.set_params`.)
228 | 
229 | For JAX, you can equivalently call `model.jax()`
230 | This will return a dictionary containing a `'callable'` (a JAX function),
231 | and `'parameters'` (a list of parameters in the equation).
232 | You can execute this function with:
233 | 
234 | ```python
235 | jax_model = model.jax()
236 | jax_model['callable'](X, jax_model['parameters'])
237 | ```
238 | 
239 | Since the parameter list is a jax array, this therefore lets you also
240 | train the parameters within JAX (and is differentiable).
241 | 
242 | ## `loss`
243 | 
244 | The default loss is mean-square error, and weighted mean-square error.
245 | One can pass an arbitrary Julia string to define a custom loss, using,
246 | e.g., `elementwise_loss="myloss(x, y) = abs(x - y)^1.5"`. For more details,
247 | see the
248 | [Losses](https://milescranmer.github.io/SymbolicRegression.jl/dev/losses/)
249 | page for SymbolicRegression.jl.
250 | 
251 | Here are some additional examples:
252 | 
253 | abs(x-y) loss
254 | 
255 | ```python
256 | PySRRegressor(..., elementwise_loss="f(x, y) = abs(x - y)^1.5")
257 | ```
258 | 
259 | Note that the function name doesn't matter:
260 | 
261 | ```python
262 | PySRRegressor(..., elementwise_loss="loss(x, y) = abs(x * y)")
263 | ```
264 | 
265 | With weights:
266 | 
267 | ```python
268 | model = PySRRegressor(..., elementwise_loss="myloss(x, y, w) = w * abs(x - y)")
269 | model.fit(..., weights=weights)
270 | ```
271 | 
272 | Weights can be used in arbitrary ways:
273 | 
274 | ```python
275 | model = PySRRegressor(..., weights=weights, elementwise_loss="myloss(x, y, w) = abs(x - y)^2/w^2")
276 | model.fit(..., weights=weights)
277 | ```
278 | 
279 | Built-in loss (faster) (see [losses](https://ai.damtp.cam.ac.uk/symbolicregression/dev/losses/)).
280 | This one computes the L3 norm:
281 | 
282 | ```python
283 | PySRRegressor(..., elementwise_loss="LPDistLoss{3}()")
284 | ```
285 | 
286 | Can also uses these losses for weighted (weighted-average):
287 | 
288 | ```python
289 | model = PySRRegressor(..., weights=weights, elementwise_loss="LPDistLoss{3}()")
290 | model.fit(..., weights=weights)
291 | ```
292 | 
293 | ## Model loading
294 | 
295 | PySR will automatically save a pickle file of the model state
296 | when you call `model.fit`, once before the search starts,
297 | and again after the search finishes. The filename will
298 | have the same base name as the input file, but with a `.pkl` extension.
299 | You can load the saved model state with:
300 | 
301 | ```python
302 | model = PySRRegressor.from_file(pickle_filename)
303 | ```
304 | 
305 | If you have a long-running job and would like to load the model
306 | before completion, you can also do this. In this case, the model
307 | loading will use the `csv` file to load the equations, since the
308 | `csv` file is continually updated during the search. Once
309 | the search completes, the model including its equations will
310 | be saved to the pickle file, overwriting the existing version.
311 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs-material
2 | mkdocs-autorefs
3 | mkdocstrings[python]
4 | docstring_parser
5 | 


--------------------------------------------------------------------------------
/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | [data-md-color-scheme="pysr"] {
2 |     --md-primary-fg-color:        #C13245;
3 |     --md-primary-fg-color--light: #D35364;
4 |     --md-primary-fg-color--dark:  #982736;
5 | }
6 | 


--------------------------------------------------------------------------------
/docs/stylesheets/papers_header.txt:
--------------------------------------------------------------------------------
1 | # Research
2 | Below is a showcase of papers which have used PySR to discover
3 | or rediscover a symbolic model.
4 | These are sorted by the date of release, with most recent papers at the top.
5 | 
6 | 
7 | If you have used PySR in your research,
8 | please submit a pull request to add your paper to [this file](https://github.com/MilesCranmer/PySR/blob/master/docs/papers.yml).
9 | 


--------------------------------------------------------------------------------
/docs/tuning.md:
--------------------------------------------------------------------------------
 1 | # Tuning and Workflow Tips
 2 | 
 3 | I give a short guide below on how I like to tune PySR for my applications.
 4 | 
 5 | First, my general tips would be to avoid using redundant operators, like how `pow` can do the same things as `square`, or how `-` (binary) and `neg` (unary) are equivalent. The fewer operators the better! Only use operators you need.
 6 | 
 7 | When running PySR, I usually do the following:
 8 | 
 9 | I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node of a slurm cluster. Passing `cluster_manager="slurm"` will make PySR set up a run over the entire allocation. I set `procs` equal to the total number of cores over my entire allocation.
10 | 
11 | I use the [tensorboard feature](https://ai.damtp.cam.ac.uk/pysr/examples/#12-using-tensorboard-for-logging) for experiment tracking.
12 | 
13 | [^1]: Jupyter Notebooks are supported by PySR, but miss out on some useful features available in IPython and Python: the progress bar, and early stopping with "q". In Jupyter you cannot interrupt a search once it has started; you have to restart the kernel. See [this issue](https://github.com/MilesCranmer/PySR/issues/260) for updates.
14 | 
15 | 1. I start by using the default parameters.
16 | 2. I use only the operators I think it needs and no more.
17 | 3. Increase `populations` to `3*num_cores`.
18 | 4. If my dataset is more than 1000 points, I either subsample it (low-dimensional and not much noise) or set `batching=True` (high-dimensional or very noisy, so it needs to evaluate on all the data).
19 | 5. While on a laptop or single node machine, you might leave the default `ncycles_per_iteration`, on a cluster with ~100 cores I like to set `ncycles_per_iteration` to maybe `5000` or so, until the head node occupation is under `10%`. (A larger value means the workers talk less frequently to eachother, which is useful when you have many workers!)
20 | 6. Set `constraints` and `nested_constraints` as strict as possible. These can help quite a bit with exploration. Typically, if I am using `pow`, I would set `constraints={"pow": (9, 1)}`, so that power laws can only have a variable or constant as their exponent. If I am using `sin` and `cos`, I also like to set `nested_constraints={"sin": {"sin": 0, "cos": 0}, "cos": {"sin": 0, "cos": 0}}`, so that sin and cos can't be nested, which seems to happen frequently. (Although in practice I would just use `sin`, since the search could always add a phase offset!)
21 | 7. Set `maxsize` a bit larger than the final size you want. e.g., if you want a final equation of size `30`, you might set this to `35`, so that it has a bit of room to explore.
22 | 8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore.
23 | 9.  Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`.
24 | 10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently.
25 | 11. Set `turbo` to `True`. This turns on advanced loop vectorization, but is still quite experimental. It should give you a nice 20% or more speedup.
26 | 12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early.
27 | 
28 | Since I am running in IPython, I can just hit `q` and then `<enter>` to stop the job, tweak the hyperparameters, and then start the search again.
29 | I can also use `warm_start=True` if I wish to continue where I left off (though note that changing some parameters, like `maxsize`, are incompatible with warm starts).
30 | 
31 | Some things I try out to see if they help:
32 | 
33 | 1. Play around with `complexity_of_operators`. Set operators you dislike (e.g., `pow`) to have a larger complexity.
34 | 2. Try setting `adaptive_parsimony_scaling` a bit larger, maybe up to `1000`.
35 | 3. Sometimes I try using `warmup_maxsize_by`. This is useful if you find that the search finds a very complex equation very quickly, and then gets stuck. It basically forces it to start at the simpler equations and build up complexity slowly.
36 | 4. Play around with different losses:
37 |     - I typically try `L2DistLoss()` and `L1DistLoss()`. L1 loss is more robust to outliers compared to L2 (L1 finds the median, while L2 finds the mean of a random variable), so is often a good choice for a noisy dataset.
38 |     - I might also provide the `weights` parameter to `fit` if there is some reasonable choice of weighting. For example, maybe I know the signal-to-noise of a particular row of `y` - I would set that SNR equal to the weights. Or, perhaps I do some sort of importance sampling, and weight the rows by importance.
39 | 
40 | Very rarely I might also try tuning the mutation weights, the crossover probability, or the optimization parameters. I never use `denoise` or `select_k_features` as I find they aren't very useful.
41 | 
42 | For large datasets I usually just randomly sample ~1000 points or so. In case all the points matter, I might use `batching=True`.
43 | 
44 | If I find the equations get very complex and I'm not sure if they are numerically precise, I might set `precision=64`.
45 | 
46 | Once a run is finished, I use the `PySRRegressor.from_file` function to load the saved search in a different process (requires the pickle file, and possibly also the `.csv` file if you quit early). I can then explore the equations, convert them to LaTeX, and plot their output.
47 | 
48 | ## More Tips
49 | 
50 | You might also wish to explore the [discussions](https://github.com/MilesCranmer/PySR/discussions/) page for more tips, and to see if anyone else has had similar questions.
51 | Be sure to also read through the [reference](api.md).
52 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: test
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - python>=3.8
 6 |   - sympy>=1.0.0,<2.0.0
 7 |   - pandas>=0.21.0,<3.0.0
 8 |   - numpy>=1.13.0,<3.0.0
 9 |   - scikit-learn>=1.0.0,<2.0.0
10 |   - pyjuliacall>=0.9.24,<0.9.26
11 |   - click>=7.0.0,<9.0.0
12 |   - typing-extensions>=4.0.0,<5.0.0
13 | 


--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | X = 2 * np.random.randn(100, 5)
 4 | y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 0.5
 5 | 
 6 | from pysr import PySRRegressor
 7 | 
 8 | model = PySRRegressor(
 9 |     model_selection="best",  # Result is mix of simplicity+accuracy
10 |     niterations=40,
11 |     binary_operators=["+", "*"],
12 |     unary_operators=[
13 |         "cos",
14 |         "exp",
15 |         "sin",
16 |         "inv(x) = 1/x",
17 |         # ^ Custom operator (julia syntax)
18 |     ],
19 |     extra_sympy_mappings={"inv": lambda x: 1 / x},
20 |     # ^ Define operator for SymPy as well
21 |     elementwise_loss="loss(x, y) = (x - y)^2",
22 |     # ^ Custom loss function (julia syntax)
23 | )
24 | 
25 | model.fit(X, y)
26 | 
27 | print(model)
28 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: PySR
 2 | theme:
 3 |   name: material
 4 |   palette:
 5 |     # Light mode:
 6 |     - media: "(prefers-color-scheme: light)"
 7 |       scheme: pysr
 8 |       toggle:
 9 |         icon: material/toggle-switch
10 |         name: Switch to dark mode
11 |     - media: "(prefers-color-scheme: dark)"
12 |       scheme: slate
13 |       toggle:
14 |         icon: material/toggle-switch-off-outline
15 |         name: Switch to light mode
16 | 
17 | 
18 |   features:
19 |     - navigation.expand
20 | 
21 |   logo: assets/pysr_logo_reduced.svg
22 |   icon:
23 |     repo: fontawesome/brands/github-alt
24 |   favicon: assets/favicon.png
25 | 
26 | nav:
27 |   - index.md
28 |   - examples.md
29 |   - api.md
30 |   - operators.md
31 |   - tuning.md
32 |   - options.md
33 |   - papers.md
34 |   - api-advanced.md
35 |   - backend.md
36 | 
37 | extra:
38 |   homepage: https://ai.damtp.cam.ac.uk/pysr
39 | 
40 | extra_css:
41 |   - stylesheets/extra.css
42 | 
43 | extra_javascript:
44 |   - js/mathjax.js
45 |   - https://polyfill.io/v3/polyfill.min.js?features=es6
46 |   - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
47 | 
48 | repo_url: https://github.com/MilesCranmer/PySR
49 | 
50 | plugins:
51 |   - search
52 |   - autorefs
53 |   - mkdocstrings:
54 |       default_handler: python
55 |       handlers:
56 |         python:
57 |           # paths: [pysr]
58 |           options:
59 |             # https://mkdocstrings.github.io/python/usage/#finding-modules
60 |             docstring_style: numpy
61 |             merge_init_into_class: True
62 |             # docstring_options
63 |             # separate_signature: True
64 |             show_bases: false
65 |             heading_level: 2
66 | 
67 | markdown_extensions:
68 |   - toc:
69 |       permalink: true
70 |   - attr_list
71 |   - footnotes
72 |   - md_in_html
73 |   - pymdownx.highlight:
74 |       anchor_linenums: True
75 |   - pymdownx.inlinehilite
76 |   - pymdownx.snippets
77 |   - pymdownx.superfences
78 |   - pymdownx.arithmatex:
79 |       generic: true
80 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | warn_return_any = True
3 | 
4 | [mypy-sklearn.*]
5 | ignore_missing_imports = True
6 | 
7 | [mypy-julia.*]
8 | ignore_missing_imports = True
9 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["hatchling"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [project]
 6 | name = "pysr"
 7 | version = "1.5.8"
 8 | authors = [
 9 |     {name = "Miles Cranmer", email = "miles.cranmer@gmail.com"},
10 | ]
11 | description = "Simple and efficient symbolic regression"
12 | readme = {file = "README.md", content-type = "text/markdown"}
13 | license = {file = "LICENSE"}
14 | requires-python = ">=3.8"
15 | classifiers = [
16 |     "Programming Language :: Python :: 3",
17 |     "Operating System :: OS Independent",
18 |     "License :: OSI Approved :: Apache Software License"
19 | ]
20 | dependencies = [
21 |     "sympy>=1.0.0,<2.0.0",
22 |     "pandas>=0.21.0,<3.0.0",
23 |     "numpy>=1.13.0,<3.0.0",
24 |     "scikit_learn>=1.0.0,<2.0.0",
25 |     "juliacall>=0.9.24,<0.9.26",
26 |     "click>=7.0.0,<9.0.0",
27 |     "typing-extensions>=4.0.0,<5.0.0",
28 | ]
29 | 
30 | [project.optional-dependencies]
31 | dev = [
32 |     "coverage>=7,<8",
33 |     "beartype>=0.19,<0.22",
34 |     "ipykernel>=6,<7",
35 |     "ipython>=8,<9",
36 |     "jax[cpu]>=0.4,<0.6",
37 |     "jupyter>=1,<2",
38 |     "mypy>=1,<2",
39 |     "nbval>=0.11,<0.12",
40 |     "pandas-stubs",
41 |     "pre-commit>=3.0,<5",
42 |     "pytest-cov>=5,<7",
43 |     "pytest>=8,<9",
44 |     "tensorboard>=2,<3",
45 |     "torch>=2,<3",
46 |     "types-openpyxl",
47 |     "types-pytz",
48 | ]
49 | 
50 | [tool.isort]
51 | profile = "black"
52 | 


--------------------------------------------------------------------------------
/pysr/.gitignore:
--------------------------------------------------------------------------------
1 | version.py
2 | 


--------------------------------------------------------------------------------
/pysr/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import logging
 4 | import os
 5 | 
 6 | pysr_logger = logging.getLogger("pysr")
 7 | pysr_logger.setLevel(logging.INFO)
 8 | handler = logging.StreamHandler()
 9 | handler.setLevel(logging.INFO)
10 | pysr_logger.addHandler(handler)
11 | 
12 | if os.environ.get("PYSR_USE_BEARTYPE", "0") == "1":
13 |     from beartype.claw import beartype_this_package
14 | 
15 |     beartype_this_package()
16 | 
17 | # This must be imported as early as possible to prevent
18 | # library linking issues caused by numpy/pytorch/etc. importing
19 | # old libraries:
20 | from .julia_import import jl, SymbolicRegression  # isort:skip
21 | 
22 | # Get the version using importlib.metadata (Python >= 3.8 is required):
23 | from importlib.metadata import PackageNotFoundError, version
24 | 
25 | from . import sklearn_monkeypatch
26 | from .deprecated import best, best_callable, best_row, best_tex, install, pysr
27 | from .export_jax import sympy2jax
28 | from .export_torch import sympy2torch
29 | from .expression_specs import (
30 |     AbstractExpressionSpec,
31 |     ExpressionSpec,
32 |     ParametricExpressionSpec,
33 |     TemplateExpressionSpec,
34 | )
35 | from .julia_extensions import load_all_packages
36 | from .logger_specs import AbstractLoggerSpec, TensorBoardLoggerSpec
37 | from .sr import PySRRegressor
38 | 
39 | try:
40 |     __version__ = version("pysr")
41 | except PackageNotFoundError:  # pragma: no cover
42 |     # package is not installed
43 |     __version__ = "unknown"
44 | 
45 | __all__ = [
46 |     "jl",
47 |     "SymbolicRegression",
48 |     "sklearn_monkeypatch",
49 |     "sympy2jax",
50 |     "sympy2torch",
51 |     "install",
52 |     "load_all_packages",
53 |     "PySRRegressor",
54 |     "AbstractExpressionSpec",
55 |     "ExpressionSpec",
56 |     "TemplateExpressionSpec",
57 |     "ParametricExpressionSpec",
58 |     "AbstractLoggerSpec",
59 |     "TensorBoardLoggerSpec",
60 |     "best",
61 |     "best_callable",
62 |     "best_row",
63 |     "best_tex",
64 |     "pysr",
65 |     "__version__",
66 | ]
67 | 


--------------------------------------------------------------------------------
/pysr/__main__.py:
--------------------------------------------------------------------------------
1 | from ._cli.main import pysr as _cli
2 | 
3 | if __name__ == "__main__":
4 |     _cli(prog_name="pysr")
5 | 


--------------------------------------------------------------------------------
/pysr/_cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/pysr/_cli/__init__.py


--------------------------------------------------------------------------------
/pysr/_cli/main.py:
--------------------------------------------------------------------------------
  1 | import fnmatch
  2 | import sys
  3 | import unittest
  4 | import warnings
  5 | 
  6 | import click
  7 | 
  8 | from ..test import (
  9 |     get_runtests_cli,
 10 |     runtests,
 11 |     runtests_dev,
 12 |     runtests_jax,
 13 |     runtests_startup,
 14 |     runtests_torch,
 15 | )
 16 | 
 17 | 
 18 | @click.group("pysr")
 19 | @click.pass_context
 20 | def pysr(context):
 21 |     ctx = context
 22 | 
 23 | 
 24 | @pysr.command("install", help="DEPRECATED (dependencies are now installed at import).")
 25 | @click.option(
 26 |     "-p",
 27 |     "julia_project",
 28 |     "--project",
 29 |     default=None,
 30 |     type=str,
 31 | )
 32 | @click.option("-q", "--quiet", is_flag=True, default=False, help="Disable logging.")
 33 | @click.option(
 34 |     "--precompile",
 35 |     "precompile",
 36 |     flag_value=True,
 37 |     default=None,
 38 | )
 39 | @click.option(
 40 |     "--no-precompile",
 41 |     "precompile",
 42 |     flag_value=False,
 43 |     default=None,
 44 | )
 45 | def _install(julia_project, quiet, precompile):
 46 |     warnings.warn(
 47 |         "This command is deprecated. Julia dependencies are now installed at first import."
 48 |     )
 49 | 
 50 | 
 51 | TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"}
 52 | 
 53 | 
 54 | @pysr.command("test")
 55 | @click.argument("tests", nargs=1)
 56 | @click.option(
 57 |     "-k",
 58 |     "expressions",
 59 |     multiple=True,
 60 |     type=str,
 61 |     help="Filter expressions to select specific tests.",
 62 | )
 63 | def _tests(tests, expressions):
 64 |     """Run parts of the PySR test suite.
 65 | 
 66 |     Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas.
 67 |     """
 68 |     test_cases = []
 69 |     for test in tests.split(","):
 70 |         if test == "main":
 71 |             test_cases.extend(runtests(just_tests=True))
 72 |         elif test == "jax":
 73 |             test_cases.extend(runtests_jax(just_tests=True))
 74 |         elif test == "torch":
 75 |             test_cases.extend(runtests_torch(just_tests=True))
 76 |         elif test == "cli":
 77 |             runtests_cli = get_runtests_cli()
 78 |             test_cases.extend(runtests_cli(just_tests=True))
 79 |         elif test == "dev":
 80 |             test_cases.extend(runtests_dev(just_tests=True))
 81 |         elif test == "startup":
 82 |             test_cases.extend(runtests_startup(just_tests=True))
 83 |         else:
 84 |             warnings.warn(f"Invalid test {test}. Skipping.")
 85 | 
 86 |     loader = unittest.TestLoader()
 87 |     suite = unittest.TestSuite()
 88 |     for test_case in test_cases:
 89 |         loaded_tests = loader.loadTestsFromTestCase(test_case)
 90 |         for test in loaded_tests:
 91 |             if len(expressions) == 0 or any(
 92 |                 fnmatch.fnmatch(test.id(), "*" + expression + "*")
 93 |                 for expression in expressions
 94 |             ):
 95 |                 suite.addTest(test)
 96 | 
 97 |     runner = unittest.TextTestRunner()
 98 |     results = runner.run(suite)
 99 | 
100 |     if not results.wasSuccessful():
101 |         sys.exit(1)
102 | 


--------------------------------------------------------------------------------
/pysr/denoising.py:
--------------------------------------------------------------------------------
 1 | """Functions for denoising data during preprocessing."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import cast
 6 | 
 7 | import numpy as np
 8 | from numpy import ndarray
 9 | 
10 | 
11 | def denoise(
12 |     X: ndarray,
13 |     y: ndarray,
14 |     Xresampled: ndarray | None = None,
15 |     random_state: np.random.RandomState | None = None,
16 | ) -> tuple[ndarray, ndarray]:
17 |     """Denoise the dataset using a Gaussian process."""
18 |     from sklearn.gaussian_process import GaussianProcessRegressor
19 |     from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
20 | 
21 |     gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel()
22 |     gpr = GaussianProcessRegressor(
23 |         kernel=gp_kernel, n_restarts_optimizer=50, random_state=random_state
24 |     )
25 |     gpr.fit(X, y)
26 | 
27 |     if Xresampled is not None:
28 |         return Xresampled, cast(ndarray, gpr.predict(Xresampled))
29 | 
30 |     return X, cast(ndarray, gpr.predict(X))
31 | 
32 | 
33 | def multi_denoise(
34 |     X: ndarray,
35 |     y: ndarray,
36 |     Xresampled: ndarray | None = None,
37 |     random_state: np.random.RandomState | None = None,
38 | ):
39 |     """Perform `denoise` along each column of `y` independently."""
40 |     y = np.stack(
41 |         [
42 |             denoise(X, y[:, i], Xresampled=Xresampled, random_state=random_state)[1]
43 |             for i in range(y.shape[1])
44 |         ],
45 |         axis=1,
46 |     )
47 | 
48 |     if Xresampled is not None:
49 |         return Xresampled, y
50 | 
51 |     return X, y
52 | 


--------------------------------------------------------------------------------
/pysr/deprecated.py:
--------------------------------------------------------------------------------
  1 | """Various functions to deprecate features."""
  2 | 
  3 | import warnings
  4 | 
  5 | from .julia_import import jl
  6 | 
  7 | 
  8 | def install(*args, **kwargs):
  9 |     del args, kwargs
 10 |     warnings.warn(
 11 |         "The `install` function has been removed. "
 12 |         "PySR now uses the `juliacall` package to install its dependencies automatically at import time. ",
 13 |         FutureWarning,
 14 |     )
 15 | 
 16 | 
 17 | def init_julia(*args, **kwargs):
 18 |     del args, kwargs
 19 |     warnings.warn(
 20 |         "The `init_julia` function has been removed. "
 21 |         "Julia is now initialized automatically at import time.",
 22 |         FutureWarning,
 23 |     )
 24 |     return jl
 25 | 
 26 | 
 27 | def pysr(X, y, weights=None, **kwargs):  # pragma: no cover
 28 |     from .sr import PySRRegressor
 29 | 
 30 |     warnings.warn(
 31 |         "Calling `pysr` is deprecated. "
 32 |         "Please use `model = PySRRegressor(**params); "
 33 |         "model.fit(X, y)` going forward.",
 34 |         FutureWarning,
 35 |     )
 36 |     model = PySRRegressor(**kwargs)
 37 |     model.fit(X, y, weights=weights)
 38 |     return model.equations_
 39 | 
 40 | 
 41 | def best(*args, **kwargs):  # pragma: no cover
 42 |     raise NotImplementedError(
 43 |         "`best` has been deprecated. "
 44 |         "Please use the `PySRRegressor` interface. "
 45 |         "After fitting, you can return `.sympy()` "
 46 |         "to get the sympy representation "
 47 |         "of the best equation."
 48 |     )
 49 | 
 50 | 
 51 | def best_row(*args, **kwargs):  # pragma: no cover
 52 |     raise NotImplementedError(
 53 |         "`best_row` has been deprecated. "
 54 |         "Please use the `PySRRegressor` interface. "
 55 |         "After fitting, you can run `print(model)` to view the best equation, "
 56 |         "or "
 57 |         "`model.get_best()` to return the best equation's "
 58 |         "row in `model.equations_`."
 59 |     )
 60 | 
 61 | 
 62 | def best_tex(*args, **kwargs):  # pragma: no cover
 63 |     raise NotImplementedError(
 64 |         "`best_tex` has been deprecated. "
 65 |         "Please use the `PySRRegressor` interface. "
 66 |         "After fitting, you can return `.latex()` to "
 67 |         "get the sympy representation "
 68 |         "of the best equation."
 69 |     )
 70 | 
 71 | 
 72 | def best_callable(*args, **kwargs):  # pragma: no cover
 73 |     raise NotImplementedError(
 74 |         "`best_callable` has been deprecated. Please use the `PySRRegressor` "
 75 |         "interface. After fitting, you can use "
 76 |         "`.predict(X)` to use the best callable."
 77 |     )
 78 | 
 79 | 
 80 | DEPRECATED_KWARGS = {
 81 |     "fractionReplaced": "fraction_replaced",
 82 |     "fractionReplacedHof": "fraction_replaced_hof",
 83 |     "npop": "population_size",
 84 |     "hofMigration": "hof_migration",
 85 |     "shouldOptimizeConstants": "should_optimize_constants",
 86 |     "weightAddNode": "weight_add_node",
 87 |     "weightDeleteNode": "weight_delete_node",
 88 |     "weightDoNothing": "weight_do_nothing",
 89 |     "weightInsertNode": "weight_insert_node",
 90 |     "weightMutateConstant": "weight_mutate_constant",
 91 |     "weightMutateOperator": "weight_mutate_operator",
 92 |     "weightSwapOperands": "weight_swap_operands",
 93 |     "weightRandomize": "weight_randomize",
 94 |     "weightSimplify": "weight_simplify",
 95 |     "crossoverProbability": "crossover_probability",
 96 |     "perturbationFactor": "perturbation_factor",
 97 |     "batchSize": "batch_size",
 98 |     "warmupMaxsizeBy": "warmup_maxsize_by",
 99 |     "useFrequency": "use_frequency",
100 |     "useFrequencyInTournament": "use_frequency_in_tournament",
101 |     "ncyclesperiteration": "ncycles_per_iteration",
102 |     "loss": "elementwise_loss",
103 |     "full_objective": "loss_function",
104 | }
105 | 


--------------------------------------------------------------------------------
/pysr/export.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import copy
 4 | from collections.abc import Callable
 5 | 
 6 | import numpy as np
 7 | import pandas as pd
 8 | from numpy.typing import NDArray
 9 | 
10 | from .export_jax import sympy2jax
11 | from .export_numpy import sympy2numpy
12 | from .export_sympy import create_sympy_symbols, pysr2sympy
13 | from .export_torch import sympy2torch
14 | from .utils import ArrayLike
15 | 
16 | 
17 | def add_export_formats(
18 |     output: pd.DataFrame,
19 |     *,
20 |     feature_names_in: ArrayLike[str],
21 |     selection_mask: NDArray[np.bool_] | None = None,
22 |     extra_sympy_mappings: dict[str, Callable] | None = None,
23 |     extra_torch_mappings: dict[Callable, Callable] | None = None,
24 |     output_torch_format: bool = False,
25 |     extra_jax_mappings: dict[Callable, str] | None = None,
26 |     output_jax_format: bool = False,
27 | ) -> pd.DataFrame:
28 |     """Create export formats for an equations dataframe.
29 | 
30 |     Returns a new dataframe containing only the exported formats.
31 |     """
32 |     output = copy.deepcopy(output)
33 | 
34 |     sympy_format = []
35 |     lambda_format = []
36 |     jax_format = []
37 |     torch_format = []
38 | 
39 |     for _, eqn_row in output.iterrows():
40 |         eqn = pysr2sympy(
41 |             eqn_row["equation"],
42 |             feature_names_in=feature_names_in,
43 |             extra_sympy_mappings=extra_sympy_mappings,
44 |         )
45 |         sympy_format.append(eqn)
46 | 
47 |         # NumPy:
48 |         sympy_symbols = create_sympy_symbols(feature_names_in)
49 |         lambda_format.append(
50 |             sympy2numpy(
51 |                 eqn,
52 |                 sympy_symbols,
53 |                 selection=selection_mask,
54 |             )
55 |         )
56 | 
57 |         # JAX:
58 |         if output_jax_format:
59 |             func, params = sympy2jax(
60 |                 eqn,
61 |                 sympy_symbols,
62 |                 selection=selection_mask,
63 |                 extra_jax_mappings=extra_jax_mappings,
64 |             )
65 |             jax_format.append({"callable": func, "parameters": params})
66 | 
67 |         # Torch:
68 |         if output_torch_format:
69 |             module = sympy2torch(
70 |                 eqn,
71 |                 sympy_symbols,
72 |                 selection=selection_mask,
73 |                 extra_torch_mappings=extra_torch_mappings,
74 |             )
75 |             torch_format.append(module)
76 | 
77 |     exports = pd.DataFrame(
78 |         {
79 |             "sympy_format": sympy_format,
80 |             "lambda_format": lambda_format,
81 |         },
82 |         index=output.index,
83 |     )
84 | 
85 |     if output_jax_format:
86 |         exports["jax_format"] = jax_format
87 |     if output_torch_format:
88 |         exports["torch_format"] = torch_format
89 | 
90 |     return exports
91 | 


--------------------------------------------------------------------------------
/pysr/export_jax.py:
--------------------------------------------------------------------------------
  1 | import numpy as np  # noqa: F401
  2 | import sympy  # type: ignore
  3 | from sympy.codegen.cfunctions import log2, log10  # type: ignore
  4 | 
  5 | # Special since need to reduce arguments.
  6 | MUL = 0
  7 | ADD = 1
  8 | 
  9 | _jnp_func_lookup = {
 10 |     sympy.Mul: MUL,
 11 |     sympy.Add: ADD,
 12 |     sympy.div: "jnp.div",
 13 |     sympy.Abs: "jnp.abs",
 14 |     sympy.sign: "jnp.sign",
 15 |     # Note: May raise error for ints.
 16 |     sympy.ceiling: "jnp.ceil",
 17 |     sympy.floor: "jnp.floor",
 18 |     sympy.log: "jnp.log",
 19 |     log2: "jnp.log2",
 20 |     log10: "jnp.log10",
 21 |     sympy.exp: "jnp.exp",
 22 |     sympy.sqrt: "jnp.sqrt",
 23 |     sympy.cos: "jnp.cos",
 24 |     sympy.acos: "jnp.acos",
 25 |     sympy.sin: "jnp.sin",
 26 |     sympy.asin: "jnp.asin",
 27 |     sympy.tan: "jnp.tan",
 28 |     sympy.atan: "jnp.atan",
 29 |     sympy.atan2: "jnp.atan2",
 30 |     # Note: Also may give NaN for complex results.
 31 |     sympy.cosh: "jnp.cosh",
 32 |     sympy.acosh: "jnp.acosh",
 33 |     sympy.sinh: "jnp.sinh",
 34 |     sympy.asinh: "jnp.asinh",
 35 |     sympy.tanh: "jnp.tanh",
 36 |     sympy.atanh: "jnp.atanh",
 37 |     sympy.Pow: "jnp.power",
 38 |     sympy.re: "jnp.real",
 39 |     sympy.im: "jnp.imag",
 40 |     sympy.arg: "jnp.angle",
 41 |     # Note: May raise error for ints and complexes
 42 |     sympy.erf: "jsp.erf",
 43 |     sympy.erfc: "jsp.erfc",
 44 |     sympy.LessThan: "jnp.less",
 45 |     sympy.GreaterThan: "jnp.greater",
 46 |     sympy.And: "jnp.logical_and",
 47 |     sympy.Or: "jnp.logical_or",
 48 |     sympy.Not: "jnp.logical_not",
 49 |     sympy.Max: "jnp.max",
 50 |     sympy.Min: "jnp.min",
 51 |     sympy.Mod: "jnp.mod",
 52 |     sympy.Heaviside: "jnp.heaviside",
 53 |     sympy.core.numbers.Half: "(lambda: 0.5)",
 54 |     sympy.core.numbers.One: "(lambda: 1.0)",
 55 | }
 56 | 
 57 | 
 58 | def sympy2jaxtext(expr, parameters, symbols_in, extra_jax_mappings=None):
 59 |     if issubclass(expr.func, sympy.Float):
 60 |         parameters.append(float(expr))
 61 |         return f"parameters[{len(parameters) - 1}]"
 62 |     elif issubclass(expr.func, sympy.Rational) or issubclass(
 63 |         expr.func, sympy.NumberSymbol
 64 |     ):
 65 |         return f"{float(expr)}"
 66 |     elif issubclass(expr.func, sympy.Integer):
 67 |         return f"{int(expr)}"
 68 |     elif issubclass(expr.func, sympy.Symbol):
 69 |         return (
 70 |             f"X[:, {[i for i in range(len(symbols_in)) if symbols_in[i] == expr][0]}]"
 71 |         )
 72 |     if extra_jax_mappings is None:
 73 |         extra_jax_mappings = {}
 74 |     try:
 75 |         _func = {**_jnp_func_lookup, **extra_jax_mappings}[expr.func]
 76 |     except KeyError:
 77 |         raise KeyError(
 78 |             f"Function {expr.func} was not found in JAX function mappings."
 79 |             "Please add it to extra_jax_mappings in the format, e.g., "
 80 |             "{sympy.sqrt: 'jnp.sqrt'}."
 81 |         )
 82 |     args = [
 83 |         sympy2jaxtext(
 84 |             arg, parameters, symbols_in, extra_jax_mappings=extra_jax_mappings
 85 |         )
 86 |         for arg in expr.args
 87 |     ]
 88 |     if _func == MUL:
 89 |         return " * ".join(["(" + arg + ")" for arg in args])
 90 |     if _func == ADD:
 91 |         return " + ".join(["(" + arg + ")" for arg in args])
 92 |     return f'{_func}({", ".join(args)})'
 93 | 
 94 | 
 95 | jax_initialized = False
 96 | jax = None
 97 | jnp = None
 98 | jsp = None
 99 | 
100 | 
101 | def _initialize_jax():
102 |     global jax_initialized
103 |     global jax
104 |     global jnp
105 |     global jsp
106 | 
107 |     if not jax_initialized:
108 |         import jax as _jax
109 |         from jax import numpy as _jnp
110 |         from jax.scipy import special as _jsp
111 | 
112 |         jax = _jax
113 |         jnp = _jnp
114 |         jsp = _jsp
115 | 
116 | 
117 | def sympy2jax(expression, symbols_in, selection=None, extra_jax_mappings=None):
118 |     """Returns a function f and its parameters;
119 |     the function takes an input matrix, and a list of arguments:
120 |             f(X, parameters)
121 |     where the parameters appear in the JAX equation.
122 | 
123 |     # Examples:
124 | 
125 |         Let's create a function in SymPy:
126 |         ```python
127 |         x, y = symbols('x y')
128 |         cosx = 1.0 * sympy.cos(x) + 3.2 * y
129 |         ```
130 |         Let's get the JAX version. We pass the equation, and
131 |         the symbols required.
132 |         ```python
133 |         f, params = sympy2jax(cosx, [x, y])
134 |         ```
135 |         The order you supply the symbols is the same order
136 |         you should supply the features when calling
137 |         the function `f` (shape `[nrows, nfeatures]`).
138 |         In this case, features=2 for x and y.
139 |         The `params` in this case will be
140 |         `jnp.array([1.0, 3.2])`. You pass these parameters
141 |         when calling the function, which will let you change them
142 |         and take gradients.
143 | 
144 |         Let's generate some JAX data to pass:
145 |         ```python
146 |         key = random.PRNGKey(0)
147 |         X = random.normal(key, (10, 2))
148 |         ```
149 | 
150 |         We can call the function with:
151 |         ```python
152 |         f(X, params)
153 | 
154 |         #> DeviceArray([-2.6080756 ,  0.72633684, -6.7557726 , -0.2963162 ,
155 |         #                6.6014843 ,  5.032483  , -0.810931  ,  4.2520013 ,
156 |         #                3.5427954 , -2.7479894 ], dtype=float32)
157 |         ```
158 | 
159 |         We can take gradients with respect
160 |         to the parameters for each row with JAX
161 |         gradient parameters now:
162 |         ```python
163 |         jac_f = jax.jacobian(f, argnums=1)
164 |         jac_f(X, params)
165 | 
166 |         #> DeviceArray([[ 0.49364874, -0.9692889 ],
167 |         #               [ 0.8283714 , -0.0318858 ],
168 |         #               [-0.7447336 , -1.8784496 ],
169 |         #               [ 0.70755106, -0.3137085 ],
170 |         #               [ 0.944834  ,  1.767703  ],
171 |         #               [ 0.51673377,  1.4111717 ],
172 |         #               [ 0.87347716, -0.52637756],
173 |         #               [ 0.8760679 ,  1.0549792 ],
174 |         #               [ 0.9961824 ,  0.79581654],
175 |         #               [-0.88465923, -0.5822907 ]], dtype=float32)
176 |         ```
177 | 
178 |         We can also JIT-compile our function:
179 |         ```python
180 |         compiled_f = jax.jit(f)
181 |         compiled_f(X, params)
182 | 
183 |         #> DeviceArray([-2.6080756 ,  0.72633684, -6.7557726 , -0.2963162 ,
184 |         #                6.6014843 ,  5.032483  , -0.810931  ,  4.2520013 ,
185 |         #                3.5427954 , -2.7479894 ], dtype=float32)
186 |         ```
187 |     """
188 |     _initialize_jax()
189 |     global jax_initialized
190 |     global jax
191 |     global jnp
192 |     global jsp
193 | 
194 |     parameters = []
195 |     functional_form_text = sympy2jaxtext(
196 |         expression, parameters, symbols_in, extra_jax_mappings
197 |     )
198 |     hash_string = "A_" + str(abs(hash(str(expression) + str(symbols_in))))
199 |     text = f"def {hash_string}(X, parameters):\n"
200 |     if selection is not None:
201 |         # Impose the feature selection:
202 |         text += f"    X = X[:, {list(selection)}]\n"
203 |     text += "    return "
204 |     text += functional_form_text
205 |     ldict = {}
206 |     exec(text, globals(), ldict)
207 |     return ldict[hash_string], jnp.array(parameters)
208 | 


--------------------------------------------------------------------------------
/pysr/export_latex.py:
--------------------------------------------------------------------------------
  1 | """Functions to help export PySR equations to LaTeX."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import pandas as pd
  6 | import sympy  # type: ignore
  7 | from sympy.printing.latex import LatexPrinter  # type: ignore
  8 | 
  9 | 
 10 | class PreciseLatexPrinter(LatexPrinter):
 11 |     """Modified SymPy printer with custom float precision."""
 12 | 
 13 |     def __init__(self, settings=None, prec=3):
 14 |         super().__init__(settings)
 15 |         self.prec = prec
 16 | 
 17 |     def _print_Float(self, expr):
 18 |         # Reduce precision of float:
 19 |         reduced_float = sympy.Float(expr, self.prec)
 20 |         return super()._print_Float(reduced_float)
 21 | 
 22 | 
 23 | def sympy2latex(expr, prec=3, full_prec=True, **settings) -> str:
 24 |     """Convert sympy expression to LaTeX with custom precision."""
 25 |     settings["full_prec"] = full_prec
 26 |     printer = PreciseLatexPrinter(settings=settings, prec=prec)
 27 |     return str(printer.doprint(expr))
 28 | 
 29 | 
 30 | def generate_table_environment(
 31 |     columns: list[str] = ["equation", "complexity", "loss"]
 32 | ) -> tuple[str, str]:
 33 |     margins = "c" * len(columns)
 34 |     column_map = {
 35 |         "complexity": "Complexity",
 36 |         "loss": "Loss",
 37 |         "equation": "Equation",
 38 |         "score": "Score",
 39 |     }
 40 |     columns = [column_map[col] for col in columns]
 41 |     top_pieces = [
 42 |         r"\begin{table}[h]",
 43 |         r"\begin{center}",
 44 |         r"\begin{tabular}{@{}" + margins + r"@{}}",
 45 |         r"\toprule",
 46 |         " & ".join(columns) + r" \\",
 47 |         r"\midrule",
 48 |     ]
 49 | 
 50 |     bottom_pieces = [
 51 |         r"\bottomrule",
 52 |         r"\end{tabular}",
 53 |         r"\end{center}",
 54 |         r"\end{table}",
 55 |     ]
 56 |     top_latex_table = "\n".join(top_pieces)
 57 |     bottom_latex_table = "\n".join(bottom_pieces)
 58 | 
 59 |     return top_latex_table, bottom_latex_table
 60 | 
 61 | 
 62 | def sympy2latextable(
 63 |     equations: pd.DataFrame,
 64 |     indices: list[int] | None = None,
 65 |     precision: int = 3,
 66 |     columns: list[str] = ["equation", "complexity", "loss", "score"],
 67 |     max_equation_length: int = 50,
 68 |     output_variable_name: str = "y",
 69 | ) -> str:
 70 |     """Generate a booktabs-style LaTeX table for a single set of equations."""
 71 |     assert isinstance(equations, pd.DataFrame)
 72 | 
 73 |     latex_top, latex_bottom = generate_table_environment(columns)
 74 |     latex_table_content = []
 75 | 
 76 |     if indices is None:
 77 |         indices = list(equations.index)
 78 | 
 79 |     for i in indices:
 80 |         latex_equation = sympy2latex(
 81 |             equations.iloc[i]["sympy_format"],
 82 |             prec=precision,
 83 |         )
 84 |         complexity = str(equations.iloc[i]["complexity"])
 85 |         loss = sympy2latex(
 86 |             sympy.Float(equations.iloc[i]["loss"]),
 87 |             prec=precision,
 88 |         )
 89 |         score = sympy2latex(
 90 |             sympy.Float(equations.iloc[i]["score"]),
 91 |             prec=precision,
 92 |         )
 93 | 
 94 |         row_pieces = []
 95 |         for col in columns:
 96 |             if col == "equation":
 97 |                 if len(latex_equation) < max_equation_length:
 98 |                     row_pieces.append(
 99 |                         "$" + output_variable_name + " = " + latex_equation + "$"
100 |                     )
101 |                 else:
102 |                     broken_latex_equation = " ".join(
103 |                         [
104 |                             r"\begin{minipage}{0.8\linewidth}",
105 |                             r"\vspace{-1em}",
106 |                             r"\begin{dmath*}",
107 |                             output_variable_name + " = " + latex_equation,
108 |                             r"\end{dmath*}",
109 |                             r"\end{minipage}",
110 |                         ]
111 |                     )
112 |                     row_pieces.append(broken_latex_equation)
113 | 
114 |             elif col == "complexity":
115 |                 row_pieces.append("$" + complexity + "$")
116 |             elif col == "loss":
117 |                 row_pieces.append("$" + loss + "$")
118 |             elif col == "score":
119 |                 row_pieces.append("$" + score + "$")
120 |             else:
121 |                 raise ValueError(f"Unknown column: {col}")
122 | 
123 |         latex_table_content.append(
124 |             " & ".join(row_pieces) + r" \\",
125 |         )
126 | 
127 |     return "\n".join([latex_top, *latex_table_content, latex_bottom])
128 | 
129 | 
130 | def sympy2multilatextable(
131 |     equations: list[pd.DataFrame],
132 |     indices: list[list[int]] | None = None,
133 |     precision: int = 3,
134 |     columns: list[str] = ["equation", "complexity", "loss", "score"],
135 |     output_variable_names: list[str] | None = None,
136 | ) -> str:
137 |     """Generate multiple latex tables for a list of equation sets."""
138 |     # TODO: Let user specify custom output variable
139 | 
140 |     latex_tables = [
141 |         sympy2latextable(
142 |             equations[i],
143 |             (None if not indices else indices[i]),
144 |             precision=precision,
145 |             columns=columns,
146 |             output_variable_name=(
147 |                 "y_{" + str(i) + "}"
148 |                 if output_variable_names is None
149 |                 else output_variable_names[i]
150 |             ),
151 |         )
152 |         for i in range(len(equations))
153 |     ]
154 | 
155 |     return "\n\n".join(latex_tables)
156 | 
157 | 
158 | def with_preamble(table_string: str) -> str:
159 |     preamble_string = [
160 |         r"\usepackage{breqn}",
161 |         r"\usepackage{booktabs}",
162 |         "",
163 |         "...",
164 |         "",
165 |         table_string,
166 |     ]
167 |     return "\n".join(preamble_string)
168 | 


--------------------------------------------------------------------------------
/pysr/export_numpy.py:
--------------------------------------------------------------------------------
 1 | """Code for exporting discovered expressions to numpy"""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import warnings
 6 | 
 7 | import numpy as np
 8 | import pandas as pd
 9 | from numpy.typing import NDArray
10 | from sympy import Expr, Symbol, lambdify  # type: ignore
11 | 
12 | 
13 | def sympy2numpy(eqn, sympy_symbols, *, selection=None):
14 |     return CallableEquation(eqn, sympy_symbols, selection=selection)
15 | 
16 | 
17 | class CallableEquation:
18 |     """Simple wrapper for numpy lambda functions built with sympy"""
19 | 
20 |     _sympy: Expr
21 |     _sympy_symbols: list[Symbol]
22 |     _selection: NDArray[np.bool_] | None
23 | 
24 |     def __init__(self, eqn, sympy_symbols, selection=None):
25 |         self._sympy = eqn
26 |         self._sympy_symbols = sympy_symbols
27 |         self._selection = selection
28 | 
29 |     def __repr__(self):
30 |         return f"PySRFunction(X=>{self._sympy})"
31 | 
32 |     def __call__(self, X):
33 |         expected_shape = (X.shape[0],)
34 |         if isinstance(X, pd.DataFrame):
35 |             # Lambda function takes as argument:
36 |             return self._lambda(
37 |                 **{k: X[k].values for k in map(str, self._sympy_symbols)}
38 |             ) * np.ones(expected_shape)
39 | 
40 |         if self._selection is not None:
41 |             if X.shape[1] != self._selection.sum():
42 |                 warnings.warn(
43 |                     "`X` should be of shape (n_samples, len(self._selection)). "
44 |                     "Automatically filtering `X` to selection. "
45 |                     "Note: Filtered `X` column order may not match column order in fit "
46 |                     "this may lead to incorrect predictions and other errors."
47 |                 )
48 |                 X = X[:, self._selection]
49 | 
50 |         return self._lambda(*X.T) * np.ones(expected_shape)
51 | 
52 |     @property
53 |     def _lambda(self):
54 |         return lambdify(self._sympy_symbols, self._sympy)
55 | 


--------------------------------------------------------------------------------
/pysr/export_sympy.py:
--------------------------------------------------------------------------------
  1 | """Define utilities to export to sympy"""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from collections.abc import Callable
  6 | 
  7 | import sympy  # type: ignore
  8 | from sympy import sympify
  9 | from sympy.codegen.cfunctions import log2, log10  # type: ignore
 10 | 
 11 | from .utils import ArrayLike
 12 | 
 13 | sympy_mappings = {
 14 |     "div": lambda x, y: x / y,
 15 |     "inv": lambda x: 1 / x,
 16 |     "mult": lambda x, y: x * y,
 17 |     "sqrt": lambda x: sympy.sqrt(x),
 18 |     "sqrt_abs": lambda x: sympy.sqrt(abs(x)),
 19 |     "cbrt": lambda x: sympy.sign(x) * sympy.cbrt(abs(x)),
 20 |     "square": lambda x: x**2,
 21 |     "cube": lambda x: x**3,
 22 |     "plus": lambda x, y: x + y,
 23 |     "sub": lambda x, y: x - y,
 24 |     "neg": lambda x: -x,
 25 |     "pow": lambda x, y: x**y,
 26 |     "pow_abs": lambda x, y: abs(x) ** y,
 27 |     "cos": sympy.cos,
 28 |     "sin": sympy.sin,
 29 |     "tan": sympy.tan,
 30 |     "cosh": sympy.cosh,
 31 |     "sinh": sympy.sinh,
 32 |     "tanh": sympy.tanh,
 33 |     "exp": sympy.exp,
 34 |     "acos": sympy.acos,
 35 |     "asin": sympy.asin,
 36 |     "atan": sympy.atan,
 37 |     "acosh": lambda x: sympy.acosh(x),
 38 |     "acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
 39 |     "asinh": sympy.asinh,
 40 |     "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
 41 |     "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
 42 |     "abs": abs,
 43 |     "mod": sympy.Mod,
 44 |     "erf": sympy.erf,
 45 |     "erfc": sympy.erfc,
 46 |     "log": lambda x: sympy.log(x),
 47 |     "log10": lambda x: log10(x),
 48 |     "log2": lambda x: log2(x),
 49 |     "log1p": lambda x: sympy.log(x + 1),
 50 |     "log_abs": lambda x: sympy.log(abs(x)),
 51 |     "log10_abs": lambda x: sympy.log(abs(x), 10),
 52 |     "log2_abs": lambda x: sympy.log(abs(x), 2),
 53 |     "log1p_abs": lambda x: sympy.log(abs(x) + 1),
 54 |     "floor": sympy.floor,
 55 |     "ceil": sympy.ceiling,
 56 |     "sign": sympy.sign,
 57 |     "gamma": sympy.gamma,
 58 |     "round": lambda x: sympy.ceiling(x - 0.5),
 59 |     "max": lambda x, y: sympy.Piecewise((y, x < y), (x, True)),
 60 |     "min": lambda x, y: sympy.Piecewise((x, x < y), (y, True)),
 61 |     "greater": lambda x, y: sympy.Piecewise((1.0, x > y), (0.0, True)),
 62 |     "less": lambda x, y: sympy.Piecewise((1.0, x < y), (0.0, True)),
 63 |     "greater_equal": lambda x, y: sympy.Piecewise((1.0, x >= y), (0.0, True)),
 64 |     "less_equal": lambda x, y: sympy.Piecewise((1.0, x <= y), (0.0, True)),
 65 |     "cond": lambda x, y: sympy.Piecewise((y, x > 0), (0.0, True)),
 66 |     "logical_or": lambda x, y: sympy.Piecewise((1.0, (x > 0) | (y > 0)), (0.0, True)),
 67 |     "logical_and": lambda x, y: sympy.Piecewise((1.0, (x > 0) & (y > 0)), (0.0, True)),
 68 |     "relu": lambda x: sympy.Piecewise((0.0, x < 0), (x, True)),
 69 | }
 70 | 
 71 | 
 72 | def create_sympy_symbols_map(
 73 |     feature_names_in: ArrayLike[str],
 74 | ) -> dict[str, sympy.Symbol]:
 75 |     return {variable: sympy.Symbol(variable) for variable in feature_names_in}
 76 | 
 77 | 
 78 | def create_sympy_symbols(
 79 |     feature_names_in: ArrayLike[str],
 80 | ) -> list[sympy.Symbol]:
 81 |     return [sympy.Symbol(variable) for variable in feature_names_in]
 82 | 
 83 | 
 84 | def pysr2sympy(
 85 |     equation: str | float | int,
 86 |     *,
 87 |     feature_names_in: ArrayLike[str] | None = None,
 88 |     extra_sympy_mappings: dict[str, Callable] | None = None,
 89 | ):
 90 |     if feature_names_in is None:
 91 |         feature_names_in = []
 92 |     local_sympy_mappings = {
 93 |         **create_sympy_symbols_map(feature_names_in),
 94 |         **sympy_mappings,
 95 |         **(extra_sympy_mappings if extra_sympy_mappings is not None else {}),
 96 |     }
 97 | 
 98 |     try:
 99 |         return sympify(equation, locals=local_sympy_mappings, evaluate=False)
100 |     except TypeError as e:
101 |         if "got an unexpected keyword argument 'evaluate'" in str(e):
102 |             return sympify(equation, locals=local_sympy_mappings)
103 |         raise TypeError(f"Error processing equation '{equation}'") from e
104 | 
105 | 
106 | def assert_valid_sympy_symbol(var_name: str) -> None:
107 |     if var_name in sympy_mappings or var_name in sympy.__dict__.keys():
108 |         raise ValueError(f"Variable name {var_name} is already a function name.")
109 | 


--------------------------------------------------------------------------------
/pysr/export_torch.py:
--------------------------------------------------------------------------------
  1 | # Fork of https://github.com/patrick-kidger/sympytorch
  2 | 
  3 | import collections as co
  4 | import functools as ft
  5 | 
  6 | import numpy as np  # noqa: F401
  7 | import sympy  # type: ignore
  8 | from sympy.codegen.cfunctions import log2, log10  # type: ignore
  9 | 
 10 | 
 11 | def _reduce(fn):
 12 |     def fn_(*args):
 13 |         return ft.reduce(fn, args)
 14 | 
 15 |     return fn_
 16 | 
 17 | 
 18 | torch_initialized = False
 19 | torch = None
 20 | SingleSymPyModule = None
 21 | 
 22 | 
 23 | def _initialize_torch():
 24 |     global torch_initialized
 25 |     global torch
 26 |     global SingleSymPyModule
 27 | 
 28 |     # Way to lazy load torch, only if this is called,
 29 |     # but still allow this module to be loaded in __init__
 30 |     if not torch_initialized:
 31 |         import torch as _torch
 32 | 
 33 |         torch = _torch
 34 | 
 35 |         _global_func_lookup = {
 36 |             sympy.Mul: _reduce(torch.mul),
 37 |             sympy.Add: _reduce(torch.add),
 38 |             sympy.div: torch.div,
 39 |             sympy.Abs: torch.abs,
 40 |             sympy.sign: torch.sign,
 41 |             # Note: May raise error for ints.
 42 |             sympy.ceiling: torch.ceil,
 43 |             sympy.floor: torch.floor,
 44 |             sympy.log: torch.log,
 45 |             log2: torch.log2,
 46 |             log10: torch.log10,
 47 |             sympy.exp: torch.exp,
 48 |             sympy.sqrt: torch.sqrt,
 49 |             sympy.cos: torch.cos,
 50 |             sympy.acos: torch.acos,
 51 |             sympy.sin: torch.sin,
 52 |             sympy.asin: torch.asin,
 53 |             sympy.tan: torch.tan,
 54 |             sympy.atan: torch.atan,
 55 |             sympy.atan2: torch.atan2,
 56 |             # Note: May give NaN for complex results.
 57 |             sympy.cosh: torch.cosh,
 58 |             sympy.acosh: torch.acosh,
 59 |             sympy.sinh: torch.sinh,
 60 |             sympy.asinh: torch.asinh,
 61 |             sympy.tanh: torch.tanh,
 62 |             sympy.atanh: torch.atanh,
 63 |             sympy.Pow: torch.pow,
 64 |             sympy.re: torch.real,
 65 |             sympy.im: torch.imag,
 66 |             sympy.arg: torch.angle,
 67 |             # Note: May raise error for ints and complexes
 68 |             sympy.erf: torch.erf,
 69 |             sympy.loggamma: torch.lgamma,
 70 |             sympy.Eq: torch.eq,
 71 |             sympy.Ne: torch.ne,
 72 |             sympy.StrictGreaterThan: torch.gt,
 73 |             sympy.StrictLessThan: torch.lt,
 74 |             sympy.LessThan: torch.le,
 75 |             sympy.GreaterThan: torch.ge,
 76 |             sympy.And: torch.logical_and,
 77 |             sympy.Or: torch.logical_or,
 78 |             sympy.Not: torch.logical_not,
 79 |             sympy.Max: torch.max,
 80 |             sympy.Min: torch.min,
 81 |             sympy.Mod: torch.remainder,
 82 |             sympy.Heaviside: torch.heaviside,
 83 |             sympy.core.numbers.Half: (lambda: 0.5),
 84 |             sympy.core.numbers.One: (lambda: 1.0),
 85 |         }
 86 | 
 87 |         class _Node(torch.nn.Module):
 88 |             """Forked from https://github.com/patrick-kidger/sympytorch"""
 89 | 
 90 |             def __init__(self, *, expr, _memodict, _func_lookup, **kwargs):
 91 |                 super().__init__(**kwargs)
 92 | 
 93 |                 self._sympy_func = expr.func
 94 | 
 95 |                 if issubclass(expr.func, sympy.Float):
 96 |                     self._value = torch.nn.Parameter(torch.tensor(float(expr)))
 97 |                     self._torch_func = lambda: self._value
 98 |                     self._args = ()
 99 |                 elif issubclass(expr.func, sympy.Rational):
100 |                     # This is some fraction fixed in the operator.
101 |                     self._value = float(expr)
102 |                     self._torch_func = lambda: self._value
103 |                     self._args = ()
104 |                 elif issubclass(expr.func, sympy.UnevaluatedExpr):
105 |                     if len(expr.args) != 1 or not issubclass(
106 |                         expr.args[0].func, sympy.Float
107 |                     ):
108 |                         raise ValueError(
109 |                             "UnevaluatedExpr should only be used to wrap floats."
110 |                         )
111 |                     self.register_buffer("_value", torch.tensor(float(expr.args[0])))
112 |                     self._torch_func = lambda: self._value
113 |                     self._args = ()
114 |                 elif issubclass(expr.func, sympy.Integer):
115 |                     # Can get here if expr is one of the Integer special cases,
116 |                     # e.g. NegativeOne
117 |                     self._value = int(expr)
118 |                     self._torch_func = lambda: self._value
119 |                     self._args = ()
120 |                 elif issubclass(expr.func, sympy.NumberSymbol):
121 |                     # Can get here from exp(1) or exact pi
122 |                     self._value = float(expr)
123 |                     self._torch_func = lambda: self._value
124 |                     self._args = ()
125 |                 elif issubclass(expr.func, sympy.Symbol):
126 |                     self._name = expr.name
127 |                     self._torch_func = lambda value: value
128 |                     self._args = ((lambda memodict: memodict[expr.name]),)
129 |                 else:
130 |                     try:
131 |                         self._torch_func = _func_lookup[expr.func]
132 |                     except KeyError:
133 |                         raise KeyError(
134 |                             f"Function {expr.func} was not found in Torch function mappings."
135 |                             "Please add it to extra_torch_mappings in the format, e.g., "
136 |                             "{sympy.sqrt: torch.sqrt}."
137 |                         )
138 |                     args = []
139 |                     for arg in expr.args:
140 |                         try:
141 |                             arg_ = _memodict[arg]
142 |                         except KeyError:
143 |                             arg_ = type(self)(
144 |                                 expr=arg,
145 |                                 _memodict=_memodict,
146 |                                 _func_lookup=_func_lookup,
147 |                                 **kwargs,
148 |                             )
149 |                             _memodict[arg] = arg_
150 |                         args.append(arg_)
151 |                     self._args = torch.nn.ModuleList(args)
152 | 
153 |             def forward(self, memodict):
154 |                 args = []
155 |                 for arg in self._args:
156 |                     try:
157 |                         arg_ = memodict[arg]
158 |                     except KeyError:
159 |                         arg_ = arg(memodict)
160 |                         memodict[arg] = arg_
161 |                     args.append(arg_)
162 |                 return self._torch_func(*args)
163 | 
164 |         class _SingleSymPyModule(torch.nn.Module):
165 |             """Forked from https://github.com/patrick-kidger/sympytorch"""
166 | 
167 |             def __init__(
168 |                 self, expression, symbols_in, selection=None, extra_funcs=None, **kwargs
169 |             ):
170 |                 super().__init__(**kwargs)
171 | 
172 |                 if extra_funcs is None:
173 |                     extra_funcs = {}
174 |                 _func_lookup = co.ChainMap(_global_func_lookup, extra_funcs)
175 | 
176 |                 _memodict = {}
177 |                 self._node = _Node(
178 |                     expr=expression, _memodict=_memodict, _func_lookup=_func_lookup
179 |                 )
180 |                 self._expression_string = str(expression)
181 |                 self._selection = selection
182 |                 self.symbols_in = [str(symbol) for symbol in symbols_in]
183 | 
184 |             def __repr__(self):
185 |                 return f"{type(self).__name__}(expression={self._expression_string})"
186 | 
187 |             def forward(self, X):
188 |                 if self._selection is not None:
189 |                     X = X[:, self._selection]
190 |                 symbols = {symbol: X[:, i] for i, symbol in enumerate(self.symbols_in)}
191 |                 return self._node(symbols)
192 | 
193 |         SingleSymPyModule = _SingleSymPyModule
194 | 
195 | 
196 | def sympy2torch(expression, symbols_in, selection=None, extra_torch_mappings=None):
197 |     """Returns a module for a given sympy expression with trainable parameters;
198 | 
199 |     This function will assume the input to the module is a matrix X, where
200 |         each column corresponds to each symbol you pass in `symbols_in`.
201 |     """
202 |     global SingleSymPyModule
203 | 
204 |     _initialize_torch()
205 | 
206 |     return SingleSymPyModule(
207 |         expression, symbols_in, selection=selection, extra_funcs=extra_torch_mappings
208 |     )
209 | 


--------------------------------------------------------------------------------
/pysr/feature_selection.py:
--------------------------------------------------------------------------------
 1 | """Functions for doing feature selection during preprocessing."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import logging
 6 | from typing import cast
 7 | 
 8 | import numpy as np
 9 | from numpy import ndarray
10 | from numpy.typing import NDArray
11 | 
12 | from .utils import ArrayLike
13 | 
14 | pysr_logger = logging.getLogger(__name__)
15 | 
16 | 
17 | def run_feature_selection(
18 |     X: ndarray,
19 |     y: ndarray,
20 |     select_k_features: int,
21 |     random_state: np.random.RandomState | None = None,
22 | ) -> NDArray[np.bool_]:
23 |     """
24 |     Find most important features.
25 | 
26 |     Uses a gradient boosting tree regressor as a proxy for finding
27 |     the k most important features in X, returning indices for those
28 |     features as output.
29 |     """
30 |     from sklearn.ensemble import RandomForestRegressor
31 |     from sklearn.feature_selection import SelectFromModel
32 | 
33 |     clf = RandomForestRegressor(
34 |         n_estimators=100, max_depth=3, random_state=random_state
35 |     )
36 |     clf.fit(X, y)
37 |     selector = SelectFromModel(
38 |         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
39 |     )
40 |     return cast(NDArray[np.bool_], selector.get_support(indices=False))
41 | 
42 | 
43 | # Function has not been removed only due to usage in module tests
44 | def _handle_feature_selection(
45 |     X: ndarray,
46 |     select_k_features: int | None,
47 |     y: ndarray,
48 |     variable_names: ArrayLike[str],
49 | ):
50 |     if select_k_features is not None:
51 |         selection = run_feature_selection(X, y, select_k_features)
52 |         pysr_logger.info(f"Using features {[variable_names[i] for i in selection]}")
53 |         X = X[:, selection]
54 |     else:
55 |         selection = None
56 | 
57 |     return X, selection
58 | 


--------------------------------------------------------------------------------
/pysr/julia_extensions.py:
--------------------------------------------------------------------------------
 1 | """This file installs and loads extensions for SymbolicRegression."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from typing import Literal
 6 | 
 7 | from .julia_import import Pkg, jl
 8 | from .julia_registry_helpers import try_with_registry_fallback
 9 | from .logger_specs import AbstractLoggerSpec, TensorBoardLoggerSpec
10 | 
11 | 
12 | def load_required_packages(
13 |     *,
14 |     turbo: bool = False,
15 |     bumper: bool = False,
16 |     autodiff_backend: Literal["Zygote"] | None = None,
17 |     cluster_manager: str | None = None,
18 |     logger_spec: AbstractLoggerSpec | None = None,
19 | ):
20 |     if turbo:
21 |         load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890")
22 |     if bumper:
23 |         load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e")
24 |     if autodiff_backend is not None:
25 |         load_package("Zygote", "e88e6eb3-aa80-5325-afca-941959d7151f")
26 |     if cluster_manager is not None:
27 |         load_package("ClusterManagers", "34f1f09b-3a8b-5176-ab39-66d58a4d544e")
28 |     if isinstance(logger_spec, TensorBoardLoggerSpec):
29 |         load_package("TensorBoardLogger", "899adc3e-224a-11e9-021f-63837185c80f")
30 | 
31 | 
32 | def load_all_packages():
33 |     """Install and load all Julia extensions available to PySR."""
34 |     load_required_packages(
35 |         turbo=True,
36 |         bumper=True,
37 |         autodiff_backend="Zygote",
38 |         cluster_manager="slurm",
39 |         logger_spec=TensorBoardLoggerSpec(log_dir="logs"),
40 |     )
41 | 
42 | 
43 | # TODO: Refactor this file so we can install all packages at once using `juliapkg`,
44 | #       ideally parameterizable via the regular Python extras API
45 | 
46 | 
47 | def isinstalled(uuid_s: str):
48 |     return jl.haskey(Pkg.dependencies(), jl.Base.UUID(uuid_s))
49 | 
50 | 
51 | def load_package(package_name: str, uuid_s: str) -> None:
52 |     if not isinstalled(uuid_s):
53 | 
54 |         def _add_package():
55 |             Pkg.add(name=package_name, uuid=uuid_s)
56 |             Pkg.resolve()
57 | 
58 |         try_with_registry_fallback(_add_package)
59 | 
60 |     # TODO: Protect against loading the same symbol from two packages,
61 |     #       maybe with a @gensym here.
62 |     jl.seval(f"using {package_name}: {package_name}")
63 |     return None
64 | 


--------------------------------------------------------------------------------
/pysr/julia_helpers.py:
--------------------------------------------------------------------------------
 1 | """Functions for initializing the Julia environment and installing deps."""
 2 | 
 3 | from typing import Any, Callable, cast, overload
 4 | 
 5 | import numpy as np
 6 | from juliacall import convert as jl_convert  # type: ignore
 7 | from numpy.typing import NDArray
 8 | 
 9 | from .deprecated import init_julia, install
10 | from .julia_import import AnyValue, jl
11 | 
12 | jl_convert = cast(Callable[[Any, Any], Any], jl_convert)
13 | 
14 | jl.seval("using Serialization: Serialization")
15 | jl.seval("using PythonCall: PythonCall")
16 | 
17 | Serialization = jl.Serialization
18 | PythonCall = jl.PythonCall
19 | 
20 | jl.seval("using SymbolicRegression: plus, sub, mult, div, pow")
21 | 
22 | 
23 | def _escape_filename(filename):
24 |     """Turn a path into a string with correctly escaped backslashes."""
25 |     if filename is None:
26 |         return None
27 |     str_repr = str(filename)
28 |     str_repr = str_repr.replace("\\", "\\\\")
29 |     return str_repr
30 | 
31 | 
32 | def _load_cluster_manager(cluster_manager: str):
33 |     jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}")
34 |     return jl.seval(f"addprocs_{cluster_manager}")
35 | 
36 | 
37 | def jl_array(x, dtype=None):
38 |     if x is None:
39 |         return None
40 |     elif dtype is None:
41 |         return jl_convert(jl.Array, x)
42 |     else:
43 |         return jl_convert(jl.Array[dtype], x)
44 | 
45 | 
46 | def jl_dict(x):
47 |     return jl_convert(jl.Dict, x)
48 | 
49 | 
50 | def jl_is_function(f) -> bool:
51 |     return cast(bool, jl.seval("op -> op isa Function")(f))
52 | 
53 | 
54 | def jl_serialize(obj: Any) -> NDArray[np.uint8]:
55 |     buf = jl.IOBuffer()
56 |     Serialization.serialize(buf, obj)
57 |     return np.array(jl.take_b(buf))
58 | 
59 | 
60 | @overload
61 | def jl_deserialize(s: NDArray[np.uint8]) -> AnyValue: ...
62 | @overload
63 | def jl_deserialize(s: None) -> None: ...
64 | def jl_deserialize(s):
65 |     if s is None:
66 |         return s
67 |     buf = jl.IOBuffer()
68 |     jl.write(buf, jl_array(s))
69 |     jl.seekstart(buf)
70 |     return Serialization.deserialize(buf)
71 | 


--------------------------------------------------------------------------------
/pysr/julia_import.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import warnings
 4 | from types import ModuleType
 5 | from typing import cast
 6 | 
 7 | from .julia_registry_helpers import try_with_registry_fallback
 8 | 
 9 | # Check if JuliaCall is already loaded, and if so, warn the user
10 | # about the relevant environment variables. If not loaded,
11 | # set up sensible defaults.
12 | if "juliacall" in sys.modules:
13 |     warnings.warn(
14 |         "juliacall module already imported. "
15 |         "Make sure that you have set the environment variable `PYTHON_JULIACALL_HANDLE_SIGNALS=yes` to avoid segfaults. "
16 |         "Also note that PySR will not be able to configure `PYTHON_JULIACALL_THREADS` or `PYTHON_JULIACALL_OPTLEVEL` for you."
17 |     )
18 | else:
19 |     # Required to avoid segfaults (https://juliapy.github.io/PythonCall.jl/dev/faq/)
20 |     if os.environ.get("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes") != "yes":
21 |         warnings.warn(
22 |             "PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set to something other than 'yes' or ''. "
23 |             + "You will experience segfaults if running with multithreading."
24 |         )
25 | 
26 |     if os.environ.get("PYTHON_JULIACALL_THREADS", "auto") != "auto":
27 |         warnings.warn(
28 |             "PYTHON_JULIACALL_THREADS environment variable is set to something other than 'auto', "
29 |             "so PySR was not able to set it. You may wish to set it to `'auto'` for full use "
30 |             "of your CPU."
31 |         )
32 | 
33 |     # TODO: Remove these when juliapkg lets you specify this
34 |     for k, default in (
35 |         ("PYTHON_JULIACALL_HANDLE_SIGNALS", "yes"),
36 |         ("PYTHON_JULIACALL_THREADS", "auto"),
37 |         ("PYTHON_JULIACALL_OPTLEVEL", "3"),
38 |     ):
39 |         os.environ[k] = os.environ.get(k, default)
40 | 
41 | 
42 | autoload_extensions = os.environ.get("PYSR_AUTOLOAD_EXTENSIONS")
43 | if autoload_extensions is not None:
44 |     # Deprecated; so just pass to juliacall
45 |     os.environ["PYTHON_JULIACALL_AUTOLOAD_IPYTHON_EXTENSION"] = autoload_extensions
46 | 
47 | 
48 | def _import_juliacall():
49 |     import juliacall  # type: ignore
50 | 
51 | 
52 | try_with_registry_fallback(_import_juliacall)
53 | 
54 | 
55 | from juliacall import AnyValue  # type: ignore
56 | from juliacall import VectorValue  # type: ignore
57 | from juliacall import Main as jl  # type: ignore
58 | 
59 | jl = cast(ModuleType, jl)
60 | 
61 | 
62 | jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)
63 | 
64 | jl.seval("using SymbolicRegression")
65 | SymbolicRegression = jl.SymbolicRegression
66 | 
67 | # Expose `D` operator:
68 | jl.seval("using SymbolicRegression: D")
69 | 
70 | # Expose other operators:
71 | jl.seval("using SymbolicRegression: less, greater_equal, less_equal")
72 | 
73 | jl.seval("using Pkg: Pkg")
74 | Pkg = jl.Pkg
75 | 


--------------------------------------------------------------------------------
/pysr/julia_registry_helpers.py:
--------------------------------------------------------------------------------
 1 | """Utilities for managing Julia registry preferences during package operations."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import os
 6 | import warnings
 7 | from collections.abc import Callable
 8 | from typing import TypeVar
 9 | 
10 | T = TypeVar("T")
11 | 
12 | PREFERENCE_KEY = "JULIA_PKG_SERVER_REGISTRY_PREFERENCE"
13 | 
14 | 
15 | def try_with_registry_fallback(f: Callable[..., T], *args, **kwargs) -> T:
16 |     """Execute function with modified Julia registry preference.
17 | 
18 |     First tries with existing registry preference. If that fails with a Julia registry error,
19 |     temporarily modifies the registry preference to 'eager'. Restores original preference after
20 |     execution.
21 |     """
22 |     try:
23 |         return f(*args, **kwargs)
24 |     except Exception as initial_error:
25 |         # Check if this is a Julia registry error by looking at the error message
26 |         if "JuliaError" not in str(
27 |             type(initial_error)
28 |         ) or "Unsatisfiable requirements detected" not in str(initial_error):
29 |             raise initial_error
30 | 
31 |         old_value = os.environ.get(PREFERENCE_KEY, None)
32 |         if old_value == "eager":
33 |             raise initial_error
34 | 
35 |         warnings.warn(
36 |             "Initial Julia registry operation failed. Attempting to use the `eager` registry flavor of the Julia "
37 |             + f"General registry from the Julia Pkg server (via the `{PREFERENCE_KEY}` environment variable)."
38 |         )
39 |         os.environ[PREFERENCE_KEY] = "eager"
40 |         try:
41 |             return f(*args, **kwargs)
42 |         finally:
43 |             if old_value is not None:
44 |                 os.environ[PREFERENCE_KEY] = old_value
45 |             else:
46 |                 del os.environ[PREFERENCE_KEY]
47 | 


--------------------------------------------------------------------------------
/pysr/juliapkg.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "julia": "=1.10.0, 1.10.3",
 3 |     "packages": {
 4 |         "SymbolicRegression": {
 5 |             "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb",
 6 |             "version": "~1.11.0"
 7 |         },
 8 |         "Serialization": {
 9 |             "uuid": "9e88b42a-f829-5b0c-bbe9-9e923198166b",
10 |             "version": "1"
11 |         }
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/pysr/logger_specs.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from abc import ABC, abstractmethod
 4 | from dataclasses import dataclass
 5 | from typing import Any
 6 | 
 7 | from .julia_helpers import jl_array, jl_dict
 8 | from .julia_import import AnyValue, jl
 9 | 
10 | 
11 | class AbstractLoggerSpec(ABC):
12 |     """Abstract base class for logger specifications."""
13 | 
14 |     @abstractmethod
15 |     def create_logger(self) -> AnyValue:
16 |         """Create a logger instance."""
17 |         pass  # pragma: no cover
18 | 
19 |     @abstractmethod
20 |     def write_hparams(self, logger: AnyValue, hparams: dict[str, Any]) -> None:
21 |         """Write hyperparameters to the logger."""
22 |         pass  # pragma: no cover
23 | 
24 |     @abstractmethod
25 |     def close(self, logger: AnyValue) -> None:
26 |         """Close the logger instance."""
27 |         pass  # pragma: no cover
28 | 
29 | 
30 | @dataclass
31 | class TensorBoardLoggerSpec(AbstractLoggerSpec):
32 |     """Specification for TensorBoard logger.
33 | 
34 |     Parameters
35 |     ----------
36 |     log_dir : str
37 |         Directory where TensorBoard logs will be saved. If `overwrite` is `False`,
38 |         new logs will be saved to `{log_dir}_1`, and so on. Default is `"logs/run"`.
39 |     log_interval : int, optional
40 |         Interval (in steps) at which logs are written. Default is 10.
41 |     overwrite : bool, optional
42 |         Whether to overwrite existing logs in the directory. Default is False.
43 |     """
44 | 
45 |     log_dir: str = "logs/run"
46 |     log_interval: int = 1
47 |     overwrite: bool = False
48 | 
49 |     def create_logger(self) -> AnyValue:
50 |         # We assume that TensorBoardLogger is already imported via `julia_extensions.py`
51 |         make_logger = jl.seval(
52 |             """
53 |             function make_logger(log_dir::AbstractString, overwrite::Bool, log_interval::Int)
54 |                 base_logger = TensorBoardLogger.TBLogger(
55 |                     log_dir,
56 |                     (overwrite ? (TensorBoardLogger.tb_overwrite,) : ())...
57 |                 )
58 |                 return SRLogger(; logger=base_logger, log_interval)
59 |             end
60 |         """
61 |         )
62 |         log_dir = str(self.log_dir)
63 |         return make_logger(log_dir, self.overwrite, self.log_interval)
64 | 
65 |     def write_hparams(self, logger: AnyValue, hparams: dict[str, Any]) -> None:
66 |         base_logger = jl.SymbolicRegression.get_logger(logger)
67 |         writer = jl.seval("TensorBoardLogger.write_hparams!")
68 |         jl_clean_hparams = jl_dict(
69 |             {
70 |                 k: (v if isinstance(v, (bool, int, float)) else str(v))
71 |                 for k, v in hparams.items()
72 |             }
73 |         )
74 |         writer(
75 |             base_logger,
76 |             jl_clean_hparams,
77 |             jl_array(
78 |                 [
79 |                     "search/data/summaries/pareto_volume",
80 |                     "search/data/summaries/min_loss",
81 |                 ],
82 |             ),
83 |         )
84 | 
85 |     def close(self, logger: AnyValue) -> None:
86 |         base_logger = jl.SymbolicRegression.get_logger(logger)
87 |         jl.close(base_logger)
88 | 


--------------------------------------------------------------------------------
/pysr/param_groupings.yml:
--------------------------------------------------------------------------------
  1 | - The Algorithm:
  2 |   - Creating the Search Space:
  3 |     - binary_operators
  4 |     - unary_operators
  5 |     - expression_spec
  6 |     - maxsize
  7 |     - maxdepth
  8 |   - Setting the Search Size:
  9 |     - niterations
 10 |     - populations
 11 |     - population_size
 12 |     - ncycles_per_iteration
 13 |   - The Objective:
 14 |     - elementwise_loss
 15 |     - loss_function
 16 |     - loss_function_expression
 17 |     - loss_scale
 18 |     - model_selection
 19 |     - dimensional_constraint_penalty
 20 |     - dimensionless_constants_only
 21 |   - Working with Complexities:
 22 |     - parsimony
 23 |     - constraints
 24 |     - nested_constraints
 25 |     - complexity_of_operators
 26 |     - complexity_of_constants
 27 |     - complexity_of_variables
 28 |     - complexity_mapping
 29 |     - warmup_maxsize_by
 30 |     - use_frequency
 31 |     - use_frequency_in_tournament
 32 |     - adaptive_parsimony_scaling
 33 |     - should_simplify
 34 |   - Mutations:
 35 |     - weight_add_node
 36 |     - weight_insert_node
 37 |     - weight_delete_node
 38 |     - weight_do_nothing
 39 |     - weight_mutate_constant
 40 |     - weight_mutate_operator
 41 |     - weight_swap_operands
 42 |     - weight_rotate_tree
 43 |     - weight_randomize
 44 |     - weight_simplify
 45 |     - weight_optimize
 46 |     - crossover_probability
 47 |     - annealing
 48 |     - alpha
 49 |     - perturbation_factor
 50 |     - probability_negate_constant
 51 |     - skip_mutation_failures
 52 |   - Tournament Selection:
 53 |     - tournament_selection_n
 54 |     - tournament_selection_p
 55 |   - Constant Optimization:
 56 |     - optimizer_algorithm
 57 |     - optimizer_nrestarts
 58 |     - optimizer_f_calls_limit
 59 |     - optimize_probability
 60 |     - optimizer_iterations
 61 |     - should_optimize_constants
 62 |   - Migration between Populations:
 63 |     - fraction_replaced
 64 |     - fraction_replaced_hof
 65 |     - migration
 66 |     - hof_migration
 67 |     - topn
 68 | - Data Preprocessing:
 69 |   - denoise
 70 |   - select_k_features
 71 | - Stopping Criteria:
 72 |   - max_evals
 73 |   - timeout_in_seconds
 74 |   - early_stop_condition
 75 | - Performance and Parallelization:
 76 |   - parallelism
 77 |   - procs
 78 |   - cluster_manager
 79 |   - heap_size_hint_in_bytes
 80 |   - batching
 81 |   - batch_size
 82 |   - precision
 83 |   - fast_cycle
 84 |   - turbo
 85 |   - bumper
 86 |   - autodiff_backend
 87 | - Determinism:
 88 |   - random_state
 89 |   - deterministic
 90 |   - warm_start
 91 | - Monitoring:
 92 |   - verbosity
 93 |   - update_verbosity
 94 |   - print_precision
 95 |   - progress
 96 |   - logger_spec
 97 |   - input_stream
 98 | - Environment:
 99 |   - temp_equation_file
100 |   - tempdir
101 |   - delete_tempfiles
102 |   - update
103 | - Exporting the Results:
104 |   - output_directory
105 |   - run_id
106 |   - output_jax_format
107 |   - output_torch_format
108 |   - extra_sympy_mappings
109 |   - extra_torch_mappings
110 |   - extra_jax_mappings
111 | 


--------------------------------------------------------------------------------
/pysr/sklearn_monkeypatch.py:
--------------------------------------------------------------------------------
 1 | # Here, we monkey patch scikit-learn until this
 2 | # issue is fixed: https://github.com/scikit-learn/scikit-learn/issues/25922
 3 | from sklearn.utils import validation
 4 | 
 5 | 
 6 | def _ensure_no_complex_data(*args, **kwargs): ...
 7 | 
 8 | 
 9 | try:
10 |     validation._ensure_no_complex_data = _ensure_no_complex_data
11 | except AttributeError:  # pragma: no cover
12 |     ...
13 | 


--------------------------------------------------------------------------------
/pysr/test/__init__.py:
--------------------------------------------------------------------------------
 1 | from .test_cli import get_runtests as get_runtests_cli
 2 | from .test_dev import runtests as runtests_dev
 3 | from .test_jax import runtests as runtests_jax
 4 | from .test_main import runtests
 5 | from .test_startup import runtests as runtests_startup
 6 | from .test_torch import runtests as runtests_torch
 7 | 
 8 | __all__ = [
 9 |     "runtests",
10 |     "runtests_jax",
11 |     "runtests_torch",
12 |     "get_runtests_cli",
13 |     "runtests_startup",
14 |     "runtests_dev",
15 | ]
16 | 


--------------------------------------------------------------------------------
/pysr/test/__main__.py:
--------------------------------------------------------------------------------
 1 | """CLI for running PySR's test suite."""
 2 | 
 3 | import argparse
 4 | 
 5 | from . import *
 6 | 
 7 | if __name__ == "__main__":
 8 |     # Get args:
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument(
11 |         "test",
12 |         nargs="*",
13 |         help="DEPRECATED. Use `python -m pysr test [tests...]` instead.",
14 |     )
15 | 


--------------------------------------------------------------------------------
/pysr/test/generate_dev_juliapkg.py:
--------------------------------------------------------------------------------
 1 | # Example call:
 2 | ## python3 generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl
 3 | import json
 4 | import sys
 5 | 
 6 | juliapkg_json = sys.argv[1]
 7 | path_to_srjl = sys.argv[2]
 8 | 
 9 | with open(juliapkg_json, "r") as f:
10 |     juliapkg = json.load(f)
11 | 
12 | juliapkg["packages"]["SymbolicRegression"] = {
13 |     "uuid": juliapkg["packages"]["SymbolicRegression"]["uuid"],
14 |     "path": path_to_srjl,
15 |     "dev": True,
16 | }
17 | 
18 | with open(juliapkg_json, "w") as f:
19 |     json.dump(juliapkg, f, indent=4)
20 | 


--------------------------------------------------------------------------------
/pysr/test/nb_sanitize.cfg:
--------------------------------------------------------------------------------
1 | [pathnames]
2 | regex: /[a-zA-Z0-9_\- .\/]+/pysr/sr\.py
3 | replace: PATH
4 | 


--------------------------------------------------------------------------------
/pysr/test/params.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | import os
 3 | import unittest
 4 | 
 5 | from pysr import PySRRegressor
 6 | 
 7 | DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
 8 | DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
 9 | DEFAULT_POPULATIONS = DEFAULT_PARAMS["populations"].default
10 | DEFAULT_NCYCLES = DEFAULT_PARAMS["ncycles_per_iteration"].default
11 | 
12 | skip_if_beartype = unittest.skipIf(
13 |     os.environ.get("PYSR_USE_BEARTYPE", "0") == "1",
14 |     "Skipping because beartype would fail test",
15 | )
16 | 


--------------------------------------------------------------------------------
/pysr/test/test_cli.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from textwrap import dedent
 3 | 
 4 | from click import testing as click_testing
 5 | 
 6 | 
 7 | def get_runtests():
 8 |     # Lazy load to avoid circular imports.
 9 | 
10 |     from .._cli.main import pysr
11 | 
12 |     class TestCli(unittest.TestCase):
13 |         # TODO: Include test for custom project here.
14 |         def setUp(self):
15 |             self.cli_runner = click_testing.CliRunner()
16 | 
17 |         def test_help_on_all_commands(self):
18 |             expected = dedent(
19 |                 """
20 |                     Usage: pysr [OPTIONS] COMMAND [ARGS]...
21 | 
22 |                     Options:
23 |                       --help  Show this message and exit.
24 | 
25 |                     Commands:
26 |                       install  DEPRECATED (dependencies are now installed at import).
27 |                       test     Run parts of the PySR test suite.
28 |                 """
29 |             )
30 |             result = self.cli_runner.invoke(pysr, ["--help"])
31 |             self.assertEqual(result.output.strip(), expected.strip())
32 |             self.assertEqual(result.exit_code, 0)
33 | 
34 |         def test_help_on_install(self):
35 |             expected = dedent(
36 |                 """
37 |                 Usage: pysr install [OPTIONS]
38 | 
39 |                   DEPRECATED (dependencies are now installed at import).
40 | 
41 |                 Options:
42 |                   -p, --project TEXT
43 |                   -q, --quiet         Disable logging.
44 |                   --precompile
45 |                   --no-precompile
46 |                   --help              Show this message and exit.
47 |                 """
48 |             )
49 |             result = self.cli_runner.invoke(pysr, ["install", "--help"])
50 |             self.assertEqual(result.output.strip(), expected.strip())
51 |             self.assertEqual(result.exit_code, 0)
52 | 
53 |         def test_help_on_test(self):
54 |             expected = dedent(
55 |                 """
56 |                 Usage: pysr test [OPTIONS] TESTS
57 | 
58 |                   Run parts of the PySR test suite.
59 | 
60 |                   Choose from main, jax, torch, cli, dev, and startup. You can give multiple
61 |                   tests, separated by commas.
62 | 
63 |                 Options:
64 |                   -k TEXT  Filter expressions to select specific tests.
65 |                   --help   Show this message and exit.
66 |                 """
67 |             )
68 |             result = self.cli_runner.invoke(pysr, ["test", "--help"])
69 |             self.assertEqual(result.output.strip(), expected.strip())
70 |             self.assertEqual(result.exit_code, 0)
71 | 
72 |     def runtests(just_tests=False):
73 |         """Run all tests in cliTest.py."""
74 |         tests = [TestCli]
75 |         if just_tests:
76 |             return tests
77 |         loader = unittest.TestLoader()
78 |         suite = unittest.TestSuite()
79 |         for test in tests:
80 |             suite.addTests(loader.loadTestsFromTestCase(test))
81 |         runner = unittest.TextTestRunner()
82 |         return runner.run(suite)
83 | 
84 |     return runtests
85 | 


--------------------------------------------------------------------------------
/pysr/test/test_dev.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | import unittest
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | class TestDev(unittest.TestCase):
 8 |     def test_simple_change_to_backend(self):
 9 |         """Test that we can use a development version of SymbolicRegression.jl"""
10 |         PYSR_TEST_JULIA_VERSION = os.environ.get("PYSR_TEST_JULIA_VERSION", "1.11")
11 |         PYSR_TEST_PYTHON_VERSION = os.environ.get("PYSR_TEST_PYTHON_VERSION", "3.12")
12 |         build_result = subprocess.run(
13 |             [
14 |                 "docker",
15 |                 "build",
16 |                 "-t",
17 |                 "pysr-dev",
18 |                 "--build-arg",
19 |                 f"JLVERSION={PYSR_TEST_JULIA_VERSION}",
20 |                 "--build-arg",
21 |                 f"PYVERSION={PYSR_TEST_PYTHON_VERSION}",
22 |                 "-f",
23 |                 "pysr/test/test_dev_pysr.dockerfile",
24 |                 ".",
25 |             ],
26 |             env=os.environ,
27 |             cwd=Path(__file__).parent.parent.parent,
28 |             universal_newlines=True,
29 |         )
30 |         self.assertEqual(build_result.returncode, 0)
31 |         test_result = subprocess.run(
32 |             [
33 |                 "docker",
34 |                 "run",
35 |                 "--rm",
36 |                 "pysr-dev",
37 |                 "python3",
38 |                 "-c",
39 |                 "from pysr import SymbolicRegression as SR; print(SR.__test_function())",
40 |             ],
41 |             stdout=subprocess.PIPE,
42 |             stderr=subprocess.PIPE,
43 |             env=os.environ,
44 |             cwd=Path(__file__).parent.parent.parent,
45 |         )
46 |         self.assertEqual(test_result.returncode, 0)
47 |         self.assertEqual(test_result.stdout.decode("utf-8").strip(), "2.3")
48 | 
49 | 
50 | def runtests(just_tests=False):
51 |     tests = [TestDev]
52 |     if just_tests:
53 |         return tests
54 |     suite = unittest.TestSuite()
55 |     loader = unittest.TestLoader()
56 |     for test in tests:
57 |         suite.addTests(loader.loadTestsFromTestCase(test))
58 |     runner = unittest.TextTestRunner()
59 |     return runner.run(suite)
60 | 


--------------------------------------------------------------------------------
/pysr/test/test_dev_pysr.dockerfile:
--------------------------------------------------------------------------------
 1 | # This dockerfile simulates a user installation that
 2 | # tries to manually edit SymbolicRegression.jl and
 3 | # use it from PySR.
 4 | 
 5 | ARG JLVERSION=1.11.1
 6 | ARG PYVERSION=3.12.6
 7 | ARG BASE_IMAGE=bullseye
 8 | 
 9 | FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl
10 | FROM python:${PYVERSION}-${BASE_IMAGE}
11 | 
12 | # Merge Julia image:
13 | COPY --from=jl /usr/local/julia /usr/local/julia
14 | ENV PATH="/usr/local/julia/bin:${PATH}"
15 | 
16 | WORKDIR /pysr
17 | 
18 | # Install PySR:
19 | # We do a minimal copy so it doesn't need to rerun at every file change:
20 | ADD ./pyproject.toml /pysr/pyproject.toml
21 | ADD ./LICENSE /pysr/LICENSE
22 | ADD ./README.md /pysr/README.md
23 | 
24 | RUN mkdir /pysr/pysr
25 | ADD ./pysr/*.py /pysr/pysr/
26 | ADD ./pysr/juliapkg.json /pysr/pysr/juliapkg.json
27 | 
28 | RUN mkdir /pysr/pysr/_cli
29 | ADD ./pysr/_cli/*.py /pysr/pysr/_cli/
30 | 
31 | RUN mkdir /pysr/pysr/test
32 | 
33 | # Now, we create a custom version of SymbolicRegression.jl
34 | # First, we get the version from juliapkg.json:
35 | RUN python3 -c 'import json; print(json.load(open("/pysr/pysr/juliapkg.json", "r"))["packages"]["SymbolicRegression"]["version"])' > /pysr/sr_version
36 | 
37 | # Remove any = or ^ or ~ from the version:
38 | RUN cat /pysr/sr_version | sed 's/[\^=~]//g' > /pysr/sr_version_processed
39 | 
40 | # Now, we check out the version of SymbolicRegression.jl that PySR is using:
41 | RUN git clone -b "v$(cat /pysr/sr_version_processed)" --single-branch https://github.com/MilesCranmer/SymbolicRegression.jl /srjl
42 | 
43 | # Edit SymbolicRegression.jl to create a new function.
44 | # We want to put this function immediately after `module SymbolicRegression`:
45 | RUN sed -i 's/module SymbolicRegression/module SymbolicRegression\n__test_function() = 2.3/' /srjl/src/SymbolicRegression.jl
46 | 
47 | # Edit PySR to use the custom version of SymbolicRegression.jl:
48 | ADD ./pysr/test/generate_dev_juliapkg.py /generate_dev_juliapkg.py
49 | RUN python3 /generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl
50 | 
51 | # Install and pre-compile
52 | RUN pip3 install --no-cache-dir . && python3 -c 'import pysr'
53 | 


--------------------------------------------------------------------------------
/pysr/test/test_jax.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from functools import partial
  3 | from pathlib import Path
  4 | 
  5 | import numpy as np
  6 | import pandas as pd
  7 | import sympy  # type: ignore
  8 | 
  9 | import pysr
 10 | from pysr import PySRRegressor, sympy2jax
 11 | 
 12 | 
 13 | class TestJAX(unittest.TestCase):
 14 |     def setUp(self):
 15 |         np.random.seed(0)
 16 |         from jax import numpy as jnp
 17 | 
 18 |         self.jnp = jnp
 19 | 
 20 |     def test_sympy2jax(self):
 21 |         from jax import random
 22 | 
 23 |         x, y, z = sympy.symbols("x y z")
 24 |         cosx = 1.0 * sympy.cos(x) + y
 25 |         key = random.PRNGKey(0)
 26 |         X = random.normal(key, (1000, 2))
 27 |         true = 1.0 * self.jnp.cos(X[:, 0]) + X[:, 1]
 28 |         f, params = sympy2jax(cosx, [x, y, z])
 29 |         self.assertTrue(self.jnp.all(self.jnp.isclose(f(X, params), true)).item())
 30 | 
 31 |     def test_pipeline_pandas(self):
 32 | 
 33 |         X = pd.DataFrame(np.random.randn(100, 10))
 34 |         y = np.ones(X.shape[0])
 35 |         model = PySRRegressor(
 36 |             progress=False,
 37 |             max_evals=10000,
 38 |             output_jax_format=True,
 39 |         )
 40 |         model.fit(X, y)
 41 | 
 42 |         equations = pd.DataFrame(
 43 |             {
 44 |                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
 45 |                 "Loss": [1.0, 0.1, 1e-5],
 46 |                 "Complexity": [1, 2, 3],
 47 |             }
 48 |         )
 49 | 
 50 |         for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]:
 51 |             equations["Complexity Loss Equation".split(" ")].to_csv(
 52 |                 Path(model.output_directory_) / model.run_id_ / fname
 53 |             )
 54 | 
 55 |         model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_))
 56 |         jformat = model.jax()
 57 | 
 58 |         np.testing.assert_almost_equal(
 59 |             np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
 60 |             np.square(np.cos(X.values[:, 1])),  # Select feature 1
 61 |             decimal=3,
 62 |         )
 63 | 
 64 |     def test_pipeline(self):
 65 |         X = np.random.randn(100, 10)
 66 |         y = np.ones(X.shape[0])
 67 |         model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
 68 |         model.fit(X, y)
 69 | 
 70 |         equations = pd.DataFrame(
 71 |             {
 72 |                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
 73 |                 "Loss": [1.0, 0.1, 1e-5],
 74 |                 "Complexity": [1, 2, 3],
 75 |             }
 76 |         )
 77 | 
 78 |         for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]:
 79 |             equations["Complexity Loss Equation".split(" ")].to_csv(
 80 |                 Path(model.output_directory_) / model.run_id_ / fname
 81 |             )
 82 | 
 83 |         model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_))
 84 |         jformat = model.jax()
 85 | 
 86 |         np.testing.assert_almost_equal(
 87 |             np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
 88 |             np.square(np.cos(X[:, 1])),  # Select feature 1
 89 |             decimal=3,
 90 |         )
 91 | 
 92 |     def test_avoid_simplification(self):
 93 |         ex = pysr.export_sympy.pysr2sympy(
 94 |             "square(exp(sign(0.44796443))) + 1.5 * x1",
 95 |             feature_names_in=["x1"],
 96 |             extra_sympy_mappings={"square": lambda x: x**2},
 97 |         )
 98 |         f, params = pysr.export_jax.sympy2jax(ex, [sympy.symbols("x1")])
 99 |         key = np.random.RandomState(0)
100 |         X = key.randn(10, 1)
101 |         np.testing.assert_almost_equal(
102 |             np.array(f(self.jnp.array(X), params)),
103 |             np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0],
104 |             decimal=3,
105 |         )
106 | 
107 |     def test_issue_656(self):
108 |         import sympy  # type: ignore
109 | 
110 |         E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
111 |         f, params = pysr.export_jax.sympy2jax(E_plus_x1, [sympy.symbols("x1")])
112 |         key = np.random.RandomState(0)
113 |         X = key.randn(10, 1)
114 |         np.testing.assert_almost_equal(
115 |             np.array(f(self.jnp.array(X), params)),
116 |             np.exp(1) + X[:, 0],
117 |             decimal=3,
118 |         )
119 | 
120 |     def test_feature_selection_custom_operators(self):
121 |         rstate = np.random.RandomState(0)
122 |         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
123 | 
124 |         def cos_approx(x):
125 |             return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
126 | 
127 |         sp_cos_approx = sympy.Function("cos_approx")
128 | 
129 |         y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
130 | 
131 |         model = PySRRegressor(
132 |             progress=False,
133 |             unary_operators=["cos_approx(x) = 1 - x^2 / 2 + x^4 / 24 + x^6 / 720"],
134 |             select_k_features=3,
135 |             maxsize=10,
136 |             early_stop_condition=1e-5,
137 |             extra_sympy_mappings={"cos_approx": sp_cos_approx},
138 |             extra_jax_mappings={
139 |                 sp_cos_approx: "(lambda x: 1 - x**2 / 2 + x**4 / 24 + x**6 / 720)"
140 |             },
141 |             random_state=0,
142 |             deterministic=True,
143 |             parallelism="serial",
144 |         )
145 |         np.random.seed(0)
146 |         model.fit(X.values, y.values)
147 |         f, parameters = model.jax().values()
148 |         jax_prediction = partial(f, parameters=parameters)
149 |         jax_output = jax_prediction(X.values)
150 |         np.testing.assert_almost_equal(y.values, jax_output, decimal=3)
151 | 
152 | 
153 | def runtests(just_tests=False):
154 |     """Run all tests in test_jax.py."""
155 |     tests = [TestJAX]
156 |     if just_tests:
157 |         return tests
158 |     loader = unittest.TestLoader()
159 |     suite = unittest.TestSuite()
160 |     for test in tests:
161 |         suite.addTests(loader.loadTestsFromTestCase(test))
162 |     runner = unittest.TextTestRunner()
163 |     return runner.run(suite)
164 | 


--------------------------------------------------------------------------------
/pysr/test/test_nb.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.\n"
 13 |      ]
 14 |     }
 15 |    ],
 16 |    "source": [
 17 |     "# NBVAL_IGNORE_OUTPUT\n",
 18 |     "import numpy as np\n",
 19 |     "from pysr import PySRRegressor, jl"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "name": "stdout",
 29 |      "output_type": "stream",
 30 |      "text": [
 31 |       "3\n"
 32 |      ]
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "%%julia\n",
 37 |     "\n",
 38 |     "# Automatically activates Julia magic\n",
 39 |     "\n",
 40 |     "x = 1\n",
 41 |     "println(x + 2)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 3,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "name": "stdout",
 51 |      "output_type": "stream",
 52 |      "text": [
 53 |       "4\n"
 54 |      ]
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "%julia println(x + 3)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 4,
 64 |    "metadata": {},
 65 |    "outputs": [
 66 |     {
 67 |      "data": {
 68 |       "text/plain": [
 69 |        "my_loss (generic function with 1 method)"
 70 |       ]
 71 |      },
 72 |      "execution_count": 4,
 73 |      "metadata": {},
 74 |      "output_type": "execute_result"
 75 |     }
 76 |    ],
 77 |    "source": [
 78 |     "%%julia\n",
 79 |     "function my_loss(x)\n",
 80 |     "    x ^ 2\n",
 81 |     "end"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 5,
 87 |    "metadata": {},
 88 |    "outputs": [
 89 |     {
 90 |      "data": {
 91 |       "text/plain": [
 92 |        "4"
 93 |       ]
 94 |      },
 95 |      "execution_count": 5,
 96 |      "metadata": {},
 97 |      "output_type": "execute_result"
 98 |     }
 99 |    ],
100 |    "source": [
101 |     "%julia my_loss(2)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 6,
107 |    "metadata": {},
108 |    "outputs": [
109 |     {
110 |      "data": {
111 |       "text/plain": [
112 |        "'PySRRegressor.equations_ = None'"
113 |       ]
114 |      },
115 |      "execution_count": 6,
116 |      "metadata": {},
117 |      "output_type": "execute_result"
118 |     }
119 |    ],
120 |    "source": [
121 |     "rstate = np.random.RandomState(0)\n",
122 |     "X = np.random.randn(10, 2)\n",
123 |     "y = np.random.randn(10)\n",
124 |     "\n",
125 |     "model = PySRRegressor(deterministic=True, parallelism=\"serial\", random_state=0, verbosity=0, progress=False, niterations=1, ncycles_per_iteration=1)\n",
126 |     "str(model)"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 7,
132 |    "metadata": {},
133 |    "outputs": [
134 |     {
135 |      "data": {
136 |       "text/plain": [
137 |        "pandas.core.frame.DataFrame"
138 |       ]
139 |      },
140 |      "execution_count": 7,
141 |      "metadata": {},
142 |      "output_type": "execute_result"
143 |     }
144 |    ],
145 |    "source": [
146 |     "model.fit(X, y)\n",
147 |     "type(model.equations_)"
148 |    ]
149 |   }
150 |  ],
151 |  "metadata": {
152 |   "kernelspec": {
153 |    "display_name": "Python 3 (ipykernel)",
154 |    "language": "python",
155 |    "name": "python3"
156 |   },
157 |   "language_info": {
158 |    "codemirror_mode": {
159 |     "name": "ipython",
160 |     "version": 3
161 |    },
162 |    "file_extension": ".py",
163 |    "mimetype": "text/x-python",
164 |    "name": "python",
165 |    "nbconvert_exporter": "python",
166 |    "pygments_lexer": "ipython3",
167 |    "version": "3.11.2"
168 |   }
169 |  },
170 |  "nbformat": 4,
171 |  "nbformat_minor": 2
172 | }
173 | 


--------------------------------------------------------------------------------
/pysr/test/test_startup.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import platform
  3 | import subprocess
  4 | import sys
  5 | import tempfile
  6 | import textwrap
  7 | import unittest
  8 | from pathlib import Path
  9 | 
 10 | import numpy as np
 11 | 
 12 | from pysr import PySRRegressor, jl
 13 | from pysr.julia_import import jl_version
 14 | from pysr.julia_registry_helpers import PREFERENCE_KEY, try_with_registry_fallback
 15 | 
 16 | from .params import DEFAULT_NITERATIONS, DEFAULT_POPULATIONS
 17 | 
 18 | 
 19 | class TestStartup(unittest.TestCase):
 20 |     """Various tests related to starting up PySR."""
 21 | 
 22 |     def setUp(self):
 23 |         # Using inspect,
 24 |         # get default niterations from PySRRegressor, and double them:
 25 |         self.default_test_kwargs = dict(
 26 |             progress=False,
 27 |             model_selection="accuracy",
 28 |             niterations=DEFAULT_NITERATIONS * 2,
 29 |             populations=DEFAULT_POPULATIONS * 2,
 30 |             temp_equation_file=True,
 31 |         )
 32 |         self.rstate = np.random.RandomState(0)
 33 |         self.X = self.rstate.randn(100, 5)
 34 | 
 35 |     def test_warm_start_from_file(self):
 36 |         """Test that we can warm start in another process."""
 37 |         if platform.system() == "Windows":
 38 |             self.skipTest("Warm start test incompatible with Windows")
 39 | 
 40 |         with tempfile.TemporaryDirectory() as tmpdirname:
 41 |             model = PySRRegressor(
 42 |                 **self.default_test_kwargs,
 43 |                 unary_operators=["cos"],
 44 |             )
 45 |             model.warm_start = True
 46 |             model.temp_equation_file = False
 47 |             model.output_directory = tmpdirname
 48 |             model.run_id = "test"
 49 |             model.deterministic = True
 50 |             model.multithreading = False
 51 |             model.random_state = 0
 52 |             model.procs = 0
 53 |             model.early_stop_condition = 1e-10
 54 | 
 55 |             rstate = np.random.RandomState(0)
 56 |             X = rstate.randn(100, 2)
 57 |             y = np.cos(X[:, 0]) ** 2
 58 |             model.fit(X, y)
 59 | 
 60 |             best_loss = model.equations_.iloc[-1]["loss"]
 61 | 
 62 |             # Save X and y to a file:
 63 |             X_file = Path(tmpdirname) / "X.npy"
 64 |             y_file = Path(tmpdirname) / "y.npy"
 65 |             np.save(X_file, X)
 66 |             np.save(y_file, y)
 67 |             # Now, create a new process and warm start from the file:
 68 |             result = subprocess.run(
 69 |                 [
 70 |                     sys.executable,
 71 |                     "-c",
 72 |                     textwrap.dedent(
 73 |                         f"""
 74 |                         from pysr import PySRRegressor
 75 |                         import numpy as np
 76 | 
 77 |                         X = np.load("{X_file}")
 78 |                         y = np.load("{y_file}")
 79 | 
 80 |                         print("Loading model from file")
 81 |                         model = PySRRegressor.from_file(
 82 |                             run_directory="{str(Path(tmpdirname) / model.run_id_)}"
 83 |                         )
 84 | 
 85 |                         assert model.julia_state_ is not None
 86 | 
 87 |                         # Reset saved equations; should be loaded from state!
 88 |                         model.equations_ = None
 89 |                         model.equation_file_contents_ = None
 90 | 
 91 |                         model.warm_start = True
 92 |                         model.niterations = 0
 93 |                         model.max_evals = 0
 94 |                         model.ncycles_per_iteration = 0
 95 | 
 96 |                         model.fit(X, y)
 97 | 
 98 |                         best_loss = model.equations_.iloc[-1]["loss"]
 99 | 
100 |                         assert best_loss <= {best_loss}
101 |                     """
102 |                     ),
103 |                 ],
104 |                 stdout=subprocess.PIPE,
105 |                 stderr=subprocess.PIPE,
106 |                 env=os.environ,
107 |             )
108 |             self.assertEqual(result.returncode, 0)
109 |             self.assertIn("Loading model from file", result.stdout.decode())
110 |             self.assertIn("Started!", result.stderr.decode())
111 | 
112 |     def test_bad_startup_options(self):
113 |         warning_tests = [
114 |             dict(
115 |                 code='import os; os.environ["PYTHON_JULIACALL_HANDLE_SIGNALS"] = "no"; import pysr',
116 |                 msg="PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set",
117 |             ),
118 |             dict(
119 |                 code='import os; os.environ["PYTHON_JULIACALL_THREADS"] = "1"; import pysr',
120 |                 msg="PYTHON_JULIACALL_THREADS environment variable is set",
121 |             ),
122 |             dict(
123 |                 code="import juliacall; import pysr",
124 |                 msg="juliacall module already imported.",
125 |             ),
126 |         ]
127 |         for warning_test in warning_tests:
128 |             result = subprocess.run(
129 |                 [sys.executable, "-c", warning_test["code"]],
130 |                 stdout=subprocess.PIPE,
131 |                 stderr=subprocess.PIPE,
132 |                 env=os.environ,
133 |             )
134 |             self.assertIn(warning_test["msg"], result.stderr.decode())
135 | 
136 |     def test_notebook(self):
137 |         if platform.system() == "Windows":
138 |             self.skipTest("Notebook test incompatible with Windows")
139 |         if not os.access(Path(__file__).parent, os.W_OK):
140 |             self.skipTest("Read-only file system")
141 | 
142 |         notebook_file = Path(__file__).parent / "test_nb.ipynb"
143 |         sanitize_file = Path(__file__).parent / "nb_sanitize.cfg"
144 | 
145 |         if not (notebook_file.exists() and sanitize_file.exists()):
146 |             self.skipTest("Files not available for testing")
147 | 
148 |         result = subprocess.run(
149 |             [
150 |                 sys.executable,
151 |                 "-m",
152 |                 "pytest",
153 |                 "--nbval",
154 |                 str(notebook_file),
155 |                 "--nbval-sanitize-with",
156 |                 str(sanitize_file),
157 |             ],
158 |             env=os.environ,
159 |         )
160 |         self.assertEqual(result.returncode, 0)
161 | 
162 | 
163 | class TestRegistryHelper(unittest.TestCase):
164 |     """Test the custom Julia registry preference handling."""
165 | 
166 |     def setUp(self):
167 |         self.old_value = os.environ.get(PREFERENCE_KEY, None)
168 |         self.recorded_env_vars = []
169 |         self.hits = 0
170 | 
171 |         def failing_operation():
172 |             self.recorded_env_vars.append(os.environ[PREFERENCE_KEY])
173 |             self.hits += 1
174 |             # Just add some package I know will not exist and also not be in the dependency chain:
175 |             jl.Pkg.add(name="AirspeedVelocity", version="100.0.0")
176 | 
177 |         self.failing_operation = failing_operation
178 | 
179 |     def tearDown(self):
180 |         if self.old_value is not None:
181 |             os.environ[PREFERENCE_KEY] = self.old_value
182 |         else:
183 |             os.environ.pop(PREFERENCE_KEY, None)
184 | 
185 |     def test_successful_operation(self):
186 |         self.assertEqual(try_with_registry_fallback(lambda s: s, "success"), "success")
187 | 
188 |     def test_non_julia_errors_reraised(self):
189 |         with self.assertRaises(SyntaxError) as context:
190 |             try_with_registry_fallback(lambda: exec("invalid syntax !@#$"))
191 |         self.assertNotIn("JuliaError", str(context.exception))
192 | 
193 |     def test_julia_error_triggers_fallback(self):
194 |         os.environ[PREFERENCE_KEY] = "conservative"
195 | 
196 |         with self.assertWarns(Warning) as warn_context:
197 |             with self.assertRaises(Exception) as error_context:
198 |                 try_with_registry_fallback(self.failing_operation)
199 | 
200 |         self.assertIn(
201 |             "Unsatisfiable requirements detected", str(error_context.exception)
202 |         )
203 |         self.assertIn(
204 |             "Initial Julia registry operation failed. Attempting to use the `eager` registry flavor of the Julia",
205 |             str(warn_context.warning),
206 |         )
207 | 
208 |         # Verify both modes are tried in order
209 |         self.assertEqual(self.recorded_env_vars, ["conservative", "eager"])
210 |         self.assertEqual(self.hits, 2)
211 | 
212 |         # Verify environment is restored
213 |         self.assertEqual(os.environ[PREFERENCE_KEY], "conservative")
214 | 
215 |     def test_eager_mode_fails_directly(self):
216 |         os.environ[PREFERENCE_KEY] = "eager"
217 | 
218 |         with self.assertRaises(Exception) as context:
219 |             try_with_registry_fallback(self.failing_operation)
220 | 
221 |         self.assertIn("Unsatisfiable requirements detected", str(context.exception))
222 |         self.assertEqual(
223 |             self.recorded_env_vars, ["eager"]
224 |         )  # Should only try eager mode
225 |         self.assertEqual(self.hits, 1)
226 | 
227 | 
228 | def runtests(just_tests=False):
229 |     tests = [TestStartup, TestRegistryHelper]
230 |     if just_tests:
231 |         return tests
232 |     suite = unittest.TestSuite()
233 |     loader = unittest.TestLoader()
234 |     for test in tests:
235 |         suite.addTests(loader.loadTestsFromTestCase(test))
236 |     runner = unittest.TextTestRunner()
237 |     return runner.run(suite)
238 | 


--------------------------------------------------------------------------------
/pysr/test/test_torch.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from pathlib import Path
  3 | 
  4 | import numpy as np
  5 | import pandas as pd
  6 | import sympy  # type: ignore
  7 | 
  8 | import pysr
  9 | from pysr import PySRRegressor, sympy2torch
 10 | 
 11 | 
 12 | class TestTorch(unittest.TestCase):
 13 |     def setUp(self):
 14 |         np.random.seed(0)
 15 | 
 16 |         # Need to import after juliacall:
 17 |         import torch
 18 | 
 19 |         self.torch = torch
 20 | 
 21 |     def test_sympy2torch(self):
 22 |         x, y, z = sympy.symbols("x y z")
 23 |         cosx = 1.0 * sympy.cos(x) + y
 24 | 
 25 |         X = self.torch.tensor(np.random.randn(1000, 3))
 26 |         true = 1.0 * self.torch.cos(X[:, 0]) + X[:, 1]
 27 |         torch_module = sympy2torch(cosx, [x, y, z])
 28 |         self.assertTrue(
 29 |             np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
 30 |         )
 31 | 
 32 |     def test_pipeline_pandas(self):
 33 |         X = pd.DataFrame(np.random.randn(100, 10))
 34 |         y = np.ones(X.shape[0])
 35 |         model = PySRRegressor(
 36 |             progress=False,
 37 |             max_evals=10000,
 38 |             model_selection="accuracy",
 39 |             extra_sympy_mappings={},
 40 |             output_torch_format=True,
 41 |         )
 42 |         model.fit(X, y)
 43 | 
 44 |         equations = pd.DataFrame(
 45 |             {
 46 |                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
 47 |                 "Loss": [1.0, 0.1, 1e-5],
 48 |                 "Complexity": [1, 2, 3],
 49 |             }
 50 |         )
 51 | 
 52 |         for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]:
 53 |             equations["Complexity Loss Equation".split(" ")].to_csv(
 54 |                 Path(model.output_directory_) / model.run_id_ / fname
 55 |             )
 56 | 
 57 |         model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_))
 58 |         tformat = model.pytorch()
 59 |         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
 60 | 
 61 |         np.testing.assert_almost_equal(
 62 |             tformat(self.torch.tensor(X.values)).detach().numpy(),
 63 |             np.square(np.cos(X.values[:, 1])),  # Selection 1st feature
 64 |             decimal=3,
 65 |         )
 66 | 
 67 |     def test_pipeline(self):
 68 |         X = np.random.randn(100, 10)
 69 |         y = np.ones(X.shape[0])
 70 |         model = PySRRegressor(
 71 |             progress=False,
 72 |             max_evals=10000,
 73 |             model_selection="accuracy",
 74 |             output_torch_format=True,
 75 |         )
 76 |         model.fit(X, y)
 77 | 
 78 |         equations = pd.DataFrame(
 79 |             {
 80 |                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
 81 |                 "Loss": [1.0, 0.1, 1e-5],
 82 |                 "Complexity": [1, 2, 3],
 83 |             }
 84 |         )
 85 | 
 86 |         for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]:
 87 |             equations["Complexity Loss Equation".split(" ")].to_csv(
 88 |                 Path(model.output_directory_) / model.run_id_ / fname
 89 |             )
 90 | 
 91 |         model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_))
 92 | 
 93 |         tformat = model.pytorch()
 94 |         self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")
 95 | 
 96 |         np.testing.assert_almost_equal(
 97 |             tformat(self.torch.tensor(X)).detach().numpy(),
 98 |             np.square(np.cos(X[:, 1])),  # 2nd feature
 99 |             decimal=3,
100 |         )
101 | 
102 |     def test_mod_mapping(self):
103 |         x, y, z = sympy.symbols("x y z")
104 |         expression = x**2 + sympy.atanh(sympy.Mod(y + 1, 2) - 1) * 3.2 * z
105 | 
106 |         module = sympy2torch(expression, [x, y, z])
107 | 
108 |         X = self.torch.rand(100, 3).float() * 10
109 | 
110 |         true_out = (
111 |             X[:, 0] ** 2
112 |             + self.torch.atanh(self.torch.fmod(X[:, 1] + 1, 2) - 1) * 3.2 * X[:, 2]
113 |         )
114 |         torch_out = module(X)
115 | 
116 |         np.testing.assert_array_almost_equal(
117 |             true_out.detach(), torch_out.detach(), decimal=3
118 |         )
119 | 
120 |     def test_custom_operator(self):
121 |         X = np.random.randn(100, 3)
122 |         y = np.ones(X.shape[0])
123 |         model = PySRRegressor(
124 |             progress=False,
125 |             max_evals=10000,
126 |             model_selection="accuracy",
127 |             output_torch_format=True,
128 |         )
129 |         model.fit(X, y)
130 | 
131 |         equations = pd.DataFrame(
132 |             {
133 |                 "Equation": ["1.0", "mycustomoperator(x1)"],
134 |                 "Loss": [1.0, 0.1],
135 |                 "Complexity": [1, 2],
136 |             }
137 |         )
138 | 
139 |         for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]:
140 |             equations["Complexity Loss Equation".split(" ")].to_csv(
141 |                 Path(model.output_directory_) / model.run_id_ / fname
142 |             )
143 | 
144 |         MyCustomOperator = sympy.Function("mycustomoperator")
145 | 
146 |         model.set_params(
147 |             extra_sympy_mappings={"mycustomoperator": MyCustomOperator},
148 |             extra_torch_mappings={MyCustomOperator: self.torch.sin},
149 |         )
150 |         # TODO: We shouldn't need to specify the run directory here.
151 |         model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_))
152 |         # self.assertEqual(str(model.sympy()), "sin(x1)")
153 |         # Will automatically use the set global state from get_hof.
154 | 
155 |         tformat = model.pytorch()
156 |         self.assertEqual(
157 |             str(tformat), "_SingleSymPyModule(expression=mycustomoperator(x1))"
158 |         )
159 |         np.testing.assert_almost_equal(
160 |             tformat(self.torch.tensor(X)).detach().numpy(),
161 |             np.sin(X[:, 1]),
162 |             decimal=3,
163 |         )
164 | 
165 |     def test_avoid_simplification(self):
166 |         # SymPy should not simplify without permission
167 |         torch = self.torch
168 |         ex = pysr.export_sympy.pysr2sympy(
169 |             "square(exp(sign(0.44796443))) + 1.5 * x1",
170 |             # ^ Normally this would become exp1 and require
171 |             #   its own mapping
172 |             feature_names_in=["x1"],
173 |             extra_sympy_mappings={"square": lambda x: x**2},
174 |         )
175 |         m = pysr.export_torch.sympy2torch(ex, ["x1"])
176 |         rng = np.random.RandomState(0)
177 |         X = rng.randn(10, 1)
178 |         np.testing.assert_almost_equal(
179 |             m(torch.tensor(X)).detach().numpy(),
180 |             np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0],
181 |             decimal=3,
182 |         )
183 | 
184 |     def test_issue_656(self):
185 |         # Should correctly map numeric symbols to floats
186 |         E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
187 |         m = pysr.export_torch.sympy2torch(E_plus_x1, ["x1"])
188 |         X = np.random.randn(10, 1)
189 |         np.testing.assert_almost_equal(
190 |             m(self.torch.tensor(X)).detach().numpy(),
191 |             np.exp(1) + X[:, 0],
192 |             decimal=3,
193 |         )
194 | 
195 |     def test_feature_selection_custom_operators(self):
196 |         rstate = np.random.RandomState(0)
197 |         X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)})
198 | 
199 |         def cos_approx(x):
200 |             return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720
201 | 
202 |         y = X["k15"] ** 2 + 2 * cos_approx(X["k20"])
203 | 
204 |         model = PySRRegressor(
205 |             progress=False,
206 |             unary_operators=["cos_approx(x) = 1 - x^2 / 2 + x^4 / 24 + x^6 / 720"],
207 |             select_k_features=3,
208 |             maxsize=10,
209 |             early_stop_condition=1e-5,
210 |             extra_sympy_mappings={"cos_approx": cos_approx},
211 |             random_state=0,
212 |             deterministic=True,
213 |             parallelism="serial",
214 |         )
215 |         np.random.seed(0)
216 |         model.fit(X.values, y.values)
217 |         torch_module = model.pytorch()
218 | 
219 |         np_output = model.predict(X.values)
220 | 
221 |         torch_output = torch_module(self.torch.tensor(X.values)).detach().numpy()
222 | 
223 |         np.testing.assert_almost_equal(y.values, np_output, decimal=3)
224 |         np.testing.assert_almost_equal(y.values, torch_output, decimal=3)
225 | 
226 | 
227 | def runtests(just_tests=False):
228 |     """Run all tests in test_torch.py."""
229 |     tests = [TestTorch]
230 |     if just_tests:
231 |         return tests
232 |     loader = unittest.TestLoader()
233 |     suite = unittest.TestSuite()
234 |     for test in tests:
235 |         suite.addTests(loader.loadTestsFromTestCase(test))
236 |     runner = unittest.TextTestRunner()
237 |     return runner.run(suite)
238 | 


--------------------------------------------------------------------------------
/pysr/utils.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | import difflib
 4 | import inspect
 5 | import re
 6 | from pathlib import Path
 7 | from typing import Any, TypeVar, Union
 8 | 
 9 | from numpy import ndarray
10 | from sklearn.utils.validation import _check_feature_names_in  # type: ignore
11 | 
12 | try:
13 |     from typing import List
14 | except ImportError:
15 |     from typing_extensions import List
16 | 
17 | T = TypeVar("T", bound=Any)
18 | 
19 | ArrayLike = Union[ndarray, List[T]]
20 | PathLike = Union[str, Path]
21 | 
22 | 
23 | _regexp_im = re.compile(r"\b(\d+\.\d+)im\b")
24 | _regexp_im_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)im\b")
25 | _regexp_sci = re.compile(r"\b(\d+\.\d+)[eEfF]([+-]?\d+)\b")
26 | 
27 | 
28 | def _apply_regexp_im(x: str):
29 |     return _regexp_im.sub(r"\1j", x)
30 | 
31 | 
32 | def _apply_regexp_im_sci(x: str):
33 |     return _regexp_im_sci.sub(r"\1e\2j", x)
34 | 
35 | 
36 | def _apply_regexp_sci(x: str):
37 |     return _regexp_sci.sub(r"\1e\2", x)
38 | 
39 | 
40 | def _preprocess_julia_floats(s: str) -> str:
41 |     if isinstance(s, str):
42 |         s = _apply_regexp_im(s)
43 |         s = _apply_regexp_im_sci(s)
44 |         s = _apply_regexp_sci(s)
45 |     return s
46 | 
47 | 
48 | def _safe_check_feature_names_in(self, variable_names, generate_names=True):
49 |     """_check_feature_names_in with compat for old versions."""
50 |     try:
51 |         return _check_feature_names_in(
52 |             self, variable_names, generate_names=generate_names
53 |         )
54 |     except TypeError:
55 |         return _check_feature_names_in(self, variable_names)
56 | 
57 | 
58 | def _subscriptify(i: int) -> str:
59 |     """Converts integer to subscript text form.
60 | 
61 |     For example, 123 -> "₁₂₃".
62 |     """
63 |     return "".join([chr(0x2080 + int(c)) for c in str(i)])
64 | 
65 | 
66 | def _suggest_keywords(cls, k: str) -> list[str]:
67 |     valid_keywords = [
68 |         param
69 |         for param in inspect.signature(cls.__init__).parameters
70 |         if param not in ["self", "kwargs"]
71 |     ]
72 |     suggestions = difflib.get_close_matches(k, valid_keywords, n=3)
73 |     return suggestions
74 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # setup.py – retained only for users who still type python setup.py ..."
 2 | import sys
 3 | 
 4 | sys.stderr.write(
 5 |     """⚠️  PySR uses pyproject.toml instead of setup.py.
 6 | 
 7 | Install from a checkout with:
 8 |     python -m pip install .       # normal
 9 |     python -m pip install -e .    # editable (pip ≥21.3)
10 | 
11 | Or install from PyPI with:
12 |     pip install pysr
13 | """
14 | )
15 | sys.exit(1)
16 | 


--------------------------------------------------------------------------------