├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml ├── codecov.yml ├── dependabot.yml └── workflows │ ├── CI.yml │ ├── CI_Windows.yml │ ├── CI_apptainer.yml │ ├── CI_conda_forge.yml │ ├── CI_docker.yml │ ├── CI_docker_large_nightly.yml │ ├── CI_large_nightly.yml │ ├── CI_mac.yml │ ├── codeql-analysis.yml │ ├── docker_deploy.yml │ ├── docs.yml │ ├── pypi_deploy.yml │ ├── update_backend.yml │ └── update_backend_version.py ├── .gitignore ├── .pre-commit-config.yaml ├── Apptainer.def ├── CITATION.md ├── CONTRIBUTORS.md ├── Dockerfile ├── LICENSE ├── README.md ├── benchmarks ├── README.md ├── benchmark.sh ├── hyperparamopt.py ├── print_best_model.py └── space.py ├── docs ├── .gitignore ├── README.md ├── _api.md ├── all_contributors │ ├── .all-contributorsrc │ ├── .gitignore │ ├── package.json │ ├── run_all_contrib.sh │ └── yarn.lock ├── api-advanced.md ├── assets │ ├── 87712EA9B4B3CB1B.png │ ├── favicon.png │ ├── pysr_logo.svg │ └── pysr_logo_reduced.svg ├── backend.md ├── examples.md ├── gen_docs.sh ├── gen_param_docs.py ├── generate_papers.py ├── images │ ├── Planar_relation.png │ ├── SyReg_GasConc.png │ ├── Y_Mgal_Simba.png │ ├── back_to_formula.png │ ├── cloud_cover.jpg │ ├── economic_theory_gravity.png │ ├── electronnegativity_introduction.jpg │ ├── example_plot.png │ ├── hi_mass.png │ ├── hod_importances.png │ ├── hyperbolic_volume.png │ ├── illustris_example.png │ ├── jet_background_diagram.jpg │ ├── kidger_thesis.png │ └── rediscovering_gravity.png ├── js │ └── mathjax.js ├── operators.md ├── options.md ├── papers.yml ├── requirements.txt ├── stylesheets │ ├── extra.css │ └── papers_header.txt └── tuning.md ├── environment.yml ├── example.py ├── examples └── pysr_demo.ipynb ├── mkdocs.yml ├── mypy.ini ├── pyproject.toml ├── pysr ├── .gitignore ├── __init__.py ├── __main__.py ├── _cli │ ├── __init__.py │ └── main.py ├── denoising.py ├── deprecated.py ├── export.py ├── export_jax.py ├── export_latex.py 
├── export_numpy.py ├── export_sympy.py ├── export_torch.py ├── expression_specs.py ├── feature_selection.py ├── julia_extensions.py ├── julia_helpers.py ├── julia_import.py ├── julia_registry_helpers.py ├── juliapkg.json ├── logger_specs.py ├── param_groupings.yml ├── sklearn_monkeypatch.py ├── sr.py ├── test │ ├── __init__.py │ ├── __main__.py │ ├── generate_dev_juliapkg.py │ ├── nb_sanitize.cfg │ ├── params.py │ ├── test_cli.py │ ├── test_dev.py │ ├── test_dev_pysr.dockerfile │ ├── test_jax.py │ ├── test_main.py │ ├── test_nb.ipynb │ ├── test_startup.py │ └── test_torch.py └── utils.py └── setup.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */test/* 4 | source = pysr 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | description: File a bug report 3 | title: "[BUG]: " 4 | labels: ["bug"] 5 | assignees: 6 | - MilesCranmer 7 | 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: | 12 | Thanks for taking the time to fill out this bug report! 13 | - type: textarea 14 | id: what-happened 15 | attributes: 16 | label: What happened? 17 | description: Also tell us, what did you expect to happen? 18 | placeholder: Tell us what you see! 19 | value: "A bug happened!" 20 | validations: 21 | required: true 22 | - type: input 23 | id: version 24 | attributes: 25 | label: Version 26 | description: What version of PySR are you running? `python3 -c 'import pysr; print(pysr.__version__)'`. (You might want to check the latest version, in case the issue has already been fixed.) 27 | validations: 28 | required: true 29 | - type: dropdown 30 | id: os 31 | attributes: 32 | label: Operating System 33 | description: What operating system do you see the issue on? 
34 | multiple: true 35 | options: 36 | - Windows 37 | - macOS 38 | - Linux 39 | - type: dropdown 40 | id: pkg-manager 41 | attributes: 42 | label: Package Manager 43 | description: What package manager are you using to install PySR? 44 | options: 45 | - pip 46 | - Conda 47 | - Other (specify below) 48 | - type: dropdown 49 | id: display 50 | attributes: 51 | label: Interface 52 | description: How are you running PySR? 53 | options: 54 | - Jupyter Notebook 55 | - IPython Terminal 56 | - Script (i.e., `python my_script.py`) 57 | - Google Colab 58 | - Other (specify below) 59 | validations: 60 | required: true 61 | - type: textarea 62 | id: logs 63 | attributes: 64 | label: Relevant log output 65 | description: Please copy and paste any log output or error messages. This will be automatically formatted into code, so no need for backticks. 66 | render: shell 67 | - type: textarea 68 | id: extra 69 | attributes: 70 | label: Extra Info 71 | description: Please tell us any other information that you think might help. For example, what are your PySR settings? What dataset are you running on? If possible, please share a minimal code example that produces the error. 72 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: PySR Forums 4 | url: https://github.com/MilesCranmer/PySR/discussions 5 | about: Please ask and answer questions about how to use PySR here. 
6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for this project 3 | title: "[Feature]: " 4 | labels: ["enhancement"] 5 | 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Thanks for taking the time to suggest a feature for PySR! Your interest in the project helps improve the software for everyone 🚀 11 | - type: textarea 12 | id: feature 13 | attributes: 14 | label: Feature Request 15 | description: Describe your desired feature request here! The more details the better. 16 | validations: 17 | required: true 18 | - type: markdown 19 | attributes: 20 | value: | 21 | Be sure to check out the [PySR forums](https://github.com/MilesCranmer/PySR/discussions) to chat with other users about PySR use-cases! 22 | -------------------------------------------------------------------------------- /.github/codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | patch: 4 | default: 5 | informational: true 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 2 | 3 | version: 2 4 | updates: 5 | 6 | - package-ecosystem: "pip" # See documentation for possible values 7 | directory: "/" # Location of package manifests 8 | schedule: 9 | interval: "daily" 10 | 11 | - package-ecosystem: "github-actions" 12 | # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) 
13 | directory: "/" 14 | schedule: 15 | # Check for updates to GitHub Actions every weekday 16 | interval: "daily" 17 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: Linux 2 | 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | paths: 8 | - '**' 9 | tags: 10 | - 'v*.*.*' 11 | pull_request: 12 | branches: 13 | - 'master' 14 | paths: 15 | - '**' 16 | 17 | permissions: 18 | contents: write 19 | 20 | jobs: 21 | test: 22 | runs-on: ${{ matrix.os }} 23 | timeout-minutes: 60 24 | env: 25 | COVERAGE_PROCESS_START: "${{ github.workspace }}/.coveragerc" 26 | defaults: 27 | run: 28 | shell: bash 29 | strategy: 30 | matrix: 31 | julia-version: ['1'] 32 | python-version: ['3.13'] 33 | os: [ubuntu-latest] 34 | test-id: [main] 35 | include: 36 | - julia-version: '1.10' 37 | python-version: '3.10' 38 | os: ubuntu-latest 39 | test-id: include 40 | - julia-version: '1' 41 | python-version: '3.13' 42 | os: ubuntu-latest 43 | test-id: include 44 | - julia-version: '1' 45 | python-version: '3.8' 46 | os: ubuntu-latest 47 | test-id: include 48 | 49 | steps: 50 | - uses: actions/checkout@v4 51 | - name: "Set up Julia" 52 | uses: julia-actions/setup-julia@v2 53 | with: 54 | version: ${{ matrix.julia-version }} 55 | - name: "Cache Julia" 56 | uses: julia-actions/cache@v2 57 | with: 58 | cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }} 59 | cache-packages: false 60 | - name: "Set up Python" 61 | uses: actions/setup-python@v5 62 | with: 63 | python-version: ${{ matrix.python-version }} 64 | cache: pip 65 | - name: "Install PySR" 66 | run: | 67 | python -m pip install --upgrade pip 68 | pip install '.[dev]' 69 | python -c 'import pysr' 70 | - name: "Assert Julia version" 71 | if: ${{ matrix.julia-version != '1'}} 72 | run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ 
matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor" 73 | - name: "Set up coverage for subprocesses" 74 | run: echo 'import coverage; coverage.process_startup()' > "${{ github.workspace }}/sitecustomize.py" 75 | - name: "Run tests" 76 | run: coverage run -m pysr test main,cli,startup 77 | - name: "Run JAX tests" 78 | run: coverage run --append -m pysr test jax 79 | if: ${{ matrix.test-id == 'main' }} 80 | - name: "Run Torch tests" 81 | run: coverage run --append -m pysr test torch 82 | if: ${{ matrix.test-id == 'main' }} 83 | - name: "Build coverage report" 84 | run: coverage xml 85 | - name: "Upload results to Codecov" 86 | uses: codecov/codecov-action@v5 87 | with: 88 | token: ${{ secrets.CODECOV_TOKEN }} 89 | slug: MilesCranmer/PySR 90 | 91 | dev_install: 92 | runs-on: ${{ matrix.os }} 93 | strategy: 94 | matrix: 95 | os: ['ubuntu-latest'] 96 | python-version: ['3.13'] 97 | julia-version: ['1'] 98 | include: 99 | - os: ubuntu-latest 100 | python-version: '3.10' 101 | julia-version: '1.10' 102 | steps: 103 | - uses: actions/checkout@v4 104 | - uses: actions/setup-python@v5 105 | - name: "Install PySR" 106 | run: | 107 | python -m pip install --upgrade pip 108 | pip install '.[dev]' 109 | - name: "Run development test" 110 | run: PYSR_TEST_JULIA_VERSION=${{ matrix.julia-version }} PYSR_TEST_PYTHON_VERSION=${{ matrix.python-version }} python -m pysr test dev 111 | 112 | conda_test: 113 | runs-on: ${{ matrix.os }} 114 | defaults: 115 | run: 116 | shell: bash -l {0} 117 | strategy: 118 | matrix: 119 | python-version: ['3.13'] 120 | os: ['ubuntu-latest'] 121 | 122 | steps: 123 | - uses: actions/checkout@v4 124 | - name: "Cache conda" 125 | uses: actions/cache@v4 126 | env: 127 | CACHE_NUMBER: 0 128 | with: 129 | path: ~/conda_pkgs_dir 130 | key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }} 131 | - name: "Set up Conda" 132 | uses: conda-incubator/setup-miniconda@v3 133 | 
with: 134 | miniforge-variant: Miniforge3 135 | miniforge-version: latest 136 | auto-activate-base: true 137 | python-version: ${{ matrix.python-version }} 138 | activate-environment: pysr-test 139 | environment-file: environment.yml 140 | - name: "Cache Julia" 141 | uses: julia-actions/cache@v2 142 | with: 143 | cache-name: ${{ matrix.os }}-conda-${{ matrix.python-version }} 144 | cache-packages: false 145 | - name: "Install PySR" 146 | run: | 147 | python3 -m pip install . 148 | python3 -c 'import pysr' 149 | - name: "Run tests" 150 | run: cd /tmp && python -m pysr test main 151 | 152 | types: 153 | name: Check types 154 | runs-on: ubuntu-latest 155 | defaults: 156 | run: 157 | shell: bash -l {0} 158 | strategy: 159 | matrix: 160 | python-version: 161 | - '3.13' 162 | - '3.10' 163 | os: ['ubuntu-latest'] 164 | 165 | steps: 166 | - uses: actions/checkout@v4 167 | - name: "Set up Python" 168 | uses: actions/setup-python@v5 169 | with: 170 | python-version: ${{ matrix.python-version }} 171 | cache: pip 172 | - name: "Install PySR and all dependencies" 173 | run: | 174 | python -m pip install --upgrade pip 175 | pip install '.[dev]' 176 | - name: "Run mypy" 177 | run: python -m mypy --install-types --non-interactive pysr 178 | if: ${{ matrix.python-version != '3.10' }} 179 | - name: "Run compatible mypy" 180 | run: python -m mypy --ignore-missing-imports pysr 181 | if: ${{ matrix.python-version == '3.10' }} 182 | 183 | beartype: 184 | name: Test with beartype 185 | runs-on: ubuntu-latest 186 | defaults: 187 | run: 188 | shell: bash -l {0} 189 | env: 190 | PYSR_USE_BEARTYPE: "1" 191 | strategy: 192 | matrix: 193 | python-version: ['3.13'] 194 | 195 | steps: 196 | - uses: actions/checkout@v4 197 | - name: "Set up Python" 198 | uses: actions/setup-python@v5 199 | with: 200 | python-version: ${{ matrix.python-version }} 201 | cache: pip 202 | - name: "Install PySR and all dependencies" 203 | run: | 204 | python -m pip install --upgrade pip 205 | pip install '.[dev]' 206 
| - name: "Run tests" 207 | run: python -m pysr test main,jax,torch 208 | 209 | wheel_test: 210 | name: Test from wheel 211 | runs-on: ubuntu-latest 212 | strategy: 213 | matrix: 214 | python-version: ['3.13'] 215 | julia-version: ['1'] 216 | defaults: 217 | run: 218 | shell: bash -l {0} 219 | 220 | steps: 221 | - uses: actions/checkout@v4 222 | - uses: actions/setup-python@v5 223 | with: 224 | python-version: ${{ matrix.python-version }} 225 | cache: pip 226 | - uses: julia-actions/setup-julia@v2 227 | with: 228 | version: ${{ matrix.julia-version }} 229 | 230 | - name: "Build wheel" 231 | run: | 232 | python -m pip install --upgrade pip build virtualenv 233 | python -m build --wheel 234 | mkdir -p /tmp/artifacts 235 | mv dist/*.whl /tmp/artifacts/ 236 | 237 | - name: "Install wheel in venv & run smoke test" 238 | run: | 239 | mkdir -p /tmp/wheeltest 240 | cd /tmp/wheeltest 241 | python -m virtualenv .venv 242 | source .venv/bin/activate 243 | pip install /tmp/artifacts/*.whl 244 | python -c "import pysr; pysr.PySRRegressor(niterations=1).fit([[1]], [1])" 245 | -------------------------------------------------------------------------------- /.github/workflows/CI_Windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | paths: 8 | - '**' 9 | tags: 10 | - 'v*.*.*' 11 | pull_request: 12 | branches: 13 | - 'master' 14 | paths: 15 | - '**' 16 | 17 | jobs: 18 | test: 19 | runs-on: ${{ matrix.os }} 20 | timeout-minutes: 60 21 | defaults: 22 | run: 23 | shell: bash 24 | strategy: 25 | matrix: 26 | julia-version: ['1'] 27 | python-version: ['3.13'] 28 | os: [windows-latest] 29 | 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: "Set up Julia" 33 | uses: julia-actions/setup-julia@v2 34 | with: 35 | version: ${{ matrix.julia-version }} 36 | - name: "Cache Julia" 37 | uses: julia-actions/cache@v2 38 | with: 39 | cache-name: ${{ matrix.os }}-test-${{ 
matrix.julia-version }}-${{ matrix.python-version }} 40 | cache-packages: false 41 | - name: "Set up Python" 42 | uses: actions/setup-python@v5 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | cache: pip 46 | - name: "Install PySR" 47 | run: | 48 | python -m pip install --upgrade pip 49 | pip install '.[dev]' 50 | python -c 'import pysr' 51 | - name: "Run tests" 52 | run: | 53 | python -m pysr test main,cli,startup 54 | - name: "Install Torch" 55 | run: pip install torch # (optional import) 56 | - name: "Run Torch tests" 57 | run: python -m pysr test torch 58 | -------------------------------------------------------------------------------- /.github/workflows/CI_apptainer.yml: -------------------------------------------------------------------------------- 1 | name: Apptainer 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | paths: 8 | - '**' 9 | tags: 10 | - 'v*.*.*' 11 | pull_request: 12 | branches: 13 | - 'master' 14 | paths: 15 | - '**' 16 | 17 | jobs: 18 | test: 19 | runs-on: ${{ matrix.os }} 20 | timeout-minutes: 60 21 | defaults: 22 | run: 23 | shell: bash 24 | strategy: 25 | matrix: 26 | os: [ubuntu-latest] 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | - uses: eWaterCycle/setup-apptainer@v2 31 | with: 32 | apptainer-version: 1.3.0 33 | - name: Build apptainer 34 | run: sudo apptainer build --notest pysr.sif Apptainer.def 35 | - name: Test apptainer 36 | run: | 37 | TMPDIR=$(mktemp -d) 38 | cp pysr.sif $TMPDIR 39 | cd $TMPDIR 40 | sudo apptainer test ./pysr.sif 41 | -------------------------------------------------------------------------------- /.github/workflows/CI_conda_forge.yml: -------------------------------------------------------------------------------- 1 | name: conda-forge 2 | 3 | # This CI tries the conda-forge version of PySR 4 | 5 | on: 6 | schedule: 7 | # Run at the 0th minute of the 10th hour (UTC). 8 | # This means the job will run at 5am EST. 
9 | - cron: "0 10 * * *" 10 | # This will automatically run on master branch only. 11 | workflow_dispatch: 12 | 13 | jobs: 14 | conda_test: 15 | runs-on: ${{ matrix.os }} 16 | timeout-minutes: 60 17 | defaults: 18 | run: 19 | shell: bash -el {0} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: ['3.10', '3'] 24 | os: ['ubuntu-latest', 'windows-latest', 'macos-latest'] 25 | 26 | steps: 27 | - name: "Set up Conda" 28 | uses: conda-incubator/setup-miniconda@v3 29 | with: 30 | miniforge-variant: Miniforge3 31 | miniforge-version: latest 32 | auto-activate-base: true 33 | python-version: ${{ matrix.python-version }} 34 | activate-environment: pysr-test 35 | - name: "Install pysr" 36 | run: | 37 | conda install -y pysr 38 | python -c "import pysr" 39 | echo "Finished." 40 | - name: "Run tests" 41 | run: | 42 | echo "Running tests" 43 | pip install pytest nbval 44 | python -m pysr test main,startup 45 | echo "Finished." 46 | -------------------------------------------------------------------------------- /.github/workflows/CI_docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | paths: 8 | - '**' 9 | tags: 10 | - 'v*.*.*' 11 | pull_request: 12 | branches: 13 | - 'master' 14 | paths: 15 | - '**' 16 | 17 | jobs: 18 | test: 19 | runs-on: ${{ matrix.os }} 20 | timeout-minutes: 60 21 | defaults: 22 | run: 23 | shell: bash 24 | strategy: 25 | matrix: 26 | os: [ubuntu-latest] 27 | arch: ['linux/amd64'] 28 | 29 | steps: 30 | - uses: actions/checkout@v4 31 | - name: Build docker 32 | run: docker build --platform=${{ matrix.arch }} -t pysr . 
33 | - name: Test docker 34 | run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup' 35 | -------------------------------------------------------------------------------- /.github/workflows/CI_docker_large_nightly.yml: -------------------------------------------------------------------------------- 1 | name: Docker_Large_Nightly 2 | 3 | on: 4 | schedule: 5 | # Run at the 0th minute of the 10th hour (UTC). 6 | # This means the job will run at 5am EST. 7 | - cron: "0 10 * * *" 8 | # This will automatically run on master branch only. 9 | workflow_dispatch: 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.os }} 14 | continue-on-error: ${{ matrix.arch == 'linux/arm64' }} 15 | defaults: 16 | run: 17 | shell: bash 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | julia-version: ['1.10', '1'] 22 | python-version: ['3.10', '3.13'] 23 | os: [ubuntu-latest] 24 | arch: ['linux/amd64', 'linux/arm64'] 25 | 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | - name: Set up QEMU 30 | uses: docker/setup-qemu-action@v3 31 | with: 32 | platforms: all 33 | - name: Build docker 34 | run: docker build --platform=${{ matrix.arch }} -t pysr --build-arg JLVERSION=${{ matrix.julia-version }} --build-arg PYVERSION=${{ matrix.python-version }} . 35 | - name: Test docker 36 | run: docker run --platform=${{ matrix.arch }} --rm pysr /bin/bash -c 'pip install pytest nbval && python3 -m pysr test main,cli,startup' 37 | -------------------------------------------------------------------------------- /.github/workflows/CI_large_nightly.yml: -------------------------------------------------------------------------------- 1 | name: large_nightly 2 | 3 | # This CI only runs once per day, but tries 4 | # many different configurations. 5 | 6 | on: 7 | schedule: 8 | # Run at the 0th minute of the 10th hour (UTC). 9 | # This means the job will run at 5am EST. 
10 | - cron: "0 10 * * *" 11 | # This will automatically run on master branch only. 12 | workflow_dispatch: 13 | 14 | jobs: 15 | test: 16 | runs-on: ${{ matrix.os }} 17 | timeout-minutes: 60 18 | # Windows jobs are flaky: 19 | continue-on-error: ${{ matrix.os == 'windows-latest' }} 20 | defaults: 21 | run: 22 | shell: bash 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | julia-version: ['1.10', '1'] 27 | python-version: ['3.10', '3.13'] 28 | os: [ubuntu-latest, macos-latest, windows-latest] 29 | 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: "Set up Julia" 33 | uses: julia-actions/setup-julia@v2 34 | with: 35 | version: ${{ matrix.julia-version }} 36 | - name: "Set up Python" 37 | uses: actions/setup-python@v5 38 | with: 39 | python-version: ${{ matrix.python-version }} 40 | - name: "Install PySR" 41 | run: | 42 | python -m pip install --upgrade pip 43 | pip install '.[dev]' 44 | python -c 'import pysr' 45 | - name: "Assert Julia version" 46 | if: ${{ matrix.julia-version != '1'}} 47 | run: python3 -c "from pysr import jl; assert jl.VERSION.major == jl.seval('v\"${{ matrix.julia-version }}\"').major; assert jl.VERSION.minor == jl.seval('v\"${{ matrix.julia-version }}\"').minor" 48 | - name: "Run tests" 49 | run: python -m pysr test main,cli,startup 50 | -------------------------------------------------------------------------------- /.github/workflows/CI_mac.yml: -------------------------------------------------------------------------------- 1 | name: macOS 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | paths: 8 | - '**' 9 | tags: 10 | - 'v*.*.*' 11 | pull_request: 12 | branches: 13 | - 'master' 14 | paths: 15 | - '**' 16 | 17 | jobs: 18 | test: 19 | runs-on: ${{ matrix.os }} 20 | timeout-minutes: 60 21 | defaults: 22 | run: 23 | shell: bash 24 | strategy: 25 | matrix: 26 | julia-version: ['1'] 27 | python-version: ['3.13'] 28 | os: [macos-latest] 29 | 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: "Set up Julia" 33 | uses: 
julia-actions/setup-julia@v2 34 | with: 35 | version: ${{ matrix.julia-version }} 36 | - name: "Cache Julia" 37 | uses: julia-actions/cache@v2 38 | with: 39 | cache-name: ${{ matrix.os }}-test-${{ matrix.julia-version }}-${{ matrix.python-version }} 40 | cache-packages: false 41 | - name: "Set up Python" 42 | uses: actions/setup-python@v5 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | cache: pip 46 | - name: "Install PySR" 47 | run: | 48 | python -m pip install --upgrade pip 49 | pip install '.[dev]' 50 | python -c 'import pysr' 51 | - name: "Run tests" 52 | run: | 53 | python -m pysr test main,cli,startup 54 | - name: "Run JAX tests" 55 | run: python -m pysr test jax 56 | - name: "Run Torch tests" 57 | run: python -m pysr test torch 58 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ "master" ] 9 | schedule: 10 | - cron: '28 17 * * 1' 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ubuntu-latest 16 | permissions: 17 | actions: read 18 | contents: read 19 | security-events: write 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | language: [ 'python' ] 25 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 26 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 27 | 28 | steps: 29 | - name: Checkout repository 30 | uses: actions/checkout@v4 31 | 32 | # Initializes the CodeQL tools for scanning. 33 | - name: Initialize CodeQL 34 | uses: github/codeql-action/init@v3 35 | with: 36 | languages: ${{ matrix.language }} 37 | # If you wish to specify custom queries, you can do so here or in a config file. 
38 | # By default, queries listed here will override any specified in a config file. 39 | # Prefix the list here with "+" to use these queries and those in the config file. 40 | 41 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 42 | # queries: security-extended,security-and-quality 43 | 44 | 45 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 46 | # If this step fails, then you should remove it and run the build manually (see below) 47 | - name: Autobuild 48 | uses: github/codeql-action/autobuild@v3 49 | 50 | # ℹ️ Command-line programs to run using the OS shell. 51 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 52 | 53 | # If the Autobuild fails above, remove it and uncomment the following three lines, 54 | # then modify them (or add more) to build your code; refer to the EXAMPLE below for guidance.
55 | 56 | # - run: | 57 | # echo "Run, Build Application using script" 58 | # ./location_of_script_within_repo/buildscript.sh 59 | 60 | - name: Perform CodeQL Analysis 61 | uses: github/codeql-action/analyze@v3 62 | -------------------------------------------------------------------------------- /.github/workflows/docker_deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Docker 2 | 3 | on: 4 | schedule: 5 | - cron: "0 10 * * *" 6 | push: 7 | branches: 8 | - "**" 9 | tags: 10 | - "v*.*.*" 11 | workflow_dispatch: 12 | 13 | 14 | jobs: 15 | docker: 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | os: [ubuntu-latest] 20 | arch: [linux/amd64] 21 | python-version: [3.12.3] 22 | julia-version: [1.10.3] 23 | steps: 24 | - name: Checkout 25 | uses: actions/checkout@v4 26 | - name: Login to Docker Hub 27 | uses: docker/login-action@v3 28 | if: github.event_name != 'pull_request' 29 | with: 30 | username: ${{ secrets.DOCKERHUB_USERNAME }} 31 | password: ${{ secrets.DOCKERHUB_TOKEN }} 32 | - name: Login to GitHub registry 33 | uses: docker/login-action@v3 34 | if: github.event_name != 'pull_request' 35 | with: 36 | registry: ghcr.io 37 | username: ${{ github.repository_owner }} 38 | password: ${{ secrets.GITHUB_TOKEN }} 39 | - name: Docker meta 40 | id: meta 41 | uses: docker/metadata-action@v5 42 | with: 43 | # List of Docker images to use as base name for tags 44 | images: | 45 | mcranmer/pysr 46 | ghcr.io/${{ github.repository }} 47 | # generate Docker tags based on the following events/attributes 48 | tags: | 49 | type=schedule 50 | type=ref,event=branch 51 | type=ref,event=pr 52 | type=semver,pattern={{version}} 53 | type=semver,pattern={{major}}.{{minor}} 54 | type=semver,pattern={{major}} 55 | type=sha 56 | type=raw,value=latest,enable={{is_default_branch}} 57 | - name: Set up QEMU 58 | uses: docker/setup-qemu-action@v3 59 | - name: Set up Docker Buildx 60 | uses: docker/setup-buildx-action@v3 61 | 
- name: Build and push 62 | uses: docker/build-push-action@v6 63 | with: 64 | context: . 65 | platforms: ${{ matrix.arch }} 66 | push: ${{ github.event_name != 'pull_request' }} 67 | tags: ${{ steps.meta.outputs.tags }} 68 | labels: ${{ steps.meta.outputs.labels }} 69 | cache-from: type=registry,ref=mcranmer/pysr:buildcache 70 | cache-to: type=registry,ref=mcranmer/pysr:buildcache,mode=max 71 | build-args: | 72 | PYVERSION=${{ matrix.python-version }} 73 | JLVERSION=${{ matrix.julia-version }} 74 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | paths: 8 | - 'pysr/**' 9 | - '.github/workflows/docs.yml' 10 | - 'docs/**' 11 | - 'README.md' 12 | - 'mkdocs.yml' 13 | workflow_dispatch: 14 | 15 | jobs: 16 | test: 17 | runs-on: ubuntu-latest 18 | defaults: 19 | run: 20 | shell: bash 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: "Set up Python" 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: 3.13 28 | cache: pip 29 | - name: "Install packages for docs building" 30 | run: pip install -r docs/requirements.txt 31 | - name: "Install PySR" 32 | run: pip install . 
&& python -c 'import pysr' 33 | - name: "Build API docs" 34 | run: cd docs && ./gen_docs.sh 35 | - name: "Deploy documentation to primary repository" 36 | run: mkdocs gh-deploy --force 37 | - name: "Deploy documentation to secondary repository" 38 | env: 39 | DEPLOY_KEY: ${{ secrets.DAMTP_DEPLOY_KEY }} 40 | run: | 41 | # Set up SSH key for authentication 42 | mkdir -p ~/.ssh 43 | echo "$DEPLOY_KEY" > ~/.ssh/id_rsa 44 | chmod 600 ~/.ssh/id_rsa 45 | ssh-keyscan github.com >> ~/.ssh/known_hosts 46 | 47 | git checkout gh-pages 48 | git remote add secondary git@github.com:ai-damtp-cam-ac-uk/pysr.git 49 | git push secondary gh-pages --force 50 | -------------------------------------------------------------------------------- /.github/workflows/pypi_deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy PyPI 2 | on: 3 | push: 4 | tags: 5 | - 'v*.*.*' 6 | workflow_dispatch: 7 | 8 | jobs: 9 | pypi: 10 | runs-on: ubuntu-latest 11 | environment: 12 | name: pypi 13 | url: https://pypi.org/p/pysr 14 | permissions: 15 | id-token: write 16 | steps: 17 | - name: "Checkout" 18 | uses: actions/checkout@v4 19 | - name: "Set up Python" 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: 3.10.8 23 | - name: "Install building tools" 24 | run: pip install build 25 | - name: "Build package" 26 | run: python -m build 27 | - name: "Publish distribution 📦 to Test PyPI" 28 | uses: pypa/gh-action-pypi-publish@release/v1 29 | with: 30 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 31 | repository-url: https://test.pypi.org/legacy/ 32 | skip-existing: true 33 | verbose: true 34 | - name: "Publish distribution 📦 to PyPI" 35 | uses: pypa/gh-action-pypi-publish@release/v1 36 | with: 37 | password: ${{ secrets.PYPI_API_TOKEN }} 38 | verbose: true 39 | -------------------------------------------------------------------------------- /.github/workflows/update_backend.yml: 
-------------------------------------------------------------------------------- 1 | name: PySR backend update 2 | on: 3 | schedule: 4 | - cron: '00 00 * * *' 5 | workflow_dispatch: 6 | jobs: 7 | update_compat: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - uses: actions/setup-python@v5 12 | with: 13 | python-version: 3.13 14 | cache: pip 15 | 16 | - name: "Install dependencies" 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install tomlkit 20 | 21 | - name: "Get SymbolicRegression.jl latest version" 22 | id: get-latest 23 | run: | 24 | cd $(mktemp -d) 25 | git clone https://github.com/MilesCranmer/SymbolicRegression.jl 26 | cd SymbolicRegression.jl 27 | echo "version=$(git describe --tags --match='v*' --abbrev=0 | sed 's/^v//')" >> $GITHUB_OUTPUT 28 | 29 | - name: "Update SymbolicRegression.jl version in PySR" 30 | run: | 31 | python .github/workflows/update_backend_version.py ${{ steps.get-latest.outputs.version }} 32 | 33 | - name: "Restore changes if no diff to `pysr/juliapkg.json`" 34 | run: | 35 | if git diff --quiet pysr/juliapkg.json; then 36 | echo "No changes to pysr/juliapkg.json. Restoring changes." 37 | git restore pyproject.toml 38 | fi 39 | 40 | - name: "Create PR if necessary" 41 | uses: peter-evans/create-pull-request@v7 42 | with: 43 | title: "Automated update to backend: v${{ steps.get-latest.outputs.version }}" 44 | body: | 45 | This PR was automatically generated by the GitHub Action `.github/workflows/update_backend.yml` 46 | 47 | It updates the backend version to v${{ steps.get-latest.outputs.version }}. For a full description of the changes, see the backend changelog: [v${{ steps.get-latest.outputs.version }}](https://github.com/MilesCranmer/SymbolicRegression.jl/releases/tag/v${{ steps.get-latest.outputs.version }}).
48 | delete-branch: true 49 | commit-message: "Update backend version to v${{ steps.get-latest.outputs.version }}" 50 | add-paths: | 51 | pyproject.toml 52 | pysr/juliapkg.json 53 | -------------------------------------------------------------------------------- /.github/workflows/update_backend_version.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | from pathlib import Path 4 | 5 | import tomlkit 6 | 7 | new_backend_version = sys.argv[1] 8 | 9 | assert not new_backend_version.startswith("v"), "Version should not start with 'v'" 10 | 11 | pyproject_toml = Path(__file__).parent / ".." / ".." / "pyproject.toml" 12 | juliapkg_json = Path(__file__).parent / ".." / ".." / "pysr" / "juliapkg.json" 13 | 14 | with open(pyproject_toml) as toml_file: 15 | pyproject_data = tomlkit.parse(toml_file.read()) 16 | 17 | with open(juliapkg_json) as f: 18 | juliapkg_data = json.load(f) 19 | 20 | major, minor, patch, *dev = pyproject_data["project"]["version"].split(".") 21 | pyproject_data["project"]["version"] = f"{major}.{minor}.{int(patch)+1}" 22 | 23 | juliapkg_data["packages"]["SymbolicRegression"]["version"] = f"~{new_backend_version}" 24 | 25 | with open(pyproject_toml, "w") as toml_file: 26 | toml_file.write(tomlkit.dumps(pyproject_data)) 27 | 28 | with open(juliapkg_json, "w") as f: 29 | json.dump(juliapkg_data, f, indent=4) 30 | # Ensure ends with newline 31 | f.write("\n") 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .dataset*.jl 2 | .hyperparams*.jl 3 | *.csv 4 | *.csv.out* 5 | *.bkup 6 | *.pkl 7 | performance*txt 8 | *.out 9 | trials* 10 | **/__pycache__ 11 | build 12 | dist 13 | *.vs/* 14 | *.pyproj 15 | *.sln 16 | pysr/.vs/ 17 | pysr.egg-info 18 | Manifest.toml 19 | workflow 20 | docs/index.md 21 | site 22 | **/.DS_Store 23 | **/*.code-workspace 24 | **/*.tar.gz 
25 | venv 26 | requirements-dev.lock 27 | requirements.lock 28 | outputs 29 | .mypy_cache 30 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | # General linting 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v5.0.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: check-yaml 9 | - id: check-added-large-files 10 | # General formatting 11 | - repo: https://github.com/psf/black 12 | rev: 25.1.0 13 | hooks: 14 | - id: black 15 | - id: black-jupyter 16 | exclude: pysr/test/test_nb.ipynb 17 | # Stripping notebooks 18 | - repo: https://github.com/kynan/nbstripout 19 | rev: 0.8.1 20 | hooks: 21 | - id: nbstripout 22 | exclude: pysr/test/test_nb.ipynb 23 | # Unused imports 24 | - repo: https://github.com/hadialqattan/pycln 25 | rev: "v2.5.0" 26 | hooks: 27 | - id: pycln 28 | # Sorted imports 29 | - repo: https://github.com/PyCQA/isort 30 | rev: "6.0.0" 31 | hooks: 32 | - id: isort 33 | additional_dependencies: [toml] 34 | -------------------------------------------------------------------------------- /Apptainer.def: -------------------------------------------------------------------------------- 1 | # Build an Apptainer SIF file containing a working copy of PySR and its prereqs 2 | Bootstrap: docker 3 | From: julia:1.11.1-bullseye 4 | Stage: jl 5 | 6 | Bootstrap: docker 7 | From: python:3.12.6-bullseye 8 | Stage: runtime 9 | 10 | %environment 11 | # Use the container Julia binary 12 | export PATH="/usr/local/julia/bin:$PATH" 13 | 14 | # Create a stacked environment for additional Julia packages 15 | export JULIA_DEPOT_PATH="$HOME/.pysr:/pysr/depot:$JULIA_DEPOT_PATH" 16 | export JULIA_LOAD_PATH="$HOME/.pysr:/pysr:$JULIA_LOAD_PATH" 17 | 18 | %files from jl 19 | /usr/local/julia /usr/local/julia 20 | 21 | %files 22 | ./pyproject.toml /pysr/pyproject.toml 23 | ./LICENSE 
/pysr/LICENSE 24 | ./README.md /pysr/README.md 25 | ./pysr /pysr/pysr 26 | 27 | %post 28 | # Ensure we don't use the local pysr commands: 29 | cd $(mktemp -d) 30 | 31 | export PATH="/usr/local/julia/bin:$PATH" 32 | 33 | # Install IPython and other useful libraries: 34 | pip3 install --no-cache-dir ipython matplotlib pytest nbval 35 | # Install PySR and requirements: 36 | pip3 install --no-cache-dir /pysr 37 | 38 | # Put the Julia dependencies in /pysr/depot 39 | mkdir /pysr/depot 40 | export JULIA_DEPOT_PATH="/pysr/depot" 41 | 42 | # And set a specific environment for Julia dependencies 43 | mkdir /pysr/env 44 | export PYTHON_JULIAPKG_PROJECT="/pysr/env" 45 | 46 | # Pull in all the Julia dependencies 47 | python3 -c 'import pysr; pysr.load_all_packages()' 48 | 49 | %test 50 | python3 -m pysr test main,cli,startup 51 | 52 | %runscript 53 | # Start ipython when the container is executed 54 | [ ! -d $HOME/.pysr ] && mkdir $HOME/.pysr 55 | PYTHONPATH=/pysr ipython 56 | -------------------------------------------------------------------------------- /CITATION.md: -------------------------------------------------------------------------------- 1 | # Citing 2 | 3 | To cite PySR or SymbolicRegression.jl, please use the following BibTeX entry: 4 | 5 | ```bibtex 6 | @misc{cranmerInterpretableMachineLearning2023, 7 | title = {Interpretable {Machine} {Learning} for {Science} with {PySR} and {SymbolicRegression}.jl}, 8 | url = {http://arxiv.org/abs/2305.01582}, 9 | doi = {10.48550/arXiv.2305.01582}, 10 | urldate = {2023-07-17}, 11 | publisher = {arXiv}, 12 | author = {Cranmer, Miles}, 13 | month = may, 14 | year = {2023}, 15 | note = {arXiv:2305.01582 [astro-ph, physics:physics]}, 16 | keywords = {Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Machine Learning, Computer Science - Neural and Evolutionary Computing, Computer Science - Symbolic Computation, Physics - Data Analysis, Statistics and Probability}, 17 | } 18 | ``` 19 | 20 | To cite 
symbolic distillation of neural networks, the following BibTeX entry can be used: 21 | 22 | ```bibtex 23 | @article{cranmerDiscovering2020, 24 | title={Discovering Symbolic Models from Deep Learning with Inductive Biases}, 25 | author={Miles Cranmer and Alvaro Sanchez-Gonzalez and Peter Battaglia and Rui Xu and Kyle Cranmer and David Spergel and Shirley Ho}, 26 | journal={NeurIPS 2020}, 27 | year={2020}, 28 | eprint={2006.11287}, 29 | archivePrefix={arXiv}, 30 | primaryClass={cs.LG} 31 | } 32 | ``` 33 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This builds a dockerfile containing a working copy of PySR 2 | # with all pre-requisites installed. 3 | 4 | ARG JLVERSION=1.11.1 5 | ARG PYVERSION=3.12.6 6 | ARG BASE_IMAGE=bullseye 7 | 8 | FROM julia:${JLVERSION}-${BASE_IMAGE} AS jl 9 | FROM python:${PYVERSION}-${BASE_IMAGE} 10 | 11 | # Merge Julia image: 12 | COPY --from=jl /usr/local/julia /usr/local/julia 13 | ENV PATH="/usr/local/julia/bin:${PATH}" 14 | 15 | # Install IPython and other useful libraries: 16 | RUN pip install --no-cache-dir ipython matplotlib 17 | 18 | WORKDIR /pysr 19 | 20 | # Install PySR: 21 | # We do a minimal copy so it doesn't need to rerun at every file change: 22 | ADD ./pyproject.toml /pysr/pyproject.toml 23 | ADD ./LICENSE /pysr/LICENSE 24 | ADD ./README.md /pysr/README.md 25 | ADD ./pysr /pysr/pysr 26 | RUN pip3 install --no-cache-dir . 
27 | 28 | # Install Julia pre-requisites: 29 | RUN python3 -c 'import pysr; pysr.load_all_packages()' 30 | 31 | # metainformation 32 | LABEL org.opencontainers.image.authors = "Miles Cranmer" 33 | LABEL org.opencontainers.image.source = "https://github.com/MilesCranmer/PySR" 34 | LABEL org.opencontainers.image.licenses = "Apache License 2.0" 35 | 36 | CMD ["ipython"] 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 Miles Cranmer 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark 1 2 | 3 | The following benchmarks were ran with this command on a node on CCA's BNL cluster (40-cores). At no time was the node fully busy. The tags were put into the file `tags.txt`, and the `benchmark.sh` was copied to the root folder. 
This is the command used: 4 | 5 | ```bash 6 | for x in $(cat tags.txt); do sleep 120 && git checkout $x &> /dev/null && nohup ./benchmark.sh > performance_v3_$x.txt &; done 7 | ``` 8 | with this API call in `benchmark.sh` 9 | ```python 10 | eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000) 11 | ``` 12 | 13 | 14 | Version | Cycles/second 15 | --- | --- 16 | v0.3.2 | 37526 17 | v0.3.3 | 38400 18 | v0.3.4 | 28700 19 | v0.3.5 | 32700 20 | v0.3.6 | 25900 21 | v0.3.7 | 26600 22 | v0.3.8 | 7470 23 | v0.3.9 | 6760 24 | v0.3.10 | 25 | v0.3.11 | 19500 26 | v0.3.12 | 19000 27 | v0.3.13 | 15200 28 | v0.3.14 | 14700 29 | v0.3.15 | 42000 30 | v0.3.23 | 64000 31 | 32 | v0.3.10 was frozen. 33 | -------------------------------------------------------------------------------- /benchmarks/benchmark.sh: -------------------------------------------------------------------------------- 1 | python setup.py install > /dev/null && python -c ' 2 | import pkg_resources 3 | version = pkg_resources.get_distribution("pysr").version 4 | version = [int(elem) for elem in version.split(".")] 5 | import numpy as np 6 | from pysr import pysr 7 | X=np.random.randn(100, 2)*5 8 | y=2*np.sin((X[:, 0]+X[:, 1]))*np.exp(X[:, 1]/3) 9 | if version[1] >= 3 and version[2] >= 20: 10 | eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000, maxdepth=6, fast_cycle=True, batching=True, batch_size=50) 11 | elif version[1] >= 3 and version[2] >= 17: 12 | eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000, maxdepth=6, fast_cycle=True) 13 | elif version[1] >= 3 and version[2] >= 16: 14 | eq = pysr(X, y, binary_operators=["plus", "mult", "div", 
"pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000, maxdepth=6) 15 | elif version[1] >= 3 and version[2] >= 2: 16 | eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, procs=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000) 17 | else: 18 | eq = pysr(X, y, binary_operators=["plus", "mult", "div", "pow"], unary_operators=["sin"], niterations=20, threads=4, parsimony=1e-10, population_size=1000, ncyclesperiteration=1000) 19 | ' 2>&1 | grep 'per second' | tail -n 1 | vims '%s/ //g' -l 'df:' 20 | -------------------------------------------------------------------------------- /benchmarks/hyperparamopt.py: -------------------------------------------------------------------------------- 1 | """Start a hyperoptimization from a single node""" 2 | 3 | import pickle as pkl 4 | import sys 5 | 6 | import hyperopt 7 | import numpy as np 8 | from hyperopt import Trials, fmin, hp, tpe 9 | from hyperopt.fmin import generate_trials_to_calculate 10 | from space import * 11 | 12 | from pysr import PySRRegressor 13 | 14 | # Change the following code to your file 15 | ################################################################################ 16 | TRIALS_FOLDER = "trials2" 17 | NUMBER_TRIALS_PER_RUN = 1 18 | timeout_in_minutes = 10 19 | start_from_init_vals = False 20 | 21 | # Test run to compile everything: 22 | julia_project = None 23 | procs = 4 24 | model = PySRRegressor( 25 | binary_operators=binary_operators, 26 | unary_operators=unary_operators, 27 | timeout_in_seconds=30, 28 | julia_project=julia_project, 29 | procs=procs, 30 | update=False, 31 | temp_equation_file=True, 32 | ) 33 | model.fit(np.random.randn(100, 3), np.random.randn(100)) 34 | 35 | 36 | def run_trial(args): 37 | """Evaluate the model loss using the hyperparams in args 38 | 39 | :args: A dictionary containing all hyperparameters 40 | :returns: Dict with status and 
loss from cross-validation 41 | 42 | """ 43 | # The arguments which are integers: 44 | integer_args = [ 45 | "populations", 46 | "niterations", 47 | "ncyclesperiteration", 48 | "population_size", 49 | "topn", 50 | "maxsize", 51 | "optimizer_nrestarts", 52 | "optimizer_iterations", 53 | ] 54 | # Set these to int types: 55 | for k, v in args.items(): 56 | if k in integer_args: 57 | args[k] = int(v) 58 | 59 | # Duplicate this argument: 60 | args["tournament_selection_n"] = args["topn"] 61 | 62 | # Invalid hyperparams: 63 | invalid = args["population_size"] < args["topn"] 64 | if invalid: 65 | return dict(status="fail", loss=float("inf")) 66 | 67 | args["timeout_in_seconds"] = timeout_in_minutes * 60 68 | args["julia_project"] = julia_project 69 | args["procs"] = procs 70 | args["update"] = False 71 | args["temp_equation_file"] = True 72 | 73 | print(f"Running trial with args: {args}") 74 | 75 | # Create the dataset: 76 | ntrials = 3 77 | losses = [] 78 | 79 | # Old datasets: 80 | eval_str = [ 81 | "np.cos(2.3 * X[:, 0]) * np.sin(2.3 * X[:, 0] * X[:, 1] * X[:, 2]) - 10.0", 82 | "(np.exp(X[:, 3]*0.3) + 3)/(np.exp(X[:, 1]*0.2) + np.cos(X[:, 0]) + 1.1)", 83 | # "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5", 84 | # "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)", 85 | # "X[:, 0] * np.sin(2*np.pi * (X[:, 1] * X[:, 2] - X[:, 3] / X[:, 4])) + 3.0", 86 | ] 87 | 88 | for expression in eval_str: 89 | expression_losses = [] 90 | for i in range(ntrials): 91 | rstate = np.random.RandomState(i) 92 | X = 3 * rstate.randn(200, 5) 93 | y = eval(expression) 94 | 95 | # Normalize y so that losses are fair: 96 | y = (y - np.average(y)) / np.std(y) 97 | 98 | # Create the model: 99 | model = PySRRegressor(**args) 100 | 101 | # Run the model: 102 | try: 103 | model.fit(X, y) 104 | except RuntimeError: 105 | return dict(status="fail", loss=float("inf")) 106 | 107 | # Compute loss: 108 | cur_loss = float(model.get_best()["loss"]) 109 | 
expression_losses.append(cur_loss) 110 | 111 | losses.append(np.median(expression_losses)) 112 | 113 | loss = np.average(losses) 114 | print(f"Finished with {loss}", str(args)) 115 | 116 | return dict(status="ok", loss=loss) 117 | 118 | 119 | rand_between = lambda lo, hi: (np.random.rand() * (hi - lo) + lo) 120 | 121 | init_vals = [ 122 | dict( 123 | model_selection=0, # 0 means first choice 124 | binary_operators=0, 125 | unary_operators=0, 126 | populations=100.0, 127 | niterations=0, 128 | ncyclesperiteration=rand_between(50, 150), 129 | alpha=rand_between(0.05, 0.2), 130 | annealing=0, 131 | # fraction_replaced=0.01, 132 | fraction_replaced=0.01, 133 | # fraction_replaced_hof=0.005, 134 | fraction_replaced_hof=0.005, 135 | # population_size=100, 136 | population_size=rand_between(50, 200), 137 | # parsimony=1e-4, 138 | parsimony=1e-4, 139 | # topn=10, 140 | topn=10.0, 141 | # weight_add_node=1, 142 | weight_add_node=1.0, 143 | # weight_insert_node=3, 144 | weight_insert_node=3.0, 145 | # weight_delete_node=3, 146 | weight_delete_node=3.0, 147 | # weight_do_nothing=1, 148 | weight_do_nothing=1.0, 149 | # weight_mutate_constant=10, 150 | weight_mutate_constant=10.0, 151 | # weight_mutate_operator=1, 152 | weight_mutate_operator=1.0, 153 | # weight_swap_operands=1, 154 | weight_swap_operands=1.0, 155 | # weight_randomize=1, 156 | weight_randomize=1.0, 157 | # weight_simplify=0.002, 158 | weight_simplify=0, # One of these is fixed. 
159 | # crossover_probability=0.01 160 | crossover_probability=0.01, 161 | # perturbation_factor=1.0, 162 | perturbation_factor=1.0, 163 | # maxsize=20, 164 | maxsize=0, 165 | # warmup_maxsize_by=0.0, 166 | warmup_maxsize_by=0.0, 167 | # use_frequency=True, 168 | use_frequency=1, 169 | # optimizer_nrestarts=3, 170 | optimizer_nrestarts=3.0, 171 | # optimize_probability=1.0, 172 | optimize_probability=1.0, 173 | # optimizer_iterations=10, 174 | optimizer_iterations=10.0, 175 | # tournament_selection_p=1.0, 176 | tournament_selection_p=rand_between(0.9, 0.999), 177 | ) 178 | ] 179 | 180 | ################################################################################ 181 | 182 | 183 | def merge_trials(trials1, trials2_slice): 184 | """Merge two hyperopt trials objects 185 | 186 | :trials1: The primary trials object 187 | :trials2_slice: A slice of the trials object to be merged, 188 | obtained with, e.g., trials2.trials[:10] 189 | :returns: The merged trials object 190 | 191 | """ 192 | max_tid = 0 193 | if len(trials1.trials) > 0: 194 | max_tid = max([trial["tid"] for trial in trials1.trials]) 195 | 196 | for trial in trials2_slice: 197 | tid = trial["tid"] + max_tid + 2 198 | local_hyperopt_trial = Trials().new_trial_docs( 199 | tids=[None], specs=[None], results=[None], miscs=[None] 200 | ) 201 | local_hyperopt_trial[0] = trial 202 | local_hyperopt_trial[0]["tid"] = tid 203 | local_hyperopt_trial[0]["misc"]["tid"] = tid 204 | for key in local_hyperopt_trial[0]["misc"]["idxs"].keys(): 205 | local_hyperopt_trial[0]["misc"]["idxs"][key] = [tid] 206 | trials1.insert_trial_docs(local_hyperopt_trial) 207 | trials1.refresh() 208 | return trials1 209 | 210 | 211 | import glob 212 | 213 | path = TRIALS_FOLDER + "/*.pkl" 214 | n_prior_trials = len(list(glob.glob(path))) 215 | 216 | loaded_fnames = [] 217 | if start_from_init_vals: 218 | trials = generate_trials_to_calculate(init_vals) 219 | i = 0 220 | else: 221 | trials = Trials() 222 | i = 1 223 | 224 | n = 
"""Print the best model parameters and loss"""

import glob
import pickle as pkl
from pprint import PrettyPrinter

import hyperopt
import numpy as np
from hyperopt import Trials, fmin, hp, tpe
from space import space

# Change the following code to your file
################################################################################
# TODO: Declare a folder to hold all trials objects
TRIALS_FOLDER = "trials2"
################################################################################


def merge_trials(trials1, trials2_slice):
    """Merge two hyperopt trials objects

    :trials1: The primary trials object
    :trials2_slice: A slice of the trials object to be merged,
        obtained with, e.g., trials2.trials[:10]
    :returns: The merged trials object
    """
    max_tid = 0
    if len(trials1.trials) > 0:
        max_tid = max(trial["tid"] for trial in trials1.trials)

    for trial in trials2_slice:
        # Re-number the incoming trial so its id cannot collide with trials1.
        tid = trial["tid"] + max_tid + 1
        hyperopt_trial = Trials().new_trial_docs(
            tids=[None], specs=[None], results=[None], miscs=[None]
        )
        hyperopt_trial[0] = trial
        hyperopt_trial[0]["tid"] = tid
        hyperopt_trial[0]["misc"]["tid"] = tid
        # Every per-parameter index list must point at the new tid as well.
        for key in hyperopt_trial[0]["misc"]["idxs"].keys():
            hyperopt_trial[0]["misc"]["idxs"][key] = [tid]
        trials1.insert_trial_docs(hyperopt_trial)
        trials1.refresh()
    return trials1


np.random.seed()

# Load up all runs:
path = TRIALS_FOLDER + "/*.pkl"
files = 0
trials = None
for fname in glob.glob(path):
    # NOTE(review): pickle.load on untrusted files can execute arbitrary
    # code; only point TRIALS_FOLDER at trials you generated yourself.
    with open(fname, "rb") as f:  # close the handle instead of leaking it
        trials_obj = pkl.load(f)
    n_trials = trials_obj["n"]
    trials_obj = trials_obj["trials"]
    if files == 0:
        trials = trials_obj
    else:
        trials = merge_trials(trials, trials_obj.trials[-n_trials:])
    files += 1


print(files, "trials merged")


if trials is None:
    # Same exception type and message as before, but raised explicitly
    # instead of probing a possibly-unbound name with try/except NameError.
    raise NameError("No trials loaded. Be sure to set the right folder")

# Pair each trial's loss with its hyperparameter values, best (lowest) first:
clean_trials = sorted(
    ((trial["result"]["loss"], trial["misc"]["vals"]) for trial in trials),
    key=lambda pair: pair[0],
)

pp = PrettyPrinter(indent=4)

for loss, params in clean_trials:
    for k, value in params.items():
        value = value[0]
        if isinstance(value, int):
            # hp.choice stores an index; map it back to the underlying
            # option (or list of options) declared in `space`.
            possible_args = space[k].pos_args[1:]
            try:
                value = possible_args[value].obj
            except AttributeError:
                value = [arg.obj for arg in possible_args[value].pos_args]

        params[k] = value

    pp.pprint({"loss": loss, "params": params})
"""Hyperopt search space over PySR's tunable hyperparameters.

Commented annotations give the PySR default for each parameter; the
benchmark scripts import only the `space` dict from this module.
"""

import numpy as np
from hyperopt import Trials, fmin, hp, tpe

binary_operators = ["*", "/", "+", "-"]
unary_operators = ["sin", "cos", "exp", "log"]


def _log_weight(name):
    # All mutation-weight parameters share the same log-uniform prior.
    return hp.loguniform(name, np.log(0.0001), np.log(100))


space = {
    # default: model_selection="best"
    "model_selection": hp.choice("model_selection", ["accuracy"]),
    # default: binary_operators=None
    "binary_operators": hp.choice("binary_operators", [binary_operators]),
    # default: unary_operators=None
    "unary_operators": hp.choice("unary_operators", [unary_operators]),
    # default: populations=100
    "populations": hp.qloguniform("populations", np.log(10), np.log(1000), 1),
    # default: niterations=4; fixed high — we quit on a clock instead.
    "niterations": hp.choice("niterations", [10000]),
    # default: ncyclesperiteration=100
    "ncyclesperiteration": hp.qloguniform(
        "ncyclesperiteration", np.log(10), np.log(5000), 1
    ),
    # default: alpha=0.1
    "alpha": hp.loguniform("alpha", np.log(0.0001), np.log(1000)),
    # default: annealing=False
    "annealing": hp.choice("annealing", [False, True]),
    # default: fraction_replaced=0.01
    "fraction_replaced": hp.loguniform(
        "fraction_replaced", np.log(0.0001), np.log(0.5)
    ),
    # default: fraction_replaced_hof=0.005
    "fraction_replaced_hof": hp.loguniform(
        "fraction_replaced_hof", np.log(0.0001), np.log(0.5)
    ),
    # default: population_size=100
    "population_size": hp.qloguniform(
        "population_size", np.log(20), np.log(1000), 1
    ),
    # default: parsimony=1e-4
    "parsimony": hp.loguniform("parsimony", np.log(0.0001), np.log(0.5)),
    # default: topn=10
    "topn": hp.qloguniform("topn", np.log(2), np.log(50), 1),
    # defaults: weight_add_node=1, weight_insert_node=3, weight_delete_node=3,
    # weight_do_nothing=1, weight_mutate_constant=10, weight_mutate_operator=1,
    # weight_swap_operands=1, weight_randomize=1
    "weight_add_node": _log_weight("weight_add_node"),
    "weight_insert_node": _log_weight("weight_insert_node"),
    "weight_delete_node": _log_weight("weight_delete_node"),
    "weight_do_nothing": _log_weight("weight_do_nothing"),
    "weight_mutate_constant": _log_weight("weight_mutate_constant"),
    "weight_mutate_operator": _log_weight("weight_mutate_operator"),
    "weight_swap_operands": _log_weight("weight_swap_operands"),
    "weight_randomize": _log_weight("weight_randomize"),
    # default: weight_simplify=0.002; held fixed as the reference weight.
    "weight_simplify": hp.choice("weight_simplify", [0.002]),
    # default: crossover_probability=0.01
    "crossover_probability": hp.loguniform(
        "crossover_probability", np.log(0.00001), np.log(0.2)
    ),
    # default: perturbation_factor=1.0
    "perturbation_factor": _log_weight("perturbation_factor"),
    # default: maxsize=20; fixed at 30 here.
    "maxsize": hp.choice("maxsize", [30]),
    # default: warmup_maxsize_by=0.0
    "warmup_maxsize_by": hp.uniform("warmup_maxsize_by", 0.0, 0.5),
    # default: use_frequency=True
    "use_frequency": hp.choice("use_frequency", [True, False]),
    # default: optimizer_nrestarts=3
    "optimizer_nrestarts": hp.quniform("optimizer_nrestarts", 1, 10, 1),
    # default: optimize_probability=1.0
    "optimize_probability": hp.uniform("optimize_probability", 0.0, 1.0),
    # default: optimizer_iterations=10
    "optimizer_iterations": hp.quniform("optimizer_iterations", 1, 10, 1),
    # default: tournament_selection_p=1.0
    "tournament_selection_p": hp.uniform("tournament_selection_p", 0.0, 1.0),
}
4 | Let's look at them below. 5 | 6 | PARAMSKEY 7 | 8 | ## PySRRegressor Functions 9 | 10 | ::: pysr.PySRRegressor.fit 11 | options: 12 | show_root_heading: true 13 | heading_level: 3 14 | show_root_full_path: false 15 | 16 | ::: pysr.PySRRegressor.predict 17 | options: 18 | show_root_heading: true 19 | heading_level: 3 20 | show_root_full_path: false 21 | 22 | ::: pysr.PySRRegressor.from_file 23 | options: 24 | show_root_heading: true 25 | heading_level: 3 26 | show_root_full_path: false 27 | 28 | ::: pysr.PySRRegressor.sympy 29 | options: 30 | show_root_heading: true 31 | heading_level: 3 32 | show_root_full_path: false 33 | 34 | ::: pysr.PySRRegressor.latex 35 | options: 36 | show_root_heading: true 37 | heading_level: 3 38 | show_root_full_path: false 39 | 40 | ::: pysr.PySRRegressor.pytorch 41 | options: 42 | show_root_heading: true 43 | heading_level: 3 44 | show_root_full_path: false 45 | 46 | ::: pysr.PySRRegressor.jax 47 | options: 48 | show_root_heading: true 49 | heading_level: 3 50 | show_root_full_path: false 51 | 52 | ::: pysr.PySRRegressor.latex_table 53 | options: 54 | show_root_heading: true 55 | heading_level: 3 56 | show_root_full_path: false 57 | 58 | ::: pysr.PySRRegressor.refresh 59 | options: 60 | show_root_heading: true 61 | heading_level: 3 62 | show_root_full_path: false 63 | 64 | ## Expression Specifications 65 | 66 | ::: pysr.ExpressionSpec 67 | options: 68 | show_root_heading: true 69 | heading_level: 3 70 | show_root_full_path: false 71 | 72 | ::: pysr.TemplateExpressionSpec 73 | options: 74 | show_root_heading: true 75 | heading_level: 3 76 | show_root_full_path: false 77 | 78 | ::: pysr.ParametricExpressionSpec 79 | options: 80 | show_root_heading: true 81 | heading_level: 3 82 | show_root_full_path: false 83 | 84 | ::: pysr.AbstractExpressionSpec 85 | options: 86 | show_root_heading: true 87 | heading_level: 3 88 | show_root_full_path: false 89 | 90 | ## Logger Specifications 91 | 92 | ::: pysr.TensorBoardLoggerSpec 93 | options: 94 
| show_root_heading: true 95 | heading_level: 3 96 | show_root_full_path: false 97 | 98 | ::: pysr.AbstractLoggerSpec 99 | options: 100 | show_root_heading: true 101 | heading_level: 3 102 | show_root_full_path: false 103 | -------------------------------------------------------------------------------- /docs/all_contributors/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /docs/all_contributors/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PySR", 3 | "version": "1.0.0", 4 | "main": "index.js", 5 | "repository": "git@github.com:MilesCranmer/PySR.git", 6 | "author": "MilesCranmer ", 7 | "license": "Apache-2.0", 8 | "devDependencies": { 9 | "all-contributors-cli": "6.25.1" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /docs/all_contributors/run_all_contrib.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | yarn install --frozen-lockfile 4 | yarn all-contributors $@ 5 | -------------------------------------------------------------------------------- /docs/api-advanced.md: -------------------------------------------------------------------------------- 1 | # Internal Reference 2 | 3 | ## Julia Interface 4 | 5 | ::: pysr.julia_helpers 6 | options: 7 | members: 8 | - init_julia 9 | - install 10 | heading_level: 3 11 | 12 | ## Exporting to LaTeX 13 | 14 | ::: pysr.export_latex 15 | options: 16 | members: 17 | - to_latex 18 | - generate_single_table 19 | - generate_multiple_tables 20 | - generate_table_environment 21 | heading_level: 3 22 | 23 | ## Exporting to JAX 24 | 25 | ::: pysr.export_jax 26 | options: 27 | members: 28 | - sympy2jax 29 | - sympy2jaxtext 30 | heading_level: 3 31 | 32 | ## Exporting to PyTorch 33 | 34 | ::: pysr.export_torch 35 | 
options: 36 | members: 37 | - sympy2torch 38 | heading_level: 3 39 | -------------------------------------------------------------------------------- /docs/assets/87712EA9B4B3CB1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/assets/87712EA9B4B3CB1B.png -------------------------------------------------------------------------------- /docs/assets/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/assets/favicon.png -------------------------------------------------------------------------------- /docs/assets/pysr_logo.svg: -------------------------------------------------------------------------------- 1 | PySR 2 | -------------------------------------------------------------------------------- /docs/assets/pysr_logo_reduced.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 23 | 24 | 25 | 26 | 27 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | P 39 | ySR 40 | 41 | 42 | -------------------------------------------------------------------------------- /docs/backend.md: -------------------------------------------------------------------------------- 1 | # Customization 2 | 3 | If you have explored the [options](options.md) and [PySRRegressor reference](api.md), and still haven't figured out how to specify a constraint or objective required for your problem, you might consider editing the backend. 4 | The backend of PySR is written as a pure Julia package under the name [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl). 
5 | This package is accessed with [`juliacall`](https://github.com/JuliaPy/PythonCall.jl), which allows us to transfer objects back and forth between the Python and Julia runtimes. 6 | 7 | PySR gives you access to everything in SymbolicRegression.jl, but there are some specific use-cases which require modifications to the backend itself. 8 | Generally you can do this as follows: 9 | 10 | ## 1. Check out the source code 11 | 12 | Clone a copy of the backend as well as PySR: 13 | 14 | ```bash 15 | git clone https://github.com/MilesCranmer/SymbolicRegression.jl 16 | git clone https://github.com/MilesCranmer/PySR 17 | ``` 18 | 19 | You may wish to check out the specific versions, which you can do with: 20 | 21 | ```bash 22 | cd PySR 23 | git checkout 24 | 25 | # You can see the current backend version in `pysr/juliapkg.json` 26 | cd ../SymbolicRegression.jl 27 | git checkout 28 | ``` 29 | 30 | ## 2. Edit the source to your requirements 31 | 32 | The main search code can be found in `src/SymbolicRegression.jl`. 33 | 34 | Here are some tips: 35 | 36 | - The documentation for the backend is given [here](https://ai.damtp.cam.ac.uk/symbolicregression/dev/). 37 | - Throughout the package, you will often see template functions which typically use a symbol `T` (such as in the string `where {T<:Real}`). Here, `T` is simply the datatype of the input data and stored constants, such as `Float32` or `Float64`. Writing functions in this way lets us write functions generic to types, while still having access to the specific type specified at compilation time. 38 | - Expressions are stored as binary trees, using the `Node{T}` type, described [here](https://ai.damtp.cam.ac.uk/symbolicregression/dev/types/#SymbolicRegression.CoreModule.EquationModule.Node). 39 | - For reference, the main loop itself is found in the `equation_search` function inside [`src/SymbolicRegression.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/SymbolicRegression.jl). 
40 | - Parts of the code which are typically edited by users include: 41 | - [`src/CheckConstraints.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/CheckConstraints.jl), particularly the function `check_constraints`. This function checks whether a given expression satisfies constraints, such as having a complexity lower than `maxsize`, and whether it contains any forbidden nestings of functions. 42 | - Note that all expressions, *even intermediate expressions*, must comply with constraints. Therefore, make sure that evolution can still reach your desired expression (with one mutation at a time), before setting a hard constraint. In other cases you might want to instead put in the loss function. 43 | - [`src/Options.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/Options.jl), as well as the struct definition in [`src/OptionsStruct.jl`](https://github.com/MilesCranmer/SymbolicRegression.jl/blob/master/src/OptionsStruct.jl). This file specifies all the options used in the search: an instance of `Options` is typically available throughout every function in `SymbolicRegression.jl`. If you add new functionality to the backend, and wish to make it parameterizable (including from PySR), you should specify it in the options. 44 | 45 | ## 3. Let PySR use the modified backend 46 | 47 | Once you have made your changes, you should edit the `pysr/juliapkg.json` file 48 | in the PySR repository to point to this local copy. 49 | Do this by removing the `"version"` key and adding a `"dev"` and `"path"` key: 50 | 51 | ```json 52 | ... 53 | "packages": { 54 | "SymbolicRegression": { 55 | "uuid": "8254be44-1295-4e6a-a16d-46603ac705cb", 56 | "dev": true, 57 | "path": "/path/to/SymbolicRegression.jl" 58 | }, 59 | ... 60 | ``` 61 | 62 | You can then install PySR with this modified backend by running: 63 | 64 | ```bash 65 | cd PySR 66 | pip install . 
67 | ``` 68 | 69 | For more information on `juliapkg.json`, see [`pyjuliapkg`](https://github.com/JuliaPy/pyjuliapkg). 70 | 71 | ## Additional notes 72 | 73 | If you get comfortable enough with the backend, you might consider using the Julia package directly: the API is given on the [SymbolicRegression.jl documentation](https://ai.damtp.cam.ac.uk/symbolicregression/dev/). 74 | 75 | If you make a change that you think could be useful to other users, don't hesitate to open a pull request on either the PySR or SymbolicRegression.jl repositories! Contributions are very appreciated. 76 | -------------------------------------------------------------------------------- /docs/gen_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Generate home page using README.md: 4 | echo '
"""Generate Markdown API docs for PySRRegressor's parameters.

Renders the parameter descriptions from the `PySRRegressor` docstring,
grouped under the headings declared in `pysr/param_groupings.yml`, and
prints the result to stdout (consumed by `gen_docs.sh`).
"""

import re
import sys

sys.path.append("..")

# Parameter names rendered so far; the __main__ block uses this to verify
# that the YAML groupings and the docstring cover exactly the same set.
found_params = []


def str_param_groups(param_groupings, params, cur_heading=2):
    """Recursively render parameter descriptions and defaults as Markdown.

    :param param_groupings: nested structure from `param_groupings.yml`:
        a list whose items are parameter-name strings or dicts mapping a
        heading to a nested grouping.
    :param params: maps parameter name -> parsed docstring entry (an object
        with a `.description` attribute).
    :param cur_heading: Markdown heading level for the current depth.
    :returns: the rendered Markdown fragment.
    :raises TypeError: for any node that is not a list, dict, or str.
    """
    if isinstance(param_groupings, list):
        return "\n\n".join(
            str_param_groups(param, params, cur_heading) for param in param_groupings
        )
    elif isinstance(param_groupings, dict):
        # Render EVERY heading in the dict. (Previously this returned from
        # inside the loop, silently dropping all but the first key.)
        return "\n\n".join(
            f"{'#' * cur_heading} {heading}"
            + "\n\n"
            + str_param_groups(param_grouping, params, cur_heading + 1)
            for heading, param_grouping in param_groupings.items()
        )
    elif isinstance(param_groupings, str):
        # No `global` needed: the list is mutated in place, never rebound.
        found_params.append(param_groupings)

        description = params[param_groupings].description
        default_value = re.search(r"Default is `(.*)`", description)
        clean_desc = re.sub(r"Default is .*", "", description)
        # Prepend every line with 4 spaces:
        clean_desc = "\n".join(" " + line for line in clean_desc.splitlines())
        return (
            f" - **`{param_groupings}`**"
            + "\n\n"
            + clean_desc
            + (
                "\n\n " + f"*Default:* `{default_value.group(1)}`"
                if default_value
                else ""
            )
        )
    else:
        raise TypeError(f"Unexpected type {type(param_groupings)}")


if __name__ == "__main__":
    # Heavy imports are local to the CLI path so that `str_param_groups`
    # can be imported without pysr / docstring_parser / pyyaml installed.
    from docstring_parser import parse
    from yaml import safe_load

    from pysr import PySRRegressor

    # This is the path to the param_groupings.yml file
    # relative to the current file.
    path = "../pysr/param_groupings.yml"
    with open(path, "r") as f:
        param_groupings = safe_load(f)

    # Load the parameter descriptions from the docstring of PySRRegressor,
    # skipping fitted attributes (trailing "_") and **kwargs:
    raw_params = parse(PySRRegressor.__doc__).params
    params = {
        param.arg_name: param
        for param in raw_params
        if param.arg_name[-1] != "_" and param.arg_name != "**kwargs"
    }

    output = str_param_groups(param_groupings, params, cur_heading=3)
    # The YAML groupings and the docstring must cover the same parameter set.
    assert len(set(found_params) ^ set(params.keys())) == 0
    print("## PySRRegressor Parameters")
    print(output)
48 | ![]({absolute_image_file}){{ width="500"}} 49 |
50 | 51 |

52 | {title} 53 |

54 |
55 |
56 | 57 |
58 | {authors} 59 | 60 | {affiliations} 61 |
62 | 63 | **Abstract:** {abstract}\n\n 64 | """ 65 | snippets.append(paper_snippet) 66 | 67 | f.write("\n\n---\n\n".join(snippets)) 68 | -------------------------------------------------------------------------------- /docs/images/Planar_relation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/Planar_relation.png -------------------------------------------------------------------------------- /docs/images/SyReg_GasConc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/SyReg_GasConc.png -------------------------------------------------------------------------------- /docs/images/Y_Mgal_Simba.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/Y_Mgal_Simba.png -------------------------------------------------------------------------------- /docs/images/back_to_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/back_to_formula.png -------------------------------------------------------------------------------- /docs/images/cloud_cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/cloud_cover.jpg -------------------------------------------------------------------------------- /docs/images/economic_theory_gravity.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/economic_theory_gravity.png -------------------------------------------------------------------------------- /docs/images/electronnegativity_introduction.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/electronnegativity_introduction.jpg -------------------------------------------------------------------------------- /docs/images/example_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/example_plot.png -------------------------------------------------------------------------------- /docs/images/hi_mass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/hi_mass.png -------------------------------------------------------------------------------- /docs/images/hod_importances.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/hod_importances.png -------------------------------------------------------------------------------- /docs/images/hyperbolic_volume.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/hyperbolic_volume.png -------------------------------------------------------------------------------- /docs/images/illustris_example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/illustris_example.png -------------------------------------------------------------------------------- /docs/images/jet_background_diagram.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/jet_background_diagram.jpg -------------------------------------------------------------------------------- /docs/images/kidger_thesis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/kidger_thesis.png -------------------------------------------------------------------------------- /docs/images/rediscovering_gravity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/docs/images/rediscovering_gravity.png -------------------------------------------------------------------------------- /docs/js/mathjax.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [["\\(", "\\)"]], 4 | displayMath: [["\\[", "\\]"]], 5 | processEscapes: true, 6 | processEnvironments: true 7 | }, 8 | options: { 9 | ignoreHtmlClass: ".*|", 10 | processHtmlClass: "arithmatex" 11 | } 12 | }; 13 | 14 | document$.subscribe(() => { 15 | MathJax.typesetPromise() 16 | }) 17 | -------------------------------------------------------------------------------- /docs/operators.md: -------------------------------------------------------------------------------- 1 | # Operators 2 | 3 | ## Pre-defined 4 | 5 | First, note that pretty much any valid Julia function which 6 | takes one or two scalars as input, and returns on scalar as output, 7 
| is likely to be a valid operator[^1]. 8 | A selection of these and other valid operators are stated below. 9 | 10 | Also, note that it's a good idea to not use too many operators, since 11 | it can exponentially increase the search space. 12 | 13 | **Binary Operators** 14 | 15 | | Arithmetic | Comparison | Logic | 16 | |--------------|------------|----------| 17 | | `+` | `max` | `logical_or`[^2] | 18 | | `-` | `min` | `logical_and`[^3]| 19 | | `*` | `>`[^4] | | 20 | | `/` | `>=` | | 21 | | `^` | `<` | | 22 | | | `<=` | | 23 | | | `cond`[^5] | | 24 | | | `mod` | | 25 | 26 | **Unary Operators** 27 | 28 | | Basic | Exp/Log | Trig | Hyperbolic | Special | Rounding | 29 | |------------|------------|-----------|------------|-----------|------------| 30 | | `neg` | `exp` | `sin` | `sinh` | `erf` | `round` | 31 | | `square` | `log` | `cos` | `cosh` | `erfc` | `floor` | 32 | | `cube` | `log10` | `tan` | `tanh` | `gamma` | `ceil` | 33 | | `cbrt` | `log2` | `asin` | `asinh` | `relu` | | 34 | | `sqrt` | `log1p` | `acos` | `acosh` | `sinc` | | 35 | | `abs` | | `atan` | `atanh` | | | 36 | | `sign` | | | | | | 37 | | `inv` | | | | | | 38 | 39 | 40 | ## Custom 41 | 42 | Instead of passing a predefined operator as a string, 43 | you can just define a custom function as Julia code. For example: 44 | 45 | ```python 46 | PySRRegressor( 47 | ..., 48 | unary_operators=["myfunction(x) = x^2"], 49 | binary_operators=["myotherfunction(x, y) = x^2*y"], 50 | extra_sympy_mappings={ 51 | "myfunction": lambda x: x**2, 52 | "myotherfunction": lambda x, y: x**2 * y, 53 | }, 54 | ) 55 | ``` 56 | 57 | 58 | Make sure that it works with 59 | `Float32` as a datatype (for default precision, or `Float64` if you set `precision=64`). That means you need to write `1.5f3` 60 | instead of `1.5e3`, if you write any constant numbers, or simply convert a result to `Float64(...)`. 
61 | 62 | PySR expects that operators not throw an error for any input value over the entire real line from `-3.4e38` to `+3.4e38`. 63 | Thus, for invalid inputs, such as negative numbers to a `sqrt` function, you may simply return a `NaN` of the same type as the input. For example, 64 | 65 | ```julia 66 | my_sqrt(x) = x >= 0 ? sqrt(x) : convert(typeof(x), NaN) 67 | ``` 68 | 69 | would be a valid operator. The genetic algorithm 70 | will preferentially select expressions which avoid 71 | any invalid values over the training dataset. 72 | 73 | 74 | 75 | 76 | 77 | [^1]: However, you will need to define a sympy equivalent in `extra_sympy_mapping` if you want to use a function not in the above list. 78 | [^2]: `logical_or` is equivalent to `(x, y) -> (x > 0 || y > 0) ? 1 : 0` 79 | [^3]: `logical_and` is equivalent to `(x, y) -> (x > 0 && y > 0) ? 1 : 0` 80 | [^4]: `>` is equivalent to `(x, y) -> x > y ? 1 : 0` 81 | [^5]: `cond` is equivalent to `(x, y) -> x > 0 ? y : 0` 82 | -------------------------------------------------------------------------------- /docs/options.md: -------------------------------------------------------------------------------- 1 | # Features and Options 2 | 3 | Some configurable features and options in `PySR` which you 4 | may find useful include: 5 | 6 | - [Selecting from the accuracy-complexity curve](#model-selection) 7 | - [Operators](#operators) 8 | - [Number of outer search iterations](#iterations) 9 | - [Number of inner search iterations](#cycles-per-iteration) 10 | - [Multi-processing](#processors) 11 | - [Populations](#populations) 12 | - [Data weighting](#weighted-data) 13 | - [Max complexity and depth](#max-size) 14 | - [Mini-batching](#batching) 15 | - [Variable names](#variable-names) 16 | - [Constraining use of operators](#constraining-use-of-operators) 17 | - [Custom complexities](#custom-complexity) 18 | - [LaTeX and SymPy](#latex-and-sympy) 19 | - [Exporting to numpy, pytorch, and jax](#exporting-to-numpy-pytorch-and-jax) 20
| - [Loss functions](#loss) 21 | - [Model loading](#model-loading) 22 | 23 | These are described below. 24 | Also check out the [tuning page](tuning.md) for workflow tips. 25 | 26 | The program will output a pandas DataFrame containing the equations 27 | to `PySRRegressor.equations` containing the loss value 28 | and complexity. 29 | 30 | It will also dump to a csv 31 | at the end of every iteration, 32 | which is `.hall_of_fame_{date_time}.csv` by default. 33 | It also prints the equations to stdout. 34 | 35 | ## Model selection 36 | 37 | By default, `PySRRegressor` uses `model_selection='best'` 38 | which selects an equation from `PySRRegressor.equations_` using 39 | a combination of accuracy and complexity. 40 | You can also select `model_selection='accuracy'`. 41 | 42 | By printing a model (i.e., `print(model)`), you can see 43 | the equation selection with the arrow shown in the `pick` column. 44 | 45 | ## Operators 46 | 47 | A list of operators can be found on the [operators page](operators.md). 48 | One can define custom operators in Julia by passing a string: 49 | 50 | ```python 51 | PySRRegressor(niterations=100, 52 | binary_operators=["mult", "plus", "special(x, y) = x^2 + y"], 53 | extra_sympy_mappings={'special': lambda x, y: x**2 + y}, 54 | unary_operators=["cos"]) 55 | ``` 56 | 57 | Now, the symbolic regression code can search using this `special` function 58 | that squares its left argument and adds it to its right. Make sure 59 | all passed functions are valid Julia code, and take one (unary) 60 | or two (binary) float32 scalars as input, and output a float32. This means if you 61 | write any real constants in your operator, like `2.5`, you have to write them 62 | instead as `2.5f0`, which defines it as `Float32`. 63 | Operators are automatically vectorized. 64 | 65 | One should also define `extra_sympy_mappings`, 66 | so that the SymPy code can understand the output equation from Julia, 67 | when constructing a useable function. 
This step is optional, but 68 | is necessary for the `lambda_format` to work. 69 | 70 | ## Iterations 71 | 72 | This is the total number of generations that `pysr` will run for. 73 | I usually set this to a large number, and exit when I am satisfied 74 | with the equations. 75 | 76 | ## Cycles per iteration 77 | 78 | Each cycle considers every 10-equation subsample (re-sampled for each individual 10, 79 | unless `fast_cycle` is set in which case the subsamples are separate groups of equations) 80 | a single time, producing one mutated equation for each. 81 | The parameter `ncycles_per_iteration` defines how many times this 82 | occurs before the equations are compared to the hall of fame, 83 | and new equations are migrated from the hall of fame, or from other populations. 84 | It also controls how slowly annealing occurs. You may find that increasing 85 | `ncycles_per_iteration` results in a higher cycles-per-second, as the head 86 | worker needs to reduce and distribute new equations less often, and also increases 87 | diversity. But at the same 88 | time, a smaller number it might be that migrating equations from the hall of fame helps 89 | each population stay closer to the best current equations. 90 | 91 | ## Processors 92 | 93 | One can adjust the number of workers used by Julia with the 94 | `procs` option. You should set this equal to the number of cores 95 | you want `pysr` to use. 96 | 97 | ## Populations 98 | 99 | By default, `populations=15`, but you can set a different 100 | number of populations with this option. 101 | More populations may increase 102 | the diversity of equations discovered, though will take longer to train. 103 | However, it is usually more efficient to have `populations>procs`, 104 | as there are multiple populations running 105 | on each core. 106 | 107 | ## Weighted data 108 | 109 | Here, we assign weights to each row of data 110 | using inverse uncertainty squared. 
We also use 10 processes for the search 111 | instead of the default. 112 | 113 | ```python 114 | sigma = ... 115 | weights = 1/sigma**2 116 | 117 | model = PySRRegressor(procs=10) 118 | model.fit(X, y, weights=weights) 119 | ``` 120 | 121 | ## Max size 122 | 123 | `maxsize` controls the maximum size of equation (number of operators, 124 | constants, variables). `maxdepth` is by default not used, but can be set 125 | to control the maximum depth of an equation. These will make processing 126 | faster, as longer equations take longer to test. 127 | 128 | One can warm up the maxsize from a small number to encourage 129 | PySR to start simple, by using the `warmupMaxsize` argument. 130 | This specifies that maxsize increases every `warmupMaxsize`. 131 | 132 | ## Batching 133 | 134 | One can turn on mini-batching, with the `batching` flag, 135 | and control the batch size with `batch_size`. This will make 136 | evolution faster for large datasets. Equations are still evaluated 137 | on the entire dataset at the end of each iteration to compare to the hall 138 | of fame, but only on a random subset during mutations and annealing. 139 | 140 | ## Variable Names 141 | 142 | You can pass a list of strings naming each column of `X` with 143 | `variable_names`. Alternatively, you can pass `X` as a pandas dataframe 144 | and the columns will be used as variable names. Make sure only 145 | alphabetical characters and `_` are used in these names. 146 | 147 | ## Constraining use of operators 148 | 149 | One can limit the complexity of specific operators with the `constraints` parameter. 150 | There is a "maxsize" parameter to PySR, but there is also an operator-level 151 | "constraints" parameter. 
One supplies a dict, like so: 152 | 153 | ```python 154 | constraints={'pow': (-1, 1), 'mult': (3, 3), 'cos': 5} 155 | ``` 156 | 157 | What this says is that: a power law $x^y$ can have an expression of arbitrary (-1) complexity in the x, but only complexity 1 (e.g., a constant or variable) in the y. So $(x_0 + 3)^{5.5}$ is allowed, but $5.5^{x_0 + 3}$ is not. 158 | I find this helps a lot for getting more interpretable equations. 159 | The other terms say that each multiplication can only have sub-expressions 160 | of up to complexity 3 (e.g., $5.0 + x_2$) in each side, and cosine can only operate on 161 | expressions of complexity 5 (e.g., $5.0 + x_2 exp(x_3)$). 162 | 163 | ## Custom complexity 164 | 165 | By default, all operators, constants, and instances of variables 166 | have a complexity of 1. The sum of the complexities of all terms 167 | is the total complexity of an expression. 168 | You may change this by configuring the options: 169 | 170 | - `complexity_of_operators` - pass a dictionary of `: ` pairs 171 | to change the complexity of each operator. If an operator is not 172 | specified, it will have the default complexity of 1. 173 | - `complexity_of_constants` - supplying an integer will make all constants 174 | have that complexity. 175 | - `complexity_of_variables` - supplying an integer will make all variables 176 | have that complexity. 177 | 178 | ## LaTeX and SymPy 179 | 180 | After running `model.fit(...)`, you can look at 181 | `model.equations` which is a pandas dataframe. 182 | The `sympy_format` column gives sympy equations, 183 | and the `lambda_format` gives callable functions. 184 | You can optionally pass a pandas dataframe to the callable function, 185 | if you called `.fit` on a pandas dataframe as well. 186 | 187 | There are also some helper functions for doing this quickly. 188 | 189 | - `model.latex()` will generate a TeX formatted output of your equation. 
190 | - `model.latex_table(indices=[2, 5, 8])` will generate a formatted LaTeX table including all the specified equations. 191 | - `model.sympy()` will return the SymPy representation. 192 | - `model.jax()` will return a callable JAX function combined with parameters (see below) 193 | - `model.pytorch()` will return a PyTorch model (see below). 194 | 195 | ## Exporting to numpy, pytorch, and jax 196 | 197 | By default, the dataframe of equations will contain columns 198 | with the identifier `lambda_format`. 199 | These are simple functions which correspond to the equation, but executed 200 | with numpy functions. 201 | You can pass your `X` matrix to these functions 202 | just as you did to the `model.fit` call. Thus, this allows 203 | you to numerically evaluate the equations over different output. 204 | 205 | Calling `model.predict` will execute the `lambda_format` of 206 | the best equation, and return the result. If you selected 207 | `model_selection="best"`, this will use an equation that combines 208 | accuracy with simplicity. For `model_selection="accuracy"`, this will just 209 | look at accuracy. 210 | 211 | One can do the same thing for PyTorch, which uses code 212 | from [sympytorch](https://github.com/patrick-kidger/sympytorch), 213 | and for JAX, which uses code from 214 | [sympy2jax](https://github.com/MilesCranmer/sympy2jax). 215 | 216 | Calling `model.pytorch()` will return 217 | a PyTorch module which runs the equation, using PyTorch functions, 218 | over `X` (as a PyTorch tensor). This is differentiable, and the 219 | parameters of this PyTorch module correspond to the learned parameters 220 | in the equation, and are trainable. 
221 | 222 | ```python 223 | torch_model = model.pytorch() 224 | torch_model(X) 225 | ``` 226 | 227 | **Warning: If you are using custom operators, you must define `extra_torch_mappings` or `extra_jax_mappings` (both are `dict` of callables) to provide an equivalent definition of the functions.** (At any time you can set these parameters or any others with `model.set_params`.) 228 | 229 | For JAX, you can equivalently call `model.jax()` 230 | This will return a dictionary containing a `'callable'` (a JAX function), 231 | and `'parameters'` (a list of parameters in the equation). 232 | You can execute this function with: 233 | 234 | ```python 235 | jax_model = model.jax() 236 | jax_model['callable'](X, jax_model['parameters']) 237 | ``` 238 | 239 | Since the parameter list is a jax array, this therefore lets you also 240 | train the parameters within JAX (and is differentiable). 241 | 242 | ## `loss` 243 | 244 | The default loss is mean-square error, and weighted mean-square error. 245 | One can pass an arbitrary Julia string to define a custom loss, using, 246 | e.g., `elementwise_loss="myloss(x, y) = abs(x - y)^1.5"`. For more details, 247 | see the 248 | [Losses](https://milescranmer.github.io/SymbolicRegression.jl/dev/losses/) 249 | page for SymbolicRegression.jl. 
250 | 251 | Here are some additional examples: 252 | 253 | abs(x-y) loss 254 | 255 | ```python 256 | PySRRegressor(..., elementwise_loss="f(x, y) = abs(x - y)^1.5") 257 | ``` 258 | 259 | Note that the function name doesn't matter: 260 | 261 | ```python 262 | PySRRegressor(..., elementwise_loss="loss(x, y) = abs(x * y)") 263 | ``` 264 | 265 | With weights: 266 | 267 | ```python 268 | model = PySRRegressor(..., elementwise_loss="myloss(x, y, w) = w * abs(x - y)") 269 | model.fit(..., weights=weights) 270 | ``` 271 | 272 | Weights can be used in arbitrary ways: 273 | 274 | ```python 275 | model = PySRRegressor(..., weights=weights, elementwise_loss="myloss(x, y, w) = abs(x - y)^2/w^2") 276 | model.fit(..., weights=weights) 277 | ``` 278 | 279 | Built-in loss (faster) (see [losses](https://ai.damtp.cam.ac.uk/symbolicregression/dev/losses/)). 280 | This one computes the L3 norm: 281 | 282 | ```python 283 | PySRRegressor(..., elementwise_loss="LPDistLoss{3}()") 284 | ``` 285 | 286 | Can also uses these losses for weighted (weighted-average): 287 | 288 | ```python 289 | model = PySRRegressor(..., weights=weights, elementwise_loss="LPDistLoss{3}()") 290 | model.fit(..., weights=weights) 291 | ``` 292 | 293 | ## Model loading 294 | 295 | PySR will automatically save a pickle file of the model state 296 | when you call `model.fit`, once before the search starts, 297 | and again after the search finishes. The filename will 298 | have the same base name as the input file, but with a `.pkl` extension. 299 | You can load the saved model state with: 300 | 301 | ```python 302 | model = PySRRegressor.from_file(pickle_filename) 303 | ``` 304 | 305 | If you have a long-running job and would like to load the model 306 | before completion, you can also do this. In this case, the model 307 | loading will use the `csv` file to load the equations, since the 308 | `csv` file is continually updated during the search. 
Once 309 | the search completes, the model including its equations will 310 | be saved to the pickle file, overwriting the existing version. 311 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs-material 2 | mkdocs-autorefs 3 | mkdocstrings[python] 4 | docstring_parser 5 | -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | [data-md-color-scheme="pysr"] { 2 | --md-primary-fg-color: #C13245; 3 | --md-primary-fg-color--light: #D35364; 4 | --md-primary-fg-color--dark: #982736; 5 | } 6 | -------------------------------------------------------------------------------- /docs/stylesheets/papers_header.txt: -------------------------------------------------------------------------------- 1 | # Research 2 | Below is a showcase of papers which have used PySR to discover 3 | or rediscover a symbolic model. 4 | These are sorted by the date of release, with most recent papers at the top. 5 | 6 | 7 | If you have used PySR in your research, 8 | please submit a pull request to add your paper to [this file](https://github.com/MilesCranmer/PySR/blob/master/docs/papers.yml). 9 | -------------------------------------------------------------------------------- /docs/tuning.md: -------------------------------------------------------------------------------- 1 | # Tuning and Workflow Tips 2 | 3 | I give a short guide below on how I like to tune PySR for my applications. 4 | 5 | First, my general tips would be to avoid using redundant operators, like how `pow` can do the same things as `square`, or how `-` (binary) and `neg` (unary) are equivalent. The fewer operators the better! Only use operators you need. 
6 | 7 | When running PySR, I usually do the following: 8 | 9 | I run from IPython (Jupyter Notebooks don't work as well[^1]) on the head node of a slurm cluster. Passing `cluster_manager="slurm"` will make PySR set up a run over the entire allocation. I set `procs` equal to the total number of cores over my entire allocation. 10 | 11 | I use the [tensorboard feature](https://ai.damtp.cam.ac.uk/pysr/examples/#12-using-tensorboard-for-logging) for experiment tracking. 12 | 13 | [^1]: Jupyter Notebooks are supported by PySR, but miss out on some useful features available in IPython and Python: the progress bar, and early stopping with "q". In Jupyter you cannot interrupt a search once it has started; you have to restart the kernel. See [this issue](https://github.com/MilesCranmer/PySR/issues/260) for updates. 14 | 15 | 1. I start by using the default parameters. 16 | 2. I use only the operators I think it needs and no more. 17 | 3. Increase `populations` to `3*num_cores`. 18 | 4. If my dataset is more than 1000 points, I either subsample it (low-dimensional and not much noise) or set `batching=True` (high-dimensional or very noisy, so it needs to evaluate on all the data). 19 | 5. While on a laptop or single node machine, you might leave the default `ncycles_per_iteration`, on a cluster with ~100 cores I like to set `ncycles_per_iteration` to maybe `5000` or so, until the head node occupation is under `10%`. (A larger value means the workers talk less frequently to eachother, which is useful when you have many workers!) 20 | 6. Set `constraints` and `nested_constraints` as strict as possible. These can help quite a bit with exploration. Typically, if I am using `pow`, I would set `constraints={"pow": (9, 1)}`, so that power laws can only have a variable or constant as their exponent. 
If I am using `sin` and `cos`, I also like to set `nested_constraints={"sin": {"sin": 0, "cos": 0}, "cos": {"sin": 0, "cos": 0}}`, so that sin and cos can't be nested, which seems to happen frequently. (Although in practice I would just use `sin`, since the search could always add a phase offset!) 21 | 7. Set `maxsize` a bit larger than the final size you want. e.g., if you want a final equation of size `30`, you might set this to `35`, so that it has a bit of room to explore. 22 | 8. I typically don't use `maxdepth`, but if I do, I set it strictly, while also leaving a bit of room for exploration. e.g., if you want a final equation limited to a depth of `5`, you might set this to `6` or `7`, so that it has a bit of room to explore. 23 | 9. Set `parsimony` equal to about the minimum loss you would expect, divided by 5-10. e.g., if you expect the final equation to have a loss of `0.001`, you might set `parsimony=0.0001`. 24 | 10. Set `weight_optimize` to some larger value, maybe `0.001`. This is very important if `ncycles_per_iteration` is large, so that optimization happens more frequently. 25 | 11. Set `turbo` to `True`. This turns on advanced loop vectorization, but is still quite experimental. It should give you a nice 20% or more speedup. 26 | 12. For final runs, after I have tuned everything, I typically set `niterations` to some very large value, and just let it run for a week until my job finishes (genetic algorithms tend not to converge, they can look like they settle down, but then find a new family of expression, and explore a new space). If I am satisfied with the current equations (which are visible either in the terminal or in the saved csv file), I quit the job early. 27 | 28 | Since I am running in IPython, I can just hit `q` and then `` to stop the job, tweak the hyperparameters, and then start the search again. 
29 | I can also use `warm_start=True` if I wish to continue where I left off (though note that changing some parameters, like `maxsize`, are incompatible with warm starts). 30 | 31 | Some things I try out to see if they help: 32 | 33 | 1. Play around with `complexity_of_operators`. Set operators you dislike (e.g., `pow`) to have a larger complexity. 34 | 2. Try setting `adaptive_parsimony_scaling` a bit larger, maybe up to `1000`. 35 | 3. Sometimes I try using `warmup_maxsize_by`. This is useful if you find that the search finds a very complex equation very quickly, and then gets stuck. It basically forces it to start at the simpler equations and build up complexity slowly. 36 | 4. Play around with different losses: 37 | - I typically try `L2DistLoss()` and `L1DistLoss()`. L1 loss is more robust to outliers compared to L2 (L1 finds the median, while L2 finds the mean of a random variable), so is often a good choice for a noisy dataset. 38 | - I might also provide the `weights` parameter to `fit` if there is some reasonable choice of weighting. For example, maybe I know the signal-to-noise of a particular row of `y` - I would set that SNR equal to the weights. Or, perhaps I do some sort of importance sampling, and weight the rows by importance. 39 | 40 | Very rarely I might also try tuning the mutation weights, the crossover probability, or the optimization parameters. I never use `denoise` or `select_k_features` as I find they aren't very useful. 41 | 42 | For large datasets I usually just randomly sample ~1000 points or so. In case all the points matter, I might use `batching=True`. 43 | 44 | If I find the equations get very complex and I'm not sure if they are numerically precise, I might set `precision=64`. 45 | 46 | Once a run is finished, I use the `PySRRegressor.from_file` function to load the saved search in a different process (requires the pickle file, and possibly also the `.csv` file if you quit early). 
I can then explore the equations, convert them to LaTeX, and plot their output. 47 | 48 | ## More Tips 49 | 50 | You might also wish to explore the [discussions](https://github.com/MilesCranmer/PySR/discussions/) page for more tips, and to see if anyone else has had similar questions. 51 | Be sure to also read through the [reference](api.md). 52 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python>=3.8 6 | - sympy>=1.0.0,<2.0.0 7 | - pandas>=0.21.0,<3.0.0 8 | - numpy>=1.13.0,<3.0.0 9 | - scikit-learn>=1.0.0,<2.0.0 10 | - pyjuliacall>=0.9.24,<0.9.26 11 | - click>=7.0.0,<9.0.0 12 | - typing-extensions>=4.0.0,<5.0.0 13 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | X = 2 * np.random.randn(100, 5) 4 | y = 2.5382 * np.cos(X[:, 3]) + X[:, 0] ** 2 - 0.5 5 | 6 | from pysr import PySRRegressor 7 | 8 | model = PySRRegressor( 9 | model_selection="best", # Result is mix of simplicity+accuracy 10 | niterations=40, 11 | binary_operators=["+", "*"], 12 | unary_operators=[ 13 | "cos", 14 | "exp", 15 | "sin", 16 | "inv(x) = 1/x", 17 | # ^ Custom operator (julia syntax) 18 | ], 19 | extra_sympy_mappings={"inv": lambda x: 1 / x}, 20 | # ^ Define operator for SymPy as well 21 | elementwise_loss="loss(x, y) = (x - y)^2", 22 | # ^ Custom loss function (julia syntax) 23 | ) 24 | 25 | model.fit(X, y) 26 | 27 | print(model) 28 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: PySR 2 | theme: 3 | name: material 4 | palette: 5 | # Light mode: 6 | - media: "(prefers-color-scheme: light)" 7 | 
scheme: pysr 8 | toggle: 9 | icon: material/toggle-switch 10 | name: Switch to dark mode 11 | - media: "(prefers-color-scheme: dark)" 12 | scheme: slate 13 | toggle: 14 | icon: material/toggle-switch-off-outline 15 | name: Switch to light mode 16 | 17 | 18 | features: 19 | - navigation.expand 20 | 21 | logo: assets/pysr_logo_reduced.svg 22 | icon: 23 | repo: fontawesome/brands/github-alt 24 | favicon: assets/favicon.png 25 | 26 | nav: 27 | - index.md 28 | - examples.md 29 | - api.md 30 | - operators.md 31 | - tuning.md 32 | - options.md 33 | - papers.md 34 | - api-advanced.md 35 | - backend.md 36 | 37 | extra: 38 | homepage: https://ai.damtp.cam.ac.uk/pysr 39 | 40 | extra_css: 41 | - stylesheets/extra.css 42 | 43 | extra_javascript: 44 | - js/mathjax.js 45 | - https://polyfill.io/v3/polyfill.min.js?features=es6 46 | - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js 47 | 48 | repo_url: https://github.com/MilesCranmer/PySR 49 | 50 | plugins: 51 | - search 52 | - autorefs 53 | - mkdocstrings: 54 | default_handler: python 55 | handlers: 56 | python: 57 | # paths: [pysr] 58 | options: 59 | # https://mkdocstrings.github.io/python/usage/#finding-modules 60 | docstring_style: numpy 61 | merge_init_into_class: True 62 | # docstring_options 63 | # separate_signature: True 64 | show_bases: false 65 | heading_level: 2 66 | 67 | markdown_extensions: 68 | - toc: 69 | permalink: true 70 | - attr_list 71 | - footnotes 72 | - md_in_html 73 | - pymdownx.highlight: 74 | anchor_linenums: True 75 | - pymdownx.inlinehilite 76 | - pymdownx.snippets 77 | - pymdownx.superfences 78 | - pymdownx.arithmatex: 79 | generic: true 80 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | warn_return_any = True 3 | 4 | [mypy-sklearn.*] 5 | ignore_missing_imports = True 6 | 7 | [mypy-julia.*] 8 | ignore_missing_imports = True 9 | 
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "pysr" 7 | version = "1.5.8" 8 | authors = [ 9 | {name = "Miles Cranmer", email = "miles.cranmer@gmail.com"}, 10 | ] 11 | description = "Simple and efficient symbolic regression" 12 | readme = {file = "README.md", content-type = "text/markdown"} 13 | license = {file = "LICENSE"} 14 | requires-python = ">=3.8" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "Operating System :: OS Independent", 18 | "License :: OSI Approved :: Apache Software License" 19 | ] 20 | dependencies = [ 21 | "sympy>=1.0.0,<2.0.0", 22 | "pandas>=0.21.0,<3.0.0", 23 | "numpy>=1.13.0,<3.0.0", 24 | "scikit_learn>=1.0.0,<2.0.0", 25 | "juliacall>=0.9.24,<0.9.26", 26 | "click>=7.0.0,<9.0.0", 27 | "typing-extensions>=4.0.0,<5.0.0", 28 | ] 29 | 30 | [project.optional-dependencies] 31 | dev = [ 32 | "coverage>=7,<8", 33 | "beartype>=0.19,<0.22", 34 | "ipykernel>=6,<7", 35 | "ipython>=8,<9", 36 | "jax[cpu]>=0.4,<0.6", 37 | "jupyter>=1,<2", 38 | "mypy>=1,<2", 39 | "nbval>=0.11,<0.12", 40 | "pandas-stubs", 41 | "pre-commit>=3.0,<5", 42 | "pytest-cov>=5,<7", 43 | "pytest>=8,<9", 44 | "tensorboard>=2,<3", 45 | "torch>=2,<3", 46 | "types-openpyxl", 47 | "types-pytz", 48 | ] 49 | 50 | [tool.isort] 51 | profile = "black" 52 | -------------------------------------------------------------------------------- /pysr/.gitignore: -------------------------------------------------------------------------------- 1 | version.py 2 | -------------------------------------------------------------------------------- /pysr/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import os 5 | 6 | pysr_logger = 
logging.getLogger("pysr") 7 | pysr_logger.setLevel(logging.INFO) 8 | handler = logging.StreamHandler() 9 | handler.setLevel(logging.INFO) 10 | pysr_logger.addHandler(handler) 11 | 12 | if os.environ.get("PYSR_USE_BEARTYPE", "0") == "1": 13 | from beartype.claw import beartype_this_package 14 | 15 | beartype_this_package() 16 | 17 | # This must be imported as early as possible to prevent 18 | # library linking issues caused by numpy/pytorch/etc. importing 19 | # old libraries: 20 | from .julia_import import jl, SymbolicRegression # isort:skip 21 | 22 | # Get the version using importlib.metadata (Python >= 3.8 is required): 23 | from importlib.metadata import PackageNotFoundError, version 24 | 25 | from . import sklearn_monkeypatch 26 | from .deprecated import best, best_callable, best_row, best_tex, install, pysr 27 | from .export_jax import sympy2jax 28 | from .export_torch import sympy2torch 29 | from .expression_specs import ( 30 | AbstractExpressionSpec, 31 | ExpressionSpec, 32 | ParametricExpressionSpec, 33 | TemplateExpressionSpec, 34 | ) 35 | from .julia_extensions import load_all_packages 36 | from .logger_specs import AbstractLoggerSpec, TensorBoardLoggerSpec 37 | from .sr import PySRRegressor 38 | 39 | try: 40 | __version__ = version("pysr") 41 | except PackageNotFoundError: # pragma: no cover 42 | # package is not installed 43 | __version__ = "unknown" 44 | 45 | __all__ = [ 46 | "jl", 47 | "SymbolicRegression", 48 | "sklearn_monkeypatch", 49 | "sympy2jax", 50 | "sympy2torch", 51 | "install", 52 | "load_all_packages", 53 | "PySRRegressor", 54 | "AbstractExpressionSpec", 55 | "ExpressionSpec", 56 | "TemplateExpressionSpec", 57 | "ParametricExpressionSpec", 58 | "AbstractLoggerSpec", 59 | "TensorBoardLoggerSpec", 60 | "best", 61 | "best_callable", 62 | "best_row", 63 | "best_tex", 64 | "pysr", 65 | "__version__", 66 | ] 67 | -------------------------------------------------------------------------------- /pysr/__main__.py: 
-------------------------------------------------------------------------------- 1 | from ._cli.main import pysr as _cli 2 | 3 | if __name__ == "__main__": 4 | _cli(prog_name="pysr") 5 | -------------------------------------------------------------------------------- /pysr/_cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MilesCranmer/PySR/a23b847383ddaa978b2c87c5d0fd78c758f5ff23/pysr/_cli/__init__.py -------------------------------------------------------------------------------- /pysr/_cli/main.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | import sys 3 | import unittest 4 | import warnings 5 | 6 | import click 7 | 8 | from ..test import ( 9 | get_runtests_cli, 10 | runtests, 11 | runtests_dev, 12 | runtests_jax, 13 | runtests_startup, 14 | runtests_torch, 15 | ) 16 | 17 | 18 | @click.group("pysr") 19 | @click.pass_context 20 | def pysr(context): 21 | ctx = context 22 | 23 | 24 | @pysr.command("install", help="DEPRECATED (dependencies are now installed at import).") 25 | @click.option( 26 | "-p", 27 | "julia_project", 28 | "--project", 29 | default=None, 30 | type=str, 31 | ) 32 | @click.option("-q", "--quiet", is_flag=True, default=False, help="Disable logging.") 33 | @click.option( 34 | "--precompile", 35 | "precompile", 36 | flag_value=True, 37 | default=None, 38 | ) 39 | @click.option( 40 | "--no-precompile", 41 | "precompile", 42 | flag_value=False, 43 | default=None, 44 | ) 45 | def _install(julia_project, quiet, precompile): 46 | warnings.warn( 47 | "This command is deprecated. Julia dependencies are now installed at first import." 
48 | ) 49 | 50 | 51 | TEST_OPTIONS = {"main", "jax", "torch", "cli", "dev", "startup"} 52 | 53 | 54 | @pysr.command("test") 55 | @click.argument("tests", nargs=1) 56 | @click.option( 57 | "-k", 58 | "expressions", 59 | multiple=True, 60 | type=str, 61 | help="Filter expressions to select specific tests.", 62 | ) 63 | def _tests(tests, expressions): 64 | """Run parts of the PySR test suite. 65 | 66 | Choose from main, jax, torch, cli, dev, and startup. You can give multiple tests, separated by commas. 67 | """ 68 | test_cases = [] 69 | for test in tests.split(","): 70 | if test == "main": 71 | test_cases.extend(runtests(just_tests=True)) 72 | elif test == "jax": 73 | test_cases.extend(runtests_jax(just_tests=True)) 74 | elif test == "torch": 75 | test_cases.extend(runtests_torch(just_tests=True)) 76 | elif test == "cli": 77 | runtests_cli = get_runtests_cli() 78 | test_cases.extend(runtests_cli(just_tests=True)) 79 | elif test == "dev": 80 | test_cases.extend(runtests_dev(just_tests=True)) 81 | elif test == "startup": 82 | test_cases.extend(runtests_startup(just_tests=True)) 83 | else: 84 | warnings.warn(f"Invalid test {test}. 
Skipping.") 85 | 86 | loader = unittest.TestLoader() 87 | suite = unittest.TestSuite() 88 | for test_case in test_cases: 89 | loaded_tests = loader.loadTestsFromTestCase(test_case) 90 | for test in loaded_tests: 91 | if len(expressions) == 0 or any( 92 | fnmatch.fnmatch(test.id(), "*" + expression + "*") 93 | for expression in expressions 94 | ): 95 | suite.addTest(test) 96 | 97 | runner = unittest.TextTestRunner() 98 | results = runner.run(suite) 99 | 100 | if not results.wasSuccessful(): 101 | sys.exit(1) 102 | -------------------------------------------------------------------------------- /pysr/denoising.py: -------------------------------------------------------------------------------- 1 | """Functions for denoising data during preprocessing.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import cast 6 | 7 | import numpy as np 8 | from numpy import ndarray 9 | 10 | 11 | def denoise( 12 | X: ndarray, 13 | y: ndarray, 14 | Xresampled: ndarray | None = None, 15 | random_state: np.random.RandomState | None = None, 16 | ) -> tuple[ndarray, ndarray]: 17 | """Denoise the dataset using a Gaussian process.""" 18 | from sklearn.gaussian_process import GaussianProcessRegressor 19 | from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel 20 | 21 | gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel() 22 | gpr = GaussianProcessRegressor( 23 | kernel=gp_kernel, n_restarts_optimizer=50, random_state=random_state 24 | ) 25 | gpr.fit(X, y) 26 | 27 | if Xresampled is not None: 28 | return Xresampled, cast(ndarray, gpr.predict(Xresampled)) 29 | 30 | return X, cast(ndarray, gpr.predict(X)) 31 | 32 | 33 | def multi_denoise( 34 | X: ndarray, 35 | y: ndarray, 36 | Xresampled: ndarray | None = None, 37 | random_state: np.random.RandomState | None = None, 38 | ): 39 | """Perform `denoise` along each column of `y` independently.""" 40 | y = np.stack( 41 | [ 42 | denoise(X, y[:, i], Xresampled=Xresampled, 
random_state=random_state)[1] 43 | for i in range(y.shape[1]) 44 | ], 45 | axis=1, 46 | ) 47 | 48 | if Xresampled is not None: 49 | return Xresampled, y 50 | 51 | return X, y 52 | -------------------------------------------------------------------------------- /pysr/deprecated.py: -------------------------------------------------------------------------------- 1 | """Various functions to deprecate features.""" 2 | 3 | import warnings 4 | 5 | from .julia_import import jl 6 | 7 | 8 | def install(*args, **kwargs): 9 | del args, kwargs 10 | warnings.warn( 11 | "The `install` function has been removed. " 12 | "PySR now uses the `juliacall` package to install its dependencies automatically at import time. ", 13 | FutureWarning, 14 | ) 15 | 16 | 17 | def init_julia(*args, **kwargs): 18 | del args, kwargs 19 | warnings.warn( 20 | "The `init_julia` function has been removed. " 21 | "Julia is now initialized automatically at import time.", 22 | FutureWarning, 23 | ) 24 | return jl 25 | 26 | 27 | def pysr(X, y, weights=None, **kwargs): # pragma: no cover 28 | from .sr import PySRRegressor 29 | 30 | warnings.warn( 31 | "Calling `pysr` is deprecated. " 32 | "Please use `model = PySRRegressor(**params); " 33 | "model.fit(X, y)` going forward.", 34 | FutureWarning, 35 | ) 36 | model = PySRRegressor(**kwargs) 37 | model.fit(X, y, weights=weights) 38 | return model.equations_ 39 | 40 | 41 | def best(*args, **kwargs): # pragma: no cover 42 | raise NotImplementedError( 43 | "`best` has been deprecated. " 44 | "Please use the `PySRRegressor` interface. " 45 | "After fitting, you can return `.sympy()` " 46 | "to get the sympy representation " 47 | "of the best equation." 48 | ) 49 | 50 | 51 | def best_row(*args, **kwargs): # pragma: no cover 52 | raise NotImplementedError( 53 | "`best_row` has been deprecated. " 54 | "Please use the `PySRRegressor` interface. 
" 55 | "After fitting, you can run `print(model)` to view the best equation, " 56 | "or " 57 | "`model.get_best()` to return the best equation's " 58 | "row in `model.equations_`." 59 | ) 60 | 61 | 62 | def best_tex(*args, **kwargs): # pragma: no cover 63 | raise NotImplementedError( 64 | "`best_tex` has been deprecated. " 65 | "Please use the `PySRRegressor` interface. " 66 | "After fitting, you can return `.latex()` to " 67 | "get the sympy representation " 68 | "of the best equation." 69 | ) 70 | 71 | 72 | def best_callable(*args, **kwargs): # pragma: no cover 73 | raise NotImplementedError( 74 | "`best_callable` has been deprecated. Please use the `PySRRegressor` " 75 | "interface. After fitting, you can use " 76 | "`.predict(X)` to use the best callable." 77 | ) 78 | 79 | 80 | DEPRECATED_KWARGS = { 81 | "fractionReplaced": "fraction_replaced", 82 | "fractionReplacedHof": "fraction_replaced_hof", 83 | "npop": "population_size", 84 | "hofMigration": "hof_migration", 85 | "shouldOptimizeConstants": "should_optimize_constants", 86 | "weightAddNode": "weight_add_node", 87 | "weightDeleteNode": "weight_delete_node", 88 | "weightDoNothing": "weight_do_nothing", 89 | "weightInsertNode": "weight_insert_node", 90 | "weightMutateConstant": "weight_mutate_constant", 91 | "weightMutateOperator": "weight_mutate_operator", 92 | "weightSwapOperands": "weight_swap_operands", 93 | "weightRandomize": "weight_randomize", 94 | "weightSimplify": "weight_simplify", 95 | "crossoverProbability": "crossover_probability", 96 | "perturbationFactor": "perturbation_factor", 97 | "batchSize": "batch_size", 98 | "warmupMaxsizeBy": "warmup_maxsize_by", 99 | "useFrequency": "use_frequency", 100 | "useFrequencyInTournament": "use_frequency_in_tournament", 101 | "ncyclesperiteration": "ncycles_per_iteration", 102 | "loss": "elementwise_loss", 103 | "full_objective": "loss_function", 104 | } 105 | -------------------------------------------------------------------------------- /pysr/export.py: 
def add_export_formats(
    output: pd.DataFrame,
    *,
    feature_names_in: ArrayLike[str],
    selection_mask: NDArray[np.bool_] | None = None,
    extra_sympy_mappings: dict[str, Callable] | None = None,
    extra_torch_mappings: dict[Callable, Callable] | None = None,
    output_torch_format: bool = False,
    extra_jax_mappings: dict[Callable, str] | None = None,
    output_jax_format: bool = False,
) -> pd.DataFrame:
    """Create export formats for an equations dataframe.

    For every row of `output`, parse the `equation` string into sympy and
    build a numpy-callable version; optionally also build JAX and/or Torch
    versions. Returns a new dataframe containing only the exported formats,
    indexed like `output`.
    """
    # Work on a deep copy so the caller's dataframe is never touched.
    output = copy.deepcopy(output)

    sympy_exprs: list = []
    numpy_callables: list = []
    jax_entries: list = []
    torch_modules: list = []

    for _, row in output.iterrows():
        expr = pysr2sympy(
            row["equation"],
            feature_names_in=feature_names_in,
            extra_sympy_mappings=extra_sympy_mappings,
        )
        sympy_exprs.append(expr)

        # NumPy-callable version:
        symbols = create_sympy_symbols(feature_names_in)
        numpy_callables.append(
            sympy2numpy(expr, symbols, selection=selection_mask)
        )

        # JAX version (function + trainable parameters):
        if output_jax_format:
            func, params = sympy2jax(
                expr,
                symbols,
                selection=selection_mask,
                extra_jax_mappings=extra_jax_mappings,
            )
            jax_entries.append({"callable": func, "parameters": params})

        # Torch module version:
        if output_torch_format:
            torch_modules.append(
                sympy2torch(
                    expr,
                    symbols,
                    selection=selection_mask,
                    extra_torch_mappings=extra_torch_mappings,
                )
            )

    exports = pd.DataFrame(
        {
            "sympy_format": sympy_exprs,
            "lambda_format": numpy_callables,
        },
        index=output.index,
    )

    # Optional columns only appear when the corresponding format was built.
    if output_jax_format:
        exports["jax_format"] = jax_entries
    if output_torch_format:
        exports["torch_format"] = torch_modules

    return exports
16 | sympy.ceiling: "jnp.ceil", 17 | sympy.floor: "jnp.floor", 18 | sympy.log: "jnp.log", 19 | log2: "jnp.log2", 20 | log10: "jnp.log10", 21 | sympy.exp: "jnp.exp", 22 | sympy.sqrt: "jnp.sqrt", 23 | sympy.cos: "jnp.cos", 24 | sympy.acos: "jnp.acos", 25 | sympy.sin: "jnp.sin", 26 | sympy.asin: "jnp.asin", 27 | sympy.tan: "jnp.tan", 28 | sympy.atan: "jnp.atan", 29 | sympy.atan2: "jnp.atan2", 30 | # Note: Also may give NaN for complex results. 31 | sympy.cosh: "jnp.cosh", 32 | sympy.acosh: "jnp.acosh", 33 | sympy.sinh: "jnp.sinh", 34 | sympy.asinh: "jnp.asinh", 35 | sympy.tanh: "jnp.tanh", 36 | sympy.atanh: "jnp.atanh", 37 | sympy.Pow: "jnp.power", 38 | sympy.re: "jnp.real", 39 | sympy.im: "jnp.imag", 40 | sympy.arg: "jnp.angle", 41 | # Note: May raise error for ints and complexes 42 | sympy.erf: "jsp.erf", 43 | sympy.erfc: "jsp.erfc", 44 | sympy.LessThan: "jnp.less", 45 | sympy.GreaterThan: "jnp.greater", 46 | sympy.And: "jnp.logical_and", 47 | sympy.Or: "jnp.logical_or", 48 | sympy.Not: "jnp.logical_not", 49 | sympy.Max: "jnp.max", 50 | sympy.Min: "jnp.min", 51 | sympy.Mod: "jnp.mod", 52 | sympy.Heaviside: "jnp.heaviside", 53 | sympy.core.numbers.Half: "(lambda: 0.5)", 54 | sympy.core.numbers.One: "(lambda: 1.0)", 55 | } 56 | 57 | 58 | def sympy2jaxtext(expr, parameters, symbols_in, extra_jax_mappings=None): 59 | if issubclass(expr.func, sympy.Float): 60 | parameters.append(float(expr)) 61 | return f"parameters[{len(parameters) - 1}]" 62 | elif issubclass(expr.func, sympy.Rational) or issubclass( 63 | expr.func, sympy.NumberSymbol 64 | ): 65 | return f"{float(expr)}" 66 | elif issubclass(expr.func, sympy.Integer): 67 | return f"{int(expr)}" 68 | elif issubclass(expr.func, sympy.Symbol): 69 | return ( 70 | f"X[:, {[i for i in range(len(symbols_in)) if symbols_in[i] == expr][0]}]" 71 | ) 72 | if extra_jax_mappings is None: 73 | extra_jax_mappings = {} 74 | try: 75 | _func = {**_jnp_func_lookup, **extra_jax_mappings}[expr.func] 76 | except KeyError: 77 | raise 
KeyError( 78 | f"Function {expr.func} was not found in JAX function mappings." 79 | "Please add it to extra_jax_mappings in the format, e.g., " 80 | "{sympy.sqrt: 'jnp.sqrt'}." 81 | ) 82 | args = [ 83 | sympy2jaxtext( 84 | arg, parameters, symbols_in, extra_jax_mappings=extra_jax_mappings 85 | ) 86 | for arg in expr.args 87 | ] 88 | if _func == MUL: 89 | return " * ".join(["(" + arg + ")" for arg in args]) 90 | if _func == ADD: 91 | return " + ".join(["(" + arg + ")" for arg in args]) 92 | return f'{_func}({", ".join(args)})' 93 | 94 | 95 | jax_initialized = False 96 | jax = None 97 | jnp = None 98 | jsp = None 99 | 100 | 101 | def _initialize_jax(): 102 | global jax_initialized 103 | global jax 104 | global jnp 105 | global jsp 106 | 107 | if not jax_initialized: 108 | import jax as _jax 109 | from jax import numpy as _jnp 110 | from jax.scipy import special as _jsp 111 | 112 | jax = _jax 113 | jnp = _jnp 114 | jsp = _jsp 115 | 116 | 117 | def sympy2jax(expression, symbols_in, selection=None, extra_jax_mappings=None): 118 | """Returns a function f and its parameters; 119 | the function takes an input matrix, and a list of arguments: 120 | f(X, parameters) 121 | where the parameters appear in the JAX equation. 122 | 123 | # Examples: 124 | 125 | Let's create a function in SymPy: 126 | ```python 127 | x, y = symbols('x y') 128 | cosx = 1.0 * sympy.cos(x) + 3.2 * y 129 | ``` 130 | Let's get the JAX version. We pass the equation, and 131 | the symbols required. 132 | ```python 133 | f, params = sympy2jax(cosx, [x, y]) 134 | ``` 135 | The order you supply the symbols is the same order 136 | you should supply the features when calling 137 | the function `f` (shape `[nrows, nfeatures]`). 138 | In this case, features=2 for x and y. 139 | The `params` in this case will be 140 | `jnp.array([1.0, 3.2])`. You pass these parameters 141 | when calling the function, which will let you change them 142 | and take gradients. 
143 | 144 | Let's generate some JAX data to pass: 145 | ```python 146 | key = random.PRNGKey(0) 147 | X = random.normal(key, (10, 2)) 148 | ``` 149 | 150 | We can call the function with: 151 | ```python 152 | f(X, params) 153 | 154 | #> DeviceArray([-2.6080756 , 0.72633684, -6.7557726 , -0.2963162 , 155 | # 6.6014843 , 5.032483 , -0.810931 , 4.2520013 , 156 | # 3.5427954 , -2.7479894 ], dtype=float32) 157 | ``` 158 | 159 | We can take gradients with respect 160 | to the parameters for each row with JAX 161 | gradient parameters now: 162 | ```python 163 | jac_f = jax.jacobian(f, argnums=1) 164 | jac_f(X, params) 165 | 166 | #> DeviceArray([[ 0.49364874, -0.9692889 ], 167 | # [ 0.8283714 , -0.0318858 ], 168 | # [-0.7447336 , -1.8784496 ], 169 | # [ 0.70755106, -0.3137085 ], 170 | # [ 0.944834 , 1.767703 ], 171 | # [ 0.51673377, 1.4111717 ], 172 | # [ 0.87347716, -0.52637756], 173 | # [ 0.8760679 , 1.0549792 ], 174 | # [ 0.9961824 , 0.79581654], 175 | # [-0.88465923, -0.5822907 ]], dtype=float32) 176 | ``` 177 | 178 | We can also JIT-compile our function: 179 | ```python 180 | compiled_f = jax.jit(f) 181 | compiled_f(X, params) 182 | 183 | #> DeviceArray([-2.6080756 , 0.72633684, -6.7557726 , -0.2963162 , 184 | # 6.6014843 , 5.032483 , -0.810931 , 4.2520013 , 185 | # 3.5427954 , -2.7479894 ], dtype=float32) 186 | ``` 187 | """ 188 | _initialize_jax() 189 | global jax_initialized 190 | global jax 191 | global jnp 192 | global jsp 193 | 194 | parameters = [] 195 | functional_form_text = sympy2jaxtext( 196 | expression, parameters, symbols_in, extra_jax_mappings 197 | ) 198 | hash_string = "A_" + str(abs(hash(str(expression) + str(symbols_in)))) 199 | text = f"def {hash_string}(X, parameters):\n" 200 | if selection is not None: 201 | # Impose the feature selection: 202 | text += f" X = X[:, {list(selection)}]\n" 203 | text += " return " 204 | text += functional_form_text 205 | ldict = {} 206 | exec(text, globals(), ldict) 207 | return ldict[hash_string], 
jnp.array(parameters) 208 | -------------------------------------------------------------------------------- /pysr/export_latex.py: -------------------------------------------------------------------------------- 1 | """Functions to help export PySR equations to LaTeX.""" 2 | 3 | from __future__ import annotations 4 | 5 | import pandas as pd 6 | import sympy # type: ignore 7 | from sympy.printing.latex import LatexPrinter # type: ignore 8 | 9 | 10 | class PreciseLatexPrinter(LatexPrinter): 11 | """Modified SymPy printer with custom float precision.""" 12 | 13 | def __init__(self, settings=None, prec=3): 14 | super().__init__(settings) 15 | self.prec = prec 16 | 17 | def _print_Float(self, expr): 18 | # Reduce precision of float: 19 | reduced_float = sympy.Float(expr, self.prec) 20 | return super()._print_Float(reduced_float) 21 | 22 | 23 | def sympy2latex(expr, prec=3, full_prec=True, **settings) -> str: 24 | """Convert sympy expression to LaTeX with custom precision.""" 25 | settings["full_prec"] = full_prec 26 | printer = PreciseLatexPrinter(settings=settings, prec=prec) 27 | return str(printer.doprint(expr)) 28 | 29 | 30 | def generate_table_environment( 31 | columns: list[str] = ["equation", "complexity", "loss"] 32 | ) -> tuple[str, str]: 33 | margins = "c" * len(columns) 34 | column_map = { 35 | "complexity": "Complexity", 36 | "loss": "Loss", 37 | "equation": "Equation", 38 | "score": "Score", 39 | } 40 | columns = [column_map[col] for col in columns] 41 | top_pieces = [ 42 | r"\begin{table}[h]", 43 | r"\begin{center}", 44 | r"\begin{tabular}{@{}" + margins + r"@{}}", 45 | r"\toprule", 46 | " & ".join(columns) + r" \\", 47 | r"\midrule", 48 | ] 49 | 50 | bottom_pieces = [ 51 | r"\bottomrule", 52 | r"\end{tabular}", 53 | r"\end{center}", 54 | r"\end{table}", 55 | ] 56 | top_latex_table = "\n".join(top_pieces) 57 | bottom_latex_table = "\n".join(bottom_pieces) 58 | 59 | return top_latex_table, bottom_latex_table 60 | 61 | 62 | def sympy2latextable( 63 | 
equations: pd.DataFrame, 64 | indices: list[int] | None = None, 65 | precision: int = 3, 66 | columns: list[str] = ["equation", "complexity", "loss", "score"], 67 | max_equation_length: int = 50, 68 | output_variable_name: str = "y", 69 | ) -> str: 70 | """Generate a booktabs-style LaTeX table for a single set of equations.""" 71 | assert isinstance(equations, pd.DataFrame) 72 | 73 | latex_top, latex_bottom = generate_table_environment(columns) 74 | latex_table_content = [] 75 | 76 | if indices is None: 77 | indices = list(equations.index) 78 | 79 | for i in indices: 80 | latex_equation = sympy2latex( 81 | equations.iloc[i]["sympy_format"], 82 | prec=precision, 83 | ) 84 | complexity = str(equations.iloc[i]["complexity"]) 85 | loss = sympy2latex( 86 | sympy.Float(equations.iloc[i]["loss"]), 87 | prec=precision, 88 | ) 89 | score = sympy2latex( 90 | sympy.Float(equations.iloc[i]["score"]), 91 | prec=precision, 92 | ) 93 | 94 | row_pieces = [] 95 | for col in columns: 96 | if col == "equation": 97 | if len(latex_equation) < max_equation_length: 98 | row_pieces.append( 99 | "$" + output_variable_name + " = " + latex_equation + "$" 100 | ) 101 | else: 102 | broken_latex_equation = " ".join( 103 | [ 104 | r"\begin{minipage}{0.8\linewidth}", 105 | r"\vspace{-1em}", 106 | r"\begin{dmath*}", 107 | output_variable_name + " = " + latex_equation, 108 | r"\end{dmath*}", 109 | r"\end{minipage}", 110 | ] 111 | ) 112 | row_pieces.append(broken_latex_equation) 113 | 114 | elif col == "complexity": 115 | row_pieces.append("$" + complexity + "$") 116 | elif col == "loss": 117 | row_pieces.append("$" + loss + "$") 118 | elif col == "score": 119 | row_pieces.append("$" + score + "$") 120 | else: 121 | raise ValueError(f"Unknown column: {col}") 122 | 123 | latex_table_content.append( 124 | " & ".join(row_pieces) + r" \\", 125 | ) 126 | 127 | return "\n".join([latex_top, *latex_table_content, latex_bottom]) 128 | 129 | 130 | def sympy2multilatextable( 131 | equations: 
def sympy2numpy(eqn, sympy_symbols, *, selection=None):
    """Wrap a sympy expression as a numpy-callable `CallableEquation`."""
    return CallableEquation(eqn, sympy_symbols, selection=selection)


class CallableEquation:
    """Simple wrapper for numpy lambda functions built with sympy"""

    _sympy: Expr
    _sympy_symbols: list[Symbol]
    _selection: NDArray[np.bool_] | None

    def __init__(self, eqn, sympy_symbols, selection=None):
        # `eqn` is the sympy expression; `sympy_symbols` fixes the argument
        # order for the lambdified function; `selection` is an optional
        # boolean column mask applied to array inputs before evaluation.
        self._sympy = eqn
        self._sympy_symbols = sympy_symbols
        self._selection = selection

    def __repr__(self):
        return f"PySRFunction(X=>{self._sympy})"

    def __call__(self, X):
        expected_shape = (X.shape[0],)
        if isinstance(X, pd.DataFrame):
            # Lambda function takes as argument:
            # DataFrame path: pass columns by symbol name as keyword args.
            # Multiplying by ones broadcasts constant expressions (which
            # lambdify returns as scalars) up to one value per sample.
            return self._lambda(
                **{k: X[k].values for k in map(str, self._sympy_symbols)}
            ) * np.ones(expected_shape)

        if self._selection is not None:
            if X.shape[1] != self._selection.sum():
                warnings.warn(
                    "`X` should be of shape (n_samples, len(self._selection)). "
                    "Automatically filtering `X` to selection. "
                    "Note: Filtered `X` column order may not match column order in fit "
                    "this may lead to incorrect predictions and other errors."
                )
                X = X[:, self._selection]

        return self._lambda(*X.T) * np.ones(expected_shape)

    @property
    def _lambda(self):
        # NOTE(review): lambdify is re-run on every access (i.e. every
        # __call__), trading repeated compilation cost for keeping the
        # instance free of unpicklable compiled state — presumably
        # intentional; confirm before caching.
        return lambdify(self._sympy_symbols, self._sympy)
def create_sympy_symbols_map(
    feature_names_in: ArrayLike[str],
) -> dict[str, sympy.Symbol]:
    """Map each feature name to a sympy Symbol of the same name."""
    symbols_map: dict[str, sympy.Symbol] = {}
    for name in feature_names_in:
        symbols_map[name] = sympy.Symbol(name)
    return symbols_map
def _reduce(fn):
    """Wrap a binary function so it folds over any number of arguments."""

    def fn_(*args):
        return ft.reduce(fn, args)

    return fn_


torch_initialized = False
torch = None
SingleSymPyModule = None


def _initialize_torch():
    """Lazily import torch and build the sympy->torch translation machinery.

    Binds the module globals `torch` and `SingleSymPyModule`. Imports are
    deferred so this module can be loaded in `__init__` without torch
    installed. Safe to call repeatedly: after the first successful call,
    subsequent calls are no-ops.
    """
    global torch_initialized
    global torch
    global SingleSymPyModule

    # Way to lazy load torch, only if this is called,
    # but still allow this module to be loaded in __init__
    if not torch_initialized:
        import torch as _torch

        torch = _torch

        # Translation table from sympy node types to torch callables.
        _global_func_lookup = {
            sympy.Mul: _reduce(torch.mul),
            sympy.Add: _reduce(torch.add),
            sympy.div: torch.div,
            sympy.Abs: torch.abs,
            sympy.sign: torch.sign,
            # Note: May raise error for ints.
            sympy.ceiling: torch.ceil,
            sympy.floor: torch.floor,
            sympy.log: torch.log,
            log2: torch.log2,
            log10: torch.log10,
            sympy.exp: torch.exp,
            sympy.sqrt: torch.sqrt,
            sympy.cos: torch.cos,
            sympy.acos: torch.acos,
            sympy.sin: torch.sin,
            sympy.asin: torch.asin,
            sympy.tan: torch.tan,
            sympy.atan: torch.atan,
            sympy.atan2: torch.atan2,
            # Note: May give NaN for complex results.
            sympy.cosh: torch.cosh,
            sympy.acosh: torch.acosh,
            sympy.sinh: torch.sinh,
            sympy.asinh: torch.asinh,
            sympy.tanh: torch.tanh,
            sympy.atanh: torch.atanh,
            sympy.Pow: torch.pow,
            sympy.re: torch.real,
            sympy.im: torch.imag,
            sympy.arg: torch.angle,
            # Note: May raise error for ints and complexes
            sympy.erf: torch.erf,
            sympy.loggamma: torch.lgamma,
            sympy.Eq: torch.eq,
            sympy.Ne: torch.ne,
            sympy.StrictGreaterThan: torch.gt,
            sympy.StrictLessThan: torch.lt,
            sympy.LessThan: torch.le,
            sympy.GreaterThan: torch.ge,
            sympy.And: torch.logical_and,
            sympy.Or: torch.logical_or,
            sympy.Not: torch.logical_not,
            sympy.Max: torch.max,
            sympy.Min: torch.min,
            sympy.Mod: torch.remainder,
            sympy.Heaviside: torch.heaviside,
            sympy.core.numbers.Half: (lambda: 0.5),
            sympy.core.numbers.One: (lambda: 1.0),
        }

        class _Node(torch.nn.Module):
            """Forked from https://github.com/patrick-kidger/sympytorch"""

            def __init__(self, *, expr, _memodict, _func_lookup, **kwargs):
                super().__init__(**kwargs)

                self._sympy_func = expr.func

                if issubclass(expr.func, sympy.Float):
                    # Floats become trainable parameters.
                    self._value = torch.nn.Parameter(torch.tensor(float(expr)))
                    self._torch_func = lambda: self._value
                    self._args = ()
                elif issubclass(expr.func, sympy.Rational):
                    # This is some fraction fixed in the operator.
                    self._value = float(expr)
                    self._torch_func = lambda: self._value
                    self._args = ()
                elif issubclass(expr.func, sympy.UnevaluatedExpr):
                    # UnevaluatedExpr marks a float that must stay constant
                    # (registered as a buffer, not a Parameter).
                    if len(expr.args) != 1 or not issubclass(
                        expr.args[0].func, sympy.Float
                    ):
                        raise ValueError(
                            "UnevaluatedExpr should only be used to wrap floats."
                        )
                    self.register_buffer("_value", torch.tensor(float(expr.args[0])))
                    self._torch_func = lambda: self._value
                    self._args = ()
                elif issubclass(expr.func, sympy.Integer):
                    # Can get here if expr is one of the Integer special cases,
                    # e.g. NegativeOne
                    self._value = int(expr)
                    self._torch_func = lambda: self._value
                    self._args = ()
                elif issubclass(expr.func, sympy.NumberSymbol):
                    # Can get here from exp(1) or exact pi
                    self._value = float(expr)
                    self._torch_func = lambda: self._value
                    self._args = ()
                elif issubclass(expr.func, sympy.Symbol):
                    # Symbols are looked up by name in the memodict at
                    # forward time.
                    self._name = expr.name
                    self._torch_func = lambda value: value
                    self._args = ((lambda memodict: memodict[expr.name]),)
                else:
                    try:
                        self._torch_func = _func_lookup[expr.func]
                    except KeyError:
                        raise KeyError(
                            f"Function {expr.func} was not found in Torch function mappings."
                            "Please add it to extra_torch_mappings in the format, e.g., "
                            "{sympy.sqrt: torch.sqrt}."
                        )
                    args = []
                    for arg in expr.args:
                        # Share sub-modules for repeated sub-expressions via
                        # the memo dict.
                        try:
                            arg_ = _memodict[arg]
                        except KeyError:
                            arg_ = type(self)(
                                expr=arg,
                                _memodict=_memodict,
                                _func_lookup=_func_lookup,
                                **kwargs,
                            )
                            _memodict[arg] = arg_
                        args.append(arg_)
                    self._args = torch.nn.ModuleList(args)

            def forward(self, memodict):
                args = []
                for arg in self._args:
                    # Memoize evaluated sub-expressions within one forward.
                    try:
                        arg_ = memodict[arg]
                    except KeyError:
                        arg_ = arg(memodict)
                        memodict[arg] = arg_
                    args.append(arg_)
                return self._torch_func(*args)

        class _SingleSymPyModule(torch.nn.Module):
            """Forked from https://github.com/patrick-kidger/sympytorch"""

            def __init__(
                self, expression, symbols_in, selection=None, extra_funcs=None, **kwargs
            ):
                super().__init__(**kwargs)

                if extra_funcs is None:
                    extra_funcs = {}
                # extra_funcs take precedence over the global lookup.
                _func_lookup = co.ChainMap(_global_func_lookup, extra_funcs)

                _memodict = {}
                self._node = _Node(
                    expr=expression, _memodict=_memodict, _func_lookup=_func_lookup
                )
                self._expression_string = str(expression)
                self._selection = selection
                self.symbols_in = [str(symbol) for symbol in symbols_in]

            def __repr__(self):
                return f"{type(self).__name__}(expression={self._expression_string})"

            def forward(self, X):
                if self._selection is not None:
                    X = X[:, self._selection]
                # Column i of X feeds the i-th symbol, matching `symbols_in`.
                symbols = {symbol: X[:, i] for i, symbol in enumerate(self.symbols_in)}
                return self._node(symbols)

        SingleSymPyModule = _SingleSymPyModule

        # Bug fix: the flag was never latched, so every call re-imported
        # torch, rebuilt the lookup table, and rebound `SingleSymPyModule`
        # to a brand-new class (breaking isinstance checks across calls).
        torch_initialized = True
201 | """ 202 | global SingleSymPyModule 203 | 204 | _initialize_torch() 205 | 206 | return SingleSymPyModule( 207 | expression, symbols_in, selection=selection, extra_funcs=extra_torch_mappings 208 | ) 209 | -------------------------------------------------------------------------------- /pysr/feature_selection.py: -------------------------------------------------------------------------------- 1 | """Functions for doing feature selection during preprocessing.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from typing import cast 7 | 8 | import numpy as np 9 | from numpy import ndarray 10 | from numpy.typing import NDArray 11 | 12 | from .utils import ArrayLike 13 | 14 | pysr_logger = logging.getLogger(__name__) 15 | 16 | 17 | def run_feature_selection( 18 | X: ndarray, 19 | y: ndarray, 20 | select_k_features: int, 21 | random_state: np.random.RandomState | None = None, 22 | ) -> NDArray[np.bool_]: 23 | """ 24 | Find most important features. 25 | 26 | Uses a gradient boosting tree regressor as a proxy for finding 27 | the k most important features in X, returning indices for those 28 | features as output. 
29 | """ 30 | from sklearn.ensemble import RandomForestRegressor 31 | from sklearn.feature_selection import SelectFromModel 32 | 33 | clf = RandomForestRegressor( 34 | n_estimators=100, max_depth=3, random_state=random_state 35 | ) 36 | clf.fit(X, y) 37 | selector = SelectFromModel( 38 | clf, threshold=-np.inf, max_features=select_k_features, prefit=True 39 | ) 40 | return cast(NDArray[np.bool_], selector.get_support(indices=False)) 41 | 42 | 43 | # Function has not been removed only due to usage in module tests 44 | def _handle_feature_selection( 45 | X: ndarray, 46 | select_k_features: int | None, 47 | y: ndarray, 48 | variable_names: ArrayLike[str], 49 | ): 50 | if select_k_features is not None: 51 | selection = run_feature_selection(X, y, select_k_features) 52 | pysr_logger.info(f"Using features {[variable_names[i] for i in selection]}") 53 | X = X[:, selection] 54 | else: 55 | selection = None 56 | 57 | return X, selection 58 | -------------------------------------------------------------------------------- /pysr/julia_extensions.py: -------------------------------------------------------------------------------- 1 | """This file installs and loads extensions for SymbolicRegression.""" 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Literal 6 | 7 | from .julia_import import Pkg, jl 8 | from .julia_registry_helpers import try_with_registry_fallback 9 | from .logger_specs import AbstractLoggerSpec, TensorBoardLoggerSpec 10 | 11 | 12 | def load_required_packages( 13 | *, 14 | turbo: bool = False, 15 | bumper: bool = False, 16 | autodiff_backend: Literal["Zygote"] | None = None, 17 | cluster_manager: str | None = None, 18 | logger_spec: AbstractLoggerSpec | None = None, 19 | ): 20 | if turbo: 21 | load_package("LoopVectorization", "bdcacae8-1622-11e9-2a5c-532679323890") 22 | if bumper: 23 | load_package("Bumper", "8ce10254-0962-460f-a3d8-1f77fea1446e") 24 | if autodiff_backend is not None: 25 | load_package("Zygote", 
def load_all_packages():
    """Install and load all Julia extensions available to PySR."""
    # Turning every optional feature on forces each extension package to
    # be installed and loaded.
    load_required_packages(
        turbo=True,
        bumper=True,
        autodiff_backend="Zygote",
        cluster_manager="slurm",
        logger_spec=TensorBoardLoggerSpec(log_dir="logs"),
    )


# TODO: Refactor this file so we can install all packages at once using `juliapkg`,
# ideally parameterizable via the regular Python extras API


def isinstalled(uuid_s: str):
    """Return whether the Julia package with this UUID is a dependency."""
    uuid = jl.Base.UUID(uuid_s)
    installed_deps = Pkg.dependencies()
    return jl.haskey(installed_deps, uuid)
62 | jl.seval(f"using {package_name}: {package_name}") 63 | return None 64 | -------------------------------------------------------------------------------- /pysr/julia_helpers.py: -------------------------------------------------------------------------------- 1 | """Functions for initializing the Julia environment and installing deps.""" 2 | 3 | from typing import Any, Callable, cast, overload 4 | 5 | import numpy as np 6 | from juliacall import convert as jl_convert # type: ignore 7 | from numpy.typing import NDArray 8 | 9 | from .deprecated import init_julia, install 10 | from .julia_import import AnyValue, jl 11 | 12 | jl_convert = cast(Callable[[Any, Any], Any], jl_convert) 13 | 14 | jl.seval("using Serialization: Serialization") 15 | jl.seval("using PythonCall: PythonCall") 16 | 17 | Serialization = jl.Serialization 18 | PythonCall = jl.PythonCall 19 | 20 | jl.seval("using SymbolicRegression: plus, sub, mult, div, pow") 21 | 22 | 23 | def _escape_filename(filename): 24 | """Turn a path into a string with correctly escaped backslashes.""" 25 | if filename is None: 26 | return None 27 | str_repr = str(filename) 28 | str_repr = str_repr.replace("\\", "\\\\") 29 | return str_repr 30 | 31 | 32 | def _load_cluster_manager(cluster_manager: str): 33 | jl.seval(f"using ClusterManagers: addprocs_{cluster_manager}") 34 | return jl.seval(f"addprocs_{cluster_manager}") 35 | 36 | 37 | def jl_array(x, dtype=None): 38 | if x is None: 39 | return None 40 | elif dtype is None: 41 | return jl_convert(jl.Array, x) 42 | else: 43 | return jl_convert(jl.Array[dtype], x) 44 | 45 | 46 | def jl_dict(x): 47 | return jl_convert(jl.Dict, x) 48 | 49 | 50 | def jl_is_function(f) -> bool: 51 | return cast(bool, jl.seval("op -> op isa Function")(f)) 52 | 53 | 54 | def jl_serialize(obj: Any) -> NDArray[np.uint8]: 55 | buf = jl.IOBuffer() 56 | Serialization.serialize(buf, obj) 57 | return np.array(jl.take_b(buf)) 58 | 59 | 60 | @overload 61 | def jl_deserialize(s: NDArray[np.uint8]) -> 
def jl_deserialize(s):
    """Deserialize a Julia object from the byte array made by `jl_serialize`.

    `None` is passed through unchanged so optional serialized state is easy
    to handle.
    """
    if s is None:
        return s
    # Round-trip the bytes through a Julia IOBuffer: write the raw array,
    # rewind to the start, then let Julia's Serialization reconstruct the
    # original object.
    buf = jl.IOBuffer()
    jl.write(buf, jl_array(s))
    jl.seekstart(buf)
    return Serialization.deserialize(buf)
def try_with_registry_fallback(f: Callable[..., T], *args, **kwargs) -> T:
    """Call ``f``, retrying once with the 'eager' registry flavor on failure.

    The first attempt runs under whatever registry preference is already
    configured. If it fails with a Julia "Unsatisfiable requirements"
    registry error, the `JULIA_PKG_SERVER_REGISTRY_PREFERENCE` environment
    variable is temporarily switched to `"eager"` and ``f`` is retried; the
    previous preference is restored afterwards regardless of the outcome.
    Any other exception propagates unchanged.
    """
    try:
        return f(*args, **kwargs)
    except Exception as err:
        # Registry problems surface as a `JuliaError` whose message mentions
        # unsatisfiable requirements; anything else is not ours to handle.
        is_registry_error = "JuliaError" in str(type(err)) and (
            "Unsatisfiable requirements detected" in str(err)
        )
        if not is_registry_error:
            raise err

        previous = os.environ.get(PREFERENCE_KEY, None)
        if previous == "eager":
            # Already on the fallback flavor; retrying would change nothing.
            raise err

        warnings.warn(
            "Initial Julia registry operation failed. Attempting to use the `eager` registry flavor of the Julia "
            + f"General registry from the Julia Pkg server (via the `{PREFERENCE_KEY}` environment variable)."
        )
        os.environ[PREFERENCE_KEY] = "eager"
        try:
            return f(*args, **kwargs)
        finally:
            # Leave the caller's environment exactly as we found it.
            if previous is None:
                del os.environ[PREFERENCE_KEY]
            else:
                os.environ[PREFERENCE_KEY] = previous
class AbstractLoggerSpec(ABC):
    """Abstract base class for logger specifications.

    Subclasses describe how to create a Julia-side logger for the search,
    record hyperparameters with it, and shut it down afterwards.
    """

    @abstractmethod
    def create_logger(self) -> AnyValue:
        """Create and return the Julia logger instance."""
        pass  # pragma: no cover

    @abstractmethod
    def write_hparams(self, logger: AnyValue, hparams: dict[str, Any]) -> None:
        """Write the given hyperparameters to `logger`."""
        pass  # pragma: no cover

    @abstractmethod
    def close(self, logger: AnyValue) -> None:
        """Close the logger instance created by `create_logger`."""
        pass  # pragma: no cover
    def write_hparams(self, logger: AnyValue, hparams: dict[str, Any]) -> None:
        """Record the given hyperparameters in the TensorBoard log.

        Values that are not plain `bool`/`int`/`float` are stringified before
        being handed to Julia.
        """
        # Retrieve the base TensorBoard logger held inside the SRLogger
        # wrapper (see `create_logger`).
        base_logger = jl.SymbolicRegression.get_logger(logger)
        writer = jl.seval("TensorBoardLogger.write_hparams!")
        jl_clean_hparams = jl_dict(
            {
                k: (v if isinstance(v, (bool, int, float)) else str(v))
                for k, v in hparams.items()
            }
        )
        # NOTE(review): the final argument presumably names the metrics that
        # TensorBoard associates with these hparams — confirm against
        # TensorBoardLogger.jl's write_hparams! documentation.
        writer(
            base_logger,
            jl_clean_hparams,
            jl_array(
                [
                    "search/data/summaries/pareto_volume",
                    "search/data/summaries/min_loss",
                ],
            ),
        )
weight_do_nothing 39 | - weight_mutate_constant 40 | - weight_mutate_operator 41 | - weight_swap_operands 42 | - weight_rotate_tree 43 | - weight_randomize 44 | - weight_simplify 45 | - weight_optimize 46 | - crossover_probability 47 | - annealing 48 | - alpha 49 | - perturbation_factor 50 | - probability_negate_constant 51 | - skip_mutation_failures 52 | - Tournament Selection: 53 | - tournament_selection_n 54 | - tournament_selection_p 55 | - Constant Optimization: 56 | - optimizer_algorithm 57 | - optimizer_nrestarts 58 | - optimizer_f_calls_limit 59 | - optimize_probability 60 | - optimizer_iterations 61 | - should_optimize_constants 62 | - Migration between Populations: 63 | - fraction_replaced 64 | - fraction_replaced_hof 65 | - migration 66 | - hof_migration 67 | - topn 68 | - Data Preprocessing: 69 | - denoise 70 | - select_k_features 71 | - Stopping Criteria: 72 | - max_evals 73 | - timeout_in_seconds 74 | - early_stop_condition 75 | - Performance and Parallelization: 76 | - parallelism 77 | - procs 78 | - cluster_manager 79 | - heap_size_hint_in_bytes 80 | - batching 81 | - batch_size 82 | - precision 83 | - fast_cycle 84 | - turbo 85 | - bumper 86 | - autodiff_backend 87 | - Determinism: 88 | - random_state 89 | - deterministic 90 | - warm_start 91 | - Monitoring: 92 | - verbosity 93 | - update_verbosity 94 | - print_precision 95 | - progress 96 | - logger_spec 97 | - input_stream 98 | - Environment: 99 | - temp_equation_file 100 | - tempdir 101 | - delete_tempfiles 102 | - update 103 | - Exporting the Results: 104 | - output_directory 105 | - run_id 106 | - output_jax_format 107 | - output_torch_format 108 | - extra_sympy_mappings 109 | - extra_torch_mappings 110 | - extra_jax_mappings 111 | -------------------------------------------------------------------------------- /pysr/sklearn_monkeypatch.py: -------------------------------------------------------------------------------- 1 | # Here, we monkey patch scikit-learn until this 2 | # issue is fixed: 
from sklearn.utils import validation


# No-op stand-in for sklearn's internal complex-data check.
# NOTE(review): presumably PySR needs complex-valued arrays to survive
# sklearn validation (see the issue linked above) — confirm before removing.
def _ensure_no_complex_data(*args, **kwargs): ...


# Replace the private hook if it still exists; if a future sklearn release
# drops the attribute there is simply nothing to patch.
try:
    validation._ensure_no_complex_data = _ensure_no_complex_data
except AttributeError:  # pragma: no cover
    ...
# Rewrite a juliapkg.json in place so that SymbolicRegression points at a
# local development checkout instead of the registered release.
#
# Example call:
## python3 generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl
import json
import sys

juliapkg_json = sys.argv[1]  # path of the juliapkg.json to rewrite
path_to_srjl = sys.argv[2]  # path of the local SymbolicRegression.jl checkout

with open(juliapkg_json, "r") as f:
    juliapkg = json.load(f)

# Keep the package UUID but swap the version pin for a dev-path entry.
juliapkg["packages"]["SymbolicRegression"] = {
    "uuid": juliapkg["packages"]["SymbolicRegression"]["uuid"],
    "path": path_to_srjl,
    "dev": True,
}

with open(juliapkg_json, "w") as f:
    json.dump(juliapkg, f, indent=4)
def get_runtests():
    """Build and return the `runtests` entry point for the CLI test suite.

    The `pysr._cli.main` import is deferred into this factory to avoid a
    circular import at module load time.
    """
    # Lazy load to avoid circular imports.

    from .._cli.main import pysr

    class TestCli(unittest.TestCase):
        # TODO: Include test for custom project here.
        def setUp(self):
            self.cli_runner = click_testing.CliRunner()

        def test_help_on_all_commands(self):
            # Must match click's generated help output byte-for-byte
            # (modulo the surrounding strip()).
            expected = dedent(
                """
                Usage: pysr [OPTIONS] COMMAND [ARGS]...

                Options:
                  --help  Show this message and exit.

                Commands:
                  install  DEPRECATED (dependencies are now installed at import).
                  test     Run parts of the PySR test suite.
                """
            )
            result = self.cli_runner.invoke(pysr, ["--help"])
            self.assertEqual(result.output.strip(), expected.strip())
            self.assertEqual(result.exit_code, 0)

        def test_help_on_install(self):
            expected = dedent(
                """
                Usage: pysr install [OPTIONS]

                  DEPRECATED (dependencies are now installed at import).

                Options:
                  -p, --project TEXT
                  -q, --quiet         Disable logging.
                  --precompile
                  --no-precompile
                  --help              Show this message and exit.
                """
            )
            result = self.cli_runner.invoke(pysr, ["install", "--help"])
            self.assertEqual(result.output.strip(), expected.strip())
            self.assertEqual(result.exit_code, 0)

        def test_help_on_test(self):
            expected = dedent(
                """
                Usage: pysr test [OPTIONS] TESTS

                  Run parts of the PySR test suite.

                  Choose from main, jax, torch, cli, dev, and startup. You can give multiple
                  tests, separated by commas.

                Options:
                  -k TEXT  Filter expressions to select specific tests.
                  --help   Show this message and exit.
                """
            )
            result = self.cli_runner.invoke(pysr, ["test", "--help"])
            self.assertEqual(result.output.strip(), expected.strip())
            self.assertEqual(result.exit_code, 0)

    def runtests(just_tests=False):
        """Run all tests in cliTest.py."""
        tests = [TestCli]
        if just_tests:
            return tests
        loader = unittest.TestLoader()
        suite = unittest.TestSuite()
        for test in tests:
            suite.addTests(loader.loadTestsFromTestCase(test))
        runner = unittest.TextTestRunner()
        return runner.run(suite)

    return runtests
def runtests(just_tests=False):
    """Run the dev-backend test suite, or return its test-case classes.

    With `just_tests=True`, returns the list of `TestCase` classes without
    executing anything; otherwise builds a suite from them and runs it,
    returning the `TestResult`.
    """
    cases = [TestDev]
    if just_tests:
        return cases
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for case in cases:
        suite.addTests(loader.loadTestsFromTestCase(case))
    return unittest.TextTestRunner().run(suite)
we check out the version of SymbolicRegression.jl that PySR is using: 41 | RUN git clone -b "v$(cat /pysr/sr_version_processed)" --single-branch https://github.com/MilesCranmer/SymbolicRegression.jl /srjl 42 | 43 | # Edit SymbolicRegression.jl to create a new function. 44 | # We want to put this function immediately after `module SymbolicRegression`: 45 | RUN sed -i 's/module SymbolicRegression/module SymbolicRegression\n__test_function() = 2.3/' /srjl/src/SymbolicRegression.jl 46 | 47 | # Edit PySR to use the custom version of SymbolicRegression.jl: 48 | ADD ./pysr/test/generate_dev_juliapkg.py /generate_dev_juliapkg.py 49 | RUN python3 /generate_dev_juliapkg.py /pysr/pysr/juliapkg.json /srjl 50 | 51 | # Install and pre-compile 52 | RUN pip3 install --no-cache-dir . && python3 -c 'import pysr' 53 | -------------------------------------------------------------------------------- /pysr/test/test_jax.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from functools import partial 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import sympy # type: ignore 8 | 9 | import pysr 10 | from pysr import PySRRegressor, sympy2jax 11 | 12 | 13 | class TestJAX(unittest.TestCase): 14 | def setUp(self): 15 | np.random.seed(0) 16 | from jax import numpy as jnp 17 | 18 | self.jnp = jnp 19 | 20 | def test_sympy2jax(self): 21 | from jax import random 22 | 23 | x, y, z = sympy.symbols("x y z") 24 | cosx = 1.0 * sympy.cos(x) + y 25 | key = random.PRNGKey(0) 26 | X = random.normal(key, (1000, 2)) 27 | true = 1.0 * self.jnp.cos(X[:, 0]) + X[:, 1] 28 | f, params = sympy2jax(cosx, [x, y, z]) 29 | self.assertTrue(self.jnp.all(self.jnp.isclose(f(X, params), true)).item()) 30 | 31 | def test_pipeline_pandas(self): 32 | 33 | X = pd.DataFrame(np.random.randn(100, 10)) 34 | y = np.ones(X.shape[0]) 35 | model = PySRRegressor( 36 | progress=False, 37 | max_evals=10000, 38 | output_jax_format=True, 39 | ) 
    def test_pipeline(self):
        """End-to-end check that the JAX export evaluates the chosen equation."""
        X = np.random.randn(100, 10)
        y = np.ones(X.shape[0])
        model = PySRRegressor(progress=False, max_evals=10000, output_jax_format=True)
        model.fit(X, y)

        # Overwrite the hall of fame with a fixed set of equations so the
        # JAX export is deterministic regardless of what the search found.
        equations = pd.DataFrame(
            {
                "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
                "Loss": [1.0, 0.1, 1e-5],
                "Complexity": [1, 2, 3],
            }
        )

        # Write both the backup and the live CSV, then reload from disk.
        for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]:
            equations["Complexity Loss Equation".split(" ")].to_csv(
                Path(model.output_directory_) / model.run_id_ / fname
            )

        model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_))
        jformat = model.jax()

        # The lowest-loss equation is square(cos(x1)), so the exported
        # callable should agree with the same expression computed in numpy.
        np.testing.assert_almost_equal(
            np.array(jformat["callable"](self.jnp.array(X), jformat["parameters"])),
            np.square(np.cos(X[:, 1])),  # Select feature 1
            decimal=3,
        )
| np.array(f(self.jnp.array(X), params)), 103 | np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0], 104 | decimal=3, 105 | ) 106 | 107 | def test_issue_656(self): 108 | import sympy # type: ignore 109 | 110 | E_plus_x1 = sympy.exp(1) + sympy.symbols("x1") 111 | f, params = pysr.export_jax.sympy2jax(E_plus_x1, [sympy.symbols("x1")]) 112 | key = np.random.RandomState(0) 113 | X = key.randn(10, 1) 114 | np.testing.assert_almost_equal( 115 | np.array(f(self.jnp.array(X), params)), 116 | np.exp(1) + X[:, 0], 117 | decimal=3, 118 | ) 119 | 120 | def test_feature_selection_custom_operators(self): 121 | rstate = np.random.RandomState(0) 122 | X = pd.DataFrame({f"k{i}": rstate.randn(2000) for i in range(10, 21)}) 123 | 124 | def cos_approx(x): 125 | return 1 - (x**2) / 2 + (x**4) / 24 + (x**6) / 720 126 | 127 | sp_cos_approx = sympy.Function("cos_approx") 128 | 129 | y = X["k15"] ** 2 + 2 * cos_approx(X["k20"]) 130 | 131 | model = PySRRegressor( 132 | progress=False, 133 | unary_operators=["cos_approx(x) = 1 - x^2 / 2 + x^4 / 24 + x^6 / 720"], 134 | select_k_features=3, 135 | maxsize=10, 136 | early_stop_condition=1e-5, 137 | extra_sympy_mappings={"cos_approx": sp_cos_approx}, 138 | extra_jax_mappings={ 139 | sp_cos_approx: "(lambda x: 1 - x**2 / 2 + x**4 / 24 + x**6 / 720)" 140 | }, 141 | random_state=0, 142 | deterministic=True, 143 | parallelism="serial", 144 | ) 145 | np.random.seed(0) 146 | model.fit(X.values, y.values) 147 | f, parameters = model.jax().values() 148 | jax_prediction = partial(f, parameters=parameters) 149 | jax_output = jax_prediction(X.values) 150 | np.testing.assert_almost_equal(y.values, jax_output, decimal=3) 151 | 152 | 153 | def runtests(just_tests=False): 154 | """Run all tests in test_jax.py.""" 155 | tests = [TestJAX] 156 | if just_tests: 157 | return tests 158 | loader = unittest.TestLoader() 159 | suite = unittest.TestSuite() 160 | for test in tests: 161 | suite.addTests(loader.loadTestsFromTestCase(test)) 162 | runner = 
unittest.TextTestRunner() 163 | return runner.run(suite) 164 | -------------------------------------------------------------------------------- /pysr/test/test_nb.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Detected Jupyter notebook. Loading juliacall extension. Set `PYSR_AUTOLOAD_EXTENSIONS=no` to disable.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "# NBVAL_IGNORE_OUTPUT\n", 18 | "import numpy as np\n", 19 | "from pysr import PySRRegressor, jl" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "3\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "%%julia\n", 37 | "\n", 38 | "# Automatically activates Julia magic\n", 39 | "\n", 40 | "x = 1\n", 41 | "println(x + 2)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "4\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "%julia println(x + 3)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "my_loss (generic function with 1 method)" 70 | ] 71 | }, 72 | "execution_count": 4, 73 | "metadata": {}, 74 | "output_type": "execute_result" 75 | } 76 | ], 77 | "source": [ 78 | "%%julia\n", 79 | "function my_loss(x)\n", 80 | " x ^ 2\n", 81 | "end" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "4" 93 | ] 94 | }, 95 | "execution_count": 5, 96 | "metadata": {}, 97 | "output_type": 
"execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "%julia my_loss(2)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "'PySRRegressor.equations_ = None'" 113 | ] 114 | }, 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "rstate = np.random.RandomState(0)\n", 122 | "X = np.random.randn(10, 2)\n", 123 | "y = np.random.randn(10)\n", 124 | "\n", 125 | "model = PySRRegressor(deterministic=True, parallelism=\"serial\", random_state=0, verbosity=0, progress=False, niterations=1, ncycles_per_iteration=1)\n", 126 | "str(model)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 7, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "pandas.core.frame.DataFrame" 138 | ] 139 | }, 140 | "execution_count": 7, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "model.fit(X, y)\n", 147 | "type(model.equations_)" 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "Python 3 (ipykernel)", 154 | "language": "python", 155 | "name": "python3" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | "pygments_lexer": "ipython3", 167 | "version": "3.11.2" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 2 172 | } 173 | -------------------------------------------------------------------------------- /pysr/test/test_startup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import subprocess 4 | import sys 5 | import tempfile 6 | import textwrap 7 | import 
class TestStartup(unittest.TestCase):
    """Various tests related to starting up PySR."""

    def setUp(self):
        # Shared keyword arguments for PySRRegressor in the tests below; the
        # iteration and population counts are doubled from the shared test
        # defaults (see .params) to make short runs more likely to converge.
        self.default_test_kwargs = dict(
            progress=False,
            model_selection="accuracy",
            niterations=DEFAULT_NITERATIONS * 2,
            populations=DEFAULT_POPULATIONS * 2,
            temp_equation_file=True,
        )
        # Seeded RNG so generated data is reproducible across runs.
        self.rstate = np.random.RandomState(0)
        self.X = self.rstate.randn(100, 5)

    def test_warm_start_from_file(self):
        """Test that we can warm start in another process."""
        if platform.system() == "Windows":
            self.skipTest("Warm start test incompatible with Windows")

        with tempfile.TemporaryDirectory() as tmpdirname:
            model = PySRRegressor(
                **self.default_test_kwargs,
                unary_operators=["cos"],
            )
            # Configure a deterministic, serial run that persists its state
            # into `tmpdirname` so a second process can pick it up.
            model.warm_start = True
            model.temp_equation_file = False
            model.output_directory = tmpdirname
            model.run_id = "test"
            model.deterministic = True
            model.multithreading = False
            model.random_state = 0
            model.procs = 0
            model.early_stop_condition = 1e-10

            rstate = np.random.RandomState(0)
            X = rstate.randn(100, 2)
            y = np.cos(X[:, 0]) ** 2
            model.fit(X, y)

            best_loss = model.equations_.iloc[-1]["loss"]

            # Save X and y to a file:
            X_file = Path(tmpdirname) / "X.npy"
            y_file = Path(tmpdirname) / "y.npy"
            np.save(X_file, X)
            np.save(y_file, y)
            # Now, create a new process and warm start from the file:
            result = subprocess.run(
                [
                    sys.executable,
                    "-c",
                    textwrap.dedent(
                        f"""
                        from pysr import PySRRegressor
                        import numpy as np

                        X = np.load("{X_file}")
                        y = np.load("{y_file}")

                        print("Loading model from file")
                        model = PySRRegressor.from_file(
                            run_directory="{str(Path(tmpdirname) / model.run_id_)}"
                        )

                        assert model.julia_state_ is not None

                        # Reset saved equations; should be loaded from state!
                        model.equations_ = None
                        model.equation_file_contents_ = None

                        model.warm_start = True
                        model.niterations = 0
                        model.max_evals = 0
                        model.ncycles_per_iteration = 0

                        model.fit(X, y)

                        best_loss = model.equations_.iloc[-1]["loss"]

                        assert best_loss <= {best_loss}
                        """
                    ),
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                env=os.environ,
            )
            # The subprocess must exit cleanly, report loading the model on
            # stdout, and show on stderr that the search actually restarted.
            self.assertEqual(result.returncode, 0)
            self.assertIn("Loading model from file", result.stdout.decode())
            self.assertIn("Started!", result.stderr.decode())

    def test_bad_startup_options(self):
        # Each misconfigured environment/import order should emit a matching
        # warning message when `pysr` is imported in a fresh interpreter.
        warning_tests = [
            dict(
                code='import os; os.environ["PYTHON_JULIACALL_HANDLE_SIGNALS"] = "no"; import pysr',
                msg="PYTHON_JULIACALL_HANDLE_SIGNALS environment variable is set",
            ),
            dict(
                code='import os; os.environ["PYTHON_JULIACALL_THREADS"] = "1"; import pysr',
                msg="PYTHON_JULIACALL_THREADS environment variable is set",
            ),
            dict(
                code="import juliacall; import pysr",
                msg="juliacall module already imported.",
            ),
        ]
        for warning_test in warning_tests:
            result = subprocess.run(
                [sys.executable, "-c", warning_test["code"]],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                env=os.environ,
            )
            self.assertIn(warning_test["msg"], result.stderr.decode())

    def test_notebook(self):
        # Run the companion notebook through pytest/nbval, comparing cell
        # outputs against those stored in the notebook (after sanitizing).
        if platform.system() == "Windows":
            self.skipTest("Notebook test incompatible with Windows")
        if not os.access(Path(__file__).parent, os.W_OK):
            self.skipTest("Read-only file system")

        notebook_file = Path(__file__).parent / "test_nb.ipynb"
        sanitize_file = Path(__file__).parent / "nb_sanitize.cfg"

        if not (notebook_file.exists() and sanitize_file.exists()):
            self.skipTest("Files not available for testing")

        result = subprocess.run(
            [
                sys.executable,
                "-m",
                "pytest",
                "--nbval",
                str(notebook_file),
                "--nbval-sanitize-with",
                str(sanitize_file),
            ],
            env=os.environ,
        )
        self.assertEqual(result.returncode, 0)
class TestRegistryHelper(unittest.TestCase):
    """Test the custom Julia registry preference handling."""

    def setUp(self):
        # Remember any pre-existing preference so tearDown can restore it.
        self.old_value = os.environ.get(PREFERENCE_KEY, None)
        # Records the PREFERENCE_KEY value seen on each attempt, and how many
        # times the failing operation was invoked.
        self.recorded_env_vars = []
        self.hits = 0

        def failing_operation():
            self.recorded_env_vars.append(os.environ[PREFERENCE_KEY])
            self.hits += 1
            # Just add some package I know will not exist and also not be in the dependency chain:
            jl.Pkg.add(name="AirspeedVelocity", version="100.0.0")

        self.failing_operation = failing_operation

    def tearDown(self):
        # Restore the environment variable to its pre-test state.
        if self.old_value is not None:
            os.environ[PREFERENCE_KEY] = self.old_value
        else:
            os.environ.pop(PREFERENCE_KEY, None)

    def test_successful_operation(self):
        # A callable that succeeds is called once, with arguments forwarded.
        self.assertEqual(try_with_registry_fallback(lambda s: s, "success"), "success")

    def test_non_julia_errors_reraised(self):
        # Non-Julia exceptions must propagate unchanged, with no fallback.
        with self.assertRaises(SyntaxError) as context:
            try_with_registry_fallback(lambda: exec("invalid syntax !@#$"))
        self.assertNotIn("JuliaError", str(context.exception))

    def test_julia_error_triggers_fallback(self):
        os.environ[PREFERENCE_KEY] = "conservative"

        # A failing registry operation under "conservative" should warn and
        # retry once under "eager" before finally raising.
        with self.assertWarns(Warning) as warn_context:
            with self.assertRaises(Exception) as error_context:
                try_with_registry_fallback(self.failing_operation)

        self.assertIn(
            "Unsatisfiable requirements detected", str(error_context.exception)
        )
        self.assertIn(
            "Initial Julia registry operation failed. Attempting to use the `eager` registry flavor of the Julia",
            str(warn_context.warning),
        )

        # Verify both modes are tried in order
        self.assertEqual(self.recorded_env_vars, ["conservative", "eager"])
        self.assertEqual(self.hits, 2)

        # Verify environment is restored
        self.assertEqual(os.environ[PREFERENCE_KEY], "conservative")

    def test_eager_mode_fails_directly(self):
        os.environ[PREFERENCE_KEY] = "eager"

        with self.assertRaises(Exception) as context:
            try_with_registry_fallback(self.failing_operation)

        self.assertIn("Unsatisfiable requirements detected", str(context.exception))
        self.assertEqual(
            self.recorded_env_vars, ["eager"]
        )  # Should only try eager mode
        self.assertEqual(self.hits, 1)


def runtests(just_tests=False):
    """Run all tests in test_startup.py.

    When `just_tests` is True, return the list of test-case classes
    instead of executing them.
    """
    tests = [TestStartup, TestRegistryHelper]
    if just_tests:
        return tests
    suite = unittest.TestSuite()
    loader = unittest.TestLoader()
    for test in tests:
        suite.addTests(loader.loadTestsFromTestCase(test))
    runner = unittest.TextTestRunner()
    return runner.run(suite)
    def test_sympy2torch(self):
        # sympy2torch should reproduce a simple sympy expression numerically
        # on a torch tensor input.
        x, y, z = sympy.symbols("x y z")
        cosx = 1.0 * sympy.cos(x) + y

        X = self.torch.tensor(np.random.randn(1000, 3))
        true = 1.0 * self.torch.cos(X[:, 0]) + X[:, 1]
        torch_module = sympy2torch(cosx, [x, y, z])
        self.assertTrue(
            np.all(np.isclose(torch_module(X).detach().numpy(), true.detach().numpy()))
        )

    def test_pipeline_pandas(self):
        # End-to-end check: fit on a DataFrame, overwrite the hall-of-fame
        # CSV with hand-crafted equations, refresh, and verify the exported
        # torch module evaluates the selected equation.
        X = pd.DataFrame(np.random.randn(100, 10))
        y = np.ones(X.shape[0])
        model = PySRRegressor(
            progress=False,
            max_evals=10000,
            model_selection="accuracy",
            extra_sympy_mappings={},
            output_torch_format=True,
        )
        model.fit(X, y)

        # Hand-crafted equation table written where PySR expects its output,
        # so `refresh` below picks it up instead of the real search results.
        equations = pd.DataFrame(
            {
                "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
                "Loss": [1.0, 0.1, 1e-5],
                "Complexity": [1, 2, 3],
            }
        )

        for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]:
            equations["Complexity Loss Equation".split(" ")].to_csv(
                Path(model.output_directory_) / model.run_id_ / fname
            )

        model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_))
        tformat = model.pytorch()
        self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)")

        np.testing.assert_almost_equal(
            tformat(self.torch.tensor(X.values)).detach().numpy(),
            np.square(np.cos(X.values[:, 1])),  # x1 refers to the 2nd column
            decimal=3,
        )
equations["Complexity Loss Equation".split(" ")].to_csv( 88 | Path(model.output_directory_) / model.run_id_ / fname 89 | ) 90 | 91 | model.refresh(run_directory=str(Path(model.output_directory_) / model.run_id_)) 92 | 93 | tformat = model.pytorch() 94 | self.assertEqual(str(tformat), "_SingleSymPyModule(expression=cos(x1)**2)") 95 | 96 | np.testing.assert_almost_equal( 97 | tformat(self.torch.tensor(X)).detach().numpy(), 98 | np.square(np.cos(X[:, 1])), # 2nd feature 99 | decimal=3, 100 | ) 101 | 102 | def test_mod_mapping(self): 103 | x, y, z = sympy.symbols("x y z") 104 | expression = x**2 + sympy.atanh(sympy.Mod(y + 1, 2) - 1) * 3.2 * z 105 | 106 | module = sympy2torch(expression, [x, y, z]) 107 | 108 | X = self.torch.rand(100, 3).float() * 10 109 | 110 | true_out = ( 111 | X[:, 0] ** 2 112 | + self.torch.atanh(self.torch.fmod(X[:, 1] + 1, 2) - 1) * 3.2 * X[:, 2] 113 | ) 114 | torch_out = module(X) 115 | 116 | np.testing.assert_array_almost_equal( 117 | true_out.detach(), torch_out.detach(), decimal=3 118 | ) 119 | 120 | def test_custom_operator(self): 121 | X = np.random.randn(100, 3) 122 | y = np.ones(X.shape[0]) 123 | model = PySRRegressor( 124 | progress=False, 125 | max_evals=10000, 126 | model_selection="accuracy", 127 | output_torch_format=True, 128 | ) 129 | model.fit(X, y) 130 | 131 | equations = pd.DataFrame( 132 | { 133 | "Equation": ["1.0", "mycustomoperator(x1)"], 134 | "Loss": [1.0, 0.1], 135 | "Complexity": [1, 2], 136 | } 137 | ) 138 | 139 | for fname in ["hall_of_fame.csv.bak", "hall_of_fame.csv"]: 140 | equations["Complexity Loss Equation".split(" ")].to_csv( 141 | Path(model.output_directory_) / model.run_id_ / fname 142 | ) 143 | 144 | MyCustomOperator = sympy.Function("mycustomoperator") 145 | 146 | model.set_params( 147 | extra_sympy_mappings={"mycustomoperator": MyCustomOperator}, 148 | extra_torch_mappings={MyCustomOperator: self.torch.sin}, 149 | ) 150 | # TODO: We shouldn't need to specify the run directory here. 
    def test_avoid_simplification(self):
        # SymPy should not simplify without permission:
        # "exp(sign(0.44796443))" would normally be folded into a form using
        # exp1, which would then require its own torch mapping.
        torch = self.torch
        ex = pysr.export_sympy.pysr2sympy(
            "square(exp(sign(0.44796443))) + 1.5 * x1",
            # ^ Normally this would become exp1 and require
            # its own mapping
            feature_names_in=["x1"],
            extra_sympy_mappings={"square": lambda x: x**2},
        )
        m = pysr.export_torch.sympy2torch(ex, ["x1"])
        rng = np.random.RandomState(0)
        X = rng.randn(10, 1)
        np.testing.assert_almost_equal(
            m(torch.tensor(X)).detach().numpy(),
            np.square(np.exp(np.sign(0.44796443))) + 1.5 * X[:, 0],
            decimal=3,
        )

    def test_issue_656(self):
        # Should correctly map numeric symbols to floats:
        # here sympy's E (from exp(1)) must evaluate as a plain float.
        E_plus_x1 = sympy.exp(1) + sympy.symbols("x1")
        m = pysr.export_torch.sympy2torch(E_plus_x1, ["x1"])
        X = np.random.randn(10, 1)
        np.testing.assert_almost_equal(
            m(self.torch.tensor(X)).detach().numpy(),
            np.exp(1) + X[:, 0],
            decimal=3,
        )
def runtests(just_tests=False):
    """Run all tests in test_torch.py.

    When `just_tests` is True, return the list of test-case classes
    instead of executing them.
    """
    tests = [TestTorch]
    if just_tests:
        return tests
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    suite.addTests(
        case for test in tests for case in loader.loadTestsFromTestCase(test)
    )
    return unittest.TextTestRunner().run(suite)
_apply_regexp_im_sci(x: str): 33 | return _regexp_im_sci.sub(r"\1e\2j", x) 34 | 35 | 36 | def _apply_regexp_sci(x: str): 37 | return _regexp_sci.sub(r"\1e\2", x) 38 | 39 | 40 | def _preprocess_julia_floats(s: str) -> str: 41 | if isinstance(s, str): 42 | s = _apply_regexp_im(s) 43 | s = _apply_regexp_im_sci(s) 44 | s = _apply_regexp_sci(s) 45 | return s 46 | 47 | 48 | def _safe_check_feature_names_in(self, variable_names, generate_names=True): 49 | """_check_feature_names_in with compat for old versions.""" 50 | try: 51 | return _check_feature_names_in( 52 | self, variable_names, generate_names=generate_names 53 | ) 54 | except TypeError: 55 | return _check_feature_names_in(self, variable_names) 56 | 57 | 58 | def _subscriptify(i: int) -> str: 59 | """Converts integer to subscript text form. 60 | 61 | For example, 123 -> "₁₂₃". 62 | """ 63 | return "".join([chr(0x2080 + int(c)) for c in str(i)]) 64 | 65 | 66 | def _suggest_keywords(cls, k: str) -> list[str]: 67 | valid_keywords = [ 68 | param 69 | for param in inspect.signature(cls.__init__).parameters 70 | if param not in ["self", "kwargs"] 71 | ] 72 | suggestions = difflib.get_close_matches(k, valid_keywords, n=3) 73 | return suggestions 74 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # setup.py – retained only for users who still type python setup.py ..." 2 | import sys 3 | 4 | sys.stderr.write( 5 | """⚠️ PySR uses pyproject.toml instead of setup.py. 6 | 7 | Install from a checkout with: 8 | python -m pip install . # normal 9 | python -m pip install -e . # editable (pip ≥21.3) 10 | 11 | Or install from PyPI with: 12 | pip install pysr 13 | """ 14 | ) 15 | sys.exit(1) 16 | --------------------------------------------------------------------------------