├── .gitattributes
├── .github
    ├── FUNDING.yml
    └── workflows
    │   └── python-publish.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── blog
    ├── README.md
    ├── bessel_ratio.ipynb
    ├── binary_ising_mf.ipynb
    ├── spin_transformer_module.ipynb
    └── vector_ising_mf.ipynb
├── pyproject.toml
├── setup.py
└── spin_model_transformers
    ├── __init__.py
    ├── bessel.py
    └── spin_model_transformers.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | blog/** linguist-vendored


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: mcbal
2 | 


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v4  # v3 ran on the retired Node 16 runtime
25 |     - name: Set up Python
26 |       uses: actions/setup-python@v5  # v3 ran on the retired Node 16 runtime
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |     - name: Build package
34 |       run: python -m build
35 |     - name: Publish package
36 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29  # third-party action, pinned to a commit SHA
37 |       with:
38 |         user: __token__  # literal user name used together with an API token
39 |         password: ${{ secrets.PYPI_API_TOKEN }}
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | **/__pycache__
2 | **/.ipynb_checkpoints
3 | *.egg-info/
4 | build/
5 | dist/
6 | .vscode/


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v4.4.0
 4 |     hooks:
 5 |       - id: check-added-large-files
 6 |       - id: check-ast
 7 |       - id: check-builtin-literals
 8 |       - id: check-case-conflict
 9 |       - id: check-merge-conflict
10 |       - id: check-toml
11 |       - id: debug-statements
12 |       - id: end-of-file-fixer
13 |         types: [python]
14 |       - id: fix-byte-order-marker
15 |       - id: mixed-line-ending
16 |       - id: trailing-whitespace
17 |         types: [python]
18 |   - repo: https://github.com/psf/black  # canonical location (formerly ambv/black)
19 |     rev: 23.9.1
20 |     hooks:
21 |       - id: black
22 |   - repo: https://github.com/astral-sh/ruff-pre-commit  # formerly charliermarsh/ruff-pre-commit
23 |     rev: 'v0.0.291'
24 |     hooks:
25 |       - id: ruff
26 |         args: ['--fix']
27 |   - repo: https://github.com/nbQA-dev/nbQA  # runs the formatters/linters on notebooks
28 |     rev: 1.7.0
29 |     hooks:
30 |       - id: nbqa-black
31 |         additional_dependencies: [ipython, black]
32 |       - id: nbqa-ruff
33 |         args: ['--ignore=I001']
34 |         additional_dependencies: [ipython, ruff]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Spin-model transformers
 2 | 
 3 | 
 4 | ## Install
 5 | 
 6 | ```bash
 7 | pip install -e ".[dev]"
 8 | pre-commit install
 9 | pre-commit run --all-files
10 | ```
11 | 
12 | ## Examples
13 | 
14 | ```python
15 | import jax
16 | from spin_model_transformers import SpinTransformer
17 | 
18 | 
19 | key = jax.random.PRNGKey(2666)
20 | x_key, mod_key = jax.random.split(key)
21 | 
22 | x = jax.random.normal(x_key, shape=(1, 256, 512))
23 | transformer = SpinTransformer(depth=6, dim=512, num_heads=1, beta=1.0, key=mod_key)
24 | 
25 | out = jax.vmap(transformer)(x)  # (1, 256, 512)
26 | ```
27 | 


--------------------------------------------------------------------------------
/blog/README.md:
--------------------------------------------------------------------------------
1 | # Notebooks to reproduce blog post results
2 | 
3 | - Section 2.5: `binary_ising_mf.ipynb`
4 | - Section 3.5: `vector_ising_mf.ipynb`
5 | - Section 4.3: `spin_transformer_module.ipynb`
6 | - Appendix A.4: `bessel_ratio.ipynb`


--------------------------------------------------------------------------------
/blog/bessel_ratio.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "59aacb0a-b330-4bf5-b521-7ce4953bf2ff",
  6 |    "metadata": {
  7 |     "tags": []
  8 |    },
  9 |    "source": [
 10 |     "# Ratio of modified Bessel functions of the first kind"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "id": "5247b0b7-0aa5-4a68-a440-ac1ed1b006ea",
 17 |    "metadata": {
 18 |     "tags": []
 19 |    },
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "import jax\n",
 23 |     "import jax.numpy as jnp\n",
 24 |     "import matplotlib.pyplot as plt\n",
 25 |     "import numpy as np\n",
 26 |     "from scipy.special import ive\n",
 27 |     "\n",
 28 |     "\n",
 29 |     "from spin_model_transformers.bessel import bessel_iv_ratio\n",
 30 |     "\n",
 31 |     "\n",
 32 |     "def asymptotic_ratio(nu, x, n=1):\n",
 33 |     "    \"\"\"Asymptotic reference ratio; `x` is the Bessel argument divided by `nu`.\"\"\"\n",
 34 |     "    return (x / (1 + (1 + x**2) ** 0.5)) ** n * (1.0)\n",
 35 |     "\n",
 36 |     "\n",
 37 |     "jit_bessel_iv_ratio = jax.jit(\n",
 38 |     "    jax.vmap(\n",
 39 |     "        bessel_iv_ratio,\n",
 40 |     "        in_axes=(0, None, None),\n",
 41 |     "    ),\n",
 42 |     "    static_argnums=(1, 2),\n",
 43 |     ")\n",
 44 |     "\n",
 45 |     "steps = np.logspace(0, 9, num=10, endpoint=True, base=2.0)\n",
 46 |     "\n",
 47 |     "r_scipy, r_jax, r_asym = [], [], []\n",
 48 |     "for nu in steps:\n",
 49 |     "    r_scipy.append(\n",
 50 |     "        ive(nu + 1, [nu**0.5, nu, nu**2]) / ive(nu, [nu**0.5, nu, nu**2])\n",
 51 |     "    )\n",
 52 |     "    r_jax.append(jit_bessel_iv_ratio(jnp.array([nu**0.5, nu, nu**2]), nu, 2))\n",
 53 |     "    r_asym.append(\n",
 54 |     "        [\n",
 55 |     "            asymptotic_ratio(nu, nu**-0.5),\n",
 56 |     "            asymptotic_ratio(nu, 1.0),\n",
 57 |     "            asymptotic_ratio(nu, nu),\n",
 58 |     "        ]\n",
 59 |     "    )\n",
 60 |     "\n",
 61 |     "with plt.style.context(\"ggplot\"):\n",
 62 |     "    plt.plot(\n",
 63 |     "        steps,\n",
 64 |     "        np.stack(r_scipy),\n",
 65 |     "        label=[\"SciPy\", \"_\", \"_\"],\n",
 66 |     "        color=\"tab:red\",\n",
 67 |     "        marker=\"o\",\n",
 68 |     "        markerfacecolor=\"none\",\n",
 69 |     "        linewidth=1.5,\n",
 70 |     "        linestyle=\"none\",\n",
 71 |     "    )\n",
 72 |     "    plt.plot(\n",
 73 |     "        steps,\n",
 74 |     "        np.stack(r_jax),\n",
 75 |     "        label=[\"JAX\", \"_\", \"_\"],\n",
 76 |     "        color=\"tab:blue\",\n",
 77 |     "        marker=\"x\",\n",
 78 |     "        linewidth=1.5,\n",
 79 |     "        linestyle=\"dashed\",\n",
 80 |     "    )\n",
 81 |     "    plt.plot(\n",
 82 |     "        steps,\n",
 83 |     "        np.stack(r_asym),\n",
 84 |     "        label=[\"asym\", \"_\", \"_\"],\n",
 85 |     "        color=\"tab:green\",\n",
 86 |     "        marker=\"x\",\n",
 87 |     "        linewidth=1.5,\n",
 88 |     "        linestyle=\"dotted\",\n",
 89 |     "    )\n",
 90 |     "    plt.legend()\n",
 91 |     "    plt.xlabel(r\"$\\nu$\")\n",
 92 |     "    plt.ylabel(r\"$I_{\\nu+1}(x)/I_{\\nu}(x)$\")\n",
 93 |     "    plt.text(\n",
 94 |     "        200,\n",
 95 |     "        0.96,\n",
 96 |     "        r\"$x=\\nu^2$\",\n",
 97 |     "        horizontalalignment=\"center\",\n",
 98 |     "        verticalalignment=\"center\",\n",
 99 |     "    )\n",
100 |     "    plt.text(\n",
101 |     "        200, 0.45, r\"$x=\\nu$\", horizontalalignment=\"center\", verticalalignment=\"center\"\n",
102 |     "    )\n",
103 |     "    plt.text(\n",
104 |     "        200,\n",
105 |     "        0.08,\n",
106 |     "        r\"$x=\\sqrt{\\nu}$\",\n",
107 |     "        horizontalalignment=\"center\",\n",
108 |     "        verticalalignment=\"center\",\n",
109 |     "    )\n",
110 |     "    plt.show()"
111 |    ]
112 |   }
113 |  ],
114 |  "metadata": {
115 |   "kernelspec": {
116 |    "display_name": "py310",
117 |    "language": "python",
118 |    "name": "python3"
119 |   },
120 |   "language_info": {
121 |    "codemirror_mode": {
122 |     "name": "ipython",
123 |     "version": 3
124 |    },
125 |    "file_extension": ".py",
126 |    "mimetype": "text/x-python",
127 |    "name": "python",
128 |    "nbconvert_exporter": "python",
129 |    "pygments_lexer": "ipython3",
130 |    "version": "3.10.11"
131 |   }
132 |  },
133 |  "nbformat": 4,
134 |  "nbformat_minor": 5
135 | }
136 | 


--------------------------------------------------------------------------------
/blog/binary_ising_mf.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "59aacb0a-b330-4bf5-b521-7ce4953bf2ff",
  6 |    "metadata": {
  7 |     "tags": []
  8 |    },
  9 |    "source": [
 10 |     "# Mean-field theory of asymmetric Ising models with binary spins"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "id": "eb80f5ac-25d9-4f30-a0a4-76c67e06dd90",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "from functools import partial\n",
 21 |     "\n",
 22 |     "import jax\n",
 23 |     "import jax.numpy as jnp\n",
 24 |     "\n",
 25 |     "from jaxopt import AndersonAcceleration\n",
 26 |     "\n",
 27 |     "\n",
 28 |     "def update_naive_mf(m0, _, x, J):\n",
 29 |     "    \"\"\"See Eq. (22).\"\"\"\n",
 30 |     "    m1 = jnp.tanh(x + jnp.einsum(\"i j, j -> i\", J, m0))\n",
 31 |     "    return m1, m0\n",
 32 |     "\n",
 33 |     "\n",
 34 |     "def update_tap_mf(m0, _, x, J):\n",
 35 |     "    \"\"\"See Eq. (26).\"\"\"\n",
 36 |     "\n",
 37 |     "    def tap(m, _m0, _x, _J):\n",
 38 |     "        return jnp.tanh(\n",
 39 |     "            _x\n",
 40 |     "            + jnp.einsum(\"i j, j -> i\", _J, _m0)\n",
 41 |     "            - m * jnp.einsum(\"i j, j -> i\", _J**2, (1.0 - _m0**2))\n",
 42 |     "        )\n",
 43 |     "\n",
 44 |     "    m1 = (\n",
 45 |     "        AndersonAcceleration(fixed_point_fun=tap, tol=1e-3, maxiter=10)\n",
 46 |     "        .run(m0, m0, x, J)\n",
 47 |     "        .params\n",
 48 |     "    )\n",
 49 |     "    return m1, m0\n",
 50 |     "\n",
 51 |     "\n",
 52 |     "def time_evolution(m0, steps, update_fun):\n",
 53 |     "    final_carry, stacked_outputs = jax.lax.scan(update_fun, init=m0, xs=steps)\n",
 54 |     "    return final_carry, stacked_outputs\n",
 55 |     "\n",
 56 |     "\n",
 57 |     "def init_params(key, N, beta, X0, J_mu, J_sigma):\n",
 58 |     "    x_key, J_key = jax.random.split(key)\n",
 59 |     "    x = jax.random.uniform(x_key, shape=(N,), minval=-beta * X0, maxval=beta * X0)\n",
 60 |     "    J = beta * J_mu * N**-1 + beta * J_sigma * N**-0.5 * jax.random.normal(\n",
 61 |     "        J_key, shape=(N, N)\n",
 62 |     "    )\n",
 63 |     "    return x, J\n",
 64 |     "\n",
 65 |     "\n",
 66 |     "def simulate(\n",
 67 |     "    key, m0, steps, beta, X0=0.5, J_mu=1.0, J_sigma=0.1, update_fun=update_tap_mf\n",
 68 |     "):\n",
 69 |     "    x, J = init_params(key, m0.shape[-1], beta, X0, J_mu, J_sigma)\n",
 70 |     "    wrapped_time_evolution = partial(\n",
 71 |     "        time_evolution,\n",
 72 |     "        steps=steps,\n",
 73 |     "        update_fun=partial(update_fun, x=x, J=J),\n",
 74 |     "    )\n",
 75 |     "    final_carry, stacked_outputs = jax.vmap(wrapped_time_evolution)(m0)\n",
 76 |     "    return final_carry, stacked_outputs"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "id": "e2accb24-d9e5-43a1-a97e-4740d6bbd416",
 83 |    "metadata": {
 84 |     "tags": []
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "import matplotlib.pyplot as plt\n",
 89 |     "\n",
 90 |     "\n",
 91 |     "beta_c = 1.1108\n",
 92 |     "N = 512\n",
 93 |     "steps = jnp.arange(0, 128)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "id": "cdc8929c-1061-45b7-9ece-9fe7036d61b6",
 99 |    "metadata": {},
100 |    "source": [
101 |     "### Naive mean-field vs. Thouless-Anderson-Palmer (TAP)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": null,
107 |    "id": "ecf2907b-9a8a-4242-88eb-e084fdd79391",
108 |    "metadata": {
109 |     "tags": []
110 |    },
111 |    "outputs": [],
112 |    "source": [
113 |     "key = jax.random.PRNGKey(2666)\n",
114 |     "params_key, _ = jax.random.split(key)\n",
115 |     "\n",
116 |     "m0 = jnp.ones((1, N))\n",
117 |     "_, stacked_outputs = simulate(params_key, m0, steps, beta_c, update_fun=update_naive_mf)\n",
118 |     "\n",
119 |     "y = stacked_outputs[0, :, :]\n",
120 |     "\n",
121 |     "with plt.style.context(\"ggplot\"):\n",
122 |     "    plt.clf()\n",
123 |     "    fig, ax1 = plt.subplots()\n",
124 |     "\n",
125 |     "    ax1.set_xlabel(r\"$t$\")\n",
126 |     "    ax1.set_ylabel(r\"$m_{i,t}$\")\n",
127 |     "    ax1.imshow(y.transpose((1, 0)), aspect=\"auto\")\n",
128 |     "    ax1.grid(False)\n",
129 |     "\n",
130 |     "    ax2 = ax1.twinx()\n",
131 |     "    ax2.set_ylabel(r\"$\\langle m_{i,t} \\rangle$\")\n",
132 |     "    ax2.plot(steps, y.mean(axis=-1), color=\"tab:red\", linewidth=2.0)\n",
133 |     "    ax2.grid(False)\n",
134 |     "\n",
135 |     "    fig.tight_layout()\n",
136 |     "    plt.show()"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "id": "3e823582-cee6-412f-89cf-54d8b74f173a",
143 |    "metadata": {
144 |     "tags": []
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "key = jax.random.PRNGKey(2666)\n",
149 |     "params_key, m0_key = jax.random.split(key)\n",
150 |     "\n",
151 |     "m0 = jnp.ones((1, N))\n",
152 |     "_, stacked_outputs_naive = simulate(\n",
153 |     "    params_key, m0, steps, beta_c, update_fun=update_naive_mf\n",
154 |     ")\n",
155 |     "_, stacked_outputs_tap = simulate(params_key, m0, steps, beta_c)\n",
156 |     "\n",
157 |     "y_naive = stacked_outputs_naive.mean(axis=-1).transpose((1, 0))\n",
158 |     "y_tap = stacked_outputs_tap.mean(axis=-1).transpose((1, 0))\n",
159 |     "\n",
160 |     "with plt.style.context(\"ggplot\"):\n",
161 |     "    plt.plot(steps, y_naive, label=\"Naive mean-field\", color=\"tab:red\", linewidth=1.5)\n",
162 |     "    plt.plot(\n",
163 |     "        steps,\n",
164 |     "        y_tap,\n",
165 |     "        label=\"Thouless-Anderson-Palmer (TAP)\",\n",
166 |     "        color=\"tab:blue\",\n",
167 |     "        linewidth=1.5,\n",
168 |     "    )\n",
169 |     "    plt.legend()\n",
170 |     "    plt.xlabel(r\"$t$\")\n",
171 |     "    plt.ylabel(r\"$\\langle m_{i,t} \\rangle$\")\n",
172 |     "    plt.show()"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "id": "7f6980fa",
178 |    "metadata": {},
179 |    "source": [
180 |     "### Sampling trajectories"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "id": "88a70d9e-1037-4391-8b28-a6cf12d11e49",
187 |    "metadata": {
188 |     "tags": []
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "key = jax.random.PRNGKey(2666)\n",
193 |     "params_key, m0_key = jax.random.split(key)\n",
194 |     "\n",
195 |     "m0 = 5 * jax.random.normal(m0_key, shape=(100, N))\n",
196 |     "_, stacked_outputs_below = simulate(params_key, m0, steps, 0.5 * beta_c)\n",
197 |     "_, stacked_outputs_near = simulate(params_key, m0, steps, beta_c)\n",
198 |     "_, stacked_outputs_above = simulate(params_key, m0, steps, 2 * beta_c)\n",
199 |     "\n",
200 |     "with plt.style.context(\"ggplot\"):\n",
201 |     "    plt.figure(figsize=(9, 3))\n",
202 |     "\n",
203 |     "    ax1 = plt.subplot(131)\n",
204 |     "    plt.plot(\n",
205 |     "        steps, stacked_outputs_below.mean(axis=-1).transpose((1, 0)), linewidth=1.0\n",
206 |     "    )\n",
207 |     "    plt.xlabel(r\"$t$\")\n",
208 |     "    plt.ylabel(r\"$\\langle m_{i,t} \\rangle$\")\n",
209 |     "    plt.title(r\"$\\beta < \\beta_c$\", fontsize=10)\n",
210 |     "\n",
211 |     "    ax2 = plt.subplot(132, sharey=ax1)\n",
212 |     "    plt.plot(steps, stacked_outputs_near.mean(axis=-1).transpose((1, 0)), linewidth=1.0)\n",
213 |     "    plt.xlabel(r\"$t$\")\n",
214 |     "    plt.title(r\"$\\beta \\approx \\beta_c$\", fontsize=10)\n",
215 |     "\n",
216 |     "    ax3 = plt.subplot(133, sharey=ax1)\n",
217 |     "    plt.plot(\n",
218 |     "        steps, stacked_outputs_above.mean(axis=-1).transpose((1, 0)), linewidth=1.0\n",
219 |     "    )\n",
220 |     "    plt.xlabel(r\"$t$\")\n",
221 |     "    plt.title(r\"$\\beta > \\beta_c$\", fontsize=10)\n",
222 |     "\n",
223 |     "    plt.show()"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "markdown",
228 |    "id": "5b909038",
229 |    "metadata": {},
230 |    "source": [
231 |     "### Sampling model parameters"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": null,
237 |    "id": "4562445b-50fa-44fb-a9df-5e05b2d14f18",
238 |    "metadata": {
239 |     "tags": []
240 |    },
241 |    "outputs": [],
242 |    "source": [
243 |     "key = jax.random.PRNGKey(2666)\n",
244 |     "params_key, m0_key = jax.random.split(key)\n",
245 |     "\n",
246 |     "m0 = jax.random.normal(m0_key, shape=(1, N))\n",
247 |     "_, stacked_outputs = jax.vmap(\n",
248 |     "    jax.vmap(simulate, in_axes=(0, None, None, None)), in_axes=(None, None, None, 0)\n",
249 |     ")(\n",
250 |     "    jax.random.split(params_key, num=100),\n",
251 |     "    m0,\n",
252 |     "    steps,\n",
253 |     "    jnp.linspace(0.25 * beta_c, 2 * beta_c, num=20),\n",
254 |     ")\n",
255 |     "\n",
256 |     "y = jnp.abs(stacked_outputs[:, :, 0, -1, :].mean(axis=-1))\n",
257 |     "\n",
258 |     "with plt.style.context(\"ggplot\"):\n",
259 |     "    plt.plot(jnp.linspace(0.25 * beta_c, 2 * beta_c, num=20), y, linewidth=1.0)\n",
260 |     "    plt.axvline(x=beta_c)\n",
261 |     "    plt.xlabel(r\"$\\beta$\")\n",
262 |     "    plt.ylabel(r\"$\\langle m_{i} \\rangle$\")\n",
263 |     "    plt.show()"
264 |    ]
265 |   }
266 |  ],
267 |  "metadata": {
268 |   "kernelspec": {
269 |    "display_name": "py310",
270 |    "language": "python",
271 |    "name": "python3"
272 |   },
273 |   "language_info": {
274 |    "codemirror_mode": {
275 |     "name": "ipython",
276 |     "version": 3
277 |    },
278 |    "file_extension": ".py",
279 |    "mimetype": "text/x-python",
280 |    "name": "python",
281 |    "nbconvert_exporter": "python",
282 |    "pygments_lexer": "ipython3",
283 |    "version": "3.10.11"
284 |   }
285 |  },
286 |  "nbformat": 4,
287 |  "nbformat_minor": 5
288 | }
289 | 


--------------------------------------------------------------------------------
/blog/spin_transformer_module.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "id": "59aacb0a-b330-4bf5-b521-7ce4953bf2ff",
 6 |    "metadata": {
 7 |     "tags": []
 8 |    },
 9 |    "source": [
10 |     "# Spin-transformer models"
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "code",
15 |    "execution_count": null,
16 |    "id": "e484d695",
17 |    "metadata": {},
18 |    "outputs": [],
19 |    "source": [
20 |     "import jax\n",
21 |     "import jax.numpy as jnp\n",
22 |     "import equinox as eqx\n",
23 |     "\n",
24 |     "from spin_model_transformers import SpinTransformerModule, SpinTransformer"
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "code",
29 |    "execution_count": null,
30 |    "id": "34c3fda2",
31 |    "metadata": {},
32 |    "outputs": [],
33 |    "source": [
34 |     "key = jax.random.PRNGKey(2666)\n",
35 |     "x_key, mod_key = jax.random.split(key)\n",
36 |     "\n",
37 |     "x = jax.random.normal(x_key, shape=(1, 512, 512))\n",
38 |     "transformer_module = SpinTransformerModule(dim=512, num_heads=1, beta=2.0, key=mod_key)\n",
39 |     "\n",
40 |     "print(jax.vmap(transformer_module)(x))"
41 |    ]
42 |   },
43 |   {
44 |    "cell_type": "code",
45 |    "execution_count": null,
46 |    "id": "14e2315c",
47 |    "metadata": {},
48 |    "outputs": [],
49 |    "source": [
50 |     "@eqx.filter_jit\n",
51 |     "def loss_fn(model, x):\n",
52 |     "    return jnp.mean(jax.vmap(model)(x))\n",
53 |     "\n",
54 |     "\n",
55 |     "print(eqx.filter_grad(loss_fn)(transformer_module, x).to_qk.weight)"
56 |    ]
57 |   },
58 |   {
59 |    "cell_type": "code",
60 |    "execution_count": null,
61 |    "id": "e1a1adfc",
62 |    "metadata": {},
63 |    "outputs": [],
64 |    "source": [
65 |     "transformer = SpinTransformer(depth=6, dim=512, num_heads=8, beta=1.0, key=mod_key)\n",
66 |     "print(jax.vmap(transformer)(x))"
67 |    ]
68 |   }
69 |  ],
70 |  "metadata": {
71 |   "kernelspec": {
72 |    "display_name": "py310",
73 |    "language": "python",
74 |    "name": "python3"
75 |   },
76 |   "language_info": {
77 |    "codemirror_mode": {
78 |     "name": "ipython",
79 |     "version": 3
80 |    },
81 |    "file_extension": ".py",
82 |    "mimetype": "text/x-python",
83 |    "name": "python",
84 |    "nbconvert_exporter": "python",
85 |    "pygments_lexer": "ipython3",
86 |    "version": "3.10.11"
87 |   }
88 |  },
89 |  "nbformat": 4,
90 |  "nbformat_minor": 5
91 | }
92 | 


--------------------------------------------------------------------------------
/blog/vector_ising_mf.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "59aacb0a-b330-4bf5-b521-7ce4953bf2ff",
  6 |    "metadata": {
  7 |     "tags": []
  8 |    },
  9 |    "source": [
 10 |     "# Mean-field theory of asymmetric Ising models with vector spins"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "id": "f86cf435",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import jax\n",
 21 |     "import matplotlib.pyplot as plt\n",
 22 |     "import networkx as nx\n",
 23 |     "import numpy as np\n",
 24 |     "\n",
 25 |     "\n",
 26 |     "# Draw an N x N Gaussian coupling matrix scaled by N**-0.5 and view it as a weighted digraph.\n",
 27 |     "key = jax.random.PRNGKey(2666)\n",
 28 |     "N = 64\n",
 29 |     "J = N**-0.5 * jax.random.normal(key, shape=(N, N))\n",
 30 |     "\n",
 31 |     "G = nx.from_numpy_array(np.array(J), create_using=nx.DiGraph)\n",
 32 |     "pos = nx.circular_layout(G)\n",
 33 |     "edge_weights = [G[u][v][\"weight\"] for u, v in G.edges()]\n",
 34 |     "\n",
 35 |     "plt.figure(figsize=(8, 8))\n",
 36 |     "\n",
 37 |     "nodes = nx.draw_networkx_nodes(G, pos, node_size=100, node_color=\"white\", edgecolors=\"black\")\n",
 38 |     "# Scale widths element-wise (2 * list would repeat the list); sign is shown by colour.\n",
 39 |     "edges = nx.draw_networkx_edges(\n",
 40 |     "    G,\n",
 41 |     "    pos,\n",
 42 |     "    arrowstyle=\"->\",\n",
 43 |     "    arrowsize=10,\n",
 44 |     "    edge_color=edge_weights,\n",
 45 |     "    edge_cmap=plt.cm.RdBu_r,\n",
 46 |     "    width=[2 * abs(w) for w in edge_weights],\n",
 47 |     ")\n",
 48 |     "\n",
 49 |     "for i in range(G.number_of_edges()):\n",
 50 |     "    edges[i].set_alpha(0.9)\n",
 51 |     "\n",
 52 |     "ax = plt.gca()\n",
 53 |     "ax.set_box_aspect(1)\n",
 54 |     "ax.set_axis_off()\n",
 55 |     "plt.show()"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "id": "eb80f5ac-25d9-4f30-a0a4-76c67e06dd90",
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "from functools import partial\n",
 66 |     "\n",
 67 |     "import jax\n",
 68 |     "import jax.numpy as jnp\n",
 69 |     "\n",
 70 |     "from jaxopt import AndersonAcceleration\n",
 71 |     "\n",
 72 |     "\n",
 73 |     "def _gamma(x, beta, r):\n",
 74 |     "    \"\"\"See Eq. (39).\"\"\"\n",
 75 |     "    return jnp.sqrt(1 + beta**2 * jnp.sum(x**2, axis=-1, keepdims=True) / r**2)\n",
 76 |     "\n",
 77 |     "\n",
 78 |     "def _phi(theta, beta, r):\n",
 79 |     "    \"\"\"See Eq. (38).\"\"\"\n",
 80 |     "    return beta / (1 + _gamma(theta, beta, r)) * theta\n",
 81 |     "\n",
 82 |     "\n",
 83 |     "def update_naive_mf(m0, _, x, J, beta, r):\n",
 84 |     "    \"\"\"See Eq. (47).\"\"\"\n",
 85 |     "    theta = x + jnp.einsum(\"i j, j d -> i d\", J, m0)\n",
 86 |     "    m1 = _phi(theta, beta, r)\n",
 87 |     "    return m1, m0\n",
 88 |     "\n",
 89 |     "\n",
 90 |     "def _inv_phi(m, beta, r):\n",
 91 |     "    \"\"\"See Eq. (64).\"\"\"\n",
 92 |     "    return 2 * r**2 / (beta * (r**2 - jnp.sum(m**2, axis=-1, keepdims=True))) * m\n",
 93 |     "\n",
 94 |     "\n",
 95 |     "def _d2_m_d_alpha_2(m1, m0, x, J, beta, r):\n",
 96 |     "    \"\"\"See Eq. (58).\"\"\"\n",
 97 |     "    g0 = _gamma(_inv_phi(m0, beta, r), beta, r)\n",
 98 |     "    g1 = _gamma(_inv_phi(m1, beta, r), beta, r)\n",
 99 |     "    v = -_inv_phi(m1, beta, r) + x + jnp.einsum(\"i j, j d -> i d\", J, m0)\n",
100 |     "\n",
101 |     "    return (\n",
102 |     "        (beta**2 * (1 + 3 * g1))\n",
103 |     "        / (r**4 * g1**3)\n",
104 |     "        * (\n",
105 |     "            jnp.einsum(\"i d, i d -> i\", m1, v)[:, None] ** 2\n",
106 |     "            + jnp.einsum(\n",
107 |     "                \"i j, i d -> i d\",\n",
108 |     "                J**2,\n",
109 |     "                jnp.sum(m1**2, axis=-1, keepdims=True),\n",
110 |     "            )\n",
111 |     "            / (1 + g0)\n",
112 |     "            - jnp.einsum(\n",
113 |     "                \"i j, i d, j d, i e, j e -> i\",\n",
114 |     "                J**2,\n",
115 |     "                m1,\n",
116 |     "                m0,\n",
117 |     "                m1,\n",
118 |     "                m0,\n",
119 |     "            )[:, None]\n",
120 |     "            / (r**2 * g0)\n",
121 |     "        )\n",
122 |     "        * m1\n",
123 |     "        - (beta**2)\n",
124 |     "        / (r**2 * (g1**2 + g1))\n",
125 |     "        * (\n",
126 |     "            jnp.sum(v**2, axis=-1, keepdims=True)\n",
127 |     "            + jnp.einsum(\n",
128 |     "                \"i j, j -> i\",\n",
129 |     "                J**2,\n",
130 |     "                r**2 - jnp.sum(m0**2, axis=-1),\n",
131 |     "            )[:, None]\n",
132 |     "        )\n",
133 |     "        * m1\n",
134 |     "        - 2.0\n",
135 |     "        * beta**2\n",
136 |     "        / (r**2 * (g1**2 + g1))\n",
137 |     "        * (\n",
138 |     "            jnp.einsum(\"i d, i d, i f -> i f\", v, m1, v)\n",
139 |     "            + jnp.einsum(\"i j, i d -> i d\", J**2, m1 / (1 + g0))\n",
140 |     "            - jnp.einsum(\n",
141 |     "                \"i j, i d, j d, j f -> i f\",\n",
142 |     "                J**2,\n",
143 |     "                m1,\n",
144 |     "                m0,\n",
145 |     "                m0,\n",
146 |     "            )\n",
147 |     "            / (r**2 * g0)\n",
148 |     "        )\n",
149 |     "    )\n",
150 |     "\n",
151 |     "\n",
152 |     "def _f(m1, m0, x, J, beta, r):\n",
153 |     "    \"\"\"See Eq. (61).\"\"\"\n",
154 |     "    g1 = _gamma(_inv_phi(m1, beta, r), beta, r)\n",
155 |     "    d2_m_d_alpha_2 = _d2_m_d_alpha_2(m1, m0, x, J, beta, r)\n",
156 |     "\n",
157 |     "    ff = (\n",
158 |     "        (1 + g1)\n",
159 |     "        / (2 * beta)\n",
160 |     "        * (\n",
161 |     "            d2_m_d_alpha_2\n",
162 |     "            + (\n",
163 |     "                jnp.einsum(\"i d, i d -> i\", m1, d2_m_d_alpha_2)[:, None]\n",
164 |     "                / ((r**2 * g1) / (1 + g1) - jnp.sum(m1**2, axis=-1, keepdims=True))\n",
165 |     "                * m1\n",
166 |     "            )\n",
167 |     "        )\n",
168 |     "    )\n",
169 |     "    return x + jnp.einsum(\"i j, j d -> i d\", J, m0) + ff\n",
170 |     "\n",
171 |     "\n",
172 |     "def update_tap_mf(m0, _, x, J, beta, r):\n",
173 |     "    \"\"\"See Eq. (65).\"\"\"\n",
174 |     "\n",
175 |     "    def tap(m1, _m0, _x, _J, _beta, _r):\n",
176 |     "        return _phi(_f(m1, _m0, _x, _J, _beta, _r), _beta, _r)\n",
177 |     "\n",
178 |     "    out = (\n",
179 |     "        AndersonAcceleration(\n",
180 |     "            fixed_point_fun=tap,\n",
181 |     "            tol=1e-3,\n",
182 |     "            maxiter=100,\n",
183 |     "        )\n",
184 |     "    ).run(_phi(x + J @ m0, beta, r), m0, x, J, beta, r)\n",
185 |     "\n",
186 |     "    jax.debug.print(\"{error}\", error=out.state.error)\n",
187 |     "\n",
188 |     "    return out.params, m0\n",
189 |     "\n",
190 |     "\n",
191 |     "def time_evolution(m0, steps, update_fun):\n",
192 |     "    final_carry, stacked_outputs = jax.lax.scan(update_fun, init=m0, xs=steps)\n",
193 |     "    return final_carry, stacked_outputs\n",
194 |     "\n",
195 |     "\n",
196 |     "def simulate(x, J, m0, steps, beta, r, update_fun=update_tap_mf):\n",
197 |     "    wrapped_time_evolution = partial(\n",
198 |     "        time_evolution,\n",
199 |     "        steps=steps,\n",
200 |     "        update_fun=partial(update_fun, x=x, J=J, beta=beta, r=r),\n",
201 |     "    )\n",
202 |     "    final_carry, stacked_outputs = jax.vmap(wrapped_time_evolution)(m0)\n",
203 |     "    return final_carry, stacked_outputs"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": null,
209 |    "id": "24ff6ca2",
210 |    "metadata": {},
211 |    "outputs": [],
212 |    "source": [
213 |     "import matplotlib.pyplot as plt\n",
214 |     "import optax\n",
215 |     "\n",
216 |     "\n",
217 |     "def simulate_and_plot_only_naive(x, J, m0, steps, beta, r):\n",
218 |     "    _, stacked_outputs_naive = simulate(\n",
219 |     "        x, J, m0, steps, beta, r, update_fun=update_naive_mf\n",
220 |     "    )\n",
221 |     "\n",
222 |     "    y_naive = stacked_outputs_naive.transpose((1, 0, 2, 3))\n",
223 |     "\n",
224 |     "    y_naive_m_buffer = jnp.zeros_like(y_naive[:, :, :, 0])\n",
225 |     "    y_naive_x_buffer = jnp.zeros_like(y_naive[:, :, :, 0])\n",
226 |     "    y_naive_m0_buffer = jnp.zeros_like(y_naive[:, :, :, 0])\n",
227 |     "\n",
228 |     "    for step in steps:\n",
229 |     "        if step == 0:\n",
230 |     "            y_naive_m_buffer = y_naive_m_buffer.at[step].set(\n",
231 |     "                optax.cosine_similarity(y_naive[0], y_naive[0])\n",
232 |     "            )\n",
233 |     "        else:\n",
234 |     "            y_naive_m_buffer = y_naive_m_buffer.at[step].set(\n",
235 |     "                optax.cosine_similarity(y_naive[step - 1], y_naive[step])\n",
236 |     "            )\n",
237 |     "        y_naive_x_buffer = y_naive_x_buffer.at[step].set(\n",
238 |     "            optax.cosine_similarity(x, y_naive[step])\n",
239 |     "        )\n",
240 |     "        y_naive_m0_buffer = y_naive_m0_buffer.at[step].set(\n",
241 |     "            optax.cosine_similarity(m0, y_naive[step])\n",
242 |     "        )\n",
243 |     "\n",
244 |     "    with plt.style.context(\"ggplot\"):\n",
245 |     "        plt.clf()\n",
246 |     "        plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\")\n",
247 |     "        fig, axes = plt.subplots(2, 1, sharex=True, sharey=\"row\", figsize=(3.5, 5.5))\n",
248 |     "\n",
249 |     "        axes[0].set_ylabel(r\"cosine similarity\")\n",
250 |     "        axes[0].plot(\n",
251 |     "            steps[1:],\n",
252 |     "            y_naive_m_buffer.squeeze()[1:],\n",
253 |     "            color=\"tab:green\",\n",
254 |     "            linewidth=0.2,\n",
255 |     "            alpha=0.5,\n",
256 |     "        )\n",
257 |     "\n",
258 |     "        axes[0].plot(\n",
259 |     "            steps,\n",
260 |     "            y_naive_m0_buffer.squeeze(),\n",
261 |     "            color=\"tab:olive\",\n",
262 |     "            linewidth=0.2,\n",
263 |     "            alpha=0.5,\n",
264 |     "        )\n",
265 |     "        axes[0].plot(\n",
266 |     "            steps,\n",
267 |     "            y_naive_x_buffer.squeeze(),\n",
268 |     "            color=\"tab:blue\",\n",
269 |     "            linewidth=0.2,\n",
270 |     "            alpha=0.5,\n",
271 |     "        )\n",
272 |     "        axes[0].legend(loc=\"lower center\")\n",
273 |     "\n",
274 |     "        axes[1].set_xlabel(r\"$t$\")\n",
275 |     "        axes[1].set_ylabel(r\"Euclidean norm\")\n",
276 |     "        axes[1].plot(\n",
277 |     "            steps,\n",
278 |     "            jnp.linalg.norm(y_naive, axis=-1).squeeze(),\n",
279 |     "            color=\"tab:red\",\n",
280 |     "            linewidth=0.2,\n",
281 |     "            alpha=0.5,\n",
282 |     "        )\n",
283 |     "        axes[1].set_xlabel(r\"$t$\")\n",
284 |     "\n",
285 |     "        fig.tight_layout()\n",
286 |     "        plt.show()\n",
287 |     "\n",
288 |     "\n",
289 |     "def simulate_and_plot(x, J, m0, steps, beta, r):\n",
290 |     "    \"\"\"Run the naive and TAP mean-field updates from `m0` and plot both side by side.\n",
291 |     "\n",
292 |     "    Top row: cosine similarity of each step's output with the previous step, `m0` and `x`.\n",
293 |     "    Bottom row: Euclidean norm. Lines are means and bands are min/max over the last axis.\"\"\"\n",
294 |     "    _, stacked_outputs_naive = simulate(x, J, m0, steps, beta, r, update_fun=update_naive_mf)\n",
295 |     "    _, stacked_outputs_tap = simulate(x, J, m0, steps, beta, r, update_fun=update_tap_mf)\n",
296 |     "\n",
297 |     "    y_naive = stacked_outputs_naive.transpose((1, 0, 2, 3))\n",
298 |     "    y_tap = stacked_outputs_tap.transpose((1, 0, 2, 3))\n",
299 |     "\n",
300 |     "    y_naive_m_buffer = jnp.zeros_like(y_naive[:, :, :, 0])\n",
301 |     "    y_naive_x_buffer = jnp.zeros_like(y_naive[:, :, :, 0])\n",
302 |     "    y_naive_m0_buffer = jnp.zeros_like(y_naive[:, :, :, 0])\n",
303 |     "    y_tap_m_buffer = jnp.zeros_like(y_tap[:, :, :, 0])\n",
304 |     "    y_tap_x_buffer = jnp.zeros_like(y_tap[:, :, :, 0])\n",
305 |     "    y_tap_m0_buffer = jnp.zeros_like(y_tap[:, :, :, 0])\n",
306 |     "\n",
307 |     "    for step in steps:\n",
308 |     "        if step == 0:\n",
309 |     "            y_naive_m_buffer = y_naive_m_buffer.at[step].set(\n",
310 |     "                optax.cosine_similarity(y_naive[0], y_naive[0])\n",
311 |     "            )\n",
312 |     "            y_tap_m_buffer = y_tap_m_buffer.at[step].set(\n",
313 |     "                optax.cosine_similarity(y_tap[0], y_tap[0])\n",
314 |     "            )\n",
315 |     "        else:\n",
316 |     "            y_naive_m_buffer = y_naive_m_buffer.at[step].set(\n",
317 |     "                optax.cosine_similarity(y_naive[step - 1], y_naive[step])\n",
318 |     "            )\n",
319 |     "            y_tap_m_buffer = y_tap_m_buffer.at[step].set(\n",
320 |     "                optax.cosine_similarity(y_tap[step - 1], y_tap[step])\n",
321 |     "            )\n",
322 |     "        y_naive_x_buffer = y_naive_x_buffer.at[step].set(\n",
323 |     "            optax.cosine_similarity(x, y_naive[step])\n",
324 |     "        )\n",
325 |     "        y_tap_x_buffer = y_tap_x_buffer.at[step].set(\n",
326 |     "            optax.cosine_similarity(x, y_tap[step])\n",
327 |     "        )\n",
328 |     "        y_naive_m0_buffer = y_naive_m0_buffer.at[step].set(\n",
329 |     "            optax.cosine_similarity(m0, y_naive[step])\n",
330 |     "        )\n",
331 |     "        y_tap_m0_buffer = y_tap_m0_buffer.at[step].set(\n",
332 |     "            optax.cosine_similarity(m0, y_tap[step])\n",
333 |     "        )\n",
334 |     "\n",
335 |     "    with plt.style.context(\"ggplot\"):\n",
336 |     "        plt.clf()\n",
337 |     "        plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\")\n",
338 |     "        fig, axes = plt.subplots(2, 2, sharex=True, sharey=\"row\", figsize=(6, 6))\n",
339 |     "\n",
340 |     "        axes[0, 0].set_ylabel(r\"cosine similarity\")\n",
341 |     "        axes[0, 0].plot(\n",
342 |     "            steps[1:],\n",
343 |     "            y_naive_m_buffer.squeeze()[1:].mean(axis=-1),\n",
344 |     "            color=\"tab:green\",\n",
345 |     "            linewidth=2,\n",
346 |     "            label=r\"$\\mathbf{m}_{t-1}$\",\n",
347 |     "        )\n",
348 |     "        axes[0, 0].fill_between(\n",
349 |     "            steps[1:],\n",
350 |     "            y_naive_m_buffer.squeeze()[1:].min(axis=-1),\n",
351 |     "            y_naive_m_buffer.squeeze()[1:].max(axis=-1),\n",
352 |     "            color=\"tab:green\",\n",
353 |     "            alpha=0.2,\n",
354 |     "        )\n",
355 |     "        axes[0, 0].plot(\n",
356 |     "            steps,\n",
357 |     "            y_naive_m0_buffer.squeeze().mean(axis=-1),\n",
358 |     "            color=\"tab:olive\",\n",
359 |     "            linewidth=2,\n",
360 |     "            label=r\"$\\mathbf{m}_{0}$\",\n",
361 |     "        )\n",
362 |     "        axes[0, 0].fill_between(\n",
363 |     "            steps,\n",
364 |     "            y_naive_m0_buffer.squeeze().min(axis=-1),\n",
365 |     "            y_naive_m0_buffer.squeeze().max(axis=-1),\n",
366 |     "            color=\"tab:olive\",\n",
367 |     "            alpha=0.2,\n",
368 |     "        )\n",
369 |     "        axes[0, 0].plot(\n",
370 |     "            steps,\n",
371 |     "            y_naive_x_buffer.squeeze().mean(axis=-1),\n",
372 |     "            color=\"tab:blue\",\n",
373 |     "            linewidth=2,\n",
374 |     "            label=r\"$\\mathbf{x}$\",\n",
375 |     "        )\n",
376 |     "        axes[0, 0].fill_between(\n",
377 |     "            steps,\n",
378 |     "            y_naive_x_buffer.squeeze().min(axis=-1),\n",
379 |     "            y_naive_x_buffer.squeeze().max(axis=-1),\n",
380 |     "            color=\"tab:blue\",\n",
381 |     "            alpha=0.2,\n",
382 |     "        )\n",
383 |     "        axes[0, 0].legend(loc=\"lower center\")\n",
384 |     "        axes[0, 0].set_title(r\"update_naive_mf\")\n",
385 |     "\n",
386 |     "        axes[0, 1].plot(\n",
387 |     "            steps[1:],\n",
388 |     "            y_tap_m_buffer.squeeze()[1:].mean(axis=-1),\n",
389 |     "            color=\"tab:green\",\n",
390 |     "            linewidth=2,\n",
391 |     "            label=r\"$\\mathbf{m}_{t-1}$\",\n",
392 |     "        )\n",
393 |     "        axes[0, 1].fill_between(\n",
394 |     "            steps[1:],\n",
395 |     "            y_tap_m_buffer.squeeze()[1:].min(axis=-1),\n",
396 |     "            y_tap_m_buffer.squeeze()[1:].max(axis=-1),\n",
397 |     "            color=\"tab:green\",\n",
398 |     "            alpha=0.2,\n",
399 |     "        )\n",
400 |     "        axes[0, 1].plot(\n",
401 |     "            steps,\n",
402 |     "            y_tap_m0_buffer.squeeze().mean(axis=-1),\n",
403 |     "            color=\"tab:olive\",\n",
404 |     "            linewidth=2,\n",
405 |     "            label=r\"$\\mathbf{m}_{0}$\",\n",
406 |     "        )\n",
407 |     "        axes[0, 1].fill_between(\n",
408 |     "            steps,\n",
409 |     "            y_tap_m0_buffer.squeeze().min(axis=-1),\n",
410 |     "            y_tap_m0_buffer.squeeze().max(axis=-1),\n",
411 |     "            color=\"tab:olive\",\n",
412 |     "            alpha=0.2,\n",
413 |     "        )\n",
414 |     "        axes[0, 1].plot(\n",
415 |     "            steps,\n",
416 |     "            y_tap_x_buffer.squeeze().mean(axis=-1),\n",
417 |     "            color=\"tab:blue\",\n",
418 |     "            linewidth=2,\n",
419 |     "            label=r\"$\\mathbf{x}$\",\n",
420 |     "        )\n",
421 |     "        axes[0, 1].fill_between(\n",
422 |     "            steps,\n",
423 |     "            y_tap_x_buffer.squeeze().min(axis=-1),\n",
424 |     "            y_tap_x_buffer.squeeze().max(axis=-1),\n",
425 |     "            color=\"tab:blue\",\n",
426 |     "            alpha=0.2,\n",
427 |     "        )\n",
428 |     "        axes[0, 1].legend(loc=\"lower center\")\n",
429 |     "        axes[0, 1].set_title(r\"update_tap_mf\")\n",
430 |     "\n",
431 |     "        axes[1, 0].set_xlabel(r\"$t$\")\n",
432 |     "        axes[1, 0].set_ylabel(r\"Euclidean norm\")\n",
433 |     "        axes[1, 0].plot(\n",
434 |     "            steps,\n",
435 |     "            jnp.linalg.norm(y_naive, axis=-1).squeeze().mean(axis=-1),\n",
436 |     "            color=\"tab:red\",\n",
437 |     "            linewidth=2,\n",
438 |     "        )\n",
439 |     "        axes[1, 0].fill_between(\n",
440 |     "            steps,\n",
441 |     "            jnp.linalg.norm(y_naive, axis=-1).squeeze().min(axis=-1),\n",
442 |     "            jnp.linalg.norm(y_naive, axis=-1).squeeze().max(axis=-1),\n",
443 |     "            color=\"tab:red\",\n",
444 |     "            alpha=0.2,\n",
445 |     "        )\n",
446 |     "        axes[1, 1].plot(\n",
447 |     "            steps,\n",
448 |     "            jnp.linalg.norm(y_tap, axis=-1).squeeze().mean(axis=-1),\n",
449 |     "            color=\"tab:red\",\n",
450 |     "            linewidth=2,\n",
451 |     "        )\n",
452 |     "        axes[1, 1].fill_between(\n",
453 |     "            steps,\n",
454 |     "            jnp.linalg.norm(y_tap, axis=-1).squeeze().min(axis=-1),\n",
455 |     "            jnp.linalg.norm(y_tap, axis=-1).squeeze().max(axis=-1),\n",
456 |     "            color=\"tab:red\",\n",
457 |     "            alpha=0.2,\n",
458 |     "        )\n",
459 |     "        axes[1, 1].set_xlabel(r\"$t$\")\n",
460 |     "\n",
461 |     "        fig.tight_layout()\n",
462 |     "        plt.show()"
463 |    ]
464 |   },
465 |   {
466 |    "cell_type": "code",
467 |    "execution_count": null,
468 |    "id": "a7309b96",
469 |    "metadata": {},
470 |    "outputs": [],
471 |    "source": [
472 |     "N = 1024\n",
473 |     "D = 512\n",
474 |     "beta = 1.0\n",
475 |     "r = (D / 2 - 1) ** 0.5\n",
476 |     "\n",
477 |     "key = jax.random.PRNGKey(2666)\n",
478 |     "x_key, J_key = jax.random.split(key)\n",
479 |     "\n",
480 |     "x = jax.random.normal(x_key, shape=(N, D))\n",
481 |     "x = r * x / jnp.linalg.norm(x, axis=-1, keepdims=True)\n",
482 |     "\n",
483 |     "J = N**-0.5 * jax.random.normal(J_key, shape=(N, N))\n",
484 |     "print(N**-1, J.var(axis=-1).mean(), J.var(), N**-0.5)\n",
485 |     "\n",
486 |     "m0 = jnp.ones((1, N, D))\n",
487 |     "m0 = m0 / jnp.linalg.norm(m0, axis=-1, keepdims=True)\n",
488 |     "\n",
489 |     "simulate_and_plot_only_naive(x, J, m0, jnp.arange(0, 20), beta, r)"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "code",
494 |    "execution_count": null,
495 |    "id": "e2accb24-d9e5-43a1-a97e-4740d6bbd416",
496 |    "metadata": {
497 |     "tags": []
498 |    },
499 |    "outputs": [],
500 |    "source": [
501 |     "N = 1024\n",
502 |     "D = 512\n",
503 |     "beta = 1.0\n",
504 |     "r = (D / 2 - 1) ** 0.5\n",
505 |     "\n",
506 |     "key = jax.random.PRNGKey(2666)\n",
507 |     "x_key, J_key = jax.random.split(key)\n",
508 |     "\n",
509 |     "x = jax.random.normal(x_key, shape=(N, D))\n",
510 |     "x = r * x / jnp.linalg.norm(x, axis=-1, keepdims=True)\n",
511 |     "\n",
512 |     "J = N**-0.5 * jax.random.normal(J_key, shape=(N, N))\n",
513 |     "print(N**-1, J.var(axis=-1).mean(), J.var(), N**-0.5)\n",
514 |     "\n",
515 |     "m0 = jnp.ones((1, N, D))\n",
516 |     "m0 = m0 / jnp.linalg.norm(m0, axis=-1, keepdims=True)\n",
517 |     "\n",
518 |     "\n",
519 |     "simulate_and_plot(x, J, m0, jnp.arange(0, 20), beta, r)"
520 |    ]
521 |   },
522 |   {
523 |    "cell_type": "code",
524 |    "execution_count": null,
525 |    "id": "e484d695",
526 |    "metadata": {},
527 |    "outputs": [],
528 |    "source": [
529 |     "N = 1024\n",
530 |     "D = 512\n",
531 |     "beta = 2.0\n",
532 |     "r = (D / 2 - 1) ** 0.5\n",
533 |     "\n",
534 |     "key = jax.random.PRNGKey(2666)\n",
535 |     "x_key, J_key = jax.random.split(key)\n",
536 |     "\n",
537 |     "x = jax.random.normal(x_key, shape=(N, D))\n",
538 |     "x = r * x / jnp.linalg.norm(x, axis=-1, keepdims=True)\n",
539 |     "\n",
540 |     "J = N**-0.5 * jax.random.normal(J_key, shape=(N, N))\n",
541 |     "print(N**-1, J.var(axis=-1).mean(), J.var(), N**-0.5)\n",
542 |     "\n",
543 |     "m0 = jnp.ones((1, N, D))\n",
544 |     "m0 = m0 / jnp.linalg.norm(m0, axis=-1, keepdims=True)\n",
545 |     "\n",
546 |     "\n",
547 |     "simulate_and_plot(x, J, m0, jnp.arange(0, 20), beta, r)"
548 |    ]
549 |   },
550 |   {
551 |    "cell_type": "code",
552 |    "execution_count": null,
553 |    "id": "623fbfb7",
554 |    "metadata": {},
555 |    "outputs": [],
556 |    "source": [
557 |     "N = 1024\n",
558 |     "D = 512\n",
559 |     "beta = 1.0\n",
560 |     "r = (D / 2 - 1) ** 0.5\n",
561 |     "\n",
562 |     "key = jax.random.PRNGKey(2666)\n",
563 |     "x_key, J_key = jax.random.split(key)\n",
564 |     "\n",
565 |     "x = jax.random.normal(x_key, shape=(N, D))\n",
566 |     "x = x / jnp.linalg.norm(x, axis=-1, keepdims=True)\n",
567 |     "\n",
568 |     "J = 2 * N**-0.5 * jax.random.normal(J_key, shape=(N, N))\n",
569 |     "print(N**-1, J.var(axis=-1).mean(), J.var(), N**-0.5)\n",
570 |     "\n",
571 |     "m0 = jnp.ones((1, N, D))\n",
572 |     "m0 = m0 / jnp.linalg.norm(m0, axis=-1, keepdims=True)\n",
573 |     "\n",
574 |     "\n",
575 |     "simulate_and_plot(x, J, m0, jnp.arange(0, 20), beta, r)"
576 |    ]
577 |   },
578 |   {
579 |    "cell_type": "code",
580 |    "execution_count": null,
581 |    "id": "140e8f08",
582 |    "metadata": {},
583 |    "outputs": [],
584 |    "source": [
585 |     "def _phi_norm(theta, beta, r):\n",
586 |     "    \"\"\"Scalar form of `_phi`: takes the norm `theta` directly instead of a vector; see Eq. (38).\"\"\"\n",
587 |     "    return beta / (1 + jnp.sqrt(1 + beta**2 * theta**2 / r**2)) * theta\n",
588 |     "\n",
589 |     "\n",
590 |     "x_values = np.linspace(0, 40, 200)\n",
591 |     "betas = [0.1, 0.5, 1.0, 2.0, 10.0]\n",
592 |     "\n",
593 |     "D = 512\n",
594 |     "r = (D / 2 - 1) ** 0.5  # same radius as the simulation cells (was `D / 2 * 1`)\n",
595 |     "\n",
596 |     "with plt.style.context(\"ggplot\"):\n",
597 |     "    plt.clf()\n",
598 |     "\n",
599 |     "    plt.figure(figsize=(6, 6))\n",
600 |     "    plt.rc(\"text\", usetex=True)\n",
601 |     "    plt.rc(\"text.latex\", preamble=r\"\\usepackage{amsmath}\")\n",
602 |     "\n",
603 |     "    for beta in betas[::-1]:\n",
604 |     "        plt.plot(\n",
605 |     "            x_values,\n",
606 |     "            _phi_norm(x_values, beta, r),\n",
607 |     "            label=r\"$\\beta = $\" + f\"${beta}$\",\n",
608 |     "            lw=2.0,\n",
609 |     "        )\n",
610 |     "\n",
611 |     "    plt.axhline(y=r, linestyle=\"--\", color=\"gray\")\n",
612 |     "    plt.axvline(x=r, linestyle=\"--\", color=\"gray\")\n",
613 |     "\n",
614 |     "    plt.xlabel(r\"$\\Vert \\boldsymbol{\\theta} \\rVert$\")\n",
615 |     "    plt.xlim(x_values.min(), x_values.max())\n",
616 |     "    plt.ylabel(r\"$\\Vert \\varphi ( \\boldsymbol{\\theta} ) \\rVert$\")\n",
617 |     "    plt.legend(loc=\"best\", bbox_to_anchor=(0.5, 0.0, 0.5, 0.5))\n",
618 |     "\n",
619 |     "    plt.show()"
620 |    ]
621 |   }
622 |  ],
623 |  "metadata": {
624 |   "kernelspec": {
625 |    "display_name": "py310",
626 |    "language": "python",
627 |    "name": "python3"
628 |   },
629 |   "language_info": {
630 |    "codemirror_mode": {
631 |     "name": "ipython",
632 |     "version": 3
633 |    },
634 |    "file_extension": ".py",
635 |    "mimetype": "text/x-python",
636 |    "name": "python",
637 |    "nbconvert_exporter": "python",
638 |    "pygments_lexer": "ipython3",
639 |    "version": "3.10.11"
640 |   }
641 |  },
642 |  "nbformat": 4,
643 |  "nbformat_minor": 5
644 | }
645 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.ruff]
 2 | select = ["E", "F", "I001"]
 3 | ignore = ["E402", "E721", "E731", "E741", "F722"]
 4 | ignore-init-module-imports = true
 5 | fixable = ["I001", "F401"]
 6 | line-length = 119
 7 | 
 8 | [tool.ruff.isort]
 9 | combine-as-imports = true
10 | lines-after-imports = 2
11 | extra-standard-library = ["typing_extensions"]
12 | order-by-type = false


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | 
 3 | 
 4 | with open("README.md", "r") as fh:
 5 |     long_description = fh.read()
 6 | 
 7 | setup(
 8 |     name="spin-model-transformers",
 9 |     version="0.0.1",
10 |     author="Matthias Bal",
11 |     author_email="matthiascbal@gmail.com",
12 |     description="Physics-inspired transformer modules based on mean-field dynamics of vector-spin models",
13 |     long_description=long_description,
14 |     long_description_content_type="text/markdown",
15 |     url="https://github.com/mcbal/spin-model-transformers",
16 |     license="Apache-2.0",
17 |     packages=find_packages(exclude=["examples", "notebooks", "tests"]),
18 |     python_requires=">=3.9",
19 |     install_requires=[
20 |         "einops>=0.6.1",
21 |         "equinox>=0.11.2",
22 |         "jax>=0.4.20",
23 |         "jaxlib>=0.4.20",
24 |         "jaxopt>=0.8",
25 |         "numpy>=1.26",
26 |         "optax>=0.1.7",
27 |     ],
28 |     extras_require={
29 |         "dev": [
30 |             "black~=23.9.1",
31 |             "nbqa~=1.7",
32 |             "pre-commit~=3.4.0",
33 |             "ruff~=0.0.291",
34 |         ]
35 |     },
36 | )
37 | 


--------------------------------------------------------------------------------
/spin_model_transformers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Matthias Bal
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # ruff: noqa: F401
16 | 
17 | from .spin_model_transformers import SpinTransformer, SpinTransformerModule
18 | 


--------------------------------------------------------------------------------
/spin_model_transformers/bessel.py:
--------------------------------------------------------------------------------
 1 | import jax.lax as lax
 2 | import jax.numpy as jnp
 3 | import numpy as np
 4 | 
 5 | 
 6 | def bessel_iv_ratio(x, nu, num_iter):
 7 |     """Compute ratio `I_{\nu+1}(x) / I_{\nu}(x)` of modified Bessel functions of the first kind.
 8 | 
 9 |     Reference:
10 |         D. E. Amos, Computation of Modified Bessel Functions and Their Ratios.
11 |             Mathematics of Computation, 28(125), 239-251 (1974)
12 |     """
13 |     v = np.maximum(20, nu)
14 | 
15 |     idx_range = jnp.arange(num_iter)
16 |     r_init = x / (v + idx_range + 0.5 + jnp.sqrt((v + idx_range + 1.5) ** 2 + x**2))
17 | 
18 |     def _update(r, idx):
19 |         return r.at[idx].set(
20 |             x
21 |             / (
22 |                 v
23 |                 + idx
24 |                 + 1.0
25 |                 + jnp.sqrt((v + idx + 1.0) ** 2 + x**2 * r[idx + 1] / r[idx])
26 |             )
27 |         )
28 | 
29 |     def _refine(r):
30 |         def _inner(r, step):
31 |             return lax.scan(
32 |                 lambda r, idx: (
33 |                     lax.cond(
34 |                         idx < num_iter - step - 1,
35 |                         lambda r, idx: _update(r, idx),
36 |                         lambda r, idx: r,
37 |                         r,
38 |                         idx,
39 |                     ),
40 |                     None,
41 |                 ),
42 |                 r,
43 |                 jnp.arange(num_iter - 1),
44 |             )[0]
45 | 
46 |         return lax.scan(
47 |             lambda r, step: (_inner(r, step), None), r, jnp.arange(num_iter)
48 |         )[0]
49 | 
50 |     def _maybe_recurse(y):
51 |         return lax.scan(
52 |             lambda y, kk: (1.0 / (2 * kk / x + y), None),
53 |             y,
54 |             jnp.arange(v, stop=nu, step=-1),
55 |         )[0]
56 | 
57 |     return _maybe_recurse(_refine(r_init)[0])
58 | 


--------------------------------------------------------------------------------
/spin_model_transformers/spin_model_transformers.py:
--------------------------------------------------------------------------------
  1 | from functools import partial
  2 | from typing import Callable
  3 | 
  4 | import equinox as eqx
  5 | import jax
  6 | import jax.numpy as jnp
  7 | from einops import rearrange
  8 | from jaxopt import AndersonAcceleration
  9 | 
 10 | 
 11 | def _gamma(x, beta, R):
 12 |     return jnp.sqrt(1 + beta**2 * jnp.sum(x**2, axis=-1, keepdims=True) / R**2)
 13 | 
 14 | 
 15 | def _phi(theta, beta, R):
 16 |     return beta / (1 + _gamma(theta, beta, R)) * theta
 17 | 
 18 | 
 19 | def _inv_phi(m, beta, R):
 20 |     return 2 * R**2 / (beta * (R**2 - jnp.sum(m**2, axis=-1, keepdims=True))) * m
 21 | 
 22 | 
 23 | def _d2_m_d_alpha_2(m1, m0, x, J, beta, R):
 24 |     g0 = _gamma(_inv_phi(m0, beta, R), beta, R)
 25 |     g1 = _gamma(_inv_phi(m1, beta, R), beta, R)
 26 |     v = -_inv_phi(m1, beta, R) + x + jnp.einsum("i j, j d -> i d", J, m0)
 27 | 
 28 |     return (
 29 |         (beta**2 * (1 + 3 * g1))
 30 |         / (R**4 * g1**3)
 31 |         * (
 32 |             jnp.einsum("i d, i d -> i", m1, v)[:, None] ** 2
 33 |             + jnp.einsum(
 34 |                 "i j, i d -> i d",
 35 |                 J**2,
 36 |                 jnp.sum(m1**2, axis=-1, keepdims=True),
 37 |             )
 38 |             / (1 + g0)
 39 |             - jnp.einsum(
 40 |                 "i j, i d, j d, i e, j e -> i",
 41 |                 J**2,
 42 |                 m1,
 43 |                 m0,
 44 |                 m1,
 45 |                 m0,
 46 |             )[:, None]
 47 |             / (R**2 * g0)
 48 |         )
 49 |         * m1
 50 |         - (beta**2)
 51 |         / (R**2 * (g1**2 + g1))
 52 |         * (
 53 |             jnp.sum(v**2, axis=-1, keepdims=True)
 54 |             + jnp.einsum(
 55 |                 "i j, j -> i",
 56 |                 J**2,
 57 |                 R**2 - jnp.sum(m0**2, axis=-1),
 58 |             )[:, None]
 59 |         )
 60 |         * m1
 61 |         - 2.0
 62 |         * beta**2
 63 |         / (R**2 * (g1**2 + g1))
 64 |         * (
 65 |             jnp.einsum("i d, i d, i f -> i f", v, m1, v)
 66 |             + jnp.einsum("i j, i d -> i d", J**2, m1 / (1 + g0))
 67 |             - jnp.einsum(
 68 |                 "i j, i d, j d, j f -> i f",
 69 |                 J**2,
 70 |                 m1,
 71 |                 m0,
 72 |                 m0,
 73 |             )
 74 |             / (R**2 * g0)
 75 |         )
 76 |     )
 77 | 
 78 | 
 79 | def _f(m1, m0, x, J, beta, R):
 80 |     g1 = _gamma(_inv_phi(m1, beta, R), beta, R)
 81 |     d2_m_d_alpha_2 = _d2_m_d_alpha_2(m1, m0, x, J, beta, R)
 82 | 
 83 |     ff = (
 84 |         (1 + g1)
 85 |         / (2 * beta)
 86 |         * (
 87 |             d2_m_d_alpha_2
 88 |             + (
 89 |                 jnp.einsum("i d, i d -> i", m1, d2_m_d_alpha_2)[:, None]
 90 |                 / ((R**2 * g1) / (1 + g1) - jnp.sum(m1**2, axis=-1, keepdims=True))
 91 |                 * m1
 92 |             )
 93 |         )
 94 |     )
 95 |     return x + jnp.einsum("i j, j d -> i d", J, m0) + ff
 96 | 
 97 | 
 98 | def vector_tap_fp(m0, x, J, beta, R, tol: float = 1e-3, maxiter: int = 100):
 99 |     def _m_ness(m, _x, _J, _beta, _R):
100 |         return _phi(_f(m, m, _x, _J, _beta, _R), _beta, _R)
101 | 
102 |     return (
103 |         AndersonAcceleration(
104 |             fixed_point_fun=_m_ness,
105 |             tol=tol,
106 |             maxiter=maxiter,
107 |         )
108 |         .run(_phi(x + J @ m0, beta, R), x, J, beta, R)
109 |         .params
110 |     )
111 | 
112 | 
class SpinTransformerModule(eqx.Module):
    """Multi-head module whose output is the fixed point returned by `vector_tap_fp`.

    The input is split into `num_heads` heads of size `dim_head`, each head
    vector is rescaled to norm `scale`, and couplings `J` are computed as a
    softmax over query/key similarities. The per-head fixed points are merged
    back into the input layout.
    """

    # Size of the last axis of the input.
    dim: int
    # Size of each head, `dim // num_heads`.
    dim_head: int
    # Number of heads.
    num_heads: int
    # Norm every head vector is rescaled to; also used as `R` in the solver.
    scale: float
    # Bias-free linear map producing concatenated queries and keys.
    to_qk: eqx.Module
    # `vector_tap_fp` with `beta`, `R`, `tol` and `maxiter` bound.
    vector_tap_fp: Callable

    def __init__(
        self,
        *,
        dim,
        num_heads,
        key,
        beta=1.0,
        solver_tol=1e-3,
        solver_maxiter=100,
    ):
        """Build the module.

        Args:
            dim: Size of the last axis of the input; must be divisible by `num_heads`.
            num_heads: Number of heads; `dim // num_heads` must be greater than 2.
            key: PRNG key used to initialise the query/key projection.
            beta: Bound as `beta` in `vector_tap_fp`.
            solver_tol: Bound as `tol` in `vector_tap_fp`.
            solver_maxiter: Bound as `maxiter` in `vector_tap_fp`.

        Raises:
            ValueError: If `dim` is not divisible by `num_heads`, or if the
                resulting head size is not greater than 2.
        """
        super().__init__()

        if dim % num_heads != 0:
            raise ValueError(
                f"`dim` ({dim}) must be divisible by `num_heads` ({num_heads})."
            )
        dim_head = dim // num_heads
        # scale = sqrt(dim_head / 2 - 1) must be a positive real number: a head
        # size of 2 gives scale = R = 0 (division by zero downstream) and a
        # smaller head size makes the square root complex.
        if dim_head <= 2:
            raise ValueError(
                f"`dim // num_heads` must be greater than 2, got {dim_head}."
            )
        scale = (dim_head / 2 - 1) ** 0.5

        self.dim = dim
        self.num_heads = num_heads
        self.dim_head = dim_head
        self.scale = scale

        self.to_qk = eqx.nn.Linear(
            dim, 2 * dim_head * num_heads, use_bias=False, key=key
        )
        self.vector_tap_fp = partial(
            vector_tap_fp,
            beta=beta,
            R=scale,
            tol=solver_tol,
            maxiter=solver_maxiter,
        )

    def _J(self, x, mask=None):
        """Compute softmax-normalised couplings from head-split input `x`.

        `x` is laid out as `... h n d`. Where `mask` is false, the similarity is
        replaced by the most negative finite value of its dtype before the softmax.
        """
        # Merge heads so the projection acts on full `dim`-sized vectors.
        x = rearrange(x, "... h n d -> ... n (h d)", h=self.num_heads)

        # `to_qk` is mapped over the leading axis of the merged input.
        q, k = jnp.split(jax.vmap(self.to_qk)(x), 2, axis=-1)
        q, k = (
            rearrange(t, "... n (h d) -> ... h n d", h=self.num_heads)
            for t in (q, k)
        )

        sim = jnp.einsum("... i d, ... j d -> ... i j", q, k)

        if mask is not None:
            sim = jnp.where(mask, sim, jnp.finfo(sim.dtype).min)

        return jax.nn.softmax(sim, axis=-1)

    def __call__(self, x, mask=None):
        """Apply the module to `x` laid out as `... n (h d)`.

        NOTE(review): `to_qk` is vmapped over a single leading axis and the
        solver is vmapped over the head axis only, so `x` is presumably an
        unbatched `(n, dim)` array -- confirm against callers.

        Returns:
            An array with the same `... n (h d)` layout as the input.
        """
        x = rearrange(x, "... n (h d) -> ... h n d", h=self.num_heads, d=self.dim_head)
        # Rescale every head vector to norm `scale`.
        x = self.scale * x / jnp.linalg.norm(x, axis=-1, keepdims=True)

        # Initial magnetization: constant unit-norm vectors.
        m0 = jnp.ones_like(x)
        m0 = m0 / jnp.linalg.norm(m0, axis=-1, keepdims=True)

        # Solve one fixed-point problem per head (leading axis).
        return rearrange(
            jax.vmap(self.vector_tap_fp, in_axes=(0, 0, 0))(
                m0, x, self._J(x, mask=mask)
            ),
            "... h n d -> ... n (h d)",
        )
177 | 
178 | 
class SpinTransformer(eqx.Module):
    """Stack of `depth` `SpinTransformerModule`s applied one after another."""

    # All layers stored as a single module whose array leaves carry a leading
    # axis of length `depth` (built with `eqx.filter_vmap`).
    modules: SpinTransformerModule

    def __init__(self, depth, dim, num_heads, beta, key):
        """Create `depth` modules, each initialised from its own split of `key`."""

        def _build_layer(layer_key):
            return SpinTransformerModule(
                dim=dim, num_heads=num_heads, beta=beta, key=layer_key
            )

        layer_keys = jax.random.split(key, depth)
        self.modules = eqx.filter_vmap(_build_layer)(layer_keys)

    def __call__(self, x):
        """Feed `x` through the layers in order and return the final output."""
        stacked_arrays, static_part = eqx.partition(self.modules, eqx.is_array)

        def _apply_layer(carry, layer_arrays):
            # Rebuild one layer from its slice of the stacked arrays.
            layer = eqx.combine(layer_arrays, static_part)
            return layer(carry), None

        result, _ = jax.lax.scan(_apply_layer, x, stacked_arrays)
        return result
199 | 


--------------------------------------------------------------------------------