├── CHANGELOG.md ├── Guide.ipynb ├── LICENSE ├── Leaderboard.md ├── README.md ├── pyproject.toml ├── requirements.txt ├── src └── MLGeometry │ ├── __init__.py │ ├── bihomoNN.py │ ├── cicyhypersurface.py │ ├── complex_math.py │ ├── hypersurface.py │ ├── lbfgs.py │ ├── loss.py │ └── tf_dataset.py └── training ├── README.md ├── bihomoNN_train.py ├── bihomoNN_train.sh └── models.py /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] 4 | 5 | ## [1.2.0] - 2025-03-07 6 | 7 | ### Changed 8 | 9 | - Updated the package to be compatible with the latest version of Tensorflow (2.18) and Keras 3 10 | - The package can now be installed by pip 11 | - Moved the U1-invariant neural network from LOGML24 to the branch 'U1' 12 | 13 | ## [1.1.0] - 2023-11-20 14 | 15 | ### Added 16 | 17 | - A new section to print out the metrics explicitly in Guide.ipynb 18 | - Support for Calabi-Yau manifolds as the complete intersection of two hypersurfaces 19 | - Support for generating the real locus of a hypersurface with class RealHypersurface 20 | 21 | ### Changed 22 | 23 | - Changed the default initialization of the SquareDense layer to be all-positive with an extra 24 | abs function, which could help the training in certain cases 25 | - Changed several functions in the hypersurface class from being private to public 26 | 27 | ### Removed 28 | 29 | - An incorrect documentation for the complex hessian function 30 | - The function to do numerical integration over the manifold and several related deprecated functions 31 | 32 | ## [1.0.2] - 2022-03-18 33 | 34 | ### Added 35 | 36 | - A new argument d in the bihomogeneous layer for different dimensions 37 | - Save and load models in the guide 38 | - A tutorial for environment setup 39 | 40 | ### Removed 41 | 42 | - The n_patches attribute in the Hypersurface class since it fails on subpatches 43 | 44 | ## [1.0.1] - 2020-12-20 45 | 46 | ### Added 47 | 48 | - Multi-batch 
support for L-BFGS 49 | 50 | [Unreleased]: https://github.com/yidiq7/MLGeometry/compare/v1.2.0...HEAD 51 | [1.0.1]: https://github.com/yidiq7/MLGeometry/releases/tag/v1.0.1 52 | [1.0.2]: https://github.com/yidiq7/MLGeometry/releases/tag/v1.0.2 53 | [1.1.0]: https://github.com/yidiq7/MLGeometry/releases/tag/v1.1.0 54 | [1.2.0]: https://github.com/yidiq7/MLGeometry/releases/tag/v1.2.0 55 | -------------------------------------------------------------------------------- /Guide.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "2FIm6mUYtGC5" 7 | }, 8 | "source": [ 9 | "# MLGeometry guide" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "dCemOECptGC6" 16 | }, 17 | "source": [ 18 | "This introduction demonstrates how to use MLGeometry to:\n", 19 | "1. Generate a hypersurface.\n", 20 | "2. Build a bihomogeneous neural network.\n", 21 | "3. Use the model to compute numerical Calabi-Yau metrics with the embedding method.\n", 22 | "4. Plot $\\eta$ on a rational curve." 
23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "source": [ 28 | "## Install the package (on Colab)" 29 | ], 30 | "metadata": { 31 | "id": "ilHaPYnkEi-S" 32 | } 33 | }, 34 | { 35 | "cell_type": "code", 36 | "source": [ 37 | "!pip install MLGeometry-tf" 38 | ], 39 | "metadata": { 40 | "id": "5pVEmL9vErvY" 41 | }, 42 | "execution_count": null, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "id": "VyFvWKNmtGC7" 49 | }, 50 | "source": [ 51 | "## Configure imports" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "id": "lhqc2oMWtGC8" 58 | }, 59 | "source": [ 60 | "Import tensorflow_probability to use the L-BFGS optimizer:" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "id": "doBhWopntGC9" 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "import sympy as sp\n", 72 | "import tensorflow as tf\n", 73 | "import tensorflow_probability as tfp\n", 74 | "import keras" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "id": "MJJvGaCNtGC-" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "import MLGeometry as mlg\n", 86 | "from MLGeometry import bihomoNN as bnn" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "id": "-9F3AKqPtGC_" 93 | }, 94 | "source": [ 95 | "Import the libraries to plot the $\\eta$ on the rational curve (see the last section):" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "id": "J9LWSTH3tGC_" 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "import os\n", 107 | "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'\n", 108 | "import math\n", 109 | "import numpy as np\n", 110 | "import matplotlib.pyplot as plt\n", 111 | "from mpl_toolkits.mplot3d import Axes3D" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "id": "pd1zG07TtGDA" 118 | }, 119 | "source": [ 120 | "## 
Set a random seed (optional)\n", 121 | "Some random seeds might be bad for numerical calculations. If there are any errors during the training, you may want to try a different seed." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "id": "x5Rz0lXmtGDB" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "np.random.seed(42)\n", 133 | "tf.random.set_seed(42)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "id": "QjB84Ln1tGDB" 140 | }, 141 | "source": [ 142 | "## Define a hypersurface\n", 143 | "First define a set of coordinates and a function as sympy symbols:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "id": "pwhQynDBtGDB" 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "z0, z1, z2, z3, z4 = sp.symbols('z0, z1, z2, z3, z4')\n", 155 | "Z = [z0,z1,z2,z3,z4]\n", 156 | "f = z0**5 + z1**5 + z2**5 + z3**5 + z4**5 + 0.5*z0*z1*z2*z3*z4" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": { 162 | "id": "wTm215kftGDC" 163 | }, 164 | "source": [ 165 | "Then define a hypersurface as a collection of points which solve the equation f = 0, using the `Hypersurface` class in the `mlg.hypersurface` module. The parameter n_pairs is the number of random pairs of points used to form the random lines in $\\mathbf{CP}^{N+1}$. Then we take the intersections of those random lines and the hypersurface. By Bezout's theorem, each line intersects the hypersurface in precisely d points where d is the number of homogeneous coordinates. So the total number of points is d * n_pairs." 
166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "id": "jz1Vi4Y2tGDC" 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "n_pairs = 10000\n", 177 | "HS_train = mlg.hypersurface.Hypersurface(Z, f, n_pairs)\n", 178 | "HS_test = mlg.hypersurface.Hypersurface(Z, f, n_pairs)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "YE981r2ctGDC" 185 | }, 186 | "source": [ 187 | "The Hypersurface class will take care of the patchwork automatically. Let's use the `list_patches` function to check the number of points on each patch:" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "id": "_zyw84dftGDC", 195 | "outputId": "e8fb2f30-eadf-4129-80c6-33971a79b273", 196 | "colab": { 197 | "base_uri": "https://localhost:8080/" 198 | } 199 | }, 200 | "outputs": [ 201 | { 202 | "output_type": "stream", 203 | "name": "stdout", 204 | "text": [ 205 | "Number of Patches: 5\n", 206 | "Points on patch 1 : 9933\n", 207 | "Points on patch 2 : 10015\n", 208 | "Points on patch 3 : 10249\n", 209 | "Points on patch 4 : 10011\n", 210 | "Points on patch 5 : 9792\n" 211 | ] 212 | } 213 | ], 214 | "source": [ 215 | "HS_train.list_patches()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": { 221 | "id": "4JtfwpmFtGDD" 222 | }, 223 | "source": [ 224 | "You can also invoke this method on one of the patches to check the distribution on the subpatches:" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "id": "8fMZAyKutGDD", 232 | "outputId": "5b432e0a-36e6-4ac5-9dda-4c9447aa656a", 233 | "colab": { 234 | "base_uri": "https://localhost:8080/" 235 | } 236 | }, 237 | "outputs": [ 238 | { 239 | "output_type": "stream", 240 | "name": "stdout", 241 | "text": [ 242 | "Number of Patches: 4\n", 243 | "Points on patch 1 : 2474\n", 244 | "Points on patch 2 : 2468\n", 245 | "Points 
on patch 3 : 2547\n", 246 | "Points on patch 4 : 2444\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "HS_train.patches[0].list_patches()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": { 257 | "id": "h8p5D9ZWtGDE" 258 | }, 259 | "source": [ 260 | "The Hypersurface class contains some symbolic and numerical methods as well, which will be introduced elsewhere." 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": { 266 | "id": "4u1oIOLytGDE" 267 | }, 268 | "source": [ 269 | "## Training with Tensorflow\n", 270 | "The following steps are similar to a regular Tensorflow training process.\n", 271 | "### Generate datasets\n", 272 | "The `mlg.tf_dataset.generate_dataset` function converts a hypersurface to a Tensorflow Dataset, which has four components: the points on the hypersurface, the volume form $\\small \\Omega \\wedge \\bar\\Omega$, the mass reweighting the points distribution and the restriction which restricts the Kähler metric to a subpatch. The restriction contains an extra linear transformation so that points on different affine patches can all be processed in one call. It is also possible to generate a dataset only on one affine patch." 
273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "id": "uGq-mKdDtGDE" 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "train_set = mlg.tf_dataset.generate_dataset(HS_train)\n", 284 | "test_set = mlg.tf_dataset.generate_dataset(HS_test)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": { 290 | "id": "MHP3ExA1tGDG" 291 | }, 292 | "source": [ 293 | "Shuffle and batch the datasets:" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": { 300 | "id": "G1AX_cm_tGDG" 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "train_set = train_set.shuffle(HS_train.n_points).batch(1000)\n", 305 | "test_set = test_set.shuffle(HS_test.n_points).batch(1000)" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": { 311 | "id": "t7Mga5fdtGDG" 312 | }, 313 | "source": [ 314 | "Let's look at what is inside a dataset:" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "scrolled": true, 322 | "id": "kwHPcIFHtGDH", 323 | "outputId": "4e35f5b8-52d9-470a-caeb-9310229ee77a", 324 | "colab": { 325 | "base_uri": "https://localhost:8080/" 326 | } 327 | }, 328 | "outputs": [ 329 | { 330 | "output_type": "stream", 331 | "name": "stdout", 332 | "text": [ 333 | "tf.Tensor(\n", 334 | "[[-0.28939024-0.7013853j 0.36498785+0.31731284j 1. -0.j\n", 335 | " -0.83973217-0.04013346j 0.69238067-0.44357434j]\n", 336 | " [-0.2632903 +0.00763338j 1. +0.j -0.3904083 -0.03917403j\n", 337 | " -0.07155564+0.36113203j 0.80278397+0.58611786j]\n", 338 | " [-0.9368906 +0.12442758j 0.21154833+0.6303546j -0.3895686 +0.4952412j\n", 339 | " -0.22148535-0.86754274j 1. -0.j ]\n", 340 | " ...\n", 341 | " [-0.3757828 -0.8230363j -0.176058 +0.23105958j -0.36762843+0.82377625j\n", 342 | " -0.28792354-0.0678029j 1. -0.j ]\n", 343 | " [ 0.38600475+0.27455673j 0.80249757+0.5882626j 1. 
+0.j\n", 344 | " -0.0039644 +0.08065715j -0.46647677+0.11512545j]\n", 345 | " [ 0.8101118 -0.5754197j 1. +0.j -0.13839988-0.06433091j\n", 346 | " -0.17594224+0.5278059j -0.47444513+0.20035468j]], shape=(1000, 5), dtype=complex64)\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "points, Omega_Omegabar, mass, restriction = next(iter(train_set))\n", 352 | "print(points)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": { 358 | "id": "2kVSGnnktGDH" 359 | }, 360 | "source": [ 361 | "### Build a bihomogeneous neural network" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": { 367 | "id": "XV_JRrERtGDI" 368 | }, 369 | "source": [ 370 | "The `mlg.bihomoNN` module provides the necessary layers (e.g. `Bihomogeneous` and `Dense` ) to construct the Kähler potential with a bihomogeneous neural network. Here is an example of a two-hidden-layer network (k = 4) with 70 and 100 hidden units:" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": { 377 | "id": "B9mE_YLltGDI" 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "@keras.saving.register_keras_serializable(package=\"MLGeometry\")\n", 382 | "class Kahler_potential(tf.keras.Model):\n", 383 | " def __init__(self, trainable=True, dtype='float32', **kwargs):\n", 384 | " super(Kahler_potential, self).__init__(trainable=trainable, dtype=dtype, **kwargs)\n", 385 | " # The first layer transforms the complex points to the bihomogeneous form.\n", 386 | " # The number of the outputs is d^2, where d is the number of coordinates.\n", 387 | " self.bihomogeneous = bnn.Bihomogeneous(d=len(Z))\n", 388 | " self.layer1 = bnn.SquareDense(5**2, 70, activation=tf.square)\n", 389 | " self.layer2 = bnn.SquareDense(70, 100, activation=tf.square)\n", 390 | " self.layer3 = bnn.SquareDense(100, 1)\n", 391 | "\n", 392 | " def call(self, inputs):\n", 393 | " x = self.bihomogeneous(inputs)\n", 394 | " x = self.layer1(x)\n", 395 | " x = 
self.layer2(x)\n", 396 | " x = self.layer3(x)\n", 397 | " x = tf.math.log(x)\n", 398 | " return x" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": null, 404 | "metadata": { 405 | "id": "cXSQuJgCtGDI" 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "model = Kahler_potential()" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": { 415 | "id": "vooMAXq2tGDJ" 416 | }, 417 | "source": [ 418 | "Define the Kähler metric $g_{i \\bar j} = \\partial_i\\bar\\partial_{\\bar j} K$ and the volume form $d\\mu_g = \\det g_{i \\bar j}$:" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": { 425 | "id": "ZKNPTfxPtGDJ" 426 | }, 427 | "outputs": [], 428 | "source": [ 429 | "@tf.function\n", 430 | "def volume_form(points, Omega_Omegabar, mass, restriction):\n", 431 | "\n", 432 | " kahler_metric = mlg.complex_math.complex_hessian(tf.math.real(model(points)), points)\n", 433 | " kahler_metric = tf.matmul(restriction, tf.matmul(kahler_metric, restriction, adjoint_b=True))\n", 434 | " det_g = tf.math.real(tf.linalg.det(kahler_metric))\n", 435 | "\n", 436 | " # Calculate the normalization constant to make the overall integration as 1\n", 437 | " # It is a batchwise calculation but we expect it to converge to a constant eventually\n", 438 | " # Consequently, if one computes the average of volume_form / Omega_Omegabar,\n", 439 | " # they will get strictly 1. 
(Actually the result would be Vol_Omega, but we set\n", 440 | " # it to be 1 here implicitly.)\n", 441 | " weights = mass / tf.reduce_sum(mass)\n", 442 | " factor = tf.reduce_sum(weights * det_g / Omega_Omegabar)\n", 443 | " volume_form = det_g / factor\n", 444 | "\n", 445 | " return volume_form" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": { 451 | "id": "GSvy2QCmtGDJ" 452 | }, 453 | "source": [ 454 | "### Train the model with Adam and L-BFGS\n", 455 | "#### Adam\n", 456 | "Set up the Keras optimizer as `Adam` and the loss function as one of the weighted losses in the `mlg.loss` module. Some available functions are `weighted_MAPE`, `weighted_MSE`, `max_error` and `MAPE_plus_max_error`. They are weighted with the mass formula since the points on the hypersurface are distributed according to the Fubini-Study measure while the measure used in the integration is determined by the volume form $\\small \\Omega \\wedge \\bar\\Omega$." 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "id": "6jeIUXSTtGDK" 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "optimizer = keras.optimizers.Adam()\n", 468 | "loss_func = mlg.loss.weighted_MAPE" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "metadata": { 474 | "id": "V8R-_rO7tGDK" 475 | }, 476 | "source": [ 477 | "Loop over the batches and train the network:" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": null, 483 | "metadata": { 484 | "scrolled": true, 485 | "id": "WNWbQo1LtGDK", 486 | "outputId": "0a6b8726-c99d-41dc-9a56-6bafc1e9d172", 487 | "colab": { 488 | "base_uri": "https://localhost:8080/" 489 | } 490 | }, 491 | "outputs": [ 492 | { 493 | "output_type": "stream", 494 | "name": "stdout", 495 | "text": [ 496 | "epoch 10: loss = 0.04700\n", 497 | "epoch 20: loss = 0.01388\n", 498 | "epoch 30: loss = 0.01273\n", 499 | "epoch 40: loss = 0.01305\n", 500 | "epoch 50: loss = 0.01215\n", 501 | "epoch 60: 
loss = 0.01275\n", 502 | "epoch 70: loss = 0.01238\n", 503 | "epoch 80: loss = 0.01209\n", 504 | "epoch 90: loss = 0.01222\n", 505 | "epoch 100: loss = 0.01200\n", 506 | "epoch 110: loss = 0.01204\n", 507 | "epoch 120: loss = 0.01120\n", 508 | "epoch 130: loss = 0.01224\n", 509 | "epoch 140: loss = 0.01139\n", 510 | "epoch 150: loss = 0.01236\n", 511 | "epoch 160: loss = 0.01149\n", 512 | "epoch 170: loss = 0.01203\n", 513 | "epoch 180: loss = 0.01190\n", 514 | "epoch 190: loss = 0.01039\n", 515 | "epoch 200: loss = 0.01261\n" 516 | ] 517 | } 518 | ], 519 | "source": [ 520 | "max_epochs = 200\n", 521 | "epoch = 0\n", 522 | "while epoch < max_epochs:\n", 523 | " epoch = epoch + 1\n", 524 | " for step, (points, Omega_Omegabar, mass, restriction) in enumerate(train_set):\n", 525 | " with tf.GradientTape() as tape:\n", 526 | " det_omega = volume_form(points, Omega_Omegabar, mass, restriction)\n", 527 | " loss = loss_func(Omega_Omegabar, det_omega, mass)\n", 528 | " grads = tape.gradient(loss, model.trainable_weights)\n", 529 | " optimizer.apply_gradients(zip(grads, model.trainable_weights))\n", 530 | " if epoch % 10 == 0:\n", 531 | " print(\"epoch %d: loss = %.5f\" % (epoch, loss))" 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "metadata": { 537 | "id": "uAVB6kgztGDL" 538 | }, 539 | "source": [ 540 | "Let's check the loss of the test dataset. 
First define a function to calculate the total loss over the whole dataset:" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": null, 546 | "metadata": { 547 | "id": "SqGWnTq6tGDL" 548 | }, 549 | "outputs": [], 550 | "source": [ 551 | "def cal_total_loss(dataset, loss_function):\n", 552 | " total_loss = tf.constant(0, dtype=tf.float32)\n", 553 | " total_mass = tf.constant(0, dtype=tf.float32)\n", 554 | "\n", 555 | " for step, (points, Omega_Omegabar, mass, restriction) in enumerate(dataset):\n", 556 | " det_omega = volume_form(points, Omega_Omegabar, mass, restriction)\n", 557 | " mass_sum = tf.reduce_sum(mass)\n", 558 | " total_loss += loss_function(Omega_Omegabar, det_omega, mass) * mass_sum\n", 559 | " total_mass += mass_sum\n", 560 | " total_loss = total_loss / total_mass\n", 561 | "\n", 562 | " return total_loss.numpy()" 563 | ] 564 | }, 565 | { 566 | "cell_type": "markdown", 567 | "metadata": { 568 | "id": "iDGmTMIytGDL" 569 | }, 570 | "source": [ 571 | "Check the results of MAPE and MSE:" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": { 578 | "id": "N7gZJSOdtGDL", 579 | "outputId": "2bbba998-8388-4ac7-be8b-ede3299e4642", 580 | "colab": { 581 | "base_uri": "https://localhost:8080/" 582 | } 583 | }, 584 | "outputs": [ 585 | { 586 | "output_type": "stream", 587 | "name": "stdout", 588 | "text": [ 589 | "sigma_test = 0.01207\n", 590 | "E_test = 0.00027\n" 591 | ] 592 | } 593 | ], 594 | "source": [ 595 | "sigma_test = cal_total_loss(test_set, mlg.loss.weighted_MAPE)\n", 596 | "E_test = cal_total_loss(test_set, mlg.loss.weighted_MSE)\n", 597 | "print(\"sigma_test = %.5f\" % sigma_test)\n", 598 | "print(\"E_test = %.5f\" % E_test)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "markdown", 603 | "metadata": { 604 | "id": "omQZi7TytGDL" 605 | }, 606 | "source": [ 607 | "You can also check the error of the Monte Carlo integration, estimated by:\n", 608 | "\n", 609 | "$$\\delta \\sigma = 
\\frac{1}{\\sqrt{N_p}} {\\left( \\int_X (|\\eta - 1_X| - \\sigma)^2 d\\mu_{\\Omega}\\right)}^{1/2},$$\n", 610 | "\n", 611 | "where $N_p$ is the number of points on the hypersurface and $\\sigma$ is the `weighted_MAPE` loss, and\n", 612 | "\n", 613 | "$$\\eta = \\frac{\\det \\omega}{\\small \\Omega \\wedge \\bar \\Omega}$$" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "metadata": { 620 | "id": "ZY2fn1iFtGDM", 621 | "outputId": "7c1f7b56-325c-434e-f46d-907a89e3857a", 622 | "colab": { 623 | "base_uri": "https://localhost:8080/" 624 | } 625 | }, 626 | "outputs": [ 627 | { 628 | "output_type": "stream", 629 | "name": "stdout", 630 | "text": [ 631 | "delta_simga = 0.00012\n" 632 | ] 633 | } 634 | ], 635 | "source": [ 636 | "def delta_sigma_square_test(y_true, y_pred, mass):\n", 637 | " weights = mass / tf.reduce_sum(mass)\n", 638 | " return tf.reduce_sum((tf.abs(y_true - y_pred) / y_true - sigma_test)**2 * weights)\n", 639 | "\n", 640 | "delta_sigma = cal_total_loss(test_set, delta_sigma_square_test)\n", 641 | "print(\"delta_simga = %.5f\" % delta_sigma)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": { 647 | "id": "HQxGKzH4tGDM" 648 | }, 649 | "source": [ 650 | "#### Save and Load\n", 651 | "The trained network can be saved by:" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": null, 657 | "metadata": { 658 | "id": "IkrLO0eutGDM" 659 | }, 660 | "outputs": [], 661 | "source": [ 662 | "os.makedirs('trained_model', exist_ok=True)\n", 663 | "model.save('trained_model/70_100_1.keras')" 664 | ] 665 | }, 666 | { 667 | "cell_type": "markdown", 668 | "metadata": { 669 | "id": "HHK4cU7VtGDN" 670 | }, 671 | "source": [ 672 | "And loaded by the `load_model` method:" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": { 679 | "id": "CdjCeKlmtGDO" 680 | }, 681 | "outputs": [], 682 | "source": [ 683 | "model = 
keras.saving.load_model('trained_model/70_100_1.keras')" 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": { 689 | "id": "dlWwMrv7tGDP" 690 | }, 691 | "source": [ 692 | "#### L-BFGS\n", 693 | "As elaborated in our paper, when the network gets more complicated, L-BFGS converges faster than Adam near the minima. It is recommended to use it after pretraining with Adam. However, L-BFGS is not in the standard Tensorflow library so the training process is slightly different: (Only ~20 iterations are shown here. In a real case you may want to set the `max_epochs` to ~1000)" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": { 700 | "id": "3FUgvhWvtGDP", 701 | "outputId": "bf02ba35-d8c3-4ab1-d2d1-79e0c2345187", 702 | "colab": { 703 | "base_uri": "https://localhost:8080/" 704 | } 705 | }, 706 | "outputs": [ 707 | { 708 | "output_type": "stream", 709 | "name": "stdout", 710 | "text": [ 711 | "Iter: 1 loss: 0.0120117264\n", 712 | "Iter: 2 loss: 0.521354854\n", 713 | "Iter: 3 loss: 0.0819888934\n", 714 | "Iter: 4 loss: 0.0208096653\n", 715 | "Iter: 5 loss: 0.0115710068\n", 716 | "Iter: 6 loss: 0.0107686864\n", 717 | "Iter: 7 loss: 0.016376676\n", 718 | "Iter: 8 loss: 0.0107517727\n", 719 | "Iter: 9 loss: 0.0106189838\n", 720 | "Iter: 10 loss: 0.0111169312\n", 721 | "Iter: 11 loss: 0.0105820298\n", 722 | "Iter: 12 loss: 0.010477283\n", 723 | "Iter: 13 loss: 0.0104977814\n", 724 | "Iter: 14 loss: 0.0103970384\n", 725 | "Iter: 15 loss: 0.010341051\n", 726 | "Iter: 16 loss: 0.0110788839\n", 727 | "Iter: 17 loss: 0.0103399819\n" 728 | ] 729 | } 730 | ], 731 | "source": [ 732 | "# The displayed max_epochs will be three to four times this value since iter + 1 everytime the function\n", 733 | "# is invoked, which also happens during the evaluation of the function itself and its gradient\n", 734 | "max_epochs = 5\n", 735 | "\n", 736 | "# Setup the function to be optimized by L-BFGS\n", 737 | "\n", 738 | 
"train_func = mlg.lbfgs.function_factory(model, loss_func, train_set)\n", 739 | "\n", 740 | "# Setup the inital values and train\n", 741 | "init_params = tf.dynamic_stitch(train_func.idx, model.trainable_variables)\n", 742 | "results = tfp.optimizer.lbfgs_minimize(value_and_gradients_function=train_func,\n", 743 | " initial_position=init_params,\n", 744 | " max_iterations=max_epochs)\n", 745 | "# Update the model after the last loop\n", 746 | "train_func.assign_new_model_parameters(results.position)" 747 | ] 748 | }, 749 | { 750 | "cell_type": "markdown", 751 | "metadata": { 752 | "id": "VQ3BrFxltGDP" 753 | }, 754 | "source": [ 755 | "Note that the definition of the volume form is already in the `mlg.lbfgs` module. Also note that the standard L-BFGS does not support multi-batch training. You can still batch the dataset in case the GPU is out of memory, but the parameters are only updated after a whole epoch." 756 | ] 757 | }, 758 | { 759 | "cell_type": "markdown", 760 | "metadata": { 761 | "id": "P_sNfX12tGDQ" 762 | }, 763 | "source": [ 764 | "You can also check the test dataset:" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": { 771 | "id": "pjRWkGZ4tGDQ", 772 | "outputId": "273e4c93-53d9-4783-84f0-3e940aab0bbd", 773 | "colab": { 774 | "base_uri": "https://localhost:8080/" 775 | } 776 | }, 777 | "outputs": [ 778 | { 779 | "output_type": "stream", 780 | "name": "stdout", 781 | "text": [ 782 | "sigma_test = 0.01207\n", 783 | "E_test = 0.00027\n" 784 | ] 785 | } 786 | ], 787 | "source": [ 788 | "sigma_test = cal_total_loss(test_set, mlg.loss.weighted_MAPE)\n", 789 | "E_test = cal_total_loss(test_set, mlg.loss.weighted_MSE)\n", 790 | "print(\"sigma_test = %.5f\" % sigma_test)\n", 791 | "print(\"E_test = %.5f\" % E_test)" 792 | ] 793 | }, 794 | { 795 | "cell_type": "markdown", 796 | "metadata": { 797 | "id": "Xf6Dy8NvtGDQ" 798 | }, 799 | "source": [ 800 | "#### Print out the metrics\n", 801 | "After all of the 
trainings are done, the final results for the metrics can be printed out explicitly, using the previously generated data points and restriction matrices:" 802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": null, 807 | "metadata": { 808 | "id": "qt2ZZElptGDR" 809 | }, 810 | "outputs": [], 811 | "source": [ 812 | "@tf.function\n", 813 | "def get_cy_metric(points, restriction):\n", 814 | "\n", 815 | " cy_metric = mlg.complex_math.complex_hessian(tf.math.real(model(points)), points)\n", 816 | " cy_metric = tf.matmul(restriction, tf.matmul(cy_metric, restriction, adjoint_b=True))\n", 817 | "\n", 818 | " return cy_metric" 819 | ] 820 | }, 821 | { 822 | "cell_type": "code", 823 | "execution_count": null, 824 | "metadata": { 825 | "id": "FMOBAaCytGDR", 826 | "outputId": "8f40f4e8-cca9-4072-c9b5-b3e2236db0c0", 827 | "colab": { 828 | "base_uri": "https://localhost:8080/" 829 | } 830 | }, 831 | "outputs": [ 832 | { 833 | "output_type": "stream", 834 | "name": "stdout", 835 | "text": [ 836 | "[-0.59150815+7.2931312e-02j -0.9938062 +1.1493446e-02j\n", 837 | " 0.67252195-1.9070959e-01j -0.12152821+2.9399461e-01j\n", 838 | " 1. 
+2.2154981e-17j]\n", 839 | "[[ 2.472322 -4.78886477e-08j 0.3180696 +2.02831551e-01j\n", 840 | " -0.01433054+4.51943465e-02j]\n", 841 | " [ 0.31806967-2.02831566e-01j 2.557338 +7.45058060e-08j\n", 842 | " 0.02845549-1.64638966e-01j]\n", 843 | " [-0.01433052-4.51943949e-02j 0.02845548+1.64638966e-01j\n", 844 | " 1.9045304 -1.21071935e-08j]]\n" 845 | ] 846 | } 847 | ], 848 | "source": [ 849 | "cy_metric = get_cy_metric(points, restriction)\n", 850 | "print(points[5].numpy())\n", 851 | "print(cy_metric[5].numpy())" 852 | ] 853 | }, 854 | { 855 | "cell_type": "markdown", 856 | "metadata": { 857 | "id": "KeB5YqMHtGDR" 858 | }, 859 | "source": [ 860 | "### $\\eta$ on the rational curve" 861 | ] 862 | }, 863 | { 864 | "cell_type": "markdown", 865 | "metadata": { 866 | "id": "ZO-bZ-FstGDa" 867 | }, 868 | "source": [ 869 | "Now let's restrict our model to a subspace and check the local behavior of $\\eta$. With the quintic 3-fold f = 0, we can choose the embedding\n", 870 | "\n", 871 | "$$(z_0, -z_0, z_1, 0, -z_1),$$\n", 872 | "\n", 873 | "and the local coordinate system defined by $t = z_1 / z_0$. 
Using spherical coordinates $(\\theta, \\phi)$, it can be embedded into $\\mathbb{R}^3$ by:\n", 874 | "\n", 875 | "$$z_0 = \\sin \\theta \\cos \\phi, \\qquad z_1= \\sin \\theta \\sin \\phi + i \\cos \\theta$$\n", 876 | "\n", 877 | "So first sample the points on the rational curve:" 878 | ] 879 | }, 880 | { 881 | "cell_type": "code", 882 | "execution_count": null, 883 | "metadata": { 884 | "id": "3FtODaYEtGDa" 885 | }, 886 | "outputs": [], 887 | "source": [ 888 | "theta, phi = np.linspace(0.001,np.pi+0.001, 400), np.linspace(0.001, 2*np.pi+0.001, 400)\n", 889 | "eps = 0.0001 + 0.0001j\n", 890 | "\n", 891 | "R = []\n", 892 | "points_list = []\n", 893 | "for j in phi:\n", 894 | " for i in theta:\n", 895 | " t = complex(math.sin(i)*math.sin(j), math.cos(i)) / (math.sin(i)*math.cos(j))\n", 896 | " if np.absolute(t) <= 1:\n", 897 | " # The Bihomogeneous layer will remove the zero entries automatically.\n", 898 | " # So here we add a small number eps to avoid being removed\n", 899 | " points_list.append([1+eps, -1+eps, t+eps, 0+eps, -t+eps])\n", 900 | " else:\n", 901 | " # Use the symmetry:\n", 902 | " points_list.append([1+eps, -1+eps, 1/t+eps, 0+eps, -1/t+eps])" 903 | ] 904 | }, 905 | { 906 | "cell_type": "markdown", 907 | "metadata": { 908 | "id": "1jMeM3K3tGDb" 909 | }, 910 | "source": [ 911 | "Use this set of points to generate the rational curve with norm_coordinate = z0 and max_grad_coordinate = z1:" 912 | ] 913 | }, 914 | { 915 | "cell_type": "code", 916 | "execution_count": null, 917 | "metadata": { 918 | "id": "TDq6-gWwtGDb" 919 | }, 920 | "outputs": [], 921 | "source": [ 922 | "rc = mlg.hypersurface.Hypersurface(Z, f, points=points_list, norm_coordinate=0, max_grad_coordinate=0)\n", 923 | "rc_dataset = mlg.tf_dataset.generate_dataset(rc).batch(rc.n_points)" 924 | ] 925 | }, 926 | { 927 | "cell_type": "markdown", 928 | "metadata": { 929 | "id": "6--X-YkMtGDb" 930 | }, 931 | "source": [ 932 | "Calculate $\\eta$:" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 
937 | "execution_count": null, 938 | "metadata": { 939 | "id": "I2ypMTBmtGDb" 940 | }, 941 | "outputs": [], 942 | "source": [ 943 | "points, Omega_Omegabar, mass, restriction = next(iter(rc_dataset))\n", 944 | "det_omega = volume_form(points, Omega_Omegabar, mass, restriction)\n", 945 | "eta = (det_omega / Omega_Omegabar).numpy()" 946 | ] 947 | }, 948 | { 949 | "cell_type": "markdown", 950 | "metadata": { 951 | "id": "Qp0EdTwLtGDc" 952 | }, 953 | "source": [ 954 | "Convert to Cartesian coordinates:" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": null, 960 | "metadata": { 961 | "id": "VMECj1VotGDc" 962 | }, 963 | "outputs": [], 964 | "source": [ 965 | "R = eta.reshape(400, 400)\n", 966 | "THETA, PHI = np.meshgrid(theta, phi)\n", 967 | "X = R * np.sin(THETA) * np.cos(PHI)\n", 968 | "Y = R * np.sin(THETA) * np.sin(PHI)\n", 969 | "ZZ = R * np.cos(THETA)" 970 | ] 971 | }, 972 | { 973 | "cell_type": "markdown", 974 | "metadata": { 975 | "id": "KpygjzVRtGDc" 976 | }, 977 | "source": [ 978 | "Plot the figure:" 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": null, 984 | "metadata": { 985 | "id": "A1E03PN8tGDc", 986 | "outputId": "e72b77a5-1b80-4e81-f394-c9ff255ea7ef", 987 | "colab": { 988 | "base_uri": "https://localhost:8080/", 989 | "height": 410 990 | } 991 | }, 992 | "outputs": [ 993 | { 994 | "output_type": "display_data", 995 | "data": { 996 | "text/plain": [ 997 | "
" 998 | ], 999 | "image/png": "\n" 1000 | }, 1001 | "metadata": {} 1002 | } 1003 | ], 1004 | "source": [ 1005 | "fig = plt.figure()\n", 1006 | "ax = fig.add_subplot(1,1,1, projection='3d')\n", 1007 | "ax.set_zlim3d(-1.0, 1.0)\n", 1008 | "plot = ax.plot_surface(\n", 1009 | " X, Y, ZZ, rstride=1, cstride=1, cmap=plt.cm.YlGnBu_r,\n", 1010 | " linewidth=0, antialiased=False)" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "markdown", 1015 | "metadata": { 1016 | "id": "PyvsITrMtGDc" 1017 | }, 1018 | "source": [ 1019 | "$\\eta$ is expected to approach the constant function 1 as k increases." 1020 | ] 1021 | } 1022 | ], 1023 | "metadata": { 1024 | "kernelspec": { 1025 | "display_name": "Python 3", 1026 | "name": "python3" 1027 | }, 1028 | "language_info": { 1029 | "name": "python" 1030 | }, 1031 | "colab": { 1032 | "provenance": [], 1033 | "gpuType": "T4" 1034 | }, 1035 | "accelerator": "GPU" 1036 | }, 1037 | "nbformat": 4, 1038 | "nbformat_minor": 0 1039 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Michael R. Douglas, Subramanian Lakshminarasimhan and Yidi Qi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Leaderboard.md: -------------------------------------------------------------------------------- 1 | # Leaderboard 2 | 3 | ## Work in progress 4 | 5 | Here we will give both a leaderboard and a link to a script which does the following: 6 | 7 | Run 5 prechosen sample CYs and apply the following criteria: 8 | 1. No more than 2 hours time per run (we will run submissions on our cluster). 9 | 2. Do each run twice with two choices of random seed and keep the worse one. 10 | 3. All runs must meet some minimal accuracy (e.g. 1% MAPE). 11 | 4. Rank the qualifying entries by average log MAPE (over all samples). 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLGeometry 2 | 3 | Higher dimensional computational geometry using machine learning software 4 | 5 | - Kahler geometry and Kahler-Einstein metrics 6 | 7 | More to come. 8 | 9 | ## Recent Changes 10 | 11 | MLGeometry has been updated to be compatible with the latest version of TensorFlow and Keras 3, and it can now be installed directly from PyPI. If you prefer the older version, please check the 'Using an Older Version' section below. 12 | 13 | ## Installation 14 | 15 | ### Prerequisites 16 | 17 | MLGeometry requires Python 3.11 and TensorFlow (>=2.16). 
18 | 19 | Install TensorFlow by following the official installation guide: [TensorFlow Installation](https://www.tensorflow.org/install). 20 | 21 | On Linux with GPU, TensorFlow can be installed by 22 | 23 | pip install 'tensorflow[and-cuda]' 24 | 25 | ### Installing MLGeometry 26 | 27 | You can install MLGeometry using one of the following methods: 28 | 29 | #### Via PyPI 30 | 31 | pip install MLGeometry-tf 32 | 33 | *Note: Use "MLGeometry-tf" with a suffix when installing via pip.* 34 | 35 | #### Directly from Github 36 | 37 | pip install git+https://github.com/yidiq7/MLGeometry.git 38 | 39 | #### Using an Older Version 40 | 41 | If you prefer to use an older version of MLGeometry based on Tensorflow 2.12 and Keras 2, you can check out the previous release (v1.1.0) here: [Version 1.1.0 Release](https://github.com/yidiq7/MLGeometry/releases/tag/v1.1.0). Follow the installation instructions provided in that release's documentation. The compatible versions of Python and CUDA can be found [here](https://www.tensorflow.org/install/source#gpu). 42 | 43 | 44 | ## [Sample jupyter notebook](https://github.com/yidiq7/MLGeometry/blob/main/Guide.ipynb) 45 | 46 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yidiq7/MLGeometry/blob/main/Guide.ipynb) 47 | 48 | ## Citation 49 | 50 | You can find our paper on [arxiv](https://arxiv.org/abs/2012.04797) or [PMLR](https://proceedings.mlr.press/v145/douglas22a.html). 
51 | If you find our paper or package useful in your research or project, please cite it as follows: 52 | 53 | ``` 54 | @InProceedings{pmlr-v145-douglas22a, 55 | title = {Numerical Calabi-Yau metrics from holomorphic networks}, 56 | author = {Douglas, Michael and Lakshminarasimhan, Subramanian and Qi, Yidi}, 57 | booktitle = {Proceedings of the 2nd Mathematical and Scientific Machine Learning Conference}, 58 | pages = {223--252}, 59 | year = {2022}, 60 | editor = {Bruna, Joan and Hesthaven, Jan and Zdeborova, Lenka}, 61 | volume = {145}, 62 | series = {Proceedings of Machine Learning Research}, 63 | month = {16--19 Aug}, 64 | publisher = {PMLR}, 65 | pdf = {https://proceedings.mlr.press/v145/douglas22a/douglas22a.pdf}, 66 | url = {https://proceedings.mlr.press/v145/douglas22a.html}, 67 | } 68 | ``` 69 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "MLGeometry-tf" 7 | readme = "README.md" 8 | description = "Higher dimensional computational geometry using machine learning software" 9 | requires-python = ">=3.11" 10 | version = "1.2.0" 11 | dependencies = [ 12 | "tensorflow-probability[tf]", 13 | "sympy", 14 | "matplotlib" 15 | ] 16 | license = { file = "LICENSE" } 17 | maintainers = [ 18 | {name = "Yidi Qi", email = "qiyidi2012@gmail.com"} 19 | ] 20 | 21 | [project.urls] 22 | Homepage = "https://github.com/yidiq7/MLGeometry" 23 | Changelog = "https://github.com/yidiq7/MLGeometry/blob/master/CHANGELOG.md" 24 | 25 | [tool.setuptools] 26 | packages = ["MLGeometry"] 27 | package-dir = {"" = "src"} 28 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 
"tensorflow-probability[tf]" 2 | sympy 3 | matplotlib 4 | -------------------------------------------------------------------------------- /src/MLGeometry/__init__.py: -------------------------------------------------------------------------------- 1 | from . import hypersurface 2 | from . import cicyhypersurface 3 | from . import bihomoNN 4 | from . import lbfgs 5 | from . import loss 6 | from . import tf_dataset 7 | from . import complex_math 8 | 9 | -------------------------------------------------------------------------------- /src/MLGeometry/bihomoNN.py: -------------------------------------------------------------------------------- 1 | import keras 2 | from keras import activations 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | __all__ = ['Bihomogeneous','Bihomogeneous_k2','Bihomogeneous_k3', 7 | 'Bihomogeneous_k4','SquareDense','WidthOneDense'] 8 | 9 | @keras.saving.register_keras_serializable(package="MLGeometry") 10 | class Bihomogeneous(keras.layers.Layer): 11 | '''A layer transform zi to zi*zjbar''' 12 | def __init__(self, d=5): 13 | super(Bihomogeneous, self).__init__() 14 | self.d = d 15 | 16 | def call(self, inputs): 17 | zzbar = tf.einsum('ai,aj->aij', inputs, tf.math.conj(inputs)) 18 | zzbar = tf.linalg.band_part(zzbar, 0, -1) 19 | zzbar = tf.reshape(zzbar, [-1, self.d**2]) 20 | zzbar = tf.concat([tf.math.real(zzbar), tf.math.imag(zzbar)], axis=1) 21 | return remove_zero_entries(zzbar) 22 | 23 | 24 | @keras.saving.register_keras_serializable(package="MLGeometry") 25 | class Bihomogeneous_k2(keras.layers.Layer): 26 | '''A layer transform zi to symmetrized zi1*zi2, then to zi1*zi2 * zi1zi2bar''' 27 | def __init__(self): 28 | super(Bihomogeneous_k2, self).__init__() 29 | 30 | def call(self, inputs): 31 | # zi to zi1*zi2 32 | zz = tf.einsum('ai,aj->aij', inputs, inputs) 33 | zz = tf.linalg.band_part(zz, 0, -1) # zero below upper triangular 34 | zz = tf.reshape(zz, [-1, 5**2]) 35 | zz = tf.reshape(remove_zero_entries(zz), [-1, 15]) 36 | 37 
| # zi1*zi2 to zzbar 38 | zzbar = tf.einsum('ai,aj->aij', zz, tf.math.conj(zz)) 39 | zzbar = tf.linalg.band_part(zzbar, 0, -1) 40 | zzbar = tf.reshape(zzbar, [-1, 15**2]) 41 | zzbar = tf.concat([tf.math.real(zzbar), tf.math.imag(zzbar)], axis=1) 42 | return remove_zero_entries(zzbar) 43 | 44 | 45 | @keras.saving.register_keras_serializable(package="MLGeometry") 46 | class Bihomogeneous_k3(keras.layers.Layer): 47 | '''A layer transform zi to symmetrized zi1*zi2*zi3, then to zzbar''' 48 | def __init__(self): 49 | super(Bihomogeneous_k3, self).__init__() 50 | 51 | def call(self, inputs): 52 | zz = tf.einsum('ai,aj,ak->aijk', inputs, inputs, inputs) 53 | zz = tf.linalg.band_part(zz, 0, -1) # keep upper triangular 2/3 54 | zz = tf.transpose(zz, perm=[0, 3, 1, 2]) 55 | zz = tf.linalg.band_part(zz, 0, -1) # keep upper triangular 1/2 56 | zz = tf.transpose(zz, perm=[0, 2, 3, 1]) 57 | zz = tf.reshape(zz, [-1, 5**3]) 58 | zz = tf.reshape(remove_zero_entries(zz), [-1, 35]) 59 | 60 | zzbar = tf.einsum('ai,aj->aij', zz, tf.math.conj(zz)) 61 | zzbar = tf.linalg.band_part(zzbar, 0, -1) 62 | zzbar = tf.reshape(zzbar, [-1, 35**2]) 63 | zzbar = tf.concat([tf.math.real(zzbar), tf.math.imag(zzbar)], axis=1) 64 | return remove_zero_entries(zzbar) 65 | 66 | 67 | @keras.saving.register_keras_serializable(package="MLGeometry") 68 | class Bihomogeneous_k4(keras.layers.Layer): 69 | '''A layer transform zi to symmetrized zi1*zi2*zi3*zi4, then to zzbar''' 70 | def __init__(self): 71 | super(Bihomogeneous_k4, self).__init__() 72 | 73 | def call(self, inputs): 74 | zz = tf.einsum('ai,aj,ak,al->aijkl', inputs, inputs, inputs, inputs) 75 | zz = tf.linalg.band_part(zz, 0, -1) 76 | zz = tf.transpose(zz, perm=[0, 4, 1, 2, 3]) 77 | zz = tf.linalg.band_part(zz, 0, -1) 78 | zz = tf.transpose(zz, perm=[0, 4, 1, 2, 3]) # 3412 79 | zz = tf.linalg.band_part(zz, 0, -1) 80 | zz = tf.reshape(zz, [-1, 5**4]) 81 | zz = tf.reshape(remove_zero_entries(zz), [-1, 70]) 82 | 83 | zzbar = tf.einsum('ai,aj->aij', zz, 
tf.math.conj(zz)) 84 | zzbar = tf.linalg.band_part(zzbar, 0, -1) 85 | zzbar = tf.reshape(zzbar, [-1, 70**2]) 86 | zzbar = tf.concat([tf.math.real(zzbar), tf.math.imag(zzbar)], axis=1) 87 | return remove_zero_entries(zzbar) 88 | 89 | 90 | def remove_zero_entries(x): 91 | x = tf.transpose(x) 92 | intermediate_tensor = tf.reduce_sum(tf.abs(x), 1) 93 | bool_mask = tf.squeeze(tf.math.logical_not(tf.math.less(intermediate_tensor, 1e-3))) 94 | x = tf.boolean_mask(x, bool_mask) 95 | x = tf.transpose(x) 96 | return x 97 | 98 | 99 | @keras.saving.register_keras_serializable(package="MLGeometry") 100 | class SquareDense(keras.layers.Layer): 101 | def __init__(self, input_dim, units, activation=tf.square, trainable=True): 102 | super(SquareDense, self).__init__() 103 | w_init = tf.random_normal_initializer(mean=0.0, stddev=0.05) 104 | self.w = self.add_weight( 105 | shape=(input_dim, units), 106 | initializer=keras.initializers.Constant( 107 | tf.math.abs(w_init(shape=(input_dim, units), dtype='float32'))), 108 | #initial_value=w_init(shape=(input_dim, units), dtype='float32'), 109 | trainable=trainable, 110 | ) 111 | self.activation = activations.get(activation) 112 | 113 | def call(self, inputs): 114 | return self.activation(tf.matmul(inputs, self.w)) 115 | 116 | 117 | @keras.saving.register_keras_serializable(package="MLGeometry") 118 | class WidthOneDense(keras.layers.Layer): 119 | ''' 120 | Usage: layer = WidthOneDense(n**2, 1) 121 | where n is the number of sections for different ks 122 | n = 5 for k = 1 123 | n = 15 for k = 2 124 | n = 35 for k = 3 125 | This layer is used directly after Bihomogeneous_k layers to sum over all 126 | the terms in the previous layer. The weights are initialized so that the h 127 | matrix is a real identity matrix. The training does not work if they are randomly 128 | initialized. 
129 | ''' 130 | def __init__(self, input_dim, units, activation=None, trainable=True): 131 | super(WidthOneDense, self).__init__() 132 | dim = int(np.sqrt(input_dim)) 133 | mask = tf.cast(tf.linalg.band_part(tf.ones([dim, dim]),0,-1), dtype=tf.bool) 134 | upper_tri = tf.boolean_mask(tf.eye(dim), mask) 135 | w_init = tf.reshape(tf.concat([upper_tri, tf.zeros(input_dim - len(upper_tri))], axis=0), [-1, 1]) 136 | self.w = self.add_weight( 137 | shape=(input_dim, units), 138 | initializer=keras.initializer(w_init), 139 | trainable=trainable, 140 | ) 141 | self.activation = activations.get(activation) 142 | 143 | def call(self, inputs): 144 | return self.activation(tf.matmul(inputs, self.w)) 145 | 146 | -------------------------------------------------------------------------------- /src/MLGeometry/cicyhypersurface.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | import sympy as sp 4 | #import tensorflow as tf 5 | import mpmath 6 | from multiprocessing import Pool 7 | from .hypersurface import Hypersurface 8 | from .hypersurface import RealHypersurface 9 | 10 | __all__ = ['RealHypersurface', 'CICYRealHypersurface'] 11 | 12 | class CICYHypersurface(Hypersurface): 13 | 14 | def solve_points(self, n_trios): 15 | points = [] 16 | ztrios = self.generate_random_projective(n_trios, 3) 17 | coeff_a = sp.var('a0:{}'.format(self.n_dim)) 18 | coeff_b = sp.var('b0:{}'.format(self.n_dim)) 19 | coeff_c = sp.var('c0:{}'.format(self.n_dim)) 20 | sp.var('t0:2') 21 | coeff_zip = zip(coeff_a, coeff_b, coeff_c) 22 | plane = [t0*a + t1*b + c for (a, b, c) in coeff_zip] 23 | # Add another function & poly & coeffs here 24 | poly_t = sp.Matrix(self.function).subs([(self.coordinates[i], plane[i]) 25 | for i in range(self.n_dim)]) 26 | 27 | coeffs_list = [sp.Poly(poly,(t0, t1)).coeffs() for poly in poly_t] 28 | monoms_list = [sp.Poly(poly,(t0, t1)).monoms() for poly in poly_t] 29 | 30 | coeffs_func = 
sp.lambdify([coeff_a + coeff_b + coeff_c], coeffs_list, "numpy") 31 | 32 | coeffs_list = [] 33 | for ztrio in ztrios: 34 | coeffs_list.append(coeffs_func(np.array(ztrio).flatten())) 35 | 36 | monoms_list = [monoms_list] * n_trios 37 | 38 | points = self.solve_points_multiprocessing(coeffs_list, monoms_list, ztrios) 39 | 40 | return points 41 | 42 | def solve_points_multiprocessing(self, coeffs_list, monoms_list, ztrios): 43 | points = [] 44 | with Pool() as pool: 45 | for point in pool.starmap(CICYHypersurface.solve_poly, zip(coeffs_list, monoms_list, ztrios)): 46 | points.append(point) 47 | 48 | points = list(filter(lambda x: x is not None, points)) 49 | return points 50 | 51 | @staticmethod 52 | def solve_poly(coeff_list, monom_list, ztrio): 53 | point = None 54 | 55 | def func_t(t0, t1): 56 | return [sum([coeff * t0**monom[0]*t1**monom[1] for coeff, monom in zip(coeffs, monoms)]) for coeffs, monoms in zip(coeff_list, monom_list)] 57 | 58 | for attempt in range(1): 59 | try: 60 | t_real = np.random.randn(4) 61 | t_init = [complex(t_real[0], t_real[1]), complex(t_real[2], t_real[3])] 62 | #t_init = np.random.randn(2).tolist() 63 | t_solved = mpmath.findroot(func_t, t_init) 64 | t_array = np.array(t_solved.tolist(), dtype=np.complex64) 65 | t_array = np.concatenate((t_array, np.array([[1.0+0.0j]]))) 66 | point = np.add.reduce(t_array * ztrio) 67 | break 68 | except: 69 | pass 70 | 71 | return point 72 | 73 | def get_grad(self): 74 | func = sp.Matrix(self.function) 75 | grad = func.jacobian(self.affine_coordinates) 76 | return grad 77 | 78 | def get_hol_n_form(self, coord): 79 | """ 80 | 81 | Return: 82 | ------- 83 | A or a list of symbolic expressions of the holomorphic n-form 1/(∂f/∂z_i) 84 | 85 | """ 86 | hol_n_form = [] 87 | try: 88 | hol_n_form = 1/self.grad[:,coord].det() 89 | except: 90 | logging.exception('The number of functions and the number of coordinates to eliminate do not match') 91 | 92 | return hol_n_form 93 | 94 | def autopatch(self): 95 | # 
projective patches 96 | points_on_patch = [[] for i in range(self.n_dim)] 97 | for point in self.points: 98 | norms = np.absolute(point) 99 | for i in range(self.n_dim): 100 | if norms[i] == max(norms): 101 | point_normalized = self.normalize_point(point, i) 102 | points_on_patch[i].append(point_normalized) 103 | continue 104 | for i in range(self.n_dim): 105 | self.set_patch(points_on_patch[i], i) 106 | 107 | # Remove empty patches 108 | self.patches = [subpatch for subpatch in self.patches if subpatch.points] 109 | 110 | for patch in self.patches: 111 | 112 | jac_det = [] 113 | for i in range(self.n_dim-1): 114 | det_row = [] 115 | for j in range(self.n_dim-1): 116 | det_row.append(patch.grad[:, [i,j]].det()) 117 | jac_det.append(det_row) 118 | 119 | jac_det = sp.Matrix(jac_det) 120 | jac_det = sp.lambdify([self.coordinates], jac_det, 'numpy') 121 | 122 | jac_det_arr = np.abs(np.squeeze(np.vectorize(jac_det,signature='(n)->(p,q)')(patch.points))) 123 | 124 | n, m, _ = jac_det_arr.shape 125 | # Reshape the array to a 2D array where each row represents one mxm subarray 126 | reshaped_arr = jac_det_arr.reshape(n, -1) 127 | # Find the argmax indices for each row (mxm subarray) 128 | argmax_indices = np.argmax(reshaped_arr, axis=1) 129 | # Convert the flat indices to row and column indices 130 | row_indices, col_indices = np.unravel_index(argmax_indices, (m, m)) 131 | # Stack the row and column indices horizontally to get the final result 132 | result = np.column_stack((row_indices, col_indices)) 133 | 134 | max_grad_list = np.unique(result, axis=0).tolist() 135 | 136 | points_arr = np.array(patch.points) 137 | for max_grad_coord in max_grad_list: 138 | points_on_patch = points_arr[np.where(np.all(result == max_grad_coord, axis=1))] 139 | patch.set_patch(points_on_patch, patch.norm_coordinate, max_grad_coord=max_grad_coord) 140 | 141 | def set_patch(self, points_on_patch, norm_coord=None, max_grad_coord=None): 142 | new_patch = CICYHypersurface(self.coordinates, 143 | 
self.function, 144 | points=points_on_patch, 145 | norm_coordinate=norm_coord, 146 | max_grad_coordinate=max_grad_coord) 147 | self.patches.append(new_patch) 148 | 149 | def get_restriction(self, ignored_coord=None, lambdify=False): 150 | if ignored_coord is None: 151 | ignored_coord = self.max_grad_coordinate 152 | # Since we have more than one ignored_coordinate in CICY, sympy subs() 153 | # cannot replace two coordinates simultaneously. As a result, if the first 154 | # expression contains the coordinate to be replaced by the second expression 155 | # that coordinate will also be replaced. So here we will create a temporary 156 | # coordinate list W to avoid this issue 157 | W = sp.var('w0:{}'.format(len(self.affine_coordinates))) 158 | ignored_coordinate = np.array(W)[ignored_coord] 159 | local_coordinates = sp.Matrix(W).subs({coord: func for coord, func in zip(ignored_coordinate, self.function)}) 160 | local_coordinates = local_coordinates.subs({w: z for w, z in zip(W, self.affine_coordinates)}) 161 | restriction = local_coordinates.jacobian(self.affine_coordinates).inv() 162 | for coord in reversed(ignored_coord): 163 | restriction.col_del(coord) 164 | if lambdify is True: 165 | restriction = sp.lambdify([self.coordinates], restriction, 'numpy') 166 | return restriction 167 | 168 | class RealCICYHypersurface(CICYHypersurface, RealHypersurface): 169 | 170 | def generate_random_projective(self, n_set, n_pt_in_a_set): 171 | return RealHypersurface.generate_random_projective(self, n_set, n_pt_in_a_set) 172 | 173 | def solve_points_multiprocessing(self, coeffs_list, monoms_list, ztrios): 174 | points = [] 175 | with Pool() as pool: 176 | for point in pool.starmap(RealCICYHypersurface.solve_poly, zip(coeffs_list, monoms_list, ztrios)): 177 | points.append(point) 178 | 179 | points = list(filter(lambda x: x is not None, points)) 180 | return points 181 | 182 | @staticmethod 183 | def solve_poly(coeff_list, monom_list, ztrio): 184 | point = None 185 | 186 | def 
func_t(t0, t1): 187 | return [sum([coeff * t0**monom[0]*t1**monom[1] for coeff, monom in zip(coeffs, monoms)]) for coeffs, monoms in zip(coeff_list, monom_list)] 188 | 189 | for attempt in range(1): 190 | try: 191 | #t_real = np.random.randn(4) 192 | #t_init = [complex(t_real[0], t_real[1]), complex(t_real[2], t_real[3])] 193 | t_init = np.random.randn(2).tolist() 194 | t_solved = mpmath.findroot(func_t, t_init) 195 | t_array = np.array(t_solved.tolist(), dtype=np.complex64) 196 | t_array = np.concatenate((t_array, np.array([[1.0+0.0j]]))) 197 | point = np.add.reduce(t_array * ztrio) 198 | break 199 | except: 200 | pass 201 | 202 | return point 203 | -------------------------------------------------------------------------------- /src/MLGeometry/complex_math.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def gradients_zbar(func, x): 4 | dx_real = tf.gradients(tf.math.real(func), x) 5 | dx_imag = tf.gradients(tf.math.imag(func), x) 6 | return (dx_real + dx_imag*tf.constant(1j, dtype=x.dtype)) / 2 7 | 8 | @tf.autograph.experimental.do_not_convert 9 | def complex_hessian(func, x): 10 | # Take a real function and calculate dzdzbar(f) 11 | #grad = gradients_z(func, x) 12 | grad = tf.math.conj(tf.gradients(func, x)) 13 | hessian = tf.stack([gradients_zbar(tmp[0], x)[0] 14 | for tmp in tf.unstack(grad, axis=2)], 15 | axis = 1) / 2.0 16 | 17 | return hessian 18 | 19 | -------------------------------------------------------------------------------- /src/MLGeometry/hypersurface.py: -------------------------------------------------------------------------------- 1 | """Defines a Python class for hypersurfaces""" 2 | 3 | from multiprocessing import Pool 4 | import mpmath 5 | 6 | import numpy as np 7 | import sympy as sp 8 | import tensorflow as tf 9 | 10 | __all__ = ['Hypersurface', 'RealHypersurface', 'diff', 'diff_conjugate'] 11 | 12 | class Hypersurface(): 13 | r"""A hypersurface or patch defined both 
symbolically and numerically. 14 | 15 | The Hypersurface class contains the symbolic polynomial representation of a 16 | hypersurface in sympy. It is also numerically defined as a collection of 17 | points on the hypersurface. The points are separated into patches, which are 18 | also collections of points. Therefore, recursively, the patches can also be 19 | defined as instances of the Hypersurface class. 20 | 21 | Hypersurface 22 | / | \ 23 | patch1 patch2 patch3 (Also Hypersurface class) 24 | / | \ / | \ / | \ 25 | subpatch .. .. .. .. .. .. 26 | 27 | Attribute: 28 | ------------- 29 | coordinates: 30 | The homogeneous coordinates as a list of sympy symbols, e.g. 31 | z0, z1, z2, z3, z4 = sp.symbols('z0, z1, z2, z3, z4') 32 | Z = [z0, z1, z2, z3, z4] 33 | function: 34 | A function of the homogeneous coordinates, e.g. 35 | f = z0**5 + z1**5 + z2**5 + z3**5 + z4**5 + 0.5*z0*z1*z2*z3*z4 36 | The hypersurface is defined by f = 0 37 | n_dim: The number of homogeneous coordinates. 38 | norm_coordinate: 39 | Applicable if the instance is a patch. An integer representing the 40 | index of the coordinate set to 1 on the affine patch. The first level 41 | of patches are defined with this coordinate. The corresponding symbolic 42 | coordinate is self.coordinates[self.norm_coordinate]. 43 | affine_coordinates: 44 | The coordinates on the affine patches (without the norm_coordinate). 45 | max_grad_coordinate: 46 | The index of the affine coordinate with the largest |∂f/∂z_i|. The 47 | second level of patches (subpatches) are defined using this coordinate, 48 | so that when one computes the holomorphic n-form 49 | Omega = 1/(∂f/∂z_i) * (dz_1 ^ ... dz_{i-1} ^ dz_{i+1} ^ ... dz_N), 50 | it is less likely to have a small number on the denominator. 51 | The corresponding symbolic coordinate is 52 | self.affine_coordinates[self.max_grad_coordinate]. 53 | patches: A list of instances of the subpatches of the hypersurface/patch 54 | points: The numerical points generated by Monte Carlo. 
55 | n_points: The total number of points on the hypersurface or a patch. 56 | grad: A list of symbolic expressions of ∂f/∂z_i 57 | 58 | Usage: 59 | ---------- 60 | Firstly, one needs to define the coordinates and function with sympy, 61 | and the number of pairs of points used in Monte Carlo: 62 | 63 | z0, z1, z2, z3, z4 = sp.symbols('z0, z1, z2, z3, z4') 64 | Z = [z0, z1, z2, z3, z4] 65 | f = z0**5 + z1**5 + z2**5 + z3**5 + z4**5 + 0.5*z0*z1*z2*z3*z4 66 | n_pairs = 1000 67 | 68 | Then define the hypersurface with f = 0: 69 | 70 | HS = MLGeometry.hypersurface.Hypersurface(Z, f, n_pairs) 71 | 72 | """ 73 | def __init__(self, 74 | coordinates, 75 | function, 76 | n_pairs=0, 77 | points=None, 78 | norm_coordinate=None, 79 | max_grad_coordinate=None): 80 | """Initialize the hypersurface 81 | 82 | Given the sympy coordinates Z, function f and n_pairs, there are two 83 | main steps on the highest level: 84 | 1. Generate points using Monte Carlo methods with solve_points() 85 | 2. Define the patches automatically with autopatch() 86 | On the patches, the points are calculated with autopatch() 87 | beforehand and passed as an argument. 
88 | 89 | """ 90 | self.coordinates = np.array(coordinates) 91 | self.function = function 92 | self.n_dim = len(self.coordinates) 93 | self.norm_coordinate = norm_coordinate 94 | if norm_coordinate is not None: 95 | self.affine_coordinates = np.delete(self.coordinates, norm_coordinate) 96 | else: 97 | self.affine_coordinates = self.coordinates 98 | self.max_grad_coordinate = max_grad_coordinate 99 | self.patches = [] 100 | if points is None: 101 | self.points = self.solve_points(n_pairs) 102 | self.autopatch() 103 | else: 104 | self.points = points 105 | #self.n_patches = len(self.patches) 106 | self.n_points = len(self.points) 107 | self.grad = self.get_grad() 108 | 109 | def solve_points(self, n_pairs): 110 | """Generates random points on the hypersurface with Monte Carlo 111 | 112 | #TODO explain the MC method or refer to the paper 113 | 114 | Args: 115 | -------- 116 | n_pairs: The number of random pair used in Monte carlo. 117 | 118 | Returns: 119 | -------- 120 | A list of random complex points on the hypersurface, where the points 121 | themselves are a list of complex coordiantes with dtype Complex128. 122 | 123 | """ 124 | zpairs = self.generate_random_projective(n_pairs, 2) 125 | coeff_a = [sp.symbols('a'+str(i)) for i in range(self.n_dim)] 126 | coeff_b = [sp.symbols('b'+str(i)) for i in range(self.n_dim)] 127 | c = sp.symbols('c') 128 | coeff_zip = zip(coeff_a, coeff_b) 129 | line = [c*a+b for (a, b) in coeff_zip] 130 | function_eval = self.function.subs([(self.coordinates[i], line[i]) 131 | for i in range(self.n_dim)]) 132 | poly = sp.Poly(function_eval, c) 133 | coeff_poly = poly.coeffs() 134 | get_coeff = sp.lambdify([coeff_a, coeff_b], coeff_poly) 135 | 136 | points = self.solve_points_multiprocessing(zpairs, get_coeff) 137 | 138 | return points 139 | 140 | def solve_points_multiprocessing(self, zpairs, get_coeff): 141 | points = [] 142 | # Multiprocessing. 
Then append the points to the same list in the main process 143 | with Pool() as pool: 144 | for points_d in pool.starmap(Hypersurface.solve_poly, 145 | zip(zpairs, [get_coeff(zpair[0], zpair[1]) 146 | for zpair in zpairs])): 147 | points.extend(points_d) 148 | return points 149 | 150 | def generate_random_projective(self, n_set, n_pt_in_a_set): 151 | """Generate sets of points in CP^N 152 | 153 | Args: 154 | -------- 155 | n_set: The total number of sets/complex lines/planes sampled. Equivalent 156 | to n_pairs when there are 2 points in each set. 157 | 158 | n_pt_in_a_set: The number of points in a set. For pairs it equals to 2 and 159 | for trios it equal to 3, etc. 160 | 161 | Returns: 162 | -------- 163 | A list of random points in (CP^N)^n_point 164 | 165 | """ 166 | z_random = [] 167 | for i in range(n_set): 168 | zv = [] 169 | for j in range(n_pt_in_a_set): 170 | zv.append([complex(c[0],c[1]) for c in np.random.normal(0.0, 1.0, (self.n_dim, 2))]) 171 | z_random.append(zv) 172 | return z_random 173 | 174 | @staticmethod 175 | def solve_poly(zpair, coeff): 176 | # For each zpair there are d solutions, where d is the n_dim 177 | points_d = [] 178 | c_solved = mpmath.polyroots(coeff) 179 | for pram_c in c_solved: 180 | points_d.append([complex(pram_c * a + b) 181 | for (a, b) in zip(zpair[0], zpair[1])]) 182 | return points_d 183 | 184 | def autopatch(self): 185 | # projective patches 186 | points_on_patch = [[] for i in range(self.n_dim)] 187 | for point in self.points: 188 | norms = np.absolute(point) 189 | for i in range(self.n_dim): 190 | if norms[i] == max(norms): 191 | point_normalized = self.normalize_point(point, i) 192 | points_on_patch[i].append(point_normalized) 193 | continue 194 | for i in range(self.n_dim): 195 | if points_on_patch[i]: 196 | self.set_patch(points_on_patch[i], i) 197 | # Subpatches on each patch 198 | for patch in self.patches: 199 | points_on_patch = [[] for i in range(self.n_dim-1)] 200 | grad_eval = sp.lambdify(self.coordinates, 
patch.grad, 'numpy') 201 | for point in patch.points: 202 | grad = grad_eval(*point) 203 | grad_norm = np.absolute(grad) 204 | points_on_patch[np.argmax(grad_norm)].append(point) 205 | for i in range(self.n_dim-1): 206 | if points_on_patch[i]: 207 | patch.set_patch(points_on_patch[i], patch.norm_coordinate, 208 | max_grad_coord=i) 209 | 210 | def set_patch(self, points_on_patch, norm_coord=None, max_grad_coord=None): 211 | new_patch = Hypersurface(self.coordinates, 212 | self.function, 213 | points=points_on_patch, 214 | norm_coordinate=norm_coord, 215 | max_grad_coordinate=max_grad_coord) 216 | self.patches.append(new_patch) 217 | 218 | def list_patches(self): 219 | print("Number of Patches:", len(self.patches)) 220 | i = 1 221 | for patch in self.patches: 222 | print("Points on patch", i, ":", len(patch.points)) 223 | i = i + 1 224 | 225 | def normalize_point(self, point, norm_coordinate): 226 | point_normalized = [] 227 | for coordinate in point: 228 | norm_coefficient = point[norm_coordinate] 229 | coordinate_normalized = coordinate / norm_coefficient 230 | point_normalized.append(coordinate_normalized) 231 | return point_normalized 232 | 233 | def get_FS(self): 234 | FS_metric = self.kahler_metric(np.identity(self.n_dim), k=1) 235 | return FS_metric 236 | 237 | def get_grad(self): 238 | grad = [] 239 | for coord in self.affine_coordinates: 240 | grad_i = self.function.diff(coord) 241 | grad.append(grad_i) 242 | return grad 243 | 244 | def get_hol_n_form(self, coord): 245 | """ 246 | 247 | Return: 248 | ------- 249 | A or a list of symbolic expressions of the holomorphic n-form 1/(∂f/∂z_i) 250 | 251 | """ 252 | hol_n_form = [] 253 | if coord is not None: 254 | hol_n_form = 1 / self.grad[coord] 255 | else: 256 | for i in range(len(self.affine_coordinates)): 257 | hol_n_form.append(self.get_hol_n_form(i)) 258 | return hol_n_form 259 | 260 | def get_omega_omegabar(self, lambdify=False): 261 | omega_omegabar = [] 262 | if self.patches == [] and 
def kahler_potential(self, h_matrix=None, k=1):
    """Return the symbolic Kahler potential ``log(s H s-bar)``.

    Args:
        h_matrix: the matrix ``H``.  ``None`` or ``"identity"`` selects the
            identity, ``"symbolic"`` a sympy ``MatrixSymbol`` named ``H``,
            and ``"FS"`` the diagonal matrix built from the coefficients of
            ``(sum of coordinates)**k``.  Any other non-string value is used
            as given.
        k: degree passed to ``self.get_sections``.

    Returns:
        A sympy expression.  If ``self.norm_coordinate`` is not None, that
        coordinate is substituted by 1 before the logarithm is taken.

    Raises:
        ValueError: if ``h_matrix`` is a string other than the three
            keywords above.
    """
    sections, n_sec = self.get_sections(k)
    if h_matrix is None:
        h_matrix = np.identity(n_sec)
    elif isinstance(h_matrix, str):
        if h_matrix == "identity":
            h_matrix = np.identity(n_sec)
        elif h_matrix == "symbolic":
            h_matrix = sp.MatrixSymbol('H', n_sec, n_sec)
        elif h_matrix == "FS":
            h_matrix = np.diag(
                sp.Poly(sp.expand(sum(self.coordinates)**k)).coeffs())
        else:
            # An unknown keyword used to reach np.matmul as a plain string
            # and fail there with an unrelated-looking error.
            raise ValueError(
                "h_matrix must be 'identity', 'symbolic', 'FS' or a matrix, "
                "got {!r}".format(h_matrix))
    z_H_zbar = np.matmul(sections,
                         np.matmul(h_matrix, sp.conjugate(sections)))
    if self.norm_coordinate is not None:
        z_H_zbar = z_H_zbar.subs(self.coordinates[self.norm_coordinate], 1)
    return sp.log(z_H_zbar)
def get_FS_volume_form(self, h_matrix=None, k=1, lambdify=False):
    """Return ``det(r^T * g * conj(r))`` for the metric ``g`` and restriction ``r``.

    ``g`` comes from ``self.kahler_metric(h_matrix, k)`` and ``r`` from
    ``self.get_restriction()``.

    Args:
        h_matrix: forwarded to ``self.kahler_metric``.
        k: forwarded to ``self.kahler_metric``.
        lambdify: when exactly ``True``, return a numeric function instead
            of the determinant itself.

    Returns:
        The determinant, or (with ``lambdify=True``) a function that takes a
        point and returns the real part of the determinant evaluated there.
    """
    metric = self.kahler_metric(h_matrix, k)
    r = self.get_restriction()
    determinant = (r.T * metric * r.conjugate()).det()
    if lambdify is not True:
        return determinant

    numeric_det = sp.lambdify([self.coordinates], determinant, 'numpy')

    def real_volume_form(point):
        return numeric_det(point).real

    return real_volume_form
def num_s_J_tf(self, k=-1):
    """Evaluate sections and their Jacobian on every point, as complex64 tensors.

    Args:
        k: with ``k == 1`` the point itself is used as the section vector and
            the Jacobian is the ``n_dim`` identity with row
            ``self.norm_coordinate`` deleted (k = 1 is used in the mass
            formula during the integration).  For any other value the
            precomputed ``self.sections`` and ``self.sections_jacobian``
            callables are evaluated at each point.

    Returns:
        ``(s_tf, J_tf)``: two ``tf.constant`` tensors of dtype complex64,
        stacked over ``self.points``.
    """
    s_vec = []
    J_vec = []

    if k == 1:
        # The k = 1 Jacobian does not depend on the point: build it once
        # instead of once per point.
        J_const = np.delete(np.identity(self.n_dim), self.norm_coordinate, 0)
        for point in self.points:
            s_vec.append([point])
            J_vec.append(J_const)
    else:
        for point in self.points:
            s_vec.append([self.sections(point)])
            J_vec.append(self.sections_jacobian(point).T)

    s_tf = tf.constant(np.array(s_vec, dtype=np.complex64))
    J_tf = tf.constant(np.array(J_vec, dtype=np.complex64))
    return s_tf, J_tf
def num_kahler_metric_tf(self, h_matrix, k=-1):
    """Evaluate the Kahler metric on all points at once with TensorFlow.

    With ``s`` the section tensor, ``J`` its Jacobian and ``H`` the matrix
    ``h_matrix``, this computes

        G = J H J^dagger / alpha - (s H J^dagger)^dagger (s H J^dagger) / alpha**2

    where ``alpha = s H s^dagger``.

    Args:
        h_matrix: ``'identity'``, ``'FS'`` (uses ``self.h_FS``), or anything
            ``tf.convert_to_tensor`` accepts as a complex64 tensor.
        k: ``1`` selects the ``s_tf_1`` / ``J_tf_1`` tensors and an
            ``n_dim``-sized identity; any other value selects ``s_tf`` /
            ``J_tf`` and an ``n_sections``-sized identity.

    Returns:
        The tensor ``G``.
    """
    first_order = (k == 1)

    if isinstance(h_matrix, str):
        if h_matrix == 'identity':
            size = self.n_dim if first_order else self.n_sections
            h_matrix = np.identity(size, dtype=np.complex64)
        elif h_matrix == 'FS':
            h_matrix = self.h_FS.astype(np.complex64)
    h_tf = tf.convert_to_tensor(h_matrix, dtype=tf.complex64)

    if first_order:
        s_tf, J_tf = self.s_tf_1, self.J_tf_1
    else:
        s_tf, J_tf = self.s_tf, self.J_tf

    H_Jdag = tf.matmul(h_tf, J_tf, adjoint_b=True)
    first_term = tf.matmul(J_tf, H_Jdag)
    s_H_Jdag = tf.matmul(s_tf, H_Jdag)
    second_term = tf.matmul(s_H_Jdag, s_H_Jdag, adjoint_a=True)
    alpha = tf.matmul(s_tf, tf.matmul(h_tf, s_tf, adjoint_b=True))
    return first_term / alpha - second_term / alpha**2
def num_FS_volume_form(self, h_matrix, point, k=-1):
    """Return the real part of ``det(r^T G conj(r))`` at a single point.

    ``G`` is ``self.num_kahler_metric(h_matrix, point, k)`` and ``r`` is
    ``self.restriction(point)``.

    Args:
        h_matrix: forwarded to ``self.num_kahler_metric``.
        point: the point at which both quantities are evaluated.
        k: forwarded to ``self.num_kahler_metric``.

    Returns:
        The real part of the determinant, as a NumPy float.
    """
    kahler_metric = self.num_kahler_metric(h_matrix, point, k)
    r = self.restriction(point)
    pulled_back = np.matmul(r.T, np.matmul(kahler_metric, np.conj(r)))
    # np.matrix is no longer recommended by NumPy.  A plain complex array,
    # promoted to at least 2-D as np.matrix did, gives the same determinant.
    pulled_back = np.atleast_2d(np.asarray(pulled_back, dtype=complex))
    return np.linalg.det(pulled_back).real
def function_factory(model, loss, dataset):
    """A factory to create a function required by tfp.optimizer.lbfgs_minimize.

    Args:
        model [in]: the model whose ``trainable_variables`` are optimised; it
            is called as ``model(x)`` on each batch of points.
        loss [in]: a function called as
            ``loss_value = loss(Omega_Omegabar, det_omega, mass)``.
        dataset [in]: an iterable of batches
            ``(points, Omega_Omegabar, mass, restriction)``.  It is iterated
            in full every time the returned function is called.

    Returns:
        A function that has a signature of:
            loss_value, gradients = f(model_parameters).
        The attributes ``iter``, ``idx``, ``part``, ``shapes``,
        ``assign_new_model_parameters`` and ``history`` are attached to it.
    """

    # obtain the shapes of all trainable parameters in the model
    shapes = tf.shape_n(model.trainable_variables)
    n_tensors = len(shapes)

    # we'll use tf.dynamic_stitch and tf.dynamic_partition later, so we need to
    # prepare required information first
    count = 0
    idx = []   # stitch indices
    part = []  # partition indices

    for i, shape in enumerate(shapes):
        n = np.prod(shape)
        idx.append(tf.reshape(tf.range(count, count + n, dtype=tf.int32), shape))
        part.extend([i] * n)
        count += n

    part = tf.constant(part)

    @tf.function
    @tf.autograph.experimental.do_not_convert
    def assign_new_model_parameters(params_1d):
        """A function updating the model's parameters with a 1D tf.Tensor.

        Args:
            params_1d [in]: a 1D tf.Tensor representing the model's trainable parameters.
        """
        params = tf.dynamic_partition(params_1d, part, n_tensors)
        for i, (shape, param) in enumerate(zip(shapes, params)):
            model.trainable_variables[i].assign(tf.reshape(param, shape))

    @tf.function
    def volume_form(x, Omega_Omegabar, mass, restriction):
        """Return the normalised determinant of the restricted complex Hessian of model(x)."""
        kahler_metric = complex_math.complex_hessian(tf.math.real(model(x)), x)
        det_metric = tf.math.real(tf.linalg.det(
            tf.matmul(restriction,
                      tf.matmul(kahler_metric, restriction, adjoint_b=True))))
        weights = mass / tf.reduce_sum(mass)
        factor = tf.reduce_sum(weights * det_metric / Omega_Omegabar)
        return det_metric / factor

    # now create a function that will be returned by this factory
    def f(params_1d):
        """A function that can be used by tfp.optimizer.lbfgs_minimize.

        This function is created by function_factory.

        Args:
            params_1d [in]: a 1D tf.Tensor.

        Returns:
            A scalar loss and the gradients w.r.t. the `params_1d`, both
            averaged over the batches with weights ``sum(mass)``.

        Raises:
            ValueError: if ``dataset`` yields no batch.
        """
        # The parameters are identical for every batch: load them once.
        assign_new_model_parameters(params_1d)

        total_loss = None
        total_grads = None
        total_mass = None

        for points, Omega_Omegabar, mass, restriction in dataset:
            # use GradientTape so that we can calculate the gradient of loss
            # w.r.t. parameters
            with tf.GradientTape() as tape:
                det_omega = volume_form(points, Omega_Omegabar, mass, restriction)
                loss_value = loss(Omega_Omegabar, det_omega, mass)

            # calculate gradients and convert to 1D tf.Tensor
            grads = tape.gradient(loss_value, model.trainable_variables)
            grads = tf.dynamic_stitch(idx, grads)

            # reweight the loss and grads
            mass_sum = tf.reduce_sum(mass)
            if total_mass is None:
                total_loss = loss_value * mass_sum
                total_grads = grads * mass_sum
                total_mass = mass_sum
            else:
                total_loss += loss_value * mass_sum
                total_grads += grads * mass_sum
                total_mass += mass_sum

        if total_mass is None:
            raise ValueError("function_factory: the dataset produced no batches")

        total_loss = total_loss / total_mass
        total_grads = total_grads / total_mass

        # print out iteration & loss
        f.iter.assign_add(1)
        tf.print("Iter:", f.iter, "loss:", total_loss)

        # store loss value so we can retrieve later
        tf.py_function(f.history.append, inp=[total_loss], Tout=[])

        return total_loss, total_grads

    # store these information as members so we can use them outside the scope
    f.iter = tf.Variable(0)
    f.idx = idx
    f.part = part
    f.shapes = shapes
    f.assign_new_model_parameters = assign_new_model_parameters
    f.history = []

    return f
def generate_dataset(patch):
    """Build one dataset for ``patch`` by walking its tree of subpatches.

    A patch whose ``patches`` attribute equals ``[]`` is a leaf and is
    converted with ``dataset_on_patch``.  Otherwise the datasets of all
    subpatches are generated recursively and joined, in order, with
    ``concatenate``.

    Returns:
        The combined dataset, or ``None`` when ``patch.patches`` is not
        ``[]`` yet yields no subpatch.
    """
    if patch.patches == []:
        return dataset_on_patch(patch)

    combined = None
    for child in patch.patches:
        child_dataset = generate_dataset(child)
        if combined is None:
            combined = child_dataset
        else:
            combined = combined.concatenate(child_dataset)
    return combined
25 | patch.s_tf_1, patch.J_tf_1 = patch.num_s_J_tf(k=1) 26 | patch.omega_omegabar = patch.get_omega_omegabar(lambdify=True) 27 | patch.restriction = patch.get_restriction(lambdify=True) 28 | patch.r_tf = patch.num_restriction_tf() 29 | 30 | x = tf.convert_to_tensor(np.array(patch.points, dtype=np.complex64)) 31 | y = tf.cast(patch.num_Omega_Omegabar_tf(), dtype=tf.float32) 32 | 33 | mass = y / tf.cast(patch.num_FS_volume_form_tf('identity', k=1), dtype=tf.float32) 34 | 35 | # The Kahler metric calculated by complex_hessian includes the derivative of 36 | # the norm_coordinate. Here the restriction is linear transformed so that 37 | # the corresponding column and row will be ignored in the hessian. 38 | trans_mat = np.delete(np.identity(patch.n_dim), patch.norm_coordinate, axis=0) 39 | trans_tensor = tf.convert_to_tensor(np.array(trans_mat, dtype=np.complex64)) 40 | restriction = tf.matmul(patch.r_tf, trans_tensor) 41 | 42 | dataset = tf.data.Dataset.from_tensor_slices((x, y, mass, restriction)) 43 | 44 | return dataset 45 | 46 | -------------------------------------------------------------------------------- /training/README.md: -------------------------------------------------------------------------------- 1 | These are the training scripts. 
Put them in the root folder then run the .sh file 2 | -------------------------------------------------------------------------------- /training/bihomoNN_train.py: -------------------------------------------------------------------------------- 1 | import os 2 | #os.environ['CUDA_VISIBLE_DEVICES'] = '0' 3 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 4 | import sys 5 | sys.path.append("..") 6 | 7 | import tensorflow as tf 8 | import tensorflow_probability as tfp 9 | import numpy as np 10 | import sympy as sp 11 | import time 12 | import math 13 | import argparse 14 | 15 | import MLGeometry as mlg 16 | from models import * 17 | 18 | z0, z1, z2, z3, z4 = sp.symbols('z0, z1, z2, z3, z4') 19 | Z = [z0,z1,z2,z3,z4] 20 | 21 | parser = argparse.ArgumentParser() 22 | # Data generation 23 | parser.add_argument('--seed', type=int) 24 | parser.add_argument('--n_pairs', type=int) 25 | parser.add_argument('--batch_size', type=int) 26 | parser.add_argument('--function') 27 | parser.add_argument('--psi', type=float) 28 | parser.add_argument('--phi', type=float) 29 | parser.add_argument('--alpha', type=float) 30 | 31 | # Network 32 | parser.add_argument('--OuterProductNN_k', type=int) 33 | parser.add_argument('--layers') 34 | parser.add_argument('--k2_as_first_layer', action='store_true') 35 | parser.add_argument('--k4_as_first_layer', action='store_true') 36 | parser.add_argument('--load_model') 37 | parser.add_argument('--save_dir') 38 | parser.add_argument('--save_name') 39 | 40 | # Training 41 | parser.add_argument('--max_epochs', type=int) 42 | parser.add_argument('--loss_func') 43 | parser.add_argument('--clip_threshold', type=float) 44 | parser.add_argument('--optimizer', default='Adam') 45 | parser.add_argument('--learning_rate', type=float, default=0.001) 46 | parser.add_argument('--decay_rate', type=float, default=1.0) 47 | parser.add_argument('--num_correction_pairs', type=int, default=10) 48 | 49 | args = parser.parse_args() 50 | print("Processing model: " + 
# Network
# The script calls keras.models / keras.optimizers, but neither its import
# block nor `from models import *` (restricted by models.__all__) brings the
# name `keras` into scope.
import keras

if args.OuterProductNN_k is not None:
    k = args.OuterProductNN_k
else:
    layers = args.layers
    n_units = [int(width) for width in layers.split('_')]
    n_hidden = len(n_units) - 1
    if args.k2_as_first_layer is True:
        k = 2**(n_hidden + 1)
    else:
        k = 2**n_hidden

model_list_OuterProductNN = [OuterProductNN_k2, OuterProductNN_k3, OuterProductNN_k4]
model_list_k2_as_first_layer = [k2_twolayers, k2_threelayers]
model_list_k4_as_first_layer = [k4_onelayer, k4_twolayers]
model_list = [zerolayer, onelayer, twolayers, threelayers, fourlayers, fivelayers]


def _pick_model_class(candidates, index, message):
    """Return ``candidates[index]``; exit with ``message`` if out of range.

    Negative indices are rejected explicitly: Python would wrap them around
    and silently return a model from the wrong end of the list.  Exiting
    replaces the old behaviour of printing the message and carrying on with
    ``model`` undefined.
    """
    if not 0 <= index < len(candidates):
        sys.exit(message)
    return candidates[index]


load_path = args.load_model
if load_path is not None:
    model = keras.models.load_model(load_path, compile=False)
elif args.OuterProductNN_k is not None:
    model = _pick_model_class(
        model_list_OuterProductNN, k - 2,
        "Error: Only k = 2,3,4 are supported now")()
elif args.k2_as_first_layer:
    model = _pick_model_class(
        model_list_k2_as_first_layer, n_hidden - 2,
        "Error: Only two and three layers are supported")(n_units)
elif args.k4_as_first_layer:
    model = _pick_model_class(
        model_list_k4_as_first_layer, n_hidden - 1,
        "Error: Only one and two layers is supported")(n_units)
else:
    model = _pick_model_class(
        model_list, n_hidden,
        "Error: Only k <= 32 is supported")(n_units)
def cal_max_error(dataset):
    """Return the largest per-batch ``mlg.loss.max_error`` over ``dataset``.

    For every batch ``(points, Omega_Omegabar, mass, restriction)`` the
    normalised volume form is computed with ``volume_form`` and the batch
    error is evaluated.  The running maximum starts at 0, so an empty
    dataset yields 0.
    """
    worst = 0
    for batch in dataset:
        points, Omega_Omegabar, mass, restriction = batch
        det_omega = volume_form(points, Omega_Omegabar, mass, restriction)
        batch_error = mlg.loss.max_error(Omega_Omegabar, det_omega, mass).numpy()
        worst = batch_error if batch_error > worst else worst
    return worst
keras.optimizers.Adam(learning_rate=args.learning_rate) 198 | 199 | train_log_dir = save_dir + '/logs/' + save_name + '/train' 200 | test_log_dir = save_dir + '/logs/' + save_name + '/test' 201 | train_summary_writer = tf.summary.create_file_writer(train_log_dir) 202 | test_summary_writer = tf.summary.create_file_writer(test_log_dir) 203 | 204 | stop = False 205 | loss_old = 100000 206 | epoch = 0 207 | 208 | while epoch < max_epochs and stop is False: 209 | epoch = epoch + 1 210 | for step, (points, Omega_Omegabar, mass, restriction) in enumerate(train_set): 211 | with tf.GradientTape() as tape: 212 | 213 | det_omega = volume_form(points, Omega_Omegabar, mass, restriction) 214 | loss = loss_func(Omega_Omegabar, det_omega, mass) 215 | grads = tape.gradient(loss, model.trainable_weights) 216 | if clip_threshold is not None: 217 | grads = [tf.clip_by_value(grad, -clip_threshold, clip_threshold) for grad in grads] 218 | optimizer.apply_gradients(zip(grads, model.trainable_weights)) 219 | #tf.print(model.tranable_weights) 220 | #if step % 500 == 0: 221 | # print("step %d: loss = %.4f" % (step, loss)) 222 | if epoch % 10 == 0: 223 | sigma_max_train = cal_max_error(train_set) 224 | sigma_max_test = cal_max_error(test_set) 225 | 226 | E_train = cal_total_loss(train_set, mlg.loss.weighted_MSE) 227 | E_test = cal_total_loss(test_set, mlg.loss.weighted_MSE) 228 | 229 | sigma_train = cal_total_loss(train_set, mlg.loss.weighted_MAPE) 230 | sigma_test = cal_total_loss(test_set, mlg.loss.weighted_MAPE) 231 | 232 | def delta_sigma_square_train(y_true, y_pred, mass): 233 | weights = mass / tf.reduce_sum(mass) 234 | return tf.reduce_sum((tf.abs(y_true - y_pred) / y_true - sigma_train)**2 * weights) 235 | 236 | def delta_sigma_square_test(y_true, y_pred, mass): 237 | weights = mass / tf.reduce_sum(mass) 238 | return tf.reduce_sum((tf.abs(y_true - y_pred) / y_true - sigma_test)**2 * weights) 239 | 240 | delta_sigma_train = math.sqrt(cal_total_loss(train_set, delta_sigma_square_train) 
/ HS.n_points) 241 | delta_sigma_test = math.sqrt(cal_total_loss(test_set, delta_sigma_square_test) / HS.n_points) 242 | 243 | print("train_loss:", loss.numpy()) 244 | print("test_loss:", cal_total_loss(test_set, loss_func)) 245 | 246 | with train_summary_writer.as_default(): 247 | tf.summary.scalar('max_error', sigma_max_train, step=epoch) 248 | tf.summary.scalar('delta_sigma', delta_sigma_train, step=epoch) 249 | tf.summary.scalar('E', E_train, step=epoch) 250 | tf.summary.scalar('sigma', sigma_train , step=epoch) 251 | 252 | with test_summary_writer.as_default(): 253 | tf.summary.scalar('max_error', sigma_max_test, step=epoch) 254 | tf.summary.scalar('delta_sigma', delta_sigma_test, step=epoch) 255 | tf.summary.scalar('E', E_test, step=epoch) 256 | tf.summary.scalar('sigma', sigma_test, step=epoch) # Early stopping 257 | 258 | # if early_stopping is True and epoch > 800: 259 | # if epoch % 5 == 0: 260 | # if train_loss > loss_old: 261 | # stop = True 262 | # loss_old = train_loss 263 | 264 | train_time = time.time() - start_time 265 | 266 | model.save(save_dir + '/' + save_name) 267 | 268 | sigma_train = cal_total_loss(train_set, mlg.loss.weighted_MAPE) 269 | sigma_test = cal_total_loss(test_set, mlg.loss.weighted_MAPE) 270 | E_train = cal_total_loss(train_set, mlg.loss.weighted_MSE) 271 | E_test = cal_total_loss(test_set, mlg.loss.weighted_MSE) 272 | sigma_max_train = cal_max_error(train_set) 273 | sigma_max_test = cal_max_error(test_set) 274 | 275 | ####################################################################### 276 | # Calculate delta_sigma 277 | 278 | def delta_sigma_square_train(y_true, y_pred, mass): 279 | weights = mass / tf.reduce_sum(mass) 280 | return tf.reduce_sum((tf.abs(y_true - y_pred) / y_true - sigma_train)**2 * weights) 281 | 282 | def delta_sigma_square_test(y_true, y_pred, mass): 283 | weights = mass / tf.reduce_sum(mass) 284 | return tf.reduce_sum((tf.abs(y_true - y_pred) / y_true - sigma_test)**2 * weights) 285 | 286 | def 
delta_E_square_train(y_true, y_pred, mass): 287 | weights = mass / tf.reduce_sum(mass) 288 | return tf.reduce_sum(((y_pred / y_true - 1)**2 - E_train)**2 * weights) 289 | 290 | def delta_E_square_test(y_true, y_pred, mass): 291 | weights = mass / tf.reduce_sum(mass) 292 | return tf.reduce_sum(((y_pred / y_true - 1)**2 - E_test)**2 * weights) 293 | 294 | delta_sigma_train = math.sqrt(cal_total_loss(train_set, delta_sigma_square_train) / HS.n_points) 295 | delta_sigma_test = math.sqrt(cal_total_loss(test_set, delta_sigma_square_test) / HS.n_points) 296 | delta_E_train = math.sqrt(cal_total_loss(train_set, delta_E_square_train) / HS.n_points) 297 | delta_E_test = math.sqrt(cal_total_loss(test_set, delta_E_square_test) / HS.n_points) 298 | 299 | #print(delta_sigma_train) 300 | #print(delta_sigma_test) 301 | 302 | ##################################################################### 303 | # Write to file 304 | 305 | with open(save_dir + save_name + ".txt", "w") as f: 306 | f.write('[Results] \n') 307 | f.write('model_name = {} \n'.format(save_name)) 308 | f.write('seed = {} \n'.format(seed)) 309 | f.write('n_pairs = {} \n'.format(n_pairs)) 310 | f.write('n_points = {} \n'.format(HS.n_points)) 311 | f.write('batch_size = {} \n'.format(batch_size)) 312 | f.write('function = {} \n'.format(args.function)) 313 | f.write('psi = {} \n'.format(psi)) 314 | if args.function == 'f1': 315 | f.write('phi = {} \n'.format(phi)) 316 | elif args.function == 'f2': 317 | f.write('alpha = {} \n'.format(alpha)) 318 | f.write('k = {} \n'.format(k)) 319 | f.write('n_parameters = {} \n'.format(model.count_params())) 320 | f.write('loss function = {} \n'.format(loss_func.__name__)) 321 | if clip_threshold is not None: 322 | f.write('clip_threshold = {} \n'.format(clip_threshold)) 323 | f.write('\n') 324 | f.write('n_epochs = {} \n'.format(max_epochs)) 325 | f.write('train_time = {:.6g} \n'.format(train_time)) 326 | f.write('sigma_train = {:.6g} \n'.format(sigma_train)) 327 | f.write('sigma_test 
#!/bin/bash
# Sweep driver for bihomoNN_train.py: runs one training job for every
# combination of psi, loss function and layer specification listed in the
# three loops below. The --load_model path and --save_dir are both derived
# from the current psi and layers values.
for psi in 0.5; do
  for loss_func in "weighted_MAPE"; do
    for layers in "300_300_300_1"; do
      # Every expansion is quoted so that a value containing spaces or glob
      # characters is still passed to the script as a single argument.
      python bihomoNN_train.py --seed 1234 \
        --n_pairs 100000 \
        --batch_size 5000 \
        --function "f0" \
        --psi "$psi" \
        --layers "$layers" \
        --load_model "f0_psi${psi}/${layers}" \
        --save_dir "experiments.yidi/train_curve/f0_psi${psi}/" \
        --save_name "${layers}" \
        --optimizer 'lbfgs' \
        --learning_rate 0.001 \
        --decay_rate 1 \
        --max_epochs 1000 \
        --loss_func "${loss_func}"
    done
  done
done
"""Keras model definitions used by the bihomogeneous-network training script.

Every model applies one of the ``bnn.Bihomogeneous*`` layers to its input,
feeds the result through a stack of ``bnn`` dense layers ending in a single
output unit, and returns ``tf.math.log`` of that output.

The first dense layer of each model is declared with a fixed input width
(25, 15**2, 35**2 or 70**2).
NOTE(review): presumably this is the feature count produced by the matching
``bnn.Bihomogeneous*`` layer -- confirm in MLGeometry/bihomoNN.py.
"""

import keras
import tensorflow as tf
from MLGeometry import bihomoNN as bnn

__all__ = ['zerolayer', 'onelayer', 'twolayers', 'threelayers', 'fourlayers',
           'fivelayers', 'OuterProductNN_k2', 'OuterProductNN_k3', 'OuterProductNN_k4',
           'k2_twolayers', 'k2_threelayers', 'k4_onelayer', 'k4_twolayers']


class zerolayer(keras.Model):
    """``Bihomogeneous`` -> ``WidthOneDense(25, 1)`` -> log.

    ``n_units`` is accepted by the constructor but not used.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous = bnn.Bihomogeneous()
        self.layer1 = bnn.WidthOneDense(25, 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous(inputs)
        x = self.layer1(x)
        x = tf.math.log(x)
        return x


class onelayer(keras.Model):
    """``Bihomogeneous`` -> one squared ``SquareDense`` layer -> output unit -> log.

    ``n_units[0]`` is the width of the hidden layer.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous = bnn.Bihomogeneous()
        self.layer1 = bnn.SquareDense(25, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = tf.math.log(x)
        return x


class twolayers(keras.Model):
    """``Bihomogeneous`` -> two squared ``SquareDense`` layers -> output unit -> log.

    ``n_units[0]`` and ``n_units[1]`` are the hidden-layer widths.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous = bnn.Bihomogeneous()
        self.layer1 = bnn.SquareDense(25, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], n_units[1], activation=tf.square)
        self.layer3 = bnn.SquareDense(n_units[1], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = tf.math.log(x)
        return x


class threelayers(keras.Model):
    """``Bihomogeneous`` -> three squared ``SquareDense`` layers -> output unit -> log.

    ``n_units[0]`` .. ``n_units[2]`` are the hidden-layer widths.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous = bnn.Bihomogeneous()
        self.layer1 = bnn.SquareDense(25, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], n_units[1], activation=tf.square)
        self.layer3 = bnn.SquareDense(n_units[1], n_units[2], activation=tf.square)
        self.layer4 = bnn.SquareDense(n_units[2], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = tf.math.log(x)
        return x


class fourlayers(keras.Model):
    """``Bihomogeneous`` -> four squared ``SquareDense`` layers -> output unit -> log.

    ``n_units[0]`` .. ``n_units[3]`` are the hidden-layer widths.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous = bnn.Bihomogeneous()
        self.layer1 = bnn.SquareDense(25, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], n_units[1], activation=tf.square)
        self.layer3 = bnn.SquareDense(n_units[1], n_units[2], activation=tf.square)
        self.layer4 = bnn.SquareDense(n_units[2], n_units[3], activation=tf.square)
        self.layer5 = bnn.SquareDense(n_units[3], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = tf.math.log(x)
        return x


class fivelayers(keras.Model):
    """``Bihomogeneous`` -> five squared ``SquareDense`` layers -> output unit -> log.

    ``n_units[0]`` .. ``n_units[4]`` are the hidden-layer widths.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous = bnn.Bihomogeneous()
        self.layer1 = bnn.SquareDense(25, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], n_units[1], activation=tf.square)
        self.layer3 = bnn.SquareDense(n_units[1], n_units[2], activation=tf.square)
        self.layer4 = bnn.SquareDense(n_units[2], n_units[3], activation=tf.square)
        self.layer5 = bnn.SquareDense(n_units[3], n_units[4], activation=tf.square)
        self.layer6 = bnn.SquareDense(n_units[4], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = tf.math.log(x)
        return x


class OuterProductNN_k2(keras.Model):
    """``Bihomogeneous_k2`` -> ``WidthOneDense(15**2, 1)`` -> log."""

    def __init__(self):
        super().__init__()
        self.bihomogeneous_k2 = bnn.Bihomogeneous_k2()
        self.layer1 = bnn.WidthOneDense(15**2, 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous_k2(inputs)
        x = self.layer1(x)
        x = tf.math.log(x)
        return x


class OuterProductNN_k3(keras.Model):
    """``Bihomogeneous_k3`` -> ``WidthOneDense(35**2, 1)`` -> log."""

    def __init__(self):
        super().__init__()
        self.bihomogeneous_k3 = bnn.Bihomogeneous_k3()
        self.layer1 = bnn.WidthOneDense(35**2, 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous_k3(inputs)
        x = self.layer1(x)
        x = tf.math.log(x)
        return x


class OuterProductNN_k4(keras.Model):
    """``Bihomogeneous_k4`` -> ``WidthOneDense(70**2, 1)`` -> log, with explicit devices."""

    def __init__(self):
        super().__init__()
        self.bihomogeneous_k4 = bnn.Bihomogeneous_k4()
        self.layer1 = bnn.WidthOneDense(70**2, 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``.

        The feature layer is requested on '/cpu:0' and the dense layer plus
        log on '/gpu:0'.
        NOTE(review): presumably this keeps the wide k=4 feature expansion
        out of GPU memory; confirm the intent and that the exact device
        scopes match the upstream file.
        """
        with tf.device('/cpu:0'):
            x = self.bihomogeneous_k4(inputs)
        with tf.device('/gpu:0'):
            x = self.layer1(x)
            x = tf.math.log(x)
        return x


class k2_twolayers(keras.Model):
    """``Bihomogeneous_k2`` -> two squared ``SquareDense`` layers -> output unit -> log.

    ``n_units[0]`` and ``n_units[1]`` are the hidden-layer widths.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous_k2 = bnn.Bihomogeneous_k2()
        self.layer1 = bnn.SquareDense(15**2, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], n_units[1], activation=tf.square)
        self.layer3 = bnn.SquareDense(n_units[1], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous_k2(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = tf.math.log(x)
        return x


class k2_threelayers(keras.Model):
    """``Bihomogeneous_k2`` -> three squared ``SquareDense`` layers -> output unit -> log.

    ``n_units[0]`` .. ``n_units[2]`` are the hidden-layer widths.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous_k2 = bnn.Bihomogeneous_k2()
        self.layer1 = bnn.SquareDense(15**2, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], n_units[1], activation=tf.square)
        self.layer3 = bnn.SquareDense(n_units[1], n_units[2], activation=tf.square)
        self.layer4 = bnn.SquareDense(n_units[2], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous_k2(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = tf.math.log(x)
        return x


class k4_onelayer(keras.Model):
    """``Bihomogeneous_k4`` -> one squared ``SquareDense`` layer -> output unit -> log.

    ``n_units[0]`` is the width of the hidden layer.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous_k4 = bnn.Bihomogeneous_k4()
        self.layer1 = bnn.SquareDense(70**2, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous_k4(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = tf.math.log(x)
        return x


class k4_twolayers(keras.Model):
    """``Bihomogeneous_k4`` -> two squared ``SquareDense`` layers -> output unit -> log.

    ``n_units[0]`` and ``n_units[1]`` are the hidden-layer widths.
    """

    def __init__(self, n_units):
        super().__init__()
        self.bihomogeneous_k4 = bnn.Bihomogeneous_k4()
        self.layer1 = bnn.SquareDense(70**2, n_units[0], activation=tf.square)
        self.layer2 = bnn.SquareDense(n_units[0], n_units[1], activation=tf.square)
        self.layer3 = bnn.SquareDense(n_units[1], 1)

    def call(self, inputs):
        """Return the log of the single-unit output for ``inputs``."""
        x = self.bihomogeneous_k4(inputs)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = tf.math.log(x)
        return x