├── README.md └── codes ├── Ising_2D ├── assets │ ├── Z.png │ ├── Z2.png │ ├── gen1.png │ ├── gen2.png │ ├── gen3.png │ ├── gen4.png │ ├── gradients.png │ ├── image-20190430141119642.png │ ├── image-20190430143433180.png │ ├── image-20190430144239877.png │ ├── image-20190430144449213.png │ ├── image-20190430145634644.png │ ├── image-20190430150813001.png │ ├── image-20190430152031663.png │ ├── init.png │ ├── init2.png │ └── px.png ├── kacward.py └── tensor_contraction_simple.ipynb ├── MPS_Born_machine ├── .ipynb_checkpoints │ └── mps_tutorial-checkpoint.ipynb ├── assets │ ├── Z.png │ ├── Z2.png │ ├── gen1.png │ ├── gen2.png │ ├── gen3.png │ ├── gen4.png │ ├── gradients.png │ ├── image-20190430141119642.png │ ├── image-20190430143433180.png │ ├── image-20190430144239877.png │ ├── image-20190430144449213.png │ ├── image-20190430145634644.png │ ├── image-20190430150813001.png │ ├── image-20190430152031663.png │ ├── init.png │ ├── init2.png │ └── px.png ├── imgs │ ├── L.png │ ├── L0.png │ ├── Z.png │ ├── born_machine.png │ ├── cond_prob.png │ ├── cp.png │ ├── joint_prob.png │ ├── mnist_mps.png │ ├── mps.png │ ├── mps1.png │ ├── mps2.png │ ├── mps_left.png │ ├── psi_prime.png │ ├── rank_one.png │ ├── recon0.png │ ├── recon1.png │ ├── supunsup.png │ ├── tensor.png │ ├── tensor_diagram.png │ ├── tensor_networks.png │ ├── training.png │ ├── tucker.png │ └── two_qubits.png ├── mnist784_bin_1000.npy └── mps_tutorial.ipynb ├── VAE ├── .ipynb_checkpoints │ └── REINFORCE_VS_Reparametrization-checkpoint.ipynb └── REINFORCE_VS_Reparametrization.ipynb ├── VAN ├── .ipynb_checkpoints │ └── SK_variational-checkpoint.ipynb └── SK_variational.ipynb └── neural_networks ├── .ipynb_checkpoints ├── MLP_sin-checkpoint.ipynb ├── grad_matrix_products-checkpoint.ipynb ├── logistic_regression-checkpoint.ipynb ├── lr_mlp_conv-checkpoint.ipynb └── tanh-checkpoint.ipynb ├── MLP_sin.ipynb ├── grad_matrix_products.ipynb ├── logistic_regression.ipynb ├── lr_mlp_conv.ipynb └── tanh.ipynb /README.md: 
-------------------------------------------------------------------------------- 1 | This repo contains demo codes for my lectures in 2 | * Lectures at the ITP, CAS http://www.itp.cas.cn/xshd/yjsgjkc/201903/t20190311_5253828.html 3 | * Lectures at summer school of *The fifth conference on statistical physics and complex systems* http://spcsc2019.ustc.edu.cn/summerschool2019 4 | -------------------------------------------------------------------------------- /codes/Ising_2D/assets/Z.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/Z.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/Z2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/Z2.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/gen1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/gen1.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/gen2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/gen2.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/gen3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/gen3.png 
-------------------------------------------------------------------------------- /codes/Ising_2D/assets/gen4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/gen4.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/gradients.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/gradients.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/image-20190430141119642.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/image-20190430141119642.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/image-20190430143433180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/image-20190430143433180.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/image-20190430144239877.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/image-20190430144239877.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/image-20190430144449213.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/image-20190430144449213.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/image-20190430145634644.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/image-20190430145634644.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/image-20190430150813001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/image-20190430150813001.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/image-20190430152031663.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/image-20190430152031663.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/init.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/init2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/init2.png -------------------------------------------------------------------------------- /codes/Ising_2D/assets/px.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/panzhang83/lectures/723042252171ab9d252c4921bd199f97910f8dce/codes/Ising_2D/assets/px.png -------------------------------------------------------------------------------- /codes/Ising_2D/kacward.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ''' 4 | Kac-Ward exact Ising 5 | See Theorem 1 of https://arxiv.org/abs/1011.3494 6 | ''' 7 | 8 | phi = np.array([[0., np.pi/2, -np.pi/2, np.nan ], 9 | [-np.pi/2, 0.0, np.nan, np.pi/2], 10 | [np.pi/2, np.nan, 0.0, -np.pi/2], 11 | [np.nan, -np.pi/2, np.pi/2, 0] 12 | ]) 13 | 14 | def logcosh(x): 15 | xp = np.abs(x) 16 | if (xp< 12): 17 | return np.log( np.cosh(x) ) 18 | else: 19 | return xp - np.log(2.) 20 | 21 | def neighborsite(i, n, L): 22 | """ 23 | The coordinate system is geometrically left->right, down -> up 24 | y| 25 | | 26 | | 27 | |________ x 28 | (0,0) 29 | So as a definition, l means x-1, r means x+1, u means y+1, and d means y-1 30 | """ 31 | x = i%L 32 | y = i//L # y denotes 33 | site = None 34 | # ludr : 35 | if (n==0): 36 | if (x-1>=0): 37 | site = (x-1) + y*L 38 | elif (n==1): 39 | if (y+1=0): 43 | site = x + (y-1)*L 44 | elif (n==3): 45 | if (x+1" 149 | ] 150 | }, 151 | "execution_count": 3, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | }, 155 | { 156 | "data": { 157 | "image/png": "\n", 158 | "text/plain": [ 159 | "
" 160 | ] 161 | }, 162 | "metadata": {}, 163 | "output_type": "display_data" 164 | } 165 | ], 166 | "source": [ 167 | "%matplotlib inline\n", 168 | "import matplotlib.pyplot as plt\n", 169 | "\n", 170 | "plt.plot(np.log10(Ns),vars1)\n", 171 | "plt.plot(np.log10(Ns),vars3)\n", 172 | "plt.plot(np.log10(Ns),vars2)\n", 173 | "\n", 174 | "#plt.plot(vars1)\n", 175 | "#plt.plot(vars2)\n", 176 | "plt.legend(['REINFORCE', 'REINFORCE+Variance reduction (mean)','Reparametrization'])" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Variance of the estimates using reparameterization trick is one order of magnitude smaller than the estimates from the first method!" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python 3", 190 | "language": "python", 191 | "name": "python3" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 3 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython3", 203 | "version": "3.6.8" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 1 208 | } 209 | -------------------------------------------------------------------------------- /codes/VAE/REINFORCE_VS_Reparametrization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Reparameterization Trick\n", 8 | "Following https://gokererdogan.github.io/2016/07/01/reparameterization-trick/" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "Considering a normal distribution with mean $\\mu$ and unit variance, $q_{\\mu}(x) = N(\\mu,1)$, and we want to adjust $\\theta$ for optimizing the second moment\n", 16 | "\n", 17 | "$\n", 18 | "\\hat\\mu=\\arg\\min_{\\mu} \\mathcal 
L(\\mu)=\\arg\\min_{\\mu}\\mathbb E_q[x^2]\n", 19 | "$\n", 20 | "\n", 21 | "Apparently, analytically we have $\\nabla_\\mu\\mathcal L=\\nabla_\\mu(\\mu^2+1)=2\\mu$.\n", 22 | "\n", 23 | "But we want to compare numerical computation of this quantity using both REINFORCE and the Reparametrization method.\n", 24 | "\n", 25 | "For the REINFORCE algorithm, we have \n", 26 | "\n", 27 | "$\\nabla_\\mu\\mathcal L=\\mathbb E_q(x^2\\nabla_\\mu\\log q(x))=\\mathbb E_q(x^2(x-\\mu))=\\mathbb E_q(x^3-x^2\\mu)=\\mu^3+3\\mu-\\mu(\\mu^2+1)=2\\mu$\n", 28 | "\n", 29 | "One way to keep the expectation of the gradients while reducing their variance is to add a *baseline* which is not a function of $x$. A very simple approach is to use the mean, such that\n", 30 | "\n", 31 | "$\\nabla_\\mu\\mathcal L=\\mathbb E_q([x^2-\\mathbb E_q(x^2)]\\nabla_\\mu\\log q(x))=\\mathbb E_q([x^2-\\mathbb E_q(x^2)](x-\\mu))=2\\mu$\n", 32 | "\n", 33 | "For the reparametrization,\n", 34 | "$\n", 35 | "x = \\mu + \\epsilon, \\quad \\epsilon \\sim \\mathcal{N}(0,1)\n", 36 | "$\n", 37 | "\n", 38 | "Then we have\n", 39 | "$\\nabla_\\mu\\mathcal L=\\nabla_\\mu\\mathbb E_{\\mathcal N(0,1)} (\\mu+\\epsilon)^2=\\mathbb E_{\\mathcal N(0,1)}\\nabla_\\mu (\\mu+\\epsilon)^2=\\mathbb E_{\\mathcal N(0,1)}[2(\\mu+\\epsilon)]=2\\mu$\n", 40 | "\n", 41 | "Let us get an impression of the gradient estimates given by the three methods by comparing them on a single instance:" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 9, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "grad of REINFORCE= 4.097396172279478\n", 54 | "grad of REINFORCE + baseline= 4.01904525710015\n", 55 | "grad of Reparametrization = 4.031061372628896\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "import numpy as np\n", 61 | "N = 1000\n", 62 | "theta = 2.0\n", 63 | "eps = np.random.randn(N)\n", 64 | "x = theta + eps\n", 65 | "\n", 66 | "grad1 = lambda x: 
np.sum(np.square(x)*(x-theta)) / x.size\n", 67 | "grad2 = lambda eps: np.sum(2*(theta + eps)) / x.size\n", 68 | "grad3 = lambda x: np.sum((np.square(x)-np.sum(np.square(x))/x.size)*(x-theta)) / x.size\n", 69 | "\n", 70 | "print(\"grad of REINFORCE=\",grad1(x))\n", 71 | "print(\"grad of REINFORCE + baseline=\",grad3(x))\n", 72 | "print(\"grad of Reparametrization = \",grad2(eps))\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Let us check the variance for different sample sizes." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 8, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "[4.55637839 4.10093377 4.03974593 3.98094517 4.00103464]\n", 92 | "[4.11529199 4.02050707 3.99739092 3.99746569 3.99969491]\n", 93 | "[3.89359207 3.98636754 4.01466151 3.99127621 4.00268074]\n", 94 | "[1.54686336e+01 9.41211946e-01 1.28511151e-01 8.85848125e-03\n", 95 | " 8.06813959e-04]\n", 96 | "[3.18476310e-01 3.68889932e-02 3.64861942e-03 2.92998741e-04\n", 97 | " 2.77811917e-05]\n", 98 | "[7.63648618e+00 4.21657403e-01 5.56574377e-02 3.59091375e-03\n", 99 | " 3.74900223e-04]\n" 100 | ] 101 | } 102 | ], 103 | "source": [ 104 | "Ns = [10, 100, 1000, 10000, 100000]\n", 105 | "reps = 100\n", 106 | "\n", 107 | "means1 = np.zeros(len(Ns))\n", 108 | "vars1 = np.zeros(len(Ns))\n", 109 | "means2 = np.zeros(len(Ns))\n", 110 | "means3 = np.zeros(len(Ns))\n", 111 | "vars2 = np.zeros(len(Ns))\n", 112 | "vars3 = np.zeros(len(Ns))\n", 113 | "\n", 114 | "est1 = np.zeros(reps)\n", 115 | "est2 = np.zeros(reps)\n", 116 | "est3 = np.zeros(reps)\n", 117 | "for i, N in enumerate(Ns):\n", 118 | " for r in range(reps):\n", 119 | " x = np.random.randn(N) + theta\n", 120 | " est1[r] = grad1(x)\n", 121 | " est3[r] = grad3(x)\n", 122 | " eps = np.random.randn(N)\n", 123 | " est2[r] = grad2(eps)\n", 124 | " means1[i] = np.mean(est1)\n", 125 | " means2[i] = 
np.mean(est2)\n", 126 | " means3[i] = np.mean(est3)\n", 127 | " vars1[i] = np.var(est1)\n", 128 | " vars2[i] = np.var(est2)\n", 129 | " vars3[i] = np.var(est3)\n", 130 | " \n", 131 | "print(means1)\n", 132 | "print(means2)\n", 133 | "print(means3)\n", 134 | "print\n", 135 | "print (vars1)\n", 136 | "print (vars2)\n", 137 | "print (vars3)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 3, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "" 149 | ] 150 | }, 151 | "execution_count": 3, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | }, 155 | { 156 | "data": { 157 | "image/png": "\n", 158 | "text/plain": [ 159 | "
" 160 | ] 161 | }, 162 | "metadata": {}, 163 | "output_type": "display_data" 164 | } 165 | ], 166 | "source": [ 167 | "%matplotlib inline\n", 168 | "import matplotlib.pyplot as plt\n", 169 | "\n", 170 | "plt.plot(np.log10(Ns),vars1)\n", 171 | "plt.plot(np.log10(Ns),vars3)\n", 172 | "plt.plot(np.log10(Ns),vars2)\n", 173 | "\n", 174 | "#plt.plot(vars1)\n", 175 | "#plt.plot(vars2)\n", 176 | "plt.legend(['REINFORCE', 'REINFORCE+Variance reduction (mean)','Reparametrization'])" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Variance of the estimates using reparameterization trick is one order of magnitude smaller than the estimates from the first method!" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python 3", 190 | "language": "python", 191 | "name": "python3" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 3 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython3", 203 | "version": "3.6.8" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 1 208 | } 209 | -------------------------------------------------------------------------------- /codes/VAN/.ipynb_checkpoints/SK_variational-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Variational methods for Sherrington-Kirkpatrick model\n", 8 | "Pan Zhang\n", 9 | "Institute of Theoretical Physics, Chinese Academy of Sciences\n", 10 | "\n", 11 | "We compuare variational free energy given by the variational mean-field method and the Variational Autoregressive Network (Physical Review Letters 122, 080602), for the Sherrington-Kirkpatrick spin glass model." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## SK model\n", 19 | "$n$ denotes the number of variables, $J_{ij}=\\frac{1}{\\sqrt{n}}\\mathcal N(0,1)$ is the coupling matrix and $\\beta$ is the inverse temperature. One needs to notice that diagonal terms of the coupling matrix must be zero, i.e.\n", 20 | "$J_{ii}=0$, and the coupling matrix $\\mathbf J$ is symmetric.\n", 21 | "\n", 22 | "The following code generates a small instance of the SK model" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 203, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import torch,math\n", 32 | "import numpy as np\n", 33 | "from torch import nn\n", 34 | "from scipy.special import logsumexp\n", 35 | "import sys\n", 36 | "device=torch.device('cpu')\n", 37 | "device=torch.device('cuda:0')\n", 38 | "n=20 # number of spins\n", 39 | "beta=0.5 # inverse temperature\n", 40 | "seed=1\n", 41 | "torch.manual_seed(seed)\n", 42 | "J=torch.randn(n,n,device=device)/math.sqrt(n)\n", 43 | "J = torch.triu(J,diagonal=1) # take the upper triangular matrix\n", 44 | "J = J+J.t() # make the coupling matrix symmetric\n", 45 | "J_np = J.cpu().numpy()\n", 46 | "J.requires_grad=False" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "## Exact enumerations for small systems\n", 54 | "When the number of spins is small, e.g. $n\\leq 20$, we can compute exactly the free energy, energy and entropy by enumerating all $2^n$ configurations." 
55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 204, 60 | "metadata": { 61 | "scrolled": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "def cfg_id_to_sample(cfg_id):\n", 66 | " return np.array( [((cfg_id >> i) & 1) * 2 - 1 for i in range(n-1,-1,-1)])\n", 67 | "\n", 68 | "def list_energy(print_step=float('inf')):\n", 69 | " samples=[]\n", 70 | " energy_arr = []\n", 71 | " for cfg_id in range(1 << n):\n", 72 | " if (cfg_id + 1) % print_step == 0:\n", 73 | " sys.stdout.write(\"\\rEnumerating all configurations: %d / 100\"%(int(cfg_id /(1 << n)*100)+1))\n", 74 | " sample = cfg_id_to_sample(cfg_id)\n", 75 | " energy_arr.append(sample.dot(J_np).dot(sample)/2.0)\n", 76 | " samples.append(sample)\n", 77 | " \n", 78 | " cfg_id_arr = np.arange(1 << (n), dtype=int)\n", 79 | " energy_arr = np.array(energy_arr)\n", 80 | " energy_arr *= -1.0\n", 81 | " samples = np.array(samples)\n", 82 | "\n", 83 | " return cfg_id_arr, energy_arr\n", 84 | " \n", 85 | "def f_exact():\n", 86 | " if(n>20):\n", 87 | " return 0,0,0\n", 88 | " step=int((1<> i) & 1) * 2 - 1 for i in range(n-1,-1,-1)])\n", 67 | "\n", 68 | "def list_energy(print_step=float('inf')):\n", 69 | " samples=[]\n", 70 | " energy_arr = []\n", 71 | " for cfg_id in range(1 << n):\n", 72 | " if (cfg_id + 1) % print_step == 0:\n", 73 | " sys.stdout.write(\"\\rEnumerating all configurations: %d / 100\"%(int(cfg_id /(1 << n)*100)+1))\n", 74 | " sample = cfg_id_to_sample(cfg_id)\n", 75 | " energy_arr.append(sample.dot(J_np).dot(sample)/2.0)\n", 76 | " samples.append(sample)\n", 77 | " \n", 78 | " cfg_id_arr = np.arange(1 << (n), dtype=int)\n", 79 | " energy_arr = np.array(energy_arr)\n", 80 | " energy_arr *= -1.0\n", 81 | " samples = np.array(samples)\n", 82 | "\n", 83 | " return cfg_id_arr, energy_arr\n", 84 | " \n", 85 | "def f_exact():\n", 86 | " if(n>20):\n", 87 | " return 0,0,0\n", 88 | " step=int((1<)\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "lnZ = 0.0\n", 55 | "M = T.clone() # M will be the 
converged environment tensor\n", 56 | "print(math.log(M.norm()))\n", 57 | "Niter = 20 # number of iterations in RG. That is, 2^Niter matrices will be contracted finally.\n", 58 | "for i in range(Niter): # after i steps, there are totally 2^i matrices contracted.\n", 59 | " s = M.norm() # This is the normalization of a matrix contracted of 2^i T.\n", 60 | " lnZ = lnZ + torch.log(s)/2**i # Notice that we can only record a density of logarithm of the results, for contraction of infinite matrices.\n", 61 | " M = M/s\n", 62 | " M = M@M\n", 63 | "lnZ = lnZ + torch.trace(M)/(2**Niter) # trace(M) is the trace of contraction of all tensors.\n", 64 | "print(lnZ)\n", 65 | "lnZ.backward()\n", 66 | "RG_grad = T.grad.clone()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Autograd of the leading eigenvalue of the transfer matrix\n", 74 | "In the last section, we have evaluated $\\ln Z$ using RG. Actually, this can be evaluated analytically as\n", 75 | "$$\\ln Z =\n", 76 | "\\lim_{t\\to\\infty}\\frac{1}{t}\\ln \\mathrm{tr}(T^t)=\\lim_{t\\to\\infty}\\frac{1}{t}\\ln\\sum_{i=1}^N \\lambda_i^t=\n", 77 | "\\ln\\lambda_\\mathrm{max},$$\n", 78 | "where $\\lambda_{\\mathrm{max}}$ is the leading eigenvalue. Thus we can do back propagation directly on $\\lambda_{\\mathrm{max}}$." 
79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 6, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "w, v = torch.symeig(T, eigenvectors=True) # w is an array of eigenvalues, v is a matrix with each column storing an eigenvector\n", 88 | "T.grad.zero_()\n", 89 | "lambda_max = torch.log(w[-1])\n", 90 | "lambda_max.backward()\n", 91 | "eigenvalue_grad = ((T.grad + T.grad.t())/2) # need to symmetrize since it is an upper triangular matrix" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Using cavity of the matrix product\n", 99 | "Using the property of the environment matrix $M$, we have $Z=\\mathrm{tr}\\left[\\underbrace{T\\times T \\times \\cdots \\times T}_{\\mathrm{\\infty}}\\right]=\\mathrm{tr}(M\\times T \\times M)$. Then the gradient with respect to a specific $T$ can be written as\n", 100 | "$$\\nabla_T \\ln Z = \\frac{M\\times M}{\\mathrm{tr}(M\\times T\\times M)}.$$\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 46, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "impurity_grad = (M@M).t()/torch.trace(M@T@M)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Autograd of $\\ln Z$ using vector environment\n", 117 | "Here comes the trick: Infinite tensor product with a Periodic Boundary Condition (PBC) is equivalent to that with an Open Boundary Condition (OBC), which is written as\n", 118 | "$\\ln Z=\\ln \\left[u\\times\\underbrace{T\\times T \\times \\cdots \\times T}_{\\mathrm{\\infty}}\\times u\\right]=\\ln(m\\times T\\times m)$, where $m$ is the environment for the OBC tensor product, a vector. 
Obviously, $m$ is the leading eigenvector of the transfer matrix $T$ hence is normalized, and the gradient can be written as $$\\nabla_T\\ln Z=\\exp(-\\lambda_\\mathrm{max}) m\\otimes m.$$" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 47, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "v=v[:,-1] # the leading eigenvector of the transfer matrix $T$\n", 128 | "eigenvector_grad=v[:,None]@v[None,:]/torch.exp(lambda_max) # outer product of the leading eigenvector and its transpose" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Comparing these gradients" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 48, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "2.168404344971009e-18\n", 148 | "3.469446951953614e-18\n", 149 | "4.336808689942018e-18\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "# comparing RG_grad, eigenvector_grad, eigenvalue_grad and impurity_grad\n", 155 | "print ((impurity_grad-RG_grad).abs().max().item())\n", 156 | "print ((impurity_grad-eigenvalue_grad).abs().max().item())\n", 157 | "print ((impurity_grad-eigenvector_grad).abs().max().item())" 158 | ] 159 | } 160 | ], 161 | "metadata": { 162 | "kernelspec": { 163 | "display_name": "Python 3", 164 | "language": "python", 165 | "name": "python3" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | "version": 3 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | "nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython3", 177 | "version": "3.6.8" 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 2 182 | } 183 | -------------------------------------------------------------------------------- /codes/neural_networks/.ipynb_checkpoints/logistic_regression-checkpoint.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import time\n", 10 | "import torch\n", 11 | "import torchvision\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.functional as F\n", 14 | "import torch.optim as optim\n", 15 | "from torchvision import datasets, transforms\n", 16 | "%matplotlib inline\n", 17 | "import matplotlib.pyplot as plt" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "def show_imgs(imgs,l1=4,l2=5,s1=6,s2=6):\n", 27 | " \"\"\" Plot images \"\"\"\n", 28 | " plt.rcParams['figure.figsize']=(s1,s2)\n", 29 | " imgs=imgs.cpu().reshape([-1,28,28])\n", 30 | " g, ax = plt.subplots(l1,l2)\n", 31 | " for i in range(l1): \n", 32 | " for j in range(l2):\n", 33 | " a=i*l2+j\n", 34 | " if(a>=imgs.shape[0]):\n", 35 | " break\n", 36 | " ax[i][j].imshow(imgs[a,:,:],cmap='summer')\n", 37 | " ax[i][j].set_xticks([])\n", 38 | " ax[i][j].set_yticks([])\n", 39 | " plt.show()\n", 40 | "#show_imgs(data,2,10,10,2)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "scrolled": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "batch_size=100\n", 52 | "test_batch_size=100\n", 53 | "epochs=1000\n", 54 | "device=torch.device(\"cpu\")\n", 55 | "#device=torch.device(\"cuda:0\")\n", 56 | "kwargs = {'num_workers': 1, 'pin_memory': True} if device != torch.device('cpu') else {}\n", 57 | "train_loader = torch.utils.data.DataLoader( \n", 58 | " datasets.MNIST('../../data', train=True,download=True, \n", 59 | " transform=transforms.Compose([ transforms.ToTensor(),\n", 60 | " transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=batch_size,\n", 61 | " shuffle=False, **kwargs)\n", 62 | "test_loader = torch.utils.data.DataLoader(datasets.MNIST('../data', 
train=False,\n", 63 | " transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,),\n", 64 | " (0.3081,)) ])), batch_size=test_batch_size, shuffle=False, **kwargs)\n", 65 | "train_data=[]\n", 66 | "test_data=[]\n", 67 | "for batch_idx, (data, target) in enumerate(train_loader):\n", 68 | " data, target = data.to(device), target.to(device)\n", 69 | " train_data.append((data,target))\n", 70 | "for batch_idx, (data, target) in enumerate(test_loader):\n", 71 | " data, target = data.to(device), target.to(device)\n", 72 | " test_data.append((data,target))\n", 73 | "#print(train_data[0])\n", 74 | "\n", 75 | "\n", 76 | " " 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "show_imgs(train_data[0][0],2,10,10,2)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "nin=784\n", 95 | "nout=10\n", 96 | "W=torch.randn(nin,nout,device=device) \n", 97 | "W.requires_grad=True\n", 98 | "bias = torch.randn(nout,device=device)\n", 99 | "bias.requres_grad=True\n", 100 | "print(train_data[0][0].shape)\n", 101 | "lr=0.001\n", 102 | "optimizer = torch.optim.Adam([W,bias], lr=lr, betas=(0.9, 0.999))\n", 103 | "print_bin = epochs/100\n", 104 | "for epoch in range(1, epochs + 1):\n", 105 | " optimizer.zero_grad()\n", 106 | " t1=time.time()\n", 107 | " for batch_idx, (data, target) in enumerate(train_data):\n", 108 | "# loss = F.cross_entropy(data.view(data.shape[0],-1)@W+bias.expand(data.shape[0],nout),target)\n", 109 | " output=F.log_softmax(data.view(data.shape[0],-1)@W+bias.expand(data.shape[0],nout), dim=1)\n", 110 | " loss = F.nll_loss(output, target)\n", 111 | " loss.backward()\n", 112 | " optimizer.step()\n", 113 | " test_loss = 0\n", 114 | " correct = 0\n", 115 | " for data, target in test_data:\n", 116 | " 
output=F.log_softmax(data.view(data.shape[0],-1)@W+bias.expand(data.shape[0],nout), dim=1)\n", 117 | " test_loss += F.nll_loss(output, target, reduction='sum').item()\n", 118 | " pred = output.argmax(dim=1, keepdim=True)\n", 119 | " correct += pred.eq(target.view_as(pred)).sum().item()\n", 120 | " test_loss /= 10000\n", 121 | " correct /= 10000\n", 122 | " if(epoch % print_bin==0):\n", 123 | " print(\"#%d loss=%.3f test_loss=%.3f accuracy=%.3f time=%.3f\"%(epoch,loss,test_loss,correct,time.time()-t1))\n", 124 | " " 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [] 133 | } 134 | ], 135 | "metadata": { 136 | "kernelspec": { 137 | "display_name": "Python 3", 138 | "language": "python", 139 | "name": "python3" 140 | }, 141 | "language_info": { 142 | "codemirror_mode": { 143 | "name": "ipython", 144 | "version": 3 145 | }, 146 | "file_extension": ".py", 147 | "mimetype": "text/x-python", 148 | "name": "python", 149 | "nbconvert_exporter": "python", 150 | "pygments_lexer": "ipython3", 151 | "version": "3.6.8" 152 | } 153 | }, 154 | "nbformat": 4, 155 | "nbformat_minor": 2 156 | } 157 | -------------------------------------------------------------------------------- /codes/neural_networks/.ipynb_checkpoints/lr_mlp_conv-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "" 9 | } 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import time\n", 14 | "import torch\n", 15 | "import torchvision\n", 16 | "import torch.nn as nn\n", 17 | "import torch.nn.functional as F\n", 18 | "import torch.optim as optim\n", 19 | "from torchvision import datasets, transforms\n", 20 | "\n", 21 | "batch_size=1000\n", 22 | "test_batch_size=1000\n", 23 | "epochs=1000\n", 24 | "device=torch.device(\"cpu\")\n", 25 | 
"device=torch.device(\"cuda:0\")\n", 26 | "data_path='../../../data'\n", 27 | "kwargs = {'num_workers': 1, 'pin_memory': True} if device != torch.device('cpu') else {}\n", 28 | "train_loader = torch.utils.data.DataLoader( \n", 29 | " datasets.MNIST(data_path, train=True,download=True, \n", 30 | " transform=transforms.Compose([ transforms.ToTensor(),\n", 31 | " transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=batch_size,\n", 32 | " shuffle=False, **kwargs)\n", 33 | "test_loader = torch.utils.data.DataLoader(datasets.MNIST(data_path, train=False,\n", 34 | " transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,),\n", 35 | " (0.3081,)) ])), batch_size=test_batch_size, shuffle=False, **kwargs)\n", 36 | "train_data=[]\n", 37 | "test_data=[]\n", 38 | "for batch_idx, (data, target) in enumerate(train_loader):\n", 39 | " data, target = data.to(device), target.to(device)\n", 40 | " train_data.append((data,target))\n", 41 | "for batch_idx, (data, target) in enumerate(test_loader):\n", 42 | " data, target = data.to(device), target.to(device)\n", 43 | " test_data.append((data,target))" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 20, 49 | "metadata": { 50 | "scrolled": true, 51 | "slideshow": { 52 | "slide_type": "slide" 53 | } 54 | }, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "#10 loss=1.225 test_loss=1.346 accuracy=0.661 time=0.140\n", 61 | "#20 loss=0.684 test_loss=0.710 accuracy=0.812 time=0.132\n", 62 | "#30 loss=0.323 test_loss=0.417 accuracy=0.860 time=0.144\n", 63 | "#40 loss=0.418 test_loss=0.540 accuracy=0.836 time=0.136\n", 64 | "#50 loss=0.209 test_loss=0.271 accuracy=0.891 time=0.132\n", 65 | "#60 loss=0.133 test_loss=0.153 accuracy=0.954 time=0.120\n", 66 | "#70 loss=0.158 test_loss=0.215 accuracy=0.935 time=0.133\n", 67 | "#80 loss=0.115 test_loss=0.121 accuracy=0.962 time=0.106\n", 68 | "#90 loss=0.359 test_loss=0.466 accuracy=0.870 
time=0.131\n", 69 | "#100 loss=0.147 test_loss=0.150 accuracy=0.954 time=0.135\n", 70 | "#110 loss=0.115 test_loss=0.101 accuracy=0.969 time=0.133\n", 71 | "#120 loss=0.544 test_loss=0.625 accuracy=0.871 time=0.104\n", 72 | "#130 loss=0.127 test_loss=0.136 accuracy=0.961 time=0.140\n", 73 | "#140 loss=0.332 test_loss=0.390 accuracy=0.891 time=0.145\n", 74 | "#150 loss=0.120 test_loss=0.099 accuracy=0.971 time=0.131\n", 75 | "#160 loss=0.116 test_loss=0.097 accuracy=0.972 time=0.133\n", 76 | "#170 loss=0.114 test_loss=0.093 accuracy=0.973 time=0.137\n", 77 | "#180 loss=0.232 test_loss=0.251 accuracy=0.932 time=0.110\n", 78 | "#190 loss=0.144 test_loss=0.132 accuracy=0.964 time=0.090\n", 79 | "#200 loss=0.144 test_loss=0.120 accuracy=0.967 time=0.091\n", 80 | "#210 loss=0.124 test_loss=0.097 accuracy=0.974 time=0.112\n", 81 | "#220 loss=0.119 test_loss=0.096 accuracy=0.975 time=0.129\n", 82 | "#230 loss=0.118 test_loss=0.095 accuracy=0.976 time=0.108\n", 83 | "#240 loss=0.140 test_loss=0.166 accuracy=0.959 time=0.105\n", 84 | "#250 loss=0.202 test_loss=0.233 accuracy=0.947 time=0.098\n", 85 | "#260 loss=0.161 test_loss=0.133 accuracy=0.969 time=0.098\n", 86 | "#270 loss=0.125 test_loss=0.091 accuracy=0.977 time=0.088\n", 87 | "#280 loss=0.119 test_loss=0.092 accuracy=0.977 time=0.087\n", 88 | "#290 loss=0.113 test_loss=0.094 accuracy=0.978 time=0.087\n", 89 | "#300 loss=0.107 test_loss=0.095 accuracy=0.977 time=0.081\n", 90 | "#310 loss=0.102 test_loss=0.097 accuracy=0.978 time=0.102\n", 91 | "#320 loss=0.110 test_loss=0.117 accuracy=0.974 time=0.089\n", 92 | "#330 loss=0.258 test_loss=0.218 accuracy=0.953 time=0.113\n", 93 | "#340 loss=0.406 test_loss=0.480 accuracy=0.917 time=0.137\n", 94 | "#350 loss=0.154 test_loss=0.109 accuracy=0.978 time=0.106\n", 95 | "#360 loss=0.137 test_loss=0.107 accuracy=0.979 time=0.129\n", 96 | "#370 loss=0.128 test_loss=0.107 accuracy=0.979 time=0.134\n", 97 | "#380 loss=0.123 test_loss=0.108 accuracy=0.979 time=0.125\n", 98 | "#390 
loss=0.112 test_loss=0.111 accuracy=0.979 time=0.121\n", 99 | "#400 loss=0.101 test_loss=0.111 accuracy=0.978 time=0.133\n", 100 | "#410 loss=0.101 test_loss=0.113 accuracy=0.979 time=0.126\n" 101 | ] 102 | }, 103 | { 104 | "ename": "KeyboardInterrupt", 105 | "evalue": "", 106 | "output_type": "error", 107 | "traceback": [ 108 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 109 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 110 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnet2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcross_entropy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 111 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 487\u001b[0m 
\u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 489\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 490\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 112 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m 
\u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_modules\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m \u001b[0minput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 93\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 113 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 489\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 490\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 114 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mweak_script_method\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 67\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 68\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 115 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlinear\u001b[0;34m(input, weight, bias)\u001b[0m\n\u001b[1;32m 
1350\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdim\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mbias\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1351\u001b[0m \u001b[0;31m# fused op is marginally faster\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1352\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maddmm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unwrap_optional\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1353\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1354\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmatmul\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 116 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "n_in=784\n", 122 | "n_out=10\n", 123 | "n_hidden=500\n", 124 | "model = \"lr\" # logistic regression\n", 125 | "#model = \"fc2\" # 2-layer MLP\n", 126 | "#model = \"conv\" # convolution\n", 127 | "if (model == \"lr\"):\n", 128 | " net = 
nn.Sequential(nn.Linear(n_in,n_out))\n", 129 | "elif(model == \"fc2\"):\n", 130 | " net = nn.Sequential(nn.Linear(n_in,n_hidden),nn.Sigmoid(),nn.Linear(n_hidden,n_out))\n", 131 | "elif(model == \"conv\"):\n", 132 | " net = nn.Sequential(nn.Conv2d(1,20,5),nn.ReLU(),nn.MaxPool2d(2),nn.Conv2d(20,50,5),nn.ReLU(),nn.MaxPool2d(2))\n", 133 | " net2 = nn.Sequential(nn.Linear(4*4*50,n_hidden),nn.ReLU(),nn.Linear(n_hidden,n_out))\n", 134 | "else:\n", 135 | " print(\"Wrong model\")\n", 136 | "net = net.to(device)\n", 137 | "net2 = net2.to(device)\n", 138 | "optimizer = torch.optim.Adam(list(net.parameters()), lr=0.001, betas=(0.9, 0.999))\n", 139 | "print_bin = epochs/100\n", 140 | "for epoch in range(1, epochs + 1):\n", 141 | " optimizer.zero_grad()\n", 142 | " t1=time.time()\n", 143 | " for batch_idx, (data, target) in enumerate(train_data):\n", 144 | " if(model == 'conv'):\n", 145 | " output=net(data).view(output.shape[0],-1)\n", 146 | " output = net2(output)\n", 147 | " else:\n", 148 | " output=net(data.view(data.shape[0],-1))\n", 149 | "\n", 150 | " loss = F.cross_entropy(output, target)\n", 151 | " loss.backward()\n", 152 | " optimizer.step()\n", 153 | " test_loss = 0\n", 154 | " correct = 0\n", 155 | " for data, target in test_data:\n", 156 | " if(model == 'conv'):\n", 157 | " output=net(data).view(output.shape[0],-1)\n", 158 | " output = net2(output)\n", 159 | " else:\n", 160 | " output=net(data.view(data.shape[0],-1))\n", 161 | " output=F.log_softmax(output, dim=1)\n", 162 | " test_loss += F.nll_loss(output, target, reduction='sum').item()\n", 163 | " pred = output.argmax(dim=1, keepdim=True)\n", 164 | " correct += pred.eq(target.view_as(pred)).sum().item()\n", 165 | " test_loss /= 10000\n", 166 | " correct /= 10000\n", 167 | " if(epoch % print_bin==0):\n", 168 | " print(\"#%d loss=%.3f test_loss=%.3f accuracy=%.3f time=%.3f\"%(epoch,loss,test_loss,correct,time.time()-t1))\n", 169 | " " 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 
null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.6.8" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 2 201 | } 202 | -------------------------------------------------------------------------------- /codes/neural_networks/grad_matrix_products.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Different ways of computing gradient of trace of infinite matrix products\n", 8 | "$\\nabla_T \\ln Z = \\nabla _T \\ln \\mathrm{tr}\\left[\\underbrace{T\\times T \\times \\cdots \\times T}_{\\mathrm{\\infty}}\\right]$\n", 9 | "We present several methods for computing the gradients with:\n", 10 | "- Autograd of $\\ln Z$ that is computed using the environment matrix and RG\n", 11 | "- Autograd of the leading eigenvalue of the transfer matrix\n", 12 | "- Autograd of $\\ln Z$ that is computed using the environment vector and power method\n", 13 | "- Using cavity of the matrix product\n", 14 | "- Using the leading eigenvector" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 3, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import torch,math\n", 24 | "torch.manual_seed(1)\n", 25 | "N = 10 # size of the matrix\n", 26 | "A = torch.randn(N, N, dtype=torch.float64) \n", 27 | "T = A@A.t() # a symmetric positive definite transfer matrix\n", 28 | "T.requires_grad=True" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | 
"source": [ 35 | "### Autograd of $\\ln Z$ using environment matrix\n", 36 | "Consider a infinite-length chain as product of matrices $T$. Analogous to real-space RG for the one-dimensional Ising model, we merge all pairs of consequtive matrices at each RG step, resulting to a chain with half length (although still infinite), then truncate the spectrum of merged matrices to original dimension for a low-rank approximation. By keep doing this RG step, merged matrices will finally converge to environments (for each tensor $T$) $M$ which also indicates that $Z=M\\times T \\times M$, and $$ \\ln Z=\\ln(M\\times T \\times M).$$" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 4, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "4.042987293864085\n", 49 | "tensor(3.8794, dtype=torch.float64, grad_fn=)\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "lnZ = 0.0\n", 55 | "M = T.clone() # M will be the converged envioment tensor\n", 56 | "print(math.log(M.norm()))\n", 57 | "Niter = 20 # number of iterations in RG. That is, 2^Niter matrices will be contracted finally.\n", 58 | "for i in range(Niter): # after i steps, there are totally 2^i matrices contracted.\n", 59 | " s = M.norm() # This is the normalization of a matrix contracted of 2^i T.\n", 60 | " lnZ = lnZ + torch.log(s)/2**i # Notice that we can only record a density of logarithm of the results, for contraction of infinite matrices.\n", 61 | " M = M/s\n", 62 | " M = M@M\n", 63 | "lnZ = lnZ + torch.trace(M)/(2**Niter) # trace(M) is the trace of contraction of all tensors.\n", 64 | "print(lnZ)\n", 65 | "lnZ.backward()\n", 66 | "RG_grad = T.grad.clone()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "### Autograd of the leading eigenvalue of the transfer matrix\n", 74 | "In the last section, we have evaluated $\\ln Z$ using RG. 
Actually, this can be evaluated analytically as\n", 75 | "$$\\ln Z =\n", 76 | "\\lim_{t\\to\\infty}\\frac{1}{t}\\ln \\mathrm{tr}(T^t)=\\lim_{t\\to\\infty}\\frac{1}{t}\\ln\\sum_{i=1}^N \\lambda_i^t=\n", 77 | "\\ln\\lambda_\\mathrm{max},$$\n", 78 | "where $\\lambda_{\\mathrm{max}}$ is the leading eigenvalue. Thus we can do back propagation directly on $\\ln\\lambda_{\\mathrm{max}}$." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "w, v = torch.symeig(T, eigenvectors=True) # w is an array of eigenvalues, v is a matrix with each column storing an eigenvector\n", 88 | "T.grad.zero_()\n", 89 | "lambda_max = torch.log(w[-1])\n", 90 | "lambda_max.backward()\n", 91 | "eigenvalue_grad = ((T.grad + T.grad.t())/2) # need to symmetrize since it is an upper triangular matrix" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Using cavity of the matrix product\n", 99 | "Using the property of the environment matrix $M$, we have $Z=\\mathrm{tr}\\left[\\underbrace{T\\times T \\times \\cdots \\times T}_{\\mathrm{\\infty}}\\right]=\\mathrm{tr}(M\\times T \\times M)$. 
Then the gradient with respect to a specific $T$ can be written as\n", 100 | "$$\\nabla_T \\ln Z = \\frac{M\\times M}{\\mathrm{tr}(M\\times T\\times M)}.$$\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "impurity_grad = (M@M).t()/torch.trace(M@T@M)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "### Autograd of $\\ln Z$ using vector environment\n", 117 | "Here comes the trick: Infinite tensor product with a Periodic Boundary Condition (PBC) is equivalent to that with an Open Boundary Condition (OBC), which is written as\n", 118 | "$\\ln Z=\\ln \\left[u\\times\\underbrace{T\\times T \\times \\cdots \\times T}_{\\mathrm{\\infty}}\\times u\\right]=\\ln(m\\times T\\times m)$, where $m$ is the environment for the OBC tensor product, a vector. Obviously, $m$ is the leading eigenvector of the transfer matrix $T$ (normalized to unit length), and the gradient can be written as $$\\nabla_T\\ln Z=\\frac{m\\otimes m}{\\lambda_\\mathrm{max}}.$$" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "v=v[:,-1] # the leading eigenvector of the transfer matrix $T$\n", 128 | "eigenvector_grad=v[:,None]@v[None,:]/torch.exp(lambda_max) # outer product of the leading eigenvector and its transpose" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Comparing these gradients" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "1.734723475976807e-18\n", 148 | "1.6046192152785466e-17\n", 149 | "1.6046192152785466e-17\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "# comparing RG_grad, eigenvector_grad, eigenvalue_grad and 
impurity_grad\n", 
155 | "print ((impurity_grad-RG_grad).abs().max().item())\n", 156 | "print ((impurity_grad-eigenvalue_grad).abs().max().item())\n", 157 | "print ((impurity_grad-eigenvector_grad).abs().max().item())" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [] 166 | } 167 | ], 168 | "metadata": { 169 | "kernelspec": { 170 | "display_name": "Python 3", 171 | "language": "python", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "codemirror_mode": { 176 | "name": "ipython", 177 | "version": 3 178 | }, 179 | "file_extension": ".py", 180 | "mimetype": "text/x-python", 181 | "name": "python", 182 | "nbconvert_exporter": "python", 183 | "pygments_lexer": "ipython3", 184 | "version": "3.6.8" 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 2 189 | } 190 | -------------------------------------------------------------------------------- /codes/neural_networks/logistic_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 174, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import time\n", 10 | "import torch\n", 11 | "import torchvision\n", 12 | "import torch.nn as nn\n", 13 | "import torch.nn.functional as F\n", 14 | "import torch.optim as optim\n", 15 | "from torchvision import datasets, transforms\n", 16 | "%matplotlib inline\n", 17 | "import matplotlib.pyplot as plt" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 175, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "def show_imgs(imgs,l1=4,l2=5,s1=6,s2=6):\n", 27 | " \"\"\" Plot images \"\"\"\n", 28 | " plt.rcParams['figure.figsize']=(s1,s2)\n", 29 | " imgs=imgs.cpu().reshape([-1,28,28])\n", 30 | " g, ax = plt.subplots(l1,l2)\n", 31 | " for i in range(l1): \n", 32 | " for j in range(l2):\n", 33 | " a=i*l2+j\n", 34 | " 
if(a>=imgs.shape[0]):\n", 35 | " break\n", 36 | " ax[i][j].imshow(imgs[a,:,:],cmap='summer')\n", 37 | " ax[i][j].set_xticks([])\n", 38 | " ax[i][j].set_yticks([])\n", 39 | " plt.show()\n", 40 | "#show_imgs(data,2,10,10,2)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 183, 46 | "metadata": { 47 | "scrolled": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "batch_size=10000\n", 52 | "test_batch_size=100\n", 53 | "epochs=1000\n", 54 | "device=torch.device(\"cpu\")\n", 55 | "device=torch.device(\"cuda:0\")\n", 56 | "data_path='../../../data'\n", 57 | "kwargs = {'num_workers': 1, 'pin_memory': True} if device != torch.device('cpu') else {}\n", 58 | "train_loader = torch.utils.data.DataLoader( \n", 59 | " datasets.MNIST(data_path, train=True,download=True, \n", 60 | " transform=transforms.Compose([ transforms.ToTensor(),\n", 61 | " transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=batch_size,\n", 62 | " shuffle=False, **kwargs)\n", 63 | "test_loader = torch.utils.data.DataLoader(datasets.MNIST(data_path, train=False,\n", 64 | " transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,),\n", 65 | " (0.3081,)) ])), batch_size=test_batch_size, shuffle=False, **kwargs)\n", 66 | "train_data=[]\n", 67 | "test_data=[]\n", 68 | "for batch_idx, (data, target) in enumerate(train_loader):\n", 69 | " data, target = data.to(device), target.to(device)\n", 70 | " train_data.append((data,target))\n", 71 | "for batch_idx, (data, target) in enumerate(test_loader):\n", 72 | " data, target = data.to(device), target.to(device)\n", 73 | " test_data.append((data,target))\n", 74 | "#print(train_data[0])\n", 75 | "\n", 76 | "\n", 77 | " " 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 179, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "image/png": "\n", 88 | "text/plain": [ 89 | "
" 90 | ] 91 | }, 92 | "metadata": {}, 93 | "output_type": "display_data" 94 | } 95 | ], 96 | "source": [ 97 | "show_imgs(train_data[0][0],2,10,10,2)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 184, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "torch.Size([10000, 1, 28, 28])\n", 110 | "#10 loss=26.888 test_loss=27.740 accuracy=0.136 time=0.036\n", 111 | "#20 loss=19.086 test_loss=19.920 accuracy=0.230 time=0.036\n", 112 | "#30 loss=13.808 test_loss=14.583 accuracy=0.330 time=0.030\n", 113 | "#40 loss=10.418 test_loss=11.069 accuracy=0.420 time=0.029\n", 114 | "#50 loss=8.243 test_loss=8.756 accuracy=0.497 time=0.030\n", 115 | "#60 loss=6.787 test_loss=7.193 accuracy=0.557 time=0.037\n", 116 | "#70 loss=5.767 test_loss=6.108 accuracy=0.608 time=0.035\n", 117 | "#80 loss=5.024 test_loss=5.324 accuracy=0.646 time=0.033\n", 118 | "#90 loss=4.460 test_loss=4.735 accuracy=0.678 time=0.036\n", 119 | "#100 loss=4.021 test_loss=4.278 accuracy=0.702 time=0.036\n", 120 | "#110 loss=3.671 test_loss=3.917 accuracy=0.723 time=0.029\n", 121 | "#120 loss=3.384 test_loss=3.624 accuracy=0.738 time=0.029\n", 122 | "#130 loss=3.146 test_loss=3.379 accuracy=0.752 time=0.029\n", 123 | "#140 loss=2.947 test_loss=3.172 accuracy=0.764 time=0.029\n", 124 | "#150 loss=2.778 test_loss=2.995 accuracy=0.773 time=0.029\n", 125 | "#160 loss=2.633 test_loss=2.842 accuracy=0.783 time=0.029\n", 126 | "#170 loss=2.505 test_loss=2.708 accuracy=0.792 time=0.030\n", 127 | "#180 loss=2.392 test_loss=2.589 accuracy=0.799 time=0.029\n", 128 | "#190 loss=2.290 test_loss=2.482 accuracy=0.805 time=0.029\n", 129 | "#200 loss=2.199 test_loss=2.386 accuracy=0.810 time=0.030\n", 130 | "#210 loss=2.116 test_loss=2.298 accuracy=0.816 time=0.030\n", 131 | "#220 loss=2.041 test_loss=2.218 accuracy=0.820 time=0.030\n", 132 | "#230 loss=1.972 test_loss=2.144 accuracy=0.823 time=0.030\n", 133 | "#240 
loss=1.908 test_loss=2.076 accuracy=0.829 time=0.029\n", 134 | "#250 loss=1.849 test_loss=2.012 accuracy=0.833 time=0.029\n", 135 | "#260 loss=1.795 test_loss=1.954 accuracy=0.836 time=0.030\n", 136 | "#270 loss=1.744 test_loss=1.899 accuracy=0.839 time=0.030\n", 137 | "#280 loss=1.697 test_loss=1.848 accuracy=0.841 time=0.029\n", 138 | "#290 loss=1.652 test_loss=1.800 accuracy=0.844 time=0.030\n", 139 | "#300 loss=1.610 test_loss=1.755 accuracy=0.847 time=0.035\n", 140 | "#310 loss=1.570 test_loss=1.713 accuracy=0.850 time=0.035\n", 141 | "#320 loss=1.531 test_loss=1.672 accuracy=0.852 time=0.028\n", 142 | "#330 loss=1.495 test_loss=1.634 accuracy=0.854 time=0.029\n", 143 | "#340 loss=1.460 test_loss=1.597 accuracy=0.856 time=0.029\n", 144 | "#350 loss=1.427 test_loss=1.562 accuracy=0.857 time=0.036\n", 145 | "#360 loss=1.395 test_loss=1.528 accuracy=0.858 time=0.043\n", 146 | "#370 loss=1.364 test_loss=1.496 accuracy=0.859 time=0.036\n", 147 | "#380 loss=1.335 test_loss=1.465 accuracy=0.861 time=0.037\n", 148 | "#390 loss=1.306 test_loss=1.435 accuracy=0.863 time=0.030\n", 149 | "#400 loss=1.279 test_loss=1.406 accuracy=0.864 time=0.030\n", 150 | "#410 loss=1.252 test_loss=1.379 accuracy=0.866 time=0.031\n", 151 | "#420 loss=1.227 test_loss=1.352 accuracy=0.867 time=0.031\n", 152 | "#430 loss=1.203 test_loss=1.326 accuracy=0.867 time=0.030\n", 153 | "#440 loss=1.179 test_loss=1.301 accuracy=0.869 time=0.030\n", 154 | "#450 loss=1.156 test_loss=1.277 accuracy=0.870 time=0.029\n", 155 | "#460 loss=1.134 test_loss=1.254 accuracy=0.871 time=0.029\n", 156 | "#470 loss=1.113 test_loss=1.232 accuracy=0.872 time=0.033\n", 157 | "#480 loss=1.093 test_loss=1.210 accuracy=0.874 time=0.029\n", 158 | "#490 loss=1.073 test_loss=1.190 accuracy=0.875 time=0.030\n", 159 | "#500 loss=1.054 test_loss=1.170 accuracy=0.875 time=0.029\n", 160 | "#510 loss=1.035 test_loss=1.150 accuracy=0.876 time=0.030\n", 161 | "#520 loss=1.018 test_loss=1.133 accuracy=0.876 time=0.029\n", 162 | 
"#530 loss=0.999 test_loss=1.119 accuracy=0.876 time=0.030\n", 163 | "#540 loss=1.012 test_loss=1.120 accuracy=0.874 time=0.029\n", 164 | "#550 loss=0.969 test_loss=1.092 accuracy=0.878 time=0.029\n", 165 | "#560 loss=0.965 test_loss=1.077 accuracy=0.880 time=0.029\n", 166 | "#570 loss=0.943 test_loss=1.049 accuracy=0.879 time=0.029\n", 167 | "#580 loss=0.927 test_loss=1.041 accuracy=0.880 time=0.029\n", 168 | "#590 loss=0.914 test_loss=1.027 accuracy=0.881 time=0.041\n", 169 | "#600 loss=0.901 test_loss=1.007 accuracy=0.881 time=0.035\n", 170 | "#610 loss=0.885 test_loss=1.001 accuracy=0.881 time=0.038\n", 171 | "#620 loss=0.874 test_loss=0.981 accuracy=0.881 time=0.037\n", 172 | "#630 loss=0.861 test_loss=0.970 accuracy=0.882 time=0.037\n", 173 | "#640 loss=0.850 test_loss=0.960 accuracy=0.883 time=0.036\n", 174 | "#650 loss=0.844 test_loss=0.942 accuracy=0.882 time=0.040\n", 175 | "#660 loss=0.832 test_loss=0.947 accuracy=0.882 time=0.036\n", 176 | "#670 loss=0.825 test_loss=0.920 accuracy=0.883 time=0.034\n", 177 | "#680 loss=0.805 test_loss=0.921 accuracy=0.882 time=0.029\n", 178 | "#690 loss=0.796 test_loss=0.903 accuracy=0.885 time=0.035\n", 179 | "#700 loss=0.800 test_loss=0.891 accuracy=0.884 time=0.035\n", 180 | "#710 loss=0.779 test_loss=0.893 accuracy=0.882 time=0.031\n", 181 | "#720 loss=0.763 test_loss=0.866 accuracy=0.884 time=0.036\n", 182 | "#730 loss=0.762 test_loss=0.858 accuracy=0.885 time=0.042\n", 183 | "#740 loss=0.750 test_loss=0.860 accuracy=0.884 time=0.036\n", 184 | "#750 loss=0.739 test_loss=0.839 accuracy=0.885 time=0.037\n", 185 | "#760 loss=0.729 test_loss=0.829 accuracy=0.886 time=0.036\n", 186 | "#770 loss=0.734 test_loss=0.832 accuracy=0.885 time=0.038\n", 187 | "#780 loss=0.717 test_loss=0.818 accuracy=0.886 time=0.036\n", 188 | "#790 loss=0.710 test_loss=0.808 accuracy=0.885 time=0.033\n", 189 | "#800 loss=0.702 test_loss=0.806 accuracy=0.885 time=0.029\n", 190 | "#810 loss=0.691 test_loss=0.785 accuracy=0.886 time=0.030\n", 191 
| "#820 loss=0.685 test_loss=0.787 accuracy=0.885 time=0.029\n", 192 | "#830 loss=0.693 test_loss=0.775 accuracy=0.888 time=0.029\n", 193 | "#840 loss=0.678 test_loss=0.788 accuracy=0.884 time=0.031\n", 194 | "#850 loss=0.673 test_loss=0.758 accuracy=0.890 time=0.045\n", 195 | "#860 loss=0.655 test_loss=0.760 accuracy=0.886 time=0.038\n", 196 | "#870 loss=0.648 test_loss=0.748 accuracy=0.887 time=0.048\n", 197 | "#880 loss=0.654 test_loss=0.735 accuracy=0.891 time=0.046\n", 198 | "#890 loss=0.630 test_loss=0.736 accuracy=0.887 time=0.037\n", 199 | "#900 loss=0.626 test_loss=0.725 accuracy=0.888 time=0.038\n", 200 | "#910 loss=0.631 test_loss=0.716 accuracy=0.891 time=0.035\n", 201 | "#920 loss=0.612 test_loss=0.718 accuracy=0.889 time=0.035\n", 202 | "#930 loss=0.612 test_loss=0.703 accuracy=0.889 time=0.035\n", 203 | "#940 loss=0.619 test_loss=0.705 accuracy=0.890 time=0.029\n", 204 | "#950 loss=0.603 test_loss=0.701 accuracy=0.890 time=0.036\n", 205 | "#960 loss=0.598 test_loss=0.690 accuracy=0.890 time=0.035\n", 206 | "#970 loss=0.588 test_loss=0.683 accuracy=0.893 time=0.029\n", 207 | "#980 loss=0.580 test_loss=0.676 accuracy=0.892 time=0.043\n", 208 | "#990 loss=0.589 test_loss=0.667 accuracy=0.892 time=0.035\n", 209 | "#1000 loss=0.576 test_loss=0.682 accuracy=0.891 time=0.035\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "nin=784\n", 215 | "nout=10\n", 216 | "W=torch.randn(nin,nout,device=device) \n", 217 | "W.requires_grad=True\n", 218 | "bias = torch.randn(nout,device=device)\n", 219 | "bias.requres_grad=True\n", 220 | "print(train_data[0][0].shape)\n", 221 | "lr=0.001\n", 222 | "optimizer = torch.optim.Adam([W,bias], lr=lr, betas=(0.9, 0.999))\n", 223 | "print_bin = epochs/100\n", 224 | "for epoch in range(1, epochs + 1):\n", 225 | " optimizer.zero_grad()\n", 226 | " t1=time.time()\n", 227 | " for batch_idx, (data, target) in enumerate(train_data):\n", 228 | "# loss = 
F.cross_entropy(data.view(data.shape[0],-1)@W+bias.expand(data.shape[0],nout),target)\n", 229 | " output=F.log_softmax(data.view(data.shape[0],-1)@W+bias.expand(data.shape[0],nout), dim=1)\n", 230 | " loss = F.nll_loss(output, target)\n", 231 | " loss.backward()\n", 232 | " optimizer.step()\n", 233 | " test_loss = 0\n", 234 | " correct = 0\n", 235 | " for data, target in test_data:\n", 236 | " output=F.log_softmax(data.view(data.shape[0],-1)@W+bias.expand(data.shape[0],nout), dim=1)\n", 237 | " test_loss += F.nll_loss(output, target, reduction='sum').item()\n", 238 | " pred = output.argmax(dim=1, keepdim=True)\n", 239 | " correct += pred.eq(target.view_as(pred)).sum().item()\n", 240 | " test_loss /= 10000\n", 241 | " correct /= 10000\n", 242 | " if(epoch % print_bin==0):\n", 243 | " print(\"#%d loss=%.3f test_loss=%.3f accuracy=%.3f time=%.3f\"%(epoch,loss,test_loss,correct,time.time()-t1))\n", 244 | " " 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "Python 3", 258 | "language": "python", 259 | "name": "python3" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 3 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython3", 271 | "version": "3.6.8" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 2 276 | } 277 | -------------------------------------------------------------------------------- /codes/neural_networks/lr_mlp_conv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 21, 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "" 9 | } 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "import 
# Setup cell: imports, hyper-parameters, and MNIST pre-loaded onto `device`.
import time
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

batch_size = 1000
test_batch_size = 1000
epochs = 1000

# Use the first GPU when one is present, otherwise fall back to the CPU.
# (The original unconditionally overwrote `device` with "cuda:0", which
# fails on machines without CUDA.)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_path = '../../../data'

# Extra DataLoader options are only passed when not running on the CPU.
kwargs = {'num_workers': 1, 'pin_memory': True} if device.type != 'cpu' else {}

# The same preprocessing is applied to the training and the test split.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(data_path, train=True, download=True, transform=mnist_transform),
    batch_size=batch_size, shuffle=False, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(data_path, train=False, transform=mnist_transform),
    batch_size=test_batch_size, shuffle=False, **kwargs)

# Materialise every batch on `device` once, so the training loops in later
# cells iterate over plain Python lists instead of the DataLoader.
train_data = []
test_data = []
for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(device), target.to(device)
    train_data.append((data, target))
for batch_idx, (data, target) in enumerate(test_loader):
    data, target = data.to(device), target.to(device)
    test_data.append((data, target))
test_loss=0.097 accuracy=0.972 time=0.418\n", 64 | "#50 loss=0.108 test_loss=0.076 accuracy=0.977 time=0.418\n", 65 | "#60 loss=0.191 test_loss=0.168 accuracy=0.948 time=0.418\n", 66 | "#70 loss=0.122 test_loss=0.072 accuracy=0.978 time=0.419\n", 67 | "#80 loss=0.120 test_loss=0.075 accuracy=0.978 time=0.418\n", 68 | "#90 loss=0.224 test_loss=0.203 accuracy=0.938 time=0.418\n", 69 | "#100 loss=0.097 test_loss=0.058 accuracy=0.981 time=0.418\n", 70 | "#110 loss=0.094 test_loss=0.059 accuracy=0.983 time=0.418\n", 71 | "#120 loss=0.099 test_loss=0.051 accuracy=0.985 time=0.418\n", 72 | "#130 loss=0.206 test_loss=0.182 accuracy=0.949 time=0.418\n", 73 | "#140 loss=0.097 test_loss=0.050 accuracy=0.984 time=0.417\n", 74 | "#150 loss=0.322 test_loss=0.418 accuracy=0.905 time=0.417\n", 75 | "#160 loss=0.096 test_loss=0.053 accuracy=0.985 time=0.417\n", 76 | "#170 loss=0.100 test_loss=0.051 accuracy=0.985 time=0.418\n", 77 | "#180 loss=0.112 test_loss=0.065 accuracy=0.981 time=0.417\n", 78 | "#190 loss=0.097 test_loss=0.048 accuracy=0.987 time=0.417\n", 79 | "#200 loss=0.098 test_loss=0.048 accuracy=0.987 time=0.417\n", 80 | "#210 loss=0.172 test_loss=0.181 accuracy=0.958 time=0.417\n", 81 | "#220 loss=0.097 test_loss=0.053 accuracy=0.984 time=0.417\n", 82 | "#230 loss=0.101 test_loss=0.051 accuracy=0.985 time=0.418\n", 83 | "#240 loss=0.105 test_loss=0.058 accuracy=0.985 time=0.418\n", 84 | "#250 loss=0.103 test_loss=0.055 accuracy=0.985 time=0.417\n", 85 | "#260 loss=0.101 test_loss=0.053 accuracy=0.985 time=0.417\n", 86 | "#270 loss=0.103 test_loss=0.054 accuracy=0.986 time=0.417\n", 87 | "#280 loss=0.106 test_loss=0.056 accuracy=0.986 time=0.417\n", 88 | "#290 loss=0.115 test_loss=0.066 accuracy=0.984 time=0.418\n", 89 | "#300 loss=0.121 test_loss=0.064 accuracy=0.984 time=0.417\n", 90 | "#310 loss=0.135 test_loss=0.071 accuracy=0.982 time=0.417\n", 91 | "#320 loss=0.117 test_loss=0.063 accuracy=0.984 time=0.417\n", 92 | "#330 loss=0.114 test_loss=0.065 accuracy=0.985 
time=0.417\n" 93 | ] 94 | }, 95 | { 96 | "ename": "KeyboardInterrupt", 97 | "evalue": "", 98 | "output_type": "error", 99 | "traceback": [ 100 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 101 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 102 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mif\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'conv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnet2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 103 
| "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 489\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 490\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 104 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_modules\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m \u001b[0minput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 93\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 105 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 488\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 489\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 490\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 106 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mweak_script_method\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 67\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 68\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 107 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mlinear\u001b[0;34m(input, weight, bias)\u001b[0m\n\u001b[1;32m 
# Train and evaluate one of three MNIST classifiers.
#
# Relies on notebook state created by the setup cell: `torch`, `nn`, `F`,
# `time`, `device`, `epochs`, and the pre-loaded lists of (data, target)
# batches `train_data` / `test_data`.
n_in = 784
n_out = 10
n_hidden = 500

# Architecture to use, one of:
#   "lr"   - logistic regression
#   "fc2"  - 2-layer MLP
#   "conv" - convolution
model = "conv"

# `net` is the main network; `net2` is the fully connected head that is
# only used by the convolutional model (None otherwise).
net2 = None
if model == "lr":
    net = nn.Sequential(nn.Linear(n_in, n_out))
elif model == "fc2":
    net = nn.Sequential(nn.Linear(n_in, n_hidden), nn.Sigmoid(), nn.Linear(n_hidden, n_out))
elif model == "conv":
    net = nn.Sequential(nn.Conv2d(1, 20, 5), nn.ReLU(), nn.MaxPool2d(2),
                        nn.Conv2d(20, 50, 5), nn.ReLU(), nn.MaxPool2d(2))
    # The head expects the flattened feature map to have 4*4*50 entries.
    net2 = nn.Sequential(nn.Linear(4 * 4 * 50, n_hidden), nn.ReLU(), nn.Linear(n_hidden, n_out))
else:
    # Fail here instead of printing and hitting a NameError on `net` below.
    raise ValueError("Wrong model: %r" % (model,))

net = net.to(device)
params = list(net.parameters())
if net2 is not None:
    net2 = net2.to(device)
    # The head's weights must be optimised as well; the original passed
    # only `net.parameters()`, leaving `net2` at its random initialisation.
    params += list(net2.parameters())
optimizer = torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999))


def forward(data):
    """Return the class logits of the selected model for one batch."""
    if net2 is not None:
        features = net(data)
        # Flatten per sample using the batch size of *this* batch.
        # (The original used `output.shape[0]`, which is undefined on the
        # very first call.)
        return net2(features.view(data.shape[0], -1))
    return net(data.view(data.shape[0], -1))


# Report roughly 100 times over the whole run; integer so `%` is exact.
print_bin = max(1, epochs // 100)
# Number of test samples, counted instead of hard-coding 10000.
n_test = sum(target.shape[0] for _, target in test_data)

for epoch in range(1, epochs + 1):
    t1 = time.time()
    for batch_idx, (data, target) in enumerate(train_data):
        # `backward()` accumulates into `.grad`, so clear it per batch.
        optimizer.zero_grad()
        output = forward(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

    test_loss = 0
    correct = 0
    # No graph is needed for evaluation.
    with torch.no_grad():
        for data, target in test_data:
            output = F.log_softmax(forward(data), dim=1)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= n_test
    correct /= n_test

    if epoch % print_bin == 0:
        # `loss` is the loss of the last training batch of this epoch.
        print("#%d loss=%.3f test_loss=%.3f accuracy=%.3f time=%.3f" % (epoch, loss.item(), test_loss, correct, time.time() - t1))
null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [] 170 | } 171 | ], 172 | "metadata": { 173 | "kernelspec": { 174 | "display_name": "Python 3", 175 | "language": "python", 176 | "name": "python3" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.6.8" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 2 193 | } 194 | -------------------------------------------------------------------------------- /codes/neural_networks/tanh.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 34, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "image/png": "\n", 11 | "text/plain": [ 12 | "
" 13 | ] 14 | }, 15 | "metadata": {}, 16 | "output_type": "display_data" 17 | } 18 | ], 19 | "source": [ 20 | "# Following HIPS autograd https://github.com/HIPS/autograd/blob/master/examples/tanh.py\n", 21 | "# Following https://github.com/QuantumBFS/SSSS/blob/master/1_deep_learning/tanh.py\n", 22 | "import torch\n", 23 | "%matplotlib inline\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "x = torch.linspace(-7, 7, 100, requires_grad=True)\n", 26 | "for i in range(7):\n", 27 | " if (i==0):\n", 28 | " y = torch.tanh(x/2)\n", 29 | " else:\n", 30 | " y, = torch.autograd.grad(y, x, grad_outputs=torch.ones(y.shape[0]), create_graph=True)\n", 31 | " plt.plot(x.detach().numpy(), y.detach().numpy(), '-', label='$%g$-th'%(i))\n", 32 | "plt.legend(loc=2);plt.show()" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.6.8" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | --------------------------------------------------------------------------------