├── .gitattributes ├── .gitignore ├── Bishop - Pattern Recognition And Machine Learning - Springer 2006.pdf ├── LICENSE ├── README.md ├── assets └── resources_sentiment_analysis.tar.gz ├── classifier └── hypothesis.ipynb ├── fft.py ├── gen ├── Julia.ipynb ├── Python2.ipynb └── gen.ipynb ├── matrics.py ├── notes └── perceptron.md ├── optimization.py ├── perceptron ├── gen-data.py ├── perception-algorithm.py └── perceptron.py ├── plane.py ├── probability.py ├── project0 ├── debug.py ├── main.py └── test.py ├── requirements.txt ├── sentiment_analysis ├── 200.txt ├── 4000.txt ├── main.py ├── project1.py ├── reviews_submit.tsv ├── reviews_test.tsv ├── reviews_train.tsv ├── reviews_val.tsv ├── stopwords.txt ├── test.py ├── toy_data.tsv └── utils.py ├── sqrt_newton.py ├── svm ├── main.py ├── project1.py ├── toy_data.tsv └── utils.py ├── update.sh ├── vector.py ├── wiki.txt └── wiki_zh.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pdf filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /Bishop - Pattern Recognition And Machine Learning - Springer 2006.pdf: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:4ee767e0a6b04fa05ba7e599e9dbb4637a94a4407ccedf0b4d316b1fd7c8ec64 3 | size 18090775 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Huang Kan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MIT-ML 2 | MITx: 6.86x Machine Learning with Python: from Linear Models to Deep Learning 3 | -------------------------------------------------------------------------------- /assets/resources_sentiment_analysis.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KellyHwong/MIT-ML/0305208956f89cb039292c7cce175852f0783336/assets/resources_sentiment_analysis.tar.gz -------------------------------------------------------------------------------- /classifier/hypothesis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import matplotlib.pyplot as plt\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 9, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "data": { 20 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEWCAYAAABliCz2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xd8VFXawPHfk14mhJBGDQESQEAiIVRREUHAurZFdm2v78q6LKy9rIDrKu7a6yqKZUFXWcuqL7q6awMREKQjPaGHlh7SM8mc9487CQEpAXJnJpnn+/nMJ1PO3PvkMjw5c+65zxFjDEoppVq+AG8HoJRSyjM04SullJ/QhK+UUn5CE75SSvkJTfhKKeUnNOErpZSf0ISvmjUR+bWIfGnTtmeJyHQbtrtDREY29XaVOhFN+MrnicgwEVksIsUiUiAii0RkAIAx5h1jzIXejvFIImJEpExESkVkj4g8IyKBJ7mN4SKSbVeMyv8EeTsApY5HRFoBnwG/A94HQoBzgCpvxtVIacaYLBHpCcwHtgCveDck5c+0h698XXcAY8wcY0ytMabCGPOlMWYtgIjcJCIL6xq7e9YTRSRTREpE5BER6eb+hnBQRN4XkRB32+Eiki0iD4hInnuo5dfHCkRELhGR1SJS5N5e38b8AsaYTcD3QJ+jbDNURJ4Tkb3u23Pu5yKBL4D27m8JpSLS/mQOnFJH0oSvfN0WoFZEZovIWBGJacR7RgP9gcHAvcBM4DqgE1bSHd+gbVsgDugA3AjMFJEeR25QRPoBbwK/BWKBV4G5IhJ6omBEpBfWt5JVR3l5ijvOs4A0YCAw1RhTBowF9hpjHO7b3hP/6kodmyZ85dOMMQeBYYABXgNyRWSuiCQe521PGGMOGmPWA+uAL40x24wxxVi95n5HtJ9mjKkyxnwH/Bv45VG2OQF41Riz1P1NYzbWsNLg48SxUkQKgU+B14G/H6XNr4GHjTE5xphc4M/A9cfZplKnTMfwlc8zxmwEbgJwj4f/A3iOw3vqDR1ocL/iKI/bNnhc6O5N19kJHG3opDNwo4hMbvBcyDHa1kk3xmQd53Xc79/ZiP0rddq0h6+aFfd4+CyOMh5+imLc4+V1koCjDZ3sBh41xrRucIswxsw5zf3vxfpjcrT9aylb1aQ04SufJiI9ReQuEenoftwJq2e/pAl382cRCRGRc4BLgA+O0uY14FYRGSSWSBG5WESiTnPfc4CpIhIvInHAg1jfYMD6ZhIrItGnuQ+lAB3SUb6vBBgE3CkirYEirGma9zTR9vcDhVi96nLgVve3iMMYY5aLyC3A34BUrKGhhcCC09z/dKAVsNb9+AP3cxhjNonIHGCbew5/Lz1xq06H6AIoyl+JyHDgH8aYjt6ORSlP0CEdpZTyE5rwlVLKT+iQjlJK+Qnt4SullJ/wqVk6cXFxJjk52dthKKVUs7FixYo8Y0x8Y9r6VMJPTk5m+fLl3g5DKaWaDRHZeeJWFh3SUUopP6EJXyml/IQmfKWU8hM+NYZ/NE6nk+zsbCorK70dSrMWFhZGx44dCQ4O9nYoSikv8fmEn52dTVRUFMnJyYiIt8Nplowx5Ofnk52dTZcuXbwdjlLKS2wd0hGRO0RkvYisE5E5IhJ2stuorKwkNjZWk/1pEBFiY2P1W5JSfs62hC8iHYA/ABnGmD5AIHDtKW6rKUPzS3oMlVJ2n7QNAsJFJAiI4OgLSyillN9avGcx72x8B6fLafu+bEv4xpg9wFPALmAfUGyM+fLIdiIyQUSWi8jy3Nxcu8I5LQ6HA4C9e/dy9dVXezkapVRL4TIunl7xNHM2zUGw/1u4nUM6McDlQBesNTojReS6I9sZY2YaYzKMMRnx8Y26Othr2rdvz4cffmjrPmpqamzdvlLKd/x3x3/ZUriFiWkTCQqwfw6NnUM6I4HtxphcY4wT+AgYauP+bLdjxw769LGWUp01axZXXnklY8aMITU1lXvvvbe+3ZdffsmQIUNIT0/nmmuuobS0FICHH36YAQMG0KdPHyZMmEBdpdLhw4dz++23k5GRwfPPP+/5X0wp5XE1rhpeXv0yKa1TGNNljEf2aeeflF3AYBGJwFoO7gLgtArl/PnT9WzYe7ApYqvXq30r/nRp71N67+rVq1m1ahWhoaH06NGDyZMnEx4ezvTp0/n666+JjIzk8ccf55lnnuHBBx9k0qRJPPjggwBcf/31fPbZZ1x66aUAVFdXax0hpfzIp1s/ZcfBHTx3/nMEiGeugbUt4RtjlorIh8BKoAZYBcy0a3/ecMEFFxAdba0v3atXL3bu3ElRUREbNmzg7LPPBqxEPmTIEADmzZvHE088QXl5OQUFBfTu3bs+4Y8bN84
7v4RSyuOqa6t5Zc0r9Intw4hOIzy2X1sHjYwxfwL+1FTbO9WeuF1CQ0Pr7wcGBlJTU4MxhlGjRjFnzpzD2lZWVjJx4kSWL19Op06deOihhw6bFx8ZGemxuJVS3vWvzH+xt2wvfxryJ49OmdZaOk1s8ODBLFq0iKysLADKysrYsmVLfXKPi4ujtLTU9pO/SinfVO4sZ+bamfRP7M+Q9kM8um+fL63Q3MTHxzNr1izGjx9PVVUVANOnT6d79+7ccsst9OnTh7Zt2zJgwAAvR6qU8oZ3N71LXkUezw5/1uMXRPrUmrYZGRnmyBOXGzdu5IwzzvBSRC2LHkulvKu4qpixH42lf0J/XrzgxSbZpoisMMZkNKatDukopZSHvLnuTUqrS5mcPtkr+9eEr5RSHpBbnsu7G9/l4q4X0z2mu1di0ISvlFIe8OraV6lx1TDxrIlei0ETvlJK2WxH8Q4+3PIhV3W/ik5RnbwWhyZ8pZSy2QurXiAkMIRb0271ahya8JVSykZrc9fy1c6vuKn3TcSFx3k1Fk34XnLTTTed9MVXn3zyCRs2bKh//OCDD/L11183dWhKqSZijOHZFc/SJqwNN/a+0dvhaML3NbW1tcd87ciE//DDDzNy5EhPhKWUOgXf7/me5QeW89u+vyUy2PvlUzThN8Kjjz5K9+7dGTZsGOPHj+epp55i+PDh9dUt8/LySE5OBqwSyueccw7p6emkp6ezePFiwPpLP2nSJHr06MHIkSPJycmp335ycjL33Xcf6enpfPDBB7z22msMGDCAtLQ0rrrqKsrLy1m8eDFz587lnnvu4ayzzmLr1q2HfUtYtmwZQ4cOJS0tjYEDB1JSUuLZg6SUOkytq5bnVj5Hp6hOXNP9Gm+HAzS30gpf3A/7f2rabbY9E8Y+dsyXV6xYwT//+U9Wr15NTU0N6enp9O/f/5jtExIS+OqrrwgLCyMzM5Px48ezfPlyPv74YzZv3syGDRs4cOAAvXr14uabb65/X2xsLCtXrgQgPz+fW265BYCpU6fyxhtvMHnyZC677DIuueSSn626VV1dzbhx43jvvfcYMGAABw8eJDw8/HSOilLqNM3dOpfMwkyePO9JggODvR0O0NwSvhd8//33XHHFFURERABw2WWXHbe90+lk0qRJrF69msDAQLZs2QLAggULGD9+PIGBgbRv354RIw4vidqwPPK6deuYOnUqRUVFlJaWMnr06OPuc/PmzbRr166+Pk+rVq1O+vdUSjWdcmc5f1v1N/rG9WV05+P///Wk5pXwj9MT97SgoCBcLhfAYWWOn332WRITE1mzZg0ul4uwsLBGba9heeSbbrqJTz75hLS0NGbNmsX8+fObNHallL3e3vA2ORU5PHnekx4vkHY8OoZ/Aueeey6ffPIJFRUVlJSU8OmnnwLWuPuKFSsADpttU1xcTLt27QgICODtt9+uPwl77rnn8t5771FbW8u+ffuYN2/eMfdZUlJCu3btcDqdvPPOO/XPR0VFHXVsvkePHuzbt49ly5bVv1/XxlXKO/Iq8nhz3ZtckHQB6Ynp3g7nMHYuYt5DRFY3uB0Ukdvt2p9d0tPTGTduHGlpaYwdO7Z+2OTuu+9mxowZ9OvXj7y8vPr2EydOZPbs2aSlpbFp06b6nvsVV1xBamoqvXr14oYbbqhfBetoHnnkEQYNGsTZZ59Nz54965+/9tprefLJJ+nXrx9bt26tfz4kJIT33nuPyZMnk5aWxqhRow771qGU8pwZq2dQXVvN7em+l+48Uh5ZRAKBPcAgY8zOY7VrDuWRH3roIRwOB3fffbe3QzlpvnYslWppthZt5aq5V3FN92uYMniKR/bpi+WRLwC2Hi/ZK6VUc/fU8qeICIrgd2f9ztuhHJWnTtpeC8w52gsiMgGYAJCUlOShcE7dQw895O0QlFI+aNGeRSzcs5C7+t9Fm7A23g7nqGzv4YtICHAZ8MHRXjfGzDTGZBhjMuLj4+0ORymlmlyNq4anlj9FR0dHfnXGr7wdzjF5YkhnLLDSGHPAA/tSSimP+zjrY7KKsrgz405CAkO8Hc4xeSLhj+cYwzlKKdXclVaX8rdVfyM9IZ2RSb5d28rWhC8ikcAo4CM796OUUt4yc+1MCioLuHfAvT51kdXR2JrwjTFlxphYY0yxnfvxFfPnz+eSSy456msXXXQRRUVFHo5IKWWnnQd38vbGt/lFyi/oHdfb2+GcUPMqrdCMff75594OQSnVxJ5a/hQhASHcln6bt0NpFC2t0AhvvfUWffv2JS0tjeuvv/5ni5c4HI76+wcPHuTiiy+mR48e3HrrrfX1dpKTk+uvyD1ye0qp5mfx3sXM3z2fW/re4vWVrBqrWfXwH//xcTYVbGrSbfZs05P7Bt53zNfXr1/P9OnTWbx4MXFxcRQUFHDnnXces/2PP/7Ihg0b6Ny5M2PGjOGjjz46rJzx0banlGpealw1PLnsSTo6OnJ9r+bTadMe/gl8++23XHPNNcTFWX/B27Q5/gUVAwcOpGvXrgQGBjJ+/HgWLlx4WttTSvme9za/R1ZRFndn3E1oYKi3w2m0ZtXDP15P3JMalkZ2uVxUV1fXv3bkWXpfP2uvlDo5BZUFvLT6JYa0G8KIpBEnfoMP0R7+CYwYMYIPPviA/Px8AAoKCg4rjTx37lycTmd9+x9//JHt27fjcrl47733GDZs2Am3p5RqPl5Y+QIVzgruH3h/s+vQNasevjf07t2bKVOmcN555xEYGEi/fv14/PHHufzyy0lLS2PMmDGHLV4yYMAAJk2aRFZWFueffz5XXHHFCbc3a9YsD/9WSqlTsT5vPR9lfsT1va6na+uu3g7npHmkPHJjNYfyyM2ZHkulTp3LuLjhixvILsnm0ys+JSokytshAb5ZHlkppZq1uVvnsiZ3Dbel3+Yzyf5kacJXSqkTOFh9kGdXPEtafBqXp1zu7XBOWbMYwzfGNLuTI77Gl4bulGpu/rbqbxRVFfHKyFcIkObbT/b5yMPCwsjPz9eEdRqMMeTn5xMWFubtUJRqdjbmb+S9ze8xrsc4zoht3ufAfL6H37FjR7Kzs8nNzfV2KM1aWFgYHTt29HYYSjUrLuPi0aWP0jq0NZP6TfJ2OKfN5xN+cHAwXbp08XYYSik/9H9Z/8ea3DU8cvYjtApp5e1wTpvPD+kopZQ3FFYW8syKZ0hPSOeybpd5O5wmoQlfKaWO4tkVz1JaXcq0wdOa9Ynahuxe8aq1iHwoIptEZKOIDLFzf0op1RRW5azi46yPub739aTEpHg7nCZj9xj+88B/jDFXi0gIEGHz/pRS6rQ4XU4e/uFh2kW249a+t3o7nCZlW8IXkWjgXOAmAGNMNVB9vPcopZS3vb3hbbKKsnj+/OeJCG5ZfVQ7h3S6ALnA30VklYi87l7U/DAiMkFElovIcp16qZTypuySbGasnsGITiOaXenjxrAz4QcB6cAMY0w/oAy4/8hGxpiZxpgMY0xGfHy8jeEopdSxGWOYvmQ6gQGB/HHQH70dji3sTPjZQLYxZqn78YdYfwCUUsrnfLH9Cx
btXcTkfpNpG9nW2+HYwraEb4zZD+wWkR7upy4ANti1P6WUOlXFVcU8vuxx+sT24doe13o7HNvYPUtnMvCOe4bONuB/bN6fUkqdtGdWPENxVTGvjnqVwIBAb4djG1sTvjFmNdCowvxKKeUNS/ct5aPMj7i5z830bNPT2+HYqmVcPqaUUqegoqaCP//wZ5Kikvhd2u+8HY7tfL54mlJK2WXGmhnsLtnNGxe+QVhQyy8frj18pZRf2pC/gbfWv8VVqVcxsN1Ab4fjEZrwlVJ+x1nrZNqiabQJa8OdGXd6OxyP0SEdpZTfeX3d62wp3MIL57/QIurcN5b28JVSfmVL4RZmrp3J2C5jOT/pfG+H41Ga8JVSfqPGVcODix6kVUgr/jiwZZZPOB5N+EopvzF7/WzW56/nj4P+SExYjLfD8ThN+Eopv5BVmMVLq19iZNJIRnce7e1wvEITvlKqxXO6nExZNAVHsIOpg6ciIt4OySt0lo5SqsX7+7q/syF/A0+f9zSx4bHeDsdrtIevlGrRNhdsZsaaGYxNHsuFyRd6Oxyv0oSvlGqxqmureWDhA0SHRPPAoAe8HY7X6ZCOUqrFmrFmBlsKt/DiiBdpHdba2+F4nfbwlVIt0uqc1by57k2uTL2S4Z2Gezscn2BrD19EdgAlQC1QY4zR2vhKKduVO8t5YOEDtItsxz0Z93g7HJ/hiSGd840xeR7Yj1JKAfD08qfJLsnmjdFv4AhxeDscn6FDOkqpFuW73d/x/pb3ubH3jQxoO8Db4fgUuxO+Ab4UkRUiMuFoDURkgogsF5Hlubm5NoejlGrJ8iryeHDxg/SI6cHkfpO9HY7PsXtIZ5gxZo+IJABficgmY8yChg2MMTOBmQAZGRnG5niUUi2UMYY/Lf4TpdWlvHHhG4QEhng7JJ9jaw/fGLPH/TMH+Bjwj2VllFIe9/7m91mQvYA7M+4kJSbF2+H4JNsSvohEikhU3X3gQmCdXftTSvmvrUVbeXL5kwxtP5TxPcd7OxyfZeeQTiLwsbtIURDwrjHmPzbuTynlh6pqq7hnwT1EBkfy6LBHCRCdi3IstiV8Y8w2IM2u7SulFMAzy58hszCTly54ibjwOG+H49P0T6FSqtlakL2Adze9y3VnXMe5Hc/1djg+TxO+UqpZOlB2gCkLp9Ajpge397/d2+E0C5rwlVLNTq2rlvu/v5+q2iqeOO8JQgNDvR1Ss6AJXynV7Ly69lWWH1jO1MFT6Rrd1dvhNBua8JVSzcqy/ct4de2rXNbtMi7rdpm3w2lWjjlLR0SuPN4bjTEfNX04Sil1bPkV+dy34D6SopKYMmiKt8Npdo43LfPS47xmAE34SimPqRu3P1h9kBkjZxARHOHtkJqdYyZ8Y8z/eDIQpZQ6nplrZ7Jk3xL+PPTP9GjTw9vhNEsnHMMXkUQReUNEvnA/7iUi/2t/aEopZflh7w/MWDODy7pdxhUpV3g7nGarMSdtZwH/Bdq7H28BdNKrUsoj9pft5/7v76drdFemDJqCu1yLOgWNSfhxxpj3AReAMaYGa8lCpZSylbPWyV3f3UVlTSXPDH9Gx+1PU2Nq6ZSJSCzWiVpEZDBQbGtUSikFPLn8SdbmruWp856ia2udb3+6GpPw7wTmAt1EZBEQD1xta1RKKb/32bbPmLNpDtf3up7RyaO9HU6LcMKEb4xZKSLnAT0AATYbY5y2R6aU8lubCzbz8A8Pk56Qzh397/B2OC3GCRO+iIQBE4FhWMM634vIK8aYSruDU0r5n+KqYm6fdzuOYAdPnfcUwQHB3g6pxWjMkM5bQAnwovvxr4C3gWvsCkop5Z9qXbXc9/197C/fz99H/534iHhvh9SiNCbh9zHG9GrweJ6IbGjsDkQkEFgO7DHGXHKyASql/MdLq19i0Z5FTBs8jbMSzvJ2OC1OY6ZlrnTPzAFARAZhJfDGug3YeLKBKaX8y5c7vuS1n17jytQruaa7DiDY4ZgJX0R+EpG1QH9gsYjsEJHtwA9ARmM2LiIdgYuB15siWKVUy7SpYBNTF00lLT7Nvy6uMgZWzIb/+71Hdne8IZ2mGH55DrgXiDpWAxGZAEwASEpKaoJdKqWak4LKAm779jaiQqJ47vznCAkM8XZInpG/FT69DXZ8D52HQXUZhETausvjFU/b2fCxiCQAYY3dsIhcAuQYY1aIyPDj7GcmMBMgIyPDNHb7Sqnmz1nr5M75d5Jfmc/sMbP9YxHy2hpY8jLM+wsEBsMlz0H6jRBg//IkjZmWeRnwNFYtnRygM9aYfO8TvPVs4DIRuQjrD0UrEfmHMea60wtZKdUSGGOYvnQ6Kw6s4LFzHqN33IlSSguwdzV8+gfYtwZ6XAQXPw2t2p/4fU2kMX9SHgEGA1uMMV2AC4AlJ3qTMeaPxpiOxphk4FrgW032Sqk6b294m48yP+KWM2/h4q4Xezsce1WXw5fT4LURULIfrpkN177r0WQPjZuW6TTG5ItIgIgEGGPmichztkemlGqxFmQv4OkVTzOq8ygm9Zvk7XDslfUNfHYHFO2E9Btg1MMQHuOVUBqT8ItExAEsAN4RkRyg7GR2YoyZD8w/6eiUUi3OlsIt3LvgXnrE9GD62dMJkBa6tHZpLnw5Bda+B7GpcNO/IXmYV0NqTMK/HKgE7gB+DUQDD9sZlFKqZcotz+X33/yeyKBIXhjxQsssd2wMrHrbGsKpLoNz74Vz7oLgRs95sU1jiqc17M3PtjEWpVQLVu4sZ9K3kyiuKmb2mNm0jWzr7ZCaXu5ma/hm5yJIGgqXPgfxvrMc4zETvoiU4K6Bf+RLgDHGtLItKqVUi1JXI2dTwSZeOP8Fzog9w9shNS1nBSx4Eha9YM2lv/QF6He9R6ZanozjzcM/5sVSSinVWMYYHl/2OPN3z+f+gfdzXqfzvB1S08r8Cj6/Gwp3QNp4GPUIOHyz6FtjxvCVUuqUzV4/mzmb5nBDrxv49Rm/9nY4Tad4D/znftg41zope+On0OVcb0d1XJrwlVK2+WL7Fzy94mlGJ4/mroy7vB1O06h1wtJXYP5j4KqBEdNg6B8gyPdLQmjCV0rZYtn+ZUxZOIX0hHQeHfZoy5h+uXMx/PsuyNkA3cfAmMegTRdvR9VojSmtMBn4hzGm0APxKKVagM0Fm/nDt38gKSqJF0a8QGhgqLdDOj0lB+CrB2HtPyE6Ca6dAz0v8nZUJ60xPfxEYJmIrATeBP5rjNEiZ0qpo8ouyebWr28lMjiSV0a9QnRotLdDOnW1NbDsNavQWU0lnHO3Nac+pHleP9CYefhTRWQacCHwP8DfROR94A1jzFa7A1RKNR/5Ffnc+vWtVNdW89bYt5r3XPsdC+Hze6zhm24XwEVPQmw3b0d1Who1hm+MMSKyH9gP1AAxwIci8pUx5l47A1RKNQ8l1SX87uvfcaDsAK9d+BrdWjfT5Fi8B76aBuv+ZQ3fjPsH9LwEWsCiLI0Zw78NuAHIw1q56h5jjFNEAoBMrAVOlFJ+rLKmkknfTCKzMJPnRzzfPNejramCH/4GC54CVy2cdz8Mux2Cw70dW
ZNpTA+/DXDlkQuiGGNc7kVOlFJ+zOlyctd3d7EqZxWPn/s453b07bnoP2MMbPkP/OePULjd6s2PfhRikr0dWZNrzBj+n47zmi5OrpQfq3XVMuX7KSzIXsC0wdMY22Wst0M6ObmbrYuntn4Lcd3huo8g5QJvR2UbnYevlDolLuPioR8e4osdX3BH/zv4ZY9fejukxqsohPmPWzNwgiOt+fQDfmMtOdiCacJXSp00YwyP/fgYn2R9wq1pt3Jzn5u9HVLj1NbAir/DvEehsthakGTENIj0g7V0sTHhi0gY1qIpoe79fHi84SGlVPNgjOHZFc8yZ9Mcbux1IxPTJno7pMbJ+hr+OwVyN0HyOVavvm0fb0flUXb28KuAEcaYUhEJBhaKyBfGmBOuh6uU8k3GGF5Y9QJ/X/93xvUYx10ZdyG+Pl0xd7OV6LO+gpguLWqa5cmyLeG7r8YtdT8Mdt/0Cl2lmrGX17zM6z+9ztXdr+aBQQ/4drIvy4P5f4Xlf4cQB1w4HQZOgKBmXubhNNg6hi8igcAKIAV4yRiz9ChtJgATAJKSkuwMRyl1GmasmcEra17hipQrmDZ4mu8WQ3NWWtUsv3/aWmJwwP9ac+ojY70dmdfZmvCNMbXAWSLSGvhYRPoYY9Yd0WYmMBMgIyNDvwEo5WOMMby85mVeWfMKl3W7jIeGPuSbyd7lsq6O/ebPULzbqmY56hGI7+7tyHyGR2bpGGOKRGQeMAZYd6L2SinfYIzhxVUv8tpPr/GLlF/w0BAfTfbbF1jVLPeugnZp8IuXfX4xEm+wc5ZOPOB0J/twYBTwuF37U0o1LWMMz618jjfXvclVqVfx4JAHfS/ZH9gAXz8Emf+FVh3hilfhzF/63FqyvsLOHn47YLZ7HD8AeN8Y85mN+1NKNZG6dWjf2fgO13S/hqmDp/pWsi/Ohnl/hTXvQkgUjPwzDLoVgsO8HZlPs3OWzlqgn13bV0rZo9ZVyyNLHuFfmf/iujOu494B9/rObJyKQlj4LCx9FYwLBk+06tNHtPF2ZM2CXmmrlKrndDmZtmga/972b2458xYm95vsG8m+utyaebPoOag8CH3HwYgp0Fpn9p0MTfhKKQCqaqu4e/7dzM+ezx/6/YFb+t7i7ZCsBcNXvgULnoSSfdbMmxHT/O4K2aaiCV8pRZmzjMnfTmb5/uVMGTSFa3te692AXC5Y/xF8O90qWdxpMFz9JnQe6t24mjlN+Er5uYLKAiZ+PZFNBZv46zl/5eKuF3svGGNg8xdWcbMD6yCxD/zqfUi90C9LITQ1TfhK+bG6Bcf3l+3n+fOf57xO53knEGNg23yrR79nObTpCle9Ab2v1CmWTUgTvlJ+anPB5voFx1+/8HXvLUu48wcr0e9caM2lv/R5OOvXLb42vTdowlfKDy3dt5Tb591OZHAkb419yzsLju9eZg3dbJsHjkQY+yT0v9Gvi5vZTRO+Un7m062f8uDiB0lulcyMkTNoG9nWswHsWQHzH4PMLyEi1qp3M+A3EBLh2Tj8kCZ8pfyEMYbXf3qdF1a9wMC2A3n2/GdpFdLKcwHsXWUl+i3/gfAYuOBPVrniUIfnYvBzmvCV8gPOWicPL3mYT7I+4eKuF/Mb/T7TAAAe4ElEQVTI0EcI9tQY+Z4V8N0ThxL9iGlWog/z4B8bBWjCV6rFK64q5o75d7Bs/zJuTbuViWkTPXP17O4frUSf9ZU70U+Fgb/VRO9FmvCVasF2HtzJpG8msad0D38Z9hcu7Xap/TvdsdBK9Nu/g/A27qGbWyA0yv59q+PShK9UC7V472Lu/u5uAiWQ1y58jf6J/e3bmTGw9RtY8DTsWgyRCdaSghk3Q0ikfftVJ0UTvlItjDGGOZvm8MSyJ+gS3YUXR7xIx6iO9uzM5YLNn8P3T1knZVt1gDGPW9Mrg8Pt2ac6ZZrwlWpBqmureXTpo3yU+RHDOw7nsXMfIzLYhh52bQ2s+9AqVZy7CWKS4dIXIO1anUfvw+xc8aoT8BaQCBhgpjHmebv2p5S/O1B2gDvn38navLXccuYt/P6s3xMYENi0O6kuh9XvwOIXoGgXJPSySiD0+gUEav/R19n5L1QD3GWMWSkiUcAKEfnKGLPBxn0q5ZdWHljJXd/dRZmzjGeHP8vIziObdgflBbD8DVjyCpTnQceBMPYJSB2ttW6aETtXvNoH7HPfLxGRjUAHQBO+Uk3EGMPbG97mmRXP0MHRgZmjZpIak9p0OyjaDUtehhWzwVkGKaPgnDshaYhWr2wCzloXO/PLyCutZnDXWNv355HvYCKSjLXc4dKjvDYBmACQlKSr1yjVWGXOMh5c9CBf7vySEZ1GMH3YdKJCmmjq4761sPhFWPcvK7H3uRrO/gMk9m6a7fuZSmct23LLyMotJetACVm5pWQeKGV7Xhk1LkNsZAgrpo2yPQ7bE76IOIB/AbcbYw4e+boxZiYwEyAjI8PYHY9SLcHmgs3c9d1d7C7Zze3pt3Nzn5tP/2IqYyDrG/jhRatUcYjDWhh88O+gdacmibulK62qISunlKycUjJzSsg6UEpWbim7Csox7uwWINA5NpJu8Q5G9kokNcFBakIUxhjbL4izNeGLSDBWsn/HGPORnftSyh8YY/gw80MeW/oY0aHRvH7h6wxoO+D0NuqshJ8+gB9egtyN4GhrXSyVcTOEt26awFuYwrLq+l56fXLPKWVfcWV9m5DAALrERdKnfTS/OKsDKQkOUhIcdImLJCy4iU+mN5Kds3QEeAPYaIx5xq79KOUvSqpLeOSHR/hixxcMaTeEv57zV2LDT2PctzTXOhG77HUoy7VWl/rFK9DnKggKabrAmyljDDklVVZCdw/D1PXe80qr69uFBwfSLSGSwV1j65N6aoKDpDYRBAX61gltO3v4ZwPXAz+JyGr3cw8YYz63cZ9KtUhrctdw34L72F+2n8n9JvObM39DgJxiMtn/kzXb5qcPoLbKWj5wyO+hy3l+eSLW5TLsKapwj68f6q1n5pRSUllT3y4qLIiUBAcjeiaQmhBVn9w7tA4nIKB5HDc7Z+ksBJrHUVDKR9W6anlz3Zu8tPolEiMSmTVm1qmtTOWqtdaKXfoK7PgegiOg33XWGH1896YP3AfV1LrYWVBe30tveKtw1ta3i3OE0C3eweVnta9P7KkJDuKjQj1TdM5GeqWEUj4quySbBxY+wKqcVYxJHsO0IdNOvn59eQGs+gf8+BoU74LoTjDqYUi/wapg2QJV1dSyPa+sfny9box9e14ZztpD80LaR4fRLcHB+IFJhw3FxES23OEsTfhK+RhjDHO3zuWvP/4VQfjLsL9wSddLTq53uW8t/DjTGrapqYTOw2D0o9DjohZzRWx5dQ1bc8oOG4LJyillZ34ZrgYzYpLaRJCS4OD8BkMx3eIjiQrzvzVzW8a/vFItRF5FHg//8DDzds8jPSGdv5zzFzo4OjTuzTXVsOH/YNlrsHspBIVbtW0G3AJt+9gbuI2Ky51k5ZaQeeBQUs/KKWVPUUV9m6AAoUtcJD3bRnFp33akJEaREu+ga7z3ZsT4
Ik34SvkAYwz/3flfHl3yKOXOcu7JuIfrel3XuBOzhTth5WxY+ZY126ZNV7jwUej362YzbGOMIa+0msycErY26K1n5pSSW1JV3y4sOIBu8Q4ykmMYn9Cpfiimc2wkwT42I8YXacJXysvyKvJ4dMmjfL3ra/rE9uHRYY/StXXX47/JVQuZX8HyN63FwEWsujYDfwNdR/hsfRtjDHuLK61pjg1PnOaWUlTurG8XFRpEtwQHw7vH1yf17olRzWpGjC/ShK+Ulxhj+HTbpzz+4+NU1lRyR/87uKHXDQQFHOe/5cG91knYFbPhYLa10Mg5d0H/m3zqathal2F3QTmZDS5KqruVVx+aEdMmMoSUeAcXndmO1PoTp1Ektmr+M2J8kSZ8pbxgd8luHvnhEX7Y9wNnxZ/Fw2c/TJfoLkdv7KqFrK9hxSxrIXDjgq7DYcxf3CdhvXfysbrGxY78MvfFSYeS+7a8MqprXPXt2rYKIyXBwS8zOpGa6CAl3krusQ6tne9JmvCV8iCny8lb69/ilTWvEBgQyJRBU/hlj18efay+cIfVm1/1DpTshch4OPs26Hc9xHbzaNwV1bVszT00xTHTXSNmZ345te4pMSLQMSaclHgH5zYYiklJcNDKD2fE+CJN+Ep5yPL9y5m+ZDpbi7cyotMI/jjoj7SNbHt4I2clbPoMVr1tFTBDIGUkjH0Muo+1veTBwUqnNfTiTuiZB0rIdM+IqSv+FRQgdI6NIDXBwUV92tUn9W7xDsJDdEaML9OEr5TN8iryeHbFs8zdOpf2ke15ccSLDO80/FADY2DvSlj9rjVvvrIYWifB8AfgrF/ZMjafX1rlHl8vdc+KsXrtOQ1mxIQEBdA1LpKzOrXmlxmHZsQkx0YSEuSbJ4XV8WnCV8omTpeTdze+y4w1M6iqreI3Z/6GCX0nEB7kXty7ZD+sfd9K9LkbISgMel4C6ddD8rmnPdPGGMP+g5UNxtcPJffCBjNiIkMCSUlwcE5qfP3VpikJDjq1iSBQZ8S0KJrwlbLBwj0LeXLZk2wr3sY5Hc7hvoH30blVZ3BWWIuKrJ4DW7+xTsB2yIBLnoXeV55SOeJalyG7sLx+XD2rQc+9tOpQ8a/WEcGkxDsY06ctKQ2Kf7WPDtMZMX5CE75STWhb0TaeXP4kC/csJCkqyRq+6XAu7FwE856ADXOh6iC06gjD7oC08RDXuCUJq2us5fAalhHIyilla24pVQ1mxCREhZKS4OCq9Loa7FZyj3OEaGL3c5rwlWoCeRV5vLz6ZT7K/IiIoAjuybiH8TF9CV7/EXwwEQ7usVaQ6nU59B0Hyeccc8imbjm8hvPXM3NK2eFeDq9Ox5hwUhMcnJ0SS2pCFN3cPfbocJ0Ro45OE75Sp6HMWcbs9bOZtX4Wzlon45LH8tvaSNrMf9kalw8Igm4XWBUqe1wEIRH17y2pmxGTU3pYYt9dePhyeMmxkXRLcDCqVyLdEx2kxEfRLSGSiBD976tOjn5ilDoFVbVVvLfpPV7/6XUKqwoZ5ejK7QVFJH37stUgaQhc9BT0voJCWllDMKvyDuu1H7kcXtf4SM7sGM2V7qGY1IQokuMiCA3SqY6qadi5xOGbwCVAjjGm+ZbqU6oBZ62Tj7M+5rU1r7K/IofBJpTb9h6gT/UunAlnsv2se1nmGM6aklZkripl639XkV92aDm8iJBAusU76pfDS01wkJoYRaeYcJ9bDk+1PHb28GcBfwPesnEfSnmE0+Xk0/Xv8OrameytKaFvZRXTC4vobNoxL/hqHqjKYO2uRNgFUESrsFJSEhyMPCOR1ESHNb4e37yWw1Mtj51LHC4QkWS7tq+UnZy1Lnbml7N92wZ+3PIS39SsIjfQ0Keqipvzg9hXNoSptUMpjOxGaqyDtFQHV9XNYU90EO/Q4l/K93h9DF9EJgATAJKSkrwcjfI3lU73cnj1J05LqNy3idSD30L0cr5uXU1OUBA9nHBhWR9aJfwK11lpnJ3g4KYEB60jWu5yeKrl8XrCN8bMBGYCZGRkmBM0V+qUlFXVsDW3tP7iJGu90xJ2FZRjjIs02cbowOX8NnQl30eVMCfZwcHAQNKC2jO1728Z3uca7bGrZs/rCV+pplRUXl0/vbH+ytMDJextMCMmOFDoERvEL1ttYZhjGd2LFrLXVcTs6GhujnLgJJoR7YZyc7/f0ze+rxd/G6WaliZ81ewYY8gtrTo0d/3AoTnseaWHL4eXkuBgYJc2pCQ46B1VTu/SH4jbO4+A7d/hOljBoqgYbu/YgUWuCEIDQ7i82+Vc3+v6Y9emV6oZs3Na5hxgOBAnItnAn4wxb9i1P9XyuFyGvcUV9XVhGpYUKK5osBxeWBApCQ7O7xFPaqI1fz0lwUGHVsEE7F0BmZ/Cli9h/08AHIxJYm7Pc/hnbT47K3OJDw1lYo+JjOsxjjZhbbz16yplOztn6Yy3a9uqZal1GXYVlFvrnOZatdgz3TViGi6HFxsZQrcEB5f0rVsOz0rshy2HV7Ifsr6Cb76CrfOgsggkEJM0iPXDJvGBlPD5vh+oLNtE3/i+PD7gbkZ1HkWwF1eNUspTdEhHeUxVTS078sp/tsbpkcvhtYu2lsMbN6BTfW89JcFBm8ijzIhxVloLhWybB1nfwIF11vOOROh5MQe7DOPfUsG/dnzB5j1zCQ8K5+KuFzOuxzjOiD3DM7+4Uj5CE75qcuXVNWzNKSMrt6R+fD0rp5SdBYcvh9cpxlo16bwjlsOLOt5yeC4XHPjJneTnw87FUFMJAcGQNBhGPkRttxEsqS3h/7bO5Zu1T1PtquaMNmcwddBULup6EVEhUZ44DEr5HE346pQVV1jFv+pXTHIn9uzCivo2QQFCclwk3ROjuLhvu/ql8FISHIQFN6JGjDGQvxW2z4ftC2D791BRYL0Wfwb0/x/oNgKTNITN5Xv4bOtnfPH9neRU5BAdGs1V3a/i8pTL6R3b256DoFQzoglfHZcxhvyy6sOmONYl9obL4YUGBdA13kG/pBjGNVwOLy6S4JOpEVOX4HcuhB3uW8k+67VWHaD7aOg6HLqcB63asaN4B//Z8R/+85+X2Vq8laCAIGvBka73MbzTcEIC9cIopepowleAldj3FVcesbiGldyLGiyH5wgNopt7ObzURKs+zGkth+dyWWWEdy6GXT/AjkVQut96LTIBupwDycOsBN+mK4iwvXg7X+/4jK92fsXGgo0ApCekM3XQVEYnj6Z12MmvGqWUP9CE72dqXYbdBeWHJ/bcny+HFxMRTEqCg7F92rkrOlqJvW2r01wOz1kJe1fB7iWwy32rLLJei2pnJffks6HzMGslKBFcxsWG/A18u+pF5u2eR1ZRFgBp8WncnXE3o5NH0zay7ekcFqX8gib8FqpuObzMI+avb80tPWxGTGKrBsvhJUaREu+ge6KDWEdo0wRSvAeyl1m33Uth72pwub8xxKbCGZdA57Ot+vExydbZXKDcWc6S3fNYkL2A77O/J6cih0AJJD0xnfsH3s8FSRdoklfqJGnCb+YqqmvZmmsl8rqrTjNzStiZX/6
z5fBSEhyckxpHSrzDnuXwqkqshL5nBexZDtkroGSv9VpgKHRIhyG/h04DodNgiIytf6sxhqyiLBbvXczCPQtZcWAFTpeTyOBIhrYfyvBOwzm3w7k6XKPUadCE30zULYfXcPHqzJwSsgsr6pfDCwwQOsdGkBLvYEyfttaJU7uWw6sut+a8711tDdHsXQm5mwF3MDFdoPNQK7l3zIDEMyHo8BOoe0v38uP+H1mybwlL9y0lryIPgJTWKYzvOZ5zO55LekK6XhSlVBPRhO9j8utqxLgrOtZVeNx/sMFyeEEBdI2LJK1ja65Obzgjxqbl8CqKrOS+by3sXwv71ljJ3bivgo2Mh/bp0PsK62eH/of13sHqwe86uJOVB1ay4sAKlh9Yzp7SPQC0CWvDoHaDGNxuMEPbD9WhGqVsognfC4wxHDhYVX/FaWaOVU4gK7eUgiOWw0tJcDC0WywpDWrEJJ3qjJgTcdVC4Q4ruR9YD/vXWRc5Fe061MbRFtr1hZ4XQ7uzoH0/aNW+fuy9TmVNJRsLNrI6ZzVrctewJndNfQ++dWhrMhIzuL7X9QxoO4DU1qlaelgpD9CEbyOXy7CnqOJQYq+rEZNTSkmDGTHR4cGkJji4sFfiYVecto+2aTk8lwuKd0PuJsjZ6P65weq117i/SUiAdVK1Q4Z1cVPbvtD2TIhK/NnmqmurySzKZGP+Rtbnr2dd3joyCzOpdX8D6BTVicHtBpOemE56QjpdorsQILp+q1Kepgm/CdQth5fVsMfunhFT6Tw0IyY+KpSUeAdXpHcgNeHQiVPblsOrLoeCbZC3BfKzrJ+5m637zvJD7RxtIeEMGPAbiO8JbftYP4PDf7bJ/Ip8soqy2Fywmc2Fm9lSuIWsoixqXNYfsKjgKPrE9eHmPjfTJ64PafFpxIbH/mw7SinP04R/EiqdtWzLLau/4rRunH17XtlhM2I6tA6nW4KDwV1j3VUdreGY6AgbTj5Wl1nDMAXbreResA0KtkL+NjiYfXjb6E4Q192a6x7X3UrqCT0hPOawZsYYDpQfYEfeGrYXb2dr0Va2F28nqyiLgsqC+nZx4XH0iOnB0F5D6RXbi15tetEhqoP23pXyUZrwj6K0quaI+utWz31XQTl1eT1AoHNsJN3iHYzslVif2LvFO4gMbcLDWl0OxdlQvAuKdltDMYU7oWin9bMs5/D24W2sK1KTh0FsN+t+XHeITYGQiPpmTpeT/aX7yS7cxJ7de9hdspvdJbvZdXAXu0p2UVFzqB6OI9hB1+iunNfxPFJjUklpnUJqTCpx4XFN93sqpWzn1wm/sKy6wfqm1jTHrTmlP1sOr0tcJL3at+LyszrUj693iYtsXPGvYzEGKgqh9IBVK6ZkPxzc2+C2x0r0FQWHv08CIbojxHS26sq06WJdsBTTxbofHoOz1kleRR45FTnkludyoOgnDuz5mv3l+9lXuo99ZfvIrcjFZQ4NNwUFBNHR0ZFOUZ0Y0HYAXaK70LlVZ7pEdyE+PF5PqirVAtia8EVkDPA8EAi8box5zM79HY0xhtySqgZXnB6qxZ5XemhGTHiwNSNmUNfYw06cdm4TQVBjin8ZA9WlUF4A5flWoi7Lh/I8KMuDslzrVprj/nkAaqt/vp3wNlaRsFbtoGMGJqo95VGJFEfGcDAsiqLAYIpqSiiuLKawqpDCykIK8pdQsOdz8ivyyavMo7iq+GebDQkIISEigfaO9gxqN4h2ke3o4OhAx6iOdHB0IDEikcAAG6Z0KqV8hp1LHAYCLwGjgGxgmYjMNcZssGN/dTNiDq2YdOgEaknloRkxUWFBpCY4uKBHLD3iQkmNDaZr6yDaRUBAbSU4K8C5131hURnsKrXGyatLoeogVB6EqhJMRRE1VcU4K4upqSzCWXWQGlNLtYBTBKcI1QiVAUK1BFIZHk1VWCuqwqKoiO5GRXAfKoLDqAgKoSwwkPKAAMqNi9LaCsqcZZRWl1JSuozSgtL62S5HExUcRUxYDG3C2tAlugsZbTOIDY8lITyB+Ih44sPjSYxMJCY0RnvpSvk5O3v4A4EsY8w2ABH5J3A50KQJv6bWxdVvnEW1WMMTdadOBYOEQOeOcCjNGQwGJ4b1xbCuGMxW6z0GwQAuAZd7O7ViPVcLuESoRagV6n8S4r61igAOjY8fWwWYCqjMgUoQhPCgcCKCI4gIiiAiOAJHsIO2EW2JbB1JVHAUUSHWLTo0muiQaFqFtqJ1aOv6m16FqpRqLDsTfgdgd4PH2cCgIxuJyARgAkBSUtJJ7yQoMIB4HBhcBAUGEBQQQHBggHv+uiAS4L4oSEACEPcNCYCAQEQCkYBAAgKCEPctMDAECQxGAoIJDAolIDCUwKBQAgNCCAwIJFACCQoIqr8FBwTX3w8JCCEkMOTQT/ctLDCM0KBQwgLDCA8KJzwonNBAm6ZjKqXUUXj9pK0xZiYwEyAjI8OcoPlRvTZhcZPGpJRSLZGdE6b3AJ0aPO7ofk4ppZQX2JnwlwGpItJFREKAa4G5Nu5PKaXUcdg2pGOMqRGRScB/saZlvmmMWW/X/pRSSh2frWP4xpjPgc/t3IdSSqnG0aInSinlJzThK6WUn9CEr5RSfkITvlJK+Qkx5pSudbKFiOQCO0/x7XFAXhOG01Q0rpOjcZ0cjevktMS4Ohtj4hvT0KcS/ukQkeXGmAxvx3EkjevkaFwnR+M6Of4elw7pKKWUn9CEr5RSfqIlJfyZ3g7gGDSuk6NxnRyN6+T4dVwtZgxfKaXU8bWkHr5SSqnj0ISvlFJ+wucTvoiMEZHNIpIlIvcf5fVQEXnP/fpSEUlu8Nof3c9vFpHRHo7rThHZICJrReQbEenc4LVaEVntvjVpyehGxHWTiOQ22P9vGrx2o4hkum83ejiuZxvEtEVEihq8ZufxelNEckRk3TFeFxF5wR33WhFJb/CancfrRHH92h3PTyKyWETSGry2w/38ahFZ7uG4hotIcYN/rwcbvHbcz4DNcd3TIKZ17s9UG/drdh6vTiIyz50L1ovIbUdp47nPmDHGZ29YZZW3Al2xVo9dA/Q6os1E4BX3/WuB99z3e7nbhwJd3NsJ9GBc5wMR7vu/q4vL/bjUi8frJuBvR3lvG2Cb+2eM+36Mp+I6ov1krHLath4v97bPBdKBdcd4/SLgC6ylkQcDS+0+Xo2Ma2jd/oCxdXG5H+8A4rx0vIYDn53uZ6Cp4zqi7aXAtx46Xu2AdPf9KGDLUf5Peuwz5us9/PqF0I0x1UDdQugNXQ7Mdt//ELhARMT9/D+NMVXGmO1Alnt7HonLGDPPGFPufrgEa8UvuzXmeB3LaOArY0yBMaYQ+AoY46W4xgNzmmjfx2WMWQAUHKfJ5cBbxrIEaC0i7bD3eJ0wLmPMYvd+wXOfr8Ycr2M5nc9mU8flyc/XPmPMSvf9EmAj1nrfDXnsM+brCf9oC6EfebDq2xhjaoBiILaR77Uzrob+F+sveJ0wEVkuIktE5BdNFNPJxHWV+6vjhyJStwylTxwv99BXF+DbBk/bdbwa41ix23m8TtaRny8DfCkiK0Rkgh
fiGSIia0TkCxHp7X7OJ46XiERgJc1/NXjaI8dLrOHmfsDSI17y2GfM64uYt3Qich2QAZzX4OnOxpg9ItIV+FZEfjLGbPVQSJ8Cc4wxVSLyW6xvRyM8tO/GuBb40BhT2+A5bx4vnyYi52Ml/GENnh7mPl4JwFcissndA/aElVj/XqUichHwCZDqoX03xqXAImNMw28Dth8vEXFg/ZG53RhzsCm3fTJ8vYffmIXQ69uISBAQDeQ38r12xoWIjASmAJcZY6rqnjfG7HH/3AbMx/qr75G4jDH5DWJ5Hejf2PfaGVcD13LE120bj1djHCt2O49Xo4hIX6x/w8uNMfl1zzc4XjnAxzTdUOYJGWMOGmNK3fc/B4JFJA4fOF5ux/t82XK8RCQYK9m/Y4z56ChNPPcZs+NERVPdsL6BbMP6il93oqf3EW1+z+Enbd933+/N4Sdtt9F0J20bE1c/rJNUqUc8HwOEuu/HAZk00cmrRsbVrsH9K4Al5tAJou3u+GLc99t4Ki53u55YJ9DEE8erwT6SOfZJyIs5/ITaj3Yfr0bGlYR1XmroEc9HAlEN7i8GxngwrrZ1/35YiXOX+9g16jNgV1zu16OxxvkjPXW83L/7W8Bzx2njsc9Ykx1su25YZ7C3YCXPKe7nHsbqNQOEAR+4P/w/Al0bvHeK+32bgbEejutr4ACw2n2b635+KPCT+wP/E/C/Ho7rr8B69/7nAT0bvPdm93HMAv7Hk3G5Hz8EPHbE++w+XnOAfYATa4z0f4FbgVvdrwvwkjvun4AMDx2vE8X1OlDY4PO13P18V/exWuP+d57i4bgmNfh8LaHBH6SjfQY8FZe7zU1YEzkavs/u4zUM6xzB2gb/Vhd56zOmpRWUUspP+PoYvlJKqSaiCV8ppfyEJnyllPITmvCVUspPaMJXSik/oQlfKTcRKT3B68nHqsZ4nPfMEpGrTy8ypZqGJnyllPITmvBViyciA9zF4sJEJNJdl7zPcdo7xFrDYKW7TnrDqo5BIvKOiGx0F5+LcL+nv4h85y7A9V93tUOlfIpeeKX8gohMx7oqOxzINsb89ShtSo0xDndNpghjzEF3HZglWAXAOmNd3j7MGLNIRN4ENgDPA99h1bTJFZFxwGhjzM0iMgurPvyHnvg9lToerZap/MXDwDKgEvjDCdoK8BcRORdwYZWkTXS/ttsYs8h9/x/ubf0H6INVaRGsxT72NWn0SjUBTfjKX8QCDiAYq6dfdpy2vwbigf7GGKeI7HC/B6y6KA0ZrD8Q640xQ5o0YqWamI7hK3/xKjANeAd4/ARto4Ecd7I/H2sop06SiNQl9l8BC7GK88XXPS8iwQ0W/lDKZ2jCVy2eiNwAOI0x7wKPAQNE5HiLvrwDZIjIT8ANwKYGr20Gfi8iG7FK1s4w1pJ9VwOPi8garIqIQ234VZQ6LXrSViml/IT28JVSyk9owldKKT+hCV8ppfyEJnyllPITmvCVUspPaMJXSik/oQlfKaX8xP8DHvi0lgpXKowAAAAASUVORK5CYII=\n", 21 | "text/plain": [ 22 | "
" 23 | ] 24 | }, 25 | "metadata": { 26 | "needs_background": "light" 27 | }, 28 | "output_type": "display_data" 29 | } 30 | ], 31 | "source": [ 32 | "fig = plt.figure() # an empty figure with no axes\n", 33 | "# fig.suptitle('No axes on this figure')\n", 34 | "x = np.linspace(0, 2, 100)\n", 35 | "\n", 36 | "plt.plot(x, x, label='linear')\n", 37 | "plt.plot(x, x**2, label='quadratic')\n", 38 | "plt.plot(x, x**3, label='cubic')\n", 39 | "\n", 40 | "plt.xlabel('x label')\n", 41 | "plt.ylabel('y label')\n", 42 | "\n", 43 | "plt.title(\"Simple Plot\")\n", 44 | "plt.legend()\n", 45 | "plt.show()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 30, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "
" 57 | ] 58 | }, 59 | "metadata": {}, 60 | "output_type": "display_data" 61 | }, 62 | { 63 | "data": { 64 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQ8AAAEUCAYAAAA1PrNmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFE1JREFUeJzt3X+Q3HV9x/HniyQQGtCAnIpABBVRBIrkClIRRbGkjJWCWhSrE+00E8cf4OhYBOsPShFkhjoVWhoHjI2AY4UMiL8IGIuiRO5oSMKPWFQiiMqpRUFbJMm7f+xn43Lc7e1+b7/7+X53X4+Zm9vd++73+7697HO/+929iyICM7Nu7ZR7ADOrJ8fDzApxPMysEMfDzApxPMysEMfDzAoZ6nhIOkPSH7Wc/4qkhen0o+nz/pI29Xi7r5V0ZpfXuU/SXr2co8229pd0Wonrb962z5L0xYLrWCrpWV1ep6OfpaSVkl5fwvY/Kun96fQ5ko7v4ro7bitJh0s6sZttl2Go4wGcAeyIR0ScGBEPl73RiLguIs6ffLmkuWVvu0P7A6XFoykiHoyItnfSNpYCXd15e2xW24+ID0fEjV0s33pbHQ44Hv0gaYGkL0u6Q9ImSadKeg+NH/5aSWvTch0/ukvaTdJNkm6XtFHSSdMstyQtc4ekm9JlSyVdnE6vlHSppHXAJ9J6P5PWuUHS66ZY519L+p6k9ZL+TdKcGWb9W0m3pRmubu5tTX6Ebe4RAOcDL0vrf6+k+S0z/Zek47rczgGSvpuuf27L8jv2BFpvk3T+ekmvkDQnzbkpXf+9aeZR4Io0466SFkv6T0njkr4uae+0nsVpnjuAd04ztyRdLGmzpBuBp7d87cPpe9okaUVadqrtP2m5GX4mO2779O/u42ldY5KOSN/DDyQtb72tJO0MnAOcmpY/VdLL0+n16eeze7tt90xEDPwH8Drg0y3nn5o+3wfs1XL5jvPAo+nz/sCmKdY5F3hKOr0XcC+gScuMAPcDB6Tze6bPS4GL0+mVwPXAnHT+AuCTLevYo3U24IXAl4B56fJ/Ad46w/f/tJbT5wLvbtn261u+1vyeXwFc33L5+4DL0+kXAD8G5nexneuaM9K4Az/ptm29TdL569Mci4E1LZcvTJ+/CYym0/OA7wAj6fypLfNuAI5Npy+c5md5CrAGmEPjAeXh5u3S/Jml06uAv5i8/XbLTdrOR4H3T77t08/2Hen0P6WZd6fx7+fnHdxWXwJemk7vBsztx/1qKPY8gI3AqyVdIOllEfHrHqxTwHmSNgA3AvsAz5i0zEuAmyPiRwAR8atp1vUfEbEtnT4euKT5hYj4n0nLvorGHeo2SevT+efMMOshkr4laSPwZuBFMyw/2THA59I89wBbgOd3sZ2XAlel06u63PYPgedI+pSkJcBvpljmIOAQYE26TT4E7KvG8auFEXHzDNs+FrgqIrZFxIPAN1q+dpykdel7eiXT33adLjed69LnjcC6iHgkIiaAx9L30c4twEVq7E0vjIitXW67kKo8xy5VRHxf0hE0nieeK+mmiDhnlqt9M41HhsUR8bik+4D5Bdf12y6WFfDZiPhgF9dZCfxlRNwhaSmNR3SAraSnrpJ2AnbuYp1I+gzwYuDBiDixzXYAZvolqh2zJPOhEU9JfwycACwH/gp4++RRgDsj4uhJ8810p2tL0nwae3ajEXG/pI8yxc+40+Vm8Fj6vL3ldPN82/tpRJwv6cs0/n3fIumEFPlSDcWehxpHxX8XEZ+jset6RPrSIzR2D4t4KvBQCsdxwLOnWOZW4FhJB6Q59uxgvWtoeW4uaY9JX78JeL2kpzfXKenZ6fS/SzpyinXuDvxU0jwa0Wu6j8ZeDMBraez+w5Nvl281ryfp+cAiYHNEvC0iDk/haLedW4A3ptOtl7e6Dzhc0k6S9gOOTNvbC9gpIq6msUcx1c9uMzAi6eh0nXmSXhSNg98PSzpmhm3fTOMYwpx0rKR5TKcZgF9I2g1oPbjbuv12y5XhCT8fSc+NiI0RcQFwG42nlqUbij0P4FDgQknbgceBd6TLVwBfk/RgREx5ELCNK4Avpd3UMeBJpY+ICUnLgGvSI/tDwKtnWO+5wCVqHEjcBnwMuKZlnXdJ+hBwQ1rn4zRiswU4DHhwinX+PbAOmEifm//wPg1cmw4mfo0/7AFtALaly1fSeFT91/S9bgWWRkTro+NM2zkduFLS3wHXTrpOc4/kFuBHwF3A3cDt6fJ9gM+k7xWguce1ErhU0v8CR9O4w/6zpKfS+Hf9SeBO4G3A5ZICuGGKmQFW03iqcReN4znfBYiIhyV9GtgE/IzGHbNp8vanW64Ma4Ez01O0jwPHpAew7TS+56+WvH0gHeCz+pP0FOCyiHhD7lk6JWkxcFFEvDz3LNY9x8OykDQKXAmcGRHXzLS8VU+2eKSDTDcDu9DYzfxiRHwkyzBm1rWc8RCwICIeTQfYvg2cHhG3ZhnIzLqS7YBpNKrVfEfjvPTh51BmNZH11RY13lY9DjwPuCQi1k2xzDJgGcCCBQsWv+AFfXkVqq0I2PLL3/LIY1vZZ+Gu7Lmgq7dHmFXW+Pj4LyJipJNlK3HANL2ZZzWNtzNP+1uPo6OjMTY21r/B2nhs6zaWrxpn7eYJzjv5UE47alHukcxmTdJ4RIx2smwl3iSW3syzFliSe5ZO7TJ3Dpe+ZTHHHTTCWas3cuW6H+ceyayvssVD0oj+8LczdqXx5qnS31LbSw6IDbOcex570/h1+A003pG3JiKuzzhPIQ6IDaucr7ZsoPFLVbXXDMjyVeOctXojgI+B2MCrxDGPQeA9EBs2jkcPOSA2TByPHnNAbFg4HiVwQGwYOB4lcUBs0DkeJXJAbJA5HiVzQGxQOR594IDYIHI8+sQBsUHjePSRA2KDxPHoMwfEBoXjkYEDYoPA8cjEAbG6czwyckCszhyPzBwQqyvHowIcEKsjx6MiHBCrG8ejQhwQqxPHo2IcEKsLx6OCHBCrA8ejohwQqzrHo8IcEKsyx6PiHBCrKsejBhwQqyLHoyYcEKsax6NGHBCrEsejZhwQq4ps8ZC0n6S1ku6SdKek03PNUjcOiFVBzj2PrcD7IuJg4CXAOyUdnHGeWnFALLds8YiIn0bE7en0I8DdwD655qkjB8RyqsQxD0n7Ay8G1uWdpH4cEMslezwk7QZcDZwREb+Z4uvLJI1JGpuYmOj/gDXggFgOWeMhaR6NcFwREddMtUxErIiI0YgYHRkZ6e+ANeKAWL/lfLVFwGXA3RFxUa45BokDYv2Uc8/jpcBbgFdKWp8+Tsw4z0BwQKxf5ubacER8G1Cu7Q+yZkCWrxrnrNUbATjtqEWZp7JBk/2AqZXDeyBWNsdjgDkgVibHY8A5IFYWx2MIOCBWBsdjSDgg1muOx
xBxQKyXHI8h44BYrzgeQ8gBsV5wPIaUA2Kz5XgMMQfEZsPxGHIOiBXleJgDYoU4HgY4INY9x8N2cECsG46HPYEDYp1yPOxJHBDrhONhU3JAbCaOh03LAbF2HA9rywGx6TgeNiMHxKbieFhHHBCbzPGwjjkg1srxsK44INbkeFjXHBADx8MKckDM8bDCHJDh5njYrDggw8vxsFlzQIZT1nhIulzSQ5I25ZzDZs8BGT659zxWAksyz2A94oAMl6zxiIibgV/lnMF6ywEZHrn3PGYkaZmkMUljExMTucexDjggw6Hy8YiIFRExGhGjIyMjucexDjkgg6/y8bD6ckAGm+NhpXJABlful2qvAr4LHCTpAUl/k3MeK4cDMpjm5tx4RLwp5/atf5oBWb5qnLNWbwTgtKMWZZ7KZsNPW6xvvAcyWBwP6ysHZHA4HtZ3DshgcDwsCwek/hwPy8YBqTfHw7JyQOrL8bDsHJB6cjysEhyQ+nE8rDIckHpxPKxSHJD6cDyschyQenA8rJIckOpzPKyyHJBqczys0hyQ6nI8rPIckGpyPKwWHJDqcTysNhyQanE8rFYckOpwPKx2HJBqcDyslhyQ/BwPqy0HJC/Hw2rNAcnH8bDac0DycDxsIDgg/ed42MBwQPrL8bCB4oD0j+NhA8cB6Y/c/9H1EkmbJd0r6cycs9hgcUDKly0ekuYAlwB/DhwMvEnSwbnmscHjgJRrxnhIerekPUrY9pHAvRHxw4j4PfB54KQStmNDzAEpTyd7Hs8AbpP0hfQ0Qz3a9j7A/S3nH0iXPYGkZZLGJI1NTEz0aNM2TByQcswYj4j4EHAgcBmwFPhvSedJem7JszW3vyIiRiNidGRkpB+btAHkgPReR8c8IiKAn6WPrcAewBclfWIW2/4JsF/L+X3TZWalcEB6q5NjHqdLGgc+AdwCHBoR7wAWA6+bxbZvAw6UdICknYE3AtfNYn1mM3JAemduB8vsCZwSEVtaL4yI7ZJeU3TDEbFV0ruArwNzgMsj4s6i6zPrVDMgy1eNc9bqjQCcdtSizFPVjxrPSOphdHQ0xsbGco9hA+KxrdtYvmqctZsnOO/kQx0QQNJ4RIx2sqzfYWpDy09hZsfxsKHmgBTneNjQc0CKcTzMcECKcDzMEgekO46HWQsHpHOOh9kkDkhnHA+zKTggM3M8zKbhgLTneJi14YBMz/Ewm4EDMjXHw6wDDsiTOR5mHXJAnsjxMOuCA/IHjodZlxyQBsfDrAAHxPEwK2zYA+J4mM3CMAfE8TCbpWENiONh1gPDGBDHw6xHhi0gjodZDw1TQBwPsx4bloA4HmYlGIaAOB5mJRn0gDgeZiUa5IA4HmYlG9SAZImHpDdIulPSdkkd/b+YZnU2iAHJteexCTgFuDnT9s36btACkiUeEXF3RGzOsW2znAYpIJU/5iFpmaQxSWMTExO5xzGbtUEJSGnxkHSjpE1TfJzUzXoiYkVEjEbE6MjISFnjmvXVIARkblkrjojjy1q32SBoBmT5qnHOWr0RgNOOWpR5qs5V/mmL2SCr8x5IrpdqT5b0AHA08GVJX88xh1kV1DUguV5tWR0R+0bELhHxjIg4IcccZlVRx4D4aYtZRdQtII6HWYXUKSCOh1nF1CUgjodZBdUhII6HWUVVPSCOh1mFVTkgjodZxVU1II6HWQ1UMSCOh1lNVC0gjodZjVQpII6HWc1UJSCOh1kNVSEgjodZTeUOiONhVmM5A+J4mNVcroA4HmYDIEdAHA+zAdHvgDgeZgOknwFxPMwGTL8C4niYDaB+BMTxMBtQZQfE8TAbYGUGxPEwG3BlBcTxMBsCZQTE8TAbEr0OiONhNkR6GRDHw2zI9Coguf6j6wsl3SNpg6TVkhbmmMNsWPUiILn2PNYAh0TEYcD3gQ9mmsNsaM02IFniERE3RMTWdPZWYN8cc5gNu8kB6UYVjnm8Hfhq7iHMhlVrQLqhiChlIEk3As+c4ktnR8S1aZmzgVHglJhmEEnLgGUAixYtWrxly5ZS5jUbdo9t3cb8eXPHI2K0k+XnljVIRBzf7uuSlgKvAV41XTjSelYAKwBGR0fLKZ2ZscvcOV0tX1o82pG0BPgA8PKI+F2OGcxsdnId87gY2B1YI2m9pEszzWFmBWXZ84iI5+XYrpn1ThVebTGzGnI8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKyQLPGQ9A+SNkhaL+kGSc/KMYeZFZdrz+PCiDgsIg4Hrgc+nGkOMysoSzwi4jctZxcAkWMOMytubq4NS/pH4K3Ar4Hj2iy3DFiWzj4maVMfxuvUXsAvcg/RomrzQPVm8jztHdTpgooo50Ff0o3AM6f40tkRcW3Lch8E5kfERzpY51hEjPZwzFnxPDOr2kyep71u5iltzyMiju9w0SuArwAzxsPMqiPXqy0Htpw9CbgnxxxmVlyuYx7nSzoI2A5sAZZ3eL0V5Y1UiOeZWdVm8jztdTxPacc8zGyw+R2mZlaI42FmhdQuHlV7a7ukCyXdk2ZaLWlh5nneIOlOSdslZXsJUNISSZsl3SvpzFxztMxzuaSHqvI+IUn7SVor6a708zo98zzzJX1P0h1pno/NeKWIqNUH8JSW0+8BLs08z58Bc9PpC4ALMs/zQhpv9PkmMJpphjnAD4DnADsDdwAHZ75djgWOADblnKNlnr2BI9Lp3YHv57yNAAG7pdPzgHXAS9pdp3Z7HlGxt7ZHxA0RsTWdvRXYN/M8d0fE5pwzAEcC90bEDyPi98Dnabwkn01E3Az8KucMrSLipxFxezr9CHA3sE/GeSIiHk1n56WPtvet2sUDGm9tl3Q/8Gaq9Ut1bwe+mnuICtgHuL/l/ANkvGNUnaT9gRfTeLTPOcccSeuBh4A1EdF2nkrGQ9KNkjZN8XESQEScHRH70Xh36rtyz5OWORvYmmbKPo/Vg6TdgKuBMybtVfddRGyLxm+67wscKemQdstn+8W4dqJib22faR5JS4HXAK+K9KQx5zwV8BNgv5bz+6bLrIWkeTTCcUVEXJN7nqaIeFjSWmAJMO0B5kruebRTtbe2S1oCfAB4bUT8LucsFXIbcKCkAyTtDLwRuC7zTJUiScBlwN0RcVEF5hlpvlIoaVfg1cxw36rdO0wlXU3j1YQdb22PiGyPapLuBXYBfpkuujUiOn27fRnznAx8ChgBHgbWR8QJGeY4EfgkjVdeLo+If+z3DJPmuQp4BY1fgf858JGIuCzjPMcA3wI20vi3DHBWRHwl0zyHAZ+l8fPaCfhCRJzT9jp1i4eZVUPtnraYWTU4HmZWiONhZoU4HmZWiONhZoU4HmZWiONhZoU4HtZzkv4k/X2T+ZIWpL8P0fb3JKx+/CYxK4Wkc4H5wK7AAxHx8cwjWY85HlaK9DsttwH/B/xpRGzLPJL1mJ+2WFme
BuxG469kzc88i5XAex5WCknX0fgLYgcAe0dE6X93xfqrkn/Pw+pN0luBxyPiSklzgO9IemVEfCP3bNY73vMws0J8zMPMCnE8zKwQx8PMCnE8zKwQx8PMCnE8zKwQx8PMCvl/mDfBgpXzpZIAAAAASUVORK5CYII=\n", 65 | "text/plain": [ 66 | "
" 67 | ] 68 | }, 69 | "metadata": { 70 | "needs_background": "light" 71 | }, 72 | "output_type": "display_data" 73 | } 74 | ], 75 | "source": [ 76 | "fig = plt.figure()\n", 77 | "fig, axs = plt.subplots(1, 1)\n", 78 | "# fig, axs = plt.subplots(2, 2)\n", 79 | "x = np.linspace(-20, 20, 100)\n", 80 | "\n", 81 | "axs.set_aspect('equal', 'box')\n", 82 | "axs.set(xlim=(-3, 3), ylim=(-3, 3))\n", 83 | "axs.set_title('still a circle, auto-adjusted data limits', fontsize=10)\n", 84 | "\n", 85 | "plt.plot(x, -x+1, label='linear')\n", 86 | "plt.xlabel('x')\n", 87 | "plt.ylabel('y')\n", 88 | "\n", 89 | "# plt.title(\"Linear Classifier\")\n", 90 | "plt.show()" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 3", 104 | "language": "python", 105 | "name": "python3" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 3 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython3", 117 | "version": "3.7.3" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /fft.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Jul-11-19 19:34 4 | # @Author : Your Name (you@example.org) 5 | # @Link : https://zh.wikipedia.org/zh-cn/%E5%8D%B7%E7%A7%AF#%E8%A8%88%E7%AE%97%E7%A6%BB%E6%95%A3%E5%8D%B7%E7%A9%8D%E7%9A%84%E6%96%B9%E6%B3%95 6 | 7 | import os 8 | import numpy as np 9 | import numpy.fft 10 | 11 | 12 | def ft(f: list, g: list) -> list: 13 | """ 14 | 直接计算 15 | y_n = f_n * g_n = \sum_m=0^M-1{f_n-m}{g_m} 16 | 复杂度 17 | O(MN) 18 | """ 19 | N, M = len(f), len(g) 20 | if M > N: # let M < N 21 | f, g = g, f 22 | M, N = N, M 23 | L = M + N - 1 24 | y = [0] * L 25 | for n in range(L): 26 | for m in range(M): 27 | if n-m not in range(N): 28 | continue 29 | print("m:{},n:{}".format(m, n)) 30 | print(y) 31 | y[n] += f[n-m] * g[m] 32 | return y 33 | 34 | 35 | def fft(f: list, g: list) -> list: 36 | return None 37 | 38 | 39 | def main(): 40 | f = [4, 5, 6, 7] 41 | g = [1, 2, 3] 42 | 43 | y = ft(f, g) 44 | # y = np.convolve(f, g) # [ 4 13 28 34 32 21] 45 | print(y) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /gen/Julia.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "10" 12 | ] 13 | }, 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "x = 10" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "11" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "x + 1" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "2" 52 | ] 53 | }, 54 | 
"execution_count": 3, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "x = 1 + 1" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "\"Hello World!\"" 72 | ] 73 | }, 74 | "execution_count": 4, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "x = \"Hello World!\"" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "1.0" 92 | ] 93 | }, 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "x = 1.0" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/plain": [ 111 | "-3" 112 | ] 113 | }, 114 | "execution_count": 6, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "y = -3" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 7, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "\"Kelly Hwong\"" 132 | ] 133 | }, 134 | "execution_count": 7, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "Z = \"Kelly Hwong\"" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "\"人人生而自由,在尊严和权利上一律平等。\"" 152 | ] 153 | }, 154 | "execution_count": 8, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "UniversalDeclarationOfHumanRightsStart = \"人人生而自由,在尊严和权利上一律平等。\"\n", 161 | "\"人人生而自由,在尊严和权利上一律平等。\"" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 9, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "\"人人生而自由,在尊严和权利上一律平等。\"" 173 | ] 174 | }, 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "UniversalDeclarationOfHumanRightsStart" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 10, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/plain": [ 192 | "1.0e-5" 193 | ] 194 | }, 195 | "execution_count": 10, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | } 199 | ], 200 | "source": [ 201 | "δ = 0.00001" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 11, 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "\"笑\"" 213 | ] 214 | }, 215 | "execution_count": 11, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "😆 = \"笑\"" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 12, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "'哭': Unicode U+54ed (category Lo: Letter, other)" 233 | ] 234 | }, 235 | "execution_count": 12, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "😢 = '哭'" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 13, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "π 
= 3.1415926535897..." 253 | ] 254 | }, 255 | "execution_count": 13, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "pi" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 14, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "ename": "ErrorException", 271 | "evalue": "cannot assign variable MathConstants.pi from module Main", 272 | "output_type": "error", 273 | "traceback": [ 274 | "cannot assign variable MathConstants.pi from module Main", 275 | "", 276 | "Stacktrace:", 277 | " [1] top-level scope at In[14]:1" 278 | ] 279 | } 280 | ], 281 | "source": [ 282 | "pi = 3" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 15, 288 | "metadata": {}, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "π = 3.1415926535897..." 294 | ] 295 | }, 296 | "execution_count": 15, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "pi" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [] 311 | } 312 | ], 313 | "metadata": { 314 | "kernelspec": { 315 | "display_name": "Julia 1.1.1", 316 | "language": "julia", 317 | "name": "julia-1.1" 318 | }, 319 | "language_info": { 320 | "file_extension": ".jl", 321 | "mimetype": "application/julia", 322 | "name": "julia", 323 | "version": "1.1.1" 324 | } 325 | }, 326 | "nbformat": 4, 327 | "nbformat_minor": 2 328 | } 329 | -------------------------------------------------------------------------------- /gen/Python2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "x = 10" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 3, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "11" 21 | ] 22 | }, 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "x + 1" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "x = 1 + 1" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 5, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "x = \"Hello World!\"" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 7, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | " x = 1.0" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 8, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "y = -3" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 11, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "Z = \"Kelly Hwong\"" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 12, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "'Kelly Hwong'" 86 | ] 87 | }, 88 | "execution_count": 12, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "Z" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 13, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | 
"'\\xe4\\xba\\xba\\xe4\\xba\\xba\\xe7\\x94\\x9f\\xe8\\x80\\x8c\\xe8\\x87\\xaa\\xe7\\x94\\xb1\\xef\\xbc\\x8c\\xe5\\x9c\\xa8\\xe5\\xb0\\x8a\\xe4\\xb8\\xa5\\xe5\\x92\\x8c\\xe6\\x9d\\x83\\xe5\\x88\\xa9\\xe4\\xb8\\x8a\\xe4\\xb8\\x80\\xe5\\xbe\\x8b\\xe5\\xb9\\xb3\\xe7\\xad\\x89\\xe3\\x80\\x82'" 106 | ] 107 | }, 108 | "execution_count": 13, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "UniversalDeclarationOfHumanRightsStart = \"人人生而自由,在尊严和权利上一律平等。\"\n", 115 | "\"人人生而自由,在尊严和权利上一律平等。\"" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 14, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "'\\xe4\\xba\\xba\\xe4\\xba\\xba\\xe7\\x94\\x9f\\xe8\\x80\\x8c\\xe8\\x87\\xaa\\xe7\\x94\\xb1\\xef\\xbc\\x8c\\xe5\\x9c\\xa8\\xe5\\xb0\\x8a\\xe4\\xb8\\xa5\\xe5\\x92\\x8c\\xe6\\x9d\\x83\\xe5\\x88\\xa9\\xe4\\xb8\\x8a\\xe4\\xb8\\x80\\xe5\\xbe\\x8b\\xe5\\xb9\\xb3\\xe7\\xad\\x89\\xe3\\x80\\x82'" 127 | ] 128 | }, 129 | "execution_count": 14, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | " UniversalDeclarationOfHumanRightsStart" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 15, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "ename": "SyntaxError", 145 | "evalue": "invalid syntax (, line 1)", 146 | "output_type": "error", 147 | "traceback": [ 148 | "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m δ = 0.00001\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "δ = 0.00001" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 2", 167 | "language": "python", 168 | "name": "python2" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 2 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython2", 180 | "version": "2.7.16" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } 186 | -------------------------------------------------------------------------------- /matrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Jul-01-19 10:24 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | 7 | import os 8 | import numpy as np 9 | 10 | a = [[1, 2, 3], 11 | [4, 5, 6], 12 | [1, 2, 1]] 13 | a = np.asarray(a) 14 | 15 | print(a.T) 16 | 17 | print(np.linalg.det(a)) 18 | print(np.linalg.det(a.T)) 19 | 20 | g = np.asarray([2, 1, 3]) 21 | print(np.dot(g, a)) 22 | print(np.dot(a, g)) 23 | 24 | b = [[2, 1, 0], 25 | [1, 4, 4], 26 | [5, 6, 4]] 27 | b = np.asarray(b) 28 | rank = np.linalg.matrix_rank(b) 29 | print(rank) 30 | 31 | inv = np.linalg.inv(a) 32 | print(inv) # inverse of a 33 | print(np.dot(a, inv)) 34 | -------------------------------------------------------------------------------- /notes/perceptron.md: -------------------------------------------------------------------------------- 1 | ## Perceptron Algorithm 2 | 3 | 4 | 5 | 
$$\epsilon(\theta_1,\theta_0)=\frac{1}{n}\sum_{i=1}^{n}[[y^{(i)}(\theta_1*x^{(i)}+\theta_0)\le0]]$$ 6 | 7 | if $y^{(i)}(\theta*x^{(i)})\le0$ then: 8 |     $\theta=\theta+y^{(i)}x^{(i)}$ 9 | -------------------------------------------------------------------------------- /optimization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Jul-01-19 16:26 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | 7 | import os 8 | -------------------------------------------------------------------------------- /perceptron/gen-data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | 5 | N = 10 6 | 7 | 8 | def null(a, rtol=1e-5): 9 | u, s, v = np.linalg.svd(a) 10 | rank = (s > rtol*s[0]).sum() 11 | return rank, v[rank:].T.copy() 12 | 13 | 14 | def gen_data(N, noisy=False): 15 | lower = -1 16 | upper = 1 17 | dim = 2 18 | 19 | X = np.random.rand(dim, N)*(upper-lower)+lower 20 | 21 | while True: 22 | Xsample = np.concatenate( 23 | (np.ones((1, dim)), np.random.rand(dim, dim)*(upper-lower)+lower)) 24 | k, w = null(Xsample.T) 25 | y = np.sign(np.dot(w.T, np.concatenate((np.ones((1, N)), X)))) 26 | if np.all(y): 27 | break 28 | 29 | return (X, y, w) 30 | 31 | 32 | def change_label(y): 33 | idx = random.sample(range(1, N), N/10) 34 | y[idx] = -y[idx] 35 | return y 36 | 37 | 38 | if __name__ == '__main__': 39 | X, y, w = gen_data(10) 40 | print(X) 41 | -------------------------------------------------------------------------------- /perceptron/perception-algorithm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Aug-04-19 14:39 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | 7 | import os 8 | import numpy as np 9 | import math 10 | 11 | 12 | def perception(samples, labels, theta=None, epoch=1): 13 | if theta is None: 14 | theta = len(samples[0]) * [0] 15 | theta = np.asarray(theta) 16 | for _ in range(epoch): 17 | for i, x in enumerate(samples): 18 | if labels[i] * np.dot(theta, x) <= 0: 19 | theta = np.add(theta, np.multiply(labels[i], x)) 20 | return theta 21 | 22 | 23 | def hw1(samples, labels, epoch=1): 24 | theta = len(samples[0]) * [0] 25 | theta = np.asarray(theta) 26 | # the first point that the algorithm sees is always considered a mistake 27 | theta = np.add(theta, np.multiply(labels[0], samples[0])) 28 | mistakes = 1 #  29 | for _ in range(epoch): 30 | for i, x in enumerate(samples): 31 | if i == 0 and _ == 0: 32 | continue 33 | if labels[i] * np.dot(theta, x) <= 0: 34 | mistakes += 1 35 | theta = np.add(theta, np.multiply(labels[i], x)) 36 | print("theta:", theta) 37 | print("mistakes:", mistakes) 38 | return theta 39 | 40 | 41 | def hw2(): 42 | # d = 2 # 2次后收敛 43 | d = 3 # 2次后收敛 44 | n = d # n = d samples 45 | xs = [] 46 | for t in range(n): 47 | x = [0] * d 48 | for i in range(d): 49 | if i == t: 50 | x[i] = math.cos(math.pi*(t+1)) 51 | else: 52 | x[i] = 0 53 | xs.append(x) 54 | # print(xs) 55 | 56 | ys = [1] * d 57 | theta = [0] * d 58 | epoch = 4 59 | for e in range(epoch): 60 | for i, x in enumerate(xs): 61 | theta = perception([x], [ys[i]], theta) 62 | print(theta) 63 | 64 | 65 | def main(): 66 | """ 67 | theta = [1, 1] 68 | sample = [2, 2] 69 | ret = np.dot(theta, sample) 70 | print(ret) 71 | """ 72 | """ 73 | # starts with data point ?(1) 74 | x1 
= [[-1, -1], [1, 0], [-1, 1.5]] 75 | y1 = [1, -1, 1] 76 | # starts with data point ?(2) 77 | x2 = [[1, 0], [-1, 1.5], [-1, -1]] 78 | y2 = [-1, 1, 1] 79 | # theta = hw1(x2, y2, epoch=3) 80 | # print(theta) 81 | 82 | # 1.c 83 | x1 = [[-1, -1], [1, 0], [-1, 10]] 84 | y1 = [1, -1, 1] # mistakes 4 85 | x2 = [[1, 0], [-1, 10], [-1, -1]] 86 | y2 = [-1, 1, 1] # mistakes 1 87 | # theta = hw1(x2, y2, epoch=3) 88 | # theta = hw1(x1, y1, epoch=10) 89 | """ 90 | hw2() 91 | 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /perceptron/perceptron.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Jul-17-19 19:40 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | 7 | import os 8 | import numpy as np 9 | 10 | 11 | def algorithm(parameter_list): 12 | pass 13 | 14 | 15 | def main(): 16 | ''' 17 | dataArr = [-0.2, -1.1, 0, 2.3, 4.5, 0.0] 18 | print(dataArr) 19 | signResult = np.sign(dataArr) 20 | print(signResult) 21 | ''' 22 | 23 | # 输入数据 24 | X = np.array([[1, 1, 2, 3], 25 | [1, 1, 4, 5], 26 | [1, 1, 1, 1], 27 | [1, 1, 5, 3], 28 | [1, 1, 0, 1]]) 29 | # 权重初始化,取值范围-1到1 30 | print(X.shape[1]) 31 | W = (np.random.random(X.shape[1])*2)-1 32 | print('初始化权值:', W) 33 | 34 | 35 | if __name__ == "__main__": 36 | main() 37 | -------------------------------------------------------------------------------- /plane.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Jun-30-19 20:16 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | 7 | import os 8 | import numpy as np 9 | 10 | 11 | def funcname(parameter_list): 12 | pass 13 | 14 | 15 | def main(): 16 | theta = np.asarray([3, 1]) 17 | theta0 = -1 18 | a = np.asarray([-1, -1]) 19 | r1 = (np.dot(a, theta) + theta0) / np.linalg.norm(theta) 20 | print(r1) 21 | 22 | o = np.asarray([0, 0]) 23 | r2 = (np.dot(o, theta) + theta0) / np.linalg.norm(theta) 24 | print(r2) 25 | 26 | # 3 x1 + 1 x2 + -1 = 0 27 | # (1, -2), (2, -5) 28 | # (1, -3) 即直线的方向向量 29 | d = np.asarray([1, -3]) 30 | p1 = np.dot(a, theta) / np.linalg.norm(theta) 31 | print(p1) 32 | 33 | p2 = np.dot(a, d) / np.linalg.norm(d) 34 | print(p2) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /probability.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Jul-01-19 12:50 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | 7 | import os 8 | from scipy.integrate import quad 9 | from numpy import sqrt, pi, exp 10 | 11 | 12 | def gaussian(x, mu, sigma): 13 | return 1/sqrt(2*pi*sigma**2)*exp(-((x-mu)/sigma)**2/2) 14 | 15 | 16 | if __name__ == "__main__": 17 | mu, sigma = 1, sqrt(2) 18 | a, b = 0.5, 2 19 | I = quad(gaussian, a, b, args=(mu, sigma)) 20 | print(I) 21 | -------------------------------------------------------------------------------- /project0/debug.py: -------------------------------------------------------------------------------- 1 | def get_sum_metrics(predictions, metrics=[]): 2 | for i in range(3): 3 | metrics.append(lambda x: x + i) 4 | 5 | sum_metrics = 0 6 | for metric in metrics: 7 | sum_metrics += metric(predictions) 8 | 9 | return sum_metrics 
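# Editor's note (added for clarity, not part of the original debug.py): as
# written above, get_sum_metrics cannot produce the outputs promised in main().
# The default argument metrics=[] is created once and shared across calls, so
# lambdas pile up from one call to the next, and `lambda x: x + i` binds i
# late, so all three lambdas see i == 2 by the time they run.  A hedged sketch
# of a corrected variant (the name get_sum_metrics_fixed is illustrative, not
# taken from the course material):
def get_sum_metrics_fixed(predictions, metrics=None):
    # copy the caller's list (or start fresh) so no state leaks between calls
    metrics = list(metrics) if metrics is not None else []
    for i in range(3):
        # the default argument i=i freezes the current value instead of late-binding
        metrics.append(lambda x, i=i: x + i)
    return sum(metric(predictions) for metric in metrics)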
10 | 11 | 12 | def main(): 13 | print(get_sum_metrics(0)) # Should be (0 + 0) + (0 + 1) + (0 + 2) = 3 14 | print(get_sum_metrics(1)) # Should be (1 + 0) + (1 + 1) + (1 + 2) = 6 15 | print(get_sum_metrics(2)) # Should be (2 + 0) + (2 + 1) + (2 + 2) = 9 16 | print(get_sum_metrics(3, [lambda x: x])) # Should be (3) + (3 + 0) + (3 + 1) + (3 + 2) = 15 17 | print(get_sum_metrics(0)) # Should be (0 + 0) + (0 + 1) + (0 + 2) = 3 18 | print(get_sum_metrics(1)) # Should be (1 + 0) + (1 + 1) + (1 + 2) = 6 19 | print(get_sum_metrics(2)) # Should be (2 + 0) + (2 + 1) + (2 + 2) = 9 20 | 21 | if __name__ == "__main__": 22 | main() 23 | -------------------------------------------------------------------------------- /project0/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def randomization(n): 4 | """ 5 | Arg: 6 | n - an integer 7 | Returns: 8 | A - a randomly-generated nx1 Numpy array. 9 | """ 10 | #Your code here 11 | raise NotImplementedError 12 | 13 | def operations(h, w): 14 | """ 15 | Takes two inputs, h and w, and makes two Numpy arrays A and B of size 16 | h x w, and returns A, B, and s, the sum of A and B. 17 | 18 | Arg: 19 | h - an integer describing the height of A and B 20 | w - an integer describing the width of A and B 21 | Returns (in this order): 22 | A - a randomly-generated h x w Numpy array. 23 | B - a randomly-generated h x w Numpy array. 24 | s - the sum of A and B. 25 | """ 26 | #Your code here 27 | raise NotImplementedError 28 | 29 | 30 | def norm(A, B): 31 | """ 32 | Takes two Numpy column arrays, A and B, and returns the L2 norm of their 33 | sum. 34 | 35 | Arg: 36 | A - a Numpy array 37 | B - a Numpy array 38 | Returns: 39 | s - the L2 norm of A+B. 40 | """ 41 | #Your code here 42 | raise NotImplementedError 43 | 44 | 45 | def neural_network(inputs, weights): 46 | """ 47 | Takes an input vector and runs it through a 1-layer neural network 48 | with a given weight matrix and returns the output. 
49 | 50 | Arg: 51 | inputs - 2 x 1 NumPy array 52 | weights - 2 x 1 NumPy array 53 | Returns (in this order): 54 | out - a 1 x 1 NumPy array, representing the output of the neural network 55 | """ 56 | #Your code here 57 | raise NotImplementedError 58 | -------------------------------------------------------------------------------- /project0/test.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import torch.nn as nn 3 | 4 | 5 | def green(s): 6 | return '\033[1;32m%s\033[m' % s 7 | 8 | 9 | def yellow(s): 10 | return '\033[1;33m%s\033[m' % s 11 | 12 | 13 | def red(s): 14 | return '\033[1;31m%s\033[m' % s 15 | 16 | 17 | def log(*m): 18 | print(" ".join(map(str, m))) 19 | 20 | 21 | def log_exit(*m): 22 | log(red("ERROR:"), *m) 23 | exit(1) 24 | 25 | 26 | def check_numpy(): 27 | try: 28 | import numpy 29 | log(green("PASS"), "NumPy installed") 30 | except ModuleNotFoundError: 31 | log(red("FAIL"), "NumPy not installed") 32 | 33 | 34 | def check_scipy(): 35 | try: 36 | import scipy 37 | log(green("PASS"), "SciPy installed") 38 | except ModuleNotFoundError: 39 | log(red("FAIL"), "SciPy not installed") 40 | 41 | 42 | def check_matplotlib(): 43 | try: 44 | import matplotlib 45 | log(green("PASS"), "matplotlib installed") 46 | except ModuleNotFoundError: 47 | log(red("FAIL"), "matplotlib not installed") 48 | 49 | 50 | def check_torch(): 51 | try: 52 | import torch 53 | log(green("PASS"), "PyTorch installed") 54 | except ModuleNotFoundError: 55 | log(red("FAIL"), "PyTorch not installed") 56 | 57 | 58 | def check_tqdm(): 59 | try: 60 | import tqdm 61 | log(green("PASS"), "tqdm installed") 62 | except ModuleNotFoundError: 63 | log(red("FAIL"), "tqdm not installed") 64 | 65 | 66 | def main(): 67 | try: 68 | check_numpy() 69 | check_scipy() 70 | check_matplotlib() 71 | check_torch() 72 | check_tqdm() 73 | except Exception: 74 | log_exit(traceback.format_exc()) 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | SciPy 4 | tqdm 5 | torch 6 | -------------------------------------------------------------------------------- /sentiment_analysis/200.txt: -------------------------------------------------------------------------------- 1 | 131,181,22,172,144,92,97,187,58,93,6,70,106,68,153,168,179,199,29,46,9,142,134,88,193,110,26,32,117,112,17,39,166,13,94,138,109,147,51,101,59,188,116,5,170,99,100,167,180,146,65,1,104,43,38,184,123,171,137,162,71,44,95,174,12,7,54,152,21,47,28,176,34,2,132,118,42,189,150,14,165,41,192,45,82,128,63,57,197,160,53,75,108,135,121,159,183,67,169,50,87,69,89,196,115,19,148,96,86,11,8,60,33,173,78,4,119,105,182,127,177,30,186,40,49,178,76,157,161,73,164,151,31,74,191,27,125,198,81,20,155,114,139,36,61,56,145,48,16,83,62,85,126,0,102,23,3,140,15,195,133,113,190,141,52,163,156,80,111,90,175,143,120,84,18,25,79,37,154,136,64,158,24,185,72,35,129,55,149,91,122,77,103,124,130,66,10,107,194,98 -------------------------------------------------------------------------------- /sentiment_analysis/4000.txt: -------------------------------------------------------------------------------- 1 | 
2314,3128,1211,666,1266,1627,2831,2975,3163,2836,2967,2585,1291,621,3825,2794,61,195,1001,3722,1132,997,3708,2564,1816,2363,877,732,2010,1566,3625,1529,1659,872,3504,85,870,1171,668,2584,396,2418,3888,3941,3197,3572,1614,3868,2631,2274,1103,865,68,351,3044,2092,1377,3052,3115,190,600,3445,3995,2372,3645,3345,1151,3130,2235,2127,2643,1005,2592,1282,1789,506,298,3493,2765,3377,1494,391,348,1231,1976,1846,3559,3519,2696,2221,3425,560,3813,1210,2898,2474,3608,1350,2648,1475,1582,1327,2791,2746,1648,1500,930,1797,2835,433,1507,2676,823,3523,207,1775,2144,447,2163,389,2310,1437,3511,595,2503,3172,3111,1221,941,2963,462,1484,1397,961,2325,3801,1395,2635,2161,2504,22,2849,866,2058,2361,2377,697,3365,3770,3137,1279,3198,3012,3731,622,2949,1764,357,3998,2198,559,1872,3618,3443,270,1980,2469,3084,3175,1561,1888,1084,544,1030,3989,873,920,868,2024,1071,2204,2173,2797,422,3067,398,2728,516,3902,2197,1904,3554,3265,3476,94,1062,1726,2822,306,3226,2079,305,1854,2256,2624,2086,1551,2725,3348,1370,25,1267,1341,500,2390,2191,2177,222,2183,1115,2792,3836,2578,2344,1801,437,2986,1559,3594,175,1599,1537,3064,1640,3077,1237,2355,1182,252,1209,1815,517,418,869,276,623,1163,53,1656,3251,860,2944,3939,2958,499,1172,3178,3870,3304,1730,2519,1949,3893,2396,364,2368,1044,2683,2179,1008,455,3193,3896,3454,2222,3169,2572,3900,1604,754,2825,527,2093,878,2393,1422,3501,1501,2467,3159,1580,676,2131,3087,2834,615,1771,2548,1947,183,3332,1415,3878,1741,1054,1679,2118,2973,569,1831,1461,2369,3196,2521,2848,3157,3447,773,1554,2960,2895,2999,2513,2272,248,101,492,423,947,2694,3325,720,3053,2301,1638,1336,3469,547,1654,2682,545,1759,2690,229,875,3482,3131,2840,74,3496,1890,3029,134,1539,1690,936,1560,3185,3957,3167,152,2915,1367,1176,819,1649,863,3693,2668,3805,1863,1851,1101,593,603,1216,1181,1136,3194,1493,126,438,369,2446,45,3986,2364,611,1637,11,2838,937,3505,656,2187,2514,1045,2560,1625,988,906,2566,2901,1704,3239,2827,295,239,2428,1530,1619,2500,3700,3047,876,3945,3778,3244,808,1025,1505,3529,2700,990,307,3743,3109,115,1861,585,3876,332,3231,81,1712,3903,3038,3309,421,1281,2423,3218,2056,2317,2283,1860,3697,946,700,2972,809,3728,3973,1793,2862,301,1810,2669,363,818,2158,3760,266,2231,253,3946,1519,971,1825,2800,1843,2806,777,2022,1048,19,846,3832,1884,918,3985,750,1758,1097,1083,3783,59,2151,522,3712,1848,2285,729,3541,2141,3390,3685,3435,3191,3716,1476,3929,2162,2843,3026,933,3923,1389,1985,1330,2407,2671,434,2559,205,1347,368,77,2269,2388,1188,793,573,3241,634,986,1122,2340,1999,2070,235,535,3484,1924,1853,2037,1596,2320,3413,2039,1262,3510,1117,1093,1161,2104,2075,3122,1067,2912,3494,3220,3389,210,1966,672,2132,482,3280,1891,441,532,1359,2245,739,1038,3670,703,503,2660,2190,3199,217,2266,3810,2210,1082,1823,806,3546,762,3639,1558,1545,717,124,910,955,3202,1535,3907,3248,2250,767,2268,1007,3730,3222,3306,2761,155,2323,2343,128,1959,1139,3767,3132,2051,2852,385,1514,1956,1932,1371,2771,1840,1276,2430,432,2788,3281,141,1496,2385,2538,3270,1564,1112,1544,3432,3350,292,3080,289,1882,2679,361,2511,501,358,102,2629,1675,3803,1247,324,1439,3314,859,1531,1379,2695,1308,2640,683,1013,1032,1946,591,2946,2678,3238,723,3988,2422,1473,2774,914,1428,1223,1908,359,2234,1584,2742,186,853,139,3605,2040,3045,3030,1170,310,3330,1800,3715,133,1133,2688,2976,1669,162,3188,3626,202,1152,2032,3550,131,891,1214,1623,1499,2558,1527,20,240,3260,98,1692,1847,1644,3219,675,17,2208,3891,815,749,3372,1518,1448,3502,1315,2956,1923,316,448,3974,2734,2634,3189,1119,3921,3283,1906,2455,3237,3701,2536,3049,3745,1996,705,3293,2316,256,952,469,1445,1438,350
9,3392,196,1031,3010,748,2968,416,3917,206,3571,1477,399,3533,1776,1986,553,1435,3144,2152,3016,1098,3772,3786,708,3075,841,2338,976,1756,1795,721,1095,2710,1333,3809,1472,814,1609,3720,3764,2047,1456,1989,1228,2408,466,1299,2192,3990,2108,3408,1000,3632,1183,390,3634,2606,3576,2715,1852,938,2930,3808,2945,440,335,2515,2615,452,2060,1921,686,3295,3549,1738,2087,1184,3894,495,594,3915,1003,3649,2939,2884,674,538,1757,756,3644,3374,1321,1241,2911,2646,822,3558,1108,2237,3021,911,1021,2174,241,1041,3354,2360,3173,1195,3201,3710,1206,2798,2964,3235,1090,3138,2520,3018,3889,3587,1954,942,2409,3224,249,633,784,3470,376,3205,2750,3947,2414,1850,3451,3729,1374,2202,3657,2319,2175,742,3134,514,2828,3714,353,2720,2923,3006,1917,761,1070,1714,605,3895,2965,744,1425,681,360,1096,1391,1314,992,2594,680,2773,578,660,2392,35,2048,3498,72,900,151,320,523,3003,2481,3472,1012,3263,224,3437,2169,1480,1547,1934,2637,3687,3916,2380,2207,3311,2936,3420,86,3061,1123,3771,1431,1826,2306,2526,2085,1011,1244,2672,3616,1988,3492,338,1017,3086,331,2652,2386,3650,1334,117,960,1191,1177,2262,3677,3250,2252,3622,291,3140,2293,1509,2907,1760,792,536,2215,2807,1977,885,3628,2046,2816,2611,3468,1533,1964,2711,2113,91,1693,2804,3291,1524,212,659,130,923,1255,574,2842,2055,1864,456,856,760,112,1280,2573,2052,3334,1361,2425,56,1202,3388,1301,2603,2966,1086,2419,1305,3046,2961,1557,3042,1284,2217,30,3905,2717,2264,1641,3139,226,3317,537,2645,3604,602,2714,380,228,230,258,1085,1963,1650,917,1856,2205,3551,3741,2556,3035,3485,2277,677,1808,1957,561,647,2126,3373,3127,387,1164,2486,1666,1987,3426,277,1311,2692,110,5,1950,3011,3275,3662,2405,366,377,653,3664,1492,427,3499,2974,2760,2796,352,2579,3829,1785,3307,3617,743,3092,804,2739,2294,3459,42,33,1830,2315,2308,355,378,2180,3822,525,3102,3914,2464,2106,939,526,397,2630,1078,3177,2713,3019,530,699,974,1036,2424,1945,2439,2482,3419,1053,3246,443,2793,386,481,1156,610,1734,1796,3344,3807,2303,1387,2545,973,285,2879,2517,2381,1440,812,3655,3262,3898,1871,2533,2044,608,751,733,719,2013,27,592,2756,3887,1454,1300,1611,2657,2023,3340,92,2107,496,1356,3312,3434,3438,707,1486,3718,922,3387,1532,1984,1907,2166,1287,3414,2432,1665,1705,1077,1092,788,435,794,1628,3880,3078,843,1534,409,943,1490,613,2577,2667,2913,1190,1910,2211,2494,2535,2605,2373,2366,2434,1114,3066,1961,381,3942,616,2334,309,510,414,731,3278,336,119,1792,2437,3865,2074,2449,1023,3126,3821,1642,847,1076,1981,2244,3361,2990,3768,201,2367,2980,488,3329,785,1606,473,105,1376,347,766,867,2411,3815,1474,2633,3635,2680,461,3085,3212,857,2896,3328,3002,2140,3744,2280,3213,1735,588,1636,1740,641,3342,1355,1307,1716,46,2398,587,3978,2045,2933,1874,3442,3709,948,3349,2527,1612,1944,2805,2762,2059,3401,919,2027,1135,3819,1418,563,340,2621,598,2478,2751,3540,71,662,2200,3746,3182,1689,2510,3671,1634,2212,1939,2632,2378,1120,1357,3588,3679,713,899,2546,3475,2267,3409,1506,2636,3036,2719,198,3824,457,1701,70,2000,2228,1200,153,3073,3489,2304,1925,3535,2612,1149,1256,2686,3407,281,3367,528,1526,2362,1014,2305,28,1130,2088,2061,122,3398,1109,3972,1304,2100,1914,597,1770,2043,3839,2073,2259,2330,65,3758,880,243,2661,1661,273,1360,1243,3421,3261,3828,2785,3694,890,3956,1079,3633,3781,3975,1972,1283,646,1585,2539,2752,2091,1208,1603,1635,2722,1761,3352,449,3976,2493,2709,3319,2928,3118,314,1677,1049,1919,3079,2365,489,47,2555,2460,2006,1263,3579,3593,3004,1009,1709,3612,2201,1100,2371,2412,3940,1002,1316,2302,279,3996,8,1553,2871,1495,1574,1166,2529,3302,1593,2321,831,1935,95,3223,3082,1687,1897,2969,950,2808,3214,232,3097,300,3315,1578,1540,
1549,898,3227,2345,1469,3516,2436,763,3256,1572,1403,3640,2081,3725,3133,2009,2218,871,1056,223,1798,3524,2109,1249,3465,2675,1543,3953,2069,3901,2979,402,1497,1881,650,3453,998,3145,2232,3039,2919,2199,1168,1358,2540,3322,3812,2258,2065,888,2995,1217,2837,3508,3536,3875,3495,3362,2426,3938,1396,2812,2103,96,800,343,393,3056,3538,2850,1388,2454,959,1568,2206,1273,684,75,1842,2243,3142,2583,1378,1292,1893,2530,384,2803,2413,2702,304,1227,3806,1337,737,2410,902,467,2902,1922,2282,518,2927,879,2155,1772,1621,3965,1918,3684,2549,3384,915,48,840,3675,3150,718,508,2049,1718,687,2370,1927,160,3774,3269,3284,263,2433,1121,931,1725,962,916,1982,2932,1691,2490,216,3569,2906,2745,1955,3909,3792,654,2655,1173,3971,776,2953,2472,3119,1271,3477,1417,49,558,1667,1779,3883,2445,2063,1058,3379,275,1312,929,1386,690,2905,1441,2830,1193,3601,551,3481,2487,2562,1817,430,2914,3288,2220,2542,3014,2952,1836,3802,362,1099,2839,850,3818,2703,3749,2735,3683,1517,167,2935,3277,3285,3343,2590,2673,3581,1094,3527,16,3560,478,3932,3286,2851,1632,1899,231,365,2954,1485,3566,539,2461,2910,3296,1579,3877,2781,2899,1680,465,1902,2463,966,1880,3,3440,3993,2431,476,1968,1833,1028,817,2194,2072,1265,1696,2950,945,3615,3959,2394,3411,2102,225,1844,2602,692,3179,2123,2685,3578,3259,3643,247,3170,1911,2802,1774,1806,3817,1569,3273,820,2375,568,3181,734,3033,2853,2353,2537,1965,3834,3949,1024,727,821,244,2356,3338,264,1254,3648,579,3480,2116,2030,3913,985,3656,3320,408,589,3555,297,3785,1762,2230,752,1513,463,864,3548,624,2290,3676,3797,3282,726,2147,2337,2096,2832,3449,3827,1019,3171,2983,2012,1404,2479,1787,702,2165,1791,711,1145,2403,219,1413,2053,2354,1232,1546,1433,1143,828,2452,3129,14,3627,1144,2456,280,2766,2821,116,1769,1348,283,3556,3376,736,2962,3355,1994,1668,2066,3924,1900,188,1138,2020,1159,805,979,2747,2485,2399,3742,3568,1703,1246,3795,3031,2900,3567,2880,3542,370,3236,968,807,3580,3837,1004,2298,1706,2948,208,1269,2753,80,932,1788,1037,477,685,1937,1204,1997,177,2925,1903,2440,3217,2111,1157,3800,1812,2981,795,1883,274,3206,43,3069,829,1400,2918,897,426,1353,1245,3944,2005,2844,3595,1739,3255,1960,52,3166,1814,1885,2484,1753,2551,3610,2859,1694,2124,3674,325,1889,3001,1588,1663,996,3507,3930,1426,1196,755,926,1384,3849,2099,3967,2885,2563,741,3028,3050,21,73,3204,174,2764,345,1258,3136,3112,3552,3264,1051,1235,1319,3060,3370,1429,1325,3934,1427,218,1059,2240,1915,2557,3871,1219,2608,2270,3958,2729,1993,1399,200,3532,3882,839,3310,3404,3383,965,2101,799,709,3866,2531,1803,3705,2291,1137,3799,1515,2826,3598,1868,2749,2339,1091,679,1983,3696,1286,2550,3711,1072,2552,485,3105,3619,2071,2019,2647,121,934,2376,849,3845,3979,3987,1742,318,311,1898,2977,2891,127,3623,3690,451,3547,549,1420,3107,1536,1102,1681,142,3412,2457,169,1556,1212,3151,2727,2628,801,2819,2916,2988,2903,3141,3418,3114,3735,1162,2189,1683,1942,3020,694,661,2705,2741,3165,1773,1610,1724,3186,1607,590,199,436,1600,1317,3994,1131,970,1943,3478,2934,1129,1033,3858,1626,3467,2674,3490,3333,1563,2644,1006,2149,1767,395,3363,1670,2653,3070,1639,887,2416,1491,3658,3022,1363,3738,1715,108,1555,627,2322,458,193,2997,3842,2089,1324,2352,79,3323,704,1597,23,2512,2219,2358,565,1488,1896,1723,3271,2940,2718,975,3299,3473,1153,984,927,2251,3630,1978,97,1344,2471,651,3023,657,3089,642,2253,2524,2,150,3267,3885,631,3863,2993,403,2627,2278,2768,392,2112,3405,2496,1646,3861,211,3899,2276,2489,2295,2778,3062,2453,725,87,1436,107,2929,394,1464,262,3791,3195,1081,2617,3158,740,2743,2786,1268,1047,1453,1805,221,1672,317,1465,688,213,2847,1274,3793,2733,180,791,3925,2391,308
8,747,3933,1297,557,2698,1482,2288,3385,3847,3403,3147,1751,425,327,10,1406,2120,3816,2427,234,3859,3673,1366,652,2139,3521,3906,1870,790,293,2856,138,2098,2565,3850,625,3943,3562,2651,1869,2119,3375,1039,2348,3416,2763,1087,714,2921,3096,339,2459,2724,1586,2273,2447,2128,3624,2618,548,2326,2620,245,2095,1253,1655,2858,3125,1664,1126,513,3577,3534,3008,1288,176,1929,3843,453,781,284,18,3513,1423,3613,2920,170,554,2136,1940,3844,99,3713,3543,2311,519,2329,3058,1905,157,1594,1784,3247,1629,830,2706,44,3823,3153,1257,3603,1342,1458,1790,1035,1113,1616,3455,267,2731,673,771,411,2084,1841,3083,3641,2347,3072,3174,758,1124,3869,2341,2955,78,2035,1451,796,649,3032,3106,64,935,1686,2300,2522,1901,3491,2607,2307,636,3991,24,580,3024,2770,1401,1302,2271,2159,3517,3110,1148,1755,55,1167,3692,1088,1780,2639,2225,2064,450,3826,644,259,695,3707,712,3257,1736,2248,287,768,1055,2666,38,1398,515,2708,178,2328,3406,3922,696,3156,3830,2619,1323,3040,3402,1487,1411,3724,2554,3294,3424,2748,2878,626,665,2587,2182,2400,993,1349,3168,2883,2854,3706,1581,2525,2817,3051,3663,2951,2383,1040,140,1118,3074,209,2031,3229,1653,491,2887,1867,3100,1074,2261,2336,2178,143,1920,2254,1180,2172,1272,3853,1608,2016,1105,3506,1657,2596,1457,2078,967,1876,617,1199,1467,3857,2033,148,111,3303,3359,2970,164,3258,2697,614,1873,424,2448,1749,3148,2866,3614,691,3015,1010,722,972,1351,326,2860,2707,566,2050,12,2663,1598,1674,502,2313,171,1895,2943,2186,1080,1676,3360,1567,3048,1346,2650,1277,3982,3117,3176,1838,886,556,1731,1412,3272,2994,1338,924,3682,184,989,3120,251,2571,2011,90,3522,3382,2582,238,2209,2349,1459,69,1248,664,1409,3851,925,3043,1405,2723,1290,3200,854,242,3232,1194,1296,3910,3590,2475,2662,3620,322,1285,2236,1969,2026,3955,2275,2721,51,1264,2776,2003,3410,2441,2156,106,2523,442,3368,3000,3659,2924,3698,1447,3525,957,3076,963,2732,2332,3152,845,555,313,2090,724,635,1620,3488,103,3775,3766,1503,1343,3977,3101,63,562,701,2384,1174,1967,2574,1528,3292,1340,1259,2122,265,3672,907,1155,2239,3225,1470,3446,2957,2701,2642,2757,498,2841,1754,2110,3300,2886,2823,893,1018,3471,706,405,2518,486,67,949,1116,1069,2598,2196,3005,2476,2105,1809,1215,824,2216,2498,1169,2281,439,1781,1309,3669,1142,524,2846,2810,1022,3737,3335,1615,2289,1748,315,3814,1804,3638,1695,629,3960,987,3358,3856,483,1802,1886,3835,1688,194,1794,2028,2869,37,296,2260,1252,3190,982,1408,1974,2809,1575,2117,3607,3892,3717,1029,825,769,1416,3838,858,3848,1510,778,181,2138,118,3242,1331,1866,2195,1322,1306,350,3773,419,3573,1390,789,1858,3436,2284,3563,2492,909,3779,1752,260,575,3124,2681,3428,2881,1432,667,215,179,3393,1089,1381,882,3912,1043,2007,1862,3093,1763,3879,2656,3702,2114,2985,220,3386,1685,1446,1165,2097,3013,2508,1218,2501,954,710,2775,2613,233,1489,3864,3057,3596,958,136,1590,203,2402,836,1799,803,3417,1595,2015,1750,1592,1430,1819,3116,855,529,1294,2146,604,1832,606,1782,2815,2654,3025,3326,689,3881,3992,3321,1777,3789,783,3667,2346,576,928,3855,3457,3520,1107,15,3660,3999,3564,1479,26,1618,2740,3155,3561,2813,272,2157,41,1601,3699,3017,104,883,2544,214,3371,584,1229,1818,3221,2670,2018,3356,2054,2845,2002,3113,892,3161,1936,3216,1450,1224,2737,3952,571,1916,2420,1643,1562,493,2855,505,643,564,2575,3636,3041,3703,1197,1141,1652,2226,2599,1682,3233,2176,861,2568,3531,2351,550,2129,1158,3464,1699,278,669,1020,2171,484,1392,2185,1251,1075,3135,3353,349,2438,802,2754,1160,3950,3149,417,3055,1622,2908,2382,2154,1631,3396,1,3331,2738,1106,494,3935,3500,1147,1660,2227,951,3456,2777,379,1930,2892,1737,3160,2495,2233,3740,2576,468,1222,895,31,1892,2534,1839,
2863,7,1207,834,1979,354,3642,2586,1894,1455,3904,2143,1394,1857,1015,715,1768,1727,3680,3928,1827,2877,630,2818,3763,2286,459,1134,444,759,372,3243,3123,1522,835,1807,1313,2982,3154,34,1673,1541,236,2167,3794,1512,2591,1821,1016,3337,1452,2609,1220,1729,2421,582,3287,1707,2255,1226,1721,540,299,2876,319,3606,84,2462,541,189,2716,1719,3518,3512,3098,969,994,2082,1293,912,512,3962,1542,640,1260,2379,2184,420,3651,2429,3867,1339,2604,2265,3209,1198,1938,3452,1483,852,114,3854,1057,2897,1410,3983,460,1859,765,581,953,2477,100,2824,1511,3487,29,3063,504,2998,3637,2699,838,3586,3515,586,3732,3146,1498,944,40,728,1424,1414,120,2833,3665,3203,288,1443,321,1034,2959,1329,2034,123,1708,2811,1765,1829,125,3007,639,3599,406,3911,1702,3381,344,1728,2318,1744,596,464,1250,2077,1720,2145,3798,3316,2569,3931,940,3919,2342,3721,2730,2130,3884,1046,1991,163,182,308,2588,1460,1865,3575,3099,1521,1820,1605,156,774,3429,497,3537,3054,1203,1587,520,1213,2547,2922,2795,3460,2664,1383,2589,2516,3734,2870,3668,246,454,2008,3184,3180,3208,1570,798,1419,3691,2249,2867,431,1326,2580,543,2528,2450,132,82,3253,3187,1928,3747,192,1879,1583,1909,2505,144,1684,1444,620,3647,3831,334,2937,3984,2014,577,3298,1971,147,227,1368,2987,1175,978,521,3394,670,3937,1571,1573,1671,3164,1240,1073,3920,268,3796,1042,2335,2076,3631,745,3034,3611,2691,83,3463,956,2401,3526,3689,3240,1975,1711,2451,2193,1186,844,3090,1746,3357,312,2458,3997,2121,3936,3252,3589,371,3784,137,2057,2170,1565,2068,1520,1887,2687,609,3754,658,1354,3427,3108,62,1230,3539,3592,921,3339,3733,2115,3439,3557,271,3723,3305,2893,3759,1205,2387,1552,638,3211,2468,1052,3926,2150,145,1318,3351,1303,3059,2890,1393,39,3787,2497,2223,827,2744,2601,2042,2041,1320,3474,2029,2134,1733,2784,2614,1662,2309,607,1998,3874,429,619,2238,1481,1548,1877,810,428,3790,2553,884,3266,3530,2625,848,3918,1630,2036,3574,826,2772,3081,1849,2153,113,2875,3366,3621,3545,3968,786,1766,3448,3183,2397,1068,901,1508,342,1732,889,3752,2926,2395,3228,2001,1837,1187,2125,2279,1613,401,32,3872,3736,905,3071,811,346,3308,269,356,2616,3301,3514,2312,2567,682,637,2814,3009,1372,1236,154,1747,2038,2865,3765,3757,3776,1407,3600,3873,3444,1380,1941,1970,2297,407,445,1698,1225,1345,3422,1651,1239,290,2984,2142,735,1261,197,981,2992,1958,470,1201,531,3597,1617,1478,1185,3981,3852,671,1104,168,146,3748,3192,1385,383,2942,1504,3234,1382,1365,1111,618,567,36,3890,3811,1538,851,730,2203,1525,1783,2622,1878,2638,2917,3609,2684,2488,2872,3068,3466,172,3833,3629,1502,1289,2287,1063,2789,842,166,173,76,3486,1140,632,471,1421,2641,3980,3347,2133,2677,546,1050,3528,474,3245,3276,1855,1125,89,333,2570,1624,3661,1948,410,746,2820,1146,1238,813,2659,2491,487,780,400,2507,2164,2767,3602,9,3091,2938,58,874,13,3948,1710,2779,1645,60,534,2971,3430,341,2593,2246,770,1700,3327,3897,3369,3313,204,1912,3688,1278,1335,2025,159,0,3094,2473,2299,1995,2160,1828,645,2873,1061,261,1824,678,191,2904,2712,3162,1713,3908,2874,3570,1647,129,3654,3582,2327,2759,3585,3927,1550,1835,2610,913,980,655,1369,2861,787,2435,6,330,2665,3395,1697,3065,1589,3961,2931,542,2543,3497,2595,3653,2417,1471,2758,1179,2499,1442,1973,3121,983,552,1150,3750,187,3027,1375,323,3037,1875,50,2829,2296,628,1027,2693,3840,1591,1462,2658,1275,2083,3583,2331,3686,1678,2790,1931,54,716,2726,1811,2444,480,3969,2168,1402,663,3462,648,2769,3230,237,3143,833,2755,3788,693,2333,3963,3433,1926,3804,3704,1468,3860,816,3769,3846,413,2443,161,1913,2062,3753,738,1786,1434,1233,2649,294,904,286,1064,3423,415,2541,1743,2229,93,2782,250,3279,2978,862,1962,507,2941,757,797,88,2480,14
9,475,3415,1516,374,3841,490,2359,3400,2483,772,2600,509,109,303,2787,2466,2188,3727,2597,2350,3346,185,896,2857,3503,599,3207,3951,254,3341,1745,3886,698,3254,1523,903,2889,479,1110,2996,66,995,779,3461,1373,2581,2404,1992,472,3862,1328,337,1463,3249,3755,2868,2626,2704,2017,1845,1127,2374,1717,3380,2415,2783,3666,881,3479,964,3324,2137,1128,2148,3966,3431,1602,2799,2214,3397,3780,2864,3678,328,3450,158,3782,612,3591,533,3103,2780,367,282,1951,1065,2689,2509,135,764,3364,775,753,1189,991,2357,1298,1576,3268,2442,375,1822,1178,2406,2241,2470,3970,832,2213,3777,3336,1362,2242,511,2894,1192,2257,1234,446,2004,3820,2080,3584,1310,329,373,1834,2224,3289,583,3290,977,908,2324,3756,782,257,3565,2882,1332,2991,999,1364,3553,3391,2021,2506,4,2561,2736,3215,3378,2909,1633,3441,2947,382,57,3751,2247,3762,3297,255,1066,2135,3544,1813,1953,2263,837,3739,2623,2502,1295,1778,1449,412,2292,1933,1352,2801,3483,3681,302,2989,404,1270,601,3695,2465,3318,2888,2181,3726,1026,3274,2532,388,3095,572,1154,570,2067,894,3719,3652,2389,1466,1952,3964,1242,3399,3210,3761,1658,1990,2094,3954,1060,165,1722,3646,3104,1577,3458 -------------------------------------------------------------------------------- /sentiment_analysis/main.py: -------------------------------------------------------------------------------- 1 | import project1 as p1 2 | import utils 3 | import numpy as np 4 | 5 | # ------------------------------------------------------------------------------- 6 | # Data loading. There is no need to edit code in this section. 7 | # ------------------------------------------------------------------------------- 8 | 9 | 10 | def load_stopwords(path_stopwords): 11 | stopwords = [] 12 | f_data = open(path_stopwords, "r") 13 | for line in f_data.readlines(): 14 | stopwords.append(line.strip()) 15 | return stopwords 16 | 17 | 18 | train_data = utils.load_data('reviews_train.tsv') 19 | val_data = utils.load_data('reviews_val.tsv') 20 | test_data = utils.load_data('reviews_test.tsv') 21 | 22 | train_texts, train_labels = zip( 23 | *((sample['text'], sample['sentiment']) for sample in train_data)) 24 | val_texts, val_labels = zip( 25 | *((sample['text'], sample['sentiment']) for sample in val_data)) 26 | test_texts, test_labels = zip( 27 | *((sample['text'], sample['sentiment']) for sample in test_data)) 28 | 29 | # dictionary = p1.bag_of_words(train_texts) 30 | stopwords = load_stopwords("stopwords.txt") 31 | dictionary = p1.bag_of_words(train_texts, stopwords) 32 | 33 | train_bow_features = p1.extract_bow_feature_vectors(train_texts, dictionary) 34 | val_bow_features = p1.extract_bow_feature_vectors(val_texts, dictionary) 35 | test_bow_features = p1.extract_bow_feature_vectors(test_texts, dictionary) 36 | 37 | # ------------------------------------------------------------------------------- 38 | # Problem 5 39 | # ------------------------------------------------------------------------------- 40 | 41 | toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv') 42 | 43 | T = 10 44 | L = 0.2 45 | T = 100 # test convergence 46 | 47 | thetas_perceptron = p1.perceptron(toy_features, toy_labels, T) 48 | thetas_avg_perceptron = p1.average_perceptron(toy_features, toy_labels, T) 49 | thetas_pegasos = p1.pegasos(toy_features, toy_labels, T, L) 50 | 51 | 52 | def plot_toy_results(algo_name, thetas): 53 | print('theta for', algo_name, 'is', ', '.join(map(str, list(thetas[0])))) 54 | print('theta_0 for', algo_name, 'is', str(thetas[1])) 55 | utils.plot_toy_data(algo_name, toy_features, toy_labels, thetas) 56 | 57 | 
58 | plot_toy_results('Perceptron', thetas_perceptron) 59 | plot_toy_results('Average Perceptron', thetas_avg_perceptron) 60 | plot_toy_results('Pegasos', thetas_pegasos) 61 | 62 | exit() 63 | # ------------------------------------------------------------------------------- 64 | # Problem 7 65 | # ------------------------------------------------------------------------------- 66 | """ 67 | T = 10 68 | L = 0.01 69 | 70 | pct_train_accuracy, pct_val_accuracy = \ 71 | p1.classifier_accuracy(p1.perceptron, train_bow_features, 72 | val_bow_features, train_labels, val_labels, T=T) 73 | print("{:35} {:.4f}".format( 74 | "Training accuracy for perceptron:", pct_train_accuracy)) 75 | print("{:35} {:.4f}".format( 76 | "Validation accuracy for perceptron:", pct_val_accuracy)) 77 | 78 | avg_pct_train_accuracy, avg_pct_val_accuracy = \ 79 | p1.classifier_accuracy(p1.average_perceptron, train_bow_features, 80 | val_bow_features, train_labels, val_labels, T=T) 81 | print("{:43} {:.4f}".format( 82 | "Training accuracy for average perceptron:", avg_pct_train_accuracy)) 83 | print("{:43} {:.4f}".format( 84 | "Validation accuracy for average perceptron:", avg_pct_val_accuracy)) 85 | 86 | avg_peg_train_accuracy, avg_peg_val_accuracy = \ 87 | p1.classifier_accuracy(p1.pegasos, train_bow_features, 88 | val_bow_features, train_labels, val_labels, T=T, L=L) 89 | print("{:50} {:.4f}".format( 90 | "Training accuracy for Pegasos:", avg_peg_train_accuracy)) 91 | print("{:50} {:.4f}".format( 92 | "Validation accuracy for Pegasos:", avg_peg_val_accuracy)) 93 | """ 94 | # ------------------------------------------------------------------------------- 95 | # Problem 8 96 | # ------------------------------------------------------------------------------- 97 | """ 98 | data = (train_bow_features, train_labels, val_bow_features, val_labels) 99 | 100 | # values of T and lambda to try 101 | Ts = [1, 5, 10, 15, 25, 50] 102 | Ls = [0.001, 0.01, 0.1, 1, 10] 103 | 104 | pct_tune_results = utils.tune_perceptron(Ts, *data) 105 | print('perceptron valid:', list(zip(Ts, pct_tune_results[1]))) 106 | print('best = {:.4f}, T={:.4f}'.format( 107 | np.max(pct_tune_results[1]), Ts[np.argmax(pct_tune_results[1])])) 108 | 109 | avg_pct_tune_results = utils.tune_avg_perceptron(Ts, *data) 110 | print('avg perceptron valid:', list(zip(Ts, avg_pct_tune_results[1]))) 111 | print('best = {:.4f}, T={:.4f}'.format( 112 | np.max(avg_pct_tune_results[1]), Ts[np.argmax(avg_pct_tune_results[1])])) 113 | 114 | # fix values for L and T while tuning Pegasos T and L, respective 115 | fix_L = 0.01 116 | peg_tune_results_T = utils.tune_pegasos_T(fix_L, Ts, *data) 117 | print('Pegasos valid: tune T', list(zip(Ts, peg_tune_results_T[1]))) 118 | print('best = {:.4f}, T={:.4f}'.format( 119 | np.max(peg_tune_results_T[1]), Ts[np.argmax(peg_tune_results_T[1])])) 120 | 121 | fix_T = Ts[np.argmax(peg_tune_results_T[1])] 122 | peg_tune_results_L = utils.tune_pegasos_L(fix_T, Ls, *data) 123 | print('Pegasos valid: tune L', list(zip(Ls, peg_tune_results_L[1]))) 124 | print('best = {:.4f}, L={:.4f}'.format( 125 | np.max(peg_tune_results_L[1]), Ls[np.argmax(peg_tune_results_L[1])])) 126 | 127 | utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results) 128 | utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results) 129 | utils.plot_tune_results('Pegasos', 'T', Ts, *peg_tune_results_T) 130 | utils.plot_tune_results('Pegasos', 'L', Ls, *peg_tune_results_L) 131 | """ 132 | # 
------------------------------------------------------------------------------- 133 | # Use the best method (perceptron, average perceptron or Pegasos) along with 134 | # the optimal hyperparameters according to validation accuracies to test 135 | # against the test dataset. The test data has been provided as 136 | # test_bow_features and test_labels. 137 | # ------------------------------------------------------------------------------- 138 | 139 | data = (train_bow_features, train_labels, test_bow_features, test_labels) 140 | T = 25 141 | L = 0.01 # best parameters 142 | thetas_pegasos = p1.pegasos(train_bow_features, train_labels, T, L) 143 | # print(thetas_pegasos) 144 | 145 | avg_peg_train_accuracy, avg_peg_test_accuracy = \ 146 | p1.classifier_accuracy(p1.pegasos, train_bow_features, 147 | test_bow_features, train_labels, test_labels, T=T, L=L) 148 | print("{:50} {:.4f}".format( 149 | "Training accuracy for Pegasos:", avg_peg_train_accuracy)) 150 | print("{:50} {:.4f}".format( 151 | "Test accuracy for Pegasos:", avg_peg_test_accuracy)) 152 | # ------------------------------------------------------------------------------- 153 | # Assign to best_theta, the weights (and not the bias!) learned by your most 154 | # accurate algorithm with the optimal choice of hyperparameters. 155 | # ------------------------------------------------------------------------------- 156 | 157 | best_theta = thetas_pegasos[0] 158 | wordlist = [word for (idx, word) in sorted( 159 | zip(dictionary.values(), dictionary.keys()))] 160 | sorted_word_features = utils.most_explanatory_word(best_theta, wordlist) 161 | print("Most Explanatory Word Features") 162 | print(sorted_word_features[:10]) 163 | -------------------------------------------------------------------------------- /sentiment_analysis/project1.py: -------------------------------------------------------------------------------- 1 | from string import punctuation, digits 2 | import numpy as np 3 | import random 4 | 5 | # Part I 6 | 7 | 8 | # pragma: coderesponse template 9 | def get_order(n_samples): 10 | try: 11 | with open(str(n_samples) + '.txt') as fp: 12 | line = fp.readline() 13 | return list(map(int, line.split(','))) 14 | except FileNotFoundError: 15 | random.seed(1) 16 | indices = list(range(n_samples)) 17 | random.shuffle(indices) 18 | return indices 19 | # pragma: coderesponse end 20 | 21 | 22 | # pragma: coderesponse template 23 | def hinge_loss_single(feature_vector, label, theta, theta_0): 24 | """ 25 | Finds the hinge loss on a single data point given specific classification 26 | parameters. 27 | 28 | Args: 29 | feature_vector - A numpy array describing the given data point. 30 | label - A real valued number, the correct classification of the data 31 | point. 32 | theta - A numpy array describing the linear classifier. 33 | theta_0 - A real valued number representing the offset parameter. 34 | 35 | 36 | Returns: A real number representing the hinge loss associated with the 37 | given data point and parameters. 38 | """ 39 | y = np.dot(theta, feature_vector) + theta_0 40 | loss = max(0.0, 1 - y * label) 41 | return loss 42 | # pragma: coderesponse end 43 | 44 | 45 | # pragma: coderesponse template 46 | def hinge_loss_full(feature_matrix, labels, theta, theta_0): 47 | """ 48 | Finds the total hinge loss on a set of data given specific classification 49 | parameters. 50 | 51 | Args: 52 | feature_matrix - A numpy matrix describing the given data. Each row 53 | represents a single data point. 
54 | labels - A numpy array where the kth element of the array is the 55 | correct classification of the kth row of the feature matrix. 56 | theta - A numpy array describing the linear classifier. 57 | theta_0 - A real valued number representing the offset parameter. 58 | 59 | 60 | Returns: A real number representing the hinge loss associated with the 61 | given dataset and parameters. This number should be the average hinge 62 | loss across all of the points in the feature matrix. 63 | """ 64 | # Your code here 65 | loss = 0 66 | for i in range(len(feature_matrix)): 67 | loss += hinge_loss_single(feature_matrix[i], labels[i], theta, theta_0) 68 | return loss / len(labels) 69 | # pragma: coderesponse end 70 | 71 | 72 | # pragma: coderesponse template 73 | def perceptron_single_step_update( 74 | feature_vector, 75 | label, 76 | current_theta, 77 | current_theta_0): 78 | """ 79 | Properly updates the classification parameter, theta and theta_0, on a 80 | single step of the perceptron algorithm. 81 | 82 | Args: 83 | feature_vector - A numpy array describing a single data point. 84 | label - The correct classification of the feature vector. 85 | current_theta - The current theta being used by the perceptron 86 | algorithm before this update. 87 | current_theta_0 - The current theta_0 being used by the perceptron 88 | algorithm before this update. 89 | 90 | Returns: A tuple where the first element is a numpy array with the value of 91 | theta after the current update has completed and the second element is a 92 | real valued number with the value of theta_0 after the current updated has 93 | completed. 94 | """ 95 | if label * (np.dot(current_theta, feature_vector) + current_theta_0) <= 0: 96 | current_theta += label * feature_vector 97 | current_theta_0 += label 98 | return (current_theta, current_theta_0) 99 | # pragma: coderesponse end 100 | 101 | 102 | # pragma: coderesponse template 103 | def perceptron(feature_matrix, labels, T): 104 | """ 105 | Runs the full perceptron algorithm on a given set of data. Runs T 106 | iterations through the data set, there is no need to worry about 107 | stopping early. 108 | 109 | NOTE: Please use the previously implemented functions when applicable. 110 | Do not copy paste code from previous parts. 111 | 112 | NOTE: Iterate the data matrix by the orders returned by get_order(feature_matrix.shape[0]) 113 | 114 | Args: 115 | feature_matrix - A numpy matrix describing the given data. Each row 116 | represents a single data point. 117 | labels - A numpy array where the kth element of the array is the 118 | correct classification of the kth row of the feature matrix. 119 | T - An integer indicating how many times the perceptron algorithm 120 | should iterate through the feature matrix. 121 | 122 | Returns: A tuple where the first element is a numpy array with the value of 123 | theta, the linear classification parameter, after T iterations through the 124 | feature matrix and the second element is a real number with the value of 125 | theta_0, the offset classification parameter, after T iterations through 126 | the feature matrix. 
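    The update is mistake-driven, as implemented in perceptron_single_step_update
    above: theta and theta_0 change only on points where
    label * (np.dot(theta, feature_vector) + theta_0) <= 0.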
127 | """ 128 | (nsamples, nfeatures) = feature_matrix.shape 129 | theta = np.zeros(nfeatures) 130 | theta_0 = 0.0 131 | for t in range(T): 132 | for i in get_order(nsamples): 133 | theta, theta_0 = perceptron_single_step_update( 134 | feature_matrix[i], labels[i], theta, theta_0) 135 | return (theta, theta_0) 136 | # pragma: coderesponse end 137 | 138 | 139 | # pragma: coderesponse template 140 | def average_perceptron(feature_matrix, labels, T): 141 | """ 142 | Runs the average perceptron algorithm on a given set of data. Runs T 143 | iterations through the data set, there is no need to worry about 144 | stopping early. 145 | 146 | NOTE: Please use the previously implemented functions when applicable. 147 | Do not copy paste code from previous parts. 148 | 149 | NOTE: Iterate the data matrix by the orders returned by get_order(feature_matrix.shape[0]) 150 | 151 | 152 | Args: 153 | feature_matrix - A numpy matrix describing the given data. Each row 154 | represents a single data point. 155 | labels - A numpy array where the kth element of the array is the 156 | correct classification of the kth row of the feature matrix. 157 | T - An integer indicating how many times the perceptron algorithm 158 | should iterate through the feature matrix. 159 | 160 | Returns: A tuple where the first element is a numpy array with the value of 161 | the average theta, the linear classification parameter, found after T 162 | iterations through the feature matrix and the second element is a real 163 | number with the value of the average theta_0, the offset classification 164 | parameter, found after T iterations through the feature matrix. 165 | 166 | Hint: It is difficult to keep a running average; however, it is simple to 167 | find a sum and divide. 168 | """ 169 | (nsamples, nfeatures) = feature_matrix.shape 170 | theta = np.zeros(nfeatures) 171 | theta_sum = np.zeros(nfeatures) 172 | theta_0 = 0.0 173 | theta_0_sum = 0.0 174 | for t in range(T): 175 | for i in get_order(nsamples): 176 | theta, theta_0 = perceptron_single_step_update( 177 | feature_matrix[i], labels[i], theta, theta_0) 178 | theta_sum += theta 179 | theta_0_sum += theta_0 180 | return (theta_sum / (nsamples * T), theta_0_sum / (nsamples * T)) 181 | # pragma: coderesponse end 182 | 183 | 184 | # pragma: coderesponse template 185 | def pegasos_single_step_update( 186 | feature_vector, 187 | label, 188 | L, 189 | eta, 190 | current_theta, 191 | current_theta_0): 192 | """ 193 | Properly updates the classification parameter, theta and theta_0, on a 194 | single step of the Pegasos algorithm 195 | 196 | Args: 197 | feature_vector - A numpy array describing a single data point. 198 | label - The correct classification of the feature vector. 199 | L - The lamba value being used to update the parameters. 200 | eta - Learning rate to update parameters. 201 | current_theta - The current theta being used by the Pegasos 202 | algorithm before this update. 203 | current_theta_0 - The current theta_0 being used by the 204 | Pegasos algorithm before this update. 205 | 206 | Returns: A tuple where the first element is a numpy array with the value of 207 | theta after the current update has completed and the second element is a 208 | real valued number with the value of theta_0 after the current updated has 209 | completed. 
210 | """ 211 | mult = 1 - (eta * L) 212 | if label * (np.dot(feature_vector, current_theta) + current_theta_0) <= 1: 213 | return ((mult * current_theta) + (eta * label * feature_vector), 214 | (current_theta_0) + (eta * label)) 215 | return (mult * current_theta, current_theta_0) 216 | # pragma: coderesponse end 217 | 218 | 219 | # pragma: coderesponse template 220 | def pegasos(feature_matrix, labels, T, L): 221 | """ 222 | Runs the Pegasos algorithm on a given set of data. Runs T 223 | iterations through the data set, there is no need to worry about 224 | stopping early. 225 | 226 | For each update, set learning rate = 1/sqrt(t), 227 | where t is a counter for the number of updates performed so far (between 1 228 | and nT inclusive). 229 | 230 | NOTE: Please use the previously implemented functions when applicable. 231 | Do not copy paste code from previous parts. 232 | 233 | Args: 234 | feature_matrix - A numpy matrix describing the given data. Each row 235 | represents a single data point. 236 | labels - A numpy array where the kth element of the array is the 237 | correct classification of the kth row of the feature matrix. 238 | T - An integer indicating how many times the algorithm 239 | should iterate through the feature matrix. 240 | L - The lamba value being used to update the Pegasos 241 | algorithm parameters. 242 | 243 | Returns: A tuple where the first element is a numpy array with the value of 244 | the theta, the linear classification parameter, found after T 245 | iterations through the feature matrix and the second element is a real 246 | number with the value of the theta_0, the offset classification 247 | parameter, found after T iterations through the feature matrix. 248 | """ 249 | (nsamples, nfeatures) = feature_matrix.shape 250 | theta = np.zeros(nfeatures) 251 | theta_0 = 0 252 | count = 0 253 | for t in range(T): 254 | for i in get_order(nsamples): 255 | count += 1 256 | eta = 1.0 / np.sqrt(count) 257 | (theta, theta_0) = pegasos_single_step_update( 258 | feature_matrix[i], labels[i], L, eta, theta, theta_0) 259 | return (theta, theta_0) 260 | # pragma: coderesponse end 261 | 262 | # Part II 263 | 264 | 265 | # pragma: coderesponse template 266 | def classify(feature_matrix, theta, theta_0): 267 | """ 268 | A classification function that uses theta and theta_0 to classify a set of 269 | data points. 270 | 271 | Args: 272 | feature_matrix - A numpy matrix describing the given data. Each row 273 | represents a single data point. 274 | theta - A numpy array describing the linear classifier. 275 | theta - A numpy array describing the linear classifier. 276 | theta_0 - A real valued number representing the offset parameter. 277 | 278 | Returns: A numpy array of 1s and -1s where the kth element of the array is 279 | the predicted classification of the kth row of the feature matrix using the 280 | given theta and theta_0. If a prediction is GREATER THAN zero, it should 281 | be considered a positive classification. 
282 | """ 283 | (nsamples, nfeatures) = feature_matrix.shape 284 | predictions = np.zeros(nsamples) 285 | for i in range(nsamples): 286 | feature_vector = feature_matrix[i] 287 | prediction = np.dot(theta, feature_vector) + theta_0 288 | if (prediction > 0): 289 | predictions[i] = 1 290 | else: 291 | predictions[i] = -1 292 | return predictions 293 | # pragma: coderesponse end 294 | 295 | 296 | # pragma: coderesponse template 297 | def classifier_accuracy( 298 | classifier, 299 | train_feature_matrix, 300 | val_feature_matrix, 301 | train_labels, 302 | val_labels, 303 | **kwargs): 304 | """ 305 | Trains a linear classifier and computes accuracy. 306 | The classifier is trained on the train data. The classifier's 307 | accuracy on the train and validation data is then returned. 308 | 309 | Args: 310 | classifier - A classifier function that takes arguments 311 | (feature matrix, labels, **kwargs) and returns (theta, theta_0) 312 | train_feature_matrix - A numpy matrix describing the training 313 | data. Each row represents a single data point. 314 | val_feature_matrix - A numpy matrix describing the training 315 | data. Each row represents a single data point. 316 | train_labels - A numpy array where the kth element of the array 317 | is the correct classification of the kth row of the training 318 | feature matrix. 319 | val_labels - A numpy array where the kth element of the array 320 | is the correct classification of the kth row of the validation 321 | feature matrix. 322 | **kwargs - Additional named arguments to pass to the classifier 323 | (e.g. T or L) 324 | 325 | Returns: A tuple in which the first element is the (scalar) accuracy of the 326 | trained classifier on the training data and the second element is the 327 | accuracy of the trained classifier on the validation data. 328 | """ 329 | theta, theta_0 = classifier(train_feature_matrix, train_labels, **kwargs) 330 | train_predictions = classify(train_feature_matrix, theta, theta_0) 331 | val_predictions = classify(val_feature_matrix, theta, theta_0) 332 | train_accuracy = accuracy(train_predictions, train_labels) 333 | validation_accuracy = accuracy(val_predictions, val_labels) 334 | return (train_accuracy, validation_accuracy) 335 | # pragma: coderesponse end 336 | 337 | 338 | # pragma: coderesponse template 339 | def extract_words(input_string): 340 | """ 341 | Helper function for bag_of_words() 342 | Inputs a text string 343 | Returns a list of lowercase words in the string. 344 | Punctuation and digits are separated out into their own words. 
345 | """ 346 | for c in punctuation + digits: 347 | input_string = input_string.replace(c, ' ' + c + ' ') 348 | 349 | return input_string.lower().split() 350 | # pragma: coderesponse end 351 | 352 | 353 | # pragma: coderesponse template 354 | def bag_of_words(texts, stopwords=None): 355 | """ 356 | Inputs a list of string reviews 357 | Returns a dictionary of unique unigrams occurring over the input 358 | 359 | Feel free to change this code as guided by Problem 9 360 | """ 361 | # Your code here 362 | dictionary = {} # maps word to unique index 363 | for text in texts: 364 | word_list = extract_words(text) 365 | for word in word_list: 366 | if stopwords and word in stopwords: 367 | continue 368 | if word not in dictionary: 369 | dictionary[word] = len(dictionary) 370 | return dictionary 371 | # pragma: coderesponse end 372 | 373 | 374 | # pragma: coderesponse template 375 | def extract_bow_feature_vectors(reviews, dictionary): 376 | """ 377 | Inputs a list of string reviews 378 | Inputs the dictionary of words as given by bag_of_words 379 | Returns the bag-of-words feature matrix representation of the data. 380 | The returned matrix is of shape (n, m), where n is the number of reviews 381 | and m the total number of entries in the dictionary. 382 | 383 | Feel free to change this code as guided by Problem 9 384 | """ 385 | # Your code here 386 | 387 | num_reviews = len(reviews) 388 | feature_matrix = np.zeros([num_reviews, len(dictionary)]) 389 | 390 | for i, text in enumerate(reviews): 391 | word_list = extract_words(text) 392 | for word in word_list: 393 | if word in dictionary: 394 | if word not in dictionary.keys(): 395 | feature_matrix[i, dictionary[word]] = 1 396 | else: 397 | feature_matrix[i, dictionary[word]] += 1 398 | return feature_matrix 399 | # pragma: coderesponse end 400 | 401 | 402 | # pragma: coderesponse template 403 | def accuracy(preds, targets): 404 | """ 405 | Given length-N vectors containing predicted and target labels, 406 | returns the percentage and number of correct predictions. 
407 | """ 408 | return (preds == targets).mean() 409 | # pragma: coderesponse end 410 | -------------------------------------------------------------------------------- /sentiment_analysis/reviews_submit.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KellyHwong/MIT-ML/0305208956f89cb039292c7cce175852f0783336/sentiment_analysis/reviews_submit.tsv -------------------------------------------------------------------------------- /sentiment_analysis/reviews_test.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KellyHwong/MIT-ML/0305208956f89cb039292c7cce175852f0783336/sentiment_analysis/reviews_test.tsv -------------------------------------------------------------------------------- /sentiment_analysis/reviews_train.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KellyHwong/MIT-ML/0305208956f89cb039292c7cce175852f0783336/sentiment_analysis/reviews_train.tsv -------------------------------------------------------------------------------- /sentiment_analysis/reviews_val.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KellyHwong/MIT-ML/0305208956f89cb039292c7cce175852f0783336/sentiment_analysis/reviews_val.tsv -------------------------------------------------------------------------------- /sentiment_analysis/stopwords.txt: -------------------------------------------------------------------------------- 1 | i 2 | me 3 | my 4 | myself 5 | we 6 | our 7 | ours 8 | ourselves 9 | you 10 | your 11 | yours 12 | yourself 13 | yourselves 14 | he 15 | him 16 | his 17 | himself 18 | she 19 | her 20 | hers 21 | herself 22 | it 23 | its 24 | itself 25 | they 26 | them 27 | their 28 | theirs 29 | themselves 30 | what 31 | which 32 | who 33 | whom 34 | this 35 | that 36 | these 37 | those 38 | am 39 | is 40 | are 41 | was 42 | were 43 | be 44 | been 45 | being 46 | have 47 | has 48 | had 49 | having 50 | do 51 | does 52 | did 53 | doing 54 | a 55 | an 56 | the 57 | and 58 | but 59 | if 60 | or 61 | because 62 | as 63 | until 64 | while 65 | of 66 | at 67 | by 68 | for 69 | with 70 | about 71 | against 72 | between 73 | into 74 | through 75 | during 76 | before 77 | after 78 | above 79 | below 80 | to 81 | from 82 | up 83 | down 84 | in 85 | out 86 | on 87 | off 88 | over 89 | under 90 | again 91 | further 92 | then 93 | once 94 | here 95 | there 96 | when 97 | where 98 | why 99 | how 100 | all 101 | any 102 | both 103 | each 104 | few 105 | more 106 | most 107 | other 108 | some 109 | such 110 | no 111 | nor 112 | not 113 | only 114 | own 115 | same 116 | so 117 | than 118 | too 119 | very 120 | s 121 | t 122 | can 123 | will 124 | just 125 | don 126 | should 127 | now 128 | -------------------------------------------------------------------------------- /sentiment_analysis/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import traceback 5 | import project1 as p1 6 | import numpy as np 7 | 8 | verbose = False 9 | 10 | def green(s): 11 | return '\033[1;32m%s\033[m' % s 12 | 13 | def yellow(s): 14 | return '\033[1;33m%s\033[m' % s 15 | 16 | def red(s): 17 | return '\033[1;31m%s\033[m' % s 18 | 19 | def log(*m): 20 | print(" ".join(map(str, m))) 21 | 22 | def log_exit(*m): 23 | log(red("ERROR:"), *m) 24 | exit(1) 25 | 26 | 27 | def 
check_real(ex_name, f, exp_res, *args): 28 | try: 29 | res = f(*args) 30 | except NotImplementedError: 31 | log(red("FAIL"), ex_name, ": not implemented") 32 | return True 33 | if not np.isreal(res): 34 | log(red("FAIL"), ex_name, ": does not return a real number, type: ", type(res)) 35 | return True 36 | if res != exp_res: 37 | log(red("FAIL"), ex_name, ": incorrect answer. Expected", exp_res, ", got: ", res) 38 | return True 39 | 40 | 41 | def equals(x, y): 42 | if type(y) == np.ndarray: 43 | return (x == y).all() 44 | return x == y 45 | 46 | def check_tuple(ex_name, f, exp_res, *args, **kwargs): 47 | try: 48 | res = f(*args, **kwargs) 49 | except NotImplementedError: 50 | log(red("FAIL"), ex_name, ": not implemented") 51 | return True 52 | if not type(res) == tuple: 53 | log(red("FAIL"), ex_name, ": does not return a tuple, type: ", type(res)) 54 | return True 55 | if not len(res) == len(exp_res): 56 | log(red("FAIL"), ex_name, ": expected a tuple of size ", len(exp_res), " but got tuple of size", len(res)) 57 | return True 58 | if not all(equals(x, y) for x, y in zip(res, exp_res)): 59 | log(red("FAIL"), ex_name, ": incorrect answer. Expected", exp_res, ", got: ", res) 60 | return True 61 | 62 | def check_array(ex_name, f, exp_res, *args): 63 | try: 64 | res = f(*args) 65 | except NotImplementedError: 66 | log(red("FAIL"), ex_name, ": not implemented") 67 | return True 68 | if not type(res) == np.ndarray: 69 | log(red("FAIL"), ex_name, ": does not return a numpy array, type: ", type(res)) 70 | return True 71 | if not len(res) == len(exp_res): 72 | log(red("FAIL"), ex_name, ": expected an array of shape ", exp_res.shape, " but got array of shape", res.shape) 73 | return True 74 | if not all(equals(x, y) for x, y in zip(res, exp_res)): 75 | log(red("FAIL"), ex_name, ": incorrect answer. Expected", exp_res, ", got: ", res) 76 | return True 77 | 78 | def check_list(ex_name, f, exp_res, *args): 79 | try: 80 | res = f(*args) 81 | except NotImplementedError: 82 | log(red("FAIL"), ex_name, ": not implemented") 83 | return True 84 | if not type(res) == list: 85 | log(red("FAIL"), ex_name, ": does not return a list, type: ", type(res)) 86 | return True 87 | if not len(res) == len(exp_res): 88 | log(red("FAIL"), ex_name, ": expected a list of size ", len(exp_res), " but got list of size", len(res)) 89 | return True 90 | if not all(equals(x, y) for x, y in zip(res, exp_res)): 91 | log(red("FAIL"), ex_name, ": incorrect answer. 
Expected", exp_res, ", got: ", res) 92 | return True 93 | 94 | 95 | def check_get_order(): 96 | ex_name = "Get order" 97 | if check_list( 98 | ex_name, p1.get_order, 99 | [0], 1): 100 | log("You should revert `get_order` to its original implementation for this test to pass") 101 | return 102 | if check_list( 103 | ex_name, p1.get_order, 104 | [1, 0], 2): 105 | log("You should revert `get_order` to its original implementation for this test to pass") 106 | return 107 | log(green("PASS"), ex_name, "") 108 | 109 | 110 | def check_hinge_loss_single(): 111 | ex_name = "Hinge loss single" 112 | 113 | feature_vector = np.array([1, 2]) 114 | label, theta, theta_0 = 1, np.array([-1, 1]), -0.2 115 | exp_res = 1 - 0.8 116 | if check_real( 117 | ex_name, p1.hinge_loss_single, 118 | exp_res, feature_vector, label, theta, theta_0): 119 | return 120 | log(green("PASS"), ex_name, "") 121 | 122 | 123 | def check_hinge_loss_full(): 124 | ex_name = "Hinge loss full" 125 | 126 | feature_vector = np.array([[1, 2], [1, 2]]) 127 | label, theta, theta_0 = np.array([1, 1]), np.array([-1, 1]), -0.2 128 | exp_res = 1 - 0.8 129 | if check_real( 130 | ex_name, p1.hinge_loss_full, 131 | exp_res, feature_vector, label, theta, theta_0): 132 | return 133 | 134 | log(green("PASS"), ex_name, "") 135 | 136 | 137 | def check_perceptron_single_update(): 138 | ex_name = "Perceptron single update" 139 | 140 | feature_vector = np.array([1, 2]) 141 | label, theta, theta_0 = 1, np.array([-1, 1]), -1.5 142 | exp_res = (np.array([0, 3]), -0.5) 143 | if check_tuple( 144 | ex_name, p1.perceptron_single_step_update, 145 | exp_res, feature_vector, label, theta, theta_0): 146 | return 147 | 148 | feature_vector = np.array([1, 2]) 149 | label, theta, theta_0 = 1, np.array([-1, 1]), -1 150 | exp_res = (np.array([0, 3]), 0) 151 | if check_tuple( 152 | ex_name + " (boundary case)", p1.perceptron_single_step_update, 153 | exp_res, feature_vector, label, theta, theta_0): 154 | return 155 | 156 | log(green("PASS"), ex_name, "") 157 | 158 | 159 | def check_perceptron(): 160 | ex_name = "Perceptron" 161 | 162 | feature_matrix = np.array([[1, 2]]) 163 | labels = np.array([1]) 164 | T = 1 165 | exp_res = (np.array([1, 2]), 1) 166 | if check_tuple( 167 | ex_name, p1.perceptron, 168 | exp_res, feature_matrix, labels, T): 169 | return 170 | 171 | feature_matrix = np.array([[1, 2], [-1, 0]]) 172 | labels = np.array([1, 1]) 173 | T = 1 174 | exp_res = (np.array([0, 2]), 2) 175 | if check_tuple( 176 | ex_name, p1.perceptron, 177 | exp_res, feature_matrix, labels, T): 178 | return 179 | 180 | feature_matrix = np.array([[1, 2]]) 181 | labels = np.array([1]) 182 | T = 2 183 | exp_res = (np.array([1, 2]), 1) 184 | if check_tuple( 185 | ex_name, p1.perceptron, 186 | exp_res, feature_matrix, labels, T): 187 | return 188 | 189 | feature_matrix = np.array([[1, 2], [-1, 0]]) 190 | labels = np.array([1, 1]) 191 | T = 2 192 | exp_res = (np.array([0, 2]), 2) 193 | if check_tuple( 194 | ex_name, p1.perceptron, 195 | exp_res, feature_matrix, labels, T): 196 | return 197 | 198 | log(green("PASS"), ex_name, "") 199 | 200 | 201 | def check_average_perceptron(): 202 | ex_name = "Average perceptron" 203 | 204 | feature_matrix = np.array([[1, 2]]) 205 | labels = np.array([1]) 206 | T = 1 207 | exp_res = (np.array([1, 2]), 1) 208 | if check_tuple( 209 | ex_name, p1.average_perceptron, 210 | exp_res, feature_matrix, labels, T): 211 | return 212 | 213 | feature_matrix = np.array([[1, 2], [-1, 0]]) 214 | labels = np.array([1, 1]) 215 | T = 1 216 | exp_res = (np.array([-0.5, 1]), 
1.5) 217 | if check_tuple( 218 | ex_name, p1.average_perceptron, 219 | exp_res, feature_matrix, labels, T): 220 | return 221 | 222 | feature_matrix = np.array([[1, 2]]) 223 | labels = np.array([1]) 224 | T = 2 225 | exp_res = (np.array([1, 2]), 1) 226 | if check_tuple( 227 | ex_name, p1.average_perceptron, 228 | exp_res, feature_matrix, labels, T): 229 | return 230 | 231 | feature_matrix = np.array([[1, 2], [-1, 0]]) 232 | labels = np.array([1, 1]) 233 | T = 2 234 | exp_res = (np.array([-0.25, 1.5]), 1.75) 235 | if check_tuple( 236 | ex_name, p1.average_perceptron, 237 | exp_res, feature_matrix, labels, T): 238 | return 239 | 240 | log(green("PASS"), ex_name, "") 241 | 242 | 243 | def check_pegasos_single_update(): 244 | ex_name = "Pegasos single update" 245 | 246 | feature_vector = np.array([1, 2]) 247 | label, theta, theta_0 = 1, np.array([-1, 1]), -1.5 248 | L = 0.2 249 | eta = 0.1 250 | exp_res = (np.array([-0.88, 1.18]), -1.4) 251 | if check_tuple( 252 | ex_name, p1.pegasos_single_step_update, 253 | exp_res, 254 | feature_vector, label, L, eta, theta, theta_0): 255 | return 256 | 257 | feature_vector = np.array([1, 1]) 258 | label, theta, theta_0 = 1, np.array([-1, 1]), 1 259 | L = 0.2 260 | eta = 0.1 261 | exp_res = (np.array([-0.88, 1.08]), 1.1) 262 | if check_tuple( 263 | ex_name + " (boundary case)", p1.pegasos_single_step_update, 264 | exp_res, 265 | feature_vector, label, L, eta, theta, theta_0): 266 | return 267 | 268 | feature_vector = np.array([1, 2]) 269 | label, theta, theta_0 = 1, np.array([-1, 1]), -2 270 | L = 0.2 271 | eta = 0.1 272 | exp_res = (np.array([-0.88, 1.18]), -1.9) 273 | if check_tuple( 274 | ex_name, p1.pegasos_single_step_update, 275 | exp_res, 276 | feature_vector, label, L, eta, theta, theta_0): 277 | return 278 | 279 | log(green("PASS"), ex_name, "") 280 | 281 | 282 | def check_pegasos(): 283 | ex_name = "Pegasos" 284 | 285 | feature_matrix = np.array([[1, 2]]) 286 | labels = np.array([1]) 287 | T = 1 288 | L = 0.2 289 | exp_res = (np.array([1, 2]), 1) 290 | if check_tuple( 291 | ex_name, p1.pegasos, 292 | exp_res, feature_matrix, labels, T, L): 293 | return 294 | 295 | feature_matrix = np.array([[1, 1], [1, 1]]) 296 | labels = np.array([1, 1]) 297 | T = 1 298 | L = 1 299 | exp_res = (np.array([1-1/np.sqrt(2), 1-1/np.sqrt(2)]), 1) 300 | if check_tuple( 301 | ex_name, p1.pegasos, 302 | exp_res, feature_matrix, labels, T, L): 303 | return 304 | 305 | log(green("PASS"), ex_name, "") 306 | 307 | 308 | def check_classify(): 309 | ex_name = "Classify" 310 | 311 | feature_matrix = np.array([[1, 1], [1, 1], [1, 1]]) 312 | theta = np.array([1, 1]) 313 | theta_0 = 0 314 | exp_res = np.array([1, 1, 1]) 315 | if check_array( 316 | ex_name, p1.classify, 317 | exp_res, feature_matrix, theta, theta_0): 318 | return 319 | 320 | feature_matrix = np.array([[-1, 1]]) 321 | theta = np.array([1, 1]) 322 | theta_0 = 0 323 | exp_res = np.array([-1]) 324 | if check_array( 325 | ex_name + " (boundary case)", p1.classify, 326 | exp_res, feature_matrix, theta, theta_0): 327 | return 328 | 329 | log(green("PASS"), ex_name, "") 330 | 331 | def check_classifier_accuracy(): 332 | ex_name = "Classifier accuracy" 333 | 334 | train_feature_matrix = np.array([[1, 0], [1, -1], [2, 3]]) 335 | val_feature_matrix = np.array([[1, 1], [2, -1]]) 336 | train_labels = np.array([1, -1, 1]) 337 | val_labels = np.array([-1, 1]) 338 | exp_res = 1, 0 339 | T=1 340 | if check_tuple( 341 | ex_name, p1.classifier_accuracy, 342 | exp_res, 343 | p1.perceptron, 344 | train_feature_matrix, val_feature_matrix, 
345 | train_labels, val_labels, 346 | T=T): 347 | return 348 | 349 | train_feature_matrix = np.array([[1, 0], [1, -1], [2, 3]]) 350 | val_feature_matrix = np.array([[1, 1], [2, -1]]) 351 | train_labels = np.array([1, -1, 1]) 352 | val_labels = np.array([-1, 1]) 353 | exp_res = 1, 0 354 | T=1 355 | L=0.2 356 | if check_tuple( 357 | ex_name, p1.classifier_accuracy, 358 | exp_res, 359 | p1.pegasos, 360 | train_feature_matrix, val_feature_matrix, 361 | train_labels, val_labels, 362 | T=T, L=L): 363 | return 364 | 365 | log(green("PASS"), ex_name, "") 366 | 367 | def check_bag_of_words(): 368 | ex_name = "Bag of words" 369 | 370 | texts = [ 371 | "He loves to walk on the beach", 372 | "There is nothing better"] 373 | 374 | try: 375 | res = p1.bag_of_words(texts) 376 | except NotImplementedError: 377 | log(red("FAIL"), ex_name, ": not implemented") 378 | return 379 | if not type(res) == dict: 380 | log(red("FAIL"), ex_name, ": does not return a tuple, type: ", type(res)) 381 | return 382 | 383 | vals = sorted(res.values()) 384 | exp_vals = list(range(len(res.keys()))) 385 | if not vals == exp_vals: 386 | log(red("FAIL"), ex_name, ": wrong set of indices. Expected: ", exp_vals, " got ", vals) 387 | return 388 | 389 | log(green("PASS"), ex_name, "") 390 | 391 | keys = sorted(res.keys()) 392 | exp_keys = ['beach', 'better', 'he', 'is', 'loves', 'nothing', 'on', 'the', 'there', 'to', 'walk'] 393 | stop_keys = ['beach', 'better', 'loves', 'nothing', 'walk'] 394 | 395 | if keys == exp_keys: 396 | log(yellow("WARN"), ex_name, ": does not remove stopwords:", [k for k in keys if k not in stop_keys]) 397 | elif keys == stop_keys: 398 | log(green("PASS"), ex_name, " stopwords removed") 399 | else: 400 | log(red("FAIL"), ex_name, ": keys are missing:", [k for k in stop_keys if k not in keys], " or are not unexpected:", [k for k in keys if k not in stop_keys]) 401 | 402 | 403 | def check_extract_bow_feature_vectors(): 404 | ex_name = "Extract bow feature vectors" 405 | texts = [ 406 | "He loves her ", 407 | "He really really loves her"] 408 | keys = ["he", "loves", "her", "really"] 409 | dictionary = {k:i for i, k in enumerate(keys)} 410 | exp_res = np.array( 411 | [[1, 1, 1, 0], 412 | [1, 1, 1, 1]]) 413 | non_bin_res = np.array( 414 | [[1, 1, 1, 0], 415 | [1, 1, 1, 2]]) 416 | 417 | 418 | try: 419 | res = p1.extract_bow_feature_vectors(texts, dictionary) 420 | except NotImplementedError: 421 | log(red("FAIL"), ex_name, ": not implemented") 422 | return 423 | 424 | if not type(res) == np.ndarray: 425 | log(red("FAIL"), ex_name, ": does not return a numpy array, type: ", type(res)) 426 | return 427 | if not len(res) == len(exp_res): 428 | log(red("FAIL"), ex_name, ": expected an array of shape ", exp_res.shape, " but got array of shape", res.shape) 429 | return 430 | 431 | log(green("PASS"), ex_name) 432 | 433 | if (res == exp_res).all(): 434 | log(yellow("WARN"), ex_name, ": uses binary indicators as features") 435 | elif (res == non_bin_res).all(): 436 | log(green("PASS"), ex_name, ": correct non binary features") 437 | else: 438 | log(red("FAIL"), ex_name, ": unexpected feature matrix") 439 | return 440 | 441 | def main(): 442 | log(green("PASS"), "Import project1") 443 | try: 444 | check_get_order() 445 | check_hinge_loss_single() 446 | check_hinge_loss_full() 447 | check_perceptron_single_update() 448 | check_perceptron() 449 | check_average_perceptron() 450 | check_pegasos_single_update() 451 | check_pegasos() 452 | check_classify() 453 | check_classifier_accuracy() 454 | check_bag_of_words() 455 | 
check_extract_bow_feature_vectors() 456 | except Exception: 457 | log_exit(traceback.format_exc()) 458 | 459 | if __name__ == "__main__": 460 | main() 461 | -------------------------------------------------------------------------------- /sentiment_analysis/toy_data.tsv: -------------------------------------------------------------------------------- 1 | -1 1.7600 0.4000 2 | -1 0.9790 2.2400 3 | -1 1.8700 -0.9770 4 | -1 0.9500 -0.1510 5 | -1 -0.1030 0.4110 6 | -1 0.1440 1.4500 7 | -1 0.7610 0.1220 8 | -1 0.4440 0.3340 9 | -1 1.4900 -0.2050 10 | -1 0.3130 -0.8540 11 | -1 -2.5500 0.6540 12 | -1 0.8640 -0.7420 13 | -1 2.2700 -1.4500 14 | -1 0.0458 -0.1870 15 | -1 1.5300 1.4700 16 | -1 0.1550 0.3780 17 | -1 -0.8878 -1.9808 18 | -1 -0.3480 0.1560 19 | -1 1.2300 1.2000 20 | -1 -0.3873 -0.3023 21 | -1 -1.0486 -1.4200 22 | -1 -1.7100 1.9500 23 | -1 -0.5097 -0.4381 24 | -1 -1.2500 0.7770 25 | -1 -1.6139 -0.2127 26 | -1 -0.8950 0.3870 27 | -1 -0.5108 -1.1806 28 | -1 -0.0282 0.4280 29 | -1 0.0665 0.3020 30 | -1 -0.6343 -0.3627 31 | -1 -0.6725 -0.3596 32 | -1 -0.8131 -1.7263 33 | -1 0.1770 -0.4020 34 | -1 -1.6300 0.4630 35 | -1 -0.9070 0.0519 36 | -1 0.7290 0.1290 37 | -1 1.1400 -1.2300 38 | -1 0.4020 -0.6850 39 | -1 -0.8708 -0.5788 40 | -1 -0.3120 0.0562 41 | -1 -1.1700 0.9010 42 | -1 0.4660 -1.5400 43 | -1 1.4900 1.9000 44 | -1 1.1800 -0.1800 45 | -1 -1.0700 1.0500 46 | -1 -0.4030 1.2200 47 | -1 0.2080 0.9770 48 | -1 0.3560 0.7070 49 | -1 0.0105 1.7900 50 | -1 0.1270 0.4020 51 | -1 1.8800 -1.3500 52 | -1 -1.2700 0.9690 53 | -1 -1.1700 1.9400 54 | -1 -0.4136 -0.7475 55 | -1 1.9200 1.4800 56 | -1 1.8700 0.9060 57 | -1 -0.8610 1.9100 58 | -1 -0.2680 0.8020 59 | -1 0.9470 -0.1550 60 | -1 0.6140 0.9220 61 | -1 0.3760 -1.1000 62 | -1 0.2980 1.3300 63 | -1 -0.6946 -0.1496 64 | -1 -0.4350 1.8500 65 | -1 0.6720 0.4070 66 | -1 -0.7700 0.5390 67 | -1 -0.6740 0.0318 68 | -1 -0.6360 0.6760 69 | -1 0.5770 -0.2080 70 | -1 0.3960 -1.0900 71 | -1 -1.4900 0.4390 72 | -1 0.1670 0.6350 73 | -1 2.3800 0.9440 74 | -1 -0.9130 1.1200 75 | -1 -1.3159 -0.4616 76 | -1 -0.0682 1.7100 77 | -1 -0.7448 -0.8264 78 | -1 -0.0985 -0.6635 79 | -1 1.1300 -1.0800 80 | -1 -1.1475 -0.4378 81 | -1 -0.4980 1.9300 82 | -1 0.9490 0.0876 83 | -1 -1.2300 0.8440 84 | -1 -1.0002 -1.5448 85 | -1 1.1900 0.3170 86 | -1 0.9210 0.3190 87 | -1 0.8570 -0.6510 88 | -1 -1.0300 0.6820 89 | -1 -0.8034 -0.6895 90 | -1 -0.4560 0.0175 91 | -1 -0.3540 -1.3750 92 | -1 -0.6436 -2.2234 93 | -1 0.6250 -1.6000 94 | -1 -1.1000 0.0522 95 | -1 -0.7400 1.5400 96 | -1 -1.2900 0.2670 97 | -1 -0.0393 -1.1681 98 | -1 0.5230 -0.1720 99 | -1 0.7720 0.8240 100 | -1 2.1600 1.3400 101 | 1 1.6300 1.7600 102 | 1 3.1000 2.6600 103 | 1 2.6400 0.3830 104 | 1 1.9800 1.2600 105 | 1 2.2800 1.9000 106 | 1 2.9100 2.3200 107 | 1 2.7900 1.5300 108 | 1 1.0600 1.5900 109 | 1 1.9800 2.3800 110 | 1 4.2600 1.9600 111 | 1 1.0400 1.6500 112 | 1 1.5400 2.4800 113 | 1 0.4590 2.0600 114 | 1 2.1600 2.2300 115 | 1 1.4000 1.7600 116 | 1 0.5760 1.5100 117 | 1 1.4600 2.4200 118 | 1 0.8440 2.7800 119 | 1 3.4900 -0.0700 120 | 1 2.4300 2.6800 121 | 1 1.3600 1.6000 122 | 1 1.8700 1.7000 123 | 1 1.6900 0.3240 124 | 1 3.1500 3.0800 125 | 1 1.1900 0.5340 126 | 1 2.5200 1.4200 127 | 1 2.1400 1.6800 128 | 1 2.6900 2.6900 129 | 1 1.2700 0.6170 130 | 1 0.4170 2.6100 131 | 1 0.8110 1.4900 132 | 1 1.4000 1.9500 133 | 1 0.0637 2.1900 134 | 1 2.5200 2.0900 135 | 1 1.6900 2.1000 136 | 1 2.4000 -0.7730 137 | 1 3.9600 2.3900 138 | 1 1.3500 1.6100 139 | 1 2.4900 1.8800 140 | 1 -0.0307 4.0600 141 | 1 1.8900 3.0200 142 | 1 
1.3100 3.5400 143 | 1 2.2900 2.6100 144 | 1 0.9550 3.2100 145 | 1 2.6900 3.3000 146 | 1 1.3700 1.5200 147 | 1 4.3000 0.9400 148 | 1 1.8600 3.1400 149 | 1 2.1000 2.5800 150 | 1 1.6000 2.3700 151 | 1 0.6930 3.6600 152 | 1 1.8800 1.3200 153 | 1 2.6700 1.5400 154 | 1 0.6660 0.6530 155 | 1 2.6900 1.8400 156 | 1 1.8700 3.0800 157 | 1 0.8730 1.2700 158 | 1 1.6200 2.0900 159 | 1 1.9600 1.7100 160 | 1 1.9400 1.8900 161 | 1 1.2800 1.1900 162 | 1 2.2700 1.1100 163 | 1 0.8430 1.6900 164 | 1 1.8400 4.2600 165 | 1 1.3000 2.9400 166 | 1 2.7500 0.8110 167 | 1 2.7700 0.8160 168 | 1 -0.6590 2.6100 169 | 1 0.2440 2.4500 170 | 1 1.3200 3.6600 171 | 1 3.0700 1.5500 172 | 1 1.3100 0.7860 173 | 1 1.5600 1.7200 174 | 1 1.6400 2.1600 175 | 1 2.5800 2.3500 176 | 1 1.2400 0.5620 177 | 1 3.3600 1.3100 178 | 1 1.3500 1.4800 179 | 1 0.1570 1.5200 180 | 1 1.5200 2.6200 181 | 1 2.7000 2.0000 182 | 1 2.9300 2.3400 183 | 1 1.9800 2.1600 184 | 1 1.8100 1.6100 185 | 1 1.7300 0.8720 186 | 1 2.2800 1.0100 187 | 1 2.8400 1.7500 188 | 1 2.0500 2.4900 189 | 1 2.6400 0.4290 190 | 1 1.7900 2.8800 191 | 1 0.3020 2.3900 192 | 1 -0.2560 0.9770 193 | 1 2.0400 0.3430 194 | 1 1.0100 0.5280 195 | 1 3.6500 2.1600 196 | 1 2.5700 1.7800 197 | 1 1.6500 0.3840 198 | 1 1.7100 1.2400 199 | 1 2.8600 3.1400 200 | 1 3.4700 2.8500 -------------------------------------------------------------------------------- /sentiment_analysis/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | import project1 as p1 6 | import sys 7 | 8 | if sys.version_info[0] < 3: 9 | PYTHON3 = False 10 | else: 11 | PYTHON3 = True 12 | 13 | def load_toy_data(path_toy_data): 14 | """ 15 | Loads the 2D toy dataset as numpy arrays. 16 | Returns the tuple (features, labels) in which features is an Nx2 numpy matrix and 17 | labels is a length-N vector of +1/-1 labels. 
18 | """ 19 | labels, xs, ys = np.loadtxt(path_toy_data, delimiter='\t', unpack=True) 20 | return np.vstack((xs, ys)).T, labels 21 | 22 | def load_data(path_data, extras=False): 23 | """ 24 | Returns a list of dict with keys: 25 | * sentiment: +1 or -1 if the review was positive or negative, respectively 26 | * text: the text of the review 27 | 28 | Additionally, if the `extras` argument is True, each dict will also include the 29 | following information: 30 | * productId: a string that uniquely identifies each product 31 | * userId: a string that uniquely identifies each user 32 | * summary: the title of the review 33 | * helpfulY: the number of users who thought this review was helpful 34 | * helpfulN: the number of users who thought this review was NOT helpful 35 | """ 36 | 37 | global PYTHON3 38 | 39 | basic_fields = {'sentiment', 'text'} 40 | numeric_fields = {'sentiment', 'helpfulY', 'helpfulN'} 41 | 42 | data = [] 43 | if PYTHON3: 44 | f_data = open(path_data, encoding="latin1") 45 | else: 46 | f_data = open(path_data) 47 | 48 | for datum in csv.DictReader(f_data, delimiter='\t'): 49 | for field in list(datum.keys()): 50 | if not extras and field not in basic_fields: 51 | del datum[field] 52 | elif field in numeric_fields and datum[field]: 53 | datum[field] = int(datum[field]) 54 | 55 | data.append(datum) 56 | 57 | f_data.close() 58 | 59 | return data 60 | 61 | def write_predictions(path_submit_data, preds): 62 | if PYTHON3: 63 | f_data = open(path_submit_data, encoding="latin1") 64 | else: 65 | f_data = open(path_submit_data) 66 | 67 | reader = csv.DictReader(f_data, delimiter='\t') 68 | data = list(reader) 69 | 70 | assert len(preds) == len(data), \ 71 | 'Expected {} predictions but {} were given.'.format(len(data), len(preds)) 72 | 73 | for pred, datum in zip(preds.astype(int), data): 74 | assert pred == 1 or pred == -1, 'Invalid prediction: {}.'.format(pred) 75 | datum['sentiment'] = pred 76 | f_data.close() 77 | 78 | if PYTHON3: 79 | f_out = open(path_submit_data, 'w') 80 | else: 81 | f_out = open(path_submit_data, 'wb') 82 | 83 | writer = csv.DictWriter(f_out, delimiter='\t', fieldnames=reader.fieldnames) 84 | writer.writeheader() 85 | for datum in data: 86 | writer.writerow(datum) 87 | f_out.close() 88 | 89 | def plot_toy_data(algo_name, features, labels, thetas): 90 | """ 91 | Plots the toy data in 2D. 92 | Arguments: 93 | * features - an Nx2 ndarray of features (points) 94 | * labels - a length-N vector of +1/-1 labels 95 | * thetas - the tuple (theta, theta_0) that is the output of the learning algorithm 96 | * algorithm - the string name of the learning algorithm used 97 | """ 98 | # plot the points with labels represented as colors 99 | plt.subplots() 100 | colors = ['b' if label == 1 else 'r' for label in labels] 101 | plt.scatter(features[:, 0], features[:, 1], s=40, c=colors) 102 | xmin, xmax = plt.axis()[:2] 103 | 104 | # plot the decision boundary 105 | theta, theta_0 = thetas 106 | xs = np.linspace(xmin, xmax) 107 | ys = -(theta[0]*xs + theta_0) / (theta[1] + 1e-16) 108 | plt.plot(xs, ys, 'k-') 109 | 110 | # show the plot 111 | algo_name = ' '.join((word.capitalize() for word in algo_name.split(' '))) 112 | plt.suptitle('Classified Toy Data ({})'.format(algo_name)) 113 | plt.show() 114 | 115 | def plot_tune_results(algo_name, param_name, param_vals, acc_train, acc_val): 116 | """ 117 | Plots classification accuracy on the training and validation data versus 118 | several values of a hyperparameter used during training. 
119 | """ 120 | # put the data on the plot 121 | plt.subplots() 122 | plt.plot(param_vals, acc_train, '-o') 123 | plt.plot(param_vals, acc_val, '-o') 124 | 125 | # make the plot presentable 126 | algo_name = ' '.join((word.capitalize() for word in algo_name.split(' '))) 127 | param_name = param_name.capitalize() 128 | plt.suptitle('Classification Accuracy vs {} ({})'.format(param_name, algo_name)) 129 | plt.legend(['train','val'], loc='upper right', title='Partition') 130 | plt.xlabel(param_name) 131 | plt.ylabel('Accuracy (%)') 132 | plt.show() 133 | 134 | def tune(train_fn, param_vals, train_feats, train_labels, val_feats, val_labels): 135 | train_accs = np.ndarray(len(param_vals)) 136 | val_accs = np.ndarray(len(param_vals)) 137 | 138 | for i, val in enumerate(param_vals): 139 | theta, theta_0 = train_fn(train_feats, train_labels, val) 140 | 141 | train_preds = p1.classify(train_feats, theta, theta_0) 142 | train_accs[i] = p1.accuracy(train_preds, train_labels) 143 | 144 | val_preds = p1.classify(val_feats, theta, theta_0) 145 | val_accs[i] = p1.accuracy(val_preds, val_labels) 146 | 147 | return train_accs, val_accs 148 | 149 | def tune_perceptron(*args): 150 | return tune(p1.perceptron, *args) 151 | 152 | def tune_avg_perceptron(*args): 153 | return tune(p1.average_perceptron, *args) 154 | 155 | def tune_pegasos_T(best_L, *args): 156 | def train_fn(features, labels, T): 157 | return p1.pegasos(features, labels, T, best_L) 158 | return tune(train_fn, *args) 159 | 160 | def tune_pegasos_L(best_T, *args): 161 | def train_fn(features, labels, L): 162 | return p1.pegasos(features, labels, best_T, L) 163 | return tune(train_fn, *args) 164 | 165 | def most_explanatory_word(theta, wordlist): 166 | """Returns the word associated with the bag-of-words feature having largest weight.""" 167 | return [word for (theta_i, word) in sorted(zip(theta, wordlist))[::-1]] 168 | -------------------------------------------------------------------------------- /sqrt_newton.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2019-06-01 20:13:43 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | # @Version : $Id$ 7 | 8 | import os 9 | 10 | def sqrt_newton(y): 11 | if y < 0: 12 | return None 13 | err = 1e-7 14 | xn = y 15 | while abs(y - xn*xn) > err: 16 | xn = (y/xn + xn) / 2.0; 17 | print(xn) 18 | return xn 19 | 20 | def main(): 21 | print(sqrt_newton(2)) 22 | 23 | if __name__ == '__main__': 24 | main() -------------------------------------------------------------------------------- /svm/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : Aug-09-19 14:06 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | 7 | import os 8 | import utils 9 | 10 | 11 | def test_sklearn(): 12 | from sklearn import svm 13 | 14 | X = [[0, 0], [2, 2]] 15 | y = [0.5, 2.5] 16 | clf = svm.SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma="auto", 17 | kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False) 18 | clf.fit(X, y) 19 | 20 | print(clf.predict([[1, 1]])) 21 | 22 | 23 | def test_toy_data(): 24 | toy_features, toy_labels = toy_data = utils.load_toy_data('toy_data.tsv') 25 | 26 | from sklearn import svm 27 | clf = svm.SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma="auto", 28 | kernel='rbf', max_iter=-1, shrinking=True, 
tol=0.001, verbose=False) 29 | clf.fit(toy_features, toy_labels) 30 | # print(clf.coef_) # coef_ is only available when using a linear kernel 31 | print(clf.dual_coef_) 32 | 33 | 34 | def main(): 35 | # test_sklearn() 36 | test_toy_data() 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /svm/project1.py: -------------------------------------------------------------------------------- 1 | from string import punctuation, digits 2 | import numpy as np 3 | import random 4 | 5 | # Part I 6 | 7 | 8 | # pragma: coderesponse template 9 | def get_order(n_samples): 10 | try: 11 | with open(str(n_samples) + '.txt') as fp: 12 | line = fp.readline() 13 | return list(map(int, line.split(','))) 14 | except FileNotFoundError: 15 | random.seed(1) 16 | indices = list(range(n_samples)) 17 | random.shuffle(indices) 18 | return indices 19 | # pragma: coderesponse end 20 | 21 | 22 | # pragma: coderesponse template 23 | def hinge_loss_single(feature_vector, label, theta, theta_0): 24 | """ 25 | Finds the hinge loss on a single data point given specific classification 26 | parameters. 27 | 28 | Args: 29 | feature_vector - A numpy array describing the given data point. 30 | label - A real valued number, the correct classification of the data 31 | point. 32 | theta - A numpy array describing the linear classifier. 33 | theta_0 - A real valued number representing the offset parameter. 34 | 35 | 36 | Returns: A real number representing the hinge loss associated with the 37 | given data point and parameters. 38 | """ 39 | y = np.dot(theta, feature_vector) + theta_0 40 | loss = max(0.0, 1 - y * label) 41 | return loss 42 | # pragma: coderesponse end 43 | 44 | 45 | # pragma: coderesponse template 46 | def hinge_loss_full(feature_matrix, labels, theta, theta_0): 47 | """ 48 | Finds the total hinge loss on a set of data given specific classification 49 | parameters. 50 | 51 | Args: 52 | feature_matrix - A numpy matrix describing the given data. Each row 53 | represents a single data point. 54 | labels - A numpy array where the kth element of the array is the 55 | correct classification of the kth row of the feature matrix. 56 | theta - A numpy array describing the linear classifier. 57 | theta_0 - A real valued number representing the offset parameter. 58 | 59 | 60 | Returns: A real number representing the hinge loss associated with the 61 | given dataset and parameters. This number should be the average hinge 62 | loss across all of the points in the feature matrix. 63 | """ 64 | # Your code here 65 | loss = 0 66 | for i in range(len(feature_matrix)): 67 | loss += hinge_loss_single(feature_matrix[i], labels[i], theta, theta_0) 68 | return loss / len(labels) 69 | # pragma: coderesponse end 70 | 71 | 72 | # pragma: coderesponse template 73 | def perceptron_single_step_update( 74 | feature_vector, 75 | label, 76 | current_theta, 77 | current_theta_0): 78 | """ 79 | Properly updates the classification parameter, theta and theta_0, on a 80 | single step of the perceptron algorithm. 81 | 82 | Args: 83 | feature_vector - A numpy array describing a single data point. 84 | label - The correct classification of the feature vector. 85 | current_theta - The current theta being used by the perceptron 86 | algorithm before this update. 87 | current_theta_0 - The current theta_0 being used by the perceptron 88 | algorithm before this update. 
89 | 90 | Returns: A tuple where the first element is a numpy array with the value of 91 | theta after the current update has completed and the second element is a 92 | real valued number with the value of theta_0 after the current updated has 93 | completed. 94 | """ 95 | if label * (np.dot(current_theta, feature_vector) + current_theta_0) <= 0: 96 | current_theta += label * feature_vector 97 | current_theta_0 += label 98 | return (current_theta, current_theta_0) 99 | # pragma: coderesponse end 100 | 101 | 102 | # pragma: coderesponse template 103 | def perceptron(feature_matrix, labels, T): 104 | """ 105 | Runs the full perceptron algorithm on a given set of data. Runs T 106 | iterations through the data set, there is no need to worry about 107 | stopping early. 108 | 109 | NOTE: Please use the previously implemented functions when applicable. 110 | Do not copy paste code from previous parts. 111 | 112 | NOTE: Iterate the data matrix by the orders returned by get_order(feature_matrix.shape[0]) 113 | 114 | Args: 115 | feature_matrix - A numpy matrix describing the given data. Each row 116 | represents a single data point. 117 | labels - A numpy array where the kth element of the array is the 118 | correct classification of the kth row of the feature matrix. 119 | T - An integer indicating how many times the perceptron algorithm 120 | should iterate through the feature matrix. 121 | 122 | Returns: A tuple where the first element is a numpy array with the value of 123 | theta, the linear classification parameter, after T iterations through the 124 | feature matrix and the second element is a real number with the value of 125 | theta_0, the offset classification parameter, after T iterations through 126 | the feature matrix. 127 | """ 128 | (nsamples, nfeatures) = feature_matrix.shape 129 | theta = np.zeros(nfeatures) 130 | theta_0 = 0.0 131 | for t in range(T): 132 | for i in get_order(nsamples): 133 | theta, theta_0 = perceptron_single_step_update( 134 | feature_matrix[i], labels[i], theta, theta_0) 135 | return (theta, theta_0) 136 | # pragma: coderesponse end 137 | 138 | 139 | # pragma: coderesponse template 140 | def average_perceptron(feature_matrix, labels, T): 141 | """ 142 | Runs the average perceptron algorithm on a given set of data. Runs T 143 | iterations through the data set, there is no need to worry about 144 | stopping early. 145 | 146 | NOTE: Please use the previously implemented functions when applicable. 147 | Do not copy paste code from previous parts. 148 | 149 | NOTE: Iterate the data matrix by the orders returned by get_order(feature_matrix.shape[0]) 150 | 151 | 152 | Args: 153 | feature_matrix - A numpy matrix describing the given data. Each row 154 | represents a single data point. 155 | labels - A numpy array where the kth element of the array is the 156 | correct classification of the kth row of the feature matrix. 157 | T - An integer indicating how many times the perceptron algorithm 158 | should iterate through the feature matrix. 159 | 160 | Returns: A tuple where the first element is a numpy array with the value of 161 | the average theta, the linear classification parameter, found after T 162 | iterations through the feature matrix and the second element is a real 163 | number with the value of the average theta_0, the offset classification 164 | parameter, found after T iterations through the feature matrix. 165 | 166 | Hint: It is difficult to keep a running average; however, it is simple to 167 | find a sum and divide. 
168 | """ 169 | (nsamples, nfeatures) = feature_matrix.shape 170 | theta = np.zeros(nfeatures) 171 | theta_sum = np.zeros(nfeatures) 172 | theta_0 = 0.0 173 | theta_0_sum = 0.0 174 | for t in range(T): 175 | for i in get_order(nsamples): 176 | theta, theta_0 = perceptron_single_step_update( 177 | feature_matrix[i], labels[i], theta, theta_0) 178 | theta_sum += theta 179 | theta_0_sum += theta_0 180 | return (theta_sum / (nsamples * T), theta_0_sum / (nsamples * T)) 181 | # pragma: coderesponse end 182 | 183 | 184 | # pragma: coderesponse template 185 | def pegasos_single_step_update( 186 | feature_vector, 187 | label, 188 | L, 189 | eta, 190 | current_theta, 191 | current_theta_0): 192 | """ 193 | Properly updates the classification parameter, theta and theta_0, on a 194 | single step of the Pegasos algorithm 195 | 196 | Args: 197 | feature_vector - A numpy array describing a single data point. 198 | label - The correct classification of the feature vector. 199 | L - The lamba value being used to update the parameters. 200 | eta - Learning rate to update parameters. 201 | current_theta - The current theta being used by the Pegasos 202 | algorithm before this update. 203 | current_theta_0 - The current theta_0 being used by the 204 | Pegasos algorithm before this update. 205 | 206 | Returns: A tuple where the first element is a numpy array with the value of 207 | theta after the current update has completed and the second element is a 208 | real valued number with the value of theta_0 after the current updated has 209 | completed. 210 | """ 211 | mult = 1 - (eta * L) 212 | if label * (np.dot(feature_vector, current_theta) + current_theta_0) <= 1: 213 | return ((mult * current_theta) + (eta * label * feature_vector), 214 | (current_theta_0) + (eta * label)) 215 | return (mult * current_theta, current_theta_0) 216 | # pragma: coderesponse end 217 | 218 | 219 | # pragma: coderesponse template 220 | def pegasos(feature_matrix, labels, T, L): 221 | """ 222 | Runs the Pegasos algorithm on a given set of data. Runs T 223 | iterations through the data set, there is no need to worry about 224 | stopping early. 225 | 226 | For each update, set learning rate = 1/sqrt(t), 227 | where t is a counter for the number of updates performed so far (between 1 228 | and nT inclusive). 229 | 230 | NOTE: Please use the previously implemented functions when applicable. 231 | Do not copy paste code from previous parts. 232 | 233 | Args: 234 | feature_matrix - A numpy matrix describing the given data. Each row 235 | represents a single data point. 236 | labels - A numpy array where the kth element of the array is the 237 | correct classification of the kth row of the feature matrix. 238 | T - An integer indicating how many times the algorithm 239 | should iterate through the feature matrix. 240 | L - The lamba value being used to update the Pegasos 241 | algorithm parameters. 242 | 243 | Returns: A tuple where the first element is a numpy array with the value of 244 | the theta, the linear classification parameter, found after T 245 | iterations through the feature matrix and the second element is a real 246 | number with the value of the theta_0, the offset classification 247 | parameter, found after T iterations through the feature matrix. 
248 | """ 249 | (nsamples, nfeatures) = feature_matrix.shape 250 | theta = np.zeros(nfeatures) 251 | theta_0 = 0 252 | count = 0 253 | for t in range(T): 254 | for i in get_order(nsamples): 255 | count += 1 256 | eta = 1.0 / np.sqrt(count) 257 | (theta, theta_0) = pegasos_single_step_update( 258 | feature_matrix[i], labels[i], L, eta, theta, theta_0) 259 | return (theta, theta_0) 260 | # pragma: coderesponse end 261 | 262 | # Part II 263 | 264 | 265 | # pragma: coderesponse template 266 | def classify(feature_matrix, theta, theta_0): 267 | """ 268 | A classification function that uses theta and theta_0 to classify a set of 269 | data points. 270 | 271 | Args: 272 | feature_matrix - A numpy matrix describing the given data. Each row 273 | represents a single data point. 274 | theta - A numpy array describing the linear classifier, with one 275 | weight per feature of the feature matrix. 276 | theta_0 - A real valued number representing the offset parameter. 277 | 278 | Returns: A numpy array of 1s and -1s where the kth element of the array is 279 | the predicted classification of the kth row of the feature matrix using the 280 | given theta and theta_0. If a prediction is GREATER THAN zero, it should 281 | be considered a positive classification. 282 | """ 283 | (nsamples, nfeatures) = feature_matrix.shape 284 | predictions = np.zeros(nsamples) 285 | for i in range(nsamples): 286 | feature_vector = feature_matrix[i] 287 | prediction = np.dot(theta, feature_vector) + theta_0 288 | if (prediction > 0): 289 | predictions[i] = 1 290 | else: 291 | predictions[i] = -1 292 | return predictions 293 | # pragma: coderesponse end 294 | 295 | 296 | # pragma: coderesponse template 297 | def classifier_accuracy( 298 | classifier, 299 | train_feature_matrix, 300 | val_feature_matrix, 301 | train_labels, 302 | val_labels, 303 | **kwargs): 304 | """ 305 | Trains a linear classifier and computes accuracy. 306 | The classifier is trained on the train data. The classifier's 307 | accuracy on the train and validation data is then returned. 308 | 309 | Args: 310 | classifier - A classifier function that takes arguments 311 | (feature matrix, labels, **kwargs) and returns (theta, theta_0) 312 | train_feature_matrix - A numpy matrix describing the training 313 | data. Each row represents a single data point. 314 | val_feature_matrix - A numpy matrix describing the validation 315 | data. Each row represents a single data point. 316 | train_labels - A numpy array where the kth element of the array 317 | is the correct classification of the kth row of the training 318 | feature matrix. 319 | val_labels - A numpy array where the kth element of the array 320 | is the correct classification of the kth row of the validation 321 | feature matrix. 322 | **kwargs - Additional named arguments to pass to the classifier 323 | (e.g. T or L) 324 | 325 | Returns: A tuple in which the first element is the (scalar) accuracy of the 326 | trained classifier on the training data and the second element is the 327 | accuracy of the trained classifier on the validation data. 
328 | """ 329 | theta, theta_0 = classifier(train_feature_matrix, train_labels, **kwargs) 330 | train_predictions = classify(train_feature_matrix, theta, theta_0) 331 | val_predictions = classify(val_feature_matrix, theta, theta_0) 332 | train_accuracy = accuracy(train_predictions, train_labels) 333 | validation_accuracy = accuracy(val_predictions, val_labels) 334 | return (train_accuracy, validation_accuracy) 335 | # pragma: coderesponse end 336 | 337 | 338 | # pragma: coderesponse template 339 | def extract_words(input_string): 340 | """ 341 | Helper function for bag_of_words() 342 | Inputs a text string 343 | Returns a list of lowercase words in the string. 344 | Punctuation and digits are separated out into their own words. 345 | """ 346 | for c in punctuation + digits: 347 | input_string = input_string.replace(c, ' ' + c + ' ') 348 | 349 | return input_string.lower().split() 350 | # pragma: coderesponse end 351 | 352 | 353 | # pragma: coderesponse template 354 | def bag_of_words(texts, stopwords=None): 355 | """ 356 | Inputs a list of string reviews 357 | Returns a dictionary of unique unigrams occurring over the input 358 | 359 | Feel free to change this code as guided by Problem 9 360 | """ 361 | # Your code here 362 | dictionary = {} # maps word to unique index 363 | for text in texts: 364 | word_list = extract_words(text) 365 | for word in word_list: 366 | if stopwords and word in stopwords: 367 | continue 368 | if word not in dictionary: 369 | dictionary[word] = len(dictionary) 370 | return dictionary 371 | # pragma: coderesponse end 372 | 373 | 374 | # pragma: coderesponse template 375 | def extract_bow_feature_vectors(reviews, dictionary): 376 | """ 377 | Inputs a list of string reviews 378 | Inputs the dictionary of words as given by bag_of_words 379 | Returns the bag-of-words feature matrix representation of the data. 380 | The returned matrix is of shape (n, m), where n is the number of reviews 381 | and m the total number of entries in the dictionary. 382 | 383 | Feel free to change this code as guided by Problem 9 384 | """ 385 | # Your code here 386 | 387 | num_reviews = len(reviews) 388 | feature_matrix = np.zeros([num_reviews, len(dictionary)]) 389 | 390 | for i, text in enumerate(reviews): 391 | word_list = extract_words(text) 392 | for word in word_list: 393 | if word in dictionary: 394 | # count every occurrence of a dictionary word; 395 | # use = 1 here instead for binary indicator features 396 | feature_matrix[i, dictionary[word]] += 1 397 | 398 | return feature_matrix 399 | # pragma: coderesponse end 400 | 401 | 402 | # pragma: coderesponse template 403 | def accuracy(preds, targets): 404 | """ 405 | Given length-N vectors containing predicted and target labels, 406 | returns the fraction of correct predictions. 
407 | """ 408 | return (preds == targets).mean() 409 | # pragma: coderesponse end 410 | -------------------------------------------------------------------------------- /svm/toy_data.tsv: -------------------------------------------------------------------------------- 1 | -1 1.7600 0.4000 2 | -1 0.9790 2.2400 3 | -1 1.8700 -0.9770 4 | -1 0.9500 -0.1510 5 | -1 -0.1030 0.4110 6 | -1 0.1440 1.4500 7 | -1 0.7610 0.1220 8 | -1 0.4440 0.3340 9 | -1 1.4900 -0.2050 10 | -1 0.3130 -0.8540 11 | -1 -2.5500 0.6540 12 | -1 0.8640 -0.7420 13 | -1 2.2700 -1.4500 14 | -1 0.0458 -0.1870 15 | -1 1.5300 1.4700 16 | -1 0.1550 0.3780 17 | -1 -0.8878 -1.9808 18 | -1 -0.3480 0.1560 19 | -1 1.2300 1.2000 20 | -1 -0.3873 -0.3023 21 | -1 -1.0486 -1.4200 22 | -1 -1.7100 1.9500 23 | -1 -0.5097 -0.4381 24 | -1 -1.2500 0.7770 25 | -1 -1.6139 -0.2127 26 | -1 -0.8950 0.3870 27 | -1 -0.5108 -1.1806 28 | -1 -0.0282 0.4280 29 | -1 0.0665 0.3020 30 | -1 -0.6343 -0.3627 31 | -1 -0.6725 -0.3596 32 | -1 -0.8131 -1.7263 33 | -1 0.1770 -0.4020 34 | -1 -1.6300 0.4630 35 | -1 -0.9070 0.0519 36 | -1 0.7290 0.1290 37 | -1 1.1400 -1.2300 38 | -1 0.4020 -0.6850 39 | -1 -0.8708 -0.5788 40 | -1 -0.3120 0.0562 41 | -1 -1.1700 0.9010 42 | -1 0.4660 -1.5400 43 | -1 1.4900 1.9000 44 | -1 1.1800 -0.1800 45 | -1 -1.0700 1.0500 46 | -1 -0.4030 1.2200 47 | -1 0.2080 0.9770 48 | -1 0.3560 0.7070 49 | -1 0.0105 1.7900 50 | -1 0.1270 0.4020 51 | -1 1.8800 -1.3500 52 | -1 -1.2700 0.9690 53 | -1 -1.1700 1.9400 54 | -1 -0.4136 -0.7475 55 | -1 1.9200 1.4800 56 | -1 1.8700 0.9060 57 | -1 -0.8610 1.9100 58 | -1 -0.2680 0.8020 59 | -1 0.9470 -0.1550 60 | -1 0.6140 0.9220 61 | -1 0.3760 -1.1000 62 | -1 0.2980 1.3300 63 | -1 -0.6946 -0.1496 64 | -1 -0.4350 1.8500 65 | -1 0.6720 0.4070 66 | -1 -0.7700 0.5390 67 | -1 -0.6740 0.0318 68 | -1 -0.6360 0.6760 69 | -1 0.5770 -0.2080 70 | -1 0.3960 -1.0900 71 | -1 -1.4900 0.4390 72 | -1 0.1670 0.6350 73 | -1 2.3800 0.9440 74 | -1 -0.9130 1.1200 75 | -1 -1.3159 -0.4616 76 | -1 -0.0682 1.7100 77 | -1 -0.7448 -0.8264 78 | -1 -0.0985 -0.6635 79 | -1 1.1300 -1.0800 80 | -1 -1.1475 -0.4378 81 | -1 -0.4980 1.9300 82 | -1 0.9490 0.0876 83 | -1 -1.2300 0.8440 84 | -1 -1.0002 -1.5448 85 | -1 1.1900 0.3170 86 | -1 0.9210 0.3190 87 | -1 0.8570 -0.6510 88 | -1 -1.0300 0.6820 89 | -1 -0.8034 -0.6895 90 | -1 -0.4560 0.0175 91 | -1 -0.3540 -1.3750 92 | -1 -0.6436 -2.2234 93 | -1 0.6250 -1.6000 94 | -1 -1.1000 0.0522 95 | -1 -0.7400 1.5400 96 | -1 -1.2900 0.2670 97 | -1 -0.0393 -1.1681 98 | -1 0.5230 -0.1720 99 | -1 0.7720 0.8240 100 | -1 2.1600 1.3400 101 | 1 1.6300 1.7600 102 | 1 3.1000 2.6600 103 | 1 2.6400 0.3830 104 | 1 1.9800 1.2600 105 | 1 2.2800 1.9000 106 | 1 2.9100 2.3200 107 | 1 2.7900 1.5300 108 | 1 1.0600 1.5900 109 | 1 1.9800 2.3800 110 | 1 4.2600 1.9600 111 | 1 1.0400 1.6500 112 | 1 1.5400 2.4800 113 | 1 0.4590 2.0600 114 | 1 2.1600 2.2300 115 | 1 1.4000 1.7600 116 | 1 0.5760 1.5100 117 | 1 1.4600 2.4200 118 | 1 0.8440 2.7800 119 | 1 3.4900 -0.0700 120 | 1 2.4300 2.6800 121 | 1 1.3600 1.6000 122 | 1 1.8700 1.7000 123 | 1 1.6900 0.3240 124 | 1 3.1500 3.0800 125 | 1 1.1900 0.5340 126 | 1 2.5200 1.4200 127 | 1 2.1400 1.6800 128 | 1 2.6900 2.6900 129 | 1 1.2700 0.6170 130 | 1 0.4170 2.6100 131 | 1 0.8110 1.4900 132 | 1 1.4000 1.9500 133 | 1 0.0637 2.1900 134 | 1 2.5200 2.0900 135 | 1 1.6900 2.1000 136 | 1 2.4000 -0.7730 137 | 1 3.9600 2.3900 138 | 1 1.3500 1.6100 139 | 1 2.4900 1.8800 140 | 1 -0.0307 4.0600 141 | 1 1.8900 3.0200 142 | 1 1.3100 3.5400 143 | 1 2.2900 2.6100 144 | 1 0.9550 3.2100 145 | 1 2.6900 3.3000 146 | 
1 1.3700 1.5200 147 | 1 4.3000 0.9400 148 | 1 1.8600 3.1400 149 | 1 2.1000 2.5800 150 | 1 1.6000 2.3700 151 | 1 0.6930 3.6600 152 | 1 1.8800 1.3200 153 | 1 2.6700 1.5400 154 | 1 0.6660 0.6530 155 | 1 2.6900 1.8400 156 | 1 1.8700 3.0800 157 | 1 0.8730 1.2700 158 | 1 1.6200 2.0900 159 | 1 1.9600 1.7100 160 | 1 1.9400 1.8900 161 | 1 1.2800 1.1900 162 | 1 2.2700 1.1100 163 | 1 0.8430 1.6900 164 | 1 1.8400 4.2600 165 | 1 1.3000 2.9400 166 | 1 2.7500 0.8110 167 | 1 2.7700 0.8160 168 | 1 -0.6590 2.6100 169 | 1 0.2440 2.4500 170 | 1 1.3200 3.6600 171 | 1 3.0700 1.5500 172 | 1 1.3100 0.7860 173 | 1 1.5600 1.7200 174 | 1 1.6400 2.1600 175 | 1 2.5800 2.3500 176 | 1 1.2400 0.5620 177 | 1 3.3600 1.3100 178 | 1 1.3500 1.4800 179 | 1 0.1570 1.5200 180 | 1 1.5200 2.6200 181 | 1 2.7000 2.0000 182 | 1 2.9300 2.3400 183 | 1 1.9800 2.1600 184 | 1 1.8100 1.6100 185 | 1 1.7300 0.8720 186 | 1 2.2800 1.0100 187 | 1 2.8400 1.7500 188 | 1 2.0500 2.4900 189 | 1 2.6400 0.4290 190 | 1 1.7900 2.8800 191 | 1 0.3020 2.3900 192 | 1 -0.2560 0.9770 193 | 1 2.0400 0.3430 194 | 1 1.0100 0.5280 195 | 1 3.6500 2.1600 196 | 1 2.5700 1.7800 197 | 1 1.6500 0.3840 198 | 1 1.7100 1.2400 199 | 1 2.8600 3.1400 200 | 1 3.4700 2.8500 -------------------------------------------------------------------------------- /svm/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | import project1 as p1 6 | import sys 7 | 8 | if sys.version_info[0] < 3: 9 | PYTHON3 = False 10 | else: 11 | PYTHON3 = True 12 | 13 | def load_toy_data(path_toy_data): 14 | """ 15 | Loads the 2D toy dataset as numpy arrays. 16 | Returns the tuple (features, labels) in which features is an Nx2 numpy matrix and 17 | labels is a length-N vector of +1/-1 labels. 
18 | """ 19 | labels, xs, ys = np.loadtxt(path_toy_data, delimiter='\t', unpack=True) 20 | return np.vstack((xs, ys)).T, labels 21 | 22 | def load_data(path_data, extras=False): 23 | """ 24 | Returns a list of dict with keys: 25 | * sentiment: +1 or -1 if the review was positive or negative, respectively 26 | * text: the text of the review 27 | 28 | Additionally, if the `extras` argument is True, each dict will also include the 29 | following information: 30 | * productId: a string that uniquely identifies each product 31 | * userId: a string that uniquely identifies each user 32 | * summary: the title of the review 33 | * helpfulY: the number of users who thought this review was helpful 34 | * helpfulN: the number of users who thought this review was NOT helpful 35 | """ 36 | 37 | global PYTHON3 38 | 39 | basic_fields = {'sentiment', 'text'} 40 | numeric_fields = {'sentiment', 'helpfulY', 'helpfulN'} 41 | 42 | data = [] 43 | if PYTHON3: 44 | f_data = open(path_data, encoding="latin1") 45 | else: 46 | f_data = open(path_data) 47 | 48 | for datum in csv.DictReader(f_data, delimiter='\t'): 49 | for field in list(datum.keys()): 50 | if not extras and field not in basic_fields: 51 | del datum[field] 52 | elif field in numeric_fields and datum[field]: 53 | datum[field] = int(datum[field]) 54 | 55 | data.append(datum) 56 | 57 | f_data.close() 58 | 59 | return data 60 | 61 | def write_predictions(path_submit_data, preds): 62 | if PYTHON3: 63 | f_data = open(path_submit_data, encoding="latin1") 64 | else: 65 | f_data = open(path_submit_data) 66 | 67 | reader = csv.DictReader(f_data, delimiter='\t') 68 | data = list(reader) 69 | 70 | assert len(preds) == len(data), \ 71 | 'Expected {} predictions but {} were given.'.format(len(data), len(preds)) 72 | 73 | for pred, datum in zip(preds.astype(int), data): 74 | assert pred == 1 or pred == -1, 'Invalid prediction: {}.'.format(pred) 75 | datum['sentiment'] = pred 76 | f_data.close() 77 | 78 | if PYTHON3: 79 | f_out = open(path_submit_data, 'w') 80 | else: 81 | f_out = open(path_submit_data, 'wb') 82 | 83 | writer = csv.DictWriter(f_out, delimiter='\t', fieldnames=reader.fieldnames) 84 | writer.writeheader() 85 | for datum in data: 86 | writer.writerow(datum) 87 | f_out.close() 88 | 89 | def plot_toy_data(algo_name, features, labels, thetas): 90 | """ 91 | Plots the toy data in 2D. 92 | Arguments: 93 | * features - an Nx2 ndarray of features (points) 94 | * labels - a length-N vector of +1/-1 labels 95 | * thetas - the tuple (theta, theta_0) that is the output of the learning algorithm 96 | * algorithm - the string name of the learning algorithm used 97 | """ 98 | # plot the points with labels represented as colors 99 | plt.subplots() 100 | colors = ['b' if label == 1 else 'r' for label in labels] 101 | plt.scatter(features[:, 0], features[:, 1], s=40, c=colors) 102 | xmin, xmax = plt.axis()[:2] 103 | 104 | # plot the decision boundary 105 | theta, theta_0 = thetas 106 | xs = np.linspace(xmin, xmax) 107 | ys = -(theta[0]*xs + theta_0) / (theta[1] + 1e-16) 108 | plt.plot(xs, ys, 'k-') 109 | 110 | # show the plot 111 | algo_name = ' '.join((word.capitalize() for word in algo_name.split(' '))) 112 | plt.suptitle('Classified Toy Data ({})'.format(algo_name)) 113 | plt.show() 114 | 115 | def plot_tune_results(algo_name, param_name, param_vals, acc_train, acc_val): 116 | """ 117 | Plots classification accuracy on the training and validation data versus 118 | several values of a hyperparameter used during training. 
119 | """ 120 | # put the data on the plot 121 | plt.subplots() 122 | plt.plot(param_vals, acc_train, '-o') 123 | plt.plot(param_vals, acc_val, '-o') 124 | 125 | # make the plot presentable 126 | algo_name = ' '.join((word.capitalize() for word in algo_name.split(' '))) 127 | param_name = param_name.capitalize() 128 | plt.suptitle('Classification Accuracy vs {} ({})'.format(param_name, algo_name)) 129 | plt.legend(['train','val'], loc='upper right', title='Partition') 130 | plt.xlabel(param_name) 131 | plt.ylabel('Accuracy (%)') 132 | plt.show() 133 | 134 | def tune(train_fn, param_vals, train_feats, train_labels, val_feats, val_labels): 135 | train_accs = np.ndarray(len(param_vals)) 136 | val_accs = np.ndarray(len(param_vals)) 137 | 138 | for i, val in enumerate(param_vals): 139 | theta, theta_0 = train_fn(train_feats, train_labels, val) 140 | 141 | train_preds = p1.classify(train_feats, theta, theta_0) 142 | train_accs[i] = p1.accuracy(train_preds, train_labels) 143 | 144 | val_preds = p1.classify(val_feats, theta, theta_0) 145 | val_accs[i] = p1.accuracy(val_preds, val_labels) 146 | 147 | return train_accs, val_accs 148 | 149 | def tune_perceptron(*args): 150 | return tune(p1.perceptron, *args) 151 | 152 | def tune_avg_perceptron(*args): 153 | return tune(p1.average_perceptron, *args) 154 | 155 | def tune_pegasos_T(best_L, *args): 156 | def train_fn(features, labels, T): 157 | return p1.pegasos(features, labels, T, best_L) 158 | return tune(train_fn, *args) 159 | 160 | def tune_pegasos_L(best_T, *args): 161 | def train_fn(features, labels, L): 162 | return p1.pegasos(features, labels, best_T, L) 163 | return tune(train_fn, *args) 164 | 165 | def most_explanatory_word(theta, wordlist): 166 | """Returns the word associated with the bag-of-words feature having largest weight.""" 167 | return [word for (theta_i, word) in sorted(zip(theta, wordlist))[::-1]] 168 | -------------------------------------------------------------------------------- /update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git add -A 3 | git commit -am "gen and svm `date`" 4 | git push 5 | -------------------------------------------------------------------------------- /vector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Date : 2019-06-28 01:57:08 4 | # @Author : Your Name (you@example.org) 5 | # @Link : http://example.org 6 | # @Version : $Id$ 7 | 8 | import os 9 | import numpy as np 10 | 11 | 12 | def funcname(parameter_list): 13 | pass 14 | 15 | 16 | def main(): 17 | a = np.asarray([0.4, 0.3]) 18 | b = np.asarray([-0.15, 0.2]) 19 | r = np.dot(a, b) 20 | print(r) 21 | 22 | 23 | if __name__ == "__main__": 24 | main() 25 | -------------------------------------------------------------------------------- /wiki.txt: -------------------------------------------------------------------------------- 1 | '''Stochastic gradient descent''' (often abbreviated '''SGD''') is an [[iterative method]] for [[Mathematical optimization|optimizing]] an [[objective function]] with suitable smoothness properties (e.g. [[Differentiable function|differentiable]] or [[Subgradient method|subdifferentiable]]). It is called '''stochastic''' because the method uses randomly selected (or shuffled) samples to evaluate the gradients, hence SGD can be regarded as a [[stochastic approximation]] of [[gradient descent]] optimization. 
The ideas can be traced back{{cite journal | last = Mei | first = Song | title = A mean field view of the landscape of two-layer neural networks | journal = Proceedings of the National Academy of Sciences | volume = 115| issue = 33| year = 2018 | pages = E7665–E7671| jstor = | doi = 10.1073/pnas.1806579115 | pmid = 30054315 | pmc = 6099898 }} at least to the 1951 article titled "A Stochastic Approximation Method" by [[Herbert Robbins]] and [[Sutton Monro]], who proposed with detailed analysis a root-finding method now called the [[stochastic approximation|Robbins–Monro algorithm]]. 2 | 3 | == Background == 4 | {{Main|M-estimation}} 5 | {{See also|Estimating equation}} 6 | Both [[statistics|statistical]] [[M-estimation|estimation]] and [[machine learning]] consider the problem of [[Mathematical optimization|minimizing]] an [[objective function]] that has the form of a sum: 7 | : Q(w) = \frac{1}{n}\sum_{i=1}^n Q_i(w), 8 | where the [[parametric statistics|parameter]] w that minimizes Q(w) is to be [[estimator|estimated]]. Each summand function Q_i is typically associated with the i-th [[Observation (statistics)|observation]] in the [[data set]] (used for training). 9 | 10 | In classical statistics, sum-minimization problems arise in [[least squares]] and in [[maximum-likelihood estimation]] (for independent observations). The general class of estimators that arise as minimizers of sums are called [[M-estimator]]s. However, in statistics, it has been long recognized that requiring even local minimization is too restrictive for some problems of maximum-likelihood estimation.{{cite journal | last = Ferguson | first = Thomas S. | title = An inconsistent maximum likelihood estimate | journal = Journal of the American Statistical Association | volume = 77 | issue = 380 | year = 1982 | pages = 831–834 | jstor = 2287314 | doi = 10.1080/01621459.1982.10477894 }} Therefore, contemporary statistical theorists often consider [[stationary point]]s of the [[likelihood function]] (or zeros of its derivative, the [[Score (statistics)|score function]], and other [[estimating equations]]). 11 | 12 | The sum-minimization problem also arises for [[empirical risk minimization]]. In this case, Q_i(w) is the value of the [[loss function]] at i-th example, and Q(w) is the empirical risk. 13 | 14 | When used to minimize the above function, a standard (or "batch") [[gradient descent]] method would perform the following iterations : 15 | : w := w - \eta \nabla Q(w) = w - \eta \sum_{i=1}^n \nabla Q_i(w)/n, 16 | where \eta is a step size (sometimes called the ''[[learning rate]]'' in machine learning). 17 | 18 | In many cases, the summand functions have a simple form that enables inexpensive evaluations of the sum-function and the sum gradient. For example, in statistics, [[exponential families|one-parameter exponential families]] allow economical function-evaluations and gradient-evaluations. 19 | 20 | However, in other cases, evaluating the sum-gradient may require expensive evaluations of the gradients from all summand functions. When the training set is enormous and no simple formulas exist, evaluating the sums of gradients becomes very expensive, because evaluating the gradient requires evaluating all the summand functions' gradients. To economize on the computational cost at every iteration, stochastic gradient descent [[sampling (statistics)|samples]] a subset of summand functions at every step. 
This is very 21 | effective in the case of large-scale machine learning problems.{{Cite conference |first1=Léon |last1=Bottou |author1-link=Léon Bottou |last2=Bousquet |first2=Olivier |title=The Tradeoffs of Large Scale Learning |url=http://leon.bottou.org/papers/bottou-bousquet-2008 |conference=[[Advances in Neural Information Processing Systems]] |volume=20 |pages=161–168 |year=2008}} 22 | 23 | == Iterative method == 24 | [[Image:stogra.png|thumb|right|Fluctuations in the total objective function as gradient steps with respect to mini-batches are taken.]] 25 | 26 | In stochastic (or "on-line") gradient descent, the true gradient of Q(w) is approximated by a gradient at a single example: 27 | : w := w - \eta \nabla Q_i(w). 28 | As the algorithm sweeps through the training set, it performs the above update for each training example. Several passes can be made over the training set until the algorithm converges. If this is done, the data can be shuffled for each pass to prevent cycles. Typical implementations may use an [[adaptive learning rate]] so that the algorithm converges. 29 | 30 | In pseudocode, stochastic gradient descent can be presented as follows: 31 |
32 | {{framebox|blue}} 33 | * Choose an initial vector of parameters w and learning rate \eta. 34 | * Repeat until an approximate minimum is obtained: 35 | ** Randomly shuffle examples in the training set. 36 | ** For i=1, 2, ..., n, do: 37 | *** \! w := w - \eta \nabla Q_i(w). 38 | {{frame-footer}} 39 |
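The boxed pseudocode above can be written as a short NumPy sketch. Everything below is illustrative rather than canonical: grad_Qi stands for a user-supplied function that returns the gradient of Q_i at w for the i-th training example, and the default step size and epoch count are arbitrary choices.
<syntaxhighlight lang="python">
import numpy as np

def sgd(w0, grad_Qi, n, eta=0.01, n_epochs=10, seed=0):
    """Plain stochastic gradient descent: one example per parameter update."""
    rng = np.random.default_rng(seed)
    w = np.array(w0, dtype=float)
    for _ in range(n_epochs):              # repeat until an approximate minimum is reached
        for i in rng.permutation(n):       # randomly shuffle the training examples
            w -= eta * grad_Qi(w, i)       # w := w - eta * grad Q_i(w)
    return w
</syntaxhighlight>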
40 | 41 | A compromise between computing the true gradient and the gradient at a single example is to compute the gradient against more than one training example (called a "mini-batch") at each step. This can perform significantly better than "true" stochastic gradient descent described, because the code can make use of [[Vectorization (mathematics)|vectorization]] libraries rather than computing each step separately. It may also result in smoother convergence, as the gradient computed at each step is averaged over more training examples. 42 | 43 | The convergence of stochastic gradient descent has been analyzed using the theories of [[convex optimization|convex minimization]] and of [[stochastic approximation]]. Briefly, when the [[learning rate]]s \eta decrease with an appropriate rate, 44 | and subject to relatively mild assumptions, stochastic gradient descent converges [[almost surely]] to a global minimum 45 | when the objective function is [[convex function|convex]] or [[pseudoconvex function|pseudoconvex]], 46 | and otherwise converges almost surely to a local minimum.{{Cite book 47 | |last=Bottou 48 | |first=Léon 49 | |authorlink=Léon Bottou 50 | |contribution=Online Algorithms and Stochastic Approximations 51 | |year=1998 52 | |title=Online Learning and Neural Networks 53 | |publisher=Cambridge University Press 54 | |url=http://leon.bottou.org/papers/bottou-98x 55 | |isbn=978-0-521-65263-6 56 | |postscript={{inconsistent citations}} 57 | }}{{cite news 58 | |last=Kiwiel 59 | |first=Krzysztof C. 60 | |title=Convergence and efficiency of subgradient methods for quasiconvex minimization 61 | |journal=Mathematical Programming, Series A 62 | |publisher=Springer|location=Berlin, Heidelberg 63 | |issn=0025-5610|pages=1–25|volume=90|issue=1 64 | |doi=10.1007/PL00011414|year=2001 |mr=1819784}} 65 | This is in fact a consequence of the [[Robbins-Siegmund theorem]].{{Cite book 66 | |last1=Robbins 67 | |first1=Herbert 68 | |author1-link=Herbert Robbins 69 | |last2=Siegmund 70 | |first2=David O. 71 | |author2-link=David O. Siegmund 72 | |contribution=A convergence theorem for non negative almost supermartingales and some applications 73 | |title=Optimizing Methods in Statistics 74 | |publisher=Academic Press 75 | |year=1971 76 | |editor-last=Rustagi 77 | |editor-first=Jagdish S. 78 | |postscript={{inconsistent citations}} 79 | }} 80 | 81 | 82 | == Example == 83 | Let's suppose we want to fit a straight line y = \! w_1 + w_2 x to a training set with observations (x_1, x_2, \ldots, x_n) and corresponding estimated responses (\hat{y_1}, \hat{y_2}, \ldots, \hat{y_n}) using [[least squares]]. The objective function to be minimized is: 84 | : Q(w) = \sum_{i=1}^n Q_i(w) = \sum_{i=1}^n \left(\hat{y_i}-y_i\right)^2 = \sum_{i=1}^n \left(w_1 + w_2 x_i - y_i\right)^2. 85 | 86 | The last line in the above pseudocode for this specific problem will become: 87 | : \begin{bmatrix} w_1 \\ w_2 \end{bmatrix} := 88 | \begin{bmatrix} w_1 \\ w_2 \end{bmatrix} 89 | - \eta \begin{bmatrix} \frac{\partial}{\partial w_1} (w_1 + w_2 x_i - y_i)^2 \\ 90 | \frac{\partial}{\partial w_2} (w_1 + w_2 x_i - y_i)^2 \end{bmatrix} = 91 | \begin{bmatrix} w_1 \\ w_2 \end{bmatrix} 92 | - \eta \begin{bmatrix} 2 (w_1 + w_2 x_i - y_i) \\ 2 x_i(w_1 + w_2 x_i - y_i) \end{bmatrix}. 93 | 94 | Note that in each iteration (also called update), only the gradient evaluated at a single point x_i instead of evaluating at the set of all samples. 
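The bracketed update above can be turned into a small, self-contained sketch that fits y = w_1 + w_2 x by stochastic gradient descent; the synthetic data, step size and number of passes are made-up choices for illustration only.
<syntaxhighlight lang="python">
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-1, 1, size=100)
y = 2.0 + 3.0 * x + rng.normal(scale=0.1, size=100)   # noisy samples of y = 2 + 3x

w = np.zeros(2)                                       # [w_1, w_2]
eta = 0.05
for _ in range(50):                                   # passes over the training set
    for i in rng.permutation(len(x)):
        r = w[0] + w[1] * x[i] - y[i]                 # residual w_1 + w_2*x_i - y_i
        w -= eta * np.array([2 * r, 2 * r * x[i]])    # gradient of (w_1 + w_2*x_i - y_i)^2
print(w)                                              # approaches [2, 3]
</syntaxhighlight>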
95 | 96 | The key difference compared to standard (Batch) Gradient Descent is that only one piece of data from the dataset is used to calculate the step, and the piece of data is picked randomly at each step. 97 | 98 | == Notable applications == 99 | Stochastic gradient descent is a popular algorithm for training a wide range of models in [[machine learning]], including (linear) [[support vector machine]]s, [[logistic regression]] (see, e.g., [[Vowpal Wabbit]]) and [[graphical model]]s.Jenny Rose Finkel, Alex Kleeman, Christopher D. Manning (2008). [http://www.aclweb.org/anthology/P08-1109 Efficient, Feature-based, Conditional Random Field Parsing]. Proc. Annual Meeting of the ACL. When combined with the [[backpropagation]] algorithm, it is the ''de facto'' standard algorithm for training [[artificial neural network]]s.[http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf LeCun, Yann A., et al. "Efficient backprop." Neural networks: Tricks of the trade. Springer Berlin Heidelberg, 2012. 9-48] Its use has been also reported in the [[Geophysics]] community, specifically to applications of Full Waveform Inversion (FWI).[http://library.seg.org/doi/abs/10.1190/1.3627777 Díaz, Esteban and Guitton, Antoine. "Fast full waveform inversion with random shot decimation". SEG Technical Program Expanded Abstracts, 2011. 2804-2808] 100 | 101 | Stochastic gradient descent competes with the [[limited-memory BFGS|L-BFGS]] algorithm,{{Citation needed|date=July 2015}} which is also widely used. Stochastic gradient descent has been used since at least 1960 for training [[linear regression]] models, originally under the name [[ADALINE]].{{cite web |author=Avi Pfeffer |title=CS181 Lecture 5 — Perceptrons |url=http://www.seas.harvard.edu/courses/cs181/files/lecture05-notes.pdf |publisher=Harvard University }}{{Dead link|date=June 2018 |bot=InternetArchiveBot |fix-attempted=no }} 102 | 103 | Another stochastic gradient descent algorithm is the [[Least mean squares filter|least mean squares (LMS)]] adaptive filter. 104 | 105 | ==Extensions and variants== 106 | Many improvements on the basic stochastic gradient descent algorithm have been proposed and used. In particular, in machine learning, the need to set a [[learning rate]] (step size) has been recognized as problematic. Setting this parameter too high can cause the algorithm to diverge{{citation needed|date=October 2017}}; setting it too low makes it slow to converge{{citation needed|date=October 2017}}. A conceptually simple extension of stochastic gradient descent makes the learning rate a decreasing function {{mvar|ηt}} of the iteration number {{mvar|t}}, giving a ''learning rate schedule'', so that the first iterations cause large changes in the parameters, while the later ones do only fine-tuning. Such schedules have been known since the work of MacQueen on [[K-means clustering|{{mvar|k}}-means clustering]].Cited by {{cite conference |last1=Darken |first1=Christian |first2=John |last2=Moody |title=Fast adaptive k-means clustering: some empirical results |year=1990 |conference=Int'l Joint Conf. on Neural Networks (IJCNN) |publisher=IEEE|url=http://ieeexplore.ieee.org/abstract/document/5726679/}} Some practical guidance on choosing the step size in several variants of SGD is given in Sects. 4.4, 6.6, and 7.5 of Spall, J. C. (2003), Introduction to Stochastic Search and Optimization: Estimation, Simulation, and Control, Wiley, Hoboken, NJ. 
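As a sketch of the learning-rate schedule just described, the fixed step size in the earlier loop can be replaced by a decreasing function of the update counter; the 1/(1 + decay*t) form and its constants below are only one common choice, not prescribed by the text.
<syntaxhighlight lang="python">
import numpy as np

def sgd_with_schedule(w0, grad_Qi, n, eta0=0.1, decay=0.01, n_epochs=10, seed=0):
    """SGD with a decreasing learning-rate schedule eta_t = eta0 / (1 + decay * t)."""
    rng = np.random.default_rng(seed)
    w = np.array(w0, dtype=float)
    t = 0
    for _ in range(n_epochs):
        for i in rng.permutation(n):
            eta_t = eta0 / (1.0 + decay * t)   # large steps early, fine-tuning later
            w -= eta_t * grad_Qi(w, i)
            t += 1
    return w
</syntaxhighlight>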
107 | 108 | ===Implicit updates (ISGD)=== 109 | As mentioned earlier, classical stochastic gradient descent is generally sensitive to [[learning rate]] {{mvar|η}}. Fast convergence requires large learning rates but this may induce numerical instability. The problem can be largely solved{{citation needed|date=May 2019}} by considering ''implicit updates'' whereby the stochastic gradient is evaluated at the next iterate rather than the current one: 110 | :w^{new} := w^{old} - \eta \nabla Q_i(w^{new}). 111 | 112 | This equation is implicit since w^{new} appears on both sides of the equation. It is a stochastic form of the [[proximal gradient method]] since the update 113 | can also be written as: 114 | :w^{new} := \arg\min_w \{ Q_i(w) + \frac{1}{2\eta} ||w - w^{old}||^2 \}. 115 | 116 | As an example, 117 | consider least squares with features x_1, \ldots, x_n \in\mathbb{R}^p and observations 118 | y_1, \ldots, y_n\in\mathbb{R}. We wish to solve: 119 | :\min_w \sum_{j=1}^n (y_j - x_j'w)^2. 120 | Note that x could have "1" as the first element to include an intercept. Classical stochastic gradient descent proceeds as follows: 121 | :w^{new} = w^{old} + \eta (y_i - x_i'w^{old}) x_i 122 | 123 | where i is uniformly sampled between 1 and n. Although theoretical convergence of this procedure happens under relatively mild assumptions, in practice the procedure can be quite unstable. In particular, when \eta is misspecified so that I - \eta x_i x_i' has large absolute eigenvalues with high probability, the procedure may diverge numerically within a few iterations. In contrast, ''implicit stochastic gradient descent'' (shortened as ISGD) can be solved in closed-form as: 124 | :w^{new} = w^{old} + \frac{\eta}{1 + \eta ||x_i||^2} (y_i - x_i'w^{old}) x_i. 125 | 126 | This procedure will remain numerically stable virtually for all \eta as the [[learning rate]] is now normalized. Such comparison between classical and implicit stochastic gradient descent in the least squares problem is very similar to the comparison between [[Least mean squares filter|least mean squares (LMS)]] and 127 | [[Least mean squares filter#Normalized least mean squares filter (NLMS)|normalized least mean squares filter (NLMS)]]. 128 | 129 | Even though a closed-form solution for ISGD is only possible in least squares, the procedure can be efficiently implemented in a wide range of models. Specifically, suppose that Q_i(w) depends on w only through a linear combination with features x_i, so that we can write \nabla_w Q_i(w) = -q(x_i'w) x_i, where 130 | q() \in\mathbb{R} may depend on x_i, y_i as well but not on w except through x_i'w. Least squares obeys this rule, and so does [[logistic regression]], and most [[generalized linear model]]s. For instance, in least squares, q(x_i'w) = y_i - x_i'w, and in logistic regression q(x_i'w) = y_i - S(x_i'w), where S(u) = e^u/(1+e^u) is the [[logistic function]]. In [[Poisson regression]], q(x_i'w) = y_i - e^{x_i'w}, and so on. 131 | 132 | In such settings, ISGD is simply implemented as follows: 133 | :w^{new} = w^{old} + \xi x_i,\quad\xi = \eta q(x_i'w^{old} + \xi ||x_i||^2). 134 | 135 | The scaling factor \xi\in\mathbb{R} can be found through [[bisection method]] since 136 | in most regular models, such as the aforementioned generalized linear models, function q() is decreasing, 137 | and thus the search bounds for \xi are 138 | [\min(0, b_i), \max(0, b_i)], where b_i = \eta q(x_i'w^{old}). 
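The contrast between the classical and the implicit least-squares updates given above can be sketched as two single-step functions; here x_i is a length-p NumPy array and y_i a scalar, and both functions are illustrations of the formulas rather than a reference implementation.
<syntaxhighlight lang="python">
import numpy as np

def sgd_ls_step(w, x_i, y_i, eta):
    """Classical least-squares SGD step; may diverge when eta is too large."""
    return w + eta * (y_i - x_i @ w) * x_i

def isgd_ls_step(w, x_i, y_i, eta):
    """Implicit (ISGD) least-squares step in closed form: the step is
    normalized by 1 + eta * ||x_i||^2, which keeps it numerically stable."""
    return w + eta / (1.0 + eta * (x_i @ x_i)) * (y_i - x_i @ w) * x_i
</syntaxhighlight>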
139 | 140 | ===Momentum=== 141 | Further proposals include the ''momentum method'', which appeared in [[David Rumelhart|Rumelhart]], [[Geoffrey Hinton|Hinton]] and [[Ronald J. Williams|Williams]]' seminal paper on backpropagation learning.{{cite journal|last=Rumelhart|first=David E.|author2=Hinton, Geoffrey E.|author3=Williams, Ronald J.|title=Learning representations by back-propagating errors|journal=Nature|date=8 October 1986|volume=323|issue=6088|pages=533–536|doi=10.1038/323533a0|bibcode=1986Natur.323..533R}} Stochastic gradient descent with momentum remembers the update {{math|Δ ''w''}} at each iteration, and determines the next update as a [[linear combination]] of the gradient and the previous update:{{cite conference|last=Sutskever|first=Ilya|author2=Martens, James|author3=Dahl, George|author4=Hinton, Geoffrey E.|editor=Sanjoy Dasgupta and David Mcallester|title=On the importance of initialization and momentum in deep learning|conference=In Proceedings of the 30th international conference on machine learning (ICML-13)|date=June 2013|volume=28|location=Atlanta, GA|pages=1139–1147|url=http://www.cs.utoronto.ca/~ilya/pubs/2013/1051_2.pdf|access-date=14 January 2016}}{{cite thesis|last=Sutskever|first=Ilya|title=Training recurrent neural networks|date=2013|publisher=University of Toronto|url=http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf|type=Ph.D.|page=74}} 142 | :\Delta w := \alpha \Delta w - \eta \nabla Q_i(w) 143 | :w := w + \Delta w 144 | which leads to: 145 | :w := w - \eta \nabla Q_i(w) + \alpha \Delta w 146 | 147 | where the [[parametric statistics|parameter]] w which minimizes Q(w) is to be [[estimator|estimated]], \eta is a step size (sometimes called the ''[[learning rate]]'' in machine learning), and \alpha \in [0, 1) is the momentum coefficient, an exponential decay factor that determines how strongly previous updates contribute to the current one. 148 | 149 | The name momentum stems from an analogy to [[momentum]] in physics: the weight vector w, thought of as a particle traveling through parameter space, incurs acceleration from the gradient of the loss ("[[force]]"). Unlike in classical stochastic gradient descent, the weight vector tends to keep traveling in the same direction, which damps oscillations. Momentum has been used successfully by computer scientists in the training of [[artificial neural networks]] for several decades.{{cite arXiv |last=Zeiler |first=Matthew D. |eprint=1212.5701 |title=ADADELTA: An adaptive learning rate method |year=2012|class=cs.LG }} 150 | 151 | ===Averaging=== 152 | ''Averaged stochastic gradient descent'', invented independently by Ruppert and Polyak in the late 1980s, is ordinary stochastic gradient descent that records an average of its parameter vector over time. That is, the update is the same as for ordinary stochastic gradient descent, but the algorithm also keeps track of the running average{{cite journal |last1=Polyak |first1=Boris T. |first2=Anatoli B. |last2=Juditsky |title=Acceleration of stochastic approximation by averaging |journal=SIAM J. Control Optim. |volume=30 |issue=4 |year=1992 |pages=838–855|url=http://www.meyn.ece.ufl.edu/archive/spm_files/Courses/ECE555-2011/555media/poljud92.pdf|doi=10.1137/0330046 }} 153 | 154 | :\bar{w} = \frac{1}{t} \sum_{i=0}^{t-1} w_i. 155 | 156 | When optimization is done, this averaged parameter vector takes the place of {{mvar|w}}.
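The momentum update and Polyak–Ruppert averaging are both one-line changes to the basic SGD loop. The following is a minimal NumPy sketch combining the two; the synthetic least-squares data, the choice α = 0.9, and the other constants are illustrative assumptions.

<syntaxhighlight lang="python">
import numpy as np

# Illustrative least-squares data.
rng = np.random.default_rng(2)
n, p = 500, 4
X = rng.normal(size=(n, p))
y = X @ np.ones(p) + 0.1 * rng.normal(size=n)

def sgd_momentum_averaged(X, y, eta=0.005, alpha=0.9, n_steps=5000):
    """SGD with momentum (coefficient alpha in [0, 1)) plus Polyak-Ruppert averaging."""
    w = np.zeros(X.shape[1])
    delta_w = np.zeros_like(w)        # remembered update Delta w
    w_bar = np.zeros_like(w)          # running average of the iterates
    for t in range(n_steps):
        i = rng.integers(len(y))
        grad_i = -2.0 * (y[i] - X[i] @ w) * X[i]
        delta_w = alpha * delta_w - eta * grad_i   # combine gradient with previous update
        w = w + delta_w
        w_bar += (w - w_bar) / (t + 1)             # incremental mean of the iterates
    return w, w_bar

w_last, w_avg = sgd_momentum_averaged(X, y)
print(w_last)   # last iterate: noisier
print(w_avg)    # averaged iterate: typically the less noisy estimate
</syntaxhighlight>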
157 | 158 | ===AdaGrad=== 159 | ''AdaGrad'' (for adaptive [[Gradient descent|gradient]] algorithm) is a modified stochastic gradient descent algorithm with a per-parameter [[learning rate]], first published in 2011.{{cite journal |last1=Duchi |first1=John |first2=Elad |last2=Hazan |first3=Yoram |last3=Singer |title=Adaptive subgradient methods for online learning and stochastic optimization |journal=[[Journal of Machine Learning Research|JMLR]] |volume=12 |year=2011 |pages=2121–2159 |url=http://jmlr.org/papers/volume12/duchi11a/duchi11a.pdf}}{{cite web |first=Joseph |last=Perla |year=2014 |title=Notes on AdaGrad |url=http://seed.ucsd.edu/mediawiki/images/6/6a/Adagrad.pdf |deadurl=yes |archiveurl=https://web.archive.org/web/20150330033637/http://seed.ucsd.edu/mediawiki/images/6/6a/Adagrad.pdf |archivedate=2015-03-30 |df= }} Informally, this increases the learning rate for sparser parameters and decreases the learning rate for ones that are less sparse. This strategy often improves convergence performance over standard stochastic gradient descent in settings where data is sparse and sparse parameters are more informative. Examples of such applications include natural language processing and image recognition. It still has a base learning rate {{mvar|η}}, but this is multiplied with the elements of a vector {{math|(''G''<sub>''j'',''j''</sub>)}}, which is the diagonal of the [[outer product]] matrix 160 | 161 | :G = \sum_{\tau=1}^t g_\tau g_\tau^\mathsf{T} 162 | 163 | where g_\tau = \nabla Q_i(w) is the gradient at iteration {{mvar|τ}}. The diagonal is given by 164 | 165 | :G_{j,j} = \sum_{\tau=1}^t g_{\tau,j}^2. 166 | 167 | This vector is updated after every iteration. The formula for an update is now 168 | 169 | :w := w - \eta\, \mathrm{diag}(G)^{-\frac{1}{2}} \circ g{{efn|\circ is the [[Hadamard product (matrices)|element-wise product]].}} 170 | 171 | or, written as per-parameter updates, 172 | 173 | :w_j := w_j - \frac{\eta}{\sqrt{G_{j,j}}} g_j. 174 | 175 | Each {{math|''G''<sub>''j'',''j''</sub>}} gives rise to a scaling factor for the learning rate that applies to the single parameter {{math|''w''<sub>''j''</sub>}}. Since the denominator in this factor, \sqrt{G_{j,j}} = \sqrt{\sum_{\tau=1}^t g_{\tau,j}^2}, is the [[Norm (mathematics)#Euclidean norm|''ℓ''2 norm]] of the previous derivatives of that parameter, extreme parameter updates get dampened, while parameters that get few or small updates receive higher learning rates. 176 | 177 | While designed for [[convex optimization|convex problems]], AdaGrad has been successfully applied to non-convex optimization.{{cite journal |last1=Gupta |first1=Maya R. |first2=Samy |last2=Bengio |first3=Jason |last3=Weston |title=Training highly multiclass classifiers |journal=JMLR |volume=15 |issue=1 |year=2014 |pages=1461–1492 |url=http://jmlr.org/papers/volume15/gupta14a/gupta14a.pdf}} 178 | 179 | ===RMSProp=== 180 | ''RMSProp'' (for Root Mean Square Propagation) is also a method in which the [[learning rate]] is adapted for each of the parameters. The idea is to divide the learning rate for a weight by a running average of the magnitudes of recent gradients for that weight.Tieleman, Tijmen and Hinton, Geoffrey (2012). Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude. COURSERA: Neural Networks for Machine Learning. First, the running average is calculated in terms of the mean square, 181 | 182 | :v(w,t):=\gamma v(w,t-1)+(1-\gamma)(\nabla Q_i(w))^2 183 | 184 | where \gamma is the forgetting factor.
185 | 186 | The parameters are then updated as 187 | 188 | :w:=w-\frac{\eta}{\sqrt{v(w,t)}}\nabla Q_i(w) 189 | 190 | RMSProp has shown excellent adaptation of the learning rate across different applications. RMSProp can be seen as a generalization of [[Rprop]]; it can work with mini-batches as well, as opposed to only full batches.{{Cite web|url=http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf|title=Overview of mini-batch gradient descent|last=Hinton|first=Geoffrey|authorlink=Geoffrey Hinton|date=|website=|publisher=|pages=27–29|access-date=27 September 2016}} 191 | 192 | ===Adam=== 193 | ''Adam''{{cite arXiv |last1=Kingma |first1=Diederik |first2=Jimmy |last2=Ba |eprint=1412.6980 |title=Adam: A method for stochastic optimization |year=2014 |class=cs.LG }} (short for Adaptive Moment Estimation) is an update to the ''RMSProp'' optimizer. In this optimization algorithm, running averages of both the gradients and the second moments of the gradients are used. Given parameters w^{(t)} and a loss function L^{(t)}, where t indexes the current training iteration (starting at 0), Adam's parameter update is given by: 194 | 195 | :m_w^{(t+1)} \leftarrow \beta_1 m_w^{(t)} + (1 - \beta_1) \nabla_w L^{(t)} 196 | :v_w^{(t+1)} \leftarrow \beta_2 v_w^{(t)} + (1 - \beta_2) (\nabla_w L^{(t)})^2 197 | 198 | :\hat{m}_w = \frac{m_w^{(t+1)}}{1 - \beta_1^{t+1}} 199 | :\hat{v}_w = \frac{v_w^{(t+1)}}{1 - \beta_2^{t+1}} 200 | 201 | :w^{(t+1)} \leftarrow w^{(t)} - \eta \frac{\hat{m}_w}{\sqrt{\hat{v}_w} + \epsilon} 202 | 203 | where \epsilon is a small scalar used to prevent division by 0, and \beta_1 and \beta_2 are the forgetting factors for the gradients and the second moments of the gradients, respectively. Squaring and square-rooting are done element-wise. A combined code sketch of the AdaGrad, RMSProp, and Adam updates is given below. 204 | 205 | ===Natural Gradient Descent and kSGD=== 206 | Kalman-based Stochastic Gradient Descent (kSGD){{Cite journal|last=Patel|first=V.|date=2016-01-01|title=Kalman-Based Stochastic Gradient Method with Stop Condition and Insensitivity to Conditioning|journal=SIAM Journal on Optimization|volume=26|issue=4|pages=2620–2648|doi=10.1137/15M1048239|issn=1052-6234|arxiv=1512.01139}} is an online and offline algorithm for learning parameters in statistical problems based on [[quasi-likelihood]] models, which include [[Linear regression|linear models]], [[Nonlinear regression|non-linear models]], [[generalized linear model]]s, and [[Artificial neural network|neural networks]] with [[Mean squared error|squared error loss]] as special cases. For online learning problems, kSGD is a special case of the [[Kalman filter|Kalman Filter]] for linear regression problems, a special case of the [[Extended Kalman filter|Extended Kalman Filter]] for non-linear regression problems, and can be viewed as an incremental [[Gauss–Newton algorithm|Gauss-Newton]] method. Moreover, because of kSGD's relationship to the Kalman Filter and natural gradient descent's{{cite journal|last1=Cichocki|first1=A|last2=Chen|first2=T|last3=Amari|first3=S|title=Stability Analysis of Learning Algorithms for Blind Source Separation.|journal=Neural Networks|date=November 1997|volume=10|issue=8|pages=1345–1351|pmid=12662478|doi=10.1016/S0893-6080(97)00039-7}} relationship to the Kalman Filter,{{cite arxiv|last1=Ollivier|first1=Yann|title=Online Natural Gradient as a Kalman Filter|eprint=1703.00209|language=en|date=1 March 2017|class=stat.ML}} kSGD is a rigorous improvement over the popular natural gradient descent method.
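The three adaptive schemes described above (AdaGrad, RMSProp and Adam) differ mainly in how the per-parameter scaling statistics are accumulated. The following minimal NumPy sketch shows the three update rules side by side; the toy quadratic objective, the default constants, and the small ε added to the AdaGrad denominator (which the formula above omits) are illustrative assumptions.

<syntaxhighlight lang="python">
import numpy as np

# Each optimizer keeps its own state dictionary:
#   AdaGrad: {"G": ...}, RMSProp: {"v": ...}, Adam: {"m": ..., "v": ..., "t": 0}.

def adagrad_step(w, g, state, eta=0.01, eps=1e-8):
    """AdaGrad: accumulate squared gradients; per-parameter rate eta / sqrt(G_jj).
    eps is added only to avoid division by zero on the first step."""
    state["G"] += g * g                          # diagonal of sum_t g_t g_t'
    return w - eta / (np.sqrt(state["G"]) + eps) * g

def rmsprop_step(w, g, state, eta=0.001, gamma=0.9, eps=1e-8):
    """RMSProp: exponentially weighted running average of squared gradients."""
    state["v"] = gamma * state["v"] + (1.0 - gamma) * g * g
    return w - eta / (np.sqrt(state["v"]) + eps) * g

def adam_step(w, g, state, eta=0.01, beta1=0.9, beta2=0.999, eps=1e-8):
    """Adam: running averages of the gradient and its square, with bias correction."""
    state["t"] += 1
    state["m"] = beta1 * state["m"] + (1.0 - beta1) * g
    state["v"] = beta2 * state["v"] + (1.0 - beta2) * g * g
    m_hat = state["m"] / (1.0 - beta1 ** state["t"])
    v_hat = state["v"] / (1.0 - beta2 ** state["t"])
    return w - eta * m_hat / (np.sqrt(v_hat) + eps)

# Illustrative use on the toy objective Q(w) = ||w - 1||^2, gradient 2 (w - 1).
w = np.zeros(3)
state = {"m": np.zeros(3), "v": np.zeros(3), "t": 0}
for _ in range(2000):
    g = 2.0 * (w - 1.0)
    w = adam_step(w, g, state)
print(w)   # close to [1, 1, 1]
</syntaxhighlight>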
207 | 208 | The benefits of kSGD, in comparison to other methods, are that (1) it is not sensitive to the condition number of the problem,{{efn|For the linear regression problem, kSGD's objective function discrepancy (i.e. the total of bias and variance) at iteration k is \frac{1+\epsilon}{k} p \sigma^2 with probability converging to 1 at a rate depending on \epsilon \in (0,1), where \sigma^2 is the variance of the residuals. Moreover, for specific choices of \gamma_1,\gamma_2, kSGD's objective function bias at iteration k can be shown to be \frac{(1+\epsilon)^2}{2k^2}\Vert w(0)-w_*\Vert_2^2 with probability converging to 1 at a rate depending on \epsilon \in (0,1), where w_* is the optimal parameter. 209 | }} (2) it has a robust choice of hyperparameters, and (3) it has a stopping condition. The drawbacks of kSGD are that the algorithm requires storing a dense covariance matrix between iterations, and requires a matrix-vector product at each iteration. 210 | 211 | To describe the algorithm, suppose Q_i(w), where w \in \mathbb{R}^p, is defined by an example (Y_i,X_i)\in \mathbb{R} \times \mathbb{R}^d such that 212 | 213 | :\nabla_w Q_i(w) = \frac{Y_i - \mu(X_i,w)}{V(\mu(X_i,w))} \nabla_w \mu(X_i,w) 214 | 215 | where \mu(X_i,w) is the mean function (i.e. the expected value of Y_i given X_i), and V(\mu(X_i,w)) is the variance function (i.e. the variance of Y_i given X_i). Then the parameter update w(t+1) and the covariance matrix update M(t+1) are given by the following: 216 | 217 | : p = \nabla_w \mu(X_{t+1},w(t)) 218 | : m = \mu(X_{t+1},w(t)) 219 | : v = M(t) p 220 | : s = \min\lbrace \gamma_1, \max\lbrace \gamma_2, V(m)\rbrace \rbrace + v^\mathsf{T} p 221 | : w(t+1) = w(t) + \frac{Y_{t+1} - m}{s}v 222 | : M(t+1) = M(t) - \frac{1}{s} v v^\mathsf{T} 223 | 224 | where \gamma_1,\gamma_2 are hyperparameters. The M(t) update can result in the covariance matrix becoming indefinite, which can be avoided at the cost of a matrix-matrix multiplication. M(0) can be any positive definite symmetric matrix, but is typically taken to be the identity. As noted by Patel, for all problems besides linear regression, restarts are required to ensure convergence of the algorithm, but no theoretical or implementation details were given. In a closely related, off-line, mini-batch method for non-linear regression analyzed by Bertsekas,{{Cite journal|last=Bertsekas|first=D.|date=1996-08-01|title=Incremental Least Squares Methods and the Extended Kalman Filter|journal=SIAM Journal on Optimization|volume=6|issue=3|pages=807–822|doi=10.1137/S1052623494268522|issn=1052-6234|hdl=1721.1/3362}} a forgetting factor was used in the covariance matrix update to prove convergence. A minimal code sketch of the kSGD recursion for the linear-regression case is given below. 225 | 226 | ===Second-Order Methods=== 227 | It is known that a stochastic analogue of the standard (deterministic) Newton-Raphson algorithm (a “second-order” method) provides an asymptotically optimal or near-optimal form of iterative optimization in the setting of stochastic approximation. A method that uses direct measurements of the Hessian matrices of the summands in the empirical risk function is given in R. H. Byrd, S. L. Hansen, J. Nocedal, and Y. Singer, “A Stochastic Quasi-Newton method for Large-Scale Optimization,” SIAM Journal on Optimization, vol. 26, no. 2, pp. 1008–1031, 2016. However, directly determining the required Hessian matrices for optimization may not be possible in practice.
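Read literally, the kSGD recursion above is straightforward to implement. The following is a minimal sketch for the linear-regression special case, with mean function μ(x, w) = x′w and the variance function treated as a constant; the synthetic data and the choice γ<sub>1</sub> = γ<sub>2</sub> = 1 are illustrative assumptions.

<syntaxhighlight lang="python">
import numpy as np

def ksgd_linear(X, y, gamma1=1.0, gamma2=1.0):
    """Kalman-based SGD for linear regression.

    Mean function mu(x, w) = x @ w, so its gradient in w is x itself; the variance
    function is treated as the constant 1, clamped between gamma2 and gamma1."""
    n, d = X.shape
    w = np.zeros(d)
    M = np.eye(d)                       # M(0): positive-definite initial covariance
    for t in range(n):
        g = X[t]                        # gradient of the mean function at w(t)
        m = X[t] @ w                    # predicted mean mu(X_{t+1}, w(t))
        v = M @ g
        s = min(gamma1, max(gamma2, 1.0)) + v @ g
        w = w + (y[t] - m) / s * v      # parameter update
        M = M - np.outer(v, v) / s      # covariance matrix update
    return w

# Illustrative data.
rng = np.random.default_rng(3)
X = rng.normal(size=(200, 4))
y = X @ np.array([1.0, -2.0, 0.5, 3.0]) + 0.1 * rng.normal(size=200)
print(ksgd_linear(X, y))   # close to [1, -2, 0.5, 3]
</syntaxhighlight>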
Practical and theoretically sound methods for second-order versions of SGD that do not require direct Hessian information are given in J. C. Spall (2000), “Adaptive Stochastic Approximation by the Simultaneous Perturbation Method,” IEEE Transactions on Automatic Control, vol. 45, pp. 1839−1853, http://dx.doi.org/10.1109/TAC.2000.880982; J. C. Spall (2009), “Feedback and Weighting Mechanisms for Improving Jacobian Estimates in the Adaptive Simultaneous Perturbation Algorithm,” IEEE Transactions on Automatic Control, vol. 54(6), pp. 1216–1229, http://dx.doi.org/10.1109/TAC.2009.2019793; and S. Bhatnagar, H. L. Prasad, and L. A. Prashanth (2013), Stochastic Recursive Algorithms for Optimization: Simultaneous Perturbation Methods, Springer. (A less efficient method based on finite differences, instead of simultaneous perturbations, is given in D. Ruppert, “A Newton-Raphson Version of the Multivariate Robbins-Monro Procedure,” The Annals of Statistics, vol. 13, no. 1, pp. 236–245, 1985.) These methods, which do not require direct Hessian information, are based on either the values of the summands in the above empirical risk function or the values of the gradients of the summands (i.e., the SGD inputs). In particular, second-order optimality is asymptotically achievable without direct calculation of the Hessian matrices of the summands in the empirical risk function. 228 | 229 | == Notes == 230 | {{notelist}} 231 | 232 | == See also == 233 | * [[Coordinate descent]] – changes one coordinate at a time, rather than one example 234 | * [[Linear classifier]] 235 | * [[Online machine learning]] 236 | 237 | == References == 238 | {{reflist|30em}} 239 | 240 | == Further reading == 241 | *{{Citation 242 | | last = Bertsekas 243 | | first = Dimitri P. 244 | | author-link = Dimitri P. Bertsekas 245 | | title = Nonlinear Programming 246 | | publisher = Athena Scientific 247 | | year = 1999 248 | | edition = 2nd 249 | | location = Cambridge, MA. 250 | | isbn = 978-1-886529-00-7 251 | | ref = none 252 | }}. 253 | 254 | *{{Citation 255 | | last = Bertsekas 256 | | first = Dimitri 257 | | author-link = Dimitri P. Bertsekas 258 | | title = Convex Analysis and Optimization 259 | | publisher = Athena Scientific 260 | | year = 2003 261 | | ref = none 262 | }}. 263 | 264 | *{{Citation 265 | | last = Bottou 266 | | first = Léon 267 | | author-link = Léon Bottou 268 | | contribution = Stochastic Learning 269 | | year = 2004 270 | | title = Advanced Lectures on Machine Learning 271 | | pages = 146–168 272 | | publisher = Springer 273 | | series = LNAI 274 | | volume = 3176 275 | | url = http://leon.bottou.org/papers/bottou-mlss-2004 276 | | isbn = 978-3-540-23122-6 277 | | ref = none 278 | }}. 279 | 280 | *{{Citation 281 | | last = Davidon 282 | | first = W.C. 283 | | author-link = William C. Davidon 284 | | title = New least-square algorithms 285 | | journal = Journal of Optimization Theory and Applications 286 | | volume = 18 287 | | year = 1976 288 | | number = 2 289 | | pages = 187–197 290 | | doi = 10.1007/BF00935703 291 | | mr=418461 292 | | ref = none 293 | }}. 294 | 295 | *{{Citation 296 | | title = Pattern Classification 297 | | first1 = Richard O. 298 | | last1 = Duda 299 | | first2 = Peter E. 300 | | last2 = Hart 301 | | first3 = David G. 302 | | last3 = Stork 303 | | publisher = [[John Wiley & Sons|Wiley]] 304 | | year = 2000 305 | | edition = 2nd 306 | | isbn = 978-0-471-05669-0 307 | | ref = none 308 | }}. 309 | 310 | *{{Citation 311 | | last = Kiwiel 312 | | first = Krzysztof C.
313 | | title = Convergence of approximate and incremental subgradient methods for convex optimization 314 | | journal = SIAM Journal on Optimization 315 | | volume = 14 316 | | year = 2004 317 | | number = 3 318 | | pages = 807–840 319 | | doi = 10.1137/S1052623400376366 320 | | mr = 2085944 321 | | ref = none 322 | }}. (Extensive list of references) 323 | 324 | *{{Citation 325 | | title = Practical Mathematical Optimization - Basic Optimization Theory and Gradient-Based Algorithms|series=Springer Optimization and Its Applications Vol. 133 326 | | first1 = Jan A. 327 | | last1 = Snyman 328 | | first2 = Daniel N. 329 | | last2 = Wilke 330 | | publisher = [[Springer International Publishing|Springer]] 331 | | edition=2 332 | | year = 2018 333 | | pages=xxvi+372 334 | | isbn = 978-3-319-77585-2 335 | | url = https://www.springer.com/gp/book/9783319775852 336 | | ref = none 337 | }}. (Python module [http://extras.springer.com/2018/978-3-319-77585-2 pmo.py]) 338 | 339 | *{{Citation 340 | | title = Introduction to Stochastic Search and Optimization 341 | | first = James C. 342 | | last = Spall 343 | | publisher = [[John Wiley & Sons|Wiley]] 344 | | year = 2003 345 | | isbn = 978-0-471-33052-3 346 | | ref = none 347 | }}. 348 | 349 | == External links == 350 | * [http://codingplayground.blogspot.it/2013/05/stocastic-gradient-descent.html Using stochastic gradient descent in C++, Boost, Ublas for linear regression] 351 | * [http://studyofai.com/machine-learning-algorithms/ Machine Learning Algorithms] 352 | * {{cite web |work=3Blue1Brown |title=Gradient Descent, How Neural Networks Learn |date=October 16, 2017 |url=https://www.youtube.com/watch?v=IHZwWFHWa-w&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&index=2 |via=[[YouTube]] }} 353 | 354 | [[Category:Stochastic optimization]] 355 | [[Category:Computational statistics]] 356 | [[Category:Gradient methods]] 357 | [[Category:M-estimators]] 358 | [[Category:Machine learning algorithms]] 359 | [[Category:Convex optimization]] 360 | [[Category:Statistical approximations]] 361 | -------------------------------------------------------------------------------- /wiki_zh.txt: -------------------------------------------------------------------------------- 1 | '''Stochastic gradient descent''' (SGD for short) is an iterative method for optimizing an objective function (for example, a differentiable function). The '''stochastic''' in SGD refers to the fact that it estimates the gradient from randomly selected (or shuffled) samples, so SGD can be regarded as a stochastic approximation of gradient descent optimization. The idea can be traced back at least to 1951{{cite journal | last = Mei | first = Song | title = A mean field view of the landscape of two-layer neural networks | journal = Proceedings of the National Academy of Sciences | volume = 115| issue = 33| year = 2018 | pages = E7665–E7671| jstor = | doi = 10.1073/pnas.1806579115 | pmid = 30054315 | pmc = 6099898 }}, when [[Herbert Robbins]] and [[Sutton Monro]] published the article "A Stochastic Approximation Method", in which they proposed and analyzed in detail a root-finding method now known as the [[stochastic approximation|Robbins–Monro algorithm]]. 2 | 3 | == Background == 4 | 5 | 6 | == External links == 7 | * [http://codingplayground.blogspot.it/2013/05/stocastic-gradient-descent.html Using stochastic gradient descent in C++, Boost, Ublas for linear regression] 8 | * [http://studyofai.com/machine-learning-algorithms/ Machine Learning Algorithms] 9 | * {{cite web |work=3Blue1Brown |title=Gradient Descent, How Neural Networks Learn |date=October 16, 2017 |url=https://www.youtube.com/watch?v=IHZwWFHWa-w&list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi&index=2 |via=[[YouTube]] }} 10 | 11 | [[Category:Stochastic optimization]] 12 | [[Category:Computational statistics]] 13 | [[Category:Gradient methods]] 14 | [[Category:M-estimators]] 15 | [[Category:Machine learning algorithms]] 16 | [[Category:Convex optimization]] 17 | [[Category:Statistical approximations]] --------------------------------------------------------------------------------