├── .gitignore ├── Adversarial_Autoencoder.ipynb ├── Author Topic Model.ipynb ├── Best Worst Scaling.ipynb ├── Binary Search.ipynb ├── Card24.ipynb ├── Charts Experiments.ipynb ├── Company Mission Statements.ipynb ├── Dictionary_lookup_with_default.ipynb ├── Dynamax.ipynb ├── Dynamic Programming.ipynb ├── Extract_unique_triadic_edges_from_triads.ipynb ├── FastRG - Python implementation.ipynb ├── Fourier Transform.ipynb ├── GMM Clusters.ipynb ├── Gamma Distribution.ipynb ├── Graph Diffusion.ipynb ├── Graph_Feature_Propagation.ipynb ├── IMCMC.ipynb ├── IRCTC Data Hack.ipynb ├── ITE.ipynb ├── Infinite Mixture Models.ipynb ├── KMeans_Functional.ipynb ├── Keras Char Word LSTM.ipynb ├── Keras Demo.ipynb ├── Keras attention.ipynb ├── Keras_Elmo.ipynb ├── LICENSE ├── LazyValues.ipynb ├── Lightweight_coreset_construction.ipynb ├── Likelihood+ratio.ipynb ├── Logistic Regression.ipynb ├── MazeSolving.ipynb ├── Mean_Media_Mode.ipynb ├── Monte Carlo Integration.ipynb ├── Monte Carlo.ipynb ├── MultiTask_Transformer_for_Token_Classification.ipynb ├── Naive Sudoku Solver.ipynb ├── Plotting Decision Boundaries.ipynb ├── Positive Semi-definite Matrix.ipynb ├── Prediction versus Explanation.ipynb ├── Programming assignments.ipynb ├── PyMC Disaster.ipynb ├── PyMC Testing.ipynb ├── PyMC_LDA.ipynb ├── Quora Haqathon.ipynb ├── README.md ├── Regression Coefficient Significance.ipynb ├── ReinforcementLearning.ipynb ├── SVD_Vis.ipynb ├── Self-attention.ipynb ├── Slide Notebooks ├── IMO 2021 - Problem 2.ipynb └── Product of consecutive numbers.ipynb ├── Stable_Craiyon.ipynb ├── Student Debt Over time.ipynb ├── System_Identification_DMD_Control_Example.ipynb ├── Time Dependent Models.ipynb ├── Unattacked Queens.ipynb ├── World Leaders DB.ipynb ├── data └── isl_wise_train_detail_03082015_v1.csv ├── images └── olympic_athlete.PNG ├── maxContigSum.ipynb ├── monte_carlo_circle.png ├── olympic_athlete_data_download.ipynb ├── temp.tsv └── worldpresidentsdb.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | #iPython 60 | .ipynb_checkpoints/ 61 | -------------------------------------------------------------------------------- /Author Topic Model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "# Author Topic Model\n", 12 | "\n", 13 | "Implementation as described in http://mimno.infosci.cornell.edu/info6150/readings/398.pdf" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "collapsed": true, 21 | "slideshow": { 22 | "slide_type": "subslide" 23 | } 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "%matplotlib inline\n", 28 | "\n", 29 | "import matplotlib.pyplot as plt\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 69, 36 | "metadata": { 37 | "collapsed": false, 38 | "slideshow": { 39 | "slide_type": "notes" 40 | } 41 | }, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "[[ 0. 7. 7. 7. 5.]\n", 48 | " [ 1. 7. 5. 1. 0.]\n", 49 | " [ 5. 5. 6. 9. 2.]\n", 50 | " [ 4. 6. 0. 7. 2.]]\n", 51 | "[1 2 1 2 3]\n", 52 | "['V0' 'V1' 'V2' 'V3' 'V4' 'V5' 'V6' 'V7' 'V8' 'V9']\n", 53 | "[[ 0. 14. 7. 14. 15.]\n", 54 | " [ 1. 14. 5. 2. 0.]\n", 55 | " [ 5. 10. 6. 18. 6.]\n", 56 | " [ 4. 12. 0. 14. 6.]]\n" 57 | ] 58 | }, 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "array([['V2', 'V1', 'V0', 'V2', 'V0'],\n", 63 | " ['V3', 'V0', 'V2', 'V3', 'V3']], \n", 64 | " dtype='|S2')" 65 | ] 66 | }, 67 | "execution_count": 69, 68 | "metadata": {}, 69 | "output_type": "execute_result" 70 | } 71 | ], 72 | "source": [ 73 | "np.ones([10])[[2,3,4], np.newaxis].repeat(5, axis=1)\n", 74 | "\n", 75 | "_a = np.random.randint(0,10,size=(4,5)) * 1.0\n", 76 | "print _a\n", 77 | "_b = np.array([1,2,1,2,3])\n", 78 | "print _b\n", 79 | "_c = np.array([\"V%s\" % k for k in xrange(10)])\n", 80 | "print _c\n", 81 | "print (_a * _b)\n", 82 | "_c[np.argsort(_a, axis=0)[::-1, :][:2, :]]" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 70, 88 | "metadata": { 89 | "collapsed": true, 90 | "slideshow": { 91 | "slide_type": "fragment" 92 | } 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "class AuthorTopicModel(object):\n", 97 | " \"\"\"Implementation of an author topic model.\n", 98 | " Generates each document based on a topic and author pair,\n", 99 | " This is used to generate a word in the document.\n", 100 | " \"\"\"\n", 101 | " \n", 102 | " def __init__(self, K, doc_word_matrix, doc_author_matrix, vocab, authornames, alpha=0.1, beta=0.5):\n", 103 | " \"\"\"Constructor for the function\n", 104 | " K: number of topics\n", 105 | " doc_word_matrix: list of documents each represented as list of word ids\n", 106 | " doc_author_matrix: list of documents each represented as list of author ids\n", 107 | " vocab: dictionary of word ids mapped to word strings\n", 108 | " authornames: dictionary of author ids mapped to author names\n", 109 | " alpha: Author topic diritchelet parameter\n", 110 | " beta: Word topic diritchelet parameter \n", 111 | " \"\"\"\n", 112 | " self.K = K\n", 113 | " self.doc_word_matrix = doc_word_matrix\n", 114 | " self.doc_author_matrix = doc_author_matrix\n", 115 | " self.N = len(doc_word_matrix)\n", 116 | " self.vocab = vocab\n", 117 | " self.W = len(vocab)\n", 118 | " self.authornames= authornames\n", 119 | " self.A = len(authornames)\n", 120 | " self.alpha = alpha\n", 121 | " self.beta = beta\n", 122 | " \n", 123 | " self.W_T = np.zeros([self.W, self.K])\n", 124 | " self.A_T = np.zeros([self.A, self.K])\n", 125 | " \n", 126 | " self.T_marginal = np.zeros(self.K)\n", 127 | " self.A_marginal = np.zeros(self.A)\n", 128 | " \n", 129 | " self.T_assigned = []\n", 130 | " self.A_assigned = []\n", 131 | " self._populate_vars()\n", 132 | " \n", 133 | " def _populate_vars(self):\n", 134 | " \"\"\"Populate the variables with the initial data\n", 135 | " \"\"\"\n", 136 | " for di, doc in enumerate(self.doc_word_matrix):\n", 137 | " auth = self.doc_author_matrix[di]\n", 138 | " self.T_assigned.append([])\n", 139 | " self.A_assigned.append([])\n", 140 | " for wi, w in enumerate(doc):\n", 141 | " # Randomly assign a topic to the word\n", 142 | " z = np.random.choice(self.K)\n", 143 | " # Randomly assign a topic to a random author\n", 144 | " a = np.random.choice(self.A)\n", 145 | " # Update all the word, topic and author topic counts\n", 146 | " self.W_T[w,z] += 1\n", 147 | " self.A_T[a,z] += 1\n", 148 | " # Update marginals\n", 149 | " self.T_marginal[z] += 1\n", 150 | " self.A_marginal[a] += 1\n", 151 | " # Record the sampled topic and author assignments\n", 152 | " self.T_assigned[-1].append(z)\n", 153 | " self.A_assigned[-1].append(a)\n", 154 | " \n", 155 | " def gibbs_sampling(self):\n", 156 | " \"\"\"Perform single gibbs sampling step\n", 157 | " \"\"\"\n", 158 | " for di, doc in enumerate(self.doc_word_matrix):\n", 159 | " auth = self.doc_author_matrix[di]\n", 160 | " for wi, w in enumerate(doc):\n", 161 | " # Extract the previous assignment\n", 162 | " z = self.T_assigned[di][wi]\n", 163 | " a = self.A_assigned[di][wi]\n", 164 | " # Substract the previous assignments\n", 165 | " # Update all the word, topic and author topic counts\n", 166 | " self.W_T[w,z] -= 1\n", 167 | " self.A_T[a,z] -= 1\n", 168 | " # Update marginals\n", 169 | " self.T_marginal[z] -= 1\n", 170 | " self.A_marginal[a] -= 1\n", 171 | " \n", 172 | " # Find probability of the word w belonging to each topic\n", 173 | " phi = (self.W_T[w,:] + self.beta) / (self.T_marginal + self.W*self.beta)\n", 174 | " # Find probability of each author in auth belonging to each topic\n", 175 | " theta = (self.A_T[auth,:] + self.alpha) / (self.A_marginal[auth, np.newaxis] + self.W*self.alpha)\n", 176 | " # Joint probability of word and author for all topics\n", 177 | " pdf = theta*phi\n", 178 | " pdf = pdf / pdf.sum()\n", 179 | " # Index of authors and topics\n", 180 | " auth_t_pairs = [(i,j) for i in auth for j in xrange(self.K)]\n", 181 | " # Sample an author and topic pair for the word\n", 182 | " #print auth_t_pairs, p.flatten()\n", 183 | " idx = np.random.choice(range(len(auth_t_pairs)), p=pdf.flatten())\n", 184 | " a, z = auth_t_pairs[idx]\n", 185 | " # Update all the word, topic and author topic counts\n", 186 | " self.W_T[w,z] += 1\n", 187 | " self.A_T[a,z] += 1\n", 188 | " # Update marginals\n", 189 | " self.T_marginal[z] += 1\n", 190 | " self.A_marginal[a] += 1\n", 191 | " # Record the sampled topic and author assignments\n", 192 | " self.T_assigned[di][wi] = z\n", 193 | " self.A_assigned[di][wi] = a\n", 194 | " \n", 195 | " def perform_iterations(self, burnin=100, max_iters=10, print_every=5):\n", 196 | " \"\"\"Perform max_iters of gibbs sampling steps\n", 197 | " \"\"\"\n", 198 | " print \"Performing %s gibbs sampling iterations burn in phase\" % burnin\n", 199 | " for i in xrange(burnin):\n", 200 | " self.gibbs_sampling()\n", 201 | " print \"Burn in complete\"\n", 202 | " print \"Topic proportions: %s\" % (self.T_marginal * 1. / self.T_marginal.sum())\n", 203 | " print \"Author proportions: %s\" % (self.A_marginal * 1. / self.A_marginal.sum())\n", 204 | " print \"W_T[w,z]:\\n%s\" % (self.W_T * 1./ self.W_T.sum())\n", 205 | " print \"A_T[a,z]:\\n%s\" % (self.A_T * 1./ self.A_T.sum())\n", 206 | " print \"Performing %s gibbs sampling iterations\" % max_iters\n", 207 | " for i in xrange(max_iters):\n", 208 | " if i%print_every == 0:\n", 209 | " print \"Iter %s:\" % i\n", 210 | " self.gibbs_sampling()\n", 211 | " print \"Topic proportions: %s\" % (self.T_marginal * 1. / self.T_marginal.sum())\n", 212 | " print \"Author proportions: %s\" % (self.A_marginal * 1. / self.A_marginal.sum())\n", 213 | " print \"W_T[w,z]:\\n%s\" % (self.W_T * 1./ self.W_T.sum())\n", 214 | " print \"A_T[a,z]:\\n%s\" % (self.A_T * 1./ self.A_T.sum())\n", 215 | " print \"Done\"\n", 216 | " \n", 217 | " def show_topics(self, topn_w=3, topn_a=3):\n", 218 | " print \"Top %s words per topic\" % topn_w\n", 219 | " print self.vocab[np.argsort(self.W_T, axis=0)[::-1, :][:topn_w, :]]\n", 220 | " print \"Top %s authors per topic\" % topn_a\n", 221 | " print self.authornames[np.argsort(self.A_T, axis=0)[::-1, :][:topn_a, :]]\n", 222 | " \n", 223 | " " 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 71, 229 | "metadata": { 230 | "collapsed": false, 231 | "slideshow": { 232 | "slide_type": "slide" 233 | } 234 | }, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/plain": [ 239 | "(3,\n", 240 | " [[0, 0, 0, 1, 2, 1],\n", 241 | " [0, 0, 1, 1, 1, 1, 1],\n", 242 | " [2, 2, 2, 3, 3, 3],\n", 243 | " [0, 2, 2, 2, 3, 3, 1],\n", 244 | " [4, 4, 4, 0, 5, 5, 2],\n", 245 | " [4, 5, 5, 3, 0, 5, 5, 1]],\n", 246 | " [[0, 1], [1, 2], [0, 1, 2], [2, 3], [4, 5, 3], [4, 5]],\n", 247 | " array(['V0', 'V1', 'V2', 'V3', 'V4', 'V5'], \n", 248 | " dtype='|S2'),\n", 249 | " array(['A0', 'A1', 'A2', 'A3', 'A4', 'A5'], \n", 250 | " dtype='|S2'))" 251 | ] 252 | }, 253 | "execution_count": 71, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "K = 3\n", 260 | "doc_word_matrix = [[0,0,0,1,2,1],\n", 261 | " [0,0,1,1,1,1,1],\n", 262 | " [2,2,2,3,3,3],\n", 263 | " [0,2,2,2,3,3,1],\n", 264 | " [4,4,4,0,5,5,2],\n", 265 | " [4,5,5,3,0,5,5,1]]\n", 266 | "doc_author_matrix = [[0,1],\n", 267 | " [1,2],\n", 268 | " [0,1,2],\n", 269 | " [2,3],\n", 270 | " [4,5,3],\n", 271 | " [4,5]]\n", 272 | "vocab = np.array([\"V%s\" % k for k in xrange(6)])\n", 273 | "authornames = np.array([\"A%s\" % k for k in xrange(6)])\n", 274 | "\n", 275 | "K, doc_word_matrix, doc_author_matrix, vocab, authornames" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 72, 281 | "metadata": { 282 | "collapsed": false, 283 | "slideshow": { 284 | "slide_type": "slide" 285 | } 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "atm = AuthorTopicModel(K, doc_word_matrix, doc_author_matrix, vocab, authornames)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 73, 295 | "metadata": { 296 | "collapsed": false, 297 | "scrolled": false, 298 | "slideshow": { 299 | "slide_type": "subslide" 300 | } 301 | }, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "Performing 100 gibbs sampling iterations burn in phase\n", 308 | "Burn in complete\n", 309 | "Topic proportions: [ 0.24390244 0.51219512 0.24390244]\n", 310 | "Author proportions: [ 0.19512195 0.14634146 0.14634146 0.14634146 0.2195122 0.14634146]\n", 311 | "W_T[w,z]:\n", 312 | "[[ 0.02439024 0.14634146 0.02439024]\n", 313 | " [ 0. 0.12195122 0.09756098]\n", 314 | " [ 0.04878049 0.04878049 0.09756098]\n", 315 | " [ 0. 0.12195122 0.02439024]\n", 316 | " [ 0.09756098 0. 0. ]\n", 317 | " [ 0.07317073 0.07317073 0. ]]\n", 318 | "A_T[a,z]:\n", 319 | "[[ 0. 0.17073171 0.02439024]\n", 320 | " [ 0.02439024 0.12195122 0. ]\n", 321 | " [ 0. 0.07317073 0.07317073]\n", 322 | " [ 0. 0. 0.14634146]\n", 323 | " [ 0.2195122 0. 0. ]\n", 324 | " [ 0. 0.14634146 0. ]]\n", 325 | "Performing 10 gibbs sampling iterations\n", 326 | "Iter 0:\n", 327 | "Topic proportions: [ 0.24390244 0.48780488 0.26829268]\n", 328 | "Author proportions: [ 0.12195122 0.19512195 0.24390244 0.12195122 0.24390244 0.07317073]\n", 329 | "W_T[w,z]:\n", 330 | "[[ 0. 0.12195122 0.07317073]\n", 331 | " [ 0. 0.14634146 0.07317073]\n", 332 | " [ 0. 0.12195122 0.07317073]\n", 333 | " [ 0. 0.09756098 0.04878049]\n", 334 | " [ 0.09756098 0. 0. ]\n", 335 | " [ 0.14634146 0. 0. ]]\n", 336 | "A_T[a,z]:\n", 337 | "[[ 0. 0.09756098 0.02439024]\n", 338 | " [ 0. 0.19512195 0. ]\n", 339 | " [ 0. 0.12195122 0.12195122]\n", 340 | " [ 0. 0. 0.12195122]\n", 341 | " [ 0.24390244 0. 0. ]\n", 342 | " [ 0. 0.07317073 0. ]]\n", 343 | "Iter 5:\n", 344 | "Topic proportions: [ 0.17073171 0.58536585 0.24390244]\n", 345 | "Author proportions: [ 0.09756098 0.17073171 0.29268293 0.12195122 0.17073171 0.14634146]\n", 346 | "W_T[w,z]:\n", 347 | "[[ 0. 0.09756098 0.09756098]\n", 348 | " [ 0. 0.2195122 0. ]\n", 349 | " [ 0. 0.17073171 0.02439024]\n", 350 | " [ 0. 0.04878049 0.09756098]\n", 351 | " [ 0.04878049 0.04878049 0. ]\n", 352 | " [ 0.12195122 0. 0.02439024]]\n", 353 | "A_T[a,z]:\n", 354 | "[[ 0. 0.09756098 0. ]\n", 355 | " [ 0. 0.17073171 0. ]\n", 356 | " [ 0. 0.2195122 0.07317073]\n", 357 | " [ 0. 0. 0.12195122]\n", 358 | " [ 0.17073171 0. 0. ]\n", 359 | " [ 0. 0.09756098 0.04878049]]\n", 360 | "Done\n" 361 | ] 362 | } 363 | ], 364 | "source": [ 365 | "atm.perform_iterations(max_iters=10)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 74, 371 | "metadata": { 372 | "collapsed": false, 373 | "slideshow": { 374 | "slide_type": "slide" 375 | } 376 | }, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "Top 3 words per topic\n", 383 | "[['V5' 'V1' 'V3']\n", 384 | " ['V4' 'V2' 'V0']\n", 385 | " ['V3' 'V0' 'V5']]\n", 386 | "Top 3 authors per topic\n", 387 | "[['A4' 'A2' 'A3']\n", 388 | " ['A5' 'A1' 'A2']\n", 389 | " ['A3' 'A5' 'A5']]\n" 390 | ] 391 | } 392 | ], 393 | "source": [ 394 | "atm.show_topics()" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "collapsed": true, 402 | "slideshow": { 403 | "slide_type": "slide" 404 | } 405 | }, 406 | "outputs": [], 407 | "source": [] 408 | } 409 | ], 410 | "metadata": { 411 | "celltoolbar": "Slideshow", 412 | "kernelspec": { 413 | "display_name": "Python 2", 414 | "language": "python", 415 | "name": "python2" 416 | }, 417 | "language_info": { 418 | "codemirror_mode": { 419 | "name": "ipython", 420 | "version": 2 421 | }, 422 | "file_extension": ".py", 423 | "mimetype": "text/x-python", 424 | "name": "python", 425 | "nbconvert_exporter": "python", 426 | "pygments_lexer": "ipython2", 427 | "version": "2.7.11" 428 | } 429 | }, 430 | "nbformat": 4, 431 | "nbformat_minor": 0 432 | } 433 | -------------------------------------------------------------------------------- /Best Worst Scaling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Code inspired from: \n", 8 | "\n", 9 | "* https://github.com/valeriobasile/bwstuples/blob/master/bws.py\n", 10 | "* http://valeriobasile.github.io/Best-worst-scaling-and-the-clock-of-Gauss/" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from math import gcd" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]" 31 | ] 32 | }, 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "instances = list(range(10))\n", 40 | "instances" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "k = 4\n", 50 | "p = 3" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "def bws_generation(instances, k, p):\n", 60 | " n = len(instances)\n", 61 | " \n", 62 | " while gcd(n, k) != 1:\n", 63 | " print(n, n-1, gcd(n, k))\n", 64 | " n = max(n-1, 0)\n", 65 | " \n", 66 | " for j in range(p):\n", 67 | " for x in range(n//k):\n", 68 | " prefix = x*(k**(j+1))\n", 69 | " t = [\n", 70 | " (prefix + (i*(k**j))) % n \n", 71 | " for i in range(k)\n", 72 | " ]\n", 73 | " yield t" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 5, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "10 9 2\n", 86 | "[0, 1, 2, 3]\n", 87 | "[4, 5, 6, 7]\n", 88 | "[0, 4, 8, 3]\n", 89 | "[7, 2, 6, 1]\n", 90 | "[0, 7, 5, 3]\n", 91 | "[1, 8, 6, 4]\n" 92 | ] 93 | } 94 | ], 95 | "source": [ 96 | "for t in bws_generation(instances, k, p):\n", 97 | " print(t)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "We can use the tuples generated to ask the annotators to pick the most positive and most negative instance. " 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.7.3" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 4 136 | } 137 | -------------------------------------------------------------------------------- /Binary Search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 16, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "def binary_search(arr, low, high, x):\n", 12 | " if low > high:\n", 13 | " return -1\n", 14 | " mid = (low+high)/2\n", 15 | " print \"Searching in range: %s, %s\" % (low, high)\n", 16 | " print \"arr[%s]=%s\" % (mid, arr[mid])\n", 17 | " if arr[mid] == x:\n", 18 | " return mid\n", 19 | " if arr[mid] < x:\n", 20 | " return binary_search(arr, mid+1, high, x)\n", 21 | " return binary_search(arr, low, mid -1, x)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 17, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "Searching in range: 0, 9\n", 36 | "arr[4]=4\n", 37 | "Searching in range: 5, 9\n", 38 | "arr[7]=7\n", 39 | "Searching in range: 5, 6\n", 40 | "arr[5]=5\n" 41 | ] 42 | }, 43 | { 44 | "data": { 45 | "text/plain": [ 46 | "5" 47 | ] 48 | }, 49 | "execution_count": 17, 50 | "metadata": {}, 51 | "output_type": "execute_result" 52 | } 53 | ], 54 | "source": [ 55 | "arr = range(10)\n", 56 | "binary_search(arr, 0, len(arr)-1, 5)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 18, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [ 66 | { 67 | "name": "stdout", 68 | "output_type": "stream", 69 | "text": [ 70 | "Searching in range: 0, 1\n", 71 | "arr[0]=0\n", 72 | "Searching in range: 1, 1\n", 73 | "arr[1]=5\n" 74 | ] 75 | }, 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "1" 80 | ] 81 | }, 82 | "execution_count": 18, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "arr = [0,5]\n", 89 | "binary_search(arr, 0, len(arr)-1, 5)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "def reverse(arr):\n", 101 | " for i in range()\n", 102 | "def rotate(arr, i):\n", 103 | " " 104 | ] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 2", 110 | "language": "python", 111 | "name": "python2" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 2 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython2", 123 | "version": "2.7.9" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 0 128 | } 129 | -------------------------------------------------------------------------------- /Card24.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Find mathematical expression which is equals a given number using a set of numbers\n", 8 | "===========================================\n", 9 | "# Problem Statement\n", 10 | "Goal of this exercise is to write code which accepts a set of numbers and then tries to devise an arithmetic expression that yields a requested value, using four basic arithmetic operations: addition, subtraction, multiplication and division. Each input number must be used exactly once in the expression. Division is applicable only to numbers that are divisible without remainder. All input numbers and the target number are integers greater than zero. There are no more than 5 input numbers and target number is not larger than 1000.\n", 11 | "Example 1: Suppose that numbers 4, 8 and 9 are given and value 18 should be constructed. One solution is: 9 * 8 / 4.\n", 12 | "Example 2: If numbers 6, 7 and 9 are given, number 3 requested, then solution is: 6 / (9 - 7).\n", 13 | "\n", 14 | "# References\n", 15 | "Python implementation of the solution posted at http://www.codinghelmet.com/?path=exercises/expression-from-numbers" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 18, 21 | "metadata": { 22 | "collapsed": false 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "from Queue import Queue" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 14, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "def SolveAndPrint(numbers, targetValue):\n", 38 | " targetKey = (targetValue << len(numbers)) + (1 << len(numbers)) - 1\n", 39 | " # (value << numbers.Length) represents expression value\n", 40 | " # (1 << numbers.Length) - 1 represents mask with all bits set to 1,\n", 41 | " # i.e. mask in which each input number has been used exactly once\n", 42 | " # to build the expression.\n", 43 | " \n", 44 | " solvedKeys = set()\n", 45 | " # Each number in the collection indicates that corresponding value + mask\n", 46 | " # has been reached using arithmetical operations.\n", 47 | " \n", 48 | " keyToLeftParent = dict()\n", 49 | " # For each solved key (value + mask), there is an entry indicating\n", 50 | " # result of the expression on the left side of the arithmetic\n", 51 | " # operator. Missing value indicates that key represents the\n", 52 | " # raw number (taken from the input list), rather than\n", 53 | " # the result of a calculation.\n", 54 | " \n", 55 | " keyToRightParent = dict()\n", 56 | " # Same as keyToLeftParent, only indicating the right parent\n", 57 | " # used to build the expression.\n", 58 | " \n", 59 | " keyToOperator = dict()\n", 60 | " # Indicates arithmetic operator used to build this node\n", 61 | " # from left and right parent nodes. Missing value for a given key\n", 62 | " # indicates that key is a raw value taken from input array,\n", 63 | " # rather than result of an arithmetic operation.\n", 64 | " \n", 65 | " queue = Queue()\n", 66 | " # Keys (value + mask pairs) that have not been processed yet\n", 67 | "\n", 68 | " # First step is to initialize the structures:\n", 69 | " # Add all input values into corresponding array entries and\n", 70 | " # add them to the queue so that the operation can begin\n", 71 | " for i in range(len(numbers)):\n", 72 | " key = (numbers[i] << len(numbers)) + (1 << i)\n", 73 | " solvedKeys.add(key)\n", 74 | " queue.put(key)\n", 75 | " \n", 76 | " # Now expand entries one at the time until queue is empty,\n", 77 | " # i.e. until there are no new entries populated.\n", 78 | " # Additional stopping condition is that target key has been generated,\n", 79 | " # which indicates that problem has been solved and there is no need to\n", 80 | " # expand nodes any further.\n", 81 | " while (not queue.empty() > 0 and (targetKey not in solvedKeys)):\n", 82 | " curKey = queue.get()\n", 83 | "\n", 84 | " curMask = curKey & ((1 << len(numbers)) - 1)\n", 85 | " curValue = curKey >> len(numbers)\n", 86 | " \n", 87 | " # Now first take a snapshot of all keys that\n", 88 | " # have been reached because this collection is going to\n", 89 | " # change during the following operation\n", 90 | " keys = solvedKeys.copy()\n", 91 | "\n", 92 | " for keys_i in keys:\n", 93 | " mask = keys_i & ((1 << len(numbers)) - 1)\n", 94 | " value = keys_i >> len(numbers)\n", 95 | "\n", 96 | " if ((mask & curMask) == 0):\n", 97 | " # Masks are disjoint, i.e. two entries do not use\n", 98 | " # the same input number twice.\n", 99 | " # This is sufficient condition to combine the two entries\n", 100 | " for op in range(6):\n", 101 | " opSign = '\\0'\n", 102 | " newValue = 0\n", 103 | " if op == 0: # Addition\n", 104 | " newValue = curValue + value\n", 105 | " opSign = '+'\n", 106 | " elif op == 1: # Subtraction - another value subtracted from current\n", 107 | " newValue = curValue - value\n", 108 | " opSign = '-'\n", 109 | " elif op == 2: # Subtraction - current value subtracted from another\n", 110 | " newValue = value - curValue\n", 111 | " opSign = '-'\n", 112 | " elif op == 3: # Multiplication\n", 113 | " newValue = curValue * value\n", 114 | " opSign = '*'\n", 115 | " elif op == 4: # Division - current divided by another\n", 116 | " newValue = -1 # Indicates failure to divide\n", 117 | " if (value != 0 and curValue % value == 0):\n", 118 | " newValue = curValue / value\n", 119 | " opSign = '/'\n", 120 | " elif op == 5: # Division - other value divided by current\n", 121 | " newValue = -1 # Indicates failure to divide\n", 122 | " if (curValue != 0 and value % curValue == 0):\n", 123 | " newValue = value / curValue\n", 124 | " opSign = '/'\n", 125 | "\n", 126 | " if (newValue >= 0):\n", 127 | " # Ignore negative values - they can always be created\n", 128 | " # the other way around, by subtracting them\n", 129 | " # from a larger value so that positive value is reached.\n", 130 | " newMask = (curMask | mask)\n", 131 | " # Combine the masks to indicate that all input numbers\n", 132 | " # from both operands have been used to produce\n", 133 | " # the resulting expression\n", 134 | " \n", 135 | " newKey = (newValue << len(numbers)) + newMask\n", 136 | " if (newKey not in solvedKeys):\n", 137 | " # We have reached a new entry.\n", 138 | " # This expression should now be added\n", 139 | " # to data structures and processed further\n", 140 | " # in the following steps.\n", 141 | "\n", 142 | " # Populate entries that describe newly created expression\n", 143 | " solvedKeys.add(newKey);\n", 144 | " if (op == 2 or op == 5):\n", 145 | " # Special cases - antireflexive operations\n", 146 | " # with interchanged operands\n", 147 | " keyToLeftParent[newKey]= keys_i\n", 148 | " keyToRightParent[newKey] = curKey\n", 149 | " else:\n", 150 | " keyToLeftParent[newKey] = curKey\n", 151 | " keyToRightParent[newKey]= keys_i\n", 152 | " keyToOperator[newKey] = opSign\n", 153 | " # Add expression to list of reachable expressions\n", 154 | " solvedKeys.add(newKey)\n", 155 | " # Add expression to the queue for further expansion\n", 156 | " queue.put(newKey)\n", 157 | " # Now print the solution if it has been found\n", 158 | " if (targetKey not in solvedKeys):\n", 159 | " print \"Solution has not been found.\"\n", 160 | " else:\n", 161 | " PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, targetKey, len(numbers))\n", 162 | " print \"={0}\".format(targetValue)\n" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 15, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "def PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, key, numbersCount):\n", 174 | " if (key not in keyToOperator):\n", 175 | " print \"{0}\".format(key >> numbersCount),\n", 176 | " else:\n", 177 | " print \"(\",\n", 178 | " # Recursively print the left operand\n", 179 | " PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, keyToLeftParent[key], numbersCount)\n", 180 | " # Then print the operation sign\n", 181 | " print keyToOperator[key],\n", 182 | " # Finally, print the right operand\n", 183 | " PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, keyToRightParent[key], numbersCount)\n", 184 | " print \")\"," 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 23, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "Solution has not been found.\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "SolveAndPrint([2,2,22], 24)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "collapsed": true 211 | }, 212 | "outputs": [], 213 | "source": [] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "Python 2", 219 | "language": "python", 220 | "name": "python2" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 2 226 | }, 227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython2", 232 | "version": "2.7.9" 233 | } 234 | }, 235 | "nbformat": 4, 236 | "nbformat_minor": 0 237 | } 238 | -------------------------------------------------------------------------------- /Dictionary_lookup_with_default.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dictionary handle missing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "{'0': 0,\n", 19 | " '1': 1,\n", 20 | " '2': 2,\n", 21 | " '3': 3,\n", 22 | " '4': 4,\n", 23 | " '5': 5,\n", 24 | " '6': 6,\n", 25 | " '7': 7,\n", 26 | " '8': 8,\n", 27 | " '9': 9}" 28 | ] 29 | }, 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "a = {f\"{i}\": i for i in range(10)}\n", 37 | "a" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "11" 49 | ] 50 | }, 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "def handle_missing():\n", 58 | " return 11\n", 59 | "handle_missing()" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "68.8 ns ± 1.94 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "%%timeit\n", 77 | "handle_missing()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "119 ns ± 3.45 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "%%timeit \n", 95 | "a.get(\"11\") or 11" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 5, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "192 ns ± 6.18 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "%%timeit \n", 113 | "a.get(\"11\") or handle_missing()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "123 ns ± 5.15 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "%%timeit \n", 131 | "a.get(\"11\", 11)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 7, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "185 ns ± 10.8 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "%%timeit \n", 149 | "a.get(\"11\", handle_missing())" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 8, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "name": "stdout", 159 | "output_type": "stream", 160 | "text": [ 161 | "360 ns ± 33 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "%%timeit \n", 167 | "try:\n", 168 | " a[\"11\"]\n", 169 | "except KeyError:\n", 170 | " handle_missing()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 9, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "name": "stdout", 180 | "output_type": "stream", 181 | "text": [ 182 | "119 ns ± 9.23 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "%%timeit \n", 188 | "if \"11\" in a:\n", 189 | " a[\"11\"]\n", 190 | "else:\n", 191 | " handle_missing()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 10, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "11" 203 | ] 204 | }, 205 | "execution_count": 10, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "def dynamic_handle_missing(key):\n", 212 | " return int(key)\n", 213 | "key = \"11\"\n", 214 | "dynamic_handle_missing(key)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 11, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "278 ns ± 6.97 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "%%timeit\n", 232 | "dynamic_handle_missing(key)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 12, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "414 ns ± 7.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "%%timeit \n", 250 | "a.get(key) or dynamic_handle_missing(key)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 13, 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "416 ns ± 9.55 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 263 | ] 264 | } 265 | ], 266 | "source": [ 267 | "%%timeit \n", 268 | "a.get(key, dynamic_handle_missing(key))" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 14, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "556 ns ± 15.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "%%timeit \n", 286 | "try:\n", 287 | " a[key]\n", 288 | "except KeyError:\n", 289 | " dynamic_handle_missing(key)" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 15, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "337 ns ± 37.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n" 302 | ] 303 | } 304 | ], 305 | "source": [ 306 | "%%timeit \n", 307 | "if key in a:\n", 308 | " a[key]\n", 309 | "else:\n", 310 | " dynamic_handle_missing(key)" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": null, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [] 319 | } 320 | ], 321 | "metadata": { 322 | "kernelspec": { 323 | "display_name": "Python 3", 324 | "language": "python", 325 | "name": "python3" 326 | }, 327 | "language_info": { 328 | "codemirror_mode": { 329 | "name": "ipython", 330 | "version": 3 331 | }, 332 | "file_extension": ".py", 333 | "mimetype": "text/x-python", 334 | "name": "python", 335 | "nbconvert_exporter": "python", 336 | "pygments_lexer": "ipython3", 337 | "version": "3.6.7" 338 | } 339 | }, 340 | "nbformat": 4, 341 | "nbformat_minor": 2 342 | } 343 | -------------------------------------------------------------------------------- /Dynamic Programming.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dynamic Programming\n", 8 | "\n", 9 | "Source: https://www.topcoder.com/community/data-science/data-science-tutorials/dynamic-programming-from-novice-to-advanced/" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Sum of coins\n", 17 | "Given coins of value $V_1, V_2, ... V_n$ find min coins required to create a sum $S$" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 44, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "def wrapper(S, coins):\n", 29 | " states = [(10000, set()) for k in range(S+1)]\n", 30 | " states = [(10000, []) for k in range(S+1)]\n", 31 | " return n_coins(S, coins, states)\n", 32 | "\n", 33 | "def n_coins(S, coins, states):\n", 34 | " if S < 1:\n", 35 | " return (10000, [])\n", 36 | " if S in coins:\n", 37 | " return (1, [S])\n", 38 | " if S < min(coins):\n", 39 | " return (10000, [])\n", 40 | " if states[S][0] < 10000:\n", 41 | " return states[S]\n", 42 | " for c in coins:\n", 43 | " print S, states[S]\n", 44 | " if c > S:\n", 45 | " continue\n", 46 | " new_s = S - c\n", 47 | " new_state = n_coins(new_s, coins, states)\n", 48 | " new_state = (new_state[0]+1, new_state[1] + [c])\n", 49 | " if new_state[0] < states[S][0]:\n", 50 | " states[S] = new_state\n", 51 | " return states[S]\n", 52 | "\n", 53 | "def n_coins_iter(S, coins):\n", 54 | " states = [(10000, []) for k in range(S+1)]\n", 55 | " states[0] = (0, [])\n", 56 | " for s in range(1,S+1):\n", 57 | " for c in coins:\n", 58 | " if c <= s and states[s-c][0] < states[s][0]:\n", 59 | " states[s] = (states[s-c][0] + 1, states[s-c][1] + [c])\n", 60 | " print states, states[S]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 46, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "S: 10\n", 75 | "Coins(comma seperated): 9,8\n", 76 | "10 set([8, 9])\n", 77 | "10 (10000, [])\n", 78 | "10 (10000, [])\n", 79 | "(10000, [])\n", 80 | "[(0, []), (10000, []), (10000, []), (10000, []), (10000, []), (10000, []), (10000, []), (10000, []), (1, [8]), (1, [9]), (10000, [])] (10000, [])\n", 81 | "None\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "S = int(raw_input(\"S: \"))\n", 87 | "coins = set(int(k) for k in raw_input(\"Coins(comma seperated): \").split(','))\n", 88 | "if min(coins) < 1:\n", 89 | " raise Exception(\"Coins should be positive values >= 1\")\n", 90 | "print S, coins\n", 91 | "print wrapper(S, coins)\n", 92 | "print n_coins_iter(S, coins)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Longest sequence problem\n", 100 | "Given a sequence of $N$ numbers – $A[1] , A[2] , …, A[N]$ . Find the length of the longest non-decreasing sequence.\n", 101 | "\n", 102 | "### Approach\n", 103 | "$len_{LSS}(A, 0, N) = min(\\{len_{LSS}(A, i, N-i) | i \\in [1,N]\\})$" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 95, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "def wrapper(arr):\n", 115 | " states = [1]*len(arr)\n", 116 | " return longest_sub(arr, len(arr)-1, states)\n", 117 | "\n", 118 | "def longest_sub(arr, i, states):\n", 119 | " if i <= 0:\n", 120 | " return 1\n", 121 | " if states[i] > 1:\n", 122 | " return states[i]\n", 123 | " for j in range(i):\n", 124 | " lj = longest_sub(arr, j, states)\n", 125 | " if arr[j] <= arr[i]:\n", 126 | " print j, i, states\n", 127 | " states[i] = lj + 1\n", 128 | " else:\n", 129 | " states[i] = lj\n", 130 | " return states[i]" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 96, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "Array(comma seperated): 5, 3, 4, 8, 6, 7\n", 145 | "[5, 3, 4, 8, 6, 7]\n", 146 | "0 5 [1, 1, 1, 1, 1, 1]\n", 147 | "1 5 [1, 1, 1, 1, 1, 2]\n", 148 | "1 2 [1, 1, 1, 1, 1, 2]\n", 149 | "2 5 [1, 1, 2, 1, 1, 2]\n", 150 | "0 3 [1, 1, 2, 1, 1, 3]\n", 151 | "1 3 [1, 1, 2, 2, 1, 3]\n", 152 | "2 3 [1, 1, 2, 2, 1, 3]\n", 153 | "0 4 [1, 1, 2, 3, 1, 3]\n", 154 | "1 4 [1, 1, 2, 3, 2, 3]\n", 155 | "2 4 [1, 1, 2, 3, 2, 3]\n", 156 | "4 5 [1, 1, 2, 3, 3, 3]\n", 157 | "4\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "arr = [int(k) for k in raw_input(\"Array(comma seperated): \").split(',')]\n", 163 | "print arr\n", 164 | "print wrapper(arr)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Apples on a table\n", 172 | "A table composed of N x M cells, each having a certain quantity of apples, is given. You start from the upper-left corner. At each step you can go down or right one cell. Find the maximum number of apples you can collect." 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 97, 178 | "metadata": { 179 | "collapsed": true 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "def wrapper(mat, N, M):\n", 184 | " states = [[0 for c in range(M)] for r in range(N)]\n", 185 | " return apples(mat,0,0,N,M, states)\n", 186 | "\n", 187 | "\n", 188 | "def apples(mat,i,j,N,M, states):\n", 189 | " if i >= N or j >= M:\n", 190 | " return 0\n", 191 | " if states[i][j] > 0:\n", 192 | " return states[i][j]\n", 193 | " states[i][j] = mat[i][j] + max([apples(mat,i+1,j,N,M, states), apples(mat,i,j+1,N,M, states)])\n", 194 | " return states[i][j]" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 99, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "Array(comma seperated): 1,2,3,4,5,6\n", 209 | "N, M (comma seperated): 2,3\n", 210 | "[[1, 2, 3], [3, 4, 5]]\n" 211 | ] 212 | }, 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "13" 217 | ] 218 | }, 219 | "execution_count": 99, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "arr = [int(k) for k in raw_input(\"Array(comma seperated): \").split(',')]\n", 226 | "N, M = [int(k) for k in raw_input(\"N, M (comma seperated): \").split(',')]\n", 227 | "mat = [arr[i*N:i*N+M] for i in range(N)]\n", 228 | "print mat\n", 229 | "wrapper(mat, N, M)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": true 237 | }, 238 | "outputs": [], 239 | "source": [] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "Python 2", 245 | "language": "python", 246 | "name": "python2" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 2 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython2", 258 | "version": "2.7.10" 259 | } 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 0 263 | } 264 | -------------------------------------------------------------------------------- /Extract_unique_triadic_edges_from_triads.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from itertools import combinations" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 4, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "def get_directed_triads(undirected_triad):\n", 19 | " # Get all triplets of edges\n", 20 | " for candidate_edges in combinations(undirected_triad.items(), 3):\n", 21 | " # Get edges between unique pair of nodes\n", 22 | " unique_edges = set([tuple(sorted(k)) for k,v in candidate_edges])\n", 23 | " # Only consider triad in which the tree edges use a unique pair of nodes\n", 24 | " if len(unique_edges) == 3:\n", 25 | " yield dict(candidate_edges)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 5, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "undirected_triad = {\n", 35 | " ('lisa.jacobson@enron.com', 'phillip.allen@enron.com'):1, \n", 36 | " ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'):1,\n", 37 | " ('phillip.allen@enron.com', 'richard.shapiro@enron.com'):1,\n", 38 | " ('richard.shapiro@enron.com', 'phillip.allen@enron.com'):-1\n", 39 | "}" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 6, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "{('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1, ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1, ('phillip.allen@enron.com', 'richard.shapiro@enron.com'): 1}\n", 52 | "{('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1, ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1, ('richard.shapiro@enron.com', 'phillip.allen@enron.com'): -1}\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "for directed_triad in get_directed_triads(undirected_triad):\n", 58 | " print(directed_triad)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 7, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "[{('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1,\n", 70 | " ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1,\n", 71 | " ('phillip.allen@enron.com', 'richard.shapiro@enron.com'): 1},\n", 72 | " {('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1,\n", 73 | " ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1,\n", 74 | " ('richard.shapiro@enron.com', 'phillip.allen@enron.com'): -1}]" 75 | ] 76 | }, 77 | "execution_count": 7, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "all_directed_triads = list(get_directed_triads(undirected_triad))\n", 84 | "all_directed_triads" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [] 93 | } 94 | ], 95 | "metadata": { 96 | "kernelspec": { 97 | "display_name": "Python 3", 98 | "language": "python", 99 | "name": "python3" 100 | }, 101 | "language_info": { 102 | "codemirror_mode": { 103 | "name": "ipython", 104 | "version": 3 105 | }, 106 | "file_extension": ".py", 107 | "mimetype": "text/x-python", 108 | "name": "python", 109 | "nbconvert_exporter": "python", 110 | "pygments_lexer": "ipython3", 111 | "version": "3.6.7" 112 | } 113 | }, 114 | "nbformat": 4, 115 | "nbformat_minor": 2 116 | } 117 | -------------------------------------------------------------------------------- /ITE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Information Theoretical Estimators (ITE) in Python\n", 8 | "\n", 9 | "https://bitbucket.org/szzoli/ite-in-python/src/master/\n", 10 | "\n", 11 | "Examples from: https://bitbucket.org/szzoli/ite-in-python/downloads/ITE-1.1_documentation.pdf" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import ite # import the ITE toolbox (1x)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "co1 = ite.cost.BHShannon_KnnK() # initialize the entropy (2nd character = ’H’) estimator" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "BHShannon_KnnK -> {'mult': True, 'knn_method': 'cKDTree', 'k': 3, 'eps': 0}\n", 51 | "BHShannon_KnnK -> {'mult': True, 'knn_method': 'cKDTree', 'k': 2, 'eps': 0.1}\n" 52 | ] 53 | }, 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "(0.12977836304496293, 0.11135464600911416)" 58 | ] 59 | }, 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "print(co1) # print estimator-1\n", 67 | "y = np.random.rand(1000, 3) # size: number of samples × dimension, {yt}\n", 68 | "h = co1.estimation(y) # entropy estimation\n", 69 | "co2 = ite.cost.BHShannon_KnnK(knn_method='cKDTree', k=2, eps=0.1) # with other estimator\n", 70 | "# parameters\n", 71 | "print(co2) # print estimator-2\n", 72 | "h2 = co2.estimation(y) # entropy estimation\n", 73 | "h, h2" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 5, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "-0.0005586476623290824" 85 | ] 86 | }, 87 | "execution_count": 5, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "co = ite.cost.MIShannon_DKL() # initialize the mutual information estimator\n", 94 | "# (MIShannon_DKL: 2nd character = ’I’)\n", 95 | "ds = np.array([2, 3, 4]) # y\n", 96 | "t = 2000 # number of samples\n", 97 | "y = np.random.randn(t, sum(ds)) # size: number of samples × dimension\n", 98 | "i = co.estimation(y, ds) # estimate mutual information\n", 99 | "i" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 6, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "-0.022326680988546732" 111 | ] 112 | }, 113 | "execution_count": 6, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "from numpy.random import randn # ’randn’ is used to generate our observations\n", 120 | "co = ite.cost.BDKL_KnnK() # initialize the divergence (2nd character = ’D’) estimator\n", 121 | "dim = 3 # y\n", 122 | "t1, t2 = 2000, 3000 # number of samples from y\n", 123 | "y1 = randn(t1, dim) # size: number of samples1 × dimension, {y\n", 124 | "\n", 125 | "y2 = randn(t2, dim) # size: number of samples2 × dimension, {y\n", 126 | "d = co.estimation(y1, y2) # estimate KL divergence\n", 127 | "d" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "from numpy import dot # create observations\n", 137 | "from numpy.random import rand, multivariate_normal # -||-\n", 138 | "dim1, dim2 = 1, 2 # y\n", 139 | "dim = dim1 + dim2 # y = [y\n", 140 | "\n", 141 | "t = 5000 # number of samples\n", 142 | "co = ite.cost.BcondHShannon_HShannon() # initialize the conditional entropy (’condH’)\n", 143 | "# estimator\n", 144 | "m, l = rand(dim), rand(dim, dim) # mean (m)\n", 145 | "c = dot(l, l.T) # covariance (Σ), y = N(m, Σ)\n", 146 | "y = multivariate_normal(m, c, t) # {yt}\n", 147 | "cond_h = co.estimation(y, dim1) # estimate conditional entropy\n", 148 | "cond_h" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [] 157 | } 158 | ], 159 | "metadata": { 160 | "kernelspec": { 161 | "display_name": "Python 3", 162 | "language": "python", 163 | "name": "python3" 164 | }, 165 | "language_info": { 166 | "codemirror_mode": { 167 | "name": "ipython", 168 | "version": 3 169 | }, 170 | "file_extension": ".py", 171 | "mimetype": "text/x-python", 172 | "name": "python", 173 | "nbconvert_exporter": "python", 174 | "pygments_lexer": "ipython3", 175 | "version": "3.7.3" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 4 180 | } 181 | -------------------------------------------------------------------------------- /Keras Char Word LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import numpy as np\n", 20 | "\n", 21 | "from keras.models import Sequential, Model\n", 22 | "from keras.layers import Convolution1D, MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D, GlobalMaxPooling1D\n", 23 | "from keras.layers import Embedding, LSTM, Input, Merge, Dense, TimeDistributed" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "max_word_len=3\n", 35 | "max_seq_len=5\n", 36 | "w_embed_size=6\n", 37 | "c_embed_size=3\n", 38 | "max_chars=2\n", 39 | "max_words=20\n", 40 | "c_nb_filters=4\n", 41 | "c_filter_length=3\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "char_cnn_layer=Sequential()\n", 53 | "char_cnn_layer.add(Embedding(max_chars, c_embed_size, input_length=max_word_len, name=\"char_embed\"))\n", 54 | "char_cnn_layer.add(Convolution1D(c_nb_filters,c_filter_length, activation='relu'))\n", 55 | "char_cnn_layer.add(GlobalAveragePooling1D(name=\"char_based_word_embed\"))" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "((None, 4), (None, 3))" 69 | ] 70 | }, 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "char_cnn_layer.output_shape, char_cnn_layer.input_shape" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "____________________________________________________________________________________________________\n", 92 | "Layer (type) Output Shape Param # Connected to \n", 93 | "====================================================================================================\n", 94 | "char_embed (Embedding) (None, 3, 3) 6 embedding_input_1[0][0] \n", 95 | "____________________________________________________________________________________________________\n", 96 | "convolution1d_1 (Convolution1D) (None, 1, 4) 40 char_embed[0][0] \n", 97 | "____________________________________________________________________________________________________\n", 98 | "char_based_word_embed (GlobalAve (None, 4) 0 convolution1d_1[0][0] \n", 99 | "====================================================================================================\n", 100 | "Total params: 46\n", 101 | "Trainable params: 46\n", 102 | "Non-trainable params: 0\n", 103 | "____________________________________________________________________________________________________\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "char_cnn_layer.summary()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "____________________________________________________________________________________________________\n", 123 | "Layer (type) Output Shape Param # Connected to \n", 124 | "====================================================================================================\n", 125 | "tdcnn (TimeDistributed) (None, 5, 4) 46 timedistributed_input_1[0][0] \n", 126 | "====================================================================================================\n", 127 | "Total params: 46\n", 128 | "Trainable params: 46\n", 129 | "Non-trainable params: 0\n", 130 | "____________________________________________________________________________________________________\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "char_seq_layer=Sequential()\n", 136 | "char_seq_layer.add(TimeDistributed(char_cnn_layer, input_shape=(max_seq_len, max_word_len), name=\"tdcnn\"))\n", 137 | "char_seq_layer.summary()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 7, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "____________________________________________________________________________________________________\n", 152 | "Layer (type) Output Shape Param # Connected to \n", 153 | "====================================================================================================\n", 154 | "word_embed (Embedding) (None, 5, 6) 120 embedding_input_2[0][0] \n", 155 | "====================================================================================================\n", 156 | "Total params: 120\n", 157 | "Trainable params: 120\n", 158 | "Non-trainable params: 0\n", 159 | "____________________________________________________________________________________________________\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "word_seq_layer=Sequential()\n", 165 | "word_seq_layer.add(Embedding(max_words, w_embed_size, input_length=max_seq_len, name=\"word_embed\"))\n", 166 | "word_seq_layer.summary()" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 8, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "____________________________________________________________________________________________________\n", 181 | "Layer (type) Output Shape Param # Connected to \n", 182 | "====================================================================================================\n", 183 | "tdcnn (TimeDistributed) (None, 5, 4) 46 timedistributed_input_1[0][0] \n", 184 | "____________________________________________________________________________________________________\n", 185 | "word_embed (Embedding) (None, 5, 6) 120 embedding_input_2[0][0] \n", 186 | "____________________________________________________________________________________________________\n", 187 | "shared_lstm (LSTM) (None, 5, 10) 840 char_word_embedding[0][0] \n", 188 | "____________________________________________________________________________________________________\n", 189 | "timedistributed_1 (TimeDistribut (None, 5, 1) 11 shared_lstm[0][0] \n", 190 | "====================================================================================================\n", 191 | "Total params: 1,017\n", 192 | "Trainable params: 1,017\n", 193 | "Non-trainable params: 0\n", 194 | "____________________________________________________________________________________________________\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "full_seq_layer=Sequential()\n", 200 | "full_seq_layer.add(Merge([char_seq_layer, word_seq_layer], mode=\"concat\", name=\"char_word_embedding\"))\n", 201 | "full_seq_layer.add(LSTM(10, return_sequences=True, name=\"shared_lstm\"))\n", 202 | "full_seq_layer.add(TimeDistributed(Dense(1, activation='sigmoid')))\n", 203 | "full_seq_layer.summary()" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 9, 209 | "metadata": { 210 | "collapsed": true 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "full_seq_layer.compile(loss='sparse_categorical_crossentropy', optimizer='sgd')" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 10, 220 | "metadata": { 221 | "collapsed": false 222 | }, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/plain": [ 227 | "[(None, 5, 3), (None, 5)]" 228 | ] 229 | }, 230 | "execution_count": 10, 231 | "metadata": {}, 232 | "output_type": "execute_result" 233 | } 234 | ], 235 | "source": [ 236 | "full_seq_layer.input_shape" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 11, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "((2, 5, 3), (2, 5))" 250 | ] 251 | }, 252 | "execution_count": 11, 253 | "metadata": {}, 254 | "output_type": "execute_result" 255 | } 256 | ], 257 | "source": [ 258 | "X_words = [\n", 259 | " [1,2,1,4,5],\n", 260 | " [1,2,1,4,5]\n", 261 | " ]\n", 262 | "X_chars = [\n", 263 | " [\n", 264 | " [0,1,0],\n", 265 | " [0,1,1],\n", 266 | " [0,1,0],\n", 267 | " [1,1,0],\n", 268 | " [0,1,0]\n", 269 | " ],\n", 270 | " [\n", 271 | " [0,1,1],\n", 272 | " [0,1,1],\n", 273 | " [0,1,0],\n", 274 | " [0,0,0],\n", 275 | " [0,1,0]\n", 276 | " ]\n", 277 | " ]\n", 278 | "X_words = np.array(X_words)\n", 279 | "X_chars = np.array(X_chars)\n", 280 | "X_chars.shape, X_words.shape" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 12, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/plain": [ 293 | "(2, 5, 1)" 294 | ] 295 | }, 296 | "execution_count": 12, 297 | "metadata": {}, 298 | "output_type": "execute_result" 299 | } 300 | ], 301 | "source": [ 302 | "y = [\n", 303 | " [0, 1, 1, 1, 0],\n", 304 | " [0, 0, 0, 1, 0]\n", 305 | "]\n", 306 | "y = np.expand_dims(np.array(y), -1)\n", 307 | "y.shape" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 13, 313 | "metadata": { 314 | "collapsed": false 315 | }, 316 | "outputs": [ 317 | { 318 | "name": "stdout", 319 | "output_type": "stream", 320 | "text": [ 321 | "Epoch 1/10\n", 322 | "2/2 [==============================] - 7s - loss: nan\n", 323 | "Epoch 2/10\n", 324 | "2/2 [==============================] - 0s - loss: nan\n", 325 | "Epoch 3/10\n", 326 | "2/2 [==============================] - 0s - loss: nan\n", 327 | "Epoch 4/10\n", 328 | "2/2 [==============================] - 0s - loss: nan\n", 329 | "Epoch 5/10\n", 330 | "2/2 [==============================] - 0s - loss: nan\n", 331 | "Epoch 6/10\n", 332 | "2/2 [==============================] - 0s - loss: nan\n", 333 | "Epoch 7/10\n", 334 | "2/2 [==============================] - 0s - loss: nan\n", 335 | "Epoch 8/10\n", 336 | "2/2 [==============================] - 0s - loss: nan\n", 337 | "Epoch 9/10\n", 338 | "2/2 [==============================] - 0s - loss: nan\n", 339 | "Epoch 10/10\n", 340 | "2/2 [==============================] - 0s - loss: nan\n" 341 | ] 342 | }, 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "" 347 | ] 348 | }, 349 | "execution_count": 13, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "full_seq_layer.fit([X_chars, X_words], y)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "collapsed": true 363 | }, 364 | "outputs": [], 365 | "source": [] 366 | } 367 | ], 368 | "metadata": { 369 | "kernelspec": { 370 | "display_name": "Python [conda root]", 371 | "language": "python", 372 | "name": "conda-root-py" 373 | }, 374 | "language_info": { 375 | "codemirror_mode": { 376 | "name": "ipython", 377 | "version": 3 378 | }, 379 | "file_extension": ".py", 380 | "mimetype": "text/x-python", 381 | "name": "python", 382 | "nbconvert_exporter": "python", 383 | "pygments_lexer": "ipython3", 384 | "version": "3.5.2" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 2 389 | } 390 | -------------------------------------------------------------------------------- /Keras attention.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "Using TensorFlow backend.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "%matplotlib inline\n", 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import seaborn as sns\n", 21 | "\n", 22 | "\n", 23 | "from keras.layers import Embedding, Dense, Permute, RepeatVector\n", 24 | "from keras.layers import Lambda, Conv1D, Dropout, Activation, Multiply, Flatten\n", 25 | "from keras.models import Sequential, Input, Model\n", 26 | "from keras.models import K\n", 27 | "\n", 28 | "from keras.preprocessing import sequence" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "sns.set_context(\"paper\")\n", 38 | "sns.set_style(\"ticks\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "vocab_size=10\n", 48 | "emb_size=3\n", 49 | "num_filters=6\n", 50 | "kernel_size=3\n", 51 | "maxlen=5" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## Helper functions" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "class Vocab(object):\n", 68 | " def __init__(\n", 69 | " self,\n", 70 | " pre_tokens=(\"__PAD__\", \"__BOS__\", \"__EOS__\", ),\n", 71 | " UNK=\"__UNK__\",\n", 72 | " preprocess=lambda x: x\n", 73 | " ):\n", 74 | " self.UNK = UNK\n", 75 | " self.word2idx = {}\n", 76 | " self.idx2word = []\n", 77 | " self.offset_tokens = set((self.UNK, ) + pre_tokens)\n", 78 | " self.preprocess = preprocess\n", 79 | " for token in pre_tokens + (self.UNK, ):\n", 80 | " self.add_token(token)\n", 81 | "\n", 82 | " def add_token(self, token):\n", 83 | " if token not in self.offset_tokens:\n", 84 | " token = self.preprocess(token)\n", 85 | " if token not in self.word2idx:\n", 86 | " self.word2idx[token] = len(self.word2idx)\n", 87 | " self.idx2word.append(token)\n", 88 | "\n", 89 | " def get_word2idx(self, token):\n", 90 | " if token not in self.offset_tokens:\n", 91 | " token = self.preprocess(token)\n", 92 | " return self.word2idx.get(token, self.UNK)\n", 93 | "\n", 94 | " def process_seq(self, seq):\n", 95 | " return [self.get_word2idx(token) for token in seq]\n", 96 | "\n", 97 | " \n", 98 | " \n", 99 | "def generate_vocab(documents):\n", 100 | " \"\"\"Generate vocab from list of documents\n", 101 | " \n", 102 | " Args:\n", 103 | " -----\n", 104 | " documents: list of document where each document is a list of words\n", 105 | " \n", 106 | " Returns:\n", 107 | " --------\n", 108 | " vocab: Vocab object\n", 109 | " \"\"\"\n", 110 | " vocab = Vocab()\n", 111 | " for document in (documents):\n", 112 | " for word in document:\n", 113 | " vocab.add_token(word)\n", 114 | " return vocab\n", 115 | "\n", 116 | "def create_sequences(documents, labels, vocab, maxlen):\n", 117 | " \"\"\"Create sequences for keras models\n", 118 | " \n", 119 | " Args:\n", 120 | " -----\n", 121 | " documents: list of document where each document is a list of words\n", 122 | " labels: list of labels per document. Only binary classification is supported, i.e. 0, 1\n", 123 | " \n", 124 | " Returns:\n", 125 | " --------\n", 126 | " \n", 127 | " \"\"\"\n", 128 | " X = []\n", 129 | " y = np.asarray(labels)\n", 130 | " for document in (documents):\n", 131 | " seq = []\n", 132 | " for word in document:\n", 133 | " seq.append(word)\n", 134 | " X.append(vocab.process_seq(seq))\n", 135 | " X_padded = sequence.pad_sequences(X, maxlen=maxlen)\n", 136 | " return X_padded, y\n", 137 | " " 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## Basic model which takes sequence and outputs sequence" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 5, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "def get_base_model():\n", 154 | " model = Sequential()\n", 155 | " # we start off with an efficient embedding layer which maps\n", 156 | " # our vocab indices into embedding_dims dimensions\n", 157 | " model.add(Embedding(vocab_size,\n", 158 | " emb_size,\n", 159 | " input_length=maxlen))\n", 160 | " model.add(Dropout(0.2))\n", 161 | " # we add a Convolution1D, which will learn filters\n", 162 | " # word group filters of size filter_length:\n", 163 | " model.add(\n", 164 | " Conv1D(\n", 165 | " num_filters,\n", 166 | " kernel_size,\n", 167 | " padding='same',\n", 168 | " activation='relu',\n", 169 | " strides=1\n", 170 | " )\n", 171 | " )\n", 172 | " return model\n", 173 | " \n", 174 | "def get_attention_model():\n", 175 | " attention = Sequential()\n", 176 | " attention.add(Dense(num_filters, input_shape=(maxlen, num_filters), activation=\"tanh\"))\n", 177 | " attention.add(Dense(1))\n", 178 | " attention.add(Flatten())\n", 179 | " attention.add(Activation(\"softmax\"))\n", 180 | " return attention\n", 181 | "\n", 182 | "def get_output(base_model, attention, inputs):\n", 183 | " activations = base_model(inputs)\n", 184 | " permited_activations = Permute((2,1))(activations)\n", 185 | " aligned_attention = RepeatVector(num_filters)(attention(activations))\n", 186 | " final_activation = Multiply()([\n", 187 | " permited_activations,\n", 188 | " aligned_attention\n", 189 | " ])\n", 190 | " final_score = Flatten()(final_activation)\n", 191 | " output = Lambda(lambda x: K.sum(x,-1, keepdims=True))(final_score)\n", 192 | " output = Activation(\"sigmoid\")(output)\n", 193 | " return output\n", 194 | "\n", 195 | "def get_model():\n", 196 | " inputs = Input((maxlen,))\n", 197 | " base_model = get_base_model()\n", 198 | " attention = get_attention_model()\n", 199 | " output = get_output(base_model, attention, inputs)\n", 200 | " model = Model(inputs=inputs, outputs=output)\n", 201 | " return model" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 6, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "model = get_model()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 7, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "__________________________________________________________________________________________________\n", 223 | "Layer (type) Output Shape Param # Connected to \n", 224 | "==================================================================================================\n", 225 | "input_1 (InputLayer) (None, 5) 0 \n", 226 | "__________________________________________________________________________________________________\n", 227 | "sequential_1 (Sequential) (None, 5, 6) 90 input_1[0][0] \n", 228 | "__________________________________________________________________________________________________\n", 229 | "sequential_2 (Sequential) (None, 5) 49 sequential_1[1][0] \n", 230 | "__________________________________________________________________________________________________\n", 231 | "permute_1 (Permute) (None, 6, 5) 0 sequential_1[1][0] \n", 232 | "__________________________________________________________________________________________________\n", 233 | "repeat_vector_1 (RepeatVector) (None, 6, 5) 0 sequential_2[1][0] \n", 234 | "__________________________________________________________________________________________________\n", 235 | "multiply_1 (Multiply) (None, 6, 5) 0 permute_1[0][0] \n", 236 | " repeat_vector_1[0][0] \n", 237 | "__________________________________________________________________________________________________\n", 238 | "flatten_2 (Flatten) (None, 30) 0 multiply_1[0][0] \n", 239 | "__________________________________________________________________________________________________\n", 240 | "lambda_1 (Lambda) (None, 1) 0 flatten_2[0][0] \n", 241 | "__________________________________________________________________________________________________\n", 242 | "activation_2 (Activation) (None, 1) 0 lambda_1[0][0] \n", 243 | "==================================================================================================\n", 244 | "Total params: 139\n", 245 | "Trainable params: 139\n", 246 | "Non-trainable params: 0\n", 247 | "__________________________________________________________________________________________________\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "model.summary()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 8, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "model.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 9, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "data": { 271 | "text/plain": [ 272 | "((100, 5), (100, 1))" 273 | ] 274 | }, 275 | "execution_count": 9, 276 | "metadata": {}, 277 | "output_type": "execute_result" 278 | } 279 | ], 280 | "source": [ 281 | "data_size=100\n", 282 | "X = np.random.randint(vocab_size, size=(data_size, maxlen))\n", 283 | "y = np.random.randint(2, size=(data_size,1))\n", 284 | "X.shape, y.shape" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 10, 290 | "metadata": {}, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "Epoch 1/1\n", 297 | "100/100 [==============================] - 2s 24ms/step - loss: 0.6960 - acc: 0.4600\n" 298 | ] 299 | }, 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "" 304 | ] 305 | }, 306 | "execution_count": 10, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "model.fit(X, y)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "## Fit model on data" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 11, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "documents = [\n", 329 | " [\"The\", \"world\", \"is\", \"on\", \"fire\"],\n", 330 | " [\"The\", \"earth\", \"is\", \"on\", \"fire\"],\n", 331 | " [\"The\", \"country\", \"is\", \"on\", \"ice\"],\n", 332 | " [\"The\", \"book\", \"is\", \"on\", \"fire\"],\n", 333 | " [\"The\", \"cat\", \"is\", \"on\", \"trampoline\"],\n", 334 | "]\n", 335 | "\n", 336 | "labels = [0,0,1,0,1]\n", 337 | "vocab = generate_vocab(documents)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 12, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [ 346 | "X, y = create_sequences(documents, labels, vocab, maxlen)" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 13, 352 | "metadata": {}, 353 | "outputs": [ 354 | { 355 | "data": { 356 | "text/plain": [ 357 | "((5, 5), (5,))" 358 | ] 359 | }, 360 | "execution_count": 13, 361 | "metadata": {}, 362 | "output_type": "execute_result" 363 | } 364 | ], 365 | "source": [ 366 | "X.shape, y.shape" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 14, 372 | "metadata": {}, 373 | "outputs": [ 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "Epoch 1/1\n", 379 | "\r", 380 | "5/5 [==============================] - 0s 3ms/step - loss: 0.6967 - acc: 0.4000\n" 381 | ] 382 | }, 383 | { 384 | "data": { 385 | "text/plain": [ 386 | "" 387 | ] 388 | }, 389 | "execution_count": 14, 390 | "metadata": {}, 391 | "output_type": "execute_result" 392 | } 393 | ], 394 | "source": [ 395 | "model.fit(X, y)" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 15, 401 | "metadata": {}, 402 | "outputs": [ 403 | { 404 | "data": { 405 | "text/plain": [ 406 | "[,\n", 407 | " ,\n", 408 | " ,\n", 409 | " ,\n", 410 | " ,\n", 411 | " ,\n", 412 | " ,\n", 413 | " ,\n", 414 | " ]" 415 | ] 416 | }, 417 | "execution_count": 15, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "model.layers" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 16, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": [ 434 | "(,\n", 435 | " )" 436 | ] 437 | }, 438 | "execution_count": 16, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "model.layers[1].get_output_at(0), model.layers[2].get_output_at(0)" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 17, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "" 456 | ] 457 | }, 458 | "execution_count": 17, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "model.layers[1].get_input_at(0)" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 18, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [ 473 | "conv_activation = K.function(\n", 474 | " [\n", 475 | " model.layers[1].get_input_at(0),\n", 476 | " K.learning_phase()\n", 477 | " ],\n", 478 | " [model.layers[1].get_output_at(0)]\n", 479 | ")" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 19, 485 | "metadata": {}, 486 | "outputs": [ 487 | { 488 | "data": { 489 | "text/plain": [ 490 | "" 491 | ] 492 | }, 493 | "execution_count": 19, 494 | "metadata": {}, 495 | "output_type": "execute_result" 496 | } 497 | ], 498 | "source": [ 499 | "conv_activation" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 20, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "[array([[[ 0. , 0. , 0.01670015, 0. , 0.01758255,\n", 511 | " 0. ],\n", 512 | " [ 0.01330368, 0. , 0. , 0. , 0. ,\n", 513 | " 0. ],\n", 514 | " [ 0. , 0.02499684, 0. , 0. , 0. ,\n", 515 | " 0.03441959],\n", 516 | " [ 0.00910494, 0. , 0.00106554, 0. , 0.02439762,\n", 517 | " 0. ],\n", 518 | " [ 0. , 0.01371591, 0. , 0. , 0. ,\n", 519 | " 0. ]]], dtype=float32)]" 520 | ] 521 | }, 522 | "execution_count": 20, 523 | "metadata": {}, 524 | "output_type": "execute_result" 525 | } 526 | ], 527 | "source": [ 528 | "conv_activation([\n", 529 | " [X[0]],\n", 530 | " 0.\n", 531 | "])" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 21, 537 | "metadata": {}, 538 | "outputs": [ 539 | { 540 | "data": { 541 | "text/plain": [ 542 | "" 543 | ] 544 | }, 545 | "execution_count": 21, 546 | "metadata": {}, 547 | "output_type": "execute_result" 548 | } 549 | ], 550 | "source": [ 551 | "model.layers[2].get_input_at(0)" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 22, 557 | "metadata": {}, 558 | "outputs": [], 559 | "source": [ 560 | "attention_activation = K.function(\n", 561 | " [\n", 562 | " model.layers[2].get_input_at(0),\n", 563 | " K.learning_phase()\n", 564 | " ],\n", 565 | " [model.layers[2].get_output_at(0)]\n", 566 | ")" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 23, 572 | "metadata": {}, 573 | "outputs": [ 574 | { 575 | "data": { 576 | "text/plain": [ 577 | "[array([[ 0.20158802, 0.19940449, 0.19962992, 0.19902216, 0.2003554 ]], dtype=float32)]" 578 | ] 579 | }, 580 | "execution_count": 23, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | } 584 | ], 585 | "source": [ 586 | "attention_activation([\n", 587 | " conv_activation([[X[0]],0.])[0],\n", 588 | " 0.\n", 589 | "])" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 24, 595 | "metadata": {}, 596 | "outputs": [], 597 | "source": [ 598 | "def get_activations(model, model_input):\n", 599 | " \"\"\"Taken from:\n", 600 | " https://github.com/philipperemy/keras-visualize-activations/blob/master/read_activations.py\n", 601 | " \"\"\"\n", 602 | " conv_activation_fn = K.function(\n", 603 | " [\n", 604 | " model.layers[1].get_input_at(0),\n", 605 | " K.learning_phase()\n", 606 | " ],\n", 607 | " [model.layers[1].get_output_at(0)]\n", 608 | " )\n", 609 | " conv_activations = conv_activation_fn([\n", 610 | " [model_input],\n", 611 | " 0.\n", 612 | " ])\n", 613 | " attention_activation_fn = K.function(\n", 614 | " [\n", 615 | " model.layers[2].get_input_at(0),\n", 616 | " K.learning_phase()\n", 617 | " ],\n", 618 | " [model.layers[2].get_output_at(0)]\n", 619 | " )\n", 620 | " attention_activations = attention_activation_fn([\n", 621 | " conv_activations[0],\n", 622 | " 0.\n", 623 | " ])[0]\n", 624 | " return attention_activations" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": 25, 630 | "metadata": {}, 631 | "outputs": [], 632 | "source": [ 633 | "attention_activations = get_activations(model, X[0])" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 26, 639 | "metadata": {}, 640 | "outputs": [], 641 | "source": [ 642 | "def plot_activations(model, model_input, xticklabels):\n", 643 | " attention_activations = get_activations(model, X[0])\n", 644 | " ax = sns.heatmap(attention_activations, xticklabels=xticklabels, square=True)\n", 645 | " return ax" 646 | ] 647 | }, 648 | { 649 | "cell_type": "code", 650 | "execution_count": 27, 651 | "metadata": {}, 652 | "outputs": [ 653 | { 654 | "data": { 655 | "text/plain": [ 656 | "" 657 | ] 658 | }, 659 | "execution_count": 27, 660 | "metadata": {}, 661 | "output_type": "execute_result" 662 | }, 663 | { 664 | "data": { 665 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWsAAADzCAYAAABABDfiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEnBJREFUeJzt3X9M3HWex/EXCDUx5QtluzMdEbh6oSGxhizitsb2qInF2G0X0gijcmhuu7BsbHa6SVdaqZCVxpmpbbJd3XQzye6a3S7HGCMkF+OujbF1z5zutnVP3GzoCr1be+0MxU4RoVtg5nt/9GT1+FGoHWY+3z4fCX8wfpnv+0v11bfv7+f7mQzbtm0BANJaZqoLAABcHWENAAYgrAHAAIQ1ABiAsAYAAxDWAGAAwhoADEBYA4ABCGsAMABhDQAGIKwBwABZqS4AAK6niaGBeR+bvfz2JFZyfdFZA4AB6KwBOEsinuoKkoKwBuAs8clUV5AUhDUAR7HtRKpLSArCGoCzJAhrAEh/Seyso9GoAoGAcnNzVVJSovr6eklSZ2enent7NTY2purqaq1Zs0bt7e3Ky8tTTk6OfD6fJKm7u1u9vb1qa2vT+Pi4tm7dqtWrVysrK0t79+6d89yENQBnSeINxq6uLjU0NKi8vFyNjY2qq6tTdna2LMuS3+9XLBZTR0eHLly4oMrKSm3ZskW7du1SNBrViRMndP78eU1OXpmpv//++1q6dKkyMzNVVlZ21XMT1gCcZQGddTgcVjgcnva61+uV1+ud9vrQ0JA8Ho8kybIsjYyMKD8/X5s3b9bo6KiCwaCampp09OhRVVRUSJLcbrcGBwe1adMmnTlzRqFQSJJUUFCgYDCo4uJi7dixQ+vWrZt675kQ1gAcxV7AapDZQnk2Ho9HkUhEHo9Hw8PDsixLktTf369Dhw7J5/OpsLBQfX19ikQikq6MTlwu17T3GhgYUG5urqQrwf9Zxz0bwhqAsyTxBmNtba0CgYC6u7tVVVUlv9+vlpYWNTc3q7S0VAcPHtSqVav02GOP6emnn9bJkydVVFQkt9s97b2Ki4u1b98+ud1uuVwuFRYWznnuDNu27WRdGAAstsun/n3ex968al0SK7m+6KwBOAtPMAKAAXgoBgAMwOPmAGAAnmAEgPRn28ysASD9MbMGAAMwBgEAA9BZA4AB4hOpriApCGsAzsIYBAAMwBgEAAxAZw0ABiCsASD92dxgBAADMLMGAAMwBgEAA9BZA4AB6KwBwAB01gBggKt8SripCGsAzkJnDQAGYGYNAAagswYAA9BZA4AB6KwBwACsBgEAA9h2qitICsIagLMwswYAAxDWAGAAbjACgAHi8VRXkBSENQBnYQwCAAYgrAHAAMysASD92QnWWQNA+mMMAgAGYDUIABggiZ11NBpVIBBQbm6uSkpKVF9fL0nq7OxUb2+vxsbGVF1drTVr1qi9vV15eXnKycmRz+eTJHV3d6u3t1dtbW0aHR2d8ZjZENYAnGUBYR0OhxUOh6e97vV65fV6p73e1dWlhoYGlZeXq7GxUXV1dcrOzpZlWfL7/YrFYuro6NCFCxdUWVmpLVu2aNeuXYpGozpx4oTOnz+vyf/baOq1116bdozb7Z61VsIagLMsYCOn2UJ5NkNDQ/J4PJIky7I0MjKi/Px8bd68WaOjowoGg2pqatLRo0dVUVEhSXK73RocHNSmTZt05swZhUKhqff6/8cQ1gBuHEkcg3g8HkUiEXk8Hg0PD8uyLElSf3+/Dh06JJ/Pp8LCQvX19SkSiUi6MjpxuVyzvtdcx3xe5nW+FgBIrYQ9/68Fqq2t1eHDh9XW1qaqqir5/X6Nj4+rublZly9f1sGDBxUKhfTAAw/o2LFjeuaZZ1RUVDRjxzyfYz4vw7YduvkrgBvSWPBf5n3sLS2/SGIl1xdjEACOYrPOGgAMwBOMAGAA9gYBAAPQWQOAASZ53BwA0h9jEAAwAGMQAEh/LN0DABPQWQOAAQhrADAAHz4AAOmPz2AEABMQ1gBgAFaDAIAB6KwBwACENQCkPzvOGAQA0h+dNQCkP5buAYAJCGsAMIAzR9aENQBnsSedmdaENQBncWZWE9YAnIUbjABgAjprAEh/dNYAYAI662s3MTSwGKdZVP9Uti3VJVx3/5i1LNUlJEXXuXdTXUJSvFdQnuoSkuLO0//2pX7enrxOhaQZOmsAjmLTWQOAAQhrAEh/dNYAYADCGgAMYMczUl1CUhDWAByFzhoADGAn6KwBIO3RWQOAAWw7eZ11NBpVIBBQbm6uSkpKVF9fL0nq7OxUb2+vxsbGVF1drTVr1qi9vV15eXnKycmRz+dTT0+Pjh8/rkuXLmn79u0qKCjQ1q1btXr1amVlZWnv3r1znpuwBuAoyeysu7q61NDQoPLycjU2Nqqurk7Z2dmyLEt+v1+xWEwdHR26cOGCKisrtWXLFu3atUvRaFQ9PT168cUX9dFHHykUCqm6ulpLly5VZmamysrKrnpuwhqAoyQWsBokHA4rHA5Pe93r9crr9U57fWhoSB6PR5JkWZZGRkaUn5+vzZs3a3R0VMFgUE1NTTp69KgqKiokSW63W4ODg8rKuhK3K1as0ODgoAoKChQMBlVcXKwdO3Zo3bp1U+89E8IagKMs5AbjbKE8G4/Ho0gkIo/Ho+HhYVmWJUnq7+/XoUOH5PP5VFhYqL6+PkUiEUlXRicul0uZmZmSpEgkIpfLpYGBAeXm5kq6EvyTk3NvakJYA3CUZK4Gqa2tVSAQUHd3t6qqquT3+9XS0qLm5maVlpbq4MGDWrVqlR577DE9/fTTOnnypIqKiuR2u1VTU6PW1lZ9+umnevLJJ2Xbtvbt2ye32y2Xy6XCwsI5z51h23bSN39l1z0zsOueWdh1b2anyzbO+9iV/3nkS51rMdFZA3AU1lkDgAGSuXQvlQhrAI4SZ28QAEh/dNYAYABm1gBggOSvb0sNwhqAo9BZA4AB4onMVJeQFIQ1AEdhDAIABkiwGgQA0p9Tl+5ddbgTj8d18eJFJRIO/fgFAI5i2/P/MsmcnfWvf/1rHTt2TJZl6ZNPPtHGjRtVW1s76/Gz7Q0bDu3/8pUCwDzckGOQ/v5+hUKhqe/b29vnDOvZ9oZ14q57ANLTDbka5OLFi/rjH/84teH2p59+ulh1AcA1MWy6MW9zhnVLS4vC4bCGhoZ06623avfu3YtVFwBckxtyDOJ2u/W9731vsWoBgC/NqatBWLoHwFGcum6NsAbgKLborAEg7U0yBgGA9EdnDQAGYGYNAAagswYAA9BZA4AB4nTWAJD+HPqpXoQ1AGdJ0FkDQPq7ITdyAgDTcIMRAAyQyGAMAgBpL57qApKEsAbgKKwGAQADsBoEAAzAahAAMABjEAAwAEv3AMAA8SR21tFoVIFAQLm5uSopKVF9fb0kqbOzU729vRobG1N1dbXWrFmj9vZ25eXlKScnRz6fTz09PTp+/LguXbqk7du3y+VyTTtmLoQ1AEdJZmfd1dWlhoYGlZeXq7GxUXV1dcrOzpZlWfL7/YrFYuro6NCFCxdUWVmpLVu2aNeuXYpGo+rp6dGLL76ojz76SKFQSGVlZdOOcbvds56bsAbgKAsJ63A4rHA4PO11r9crr9c77fWhoSF5PB5JkmVZGhkZUX5+vjZv3qzR0VEFg0E1NTXp6NGjqqiokCS53W4NDg4qK+tK3K5YsUKDg4MaGhqadkzKwzp7+e2LcRpJV375M/2Sr7f/+J83k36OzyzWNS22xbquw0k/w9/xZ5V6C/kIxtlCeTYej0eRSEQej0fDw8OyLEuS1N/fr0OHDsnn86mwsFB9fX2KRCKSroxOXC6XMjMzJUmRSEQul2vqvT5/zFwy539ZZpjpb0nTOfGaJGdelxOvSTLruhIL+Fqo2tpaHT58WG1tbaqqqpLf79f4+Liam5t1+fJlHTx4UKFQSA888ICOHTumZ555RkVFRXK73aqpqVFra6v279+v5ubmGY+ZC2MQAI6SzMfNv/rVr+rAgQPTXj9y5Mi015577rkvfL9p0yZt2rRpzmPmQlgDcBTWWQOAAVhnDQAGIKwNYcod64Vw4jVJzrwuJ16TZNZ1OXVvkAzbtp16bQBuQPuK/3nexz7534u5sPPLcVxnDeDGxocPAIABEg4dhBDWAByFG4wAYABn9tWGPm7+k5/8RDt37tS6deu0c+dO3XHHHTp//nyqy1oU27Zt+8L3r7zyil599dUUVbNwL7/8sj788MNUl4FZDAwMqKGhQd3d3aku5Zol83HzVDKys37iiSckXQmu/fv3KysrSz/60Y80MjKi9evX68EHH9SBAwd00003aXx8XHv27NGSJUtSXPXMmpub9dOf/lStra362te+pnvuuUePP/647r77bo2Pj2vr1q2KRqP67W9/q7KysqmfCwQCsm1bp06d0kMPPZTCK1iYc+fO6bXXXlNJSYnOnTunH/7wh8rLy0t1Wdfsz3/+s0KhkHJyclRcXKzXX39dGzZs0J/+9Cc9++yzUxv9mKKzs1N/+MMftHbtWj3//PM6ffq0KioqZNu2Tp8+rZGRET366KNf+Hcx3UxmOLO3NjKsZ/Ktb31LK1euVGNjoyYmJnTx4kUVFBQoFovp1KlTWr16dapLnNHq1at16tQpxeNxnThxQpcuXVJRUZH8fr8mJibU3Nysb3zjG9qwYYMeeeQRbdu2TQMDA7JtW7t371ZnZ2eqL+Ga5OXlacOGDbrllltSXcqXEgqFpjaQf+KJJ7R8+XJ997vf1QsvvKC+vj7dfffdqS5xQTZu3Di1+9vly5dVU1Oju+66Sw8//LAqKyuVmZmpt99+O63D2plR7aCwtixragtC27Z177336qGHHtIbb7xx1d2sUun+++/Xz3/+c5WWluqDDz7Q+++/r8+Wvmdk/H2Tg893aJ9//bM9ck2yfft2ZWdn61e/+pUuXbqk++67L9UlXbNEIjH155GRkaGcnBxJ0pIlS5RImPY/2tNZliXbtpWbm6udO3fq7Nmz+stf/pLqsuZk/m99Zub9lz4P3/zmN/XUU0/pgw8+0N/+9jdt2LAh1SXNqrS0VO+99562bdumRCKhM2fO6N5779WePXskSY2NjTp79uwXfmblypVasmSJgsGgPvzwQ9XU1KSi9Gv2wgsvqKCgQPF4XLffvnh7nSdDU1OTOjo6lJ+fr4qKCg0MDKS6pOtu6dKlWr9+vXbv3q1YLKYdO3akuqQ5OXXpHk8wAnCUJ//hkXkfu++//jWJlVxfjuysAdy4GIMAgAHiDh2DENYAHIXOGgAMYNNZA0D6o7MGAAM4dekeYQ3AUZwZ1YQ1AIeZdGhcE9YAHIUbjABgAG4wAoAB6KwBwAB01gBggLhD96YjrAE4CuusAcAAzKwBwADMrAHAAIxBAMAAjEEAwACsBgEAAzAGAQADcIMRAAzAzBoADJDMMUg0GlUgEFBubq5KSkpUX18/9c/eeustvfzyy/rxj3+sWCymtrY2feUrX5HL5VJzc7Neeukl/f73v9eyZctUU1OjO++8U16vVytXrpQktba2KicnZ9ZzE9YAHMVewA3GcDiscDg87XWv1yuv1zvt9a6uLjU0NKi8vFyNjY2qq6tTdna23nnnHf31r3/V6OioJOn48eOqqKjQ448/rl/+8pd699139Zvf/EY/+9nPZNu2vv/976u1tVVjY2PKzs7WbbfdNmdQS4Q1AIeJL6Czni2UZzM0NCSPxyNJsixLIyMjys/P19q1a7V27Vq9+eabkqTKykodOHBAe/fu1cTEhJYtW6bvfOc72r17twoKCjQ+Pq6bb75Zzz33nEpLSxUMBnXy5EmVl5fPeu7MeVcJAAZIyJ7310J5PB5FIhFJ0vDwsCzLmvG4WCym9evXa8+ePVq+fLluvfVWRSIRBQIBffvb31ZWVpbOnj2rjz/+WJKUl5eniYmJOc9NZw3AURYyBlmo2tpaBQIBdXd3q6qqSn6/Xy0tLVqyZMkXjsvPz1dPT4+OHDmipUuX6q677lIsFtMPfvADTU5OyufzacWKFQqFQvrd736nyclJff3rX5/z3Bl2Mq8MABbZfbdtnPexb545ksRKri86awCOwtI9ADAAj5sDgAF43BwADEBYA4ABnLpmgrAG4Ch01gBgAFaDAIAB4rYzN0klrAE4CjNrADAAM2sAMAAzawAwQIIxCACkPzprADAAq0EAwACMQQDAAIxBAMAAdNYAYAA6awAwQNyOp7qEpCCsATgKj5sDgAF43BwADEBnDQAGYDUIABiA1SAAYAAeNwcAAzCzBgADMLMGAAPQWQOAAVhnDQAGoLMGAAOwGgQADMANRgAwAGMQADAATzACgAHorAHAAE6dWWfYTv1rCAAcJDPVBQAAro6wBgADENYAYADCGgAMQFgDgAEIawAwAGENAAYgrAHAAIQ1ABiAsAYAAxDWAGAAwhoADPC/nYqou4x18BEAAAAASUVORK5CYII=\n", 666 | "text/plain": [ 667 | "" 668 | ] 669 | }, 670 | "metadata": {}, 671 | "output_type": "display_data" 672 | } 673 | ], 674 | "source": [ 675 | "plot_activations(model, X[0], documents[0])" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": null, 681 | "metadata": {}, 682 | "outputs": [], 683 | "source": [] 684 | } 685 | ], 686 | "metadata": { 687 | "kernelspec": { 688 | "display_name": "Python 3", 689 | "language": "python", 690 | "name": "python3" 691 | }, 692 | "language_info": { 693 | "codemirror_mode": { 694 | "name": "ipython", 695 | "version": 3 696 | }, 697 | "file_extension": ".py", 698 | "mimetype": "text/x-python", 699 | "name": "python", 700 | "nbconvert_exporter": "python", 701 | "pygments_lexer": "ipython3", 702 | "version": "3.6.4" 703 | } 704 | }, 705 | "nbformat": 4, 706 | "nbformat_minor": 2 707 | } 708 | -------------------------------------------------------------------------------- /Keras_Elmo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Keras-Elmo.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": { 31 | "id": "wfWTbo1Sctng", 32 | "colab_type": "code", 33 | "colab": {} 34 | }, 35 | "source": [ 36 | "! pip install nltk" 37 | ], 38 | "execution_count": 0, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "metadata": { 44 | "id": "MbREWBiU8VFw", 45 | "colab_type": "code", 46 | "outputId": "75dad872-eb94-49c6-85c4-2eb79d855ce3", 47 | "colab": { 48 | "base_uri": "https://localhost:8080/", 49 | "height": 51 50 | } 51 | }, 52 | "source": [ 53 | "import tensorflow as tf\n", 54 | "import tensorflow_hub as hub\n", 55 | "import tensorflow.keras.backend as K\n", 56 | "import numpy as np" 57 | ], 58 | "execution_count": 0, 59 | "outputs": [ 60 | { 61 | "output_type": "stream", 62 | "text": [ 63 | "WARNING: Logging before flag parsing goes to stderr.\n", 64 | "W0325 21:47:56.237649 139708550211456 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14\n" 65 | ], 66 | "name": "stderr" 67 | } 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "metadata": { 73 | "id": "P97jiyM49FUz", 74 | "colab_type": "code", 75 | "outputId": "f0e57295-42bf-4473-fac8-a924df13ee32", 76 | "colab": { 77 | "base_uri": "https://localhost:8080/", 78 | "height": 34 79 | } 80 | }, 81 | "source": [ 82 | "tf.__version__, np.__version__" 83 | ], 84 | "execution_count": 0, 85 | "outputs": [ 86 | { 87 | "output_type": "execute_result", 88 | "data": { 89 | "text/plain": [ 90 | "('1.13.1', '1.14.6')" 91 | ] 92 | }, 93 | "metadata": { 94 | "tags": [] 95 | }, 96 | "execution_count": 2 97 | } 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "wRW3a8Sk8mgb", 104 | "colab_type": "code", 105 | "colab": {} 106 | }, 107 | "source": [ 108 | "class ElmoEmbeddingLayer(tf.keras.layers.Layer):\n", 109 | " \"\"\"Taken from: \n", 110 | " https://github.com/strongio/keras-elmo/blob/master/Elmo%20Keras.ipynb\"\"\"\n", 111 | " def __init__(self, **kwargs):\n", 112 | " self.dimensions = 1024\n", 113 | " super(ElmoEmbeddingLayer, self).__init__(**kwargs)\n", 114 | "\n", 115 | " def build(self, input_shape):\n", 116 | " self.elmo = hub.Module(\n", 117 | " 'https://tfhub.dev/google/elmo/2', \n", 118 | " trainable=self.trainable,\n", 119 | " name=\"{}_module\".format(self.name)\n", 120 | " )\n", 121 | " if self.trainable:\n", 122 | " self._trainable_weights.extend(\n", 123 | " tf.trainable_variables(scope=\"^{}_module/.*\".format(self.name))\n", 124 | " )\n", 125 | " # Changed assuming trainable weights might be set using \n", 126 | " super(ElmoEmbeddingLayer, self).build(input_shape)\n", 127 | "\n", 128 | " def call(self, x, mask=None):\n", 129 | " result = self.elmo(\n", 130 | " K.squeeze(K.cast(x, tf.string), axis=1),\n", 131 | " as_dict=True,\n", 132 | " signature='default',\n", 133 | " )['default']\n", 134 | " return result\n", 135 | "\n", 136 | " def compute_mask(self, inputs, mask=None):\n", 137 | " return K.not_equal(inputs, '--PAD--')\n", 138 | "\n", 139 | " def compute_output_shape(self, input_shape):\n", 140 | " return (input_shape[0], self.dimensions)\n", 141 | "\n", 142 | "def create_model(train_elmo=False):\n", 143 | " # Create Sequential model\n", 144 | " model = tf.keras.Sequential([\n", 145 | " # Need to explicitly include input layer \n", 146 | " # to allow keras to accept string input\n", 147 | " # Taken from:\n", 148 | " # https://gist.github.com/colinmorris/9183206284b4fe3179809098e809d009\n", 149 | " tf.keras.layers.InputLayer(dtype='string', input_shape=(1,)),\n", 150 | " ElmoEmbeddingLayer(trainable=train_elmo),\n", 151 | " tf.keras.layers.Dense(1)\n", 152 | " ])\n", 153 | " \n", 154 | " # Needed to initialize elmo variables\n", 155 | " sess = K.get_session()\n", 156 | " init = tf.global_variables_initializer()\n", 157 | " sess.run(init)\n", 158 | " \n", 159 | " # Compile model\n", 160 | " model.compile(\n", 161 | " optimizer=\"adam\", \n", 162 | " loss=\"binary_crossentropy\", \n", 163 | " metrics=[\"accuracy\"]\n", 164 | " )\n", 165 | " return model\n", 166 | "\n", 167 | "\n" 168 | ], 169 | "execution_count": 0, 170 | "outputs": [] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "metadata": { 175 | "id": "37IaKUOP-Qdn", 176 | "colab_type": "code", 177 | "outputId": "22365931-13a8-411d-ddf1-513aeb7bc5d3", 178 | "colab": { 179 | "base_uri": "https://localhost:8080/", 180 | "height": 175 181 | } 182 | }, 183 | "source": [ 184 | "model = create_model(train_elmo=True)" 185 | ], 186 | "execution_count": 0, 187 | "outputs": [ 188 | { 189 | "output_type": "stream", 190 | "text": [ 191 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/control_flow_ops.py:3632: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", 192 | "Instructions for updating:\n", 193 | "Colocations handled automatically by placer.\n" 194 | ], 195 | "name": "stdout" 196 | }, 197 | { 198 | "output_type": "stream", 199 | "text": [ 200 | "W0325 21:47:56.784782 139708550211456 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/control_flow_ops.py:3632: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n", 201 | "Instructions for updating:\n", 202 | "Colocations handled automatically by placer.\n" 203 | ], 204 | "name": "stderr" 205 | }, 206 | { 207 | "output_type": "stream", 208 | "text": [ 209 | "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n" 210 | ], 211 | "name": "stdout" 212 | }, 213 | { 214 | "output_type": "stream", 215 | "text": [ 216 | "I0325 21:47:57.561886 139708550211456 saver.py:1483] Saver not created because there are no variables in the graph to restore\n" 217 | ], 218 | "name": "stderr" 219 | } 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "metadata": { 225 | "id": "85pZjJ5e-Tx7", 226 | "colab_type": "code", 227 | "outputId": "d74a6472-29e5-42d8-eeac-1a7e84d4f52d", 228 | "colab": { 229 | "base_uri": "https://localhost:8080/", 230 | "height": 34 231 | } 232 | }, 233 | "source": [ 234 | "X = np.array([\n", 235 | " \"This is good\",\n", 236 | " \"This is bad\"\n", 237 | "]).reshape(2, 1)\n", 238 | "y = np.array([0, 1]).reshape(2, 1)\n", 239 | "X.shape, y.shape" 240 | ], 241 | "execution_count": 0, 242 | "outputs": [ 243 | { 244 | "output_type": "execute_result", 245 | "data": { 246 | "text/plain": [ 247 | "((2, 1), (2, 1))" 248 | ] 249 | }, 250 | "metadata": { 251 | "tags": [] 252 | }, 253 | "execution_count": 5 254 | } 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "metadata": { 260 | "id": "OsX2rPRE-edB", 261 | "colab_type": "code", 262 | "outputId": "e013e6d0-2288-4947-cab7-5969242d4534", 263 | "colab": { 264 | "base_uri": "https://localhost:8080/", 265 | "height": 175 266 | } 267 | }, 268 | "source": [ 269 | "model.fit(X, y)" 270 | ], 271 | "execution_count": 0, 272 | "outputs": [ 273 | { 274 | "output_type": "stream", 275 | "text": [ 276 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", 277 | "Instructions for updating:\n", 278 | "Use tf.cast instead.\n" 279 | ], 280 | "name": "stdout" 281 | }, 282 | { 283 | "output_type": "stream", 284 | "text": [ 285 | "W0325 21:47:59.085000 139708550211456 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n", 286 | "Instructions for updating:\n", 287 | "Use tf.cast instead.\n" 288 | ], 289 | "name": "stderr" 290 | }, 291 | { 292 | "output_type": "stream", 293 | "text": [ 294 | "\r2/2 [==============================] - 2s 790ms/sample - loss: 8.0590 - acc: 0.5000\n" 295 | ], 296 | "name": "stdout" 297 | }, 298 | { 299 | "output_type": "execute_result", 300 | "data": { 301 | "text/plain": [ 302 | "" 303 | ] 304 | }, 305 | "metadata": { 306 | "tags": [] 307 | }, 308 | "execution_count": 6 309 | } 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "metadata": { 315 | "id": "vXO0YWtY-nrP", 316 | "colab_type": "code", 317 | "outputId": "28c24dc2-c5cf-4a21-c633-d710c6dc6706", 318 | "colab": { 319 | "base_uri": "https://localhost:8080/", 320 | "height": 207 321 | } 322 | }, 323 | "source": [ 324 | "model.summary()" 325 | ], 326 | "execution_count": 0, 327 | "outputs": [ 328 | { 329 | "output_type": "stream", 330 | "text": [ 331 | "_________________________________________________________________\n", 332 | "Layer (type) Output Shape Param # \n", 333 | "=================================================================\n", 334 | "elmo_embedding_layer (ElmoEm (None, 1024) 4 \n", 335 | "_________________________________________________________________\n", 336 | "dense (Dense) (None, 1) 1025 \n", 337 | "=================================================================\n", 338 | "Total params: 1,029\n", 339 | "Trainable params: 1,029\n", 340 | "Non-trainable params: 0\n", 341 | "_________________________________________________________________\n" 342 | ], 343 | "name": "stdout" 344 | } 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "metadata": { 350 | "id": "B44AaTUhv_w7", 351 | "colab_type": "code", 352 | "outputId": "bc282e75-9533-4162-e3e9-bbd5c75a6ffd", 353 | "colab": { 354 | "base_uri": "https://localhost:8080/", 355 | "height": 86 356 | } 357 | }, 358 | "source": [ 359 | "model.trainable_weights" 360 | ], 361 | "execution_count": 0, 362 | "outputs": [ 363 | { 364 | "output_type": "execute_result", 365 | "data": { 366 | "text/plain": [ 367 | "[,\n", 368 | " ,\n", 369 | " ,\n", 370 | " ]" 371 | ] 372 | }, 373 | "metadata": { 374 | "tags": [] 375 | }, 376 | "execution_count": 8 377 | } 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "metadata": { 383 | "id": "Gr_AJYCZwZlg", 384 | "colab_type": "code", 385 | "colab": {} 386 | }, 387 | "source": [ 388 | "elmo = model.layers[0].elmo" 389 | ], 390 | "execution_count": 0, 391 | "outputs": [] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "metadata": { 396 | "id": "62QVQ2fjxAQf", 397 | "colab_type": "code", 398 | "outputId": "e3a3b0fd-2518-4aa2-bfea-eed54ea50dad", 399 | "colab": { 400 | "base_uri": "https://localhost:8080/", 401 | "height": 693 402 | } 403 | }, 404 | "source": [ 405 | "elmo.variables" 406 | ], 407 | "execution_count": 0, 408 | "outputs": [ 409 | { 410 | "output_type": "execute_result", 411 | "data": { 412 | "text/plain": [ 413 | "[,\n", 414 | " ,\n", 415 | " ,\n", 416 | " ,\n", 417 | " ,\n", 418 | " ,\n", 419 | " ,\n", 420 | " ,\n", 421 | " ,\n", 422 | " ,\n", 423 | " ,\n", 424 | " ,\n", 425 | " ,\n", 426 | " ,\n", 427 | " ,\n", 428 | " ,\n", 429 | " ,\n", 430 | " ,\n", 431 | " ,\n", 432 | " ,\n", 433 | " ,\n", 434 | " ,\n", 435 | " ,\n", 436 | " ,\n", 437 | " ,\n", 438 | " ,\n", 439 | " ,\n", 440 | " ,\n", 441 | " ,\n", 442 | " ,\n", 443 | " ,\n", 444 | " ,\n", 445 | " ,\n", 446 | " ,\n", 447 | " ,\n", 448 | " ,\n", 449 | " ,\n", 450 | " ,\n", 451 | " ]" 452 | ] 453 | }, 454 | "metadata": { 455 | "tags": [] 456 | }, 457 | "execution_count": 10 458 | } 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "metadata": { 464 | "id": "zZB8f-_pxJ2N", 465 | "colab_type": "code", 466 | "outputId": "ff46524a-1176-4017-d672-80d8605723ec", 467 | "colab": { 468 | "base_uri": "https://localhost:8080/", 469 | "height": 51 470 | } 471 | }, 472 | "source": [ 473 | "model.layers[0].trainable_weights" 474 | ], 475 | "execution_count": 0, 476 | "outputs": [ 477 | { 478 | "output_type": "execute_result", 479 | "data": { 480 | "text/plain": [ 481 | "[,\n", 482 | " ]" 483 | ] 484 | }, 485 | "metadata": { 486 | "tags": [] 487 | }, 488 | "execution_count": 11 489 | } 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "metadata": { 495 | "id": "eZ8YljGYxfqj", 496 | "colab_type": "code", 497 | "outputId": "1172ab69-d225-44cf-f25a-a31e155c3f71", 498 | "colab": { 499 | "base_uri": "https://localhost:8080/", 500 | "height": 34 501 | } 502 | }, 503 | "source": [ 504 | "model.predict([[\"This is so cool\"]])" 505 | ], 506 | "execution_count": 0, 507 | "outputs": [ 508 | { 509 | "output_type": "execute_result", 510 | "data": { 511 | "text/plain": [ 512 | "array([[-0.363752]], dtype=float32)" 513 | ] 514 | }, 515 | "metadata": { 516 | "tags": [] 517 | }, 518 | "execution_count": 12 519 | } 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "metadata": { 525 | "id": "0IvIcjVi2l4k", 526 | "colab_type": "code", 527 | "colab": {} 528 | }, 529 | "source": [ 530 | "" 531 | ], 532 | "execution_count": 0, 533 | "outputs": [] 534 | } 535 | ] 536 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /LazyValues.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "LazyValues.ipynb", 7 | "provenance": [], 8 | "authorship_tag": "ABX9TyMlA+yFrEudIIn1Vg8z+ehW", 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "source": [ 33 | "# Lazy Loading Variables in Python\n", 34 | "\n", 35 | "How to get variables which are lazy loaded when first used" 36 | ], 37 | "metadata": { 38 | "id": "gWt4otPFwHwA" 39 | } 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": { 45 | "id": "X0W2LDCFvWIr" 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import time" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "source": [ 55 | "class LazyValues(object):\n", 56 | " def __init__(self, **value_fn_dict):\n", 57 | " self.value_fn_dict = value_fn_dict\n", 58 | " \n", 59 | " def __getattr__(self, name):\n", 60 | " if name not in self.value_fn_dict:\n", 61 | " raise AttributeError(f\"{name} not in {self.value_fn_dict.keys()}\")\n", 62 | " try:\n", 63 | " self.__getattribute__(name)\n", 64 | " except AttributeError:\n", 65 | " print(f\"Lazy loading value: {name}.\")\n", 66 | " value = self.value_fn_dict[name]()\n", 67 | " setattr(self, name, value)\n", 68 | " return self.__getattribute__(name)" 69 | ], 70 | "metadata": { 71 | "id": "Q4-yVWVNvZ5r" 72 | }, 73 | "execution_count": 2, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "source": [ 79 | "def slow_function(v, sleep_time=2):\n", 80 | " print(f\"In slow_function: Sleeping for: {sleep_time} secs before returning {v}.\")\n", 81 | " time.sleep(sleep_time)\n", 82 | " return v" 83 | ], 84 | "metadata": { 85 | "id": "kAdB_V40vbT0" 86 | }, 87 | "execution_count": 3, 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "source": [ 93 | "lazy_values = LazyValues(\n", 94 | " some_slow_value = lambda: slow_function(5, sleep_time=2),\n", 95 | " extremely_slow_value = lambda: slow_function(10, sleep_time=10)\n", 96 | ")" 97 | ], 98 | "metadata": { 99 | "id": "cLgpt7nKvdWi" 100 | }, 101 | "execution_count": 4, 102 | "outputs": [] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "source": [ 107 | "for i in range(10):\n", 108 | " print(f\"lazy_values.some_slow_value={lazy_values.some_slow_value}\")\n", 109 | " print(f\"lazy_values.extremely_slow_value={lazy_values.extremely_slow_value}\")" 110 | ], 111 | "metadata": { 112 | "colab": { 113 | "base_uri": "https://localhost:8080/" 114 | }, 115 | "id": "Q_n10qn7vffp", 116 | "outputId": "3a372426-e006-4b4a-a7ee-5d9abfaebf23" 117 | }, 118 | "execution_count": 5, 119 | "outputs": [ 120 | { 121 | "output_type": "stream", 122 | "name": "stdout", 123 | "text": [ 124 | "Lazy loading value: some_slow_value.\n", 125 | "In slow_function: Sleeping for: 2 secs before returning 5.\n", 126 | "lazy_values.some_slow_value=5\n", 127 | "Lazy loading value: extremely_slow_value.\n", 128 | "In slow_function: Sleeping for: 10 secs before returning 10.\n", 129 | "lazy_values.extremely_slow_value=10\n", 130 | "lazy_values.some_slow_value=5\n", 131 | "lazy_values.extremely_slow_value=10\n", 132 | "lazy_values.some_slow_value=5\n", 133 | "lazy_values.extremely_slow_value=10\n", 134 | "lazy_values.some_slow_value=5\n", 135 | "lazy_values.extremely_slow_value=10\n", 136 | "lazy_values.some_slow_value=5\n", 137 | "lazy_values.extremely_slow_value=10\n", 138 | "lazy_values.some_slow_value=5\n", 139 | "lazy_values.extremely_slow_value=10\n", 140 | "lazy_values.some_slow_value=5\n", 141 | "lazy_values.extremely_slow_value=10\n", 142 | "lazy_values.some_slow_value=5\n", 143 | "lazy_values.extremely_slow_value=10\n", 144 | "lazy_values.some_slow_value=5\n", 145 | "lazy_values.extremely_slow_value=10\n", 146 | "lazy_values.some_slow_value=5\n", 147 | "lazy_values.extremely_slow_value=10\n" 148 | ] 149 | } 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "source": [ 155 | "" 156 | ], 157 | "metadata": { 158 | "id": "0ygvGVp9vtNR" 159 | }, 160 | "execution_count": 5, 161 | "outputs": [] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "source": [ 166 | "" 167 | ], 168 | "metadata": { 169 | "id": "IFT4OU4Tv6Sl" 170 | }, 171 | "execution_count": 5, 172 | "outputs": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "source": [ 177 | "" 178 | ], 179 | "metadata": { 180 | "id": "rVf4LA9Xv605" 181 | }, 182 | "execution_count": 5, 183 | "outputs": [] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "source": [ 188 | "" 189 | ], 190 | "metadata": { 191 | "id": "gJMf9e81wBZn" 192 | }, 193 | "execution_count": null, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "" 200 | ], 201 | "metadata": { 202 | "id": "2RGOI9IxwBXI" 203 | }, 204 | "execution_count": null, 205 | "outputs": [] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "source": [ 210 | "" 211 | ], 212 | "metadata": { 213 | "id": "cirMdxbxwBUT" 214 | }, 215 | "execution_count": null, 216 | "outputs": [] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "source": [ 221 | "" 222 | ], 223 | "metadata": { 224 | "id": "zGS6H2jowBSE" 225 | }, 226 | "execution_count": null, 227 | "outputs": [] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "source": [ 232 | "" 233 | ], 234 | "metadata": { 235 | "id": "Tn7wl47ywBPO" 236 | }, 237 | "execution_count": null, 238 | "outputs": [] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "source": [ 243 | "" 244 | ], 245 | "metadata": { 246 | "id": "4rux0vdOwBMw" 247 | }, 248 | "execution_count": null, 249 | "outputs": [] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "source": [ 254 | "" 255 | ], 256 | "metadata": { 257 | "id": "qXToL9bVwBKU" 258 | }, 259 | "execution_count": null, 260 | "outputs": [] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "source": [ 265 | "" 266 | ], 267 | "metadata": { 268 | "id": "Qz5uupfdwBHg" 269 | }, 270 | "execution_count": null, 271 | "outputs": [] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "source": [ 276 | "" 277 | ], 278 | "metadata": { 279 | "id": "2i13liCTwA_9" 280 | }, 281 | "execution_count": null, 282 | "outputs": [] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "source": [ 287 | "" 288 | ], 289 | "metadata": { 290 | "id": "HeU2nJKFwA9e" 291 | }, 292 | "execution_count": null, 293 | "outputs": [] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "source": [ 298 | "" 299 | ], 300 | "metadata": { 301 | "id": "oniBfjo5wA7C" 302 | }, 303 | "execution_count": null, 304 | "outputs": [] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "source": [ 309 | "" 310 | ], 311 | "metadata": { 312 | "id": "d2giXlADwA4N" 313 | }, 314 | "execution_count": null, 315 | "outputs": [] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "source": [ 320 | "" 321 | ], 322 | "metadata": { 323 | "id": "kSZL5thLwA1u" 324 | }, 325 | "execution_count": null, 326 | "outputs": [] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "source": [ 331 | "" 332 | ], 333 | "metadata": { 334 | "id": "0vRPWAG5wAzH" 335 | }, 336 | "execution_count": null, 337 | "outputs": [] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "source": [ 342 | "" 343 | ], 344 | "metadata": { 345 | "id": "yB9Q6aQMwAwH" 346 | }, 347 | "execution_count": null, 348 | "outputs": [] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "source": [ 353 | "" 354 | ], 355 | "metadata": { 356 | "id": "7NuELLmvwApz" 357 | }, 358 | "execution_count": null, 359 | "outputs": [] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "source": [ 364 | "" 365 | ], 366 | "metadata": { 367 | "id": "Q51AntvswAdH" 368 | }, 369 | "execution_count": null, 370 | "outputs": [] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "source": [ 375 | "" 376 | ], 377 | "metadata": { 378 | "id": "RScVCMtCwAG_" 379 | }, 380 | "execution_count": null, 381 | "outputs": [] 382 | } 383 | ] 384 | } -------------------------------------------------------------------------------- /Naive Sudoku Solver.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Taken from https://www.hackerrank.com/contests/projecteuler/challenges/euler096/" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 32, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "# Enter your code here. Read input from STDIN. Print output to STDOUT\n", 19 | "class SudokuSolver(object):\n", 20 | " \n", 21 | " def __init__(self, N=3, puzzle=None):\n", 22 | " self.N = N\n", 23 | " self.SIZE = self.N*self.N\n", 24 | " self.puzzle = [[set([1])]*(self.SIZE)]*(self.SIZE)\n", 25 | " if puzzle is None:\n", 26 | " for i in range(self.SIZE):\n", 27 | " self.puzzle[i] = [int(k) for k in raw_input()]\n", 28 | " else:\n", 29 | " self.puzzle = [[int(k) for k in line] for line in puzzle.splitlines()]\n", 30 | "\n", 31 | " self._empty_cells = 0 \n", 32 | " self._solutions = [[set([]) for i in range(self.SIZE)] for k in range(self.SIZE)]\n", 33 | " #print self.puzzle\n", 34 | " #print self._solutions\n", 35 | " for i in range(self.SIZE):\n", 36 | " for j in range(self.SIZE):\n", 37 | " if self.puzzle[i][j] < 1:\n", 38 | " self._solutions[i][j] = set(range(1,self.SIZE+1))\n", 39 | " self._empty_cells += 1\n", 40 | " #print \"puzzle[%s][%s] = %s\\tsolutions[%s][%s] = %s\" % (i,j, self.puzzle[i][j], i, j, self._solutions[i][j])\n", 41 | " else:\n", 42 | " self._solutions[i][j] = set([])\n", 43 | " #print self._solutions\n", 44 | "\n", 45 | "\n", 46 | " def get_remove_sets(self, i, j):\n", 47 | " remove_items = []\n", 48 | " remove_items.extend(self.puzzle[i]) # Row items\n", 49 | " remove_items.extend([self.puzzle[k][j] for k in range(self.SIZE)]) # Col items\n", 50 | " x, y = i/self.N, j/self.N\n", 51 | " remove_items.extend([self.puzzle[x*self.N + (k/3)][y*self.N + (k%3)] \\\n", 52 | " for k in range(self.SIZE)]) # Square items\n", 53 | " remove_set = set(remove_items) - set([0])\n", 54 | " #print \"Remove_Set[%s][%s] = %s\" % (i, j, remove_set)\n", 55 | " return remove_set\n", 56 | " \n", 57 | " def solve(self):\n", 58 | " run_times = self.SIZE*self.SIZE\n", 59 | " print \"Original with Empty Cells = %s\" % self._empty_cells\n", 60 | " self.showSudoku()\n", 61 | " changed_cell = True\n", 62 | " #print \"Original Solution Table\"\n", 63 | " #print self._solutions\n", 64 | " #self.showSudoku(arr=self._solutions, sep=\"\\t\")\n", 65 | " while(self._empty_cells > 0 and changed_cell):\n", 66 | " # Row wise selection of solutions\n", 67 | " #print \"Filling row wise\"\n", 68 | " changed_cell = False\n", 69 | " for i in range(self.SIZE):\n", 70 | " for j in range(self.SIZE):\n", 71 | " if self.puzzle[i][j] != 0:\n", 72 | " continue\n", 73 | " remove_set = self.get_remove_sets(i, j)\n", 74 | " #print \"Before solutions[%s][%s] = %s, Remaining: %s\" % (i, j, self._solutions[i][j], self._empty_cells)\n", 75 | " self._solutions[i][j] -= remove_set\n", 76 | " #print \"After solutions[%s][%s] = %s, Remaining: %s\" % (i, j, self._solutions[i][j], self._empty_cells)\n", 77 | " if len(self._solutions[i][j]) == 1:\n", 78 | " self.puzzle[i][j] = self._solutions[i][j].pop()\n", 79 | " self._empty_cells -= 1\n", 80 | " changed_cell = True\n", 81 | " #print \"Adding puzzle[%s][%s] = %s, Remaining: %s\" % (i, j, puzzle[i][j], empty_cells)\n", 82 | " continue\n", 83 | " run_times -= 1\n", 84 | " print \"Solved with Empty Cells = %s\" % self._empty_cells\n", 85 | " self.showSudoku()\n", 86 | " \n", 87 | " def solveRecurse(self, puzzle=None):\n", 88 | " if puzzle is None:\n", 89 | " puzzle = self.puzzle\n", 90 | " found = False\n", 91 | " for i in range(self.SIZE):\n", 92 | " for j in range(self.SIZE):\n", 93 | " if puzzle[i][j] == 0:\n", 94 | " found = True\n", 95 | " break\n", 96 | " if found:\n", 97 | " break\n", 98 | " found_pos = (i, j)\n", 99 | " if not found:\n", 100 | " self.showSudoku(arr=puzzle)\n", 101 | " \n", 102 | " remove_set = self.get_remove_sets(i, j)\n", 103 | " #print found_pos, remove_set\n", 104 | " for m in range(1, self.SIZE+1):\n", 105 | " if m not in remove_set:\n", 106 | " #print \"Puttin %s at %s\" % (m, found_pos)\n", 107 | " puzzle[found_pos[0]][found_pos[1]] = m\n", 108 | " self.solveRecurse(puzzle=puzzle)\n", 109 | " \n", 110 | " \n", 111 | " def showSudoku(self, arr = None, sep=\"\"):\n", 112 | " if arr is None:\n", 113 | " arr = self.puzzle\n", 114 | " # Print final matrix\n", 115 | " for i in range(self.SIZE):\n", 116 | " print sep.join([str(k) for k in arr[i]])\n", 117 | "\n" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 33, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "583927687\n", 132 | "987345001\n", 133 | "001806400\n", 134 | "008102900\n", 135 | "700000008\n", 136 | "006708200\n", 137 | "002609500\n", 138 | "800203009\n", 139 | "005010300\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "s = SudokuSolver(puzzle=\"003020600\\n900305001\\n001806400\\n008102900\\n700000008\\n006708200\\n002609500\\n800203009\\n005010300\")\n", 145 | "s.solveRecurse()\n", 146 | "s.showSudoku()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 10, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "Original with Empty Cells = 51\n", 161 | "200080300\n", 162 | "060070084\n", 163 | "030500209\n", 164 | "000105408\n", 165 | "000000000\n", 166 | "402706000\n", 167 | "301007040\n", 168 | "720040060\n", 169 | "004010003\n", 170 | "Solved with Empty Cells = 50\n", 171 | "200080300\n", 172 | "060070084\n", 173 | "030560209\n", 174 | "000105408\n", 175 | "000000000\n", 176 | "402706000\n", 177 | "301007040\n", 178 | "720040060\n", 179 | "004010003\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "p = \"\"\"200080300\n", 185 | "060070084\n", 186 | "030500209\n", 187 | "000105408\n", 188 | "000000000\n", 189 | "402706000\n", 190 | "301007040\n", 191 | "720040060\n", 192 | "004010003\"\"\"\n", 193 | "s = SudokuSolver(puzzle=p)\n", 194 | "s.solve()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 13, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "Done\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "print \"Done\"" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 29, 219 | "metadata": { 220 | "collapsed": true 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "# Another implementation taken from: http://pythontips.com/2013/09/01/sudoku-solver-in-python/\n", 225 | "import sys\n", 226 | "\n", 227 | "def same_row(i,j): return (i/9 == j/9)\n", 228 | "def same_col(i,j): return (i-j) % 9 == 0\n", 229 | "def same_block(i,j): return (i/27 == j/27 and i%9/3 == j%9/3)\n", 230 | "\n", 231 | "def r(a):\n", 232 | " i = a.find('0')\n", 233 | " if i == -1:\n", 234 | " print a\n", 235 | " print \"\\n\".join([a[k*9:k*9+9] for k in range(9)])\n", 236 | "\n", 237 | " excluded_numbers = set()\n", 238 | " for j in range(81):\n", 239 | " if same_row(i,j) or same_col(i,j) or same_block(i,j):\n", 240 | " excluded_numbers.add(a[j])\n", 241 | " #print i, i/9, i%9, excluded_numbers\n", 242 | " \n", 243 | " for m in '123456789':\n", 244 | " if m not in excluded_numbers:\n", 245 | " #print \"Puttin %s at %s, %s, %s\" % (m, i, i/9, i%9)\n", 246 | " r(a[:i]+m+a[i+1:])" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 30, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [ 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "text": [ 260 | "245981376169273584837564219976125438513498627482736951391657842728349165654812793\n", 261 | "245981376\n", 262 | "169273584\n", 263 | "837564219\n", 264 | "976125438\n", 265 | "513498627\n", 266 | "482736951\n", 267 | "391657842\n", 268 | "728349165\n", 269 | "654812793\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "r(\"200080300060070084030500209000105408000000000402706000301007040720040060004010003\")" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 31, 280 | "metadata": { 281 | "collapsed": false 282 | }, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "483921657967345821251876493548132976729564138136798245372689514814253769695417382\n", 289 | "483921657\n", 290 | "967345821\n", 291 | "251876493\n", 292 | "548132976\n", 293 | "729564138\n", 294 | "136798245\n", 295 | "372689514\n", 296 | "814253769\n", 297 | "695417382\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "r(\"003020600900305001001806400008102900700000008006708200002609500800203009005010300\")" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 2", 318 | "language": "python", 319 | "name": "python2" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 2 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython2", 331 | "version": "2.7.9" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 0 336 | } 337 | -------------------------------------------------------------------------------- /Plotting Decision Boundaries.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "\n", 13 | "from sklearn.datasets import load_iris\n", 14 | "from sklearn.cross_validation import train_test_split\n", 15 | "from sklearn.preprocessing import OneHotEncoder\n", 16 | "\n", 17 | "import matplotlib.pyplot as plt\n", 18 | "\n", 19 | "iris = load_iris()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "from sklearn.linear_model import LogisticRegression" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 18, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "(75L, 4L) (75L,) (25L, 4L) (25L,)\n" 45 | ] 46 | }, 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "1.0" 51 | ] 52 | }, 53 | "execution_count": 18, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "X_train, X_test, y_train, y_test = train_test_split(iris.data[(iris.target == 0) | (iris.target == 1)],\n", 60 | " iris.target[(iris.target == 0) | (iris.target == 1)])\n", 61 | "print X_train.shape, y_train.shape, X_test.shape, y_test.shape\n", 62 | "logit = LogisticRegression()\n", 63 | "logit.fit(X_train, y_train)\n", 64 | "logit.score(X_test, y_test)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 13, 70 | "metadata": { 71 | "collapsed": false 72 | }, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "array([[ 5.1, 3.5, 1.4, 0.2],\n", 78 | " [ 4.9, 3. , 1.4, 0.2],\n", 79 | " [ 4.7, 3.2, 1.3, 0.2],\n", 80 | " [ 4.6, 3.1, 1.5, 0.2],\n", 81 | " [ 5. , 3.6, 1.4, 0.2],\n", 82 | " [ 5.4, 3.9, 1.7, 0.4],\n", 83 | " [ 4.6, 3.4, 1.4, 0.3],\n", 84 | " [ 5. , 3.4, 1.5, 0.2],\n", 85 | " [ 4.4, 2.9, 1.4, 0.2],\n", 86 | " [ 4.9, 3.1, 1.5, 0.1],\n", 87 | " [ 5.4, 3.7, 1.5, 0.2],\n", 88 | " [ 4.8, 3.4, 1.6, 0.2],\n", 89 | " [ 4.8, 3. , 1.4, 0.1],\n", 90 | " [ 4.3, 3. , 1.1, 0.1],\n", 91 | " [ 5.8, 4. , 1.2, 0.2],\n", 92 | " [ 5.7, 4.4, 1.5, 0.4],\n", 93 | " [ 5.4, 3.9, 1.3, 0.4],\n", 94 | " [ 5.1, 3.5, 1.4, 0.3],\n", 95 | " [ 5.7, 3.8, 1.7, 0.3],\n", 96 | " [ 5.1, 3.8, 1.5, 0.3],\n", 97 | " [ 5.4, 3.4, 1.7, 0.2],\n", 98 | " [ 5.1, 3.7, 1.5, 0.4],\n", 99 | " [ 4.6, 3.6, 1. , 0.2],\n", 100 | " [ 5.1, 3.3, 1.7, 0.5],\n", 101 | " [ 4.8, 3.4, 1.9, 0.2],\n", 102 | " [ 5. , 3. , 1.6, 0.2],\n", 103 | " [ 5. , 3.4, 1.6, 0.4],\n", 104 | " [ 5.2, 3.5, 1.5, 0.2],\n", 105 | " [ 5.2, 3.4, 1.4, 0.2],\n", 106 | " [ 4.7, 3.2, 1.6, 0.2],\n", 107 | " [ 4.8, 3.1, 1.6, 0.2],\n", 108 | " [ 5.4, 3.4, 1.5, 0.4],\n", 109 | " [ 5.2, 4.1, 1.5, 0.1],\n", 110 | " [ 5.5, 4.2, 1.4, 0.2],\n", 111 | " [ 4.9, 3.1, 1.5, 0.1],\n", 112 | " [ 5. , 3.2, 1.2, 0.2],\n", 113 | " [ 5.5, 3.5, 1.3, 0.2],\n", 114 | " [ 4.9, 3.1, 1.5, 0.1],\n", 115 | " [ 4.4, 3. , 1.3, 0.2],\n", 116 | " [ 5.1, 3.4, 1.5, 0.2],\n", 117 | " [ 5. , 3.5, 1.3, 0.3],\n", 118 | " [ 4.5, 2.3, 1.3, 0.3],\n", 119 | " [ 4.4, 3.2, 1.3, 0.2],\n", 120 | " [ 5. , 3.5, 1.6, 0.6],\n", 121 | " [ 5.1, 3.8, 1.9, 0.4],\n", 122 | " [ 4.8, 3. , 1.4, 0.3],\n", 123 | " [ 5.1, 3.8, 1.6, 0.2],\n", 124 | " [ 4.6, 3.2, 1.4, 0.2],\n", 125 | " [ 5.3, 3.7, 1.5, 0.2],\n", 126 | " [ 5. , 3.3, 1.4, 0.2],\n", 127 | " [ 7. , 3.2, 4.7, 1.4],\n", 128 | " [ 6.4, 3.2, 4.5, 1.5],\n", 129 | " [ 6.9, 3.1, 4.9, 1.5],\n", 130 | " [ 5.5, 2.3, 4. , 1.3],\n", 131 | " [ 6.5, 2.8, 4.6, 1.5],\n", 132 | " [ 5.7, 2.8, 4.5, 1.3],\n", 133 | " [ 6.3, 3.3, 4.7, 1.6],\n", 134 | " [ 4.9, 2.4, 3.3, 1. ],\n", 135 | " [ 6.6, 2.9, 4.6, 1.3],\n", 136 | " [ 5.2, 2.7, 3.9, 1.4],\n", 137 | " [ 5. , 2. , 3.5, 1. ],\n", 138 | " [ 5.9, 3. , 4.2, 1.5],\n", 139 | " [ 6. , 2.2, 4. , 1. ],\n", 140 | " [ 6.1, 2.9, 4.7, 1.4],\n", 141 | " [ 5.6, 2.9, 3.6, 1.3],\n", 142 | " [ 6.7, 3.1, 4.4, 1.4],\n", 143 | " [ 5.6, 3. , 4.5, 1.5],\n", 144 | " [ 5.8, 2.7, 4.1, 1. ],\n", 145 | " [ 6.2, 2.2, 4.5, 1.5],\n", 146 | " [ 5.6, 2.5, 3.9, 1.1],\n", 147 | " [ 5.9, 3.2, 4.8, 1.8],\n", 148 | " [ 6.1, 2.8, 4. , 1.3],\n", 149 | " [ 6.3, 2.5, 4.9, 1.5],\n", 150 | " [ 6.1, 2.8, 4.7, 1.2],\n", 151 | " [ 6.4, 2.9, 4.3, 1.3],\n", 152 | " [ 6.6, 3. , 4.4, 1.4],\n", 153 | " [ 6.8, 2.8, 4.8, 1.4],\n", 154 | " [ 6.7, 3. , 5. , 1.7],\n", 155 | " [ 6. , 2.9, 4.5, 1.5],\n", 156 | " [ 5.7, 2.6, 3.5, 1. ],\n", 157 | " [ 5.5, 2.4, 3.8, 1.1],\n", 158 | " [ 5.5, 2.4, 3.7, 1. ],\n", 159 | " [ 5.8, 2.7, 3.9, 1.2],\n", 160 | " [ 6. , 2.7, 5.1, 1.6],\n", 161 | " [ 5.4, 3. , 4.5, 1.5],\n", 162 | " [ 6. , 3.4, 4.5, 1.6],\n", 163 | " [ 6.7, 3.1, 4.7, 1.5],\n", 164 | " [ 6.3, 2.3, 4.4, 1.3],\n", 165 | " [ 5.6, 3. , 4.1, 1.3],\n", 166 | " [ 5.5, 2.5, 4. , 1.3],\n", 167 | " [ 5.5, 2.6, 4.4, 1.2],\n", 168 | " [ 6.1, 3. , 4.6, 1.4],\n", 169 | " [ 5.8, 2.6, 4. , 1.2],\n", 170 | " [ 5. , 2.3, 3.3, 1. ],\n", 171 | " [ 5.6, 2.7, 4.2, 1.3],\n", 172 | " [ 5.7, 3. , 4.2, 1.2],\n", 173 | " [ 5.7, 2.9, 4.2, 1.3],\n", 174 | " [ 6.2, 2.9, 4.3, 1.3],\n", 175 | " [ 5.1, 2.5, 3. , 1.1],\n", 176 | " [ 5.7, 2.8, 4.1, 1.3]])" 177 | ] 178 | }, 179 | "execution_count": 13, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": true 191 | }, 192 | "outputs": [], 193 | "source": [] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 2", 199 | "language": "python", 200 | "name": "python2" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 2 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython2", 212 | "version": "2.7.9" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 0 217 | } 218 | -------------------------------------------------------------------------------- /Programming assignments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Questions taken from:\n", 8 | "http://mycsinterviewsexperiences.blogspot.com/2012/09/interviews-with-microsoft-bing-and.html\n", 9 | "\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "graph = {'A': set(['B', 'C']),\n", 21 | " 'B': set(['A', 'D', 'E']),\n", 22 | " 'C': set(['A', 'F']),\n", 23 | " 'D': set(['B']),\n", 24 | " 'E': set(['B', 'F']),\n", 25 | " 'F': set(['C', 'E'])}" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": false, 33 | "scrolled": false 34 | }, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "Outside: A -> set([]) []\n", 41 | "Outside: B -> set(['A']) ['C']\n", 42 | "Outside: D -> set(['A', 'B']) ['C', 'E']\n", 43 | "Outside: E -> set(['A', 'B', 'D']) ['C']\n", 44 | "Outside: F -> set(['A', 'B', 'E', 'D']) ['C']\n", 45 | "Outside: C -> set(['A', 'B', 'E', 'D', 'F']) ['C']\n", 46 | "Outside: C -> set(['A', 'C', 'B', 'E', 'D', 'F']) []\n" 47 | ] 48 | }, 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "{'A', 'B', 'C', 'D', 'E', 'F'}" 53 | ] 54 | }, 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "def dfs(graph, start):\n", 62 | " visited, stack = (set(), [start])\n", 63 | " while stack:\n", 64 | " curr = stack.pop()\n", 65 | " print \"Outside: \", curr, \"->\", visited, stack\n", 66 | " if curr not in visited:\n", 67 | " visited.add(curr)\n", 68 | " stack.extend(graph[curr] - visited)\n", 69 | " return visited\n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | "dfs(graph, \"A\") " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 3, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "['A', 'B', 'E', 'F']\n", 88 | "['A', 'C', 'F']\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "def dfs_paths(graph, start, goal):\n", 94 | " stack = [(start, [start])]\n", 95 | " while stack:\n", 96 | " curr, path = stack.pop()\n", 97 | " for next in graph[curr] - set(path):\n", 98 | " if next == goal:\n", 99 | " print path + [next]\n", 100 | " else:\n", 101 | " stack.append((next, path + [next]))\n", 102 | " \n", 103 | "dfs_paths(graph, \"A\", \"F\") " 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 4, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Visited: set([]), Queue: ['A']\n", 118 | "Outside: A\n", 119 | "Visited: set(['A', 'C', 'B']), Queue: ['F', 'E', 'D']\n", 120 | "Outside: F\n", 121 | "Visited: set(['A', 'C', 'B', 'E', 'F']), Queue: ['E', 'D']\n", 122 | "Outside: E\n", 123 | "Visited: set(['A', 'C', 'B', 'E', 'F']), Queue: ['D']\n", 124 | "Outside: D\n" 125 | ] 126 | }, 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "{'A', 'B', 'C', 'D', 'E', 'F'}" 131 | ] 132 | }, 133 | "execution_count": 4, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "def bfs(graph, start):\n", 140 | " visited, queue = (set(), [start])\n", 141 | " while queue:\n", 142 | " print \"Visited: %s, Queue: %s\" % (visited, queue)\n", 143 | " curr = queue.pop(0)\n", 144 | " print \"Outside: \", curr\n", 145 | " if curr not in visited:\n", 146 | " visited.add(curr)\n", 147 | " for k in graph[curr] - visited:\n", 148 | " queue.extend(graph[k] - visited)\n", 149 | " visited = visited.union(graph[curr])\n", 150 | " #print \"Curr: %s, Edges: %s\" % (curr, graph[curr])\n", 151 | " return visited\n", 152 | "\n", 153 | "bfs(graph, \"A\")" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 5, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "Visited: set([]), Queue: ['A']\n", 168 | "Outside: A\n", 169 | "Visited: set(['A']), Queue: ['C', 'B']\n", 170 | "Outside: C\n", 171 | "Visited: set(['A', 'C']), Queue: ['B', 'F']\n", 172 | "Outside: B\n", 173 | "Visited: set(['A', 'C', 'B']), Queue: ['F', 'E', 'D']\n", 174 | "Outside: F\n", 175 | "Visited: set(['A', 'C', 'B', 'F']), Queue: ['E', 'D', 'E']\n", 176 | "Outside: E\n", 177 | "Visited: set(['A', 'C', 'B', 'E', 'F']), Queue: ['D', 'E']\n", 178 | "Outside: D\n", 179 | "Visited: set(['A', 'C', 'B', 'E', 'D', 'F']), Queue: ['E']\n", 180 | "Outside: E\n" 181 | ] 182 | }, 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "{'A', 'B', 'C', 'D', 'E', 'F'}" 187 | ] 188 | }, 189 | "execution_count": 5, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "def bfs(graph, start):\n", 196 | " visited, queue = (set(), [start])\n", 197 | " while queue:\n", 198 | " print \"Visited: %s, Queue: %s\" % (visited, queue)\n", 199 | " curr = queue.pop(0)\n", 200 | " print \"Outside: \", curr\n", 201 | " if curr not in visited:\n", 202 | " visited.add(curr)\n", 203 | " queue.extend(graph[curr] - visited)\n", 204 | " return visited\n", 205 | "\n", 206 | "bfs(graph, \"A\")" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 6, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "['A', 'C', 'F']\n", 221 | "['A', 'B', 'E', 'F']\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "def bfs_paths(graph, start, goal):\n", 227 | " queue = [(start, [start])]\n", 228 | " while queue:\n", 229 | " curr, path = queue.pop(0)\n", 230 | " for next in graph[curr] - set(path):\n", 231 | " if next == goal:\n", 232 | " print path + [next]\n", 233 | " else:\n", 234 | " queue.append((next, path + [next]))\n", 235 | "\n", 236 | "bfs_paths(graph, \"A\", \"F\")" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 7, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [ 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "A A None\n", 251 | "A C ['A', 'C']\n", 252 | "A B ['A', 'B']\n", 253 | "A E ['A', 'B', 'E']\n", 254 | "A D ['A', 'B', 'D']\n", 255 | "A F ['A', 'C', 'F']\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "def shortest_path(graph, start, goal):\n", 261 | " queue = [(start, [start])]\n", 262 | " while queue:\n", 263 | " curr, path = queue.pop(0)\n", 264 | " for next in graph[curr] - set(path):\n", 265 | " if next == goal:\n", 266 | " return path + [next]\n", 267 | " else:\n", 268 | " queue.append((next, path + [next]))\n", 269 | " return None\n", 270 | "for k in graph.keys():\n", 271 | " print \"A\", k, shortest_path(graph, \"A\", k)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "## Q1. Given a binary tree, return true if it is a BST. Do it in a non-recursive way.\n", 279 | "Do DFS and at each step check the condition for BST. If fail return False. If passes all conditions return True. " 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 8, 285 | "metadata": { 286 | "collapsed": false 287 | }, 288 | "outputs": [], 289 | "source": [ 290 | "def append_bst(graph, root, item):\n", 291 | " if root is None:\n", 292 | " graph[item] = [None, None]\n", 293 | " return graph\n", 294 | " curr = root\n", 295 | " while (item < curr) or (item > curr):\n", 296 | " if item < curr:\n", 297 | " if graph[curr][0] is not None:\n", 298 | " curr = graph[curr][0]\n", 299 | " else:\n", 300 | " graph[curr][0] = item\n", 301 | " graph[item] = [None, None]\n", 302 | " elif item > curr:\n", 303 | " if graph[curr][1] is not None:\n", 304 | " curr = graph[curr][1]\n", 305 | " else:\n", 306 | " graph[curr][1] = item\n", 307 | " graph[item] = [None, None]\n", 308 | " return graph\n", 309 | "\n", 310 | "def gen_bst(arr):\n", 311 | " graph = append_bst({}, None, arr[0])\n", 312 | " root = arr[0]\n", 313 | " for item in arr[1:]:\n", 314 | " graph = append_bst(graph, root, item)\n", 315 | " #print item, graph\n", 316 | " return graph, root" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 9, 322 | "metadata": { 323 | "collapsed": false 324 | }, 325 | "outputs": [ 326 | { 327 | "name": "stdout", 328 | "output_type": "stream", 329 | "text": [ 330 | "Final graph:\n", 331 | "({0: [None, 1], 1: [None, 2], 2: [None, 3], 3: [None, 4], 4: [None, 5], 5: [None, 6], 6: [None, 7], 7: [None, 8], 8: [None, 9], 9: [None, None]}, 0)\n" 332 | ] 333 | } 334 | ], 335 | "source": [ 336 | "print \"Final graph:\\n\", gen_bst(range(10))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 10, 342 | "metadata": { 343 | "collapsed": false 344 | }, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "Final graph:\n", 351 | "({1: [None, None], 2: [1, None], 3: [2, None], 4: [3, 5], 5: [None, 10], 7: [None, None], 10: [7, None]}, 4)\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "print \"Final graph:\\n\", gen_bst([4,5,3,2,10,1,7])" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 11, 362 | "metadata": { 363 | "collapsed": true 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "def print_graph(graph, root):\n", 368 | " visited, queue = (set([None]), [root])\n", 369 | " level = 0\n", 370 | " while queue:\n", 371 | " curr = queue.pop(0)\n", 372 | " print curr \n", 373 | " if curr not in visited:\n", 374 | " visited.add(curr)\n", 375 | " queue.extend(set(graph[curr]) - visited)\n", 376 | " " 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 12, 382 | "metadata": { 383 | "collapsed": false 384 | }, 385 | "outputs": [ 386 | { 387 | "name": "stdout", 388 | "output_type": "stream", 389 | "text": [ 390 | "4\n", 391 | "3\n", 392 | "5\n", 393 | "2\n", 394 | "10\n", 395 | "1\n", 396 | "7\n" 397 | ] 398 | } 399 | ], 400 | "source": [ 401 | "print_graph(*gen_bst([4,5,3,2,10,1,7]))" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 13, 407 | "metadata": { 408 | "collapsed": false 409 | }, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/plain": [ 414 | "1" 415 | ] 416 | }, 417 | "execution_count": 13, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "min([1, 2, 3])" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 14, 429 | "metadata": { 430 | "collapsed": true 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "def is_bst(graph, root):\n", 435 | " if root is None:\n", 436 | " return True\n", 437 | " visited, stack = (set([None]), [root])\n", 438 | " while stack:\n", 439 | " curr = stack.pop()\n", 440 | " left, right = graph[curr]\n", 441 | " if left is not None and left > curr:\n", 442 | " # Left is always smaller than parent\n", 443 | " return False\n", 444 | " if right is not None and right < curr:\n", 445 | " # Right is always greater than parent\n", 446 | " return False\n", 447 | " if curr not in visited:\n", 448 | " visited.add(curr)\n", 449 | " print curr\n", 450 | " stack.extend(set(graph[curr]) - visited)\n", 451 | " return True\n", 452 | " " 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 15, 458 | "metadata": { 459 | "collapsed": false 460 | }, 461 | "outputs": [ 462 | { 463 | "name": "stdout", 464 | "output_type": "stream", 465 | "text": [ 466 | "4\n", 467 | "5\n", 468 | "10\n", 469 | "7\n", 470 | "3\n", 471 | "2\n", 472 | "1\n" 473 | ] 474 | }, 475 | { 476 | "data": { 477 | "text/plain": [ 478 | "True" 479 | ] 480 | }, 481 | "execution_count": 15, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "graph, root = gen_bst([4,5,3,2,10,1,7])\n", 488 | "is_bst(graph, root)" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 16, 494 | "metadata": { 495 | "collapsed": false 496 | }, 497 | "outputs": [ 498 | { 499 | "data": { 500 | "text/plain": [ 501 | "False" 502 | ] 503 | }, 504 | "execution_count": 16, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "graph = {0: [1,2],\n", 511 | " 1: [None, None],\n", 512 | " 2: [None, None]}\n", 513 | "root = 0\n", 514 | "is_bst(graph, root)" 515 | ] 516 | }, 517 | { 518 | "cell_type": "markdown", 519 | "metadata": {}, 520 | "source": [ 521 | "## Q2. There is a 2D matrix of size 10 X 10, where you have to begin from the location (0,0) and move to the location (9,9). You can either move right on down. Find out the number of distinct paths in which you can reach (9,9) from (0,0).\n", 522 | "\n", 523 | "#paths(i,j) = #paths(i-1, j-1) + 2" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 17, 529 | "metadata": { 530 | "collapsed": false 531 | }, 532 | "outputs": [], 533 | "source": [ 534 | "def npaths(N=10):\n", 535 | " mat = [[0]*N for k in range(N)]\n", 536 | " mat[0][0] = 1\n", 537 | " for i in range(N):\n", 538 | " for j in range(N):\n", 539 | " if mat[i][j] == 0:\n", 540 | " if i == 0:\n", 541 | " mat[i][j] = mat[i][j-1] # Only one movement from left cell\n", 542 | " #print \"mat[%s][%s] = mat[%s][%s]\" % (i,j,i,j-1)\n", 543 | " #print_mat(mat)\n", 544 | " elif j == 0:\n", 545 | " mat[i][j] = mat[i-1][j] # Only one movement from top cell\n", 546 | " #print \"mat[%s][%s] = mat[%s][%s]\" % (i,j,i-1,j)\n", 547 | " else:\n", 548 | " mat[i][j] = mat[i-1][j] + mat[i][j-1]\n", 549 | " #print \"mat[%s][%s] = mat[%s][%s]+1\" % (i,j,i-1,j-1)\n", 550 | " return mat\n", 551 | "\n", 552 | "def print_mat(mat):\n", 553 | " for row in mat:\n", 554 | " print row" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "execution_count": 18, 560 | "metadata": { 561 | "collapsed": false 562 | }, 563 | "outputs": [ 564 | { 565 | "name": "stdout", 566 | "output_type": "stream", 567 | "text": [ 568 | "[1, 1]\n", 569 | "[1, 2]\n" 570 | ] 571 | } 572 | ], 573 | "source": [ 574 | "print_mat(npaths(N=2))" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 19, 580 | "metadata": { 581 | "collapsed": false 582 | }, 583 | "outputs": [ 584 | { 585 | "name": "stdout", 586 | "output_type": "stream", 587 | "text": [ 588 | "[1, 1, 1]\n", 589 | "[1, 2, 3]\n", 590 | "[1, 3, 6]\n" 591 | ] 592 | } 593 | ], 594 | "source": [ 595 | "print_mat(npaths(N=3))" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 20, 601 | "metadata": { 602 | "collapsed": false 603 | }, 604 | "outputs": [ 605 | { 606 | "name": "stdout", 607 | "output_type": "stream", 608 | "text": [ 609 | "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n", 610 | "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", 611 | "[1, 3, 6, 10, 15, 21, 28, 36, 45, 55]\n", 612 | "[1, 4, 10, 20, 35, 56, 84, 120, 165, 220]\n", 613 | "[1, 5, 15, 35, 70, 126, 210, 330, 495, 715]\n", 614 | "[1, 6, 21, 56, 126, 252, 462, 792, 1287, 2002]\n", 615 | "[1, 7, 28, 84, 210, 462, 924, 1716, 3003, 5005]\n", 616 | "[1, 8, 36, 120, 330, 792, 1716, 3432, 6435, 11440]\n", 617 | "[1, 9, 45, 165, 495, 1287, 3003, 6435, 12870, 24310]\n", 618 | "[1, 10, 55, 220, 715, 2002, 5005, 11440, 24310, 48620]\n" 619 | ] 620 | } 621 | ], 622 | "source": [ 623 | "print_mat(npaths(N=10))" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | "## Q3. Given an array with a special property, find the smallest number in it. The special property is that the elements in the array are monotonically decreasing and then increasing.\n", 631 | "\n", 632 | "Get middle element:\n", 633 | "\n", 634 | "* if middle element part of increasing sequence then search left\n", 635 | "* if middle element part of decreasing sequence then search right\n", 636 | "\n", 637 | "Stop when middle number such that left is higher and right is higher" 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "execution_count": 21, 643 | "metadata": { 644 | "collapsed": true 645 | }, 646 | "outputs": [], 647 | "source": [ 648 | "def find_smallest(arr):\n", 649 | " left, right = 0, len(arr)\n", 650 | " mid = (left + right) /2\n", 651 | " i = 1\n", 652 | " while mid > 0 and mid < len(arr)-1 and i < len(arr):\n", 653 | " if arr[mid-1] <= arr[mid] and arr[mid+1] >= arr[mid]:\n", 654 | " right = mid\n", 655 | " elif arr[mid-1] >= arr[mid] and arr[mid+1] <= arr[mid]:\n", 656 | " left = mid\n", 657 | " else:\n", 658 | " break\n", 659 | " mid = (left + right) /2\n", 660 | " i += 1\n", 661 | " print \"Iterations required: \", i\n", 662 | " return arr[mid]" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 22, 668 | "metadata": { 669 | "collapsed": false 670 | }, 671 | "outputs": [ 672 | { 673 | "name": "stdout", 674 | "output_type": "stream", 675 | "text": [ 676 | "Iterations required: 1\n" 677 | ] 678 | }, 679 | { 680 | "data": { 681 | "text/plain": [ 682 | "1" 683 | ] 684 | }, 685 | "execution_count": 22, 686 | "metadata": {}, 687 | "output_type": "execute_result" 688 | } 689 | ], 690 | "source": [ 691 | "find_smallest([1])" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": 23, 697 | "metadata": { 698 | "collapsed": false 699 | }, 700 | "outputs": [ 701 | { 702 | "name": "stdout", 703 | "output_type": "stream", 704 | "text": [ 705 | "Iterations required: 2\n" 706 | ] 707 | }, 708 | { 709 | "data": { 710 | "text/plain": [ 711 | "1" 712 | ] 713 | }, 714 | "execution_count": 23, 715 | "metadata": {}, 716 | "output_type": "execute_result" 717 | } 718 | ], 719 | "source": [ 720 | "find_smallest([1,2,3])" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": 24, 726 | "metadata": { 727 | "collapsed": false 728 | }, 729 | "outputs": [ 730 | { 731 | "name": "stdout", 732 | "output_type": "stream", 733 | "text": [ 734 | "Iterations required: 3\n" 735 | ] 736 | }, 737 | { 738 | "data": { 739 | "text/plain": [ 740 | "7" 741 | ] 742 | }, 743 | "execution_count": 24, 744 | "metadata": {}, 745 | "output_type": "execute_result" 746 | } 747 | ], 748 | "source": [ 749 | "find_smallest([10,9,8,7,11,12,13,14])" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 25, 755 | "metadata": { 756 | "collapsed": false 757 | }, 758 | "outputs": [ 759 | { 760 | "name": "stdout", 761 | "output_type": "stream", 762 | "text": [ 763 | "Iterations required: 2\n" 764 | ] 765 | }, 766 | { 767 | "data": { 768 | "text/plain": [ 769 | "1" 770 | ] 771 | }, 772 | "execution_count": 25, 773 | "metadata": {}, 774 | "output_type": "execute_result" 775 | } 776 | ], 777 | "source": [ 778 | "find_smallest([3,2,1])" 779 | ] 780 | }, 781 | { 782 | "cell_type": "markdown", 783 | "metadata": {}, 784 | "source": [ 785 | "## Find if a string s3 can be formed by interleaving 2 other given strings s1 and s2\n", 786 | "Characters of each string s1 and s2 will appear in the same order they appear in the original string. All characters of s1 and s2 must be exhausted.\n", 787 | "\n", 788 | "E.g.\n", 789 | "```\n", 790 | "s1 = \"aaab\"\n", 791 | "s2 = \"aaac\"\n", 792 | "s3 = \"aaaacaab\" # is_interleaved = True\n", 793 | "s3 = \"aaaacaa\" # is_interleaved = False, as b is not present in s3\n", 794 | "s3 = \"aaaacaad\" # is_interleaved = False, as d is not present in s1 or s2\n", 795 | "```\n", 796 | "\n", 797 | "Approach: If s[i+j] can be formed by inteleaving s1[i-1] and s2[j] or s1[i] and s2[j-1] then s3[i+j+1] can just be formed by just matching the next character." 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": 34, 803 | "metadata": { 804 | "collapsed": true 805 | }, 806 | "outputs": [], 807 | "source": [ 808 | "from pprint import pprint" 809 | ] 810 | }, 811 | { 812 | "cell_type": "code", 813 | "execution_count": 141, 814 | "metadata": { 815 | "collapsed": false 816 | }, 817 | "outputs": [], 818 | "source": [ 819 | "mat = []\n", 820 | "\n", 821 | "def f(s1,s2,s3,i,j):\n", 822 | " if i < 0 and j < 0:\n", 823 | " return True\n", 824 | " if i < 0:\n", 825 | " return (s3[:j+1] == s2[:j+1])\n", 826 | " if j < 0:\n", 827 | " return (s3[:i+1] == s1[:i+1])\n", 828 | " if mat[i][j] == -1:\n", 829 | " mat[i][j] = (\n", 830 | " (f(s1,s2,s3,i-1,j) and (s3[i+j+1] == s1[i]))\n", 831 | " or (f(s1,s2,s3,i,j-1) and (s3[i+j+1] == s2[j]))\n", 832 | " )\n", 833 | " return mat[i][j]\n", 834 | "\n", 835 | "def is_interleaved(s1,s2,s3):\n", 836 | " global mat\n", 837 | " if len(s1) == 0 and len(s2) == 0 and len(s3) == 0:\n", 838 | " return True\n", 839 | " if len(s3) != (len(s1) + len(s2)):\n", 840 | " return False\n", 841 | " l_s1, l_s2 = len(s1), len(s2)\n", 842 | " mat = [[-1]*(l_s2) for i in xrange(l_s1)]\n", 843 | " for i in xrange(l_s1):\n", 844 | " for j in xrange(l_s2):\n", 845 | " check = f(s1,s2,s3,i,j)\n", 846 | " #print i,j, i+j+1, s1[:i+1], s2[:j+1], s3[:i+j+2], check\n", 847 | " #pprint(mat)\n", 848 | " return mat[-1][-1]\n", 849 | " \n", 850 | " " 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": 142, 856 | "metadata": { 857 | "collapsed": false 858 | }, 859 | "outputs": [ 860 | { 861 | "name": "stdout", 862 | "output_type": "stream", 863 | "text": [ 864 | "'aaaacaab'\tTrue\n", 865 | "'aaaacaa'\tFalse\n", 866 | "'aaaacaad'\tFalse\n", 867 | "'aaaadaad'\tFalse\n", 868 | "''\tFalse\n" 869 | ] 870 | } 871 | ], 872 | "source": [ 873 | "s1 = \"aaab\"\n", 874 | "s2 = \"aaac\"\n", 875 | "for s3 in [\"aaaacaab\", \"aaaacaa\", \"aaaacaad\", \"aaaadaad\", \"\"]:\n", 876 | " print \"'%s'\\t%s\" % (s3, is_interleaved(s1, s2, s3))" 877 | ] 878 | }, 879 | { 880 | "cell_type": "code", 881 | "execution_count": 143, 882 | "metadata": { 883 | "collapsed": false 884 | }, 885 | "outputs": [ 886 | { 887 | "name": "stdout", 888 | "output_type": "stream", 889 | "text": [ 890 | "'worldhello'\tTrue\n", 891 | "'heworlllod'\tTrue\n", 892 | "'helloworld'\tTrue\n", 893 | "''\tFalse\n" 894 | ] 895 | } 896 | ], 897 | "source": [ 898 | "s1 = \"hello\"\n", 899 | "s2 = \"world\"\n", 900 | "for s3 in [\"worldhello\", \"heworlllod\", \"helloworld\", \"\"]:\n", 901 | " print \"'%s'\\t%s\" % (s3, is_interleaved(s1, s2, s3))" 902 | ] 903 | }, 904 | { 905 | "cell_type": "code", 906 | "execution_count": 144, 907 | "metadata": { 908 | "collapsed": false 909 | }, 910 | "outputs": [ 911 | { 912 | "name": "stdout", 913 | "output_type": "stream", 914 | "text": [ 915 | "'worldhello'\tFalse\n", 916 | "'heworlllod'\tFalse\n", 917 | "'helloworld'\tFalse\n", 918 | "''\tTrue\n" 919 | ] 920 | } 921 | ], 922 | "source": [ 923 | "s1 = \"\"\n", 924 | "s2 = \"\"\n", 925 | "for s3 in [\"worldhello\", \"heworlllod\", \"helloworld\", \"\"]:\n", 926 | " print \"'%s'\\t%s\" % (s3, is_interleaved(s1, s2, s3))" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 124, 932 | "metadata": { 933 | "collapsed": false 934 | }, 935 | "outputs": [ 936 | { 937 | "name": "stdout", 938 | "output_type": "stream", 939 | "text": [ 940 | "[]\n" 941 | ] 942 | } 943 | ], 944 | "source": [ 945 | "pprint(mat)" 946 | ] 947 | }, 948 | { 949 | "cell_type": "code", 950 | "execution_count": null, 951 | "metadata": { 952 | "collapsed": true 953 | }, 954 | "outputs": [], 955 | "source": [] 956 | } 957 | ], 958 | "metadata": { 959 | "kernelspec": { 960 | "display_name": "Python 2", 961 | "language": "python", 962 | "name": "python2" 963 | }, 964 | "language_info": { 965 | "codemirror_mode": { 966 | "name": "ipython", 967 | "version": 2 968 | }, 969 | "file_extension": ".py", 970 | "mimetype": "text/x-python", 971 | "name": "python", 972 | "nbconvert_exporter": "python", 973 | "pygments_lexer": "ipython2", 974 | "version": "2.7.11" 975 | } 976 | }, 977 | "nbformat": 4, 978 | "nbformat_minor": 0 979 | } 980 | -------------------------------------------------------------------------------- /PyMC Testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Import relevant modules\n", 12 | "import pymc\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "# Some data\n", 26 | "n = 5 * np.ones(4, dtype=int)\n", 27 | "x = np.array([-.86, -.3, -.05, .73])" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "# Priors on unknown parameters\n", 39 | "alpha = pymc.Normal('alpha', mu=0, tau=.01)\n", 40 | "beta = pymc.Normal('beta', mu=0, tau=.01)\n", 41 | "\n", 42 | "# Arbitrary deterministic function of parameters\n", 43 | "@pymc.deterministic\n", 44 | "def theta(a=alpha, b=beta):\n", 45 | " \"\"\"theta = logit^{-1}(a+b)\"\"\"\n", 46 | " return pymc.invlogit(a + b * x)\n", 47 | "\n", 48 | "# Binomial likelihood for data\n", 49 | "d = pymc.Binomial('d', n=n, p=theta, value=np.array([0., 1., 3., 5.]),\n", 50 | " observed=True)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 9, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "mymodel = pymc.Model([theta, d])\n", 62 | "S = pymc.MCMC(mymodel, db='pickle')" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 10, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "\r", 77 | " [-----------------100%-----------------] 10000 of 10000 complete in 0.3 sec" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "\n", 83 | "S.sample(iter=10000, burn=5000, thin=2)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 11, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "Plotting theta_0\n", 98 | "Plotting theta_1\n", 99 | "Plotting theta_2\n", 100 | "Plotting theta_3\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "pymc.Matplot.plot(S)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 12, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "plt.show()" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": true 124 | }, 125 | "outputs": [], 126 | "source": [] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python 2", 132 | "language": "python", 133 | "name": "python2" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 2 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython2", 145 | "version": "2.7.9" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 0 150 | } 151 | -------------------------------------------------------------------------------- /Quora Haqathon.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:183c9e17b73179464622911276d7e98d8c0d9fb2335ce136b293fe4f2d921f70" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "code", 13 | "collapsed": false, 14 | "input": [ 15 | "\"\"\"\n", 16 | "Archery problem: https://www.hackerrank.com/contests/quora-haqathon/challenges/archery\n", 17 | "\"\"\"" 18 | ], 19 | "language": "python", 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "metadata": {}, 24 | "output_type": "pyout", 25 | "prompt_number": 1, 26 | "text": [ 27 | "'\\nArchery problem: https://www.hackerrank.com/contests/quora-haqathon/challenges/archery\\n'" 28 | ] 29 | } 30 | ], 31 | "prompt_number": 1 32 | }, 33 | { 34 | "cell_type": "code", 35 | "collapsed": false, 36 | "input": [ 37 | "# Enter your code here. Read input from STDIN. Print output to STDOUT\n", 38 | "# radii = [1,2,3,4]\n", 39 | "def isQ(r,line):\n", 40 | " x1,y1,x2,y2 = line\n", 41 | " if y1 == y2 and x1 == x2:\n", 42 | " #print \"Arrow of zero length.\"\n", 43 | " return False\n", 44 | " if x1 == x2:\n", 45 | " y_sqr = (r**2 - c**2)\n", 46 | " if y_sqr < 0:\n", 47 | " #print \"No intersection\"\n", 48 | " return False\n", 49 | " y = y_sqr**0.5\n", 50 | " if (y-y1)*(y-y2) <= 0 and (-y-y1)*(-y-y2) > 0:\n", 51 | " #print \"Exactly one intersection with circle\"\n", 52 | " return True\n", 53 | " if (-y-y1)*(-y-y2) <= 0 and (y-y1)*(y-y2) > 0:\n", 54 | " #print \"Exactly one intersection with circle\"\n", 55 | " return True\n", 56 | " m = (y1-y2)/(1.0*(x1-x2))\n", 57 | " c = y1 -m*x1\n", 58 | " a = (m**2+1)\n", 59 | " b = 2*m*c\n", 60 | " d = c**2 - r**2\n", 61 | " #print \"Parameters (m,c,a,b,d): \", m,c,a,b,d\n", 62 | " sqrt_t = b**2 - 4*a*d\n", 63 | " #print \"SQRT_T\", sqrt_t\n", 64 | " if sqrt_t < 0:\n", 65 | " #print \"No intersection\"\n", 66 | " return False\n", 67 | " sqrt_t = sqrt_t ** 0.5\n", 68 | " #print \"SQRT_T\", sqrt_t\n", 69 | " x = (-b + sqrt_t)/(2*a)\n", 70 | " y = m*x + c\n", 71 | " count_p = 0\n", 72 | " #print \"Intersection point: \",x , y\n", 73 | " if (y-y1)*(y-y2) <= 0 and (x-x1)*(x-x2) <=0:\n", 74 | " count_p += 1\n", 75 | " x = (-b - sqrt_t)/(2*a)\n", 76 | " y = m*x + c\n", 77 | " #print \"Intersection point: \",x , y\n", 78 | " if (y-y1)*(y-y2) <= 0 and (x-x1)*(x-x2) <=0:\n", 79 | " count_p += 1\n", 80 | " if count_p != 1:\n", 81 | " #print \"More than one intersection with circle\", count_p\n", 82 | " return False\n", 83 | " return True\n", 84 | " \n", 85 | " " 86 | ], 87 | "language": "python", 88 | "metadata": {}, 89 | "outputs": [], 90 | "prompt_number": 7 91 | }, 92 | { 93 | "cell_type": "code", 94 | "collapsed": false, 95 | "input": [ 96 | "isQ(1,(1,1,0,0))" 97 | ], 98 | "language": "python", 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "metadata": {}, 103 | "output_type": "pyout", 104 | "prompt_number": 8, 105 | "text": [ 106 | "True" 107 | ] 108 | } 109 | ], 110 | "prompt_number": 8 111 | }, 112 | { 113 | "cell_type": "code", 114 | "collapsed": false, 115 | "input": [ 116 | "def calcQ(radii, lines):\n", 117 | " count = 0\n", 118 | " for r in radii:\n", 119 | " r = r**2\n", 120 | " for line in lines:\n", 121 | " x1,y1,x2,y2 = line\n", 122 | " min_r = min(((x1**2)+(y1**2)), ((x2**2)+(y2**2)))\n", 123 | " if min_r > r:\n", 124 | " print \"All points beyond this are discarded\", r, line\n", 125 | " break\n", 126 | " max_r = max(((x1**2)+(y1**2)), ((x2**2)+(y2**2)))\n", 127 | " if r!= min_r and r != max_r and (r-min_r)*(r-max_r) > 0:\n", 128 | " print \"Outside range: \", r, min_r, max_r, line\n", 129 | " continue\n", 130 | " count += 1\n", 131 | " #if isQ(r,line):\n", 132 | " #count += 1\n", 133 | " return count" 134 | ], 135 | "language": "python", 136 | "metadata": {}, 137 | "outputs": [], 138 | "prompt_number": 57 139 | }, 140 | { 141 | "cell_type": "code", 142 | "collapsed": false, 143 | "input": [ 144 | "n = int(input())\n", 145 | "radii = [int(k) for k in raw_input().split()]\n", 146 | "lines = []\n", 147 | "n = int(input())\n", 148 | "for i in range(n):\n", 149 | " lines.append([int(k) for k in raw_input().split()])\n", 150 | "print calcQ(radii,lines)" 151 | ], 152 | "language": "python", 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "stream": "stdout", 159 | "text": [ 160 | "4\n" 161 | ] 162 | }, 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "stream": "stdout", 167 | "text": [ 168 | "1 2 3 4\n" 169 | ] 170 | }, 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "stream": "stdout", 175 | "text": [ 176 | "3\n" 177 | ] 178 | }, 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "stream": "stdout", 183 | "text": [ 184 | "1 -1 4 -3\n" 185 | ] 186 | }, 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "stream": "stdout", 191 | "text": [ 192 | "2 1 1 2\n" 193 | ] 194 | }, 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "stream": "stdout", 199 | "text": [ 200 | "1 -2 3 -4\n" 201 | ] 202 | }, 203 | { 204 | "output_type": "stream", 205 | "stream": "stdout", 206 | "text": [ 207 | "3\n" 208 | ] 209 | } 210 | ], 211 | "prompt_number": 16 212 | }, 213 | { 214 | "cell_type": "code", 215 | "collapsed": false, 216 | "input": [ 217 | "n = int(input())\n", 218 | "radii = [int(k) for k in raw_input().split()]\n", 219 | "radii = sorted(radii)\n", 220 | "#print radii\n", 221 | "lines = []\n", 222 | "n = int(input())\n", 223 | "for i in range(n):\n", 224 | " lines.append([int(k) for k in raw_input().split()])\n", 225 | "lines.sort(key=lambda x: min(((x[0]**2)+(x[1]**2)), ((x[2]**2)+(x[3]**2))))\n", 226 | "print lines\n", 227 | "print calcQ(radii,lines)" 228 | ], 229 | "language": "python", 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "stream": "stdout", 236 | "text": [ 237 | "4\n" 238 | ] 239 | }, 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "stream": "stdout", 244 | "text": [ 245 | "1 2 3 4\n" 246 | ] 247 | }, 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "stream": "stdout", 252 | "text": [ 253 | "3\n" 254 | ] 255 | }, 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "stream": "stdout", 260 | "text": [ 261 | "1 -1 4 -3\n" 262 | ] 263 | }, 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "stream": "stdout", 268 | "text": [ 269 | "2 1 1 2\n" 270 | ] 271 | }, 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "stream": "stdout", 276 | "text": [ 277 | "1 -2 3 -4\n" 278 | ] 279 | }, 280 | { 281 | "output_type": "stream", 282 | "stream": "stdout", 283 | "text": [ 284 | "[[1, -1, 4, -3], [2, 1, 1, 2], [1, -2, 3, -4]]\n", 285 | "All points beyond this are discarded 1 [1, -1, 4, -3]\n", 286 | "All points beyond this are discarded 4 [2, 1, 1, 2]\n", 287 | "Outside range: 9 5 5 [2, 1, 1, 2]\n", 288 | "Outside range: 16 5 5 [2, 1, 1, 2]\n", 289 | "5\n" 290 | ] 291 | } 292 | ], 293 | "prompt_number": 58 294 | }, 295 | { 296 | "cell_type": "code", 297 | "collapsed": false, 298 | "input": [ 299 | "n = int(input())\n", 300 | "radii = [int(k) for k in raw_input().split()]\n", 301 | "radii = sorted(radii)\n", 302 | "#print radii\n", 303 | "lines = []\n", 304 | "n = int(input())\n", 305 | "for i in range(n):\n", 306 | " lines.append([int(k) for k in raw_input().split()])\n", 307 | "print calcQ(radii,lines)" 308 | ], 309 | "language": "python", 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "name": "stdout", 314 | "output_type": "stream", 315 | "stream": "stdout", 316 | "text": [ 317 | "4\n" 318 | ] 319 | }, 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "stream": "stdout", 324 | "text": [ 325 | "1 2 3 4\n" 326 | ] 327 | }, 328 | { 329 | "name": "stdout", 330 | "output_type": "stream", 331 | "stream": "stdout", 332 | "text": [ 333 | "3\n" 334 | ] 335 | }, 336 | { 337 | "name": "stdout", 338 | "output_type": "stream", 339 | "stream": "stdout", 340 | "text": [ 341 | "1 -1 4 -3\n" 342 | ] 343 | }, 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "stream": "stdout", 348 | "text": [ 349 | "2 1 1 2\n" 350 | ] 351 | }, 352 | { 353 | "name": "stdout", 354 | "output_type": "stream", 355 | "stream": "stdout", 356 | "text": [ 357 | "1 -2 3 -4\n" 358 | ] 359 | }, 360 | { 361 | "output_type": "stream", 362 | "stream": "stdout", 363 | "text": [ 364 | "Outside range: 1 [1, -1, 4, -3]\n", 365 | "Outside range: 1 [2, 1, 1, 2]\n", 366 | "Outside range: 2 [2, 1, 1, 2]\n", 367 | "Outside range: 3 [2, 1, 1, 2]\n", 368 | "Outside range: 4 [2, 1, 1, 2]\n", 369 | "Outside range: 1 [1, -2, 3, -4]\n", 370 | "Outside range: 2 [1, -2, 3, -4]\n", 371 | "Outside range: 3 [1, -2, 3, -4]\n", 372 | "Makes Q\n", 373 | "Outside range: 4 [1, -2, 3, -4]\n", 374 | "Makes Q\n", 375 | "5\n" 376 | ] 377 | } 378 | ], 379 | "prompt_number": 24 380 | }, 381 | { 382 | "cell_type": "code", 383 | "collapsed": false, 384 | "input": [], 385 | "language": "python", 386 | "metadata": {}, 387 | "outputs": [] 388 | } 389 | ], 390 | "metadata": {} 391 | } 392 | ] 393 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ipython-notebooks 2 | Some iPython Notebooks I have created for personal learning 3 | -------------------------------------------------------------------------------- /World Leaders DB.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Source: https://www.worldpresidentsdb.com/list/countries/" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import scrapy\n", 17 | "import logging" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "class WorldPresidents(scrapy.Spider):\n", 27 | " name = 'worldpresidentsdb'\n", 28 | " BASE_URL = \"https://www.worldpresidentsdb.com/\"\n", 29 | " start_urls = [\n", 30 | " BASE_URL + '/list/countries/',\n", 31 | " ]\n", 32 | " def parse(self, response):\n", 33 | " for country in response.css(\".container div.list-group a\"):\n", 34 | " country_url = country.css('::attr(\"href\")').get()\n", 35 | " country_url = WorldPresidents.BASE_URL + country_url\n", 36 | " yield response.follow(country_url, self.parse_country)\n", 37 | " \n", 38 | " def parse_country(self, response):\n", 39 | " for president in response.css(\".container div.list-group a\"):\n", 40 | " president_url = president.css('::attr(\"href\")').get()\n", 41 | " president_url = WorldPresidents.BASE_URL + president_url\n", 42 | " yield response.follow(president_url, self.parse_president)\n", 43 | " \n", 44 | " def parse_president(self, response):\n", 45 | " info = {\n", 46 | " \"url\": response.url\n", 47 | " }\n", 48 | " for p in response.css(\".container div.row div.col-md-8 p\"):\n", 49 | " p_info = self.extract_info(p)\n", 50 | " info.update(p_info)\n", 51 | " yield info\n", 52 | " \n", 53 | " def extract_info(self, p):\n", 54 | " info = {}\n", 55 | " k = None\n", 56 | " for text in p.css(\"::text\").extract(): \n", 57 | " text = text.strip()\n", 58 | " if not text: continue\n", 59 | " if text.endswith(\":\"):\n", 60 | " if k and len(info[k]) == 1 and k not in {\"Terms\"}:\n", 61 | " info[k] = info[k][0]\n", 62 | " k = text[:-1]\n", 63 | " info[k] = []\n", 64 | " else:\n", 65 | " v = text\n", 66 | " if k == \"Terms\":\n", 67 | " if text.startswith(\") \"):\n", 68 | " t = text[2:]\n", 69 | " if t.lower().startswith(\"in office since \"):\n", 70 | " start = t.split(\" since \")[1]\n", 71 | " end = None\n", 72 | " else:\n", 73 | " start, end = t.split(\" to \")\n", 74 | " v = {\"start\": start, \"end\": end}\n", 75 | " else:\n", 76 | " continue\n", 77 | " info[k].append(v)\n", 78 | " if len(info[k]) == 1 and k not in {\"Terms\"}:\n", 79 | " info[k] = info[k][0]\n", 80 | " return info" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "from scrapy.crawler import CrawlerProcess\n", 90 | "from scrapy.exporters import JsonLinesItemExporter" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stderr", 100 | "output_type": "stream", 101 | "text": [ 102 | "2021-01-04 18:41:20 [scrapy.utils.log] INFO: Scrapy 2.4.1 started (bot: scrapybot)\n", 103 | "2021-01-04 18:41:20 [scrapy.utils.log] INFO: Versions: lxml 4.6.2.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.5.2, w3lib 1.21.0, Twisted 20.3.0, Python 3.7.3 (default, Mar 27 2019, 16:54:48) - [Clang 4.0.1 (tags/RELEASE_401/final)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1i 8 Dec 2020), cryptography 2.8, Platform Darwin-19.6.0-x86_64-i386-64bit\n", 104 | "2021-01-04 18:41:20 [scrapy.crawler] INFO: Overridden settings:\n", 105 | "{'LOG_LEVEL': 'INFO',\n", 106 | " 'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'}\n", 107 | "2021-01-04 18:41:20 [scrapy.extensions.telnet] INFO: Telnet Password: 28e109c5e0bb7348\n", 108 | "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled extensions:\n", 109 | "['scrapy.extensions.corestats.CoreStats',\n", 110 | " 'scrapy.extensions.telnet.TelnetConsole',\n", 111 | " 'scrapy.extensions.memusage.MemoryUsage',\n", 112 | " 'scrapy.extensions.feedexport.FeedExporter',\n", 113 | " 'scrapy.extensions.logstats.LogStats']\n", 114 | "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled downloader middlewares:\n", 115 | "['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',\n", 116 | " 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',\n", 117 | " 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',\n", 118 | " 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',\n", 119 | " 'scrapy.downloadermiddlewares.retry.RetryMiddleware',\n", 120 | " 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',\n", 121 | " 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',\n", 122 | " 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',\n", 123 | " 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',\n", 124 | " 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',\n", 125 | " 'scrapy.downloadermiddlewares.stats.DownloaderStats']\n", 126 | "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled spider middlewares:\n", 127 | "['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',\n", 128 | " 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',\n", 129 | " 'scrapy.spidermiddlewares.referer.RefererMiddleware',\n", 130 | " 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',\n", 131 | " 'scrapy.spidermiddlewares.depth.DepthMiddleware']\n", 132 | "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled item pipelines:\n", 133 | "[]\n", 134 | "2021-01-04 18:41:20 [scrapy.core.engine] INFO: Spider opened\n", 135 | "2021-01-04 18:41:20 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)\n", 136 | "2021-01-04 18:41:20 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6024\n" 137 | ] 138 | }, 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "" 143 | ] 144 | }, 145 | "execution_count": 4, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "process = CrawlerProcess({\n", 152 | " 'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',\n", 153 | " \"FEEDS\": {\n", 154 | " \"worldpresidentsdb.json\": {\n", 155 | " \"format\": \"jsonlines\",\n", 156 | " 'encoding': 'utf8',\n", 157 | " 'overwrite': True\n", 158 | " },\n", 159 | " },\n", 160 | " \"LOG_LEVEL\": \"INFO\"\n", 161 | "})\n", 162 | "\n", 163 | "process.crawl(WorldPresidents)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 5, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stderr", 173 | "output_type": "stream", 174 | "text": [ 175 | "2021-01-04 18:41:27 [scrapy.core.engine] INFO: Closing spider (finished)\n", 176 | "2021-01-04 18:41:27 [scrapy.extensions.feedexport] INFO: Stored jsonlines feed (544 items) in: worldpresidentsdb.json\n", 177 | "2021-01-04 18:41:27 [scrapy.statscollectors] INFO: Dumping Scrapy stats:\n", 178 | "{'downloader/request_bytes': 241323,\n", 179 | " 'downloader/request_count': 618,\n", 180 | " 'downloader/request_method_count/GET': 618,\n", 181 | " 'downloader/response_bytes': 1933100,\n", 182 | " 'downloader/response_count': 618,\n", 183 | " 'downloader/response_status_count/200': 618,\n", 184 | " 'dupefilter/filtered': 36,\n", 185 | " 'elapsed_time_seconds': 7.062385,\n", 186 | " 'finish_reason': 'finished',\n", 187 | " 'finish_time': datetime.datetime(2021, 1, 4, 23, 41, 27, 632916),\n", 188 | " 'item_scraped_count': 544,\n", 189 | " 'log_count/INFO': 11,\n", 190 | " 'memusage/max': 69611520,\n", 191 | " 'memusage/startup': 69611520,\n", 192 | " 'request_depth_max': 2,\n", 193 | " 'response_received_count': 618,\n", 194 | " 'scheduler/dequeued': 618,\n", 195 | " 'scheduler/dequeued/memory': 618,\n", 196 | " 'scheduler/enqueued': 618,\n", 197 | " 'scheduler/enqueued/memory': 618,\n", 198 | " 'start_time': datetime.datetime(2021, 1, 4, 23, 41, 20, 570531)}\n", 199 | "2021-01-04 18:41:27 [scrapy.core.engine] INFO: Spider closed (finished)\n" 200 | ] 201 | }, 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "CPU times: user 6.19 s, sys: 59.6 ms, total: 6.25 s\n", 207 | "Wall time: 7.09 s\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "%%time\n", 213 | "process.start()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [] 222 | } 223 | ], 224 | "metadata": { 225 | "kernelspec": { 226 | "display_name": "Python 3", 227 | "language": "python", 228 | "name": "python3" 229 | }, 230 | "language_info": { 231 | "codemirror_mode": { 232 | "name": "ipython", 233 | "version": 3 234 | }, 235 | "file_extension": ".py", 236 | "mimetype": "text/x-python", 237 | "name": "python", 238 | "nbconvert_exporter": "python", 239 | "pygments_lexer": "ipython3", 240 | "version": "3.7.3" 241 | } 242 | }, 243 | "nbformat": 4, 244 | "nbformat_minor": 4 245 | } 246 | -------------------------------------------------------------------------------- /images/olympic_athlete.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/napsternxg/ipython-notebooks/f327874fcf7b99c19c82919ad1cdba01660d2ef1/images/olympic_athlete.PNG -------------------------------------------------------------------------------- /maxContigSum.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 33, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Max contiguous sum in [5, 15, -30, 10, -5, 40, 10, -20] is 55 : [10, -5, 40, 10]\n", 15 | "is 55\n", 16 | "is 55\n", 17 | "is 55\n", 18 | "is 55 : [10, -5, 40, 10]\n" 19 | ] 20 | } 21 | ], 22 | "source": [ 23 | "''' A collection of approaches to the maximum contiguous sequence sum problem.\n", 24 | "Ends up recursive, but no multiple reuse, so not really dynamic programming.\n", 25 | "'''\n", 26 | "def maxContigSum1(x):\n", 27 | " '''Basic idea first, storing intermediate results in an array.'''\n", 28 | " n = len(x) \n", 29 | " endingHere = [0]*n # endingHere[i] = best sum ending at i:my be length 0.\n", 30 | " maxSoFar = endingHere[0] = max(0, x[0])\n", 31 | " for i in range (1, n): # 1..n-1\n", 32 | " endingHere[i] = max(endingHere[i-1] + x[i], 0)\n", 33 | " maxSoFar = max(maxSoFar, endingHere[i])\n", 34 | " return maxSoFar\n", 35 | "\n", 36 | "def maxContigSum(x):\n", 37 | " '''Don't need the second array since only use previous value!'''\n", 38 | " n = len(x) \n", 39 | " endingHere = 0\n", 40 | " maxSoFar = 0\n", 41 | " for i in range (0, n): # 1..n-1\n", 42 | " endingHere = max(endingHere + x[i], 0)\n", 43 | " maxSoFar = max(maxSoFar, endingHere)\n", 44 | " return maxSoFar\n", 45 | "\n", 46 | "def maxContigSumWithLocation(x):\n", 47 | " '''Return (maxSum, startI, endI): value and location in sequence.'''\n", 48 | " n = len(x) \n", 49 | " endingHere = 0\n", 50 | " maxSoFar = 0\n", 51 | " bestStartI = 0 # start of optimal seq so far\n", 52 | " bestEndI = -1 #end of optimal seq so far - initially empty\n", 53 | " curStartI = 0 # start of best seq ending at current position\n", 54 | " for i in range (0, n): # 1..n-1\n", 55 | " endingHere = max(endingHere + x[i], 0)\n", 56 | " if endingHere == 0:\n", 57 | " curStartI = i+1\n", 58 | " elif maxSoFar < endingHere:\n", 59 | " bestStartI = curStartI\n", 60 | " bestEndI = i\n", 61 | " maxSoFar = endingHere \n", 62 | " return (maxSoFar, bestStartI, bestEndI)\n", 63 | "\n", 64 | "def maxContigSumNeg(x):\n", 65 | " # Handles case with negative number.\n", 66 | " # Taken from https://en.wikipedia.org/wiki/Maximum_subarray_problem\n", 67 | " endingHere = x[0]\n", 68 | " maxSoFar = x[0]\n", 69 | " for i in x[1:]:\n", 70 | " endingHere = max(endingHere + i, i)\n", 71 | " maxSoFar = max(endingHere, maxSoFar)\n", 72 | " return maxSoFar\n", 73 | "\n", 74 | "def maxContigSumNegWithLoc(x):\n", 75 | " # Handles case with negative number.\n", 76 | " # Modified version of the functions above\n", 77 | " endingHere = x[0]\n", 78 | " maxSoFar = x[0]\n", 79 | " bestStartI = 0\n", 80 | " bestEndI = 0\n", 81 | " curStartI = 0\n", 82 | " for i in range(1,len(x)):\n", 83 | " endingHere = max(endingHere + x[i], x[i])\n", 84 | " if endingHere == x[i]:\n", 85 | " curStartI = i\n", 86 | " if maxSoFar < endingHere:\n", 87 | " bestStartI = curStartI\n", 88 | " bestEndI = i\n", 89 | " maxSoFar = endingHere\n", 90 | " return (maxSoFar, bestStartI, bestEndI)\n", 91 | "\n", 92 | "#### Rest is for displaying results ############################\n", 93 | "def showMaxSum(nums):\n", 94 | " '''Display starting data and results.'''\n", 95 | " (maxSum, startI, endI) = maxContigSumWithLocation(nums)\n", 96 | " print \"Max contiguous sum in \", nums, \n", 97 | " print \"is\", maxSum, \": \", nums[startI : endI +1]\n", 98 | " print \"is\", maxContigSum(nums)\n", 99 | " print \"is\", maxContigSum1(nums)\n", 100 | " print \"is\", maxContigSumNeg(nums)\n", 101 | " (maxSum, startI, endI) = maxContigSumNegWithLoc(nums)\n", 102 | " print \"is\", maxSum, \": \", nums[startI : endI +1]\n", 103 | " \n", 104 | " \n", 105 | "showMaxSum([5, 15, -30, 10, -5, 40, 10, -20])" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 34, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Max contiguous sum in [-5, -15, -30, -10, -5, -40, -10, -20] is 0 : []\n", 120 | "is 0\n", 121 | "is 0\n", 122 | "is -5\n", 123 | "is -5 : [-5]\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "showMaxSum([-5, -15, -30, -10, -5, -40, -10, -20])" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 35, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "Max contiguous sum in [-50, -15, -30, -10, -5, -40, -10, -20] is 0 : []\n", 143 | "is 0\n", 144 | "is 0\n", 145 | "is -5\n", 146 | "is -5 : [-5]\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "showMaxSum([-50, -15, -30, -10, -5, -40, -10, -20])" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": true 159 | }, 160 | "outputs": [], 161 | "source": [] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [] 171 | } 172 | ], 173 | "metadata": { 174 | "kernelspec": { 175 | "display_name": "Python 2", 176 | "language": "python", 177 | "name": "python2" 178 | }, 179 | "language_info": { 180 | "codemirror_mode": { 181 | "name": "ipython", 182 | "version": 2 183 | }, 184 | "file_extension": ".py", 185 | "mimetype": "text/x-python", 186 | "name": "python", 187 | "nbconvert_exporter": "python", 188 | "pygments_lexer": "ipython2", 189 | "version": "2.7.9" 190 | } 191 | }, 192 | "nbformat": 4, 193 | "nbformat_minor": 0 194 | } 195 | -------------------------------------------------------------------------------- /monte_carlo_circle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/napsternxg/ipython-notebooks/f327874fcf7b99c19c82919ad1cdba01660d2ef1/monte_carlo_circle.png -------------------------------------------------------------------------------- /olympic_athlete_data_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Download Olympic Athlete Data from official website\n", 8 | "\n", 9 | "* Get athete page name sitemaps from: https://olympics.com/en/xml-sitemap/\n", 10 | "```xml\n", 11 | "\n", 12 | "https://olympics.com/en/xml-sitemap/custom/athlete/1/50000\n", 13 | "\n", 14 | "\n", 15 | "https://olympics.com/en/xml-sitemap/custom/athlete/2/50000\n", 16 | "\n", 17 | "\n", 18 | "https://olympics.com/en/xml-sitemap/custom/athlete/3/50000\n", 19 | "\n", 20 | "\n", 21 | "https://olympics.com/en/xml-sitemap/custom/athlete/4/50000\n", 22 | "\n", 23 | "```\n", 24 | "* Get individual athlete pages from each site map: https://olympics.com/en/xml-sitemap/custom/athlete/1/50000\n", 25 | "```xml\n", 26 | "\n", 27 | "https://olympics.com/en/athletes/grant-holloway\n", 28 | "2024-03-05T23:02:24.551Z\n", 29 | "\n", 30 | "```\n", 31 | "* For each page get the data from the server script tag at the end of source: https://olympics.com/en/athletes/antoine-dupont\n", 32 | "```html\n", 33 | "\n", 34 | "