├── .gitignore
├── Adversarial_Autoencoder.ipynb
├── Author Topic Model.ipynb
├── Best Worst Scaling.ipynb
├── Binary Search.ipynb
├── Card24.ipynb
├── Charts Experiments.ipynb
├── Company Mission Statements.ipynb
├── Dictionary_lookup_with_default.ipynb
├── Dynamax.ipynb
├── Dynamic Programming.ipynb
├── Extract_unique_triadic_edges_from_triads.ipynb
├── FastRG - Python implementation.ipynb
├── Fourier Transform.ipynb
├── GMM Clusters.ipynb
├── Gamma Distribution.ipynb
├── Graph Diffusion.ipynb
├── Graph_Feature_Propagation.ipynb
├── IMCMC.ipynb
├── IRCTC Data Hack.ipynb
├── ITE.ipynb
├── Infinite Mixture Models.ipynb
├── KMeans_Functional.ipynb
├── Keras Char Word LSTM.ipynb
├── Keras Demo.ipynb
├── Keras attention.ipynb
├── Keras_Elmo.ipynb
├── LICENSE
├── LazyValues.ipynb
├── Lightweight_coreset_construction.ipynb
├── Likelihood+ratio.ipynb
├── Logistic Regression.ipynb
├── MazeSolving.ipynb
├── Mean_Media_Mode.ipynb
├── Monte Carlo Integration.ipynb
├── Monte Carlo.ipynb
├── MultiTask_Transformer_for_Token_Classification.ipynb
├── Naive Sudoku Solver.ipynb
├── Plotting Decision Boundaries.ipynb
├── Positive Semi-definite Matrix.ipynb
├── Prediction versus Explanation.ipynb
├── Programming assignments.ipynb
├── PyMC Disaster.ipynb
├── PyMC Testing.ipynb
├── PyMC_LDA.ipynb
├── Quora Haqathon.ipynb
├── README.md
├── Regression Coefficient Significance.ipynb
├── ReinforcementLearning.ipynb
├── SVD_Vis.ipynb
├── Self-attention.ipynb
├── Slide Notebooks
    ├── IMO 2021 - Problem 2.ipynb
    └── Product of consecutive numbers.ipynb
├── Stable_Craiyon.ipynb
├── Student Debt Over time.ipynb
├── System_Identification_DMD_Control_Example.ipynb
├── Time Dependent Models.ipynb
├── Unattacked Queens.ipynb
├── World Leaders DB.ipynb
├── data
    └── isl_wise_train_detail_03082015_v1.csv
├── images
    └── olympic_athlete.PNG
├── maxContigSum.ipynb
├── monte_carlo_circle.png
├── olympic_athlete_data_download.ipynb
├── temp.tsv
└── worldpresidentsdb.json


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 
59 | #iPython
60 | .ipynb_checkpoints/
61 | 


--------------------------------------------------------------------------------
/Author Topic Model.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "slideshow": {
  7 |      "slide_type": "slide"
  8 |     }
  9 |    },
 10 |    "source": [
 11 |     "# Author Topic Model\n",
 12 |     "\n",
 13 |     "Implementation as described in http://mimno.infosci.cornell.edu/info6150/readings/398.pdf"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 1,
 19 |    "metadata": {
 20 |     "collapsed": true,
 21 |     "slideshow": {
 22 |      "slide_type": "subslide"
 23 |     }
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "%matplotlib inline\n",
 28 |     "\n",
 29 |     "import matplotlib.pyplot as plt\n",
 30 |     "import numpy as np"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 69,
 36 |    "metadata": {
 37 |     "collapsed": false,
 38 |     "slideshow": {
 39 |      "slide_type": "notes"
 40 |     }
 41 |    },
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "[[ 0.  7.  7.  7.  5.]\n",
 48 |       " [ 1.  7.  5.  1.  0.]\n",
 49 |       " [ 5.  5.  6.  9.  2.]\n",
 50 |       " [ 4.  6.  0.  7.  2.]]\n",
 51 |       "[1 2 1 2 3]\n",
 52 |       "['V0' 'V1' 'V2' 'V3' 'V4' 'V5' 'V6' 'V7' 'V8' 'V9']\n",
 53 |       "[[  0.  14.   7.  14.  15.]\n",
 54 |       " [  1.  14.   5.   2.   0.]\n",
 55 |       " [  5.  10.   6.  18.   6.]\n",
 56 |       " [  4.  12.   0.  14.   6.]]\n"
 57 |      ]
 58 |     },
 59 |     {
 60 |      "data": {
 61 |       "text/plain": [
 62 |        "array([['V2', 'V1', 'V0', 'V2', 'V0'],\n",
 63 |        "       ['V3', 'V0', 'V2', 'V3', 'V3']], \n",
 64 |        "      dtype='|S2')"
 65 |       ]
 66 |      },
 67 |      "execution_count": 69,
 68 |      "metadata": {},
 69 |      "output_type": "execute_result"
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "np.ones([10])[[2,3,4], np.newaxis].repeat(5, axis=1)\n",
 74 |     "\n",
 75 |     "_a = np.random.randint(0,10,size=(4,5)) * 1.0\n",
 76 |     "print _a\n",
 77 |     "_b = np.array([1,2,1,2,3])\n",
 78 |     "print _b\n",
 79 |     "_c = np.array([\"V%s\" % k for k in xrange(10)])\n",
 80 |     "print _c\n",
 81 |     "print (_a * _b)\n",
 82 |     "_c[np.argsort(_a, axis=0)[::-1, :][:2, :]]"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 70,
 88 |    "metadata": {
 89 |     "collapsed": true,
 90 |     "slideshow": {
 91 |      "slide_type": "fragment"
 92 |     }
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "class AuthorTopicModel(object):\n",
 97 |     "    \"\"\"Implementation of an author topic model.\n",
 98 |     "    Each word in a document is assigned an author and topic pair,\n",
 99 |     "    which is used to generate that word.\n",
100 |     "    \"\"\"\n",
101 |     "    \n",
102 |     "    def __init__(self, K, doc_word_matrix, doc_author_matrix, vocab, authornames, alpha=0.1, beta=0.5):\n",
103 |     "        \"\"\"Constructor for the model\n",
104 |     "        K: number of topics\n",
105 |     "        doc_word_matrix: list of documents each represented as list of word ids\n",
106 |     "        doc_author_matrix: list of documents each represented as list of author ids\n",
107 |     "        vocab: numpy array of word strings indexed by word id\n",
108 |     "        authornames: numpy array of author names indexed by author id\n",
109 |     "        alpha: Author topic Dirichlet parameter\n",
110 |     "        beta: Word topic Dirichlet parameter        \n",
111 |     "        \"\"\"\n",
112 |     "        self.K = K\n",
113 |     "        self.doc_word_matrix = doc_word_matrix\n",
114 |     "        self.doc_author_matrix = doc_author_matrix\n",
115 |     "        self.N = len(doc_word_matrix)\n",
116 |     "        self.vocab = vocab\n",
117 |     "        self.W = len(vocab)\n",
118 |     "        self.authornames= authornames\n",
119 |     "        self.A = len(authornames)\n",
120 |     "        self.alpha = alpha\n",
121 |     "        self.beta = beta\n",
122 |     "        \n",
123 |     "        self.W_T = np.zeros([self.W, self.K])\n",
124 |     "        self.A_T = np.zeros([self.A, self.K])\n",
125 |     "        \n",
126 |     "        self.T_marginal = np.zeros(self.K)\n",
127 |     "        self.A_marginal = np.zeros(self.A)\n",
128 |     "        \n",
129 |     "        self.T_assigned = []\n",
130 |     "        self.A_assigned = []\n",
131 |     "        self._populate_vars()\n",
132 |     "        \n",
133 |     "    def _populate_vars(self):\n",
134 |     "        \"\"\"Populate the variables with the initial data\n",
135 |     "        \"\"\"\n",
136 |     "        for di, doc in enumerate(self.doc_word_matrix):\n",
137 |     "            auth = self.doc_author_matrix[di]\n",
138 |     "            self.T_assigned.append([])\n",
139 |     "            self.A_assigned.append([])\n",
140 |     "            for wi, w in enumerate(doc):\n",
141 |     "                # Randomly assign a topic to the word\n",
142 |     "                z = np.random.choice(self.K)\n",
143 |     "                # Randomly assign an author to the word\n",
144 |     "                a = np.random.choice(self.A)\n",
145 |     "                # Update all the word, topic and author topic counts\n",
146 |     "                self.W_T[w,z] += 1\n",
147 |     "                self.A_T[a,z] += 1\n",
148 |     "                # Update marginals\n",
149 |     "                self.T_marginal[z] += 1\n",
150 |     "                self.A_marginal[a] += 1\n",
151 |     "                # Record the sampled topic and author assignments\n",
152 |     "                self.T_assigned[-1].append(z)\n",
153 |     "                self.A_assigned[-1].append(a)\n",
154 |     "    \n",
155 |     "    def gibbs_sampling(self):\n",
156 |     "        \"\"\"Perform single gibbs sampling step\n",
157 |     "        \"\"\"\n",
158 |     "        for di, doc in enumerate(self.doc_word_matrix):\n",
159 |     "            auth = self.doc_author_matrix[di]\n",
160 |     "            for wi, w in enumerate(doc):\n",
161 |     "                # Extract the previous assignment\n",
162 |     "                z = self.T_assigned[di][wi]\n",
163 |     "                a = self.A_assigned[di][wi]\n",
164 |     "                # Subtract the previous assignments\n",
165 |     "                # Update all the word, topic and author topic counts\n",
166 |     "                self.W_T[w,z] -= 1\n",
167 |     "                self.A_T[a,z] -= 1\n",
168 |     "                # Update marginals\n",
169 |     "                self.T_marginal[z] -= 1\n",
170 |     "                self.A_marginal[a] -= 1\n",
171 |     "                \n",
172 |     "                # Find probability of the word w belonging to each topic\n",
173 |     "                phi = (self.W_T[w,:] + self.beta) / (self.T_marginal + self.W*self.beta)\n",
174 |     "                # Find probability of each author in auth belonging to each topic\n",
175 |     "                theta = (self.A_T[auth,:] + self.alpha) / (self.A_marginal[auth, np.newaxis] + self.W*self.alpha)\n",
176 |     "                # Joint probability of word and author for all topics\n",
177 |     "                pdf = theta*phi\n",
178 |     "                pdf = pdf / pdf.sum()\n",
179 |     "                # Index of authors and topics\n",
180 |     "                auth_t_pairs = [(i,j) for i in auth for j in xrange(self.K)]\n",
181 |     "                # Sample an author and topic pair for the word\n",
182 |     "                #print auth_t_pairs, p.flatten()\n",
183 |     "                idx = np.random.choice(range(len(auth_t_pairs)), p=pdf.flatten())\n",
184 |     "                a, z = auth_t_pairs[idx]\n",
185 |     "                # Update all the word, topic and author topic counts\n",
186 |     "                self.W_T[w,z] += 1\n",
187 |     "                self.A_T[a,z] += 1\n",
188 |     "                # Update marginals\n",
189 |     "                self.T_marginal[z] += 1\n",
190 |     "                self.A_marginal[a] += 1\n",
191 |     "                # Record the sampled topic and author assignments\n",
192 |     "                self.T_assigned[di][wi] = z\n",
193 |     "                self.A_assigned[di][wi] = a\n",
194 |     "    \n",
195 |     "    def perform_iterations(self, burnin=100, max_iters=10, print_every=5):\n",
196 |     "        \"\"\"Perform max_iters of gibbs sampling steps\n",
197 |     "        \"\"\"\n",
198 |     "        print \"Performing %s gibbs sampling iterations burn in phase\" % burnin\n",
199 |     "        for i in xrange(burnin):\n",
200 |     "            self.gibbs_sampling()\n",
201 |     "        print \"Burn in complete\"\n",
202 |     "        print \"Topic proportions: %s\" % (self.T_marginal * 1. / self.T_marginal.sum())\n",
203 |     "        print \"Author proportions: %s\" % (self.A_marginal * 1. / self.A_marginal.sum())\n",
204 |     "        print \"W_T[w,z]:\\n%s\" % (self.W_T * 1./ self.W_T.sum())\n",
205 |     "        print \"A_T[a,z]:\\n%s\" % (self.A_T * 1./ self.A_T.sum())\n",
206 |     "        print \"Performing %s gibbs sampling iterations\" % max_iters\n",
207 |     "        for i in xrange(max_iters):\n",
208 |     "            if i%print_every == 0:\n",
209 |     "                print \"Iter %s:\" % i\n",
210 |     "                self.gibbs_sampling()\n",
211 |     "                print \"Topic proportions: %s\" % (self.T_marginal * 1. / self.T_marginal.sum())\n",
212 |     "                print \"Author proportions: %s\" % (self.A_marginal * 1. / self.A_marginal.sum())\n",
213 |     "                print \"W_T[w,z]:\\n%s\" % (self.W_T * 1./ self.W_T.sum())\n",
214 |     "                print \"A_T[a,z]:\\n%s\" % (self.A_T * 1./ self.A_T.sum())\n",
215 |     "        print \"Done\"\n",
216 |     "    \n",
217 |     "    def show_topics(self, topn_w=3, topn_a=3):\n",
218 |     "        print \"Top %s words per topic\" % topn_w\n",
219 |     "        print self.vocab[np.argsort(self.W_T, axis=0)[::-1, :][:topn_w, :]]\n",
220 |     "        print \"Top %s authors per topic\" % topn_a\n",
221 |     "        print self.authornames[np.argsort(self.A_T, axis=0)[::-1, :][:topn_a, :]]\n",
222 |     "        \n",
223 |     "    "
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": 71,
229 |    "metadata": {
230 |     "collapsed": false,
231 |     "slideshow": {
232 |      "slide_type": "slide"
233 |     }
234 |    },
235 |    "outputs": [
236 |     {
237 |      "data": {
238 |       "text/plain": [
239 |        "(3,\n",
240 |        " [[0, 0, 0, 1, 2, 1],\n",
241 |        "  [0, 0, 1, 1, 1, 1, 1],\n",
242 |        "  [2, 2, 2, 3, 3, 3],\n",
243 |        "  [0, 2, 2, 2, 3, 3, 1],\n",
244 |        "  [4, 4, 4, 0, 5, 5, 2],\n",
245 |        "  [4, 5, 5, 3, 0, 5, 5, 1]],\n",
246 |        " [[0, 1], [1, 2], [0, 1, 2], [2, 3], [4, 5, 3], [4, 5]],\n",
247 |        " array(['V0', 'V1', 'V2', 'V3', 'V4', 'V5'], \n",
248 |        "       dtype='|S2'),\n",
249 |        " array(['A0', 'A1', 'A2', 'A3', 'A4', 'A5'], \n",
250 |        "       dtype='|S2'))"
251 |       ]
252 |      },
253 |      "execution_count": 71,
254 |      "metadata": {},
255 |      "output_type": "execute_result"
256 |     }
257 |    ],
258 |    "source": [
259 |     "K = 3\n",
260 |     "doc_word_matrix = [[0,0,0,1,2,1],\n",
261 |     "                  [0,0,1,1,1,1,1],\n",
262 |     "                  [2,2,2,3,3,3],\n",
263 |     "                  [0,2,2,2,3,3,1],\n",
264 |     "                  [4,4,4,0,5,5,2],\n",
265 |     "                  [4,5,5,3,0,5,5,1]]\n",
266 |     "doc_author_matrix = [[0,1],\n",
267 |     "                     [1,2],\n",
268 |     "                     [0,1,2],\n",
269 |     "                     [2,3],\n",
270 |     "                     [4,5,3],\n",
271 |     "                     [4,5]]\n",
272 |     "vocab = np.array([\"V%s\" % k for k in xrange(6)])\n",
273 |     "authornames = np.array([\"A%s\" % k for k in xrange(6)])\n",
274 |     "\n",
275 |     "K, doc_word_matrix, doc_author_matrix, vocab, authornames"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 72,
281 |    "metadata": {
282 |     "collapsed": false,
283 |     "slideshow": {
284 |      "slide_type": "slide"
285 |     }
286 |    },
287 |    "outputs": [],
288 |    "source": [
289 |     "atm = AuthorTopicModel(K, doc_word_matrix, doc_author_matrix, vocab, authornames)"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": 73,
295 |    "metadata": {
296 |     "collapsed": false,
297 |     "scrolled": false,
298 |     "slideshow": {
299 |      "slide_type": "subslide"
300 |     }
301 |    },
302 |    "outputs": [
303 |     {
304 |      "name": "stdout",
305 |      "output_type": "stream",
306 |      "text": [
307 |       "Performing 100 gibbs sampling iterations burn in phase\n",
308 |       "Burn in complete\n",
309 |       "Topic proportions: [ 0.24390244  0.51219512  0.24390244]\n",
310 |       "Author proportions: [ 0.19512195  0.14634146  0.14634146  0.14634146  0.2195122   0.14634146]\n",
311 |       "W_T[w,z]:\n",
312 |       "[[ 0.02439024  0.14634146  0.02439024]\n",
313 |       " [ 0.          0.12195122  0.09756098]\n",
314 |       " [ 0.04878049  0.04878049  0.09756098]\n",
315 |       " [ 0.          0.12195122  0.02439024]\n",
316 |       " [ 0.09756098  0.          0.        ]\n",
317 |       " [ 0.07317073  0.07317073  0.        ]]\n",
318 |       "A_T[a,z]:\n",
319 |       "[[ 0.          0.17073171  0.02439024]\n",
320 |       " [ 0.02439024  0.12195122  0.        ]\n",
321 |       " [ 0.          0.07317073  0.07317073]\n",
322 |       " [ 0.          0.          0.14634146]\n",
323 |       " [ 0.2195122   0.          0.        ]\n",
324 |       " [ 0.          0.14634146  0.        ]]\n",
325 |       "Performing 10 gibbs sampling iterations\n",
326 |       "Iter 0:\n",
327 |       "Topic proportions: [ 0.24390244  0.48780488  0.26829268]\n",
328 |       "Author proportions: [ 0.12195122  0.19512195  0.24390244  0.12195122  0.24390244  0.07317073]\n",
329 |       "W_T[w,z]:\n",
330 |       "[[ 0.          0.12195122  0.07317073]\n",
331 |       " [ 0.          0.14634146  0.07317073]\n",
332 |       " [ 0.          0.12195122  0.07317073]\n",
333 |       " [ 0.          0.09756098  0.04878049]\n",
334 |       " [ 0.09756098  0.          0.        ]\n",
335 |       " [ 0.14634146  0.          0.        ]]\n",
336 |       "A_T[a,z]:\n",
337 |       "[[ 0.          0.09756098  0.02439024]\n",
338 |       " [ 0.          0.19512195  0.        ]\n",
339 |       " [ 0.          0.12195122  0.12195122]\n",
340 |       " [ 0.          0.          0.12195122]\n",
341 |       " [ 0.24390244  0.          0.        ]\n",
342 |       " [ 0.          0.07317073  0.        ]]\n",
343 |       "Iter 5:\n",
344 |       "Topic proportions: [ 0.17073171  0.58536585  0.24390244]\n",
345 |       "Author proportions: [ 0.09756098  0.17073171  0.29268293  0.12195122  0.17073171  0.14634146]\n",
346 |       "W_T[w,z]:\n",
347 |       "[[ 0.          0.09756098  0.09756098]\n",
348 |       " [ 0.          0.2195122   0.        ]\n",
349 |       " [ 0.          0.17073171  0.02439024]\n",
350 |       " [ 0.          0.04878049  0.09756098]\n",
351 |       " [ 0.04878049  0.04878049  0.        ]\n",
352 |       " [ 0.12195122  0.          0.02439024]]\n",
353 |       "A_T[a,z]:\n",
354 |       "[[ 0.          0.09756098  0.        ]\n",
355 |       " [ 0.          0.17073171  0.        ]\n",
356 |       " [ 0.          0.2195122   0.07317073]\n",
357 |       " [ 0.          0.          0.12195122]\n",
358 |       " [ 0.17073171  0.          0.        ]\n",
359 |       " [ 0.          0.09756098  0.04878049]]\n",
360 |       "Done\n"
361 |      ]
362 |     }
363 |    ],
364 |    "source": [
365 |     "atm.perform_iterations(max_iters=10)"
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "code",
370 |    "execution_count": 74,
371 |    "metadata": {
372 |     "collapsed": false,
373 |     "slideshow": {
374 |      "slide_type": "slide"
375 |     }
376 |    },
377 |    "outputs": [
378 |     {
379 |      "name": "stdout",
380 |      "output_type": "stream",
381 |      "text": [
382 |       "Top 3 words per topic\n",
383 |       "[['V5' 'V1' 'V3']\n",
384 |       " ['V4' 'V2' 'V0']\n",
385 |       " ['V3' 'V0' 'V5']]\n",
386 |       "Top 3 authors per topic\n",
387 |       "[['A4' 'A2' 'A3']\n",
388 |       " ['A5' 'A1' 'A2']\n",
389 |       " ['A3' 'A5' 'A5']]\n"
390 |      ]
391 |     }
392 |    ],
393 |    "source": [
394 |     "atm.show_topics()"
395 |    ]
396 |   },
397 |   {
398 |    "cell_type": "code",
399 |    "execution_count": null,
400 |    "metadata": {
401 |     "collapsed": true,
402 |     "slideshow": {
403 |      "slide_type": "slide"
404 |     }
405 |    },
406 |    "outputs": [],
407 |    "source": []
408 |   }
409 |  ],
410 |  "metadata": {
411 |   "celltoolbar": "Slideshow",
412 |   "kernelspec": {
413 |    "display_name": "Python 2",
414 |    "language": "python",
415 |    "name": "python2"
416 |   },
417 |   "language_info": {
418 |    "codemirror_mode": {
419 |     "name": "ipython",
420 |     "version": 2
421 |    },
422 |    "file_extension": ".py",
423 |    "mimetype": "text/x-python",
424 |    "name": "python",
425 |    "nbconvert_exporter": "python",
426 |    "pygments_lexer": "ipython2",
427 |    "version": "2.7.11"
428 |   }
429 |  },
430 |  "nbformat": 4,
431 |  "nbformat_minor": 0
432 | }
433 | 


--------------------------------------------------------------------------------
/Best Worst Scaling.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Code inspired by: \n",
  8 |     "\n",
  9 |     "* https://github.com/valeriobasile/bwstuples/blob/master/bws.py\n",
 10 |     "* http://valeriobasile.github.io/Best-worst-scaling-and-the-clock-of-Gauss/"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "from math import gcd"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 2,
 25 |    "metadata": {},
 26 |    "outputs": [
 27 |     {
 28 |      "data": {
 29 |       "text/plain": [
 30 |        "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]"
 31 |       ]
 32 |      },
 33 |      "execution_count": 2,
 34 |      "metadata": {},
 35 |      "output_type": "execute_result"
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "instances = list(range(10))\n",
 40 |     "instances"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": 3,
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "k = 4\n",
 50 |     "p = 3"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 4,
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "def bws_generation(instances, k, p):\n",
 60 |     "    n = len(instances)\n",
 61 |     "    \n",
 62 |     "    while gcd(n, k) != 1:\n",
 63 |     "        print(n, n-1, gcd(n, k))\n",
 64 |     "        n = max(n-1, 0)\n",
 65 |     "        \n",
 66 |     "    for j in range(p):\n",
 67 |     "        for x in range(n//k):\n",
 68 |     "            prefix = x*(k**(j+1))\n",
 69 |     "            t = [\n",
 70 |     "                (prefix + (i*(k**j))) % n \n",
 71 |     "                for i in range(k)\n",
 72 |     "            ]\n",
 73 |     "            yield t"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 5,
 79 |    "metadata": {},
 80 |    "outputs": [
 81 |     {
 82 |      "name": "stdout",
 83 |      "output_type": "stream",
 84 |      "text": [
 85 |       "10 9 2\n",
 86 |       "[0, 1, 2, 3]\n",
 87 |       "[4, 5, 6, 7]\n",
 88 |       "[0, 4, 8, 3]\n",
 89 |       "[7, 2, 6, 1]\n",
 90 |       "[0, 7, 5, 3]\n",
 91 |       "[1, 8, 6, 4]\n"
 92 |      ]
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "for t in bws_generation(instances, k, p):\n",
 97 |     "    print(t)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "We can use the tuples generated to ask the annotators to pick the most positive and most negative instance. "
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": null,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": []
113 |   }
114 |  ],
115 |  "metadata": {
116 |   "kernelspec": {
117 |    "display_name": "Python 3",
118 |    "language": "python",
119 |    "name": "python3"
120 |   },
121 |   "language_info": {
122 |    "codemirror_mode": {
123 |     "name": "ipython",
124 |     "version": 3
125 |    },
126 |    "file_extension": ".py",
127 |    "mimetype": "text/x-python",
128 |    "name": "python",
129 |    "nbconvert_exporter": "python",
130 |    "pygments_lexer": "ipython3",
131 |    "version": "3.7.3"
132 |   }
133 |  },
134 |  "nbformat": 4,
135 |  "nbformat_minor": 4
136 | }
137 | 


--------------------------------------------------------------------------------
/Binary Search.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 16,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "def binary_search(arr, low, high, x):\n",
 12 |     "    if low > high:\n",
 13 |     "        return -1\n",
 14 |     "    mid = (low+high)/2\n",
 15 |     "    print \"Searching in range: %s, %s\" % (low, high)\n",
 16 |     "    print \"arr[%s]=%s\" % (mid, arr[mid])\n",
 17 |     "    if arr[mid] == x:\n",
 18 |     "        return mid\n",
 19 |     "    if arr[mid] < x:\n",
 20 |     "        return binary_search(arr, mid+1, high, x)\n",
 21 |     "    return binary_search(arr, low, mid -1, x)"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 17,
 27 |    "metadata": {
 28 |     "collapsed": false
 29 |    },
 30 |    "outputs": [
 31 |     {
 32 |      "name": "stdout",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "Searching in range: 0, 9\n",
 36 |       "arr[4]=4\n",
 37 |       "Searching in range: 5, 9\n",
 38 |       "arr[7]=7\n",
 39 |       "Searching in range: 5, 6\n",
 40 |       "arr[5]=5\n"
 41 |      ]
 42 |     },
 43 |     {
 44 |      "data": {
 45 |       "text/plain": [
 46 |        "5"
 47 |       ]
 48 |      },
 49 |      "execution_count": 17,
 50 |      "metadata": {},
 51 |      "output_type": "execute_result"
 52 |     }
 53 |    ],
 54 |    "source": [
 55 |     "arr = range(10)\n",
 56 |     "binary_search(arr, 0, len(arr)-1, 5)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 18,
 62 |    "metadata": {
 63 |     "collapsed": false
 64 |    },
 65 |    "outputs": [
 66 |     {
 67 |      "name": "stdout",
 68 |      "output_type": "stream",
 69 |      "text": [
 70 |       "Searching in range: 0, 1\n",
 71 |       "arr[0]=0\n",
 72 |       "Searching in range: 1, 1\n",
 73 |       "arr[1]=5\n"
 74 |      ]
 75 |     },
 76 |     {
 77 |      "data": {
 78 |       "text/plain": [
 79 |        "1"
 80 |       ]
 81 |      },
 82 |      "execution_count": 18,
 83 |      "metadata": {},
 84 |      "output_type": "execute_result"
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "arr = [0,5]\n",
 89 |     "binary_search(arr, 0, len(arr)-1, 5)"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {
 96 |     "collapsed": true
 97 |    },
 98 |    "outputs": [],
 99 |    "source": [
100 |     "def reverse(arr):\n",
101 |     "    for i in range()\n",
102 |     "def rotate(arr, i):\n",
103 |     "    "
104 |    ]
105 |   }
106 |  ],
107 |  "metadata": {
108 |   "kernelspec": {
109 |    "display_name": "Python 2",
110 |    "language": "python",
111 |    "name": "python2"
112 |   },
113 |   "language_info": {
114 |    "codemirror_mode": {
115 |     "name": "ipython",
116 |     "version": 2
117 |    },
118 |    "file_extension": ".py",
119 |    "mimetype": "text/x-python",
120 |    "name": "python",
121 |    "nbconvert_exporter": "python",
122 |    "pygments_lexer": "ipython2",
123 |    "version": "2.7.9"
124 |   }
125 |  },
126 |  "nbformat": 4,
127 |  "nbformat_minor": 0
128 | }
129 | 


--------------------------------------------------------------------------------
/Card24.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Find a mathematical expression which equals a given number using a set of numbers\n",
  8 |     "===========================================\n",
  9 |     "# Problem Statement\n",
 10 |     "The goal of this exercise is to write code which accepts a set of numbers and then tries to devise an arithmetic expression that yields a requested value, using the four basic arithmetic operations: addition, subtraction, multiplication and division. Each input number must be used exactly once in the expression. Division is applicable only to numbers that are divisible without remainder. All input numbers and the target number are integers greater than zero. There are no more than 5 input numbers and the target number is not larger than 1000.\n",
 11 |     "Example 1: Suppose that numbers 4, 8 and 9 are given and value 18 should be constructed. One solution is: 9 * 8 / 4.\n",
 12 |     "Example 2: If numbers 6, 7 and 9 are given, number 3 requested, then solution is: 6 / (9 - 7).\n",
 13 |     "\n",
 14 |     "# References\n",
 15 |     "Python implementation of the solution posted at http://www.codinghelmet.com/?path=exercises/expression-from-numbers"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 18,
 21 |    "metadata": {
 22 |     "collapsed": false
 23 |    },
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "from Queue import Queue"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 14,
 32 |    "metadata": {
 33 |     "collapsed": false
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "def SolveAndPrint(numbers, targetValue):\n",
 38 |     "    targetKey = (targetValue << len(numbers)) + (1 << len(numbers)) - 1\n",
 39 |     "    # (value << numbers.Length) represents expression value\n",
 40 |     "    # (1 << numbers.Length) - 1 represents mask with all bits set to 1,\n",
 41 |     "    # i.e. mask in which each input number has been used exactly once\n",
 42 |     "    # to build the expression.\n",
 43 |     "    \n",
 44 |     "    solvedKeys = set()\n",
 45 |     "    # Each number in the collection indicates that corresponding value + mask\n",
 46 |     "    # has been reached using arithmetical operations.\n",
 47 |     "    \n",
 48 |     "    keyToLeftParent  = dict()\n",
 49 |     "    # For each solved key (value + mask), there is an entry indicating\n",
 50 |     "    # result of the expression on the left side of the arithmetic\n",
 51 |     "    # operator. Missing value indicates that key represents the\n",
 52 |     "    # raw number (taken from the input list), rather than\n",
 53 |     "    # the result of a calculation.\n",
 54 |     "    \n",
 55 |     "    keyToRightParent = dict()\n",
 56 |     "    # Same as keyToLeftParent, only indicating the right parent\n",
 57 |     "    # used to build the expression.\n",
 58 |     "    \n",
 59 |     "    keyToOperator = dict()\n",
 60 |     "    # Indicates arithmetic operator used to build this node\n",
 61 |     "    # from left and right parent nodes. Missing value for a given key\n",
 62 |     "    # indicates that key is a raw value taken from input array,\n",
 63 |     "    # rather than result of an arithmetic operation.\n",
 64 |     "    \n",
 65 |     "    queue = Queue()\n",
 66 |     "    # Keys (value + mask pairs) that have not been processed yet\n",
 67 |     "\n",
 68 |     "    # First step is to initialize the structures:\n",
 69 |     "    # Add all input values into corresponding array entries and\n",
 70 |     "    # add them to the queue so that the operation can begin\n",
 71 |     "    for i in range(len(numbers)):\n",
 72 |     "        key = (numbers[i] << len(numbers)) + (1 << i)\n",
 73 |     "        solvedKeys.add(key)\n",
 74 |     "        queue.put(key)\n",
 75 |     "    \n",
 76 |     "    # Now expand entries one at a time until the queue is empty,\n",
 77 |     "    # i.e. until there are no new entries populated.\n",
 78 |     "    # Additional stopping condition is that target key has been generated,\n",
 79 |     "    # which indicates that problem has been solved and there is no need to\n",
 80 |     "    # expand nodes any further.\n",
 81 |     "    while (not queue.empty() > 0 and (targetKey not in solvedKeys)):\n",
 82 |     "        curKey = queue.get()\n",
 83 |     "\n",
 84 |     "        curMask = curKey & ((1 << len(numbers)) - 1)\n",
 85 |     "        curValue = curKey >> len(numbers)\n",
 86 |     "        \n",
 87 |     "        # Now first take a snapshot of all keys that\n",
 88 |     "        # have been reached because this collection is going to\n",
 89 |     "        # change during the following operation\n",
 90 |     "        keys = solvedKeys.copy()\n",
 91 |     "\n",
 92 |     "        for keys_i in keys:\n",
 93 |     "            mask = keys_i & ((1 << len(numbers)) - 1)\n",
 94 |     "            value = keys_i >> len(numbers)\n",
 95 |     "\n",
 96 |     "            if ((mask & curMask) == 0):\n",
 97 |     "                # Masks are disjoint, i.e. two entries do not use\n",
 98 |     "                # the same input number twice.\n",
 99 |     "                # This is sufficient condition to combine the two entries\n",
100 |     "                for op in range(6):\n",
101 |     "                    opSign = '\\0'\n",
102 |     "                    newValue = 0\n",
103 |     "                    if op == 0: # Addition\n",
104 |     "                        newValue = curValue + value\n",
105 |     "                        opSign = '+'\n",
106 |     "                    elif op == 1: # Subtraction - another value subtracted from current\n",
107 |     "                        newValue = curValue - value\n",
108 |     "                        opSign = '-'\n",
109 |     "                    elif op == 2: # Subtraction - current value subtracted from another\n",
110 |     "                        newValue = value - curValue\n",
111 |     "                        opSign = '-'\n",
112 |     "                    elif op == 3: # Multiplication\n",
113 |     "                        newValue = curValue * value\n",
114 |     "                        opSign = '*'\n",
115 |     "                    elif op == 4: # Division - current divided by another\n",
116 |     "                        newValue = -1  # Indicates failure to divide\n",
117 |     "                        if (value != 0 and curValue % value == 0):\n",
118 |     "                            newValue = curValue / value\n",
119 |     "                        opSign = '/'\n",
120 |     "                    elif op == 5: # Division - other value divided by current\n",
121 |     "                        newValue = -1  # Indicates failure to divide\n",
122 |     "                        if (curValue != 0 and value % curValue == 0):\n",
123 |     "                            newValue = value / curValue\n",
124 |     "                        opSign = '/'\n",
125 |     "\n",
126 |     "                    if (newValue >= 0):\n",
127 |     "                        # Ignore negative values - they can always be created\n",
128 |     "                        # the other way around, by subtracting them\n",
129 |     "                        # from a larger value so that positive value is reached.\n",
130 |     "                        newMask = (curMask | mask)\n",
131 |     "                        # Combine the masks to indicate that all input numbers\n",
132 |     "                        # from both operands have been used to produce\n",
133 |     "                        # the resulting expression\n",
134 |     "                        \n",
135 |     "                        newKey = (newValue << len(numbers)) + newMask\n",
136 |     "                        if (newKey not in solvedKeys):\n",
137 |     "                            # We have reached a new entry.\n",
138 |     "                            # This expression should now be added\n",
139 |     "                            # to data structures and processed further\n",
140 |     "                            # in the following steps.\n",
141 |     "\n",
142 |     "                            # Populate entries that describe newly created expression\n",
143 |     "                            solvedKeys.add(newKey);\n",
144 |     "                            if (op == 2 or op == 5):\n",
145 |     "                                # Special cases - antireflexive operations\n",
146 |     "                                # with interchanged operands\n",
147 |     "                                keyToLeftParent[newKey]= keys_i\n",
148 |     "                                keyToRightParent[newKey] =  curKey\n",
149 |     "                            else:\n",
150 |     "                                keyToLeftParent[newKey] = curKey\n",
151 |     "                                keyToRightParent[newKey]= keys_i\n",
152 |     "                            keyToOperator[newKey] = opSign\n",
153 |     "                            # Add expression to list of reachable expressions\n",
154 |     "                            solvedKeys.add(newKey)\n",
155 |     "                            # Add expression to the queue for further expansion\n",
156 |     "                            queue.put(newKey)\n",
157 |     "    # Now print the solution if it has been found\n",
158 |     "    if (targetKey not in solvedKeys):\n",
159 |     "        print \"Solution has not been found.\"\n",
160 |     "    else:\n",
161 |     "        PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, targetKey, len(numbers))\n",
162 |     "        print \"={0}\".format(targetValue)\n"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 15,
168 |    "metadata": {
169 |     "collapsed": true
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "def PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, key, numbersCount):\n",
174 |     "    if (key not in keyToOperator):\n",
175 |     "        print \"{0}\".format(key >> numbersCount),\n",
176 |     "    else:\n",
177 |     "        print \"(\",\n",
178 |     "        # Recursively print the left operand\n",
179 |     "        PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, keyToLeftParent[key], numbersCount)\n",
180 |     "        # Then print the operation sign\n",
181 |     "        print keyToOperator[key],\n",
182 |     "        # Finally, print the right operand\n",
183 |     "        PrintExpression(keyToLeftParent, keyToRightParent, keyToOperator, keyToRightParent[key], numbersCount)\n",
184 |     "        print \")\","
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 23,
190 |    "metadata": {
191 |     "collapsed": false
192 |    },
193 |    "outputs": [
194 |     {
195 |      "name": "stdout",
196 |      "output_type": "stream",
197 |      "text": [
198 |       "Solution has not been found.\n"
199 |      ]
200 |     }
201 |    ],
202 |    "source": [
203 |     "SolveAndPrint([2,2,22], 24)"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": null,
209 |    "metadata": {
210 |     "collapsed": true
211 |    },
212 |    "outputs": [],
213 |    "source": []
214 |   }
215 |  ],
216 |  "metadata": {
217 |   "kernelspec": {
218 |    "display_name": "Python 2",
219 |    "language": "python",
220 |    "name": "python2"
221 |   },
222 |   "language_info": {
223 |    "codemirror_mode": {
224 |     "name": "ipython",
225 |     "version": 2
226 |    },
227 |    "file_extension": ".py",
228 |    "mimetype": "text/x-python",
229 |    "name": "python",
230 |    "nbconvert_exporter": "python",
231 |    "pygments_lexer": "ipython2",
232 |    "version": "2.7.9"
233 |   }
234 |  },
235 |  "nbformat": 4,
236 |  "nbformat_minor": 0
237 | }
238 | 


--------------------------------------------------------------------------------
/Dictionary_lookup_with_default.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Handling missing keys in dictionary lookups"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "data": {
 17 |       "text/plain": [
 18 |        "{'0': 0,\n",
 19 |        " '1': 1,\n",
 20 |        " '2': 2,\n",
 21 |        " '3': 3,\n",
 22 |        " '4': 4,\n",
 23 |        " '5': 5,\n",
 24 |        " '6': 6,\n",
 25 |        " '7': 7,\n",
 26 |        " '8': 8,\n",
 27 |        " '9': 9}"
 28 |       ]
 29 |      },
 30 |      "execution_count": 1,
 31 |      "metadata": {},
 32 |      "output_type": "execute_result"
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "a = {f\"{i}\": i for i in range(10)}\n",
 37 |     "a"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 2,
 43 |    "metadata": {},
 44 |    "outputs": [
 45 |     {
 46 |      "data": {
 47 |       "text/plain": [
 48 |        "11"
 49 |       ]
 50 |      },
 51 |      "execution_count": 2,
 52 |      "metadata": {},
 53 |      "output_type": "execute_result"
 54 |     }
 55 |    ],
 56 |    "source": [
 57 |     "def handle_missing():\n",
 58 |     "    return 11\n",
 59 |     "handle_missing()"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 3,
 65 |    "metadata": {},
 66 |    "outputs": [
 67 |     {
 68 |      "name": "stdout",
 69 |      "output_type": "stream",
 70 |      "text": [
 71 |       "68.8 ns ± 1.94 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n"
 72 |      ]
 73 |     }
 74 |    ],
 75 |    "source": [
 76 |     "%%timeit\n",
 77 |     "handle_missing()"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 4,
 83 |    "metadata": {},
 84 |    "outputs": [
 85 |     {
 86 |      "name": "stdout",
 87 |      "output_type": "stream",
 88 |      "text": [
 89 |       "119 ns ± 3.45 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n"
 90 |      ]
 91 |     }
 92 |    ],
 93 |    "source": [
 94 |     "%%timeit \n",
 95 |     "a.get(\"11\") or 11"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 5,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "name": "stdout",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "192 ns ± 6.18 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n"
108 |      ]
109 |     }
110 |    ],
111 |    "source": [
112 |     "%%timeit \n",
113 |     "a.get(\"11\") or handle_missing()"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 6,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "123 ns ± 5.15 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "%%timeit \n",
131 |     "a.get(\"11\", 11)"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 7,
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "name": "stdout",
141 |      "output_type": "stream",
142 |      "text": [
143 |       "185 ns ± 10.8 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "%%timeit \n",
149 |     "a.get(\"11\", handle_missing())"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 8,
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "name": "stdout",
159 |      "output_type": "stream",
160 |      "text": [
161 |       "360 ns ± 33 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
162 |      ]
163 |     }
164 |    ],
165 |    "source": [
166 |     "%%timeit \n",
167 |     "try:\n",
168 |     "    a[\"11\"]\n",
169 |     "except KeyError:\n",
170 |     "    handle_missing()"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 9,
176 |    "metadata": {},
177 |    "outputs": [
178 |     {
179 |      "name": "stdout",
180 |      "output_type": "stream",
181 |      "text": [
182 |       "119 ns ± 9.23 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n"
183 |      ]
184 |     }
185 |    ],
186 |    "source": [
187 |     "%%timeit \n",
188 |     "if \"11\" in a:\n",
189 |     "    a[\"11\"]\n",
190 |     "else:\n",
191 |     "    handle_missing()"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 10,
197 |    "metadata": {},
198 |    "outputs": [
199 |     {
200 |      "data": {
201 |       "text/plain": [
202 |        "11"
203 |       ]
204 |      },
205 |      "execution_count": 10,
206 |      "metadata": {},
207 |      "output_type": "execute_result"
208 |     }
209 |    ],
210 |    "source": [
211 |     "def dynamic_handle_missing(key):\n",
212 |     "    return int(key)\n",
213 |     "key = \"11\"\n",
214 |     "dynamic_handle_missing(key)"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 11,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "name": "stdout",
224 |      "output_type": "stream",
225 |      "text": [
226 |       "278 ns ± 6.97 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
227 |      ]
228 |     }
229 |    ],
230 |    "source": [
231 |     "%%timeit\n",
232 |     "dynamic_handle_missing(key)"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "code",
237 |    "execution_count": 12,
238 |    "metadata": {},
239 |    "outputs": [
240 |     {
241 |      "name": "stdout",
242 |      "output_type": "stream",
243 |      "text": [
244 |       "414 ns ± 7.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
245 |      ]
246 |     }
247 |    ],
248 |    "source": [
249 |     "%%timeit \n",
250 |     "a.get(key) or dynamic_handle_missing(key)"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": 13,
256 |    "metadata": {},
257 |    "outputs": [
258 |     {
259 |      "name": "stdout",
260 |      "output_type": "stream",
261 |      "text": [
262 |       "416 ns ± 9.55 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
263 |      ]
264 |     }
265 |    ],
266 |    "source": [
267 |     "%%timeit \n",
268 |     "a.get(key, dynamic_handle_missing(key))"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": 14,
274 |    "metadata": {},
275 |    "outputs": [
276 |     {
277 |      "name": "stdout",
278 |      "output_type": "stream",
279 |      "text": [
280 |       "556 ns ± 15.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
281 |      ]
282 |     }
283 |    ],
284 |    "source": [
285 |     "%%timeit \n",
286 |     "try:\n",
287 |     "    a[key]\n",
288 |     "except KeyError:\n",
289 |     "    dynamic_handle_missing(key)"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "code",
294 |    "execution_count": 15,
295 |    "metadata": {},
296 |    "outputs": [
297 |     {
298 |      "name": "stdout",
299 |      "output_type": "stream",
300 |      "text": [
301 |       "337 ns ± 37.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
302 |      ]
303 |     }
304 |    ],
305 |    "source": [
306 |     "%%timeit \n",
307 |     "if key in a:\n",
308 |     "    a[key]\n",
309 |     "else:\n",
310 |     "    dynamic_handle_missing(key)"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "code",
315 |    "execution_count": null,
316 |    "metadata": {},
317 |    "outputs": [],
318 |    "source": []
319 |   }
320 |  ],
321 |  "metadata": {
322 |   "kernelspec": {
323 |    "display_name": "Python 3",
324 |    "language": "python",
325 |    "name": "python3"
326 |   },
327 |   "language_info": {
328 |    "codemirror_mode": {
329 |     "name": "ipython",
330 |     "version": 3
331 |    },
332 |    "file_extension": ".py",
333 |    "mimetype": "text/x-python",
334 |    "name": "python",
335 |    "nbconvert_exporter": "python",
336 |    "pygments_lexer": "ipython3",
337 |    "version": "3.6.7"
338 |   }
339 |  },
340 |  "nbformat": 4,
341 |  "nbformat_minor": 2
342 | }
343 | 


--------------------------------------------------------------------------------
/Dynamic Programming.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Dynamic Programming\n",
  8 |     "\n",
  9 |     "Source: https://www.topcoder.com/community/data-science/data-science-tutorials/dynamic-programming-from-novice-to-advanced/"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Sum of coins\n",
 17 |     "Given coins of values $V_1, V_2, \\ldots, V_n$, find the minimum number of coins required to make a sum $S$."
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 44,
 23 |    "metadata": {
 24 |     "collapsed": true
 25 |    },
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "def wrapper(S, coins):\n",
 29 |     "    states = [(10000, set()) for k in range(S+1)]\n",
 30 |     "    states = [(10000, []) for k in range(S+1)]\n",
 31 |     "    return n_coins(S, coins, states)\n",
 32 |     "\n",
 33 |     "def n_coins(S, coins, states):\n",
 34 |     "    if S < 1:\n",
 35 |     "        return (10000, [])\n",
 36 |     "    if S in coins:\n",
 37 |     "        return (1, [S])\n",
 38 |     "    if S < min(coins):\n",
 39 |     "        return (10000, [])\n",
 40 |     "    if states[S][0] < 10000:\n",
 41 |     "        return states[S]\n",
 42 |     "    for c in coins:\n",
 43 |     "        print S, states[S]\n",
 44 |     "        if c > S:\n",
 45 |     "            continue\n",
 46 |     "        new_s = S - c\n",
 47 |     "        new_state = n_coins(new_s, coins, states)\n",
 48 |     "        new_state = (new_state[0]+1, new_state[1] + [c])\n",
 49 |     "        if new_state[0] < states[S][0]:\n",
 50 |     "            states[S] = new_state\n",
 51 |     "    return states[S]\n",
 52 |     "\n",
 53 |     "def n_coins_iter(S, coins):\n",
 54 |     "    states = [(10000, []) for k in range(S+1)]\n",
 55 |     "    states[0] = (0, [])\n",
 56 |     "    for s in range(1,S+1):\n",
 57 |     "        for c in coins:\n",
 58 |     "            if c <= s and states[s-c][0] < states[s][0]:\n",
 59 |     "                states[s] = (states[s-c][0] + 1, states[s-c][1] + [c])\n",
 60 |     "    print states, states[S]"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 46,
 66 |    "metadata": {
 67 |     "collapsed": false
 68 |    },
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "S: 10\n",
 75 |       "Coins(comma seperated): 9,8\n",
 76 |       "10 set([8, 9])\n",
 77 |       "10 (10000, [])\n",
 78 |       "10 (10000, [])\n",
 79 |       "(10000, [])\n",
 80 |       "[(0, []), (10000, []), (10000, []), (10000, []), (10000, []), (10000, []), (10000, []), (10000, []), (1, [8]), (1, [9]), (10000, [])] (10000, [])\n",
 81 |       "None\n"
 82 |      ]
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "S = int(raw_input(\"S: \"))\n",
 87 |     "coins = set(int(k) for k in raw_input(\"Coins(comma seperated): \").split(','))\n",
 88 |     "if min(coins) < 1:\n",
 89 |     "    raise Exception(\"Coins should be positive values >= 1\")\n",
 90 |     "print S, coins\n",
 91 |     "print wrapper(S, coins)\n",
 92 |     "print n_coins_iter(S, coins)"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "## Longest sequence problem\n",
100 |     "Given a sequence of $N$ numbers $A[1], A[2], \\ldots, A[N]$, find the length of the longest non-decreasing subsequence.\n",
101 |     "\n",
102 |     "### Approach\n",
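103 |     "$len_{LSS}(i) = 1 + \\max(\\{len_{LSS}(j) \\mid j < i, A[j] \\le A[i]\\} \\cup \\{0\\})$, where $len_{LSS}(i)$ is the length of the longest non-decreasing subsequence ending at $A[i]$; the answer is $\\max_{i \\in [1,N]} len_{LSS}(i)$."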
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 95,
109 |    "metadata": {
110 |     "collapsed": false
111 |    },
112 |    "outputs": [],
113 |    "source": [
114 |     "def wrapper(arr):\n",
115 |     "    states = [1]*len(arr)\n",
116 |     "    return longest_sub(arr, len(arr)-1, states)\n",
117 |     "\n",
118 |     "def longest_sub(arr, i, states):\n",
119 |     "    if i <= 0:\n",
120 |     "        return 1\n",
121 |     "    if states[i] > 1:\n",
122 |     "        return states[i]\n",
123 |     "    for j in range(i):\n",
124 |     "        lj = longest_sub(arr, j, states)\n",
125 |     "        if arr[j] <= arr[i]:\n",
126 |     "            print j, i, states\n",
127 |     "            states[i] = lj + 1\n",
128 |     "        else:\n",
129 |     "            states[i] = lj\n",
130 |     "    return states[i]"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 96,
136 |    "metadata": {
137 |     "collapsed": false
138 |    },
139 |    "outputs": [
140 |     {
141 |      "name": "stdout",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "Array(comma seperated): 5, 3, 4, 8, 6, 7\n",
145 |       "[5, 3, 4, 8, 6, 7]\n",
146 |       "0 5 [1, 1, 1, 1, 1, 1]\n",
147 |       "1 5 [1, 1, 1, 1, 1, 2]\n",
148 |       "1 2 [1, 1, 1, 1, 1, 2]\n",
149 |       "2 5 [1, 1, 2, 1, 1, 2]\n",
150 |       "0 3 [1, 1, 2, 1, 1, 3]\n",
151 |       "1 3 [1, 1, 2, 2, 1, 3]\n",
152 |       "2 3 [1, 1, 2, 2, 1, 3]\n",
153 |       "0 4 [1, 1, 2, 3, 1, 3]\n",
154 |       "1 4 [1, 1, 2, 3, 2, 3]\n",
155 |       "2 4 [1, 1, 2, 3, 2, 3]\n",
156 |       "4 5 [1, 1, 2, 3, 3, 3]\n",
157 |       "4\n"
158 |      ]
159 |     }
160 |    ],
161 |    "source": [
162 |     "arr = [int(k) for k in raw_input(\"Array(comma seperated): \").split(',')]\n",
163 |     "print arr\n",
164 |     "print wrapper(arr)"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "markdown",
169 |    "metadata": {},
170 |    "source": [
171 |     "## Apples on a table\n",
172 |     "A table composed of N x M cells, each having a certain quantity of apples, is given. You start from the upper-left corner. At each step you can go down or right one cell. Find the maximum number of apples you can collect."
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 97,
178 |    "metadata": {
179 |     "collapsed": true
180 |    },
181 |    "outputs": [],
182 |    "source": [
183 |     "def wrapper(mat, N, M):\n",
184 |     "    states = [[0 for c in range(M)] for r in range(N)]\n",
185 |     "    return apples(mat,0,0,N,M, states)\n",
186 |     "\n",
187 |     "\n",
188 |     "def apples(mat,i,j,N,M, states):\n",
189 |     "    if i >= N or j >= M:\n",
190 |     "        return 0\n",
191 |     "    if states[i][j] > 0:\n",
192 |     "        return states[i][j]\n",
193 |     "    states[i][j] = mat[i][j] + max([apples(mat,i+1,j,N,M, states), apples(mat,i,j+1,N,M, states)])\n",
194 |     "    return states[i][j]"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 99,
200 |    "metadata": {
201 |     "collapsed": false
202 |    },
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "Array(comma seperated): 1,2,3,4,5,6\n",
209 |       "N, M (comma seperated): 2,3\n",
210 |       "[[1, 2, 3], [3, 4, 5]]\n"
211 |      ]
212 |     },
213 |     {
214 |      "data": {
215 |       "text/plain": [
216 |        "13"
217 |       ]
218 |      },
219 |      "execution_count": 99,
220 |      "metadata": {},
221 |      "output_type": "execute_result"
222 |     }
223 |    ],
224 |    "source": [
225 |     "arr = [int(k) for k in raw_input(\"Array(comma seperated): \").split(',')]\n",
226 |     "N, M = [int(k) for k in raw_input(\"N, M (comma seperated): \").split(',')]\n",
227 |     "mat = [arr[i*N:i*N+M] for i in range(N)]\n",
228 |     "print mat\n",
229 |     "wrapper(mat, N, M)"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": null,
235 |    "metadata": {
236 |     "collapsed": true
237 |    },
238 |    "outputs": [],
239 |    "source": []
240 |   }
241 |  ],
242 |  "metadata": {
243 |   "kernelspec": {
244 |    "display_name": "Python 2",
245 |    "language": "python",
246 |    "name": "python2"
247 |   },
248 |   "language_info": {
249 |    "codemirror_mode": {
250 |     "name": "ipython",
251 |     "version": 2
252 |    },
253 |    "file_extension": ".py",
254 |    "mimetype": "text/x-python",
255 |    "name": "python",
256 |    "nbconvert_exporter": "python",
257 |    "pygments_lexer": "ipython2",
258 |    "version": "2.7.10"
259 |   }
260 |  },
261 |  "nbformat": 4,
262 |  "nbformat_minor": 0
263 | }
264 | 


--------------------------------------------------------------------------------
/Extract_unique_triadic_edges_from_triads.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from itertools import combinations"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 4,
 15 |    "metadata": {},
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "def get_directed_triads(undirected_triad):\n",
 19 |     "    # Get all triplets of edges\n",
 20 |     "    for candidate_edges in combinations(undirected_triad.items(), 3):\n",
 21 |     "        # Get edges between unique pair of nodes\n",
 22 |     "        unique_edges = set([tuple(sorted(k)) for k,v in candidate_edges])\n",
 23 |     "        # Only consider triads in which the three edges each connect a distinct pair of nodes\n",
 24 |     "        if len(unique_edges) == 3:\n",
 25 |     "            yield dict(candidate_edges)"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 5,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "undirected_triad = {\n",
 35 |     "    ('lisa.jacobson@enron.com', 'phillip.allen@enron.com'):1, \n",
 36 |     "    ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'):1,\n",
 37 |     "    ('phillip.allen@enron.com', 'richard.shapiro@enron.com'):1,\n",
 38 |     "    ('richard.shapiro@enron.com', 'phillip.allen@enron.com'):-1\n",
 39 |     "}"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 6,
 45 |    "metadata": {},
 46 |    "outputs": [
 47 |     {
 48 |      "name": "stdout",
 49 |      "output_type": "stream",
 50 |      "text": [
 51 |       "{('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1, ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1, ('phillip.allen@enron.com', 'richard.shapiro@enron.com'): 1}\n",
 52 |       "{('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1, ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1, ('richard.shapiro@enron.com', 'phillip.allen@enron.com'): -1}\n"
 53 |      ]
 54 |     }
 55 |    ],
 56 |    "source": [
 57 |     "for directed_triad in get_directed_triads(undirected_triad):\n",
 58 |     "    print(directed_triad)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 7,
 64 |    "metadata": {},
 65 |    "outputs": [
 66 |     {
 67 |      "data": {
 68 |       "text/plain": [
 69 |        "[{('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1,\n",
 70 |        "  ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1,\n",
 71 |        "  ('phillip.allen@enron.com', 'richard.shapiro@enron.com'): 1},\n",
 72 |        " {('lisa.jacobson@enron.com', 'phillip.allen@enron.com'): 1,\n",
 73 |        "  ('lisa.jacobson@enron.com', 'richard.shapiro@enron.com'): 1,\n",
 74 |        "  ('richard.shapiro@enron.com', 'phillip.allen@enron.com'): -1}]"
 75 |       ]
 76 |      },
 77 |      "execution_count": 7,
 78 |      "metadata": {},
 79 |      "output_type": "execute_result"
 80 |     }
 81 |    ],
 82 |    "source": [
 83 |     "all_directed_triads = list(get_directed_triads(undirected_triad))\n",
 84 |     "all_directed_triads"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {},
 91 |    "outputs": [],
 92 |    "source": []
 93 |   }
 94 |  ],
 95 |  "metadata": {
 96 |   "kernelspec": {
 97 |    "display_name": "Python 3",
 98 |    "language": "python",
 99 |    "name": "python3"
100 |   },
101 |   "language_info": {
102 |    "codemirror_mode": {
103 |     "name": "ipython",
104 |     "version": 3
105 |    },
106 |    "file_extension": ".py",
107 |    "mimetype": "text/x-python",
108 |    "name": "python",
109 |    "nbconvert_exporter": "python",
110 |    "pygments_lexer": "ipython3",
111 |    "version": "3.6.7"
112 |   }
113 |  },
114 |  "nbformat": 4,
115 |  "nbformat_minor": 2
116 | }
117 | 


--------------------------------------------------------------------------------
/ITE.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Information Theoretical Estimators (ITE) in Python\n",
  8 |     "\n",
  9 |     "https://bitbucket.org/szzoli/ite-in-python/src/master/\n",
 10 |     "\n",
 11 |     "Examples from: https://bitbucket.org/szzoli/ite-in-python/downloads/ITE-1.1_documentation.pdf"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 1,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import ite # import the ITE toolbox (1x)"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 2,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "import numpy as np"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 3,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "co1 = ite.cost.BHShannon_KnnK() # initialize the entropy (2nd character = ’H’) estimator"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 4,
 44 |    "metadata": {},
 45 |    "outputs": [
 46 |     {
 47 |      "name": "stdout",
 48 |      "output_type": "stream",
 49 |      "text": [
 50 |       "BHShannon_KnnK -> {'mult': True, 'knn_method': 'cKDTree', 'k': 3, 'eps': 0}\n",
 51 |       "BHShannon_KnnK -> {'mult': True, 'knn_method': 'cKDTree', 'k': 2, 'eps': 0.1}\n"
 52 |      ]
 53 |     },
 54 |     {
 55 |      "data": {
 56 |       "text/plain": [
 57 |        "(0.12977836304496293, 0.11135464600911416)"
 58 |       ]
 59 |      },
 60 |      "execution_count": 4,
 61 |      "metadata": {},
 62 |      "output_type": "execute_result"
 63 |     }
 64 |    ],
 65 |    "source": [
 66 |     "print(co1) # print estimator-1\n",
 67 |     "y = np.random.rand(1000, 3) # size: number of samples × dimension, {yt}\n",
 68 |     "h = co1.estimation(y) # entropy estimation\n",
 69 |     "co2 = ite.cost.BHShannon_KnnK(knn_method='cKDTree', k=2, eps=0.1) # with other estimator\n",
 70 |     "# parameters\n",
 71 |     "print(co2) # print estimator-2\n",
 72 |     "h2 = co2.estimation(y) # entropy estimation\n",
 73 |     "h, h2"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 5,
 79 |    "metadata": {},
 80 |    "outputs": [
 81 |     {
 82 |      "data": {
 83 |       "text/plain": [
 84 |        "-0.0005586476623290824"
 85 |       ]
 86 |      },
 87 |      "execution_count": 5,
 88 |      "metadata": {},
 89 |      "output_type": "execute_result"
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "co = ite.cost.MIShannon_DKL() # initialize the mutual information estimator\n",
 94 |     "# (MIShannon_DKL: 2nd character = ’I’)\n",
 95 |     "ds = np.array([2, 3, 4]) # dimensions of the subspaces of y (y has sum(ds) columns)\n",
 96 |     "t = 2000 # number of samples\n",
 97 |     "y = np.random.randn(t, sum(ds)) # size: number of samples × dimension\n",
 98 |     "i = co.estimation(y, ds) # estimate mutual information\n",
 99 |     "i"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 6,
105 |    "metadata": {},
106 |    "outputs": [
107 |     {
108 |      "data": {
109 |       "text/plain": [
110 |        "-0.022326680988546732"
111 |       ]
112 |      },
113 |      "execution_count": 6,
114 |      "metadata": {},
115 |      "output_type": "execute_result"
116 |     }
117 |    ],
118 |    "source": [
119 |     "from numpy.random import randn # ’randn’ is used to generate our observations\n",
120 |     "co = ite.cost.BDKL_KnnK() # initialize the divergence (2nd character = ’D’) estimator\n",
121 |     "dim = 3 # dimension of y1 and y2\n",
122 |     "t1, t2 = 2000, 3000 # number of samples from y1 and y2\n",
123 |     "y1 = randn(t1, dim) # size: number of samples1 × dimension, {y1_t}\n",
124 |     "\n",
125 |     "y2 = randn(t2, dim) # size: number of samples2 × dimension, {y2_t}\n",
126 |     "d = co.estimation(y1, y2) # estimate KL divergence\n",
127 |     "d"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {},
134 |    "outputs": [],
135 |    "source": [
136 |     "from numpy import dot # create observations\n",
137 |     "from numpy.random import rand, multivariate_normal # -||-\n",
138 |     "dim1, dim2 = 1, 2 # dimensions of y1 and y2\n",
139 |     "dim = dim1 + dim2 # dimension of y = [y1; y2]\n",
140 |     "\n",
141 |     "t = 5000 # number of samples\n",
142 |     "co = ite.cost.BcondHShannon_HShannon() # initialize the conditional entropy (’condH’)\n",
143 |     "# estimator\n",
144 |     "m, l = rand(dim), rand(dim, dim) # mean (m) and factor (l) of the covariance\n",
145 |     "c = dot(l, l.T) # covariance (Σ), y = N(m, Σ)\n",
146 |     "y = multivariate_normal(m, c, t) # {yt}\n",
147 |     "cond_h = co.estimation(y, dim1) # estimate conditional entropy\n",
148 |     "cond_h"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {},
155 |    "outputs": [],
156 |    "source": []
157 |   }
158 |  ],
159 |  "metadata": {
160 |   "kernelspec": {
161 |    "display_name": "Python 3",
162 |    "language": "python",
163 |    "name": "python3"
164 |   },
165 |   "language_info": {
166 |    "codemirror_mode": {
167 |     "name": "ipython",
168 |     "version": 3
169 |    },
170 |    "file_extension": ".py",
171 |    "mimetype": "text/x-python",
172 |    "name": "python",
173 |    "nbconvert_exporter": "python",
174 |    "pygments_lexer": "ipython3",
175 |    "version": "3.7.3"
176 |   }
177 |  },
178 |  "nbformat": 4,
179 |  "nbformat_minor": 4
180 | }
181 | 


--------------------------------------------------------------------------------
/Keras Char Word LSTM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [
 10 |     {
 11 |      "name": "stderr",
 12 |      "output_type": "stream",
 13 |      "text": [
 14 |       "Using TensorFlow backend.\n"
 15 |      ]
 16 |     }
 17 |    ],
 18 |    "source": [
 19 |     "import numpy as np\n",
 20 |     "\n",
 21 |     "from keras.models import Sequential, Model\n",
 22 |     "from keras.layers import Convolution1D, MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D, GlobalMaxPooling1D\n",
 23 |     "from keras.layers import Embedding, LSTM, Input, Merge, Dense, TimeDistributed"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 2,
 29 |    "metadata": {
 30 |     "collapsed": true
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "max_word_len=3\n",
 35 |     "max_seq_len=5\n",
 36 |     "w_embed_size=6\n",
 37 |     "c_embed_size=3\n",
 38 |     "max_chars=2\n",
 39 |     "max_words=20\n",
 40 |     "c_nb_filters=4\n",
 41 |     "c_filter_length=3\n"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 3,
 47 |    "metadata": {
 48 |     "collapsed": false
 49 |    },
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "char_cnn_layer=Sequential()\n",
 53 |     "char_cnn_layer.add(Embedding(max_chars, c_embed_size, input_length=max_word_len, name=\"char_embed\"))\n",
 54 |     "char_cnn_layer.add(Convolution1D(c_nb_filters,c_filter_length, activation='relu'))\n",
 55 |     "char_cnn_layer.add(GlobalAveragePooling1D(name=\"char_based_word_embed\"))"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 4,
 61 |    "metadata": {
 62 |     "collapsed": false
 63 |    },
 64 |    "outputs": [
 65 |     {
 66 |      "data": {
 67 |       "text/plain": [
 68 |        "((None, 4), (None, 3))"
 69 |       ]
 70 |      },
 71 |      "execution_count": 4,
 72 |      "metadata": {},
 73 |      "output_type": "execute_result"
 74 |     }
 75 |    ],
 76 |    "source": [
 77 |     "char_cnn_layer.output_shape, char_cnn_layer.input_shape"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 5,
 83 |    "metadata": {
 84 |     "collapsed": false
 85 |    },
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stdout",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "____________________________________________________________________________________________________\n",
 92 |       "Layer (type)                     Output Shape          Param #     Connected to                     \n",
 93 |       "====================================================================================================\n",
 94 |       "char_embed (Embedding)           (None, 3, 3)          6           embedding_input_1[0][0]          \n",
 95 |       "____________________________________________________________________________________________________\n",
 96 |       "convolution1d_1 (Convolution1D)  (None, 1, 4)          40          char_embed[0][0]                 \n",
 97 |       "____________________________________________________________________________________________________\n",
 98 |       "char_based_word_embed (GlobalAve (None, 4)             0           convolution1d_1[0][0]            \n",
 99 |       "====================================================================================================\n",
100 |       "Total params: 46\n",
101 |       "Trainable params: 46\n",
102 |       "Non-trainable params: 0\n",
103 |       "____________________________________________________________________________________________________\n"
104 |      ]
105 |     }
106 |    ],
107 |    "source": [
108 |     "char_cnn_layer.summary()"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 6,
114 |    "metadata": {
115 |     "collapsed": false
116 |    },
117 |    "outputs": [
118 |     {
119 |      "name": "stdout",
120 |      "output_type": "stream",
121 |      "text": [
122 |       "____________________________________________________________________________________________________\n",
123 |       "Layer (type)                     Output Shape          Param #     Connected to                     \n",
124 |       "====================================================================================================\n",
125 |       "tdcnn (TimeDistributed)          (None, 5, 4)          46          timedistributed_input_1[0][0]    \n",
126 |       "====================================================================================================\n",
127 |       "Total params: 46\n",
128 |       "Trainable params: 46\n",
129 |       "Non-trainable params: 0\n",
130 |       "____________________________________________________________________________________________________\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "char_seq_layer=Sequential()\n",
136 |     "char_seq_layer.add(TimeDistributed(char_cnn_layer, input_shape=(max_seq_len, max_word_len), name=\"tdcnn\"))\n",
137 |     "char_seq_layer.summary()"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 7,
143 |    "metadata": {
144 |     "collapsed": false
145 |    },
146 |    "outputs": [
147 |     {
148 |      "name": "stdout",
149 |      "output_type": "stream",
150 |      "text": [
151 |       "____________________________________________________________________________________________________\n",
152 |       "Layer (type)                     Output Shape          Param #     Connected to                     \n",
153 |       "====================================================================================================\n",
154 |       "word_embed (Embedding)           (None, 5, 6)          120         embedding_input_2[0][0]          \n",
155 |       "====================================================================================================\n",
156 |       "Total params: 120\n",
157 |       "Trainable params: 120\n",
158 |       "Non-trainable params: 0\n",
159 |       "____________________________________________________________________________________________________\n"
160 |      ]
161 |     }
162 |    ],
163 |    "source": [
164 |     "word_seq_layer=Sequential()\n",
165 |     "word_seq_layer.add(Embedding(max_words, w_embed_size, input_length=max_seq_len, name=\"word_embed\"))\n",
166 |     "word_seq_layer.summary()"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 8,
172 |    "metadata": {
173 |     "collapsed": false
174 |    },
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "____________________________________________________________________________________________________\n",
181 |       "Layer (type)                     Output Shape          Param #     Connected to                     \n",
182 |       "====================================================================================================\n",
183 |       "tdcnn (TimeDistributed)          (None, 5, 4)          46          timedistributed_input_1[0][0]    \n",
184 |       "____________________________________________________________________________________________________\n",
185 |       "word_embed (Embedding)           (None, 5, 6)          120         embedding_input_2[0][0]          \n",
186 |       "____________________________________________________________________________________________________\n",
187 |       "shared_lstm (LSTM)               (None, 5, 10)         840         char_word_embedding[0][0]        \n",
188 |       "____________________________________________________________________________________________________\n",
189 |       "timedistributed_1 (TimeDistribut (None, 5, 1)          11          shared_lstm[0][0]                \n",
190 |       "====================================================================================================\n",
191 |       "Total params: 1,017\n",
192 |       "Trainable params: 1,017\n",
193 |       "Non-trainable params: 0\n",
194 |       "____________________________________________________________________________________________________\n"
195 |      ]
196 |     }
197 |    ],
198 |    "source": [
199 |     "full_seq_layer=Sequential()\n",
200 |     "full_seq_layer.add(Merge([char_seq_layer, word_seq_layer], mode=\"concat\", name=\"char_word_embedding\"))\n",
201 |     "full_seq_layer.add(LSTM(10, return_sequences=True, name=\"shared_lstm\"))\n",
202 |     "full_seq_layer.add(TimeDistributed(Dense(1, activation='sigmoid')))\n",
203 |     "full_seq_layer.summary()"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": 9,
209 |    "metadata": {
210 |     "collapsed": true
211 |    },
212 |    "outputs": [],
213 |    "source": [
214 |     "full_seq_layer.compile(loss='sparse_categorical_crossentropy', optimizer='sgd')"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 10,
220 |    "metadata": {
221 |     "collapsed": false
222 |    },
223 |    "outputs": [
224 |     {
225 |      "data": {
226 |       "text/plain": [
227 |        "[(None, 5, 3), (None, 5)]"
228 |       ]
229 |      },
230 |      "execution_count": 10,
231 |      "metadata": {},
232 |      "output_type": "execute_result"
233 |     }
234 |    ],
235 |    "source": [
236 |     "full_seq_layer.input_shape"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 11,
242 |    "metadata": {
243 |     "collapsed": false
244 |    },
245 |    "outputs": [
246 |     {
247 |      "data": {
248 |       "text/plain": [
249 |        "((2, 5, 3), (2, 5))"
250 |       ]
251 |      },
252 |      "execution_count": 11,
253 |      "metadata": {},
254 |      "output_type": "execute_result"
255 |     }
256 |    ],
257 |    "source": [
258 |     "X_words = [\n",
259 |     "    [1,2,1,4,5],\n",
260 |     "    [1,2,1,4,5]\n",
261 |     "          ]\n",
262 |     "X_chars = [\n",
263 |     "    [\n",
264 |     "        [0,1,0],\n",
265 |     "        [0,1,1],\n",
266 |     "        [0,1,0],\n",
267 |     "        [1,1,0],\n",
268 |     "        [0,1,0]\n",
269 |     "    ],\n",
270 |     "    [\n",
271 |     "        [0,1,1],\n",
272 |     "        [0,1,1],\n",
273 |     "        [0,1,0],\n",
274 |     "        [0,0,0],\n",
275 |     "        [0,1,0]\n",
276 |     "    ]\n",
277 |     "          ]\n",
278 |     "X_words = np.array(X_words)\n",
279 |     "X_chars = np.array(X_chars)\n",
280 |     "X_chars.shape, X_words.shape"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "code",
285 |    "execution_count": 12,
286 |    "metadata": {
287 |     "collapsed": false
288 |    },
289 |    "outputs": [
290 |     {
291 |      "data": {
292 |       "text/plain": [
293 |        "(2, 5, 1)"
294 |       ]
295 |      },
296 |      "execution_count": 12,
297 |      "metadata": {},
298 |      "output_type": "execute_result"
299 |     }
300 |    ],
301 |    "source": [
302 |     "y = [\n",
303 |     "    [0, 1, 1, 1, 0],\n",
304 |     "    [0, 0, 0, 1, 0]\n",
305 |     "]\n",
306 |     "y = np.expand_dims(np.array(y), -1)\n",
307 |     "y.shape"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": 13,
313 |    "metadata": {
314 |     "collapsed": false
315 |    },
316 |    "outputs": [
317 |     {
318 |      "name": "stdout",
319 |      "output_type": "stream",
320 |      "text": [
321 |       "Epoch 1/10\n",
322 |       "2/2 [==============================] - 7s - loss: nan\n",
323 |       "Epoch 2/10\n",
324 |       "2/2 [==============================] - 0s - loss: nan\n",
325 |       "Epoch 3/10\n",
326 |       "2/2 [==============================] - 0s - loss: nan\n",
327 |       "Epoch 4/10\n",
328 |       "2/2 [==============================] - 0s - loss: nan\n",
329 |       "Epoch 5/10\n",
330 |       "2/2 [==============================] - 0s - loss: nan\n",
331 |       "Epoch 6/10\n",
332 |       "2/2 [==============================] - 0s - loss: nan\n",
333 |       "Epoch 7/10\n",
334 |       "2/2 [==============================] - 0s - loss: nan\n",
335 |       "Epoch 8/10\n",
336 |       "2/2 [==============================] - 0s - loss: nan\n",
337 |       "Epoch 9/10\n",
338 |       "2/2 [==============================] - 0s - loss: nan\n",
339 |       "Epoch 10/10\n",
340 |       "2/2 [==============================] - 0s - loss: nan\n"
341 |      ]
342 |     },
343 |     {
344 |      "data": {
345 |       "text/plain": [
346 |        "<keras.callbacks.History at 0x7f6641411da0>"
347 |       ]
348 |      },
349 |      "execution_count": 13,
350 |      "metadata": {},
351 |      "output_type": "execute_result"
352 |     }
353 |    ],
354 |    "source": [
355 |     "full_seq_layer.fit([X_chars, X_words], y)"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "code",
360 |    "execution_count": null,
361 |    "metadata": {
362 |     "collapsed": true
363 |    },
364 |    "outputs": [],
365 |    "source": []
366 |   }
367 |  ],
368 |  "metadata": {
369 |   "kernelspec": {
370 |    "display_name": "Python [conda root]",
371 |    "language": "python",
372 |    "name": "conda-root-py"
373 |   },
374 |   "language_info": {
375 |    "codemirror_mode": {
376 |     "name": "ipython",
377 |     "version": 3
378 |    },
379 |    "file_extension": ".py",
380 |    "mimetype": "text/x-python",
381 |    "name": "python",
382 |    "nbconvert_exporter": "python",
383 |    "pygments_lexer": "ipython3",
384 |    "version": "3.5.2"
385 |   }
386 |  },
387 |  "nbformat": 4,
388 |  "nbformat_minor": 2
389 | }
390 | 


--------------------------------------------------------------------------------
/Keras attention.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stderr",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Using TensorFlow backend.\n"
 13 |      ]
 14 |     }
 15 |    ],
 16 |    "source": [
 17 |     "%matplotlib inline\n",
 18 |     "import numpy as np\n",
 19 |     "import matplotlib.pyplot as plt\n",
 20 |     "import seaborn as sns\n",
 21 |     "\n",
 22 |     "\n",
 23 |     "from keras.layers import Embedding, Dense, Permute, RepeatVector\n",
 24 |     "from keras.layers import Lambda, Conv1D, Dropout, Activation, Multiply, Flatten\n",
 25 |     "from keras.models import Sequential, Input, Model\n",
 26 |     "from keras.models import K\n",
 27 |     "\n",
 28 |     "from keras.preprocessing import sequence"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "sns.set_context(\"paper\")\n",
 38 |     "sns.set_style(\"ticks\")"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "vocab_size=10\n",
 48 |     "emb_size=3\n",
 49 |     "num_filters=6\n",
 50 |     "kernel_size=3\n",
 51 |     "maxlen=5"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "## Helper functions"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 4,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "class Vocab(object):\n",
 68 |     "    def __init__(\n",
 69 |     "        self,\n",
 70 |     "        pre_tokens=(\"__PAD__\", \"__BOS__\", \"__EOS__\", ),\n",
 71 |     "        UNK=\"__UNK__\",\n",
 72 |     "        preprocess=lambda x: x\n",
 73 |     "    ):\n",
 74 |     "        self.UNK = UNK\n",
 75 |     "        self.word2idx = {}\n",
 76 |     "        self.idx2word = []\n",
 77 |     "        self.offset_tokens = set((self.UNK, ) + pre_tokens)\n",
 78 |     "        self.preprocess = preprocess\n",
 79 |     "        for token in pre_tokens + (self.UNK, ):\n",
 80 |     "            self.add_token(token)\n",
 81 |     "\n",
 82 |     "    def add_token(self, token):\n",
 83 |     "        if token not in self.offset_tokens:\n",
 84 |     "            token = self.preprocess(token)\n",
 85 |     "        if token not in self.word2idx:\n",
 86 |     "            self.word2idx[token] = len(self.word2idx)\n",
 87 |     "            self.idx2word.append(token)\n",
 88 |     "\n",
 89 |     "    def get_word2idx(self, token):\n",
 90 |     "        if token not in self.offset_tokens:\n",
 91 |     "            token = self.preprocess(token)\n",
 92 |     "        return self.word2idx.get(token, self.UNK)\n",
 93 |     "\n",
 94 |     "    def process_seq(self, seq):\n",
 95 |     "        return [self.get_word2idx(token) for token in seq]\n",
 96 |     "\n",
 97 |     "    \n",
 98 |     "    \n",
 99 |     "def generate_vocab(documents):\n",
100 |     "    \"\"\"Generate vocab from list of documents\n",
101 |     "    \n",
102 |     "    Args:\n",
103 |     "    -----\n",
104 |     "        documents: list of documents, where each document is a list of words\n",
105 |     "    \n",
106 |     "    Returns:\n",
107 |     "    --------\n",
108 |     "        vocab: Vocab object\n",
109 |     "    \"\"\"\n",
110 |     "    vocab = Vocab()\n",
111 |     "    for document in (documents):\n",
112 |     "        for word in document:\n",
113 |     "            vocab.add_token(word)\n",
114 |     "    return vocab\n",
115 |     "\n",
116 |     "def create_sequences(documents, labels, vocab, maxlen):\n",
117 |     "    \"\"\"Create sequences for keras models\n",
118 |     "    \n",
119 |     "    Args:\n",
120 |     "    -----\n",
121 |     "        documents: list of documents, where each document is a list of words\n",
122 |     "        labels: list of labels per document. Only binary classification is supported, i.e. 0, 1\n",
123 |     "        \n",
124 |     "    Returns:\n",
125 |     "    --------\n",
126 |     "        X_padded, y: vocab-encoded sequences padded to maxlen, and the labels as a numpy array\n",
127 |     "    \"\"\"\n",
128 |     "    X = []\n",
129 |     "    y = np.asarray(labels)\n",
130 |     "    for document in (documents):\n",
131 |     "        seq = []\n",
132 |     "        for word in document:\n",
133 |     "            seq.append(word)\n",
134 |     "        X.append(vocab.process_seq(seq))\n",
135 |     "    X_padded = sequence.pad_sequences(X, maxlen=maxlen)\n",
136 |     "    return X_padded, y\n",
137 |     "    "
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "## Basic model which takes a sequence and outputs a sequence"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 5,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "def get_base_model():\n",
154 |     "    model = Sequential()\n",
155 |     "    # we start off with an efficient embedding layer which maps\n",
156 |     "    # our vocab indices into emb_size dimensions\n",
157 |     "    model.add(Embedding(vocab_size,\n",
158 |     "                        emb_size,\n",
159 |     "                        input_length=maxlen))\n",
160 |     "    model.add(Dropout(0.2))\n",
161 |     "    # we add a Conv1D, which will learn num_filters\n",
162 |     "    # word group filters of size kernel_size:\n",
163 |     "    model.add(\n",
164 |     "        Conv1D(\n",
165 |     "            num_filters,\n",
166 |     "            kernel_size,\n",
167 |     "            padding='same',\n",
168 |     "            activation='relu',\n",
169 |     "            strides=1\n",
170 |     "        )\n",
171 |     "    )\n",
172 |     "    return model\n",
173 |     "    \n",
174 |     "def get_attention_model():\n",
175 |     "    attention = Sequential()\n",
176 |     "    attention.add(Dense(num_filters, input_shape=(maxlen, num_filters), activation=\"tanh\"))\n",
177 |     "    attention.add(Dense(1))\n",
178 |     "    attention.add(Flatten())\n",
179 |     "    attention.add(Activation(\"softmax\"))\n",
180 |     "    return attention\n",
181 |     "\n",
182 |     "def get_output(base_model, attention, inputs):\n",
183 |     "    activations = base_model(inputs)\n",
184 |     "    permited_activations = Permute((2,1))(activations)\n",
185 |     "    aligned_attention = RepeatVector(num_filters)(attention(activations))\n",
186 |     "    final_activation = Multiply()([\n",
187 |     "        permited_activations,\n",
188 |     "        aligned_attention\n",
189 |     "    ])\n",
190 |     "    final_score = Flatten()(final_activation)\n",
191 |     "    output = Lambda(lambda x: K.sum(x,-1, keepdims=True))(final_score)\n",
192 |     "    output = Activation(\"sigmoid\")(output)\n",
193 |     "    return output\n",
194 |     "\n",
195 |     "def get_model():\n",
196 |     "    inputs = Input((maxlen,))\n",
197 |     "    base_model = get_base_model()\n",
198 |     "    attention = get_attention_model()\n",
199 |     "    output = get_output(base_model, attention, inputs)\n",
200 |     "    model = Model(inputs=inputs, outputs=output)\n",
201 |     "    return model"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": 6,
207 |    "metadata": {},
208 |    "outputs": [],
209 |    "source": [
210 |     "model = get_model()"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": 7,
216 |    "metadata": {},
217 |    "outputs": [
218 |     {
219 |      "name": "stdout",
220 |      "output_type": "stream",
221 |      "text": [
222 |       "__________________________________________________________________________________________________\n",
223 |       "Layer (type)                    Output Shape         Param #     Connected to                     \n",
224 |       "==================================================================================================\n",
225 |       "input_1 (InputLayer)            (None, 5)            0                                            \n",
226 |       "__________________________________________________________________________________________________\n",
227 |       "sequential_1 (Sequential)       (None, 5, 6)         90          input_1[0][0]                    \n",
228 |       "__________________________________________________________________________________________________\n",
229 |       "sequential_2 (Sequential)       (None, 5)            49          sequential_1[1][0]               \n",
230 |       "__________________________________________________________________________________________________\n",
231 |       "permute_1 (Permute)             (None, 6, 5)         0           sequential_1[1][0]               \n",
232 |       "__________________________________________________________________________________________________\n",
233 |       "repeat_vector_1 (RepeatVector)  (None, 6, 5)         0           sequential_2[1][0]               \n",
234 |       "__________________________________________________________________________________________________\n",
235 |       "multiply_1 (Multiply)           (None, 6, 5)         0           permute_1[0][0]                  \n",
236 |       "                                                                 repeat_vector_1[0][0]            \n",
237 |       "__________________________________________________________________________________________________\n",
238 |       "flatten_2 (Flatten)             (None, 30)           0           multiply_1[0][0]                 \n",
239 |       "__________________________________________________________________________________________________\n",
240 |       "lambda_1 (Lambda)               (None, 1)            0           flatten_2[0][0]                  \n",
241 |       "__________________________________________________________________________________________________\n",
242 |       "activation_2 (Activation)       (None, 1)            0           lambda_1[0][0]                   \n",
243 |       "==================================================================================================\n",
244 |       "Total params: 139\n",
245 |       "Trainable params: 139\n",
246 |       "Non-trainable params: 0\n",
247 |       "__________________________________________________________________________________________________\n"
248 |      ]
249 |     }
250 |    ],
251 |    "source": [
252 |     "model.summary()"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": 8,
258 |    "metadata": {},
259 |    "outputs": [],
260 |    "source": [
261 |     "model.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": 9,
267 |    "metadata": {},
268 |    "outputs": [
269 |     {
270 |      "data": {
271 |       "text/plain": [
272 |        "((100, 5), (100, 1))"
273 |       ]
274 |      },
275 |      "execution_count": 9,
276 |      "metadata": {},
277 |      "output_type": "execute_result"
278 |     }
279 |    ],
280 |    "source": [
281 |     "data_size=100\n",
282 |     "X = np.random.randint(vocab_size, size=(data_size, maxlen))\n",
283 |     "y = np.random.randint(2, size=(data_size,1))\n",
284 |     "X.shape, y.shape"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "code",
289 |    "execution_count": 10,
290 |    "metadata": {},
291 |    "outputs": [
292 |     {
293 |      "name": "stdout",
294 |      "output_type": "stream",
295 |      "text": [
296 |       "Epoch 1/1\n",
297 |       "100/100 [==============================] - 2s 24ms/step - loss: 0.6960 - acc: 0.4600\n"
298 |      ]
299 |     },
300 |     {
301 |      "data": {
302 |       "text/plain": [
303 |        "<keras.callbacks.History at 0x2533848c128>"
304 |       ]
305 |      },
306 |      "execution_count": 10,
307 |      "metadata": {},
308 |      "output_type": "execute_result"
309 |     }
310 |    ],
311 |    "source": [
312 |     "model.fit(X, y)"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "## Fit model on data"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": 11,
325 |    "metadata": {},
326 |    "outputs": [],
327 |    "source": [
328 |     "documents = [\n",
329 |     "    [\"The\", \"world\", \"is\", \"on\", \"fire\"],\n",
330 |     "    [\"The\", \"earth\", \"is\", \"on\", \"fire\"],\n",
331 |     "    [\"The\", \"country\", \"is\", \"on\", \"ice\"],\n",
332 |     "    [\"The\", \"book\", \"is\", \"on\", \"fire\"],\n",
333 |     "    [\"The\", \"cat\", \"is\", \"on\", \"trampoline\"],\n",
334 |     "]\n",
335 |     "\n",
336 |     "labels = [0,0,1,0,1]\n",
337 |     "vocab = generate_vocab(documents)"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": 12,
343 |    "metadata": {},
344 |    "outputs": [],
345 |    "source": [
346 |     "X, y = create_sequences(documents, labels, vocab, maxlen)"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": 13,
352 |    "metadata": {},
353 |    "outputs": [
354 |     {
355 |      "data": {
356 |       "text/plain": [
357 |        "((5, 5), (5,))"
358 |       ]
359 |      },
360 |      "execution_count": 13,
361 |      "metadata": {},
362 |      "output_type": "execute_result"
363 |     }
364 |    ],
365 |    "source": [
366 |     "X.shape, y.shape"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "execution_count": 14,
372 |    "metadata": {},
373 |    "outputs": [
374 |     {
375 |      "name": "stdout",
376 |      "output_type": "stream",
377 |      "text": [
378 |       "Epoch 1/1\n",
379 |       "\r",
380 |       "5/5 [==============================] - 0s 3ms/step - loss: 0.6967 - acc: 0.4000\n"
381 |      ]
382 |     },
383 |     {
384 |      "data": {
385 |       "text/plain": [
386 |        "<keras.callbacks.History at 0x2533848c278>"
387 |       ]
388 |      },
389 |      "execution_count": 14,
390 |      "metadata": {},
391 |      "output_type": "execute_result"
392 |     }
393 |    ],
394 |    "source": [
395 |     "model.fit(X, y)"
396 |    ]
397 |   },
398 |   {
399 |    "cell_type": "code",
400 |    "execution_count": 15,
401 |    "metadata": {},
402 |    "outputs": [
403 |     {
404 |      "data": {
405 |       "text/plain": [
406 |        "[<keras.engine.topology.InputLayer at 0x2533848c390>,\n",
407 |        " <keras.models.Sequential at 0x2533894f630>,\n",
408 |        " <keras.models.Sequential at 0x25333c1b550>,\n",
409 |        " <keras.layers.core.Permute at 0x253385d5e48>,\n",
410 |        " <keras.layers.core.RepeatVector at 0x253385c1f98>,\n",
411 |        " <keras.layers.merge.Multiply at 0x2533866ba58>,\n",
412 |        " <keras.layers.core.Flatten at 0x2533861dbe0>,\n",
413 |        " <keras.layers.core.Lambda at 0x253386a8390>,\n",
414 |        " <keras.layers.core.Activation at 0x2537fe20c18>]"
415 |       ]
416 |      },
417 |      "execution_count": 15,
418 |      "metadata": {},
419 |      "output_type": "execute_result"
420 |     }
421 |    ],
422 |    "source": [
423 |     "model.layers"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": 16,
429 |    "metadata": {},
430 |    "outputs": [
431 |     {
432 |      "data": {
433 |       "text/plain": [
434 |        "(<tf.Tensor 'conv1d_1/Relu:0' shape=(?, 5, 6) dtype=float32>,\n",
435 |        " <tf.Tensor 'activation_1/Softmax:0' shape=(?, ?) dtype=float32>)"
436 |       ]
437 |      },
438 |      "execution_count": 16,
439 |      "metadata": {},
440 |      "output_type": "execute_result"
441 |     }
442 |    ],
443 |    "source": [
444 |     "model.layers[1].get_output_at(0), model.layers[2].get_output_at(0)"
445 |    ]
446 |   },
447 |   {
448 |    "cell_type": "code",
449 |    "execution_count": 17,
450 |    "metadata": {},
451 |    "outputs": [
452 |     {
453 |      "data": {
454 |       "text/plain": [
455 |        "<tf.Tensor 'embedding_1_input:0' shape=(?, 5) dtype=float32>"
456 |       ]
457 |      },
458 |      "execution_count": 17,
459 |      "metadata": {},
460 |      "output_type": "execute_result"
461 |     }
462 |    ],
463 |    "source": [
464 |     "model.layers[1].get_input_at(0)"
465 |    ]
466 |   },
467 |   {
468 |    "cell_type": "code",
469 |    "execution_count": 18,
470 |    "metadata": {},
471 |    "outputs": [],
472 |    "source": [
473 |     "conv_activation = K.function(\n",
474 |     "    [\n",
475 |     "        model.layers[1].get_input_at(0),\n",
476 |     "        K.learning_phase()\n",
477 |     "    ],\n",
478 |     "    [model.layers[1].get_output_at(0)]\n",
479 |     ")"
480 |    ]
481 |   },
482 |   {
483 |    "cell_type": "code",
484 |    "execution_count": 19,
485 |    "metadata": {},
486 |    "outputs": [
487 |     {
488 |      "data": {
489 |       "text/plain": [
490 |        "<keras.backend.tensorflow_backend.Function at 0x253290c7390>"
491 |       ]
492 |      },
493 |      "execution_count": 19,
494 |      "metadata": {},
495 |      "output_type": "execute_result"
496 |     }
497 |    ],
498 |    "source": [
499 |     "conv_activation"
500 |    ]
501 |   },
502 |   {
503 |    "cell_type": "code",
504 |    "execution_count": 20,
505 |    "metadata": {},
506 |    "outputs": [
507 |     {
508 |      "data": {
509 |       "text/plain": [
510 |        "[array([[[ 0.        ,  0.        ,  0.01670015,  0.        ,  0.01758255,\n",
511 |        "           0.        ],\n",
512 |        "         [ 0.01330368,  0.        ,  0.        ,  0.        ,  0.        ,\n",
513 |        "           0.        ],\n",
514 |        "         [ 0.        ,  0.02499684,  0.        ,  0.        ,  0.        ,\n",
515 |        "           0.03441959],\n",
516 |        "         [ 0.00910494,  0.        ,  0.00106554,  0.        ,  0.02439762,\n",
517 |        "           0.        ],\n",
518 |        "         [ 0.        ,  0.01371591,  0.        ,  0.        ,  0.        ,\n",
519 |        "           0.        ]]], dtype=float32)]"
520 |       ]
521 |      },
522 |      "execution_count": 20,
523 |      "metadata": {},
524 |      "output_type": "execute_result"
525 |     }
526 |    ],
527 |    "source": [
528 |     "conv_activation([\n",
529 |     "    [X[0]],\n",
530 |     "    0.\n",
531 |     "])"
532 |    ]
533 |   },
534 |   {
535 |    "cell_type": "code",
536 |    "execution_count": 21,
537 |    "metadata": {},
538 |    "outputs": [
539 |     {
540 |      "data": {
541 |       "text/plain": [
542 |        "<tf.Tensor 'dense_1_input:0' shape=(?, 5, 6) dtype=float32>"
543 |       ]
544 |      },
545 |      "execution_count": 21,
546 |      "metadata": {},
547 |      "output_type": "execute_result"
548 |     }
549 |    ],
550 |    "source": [
551 |     "model.layers[2].get_input_at(0)"
552 |    ]
553 |   },
554 |   {
555 |    "cell_type": "code",
556 |    "execution_count": 22,
557 |    "metadata": {},
558 |    "outputs": [],
559 |    "source": [
560 |     "attention_activation = K.function(\n",
561 |     "    [\n",
562 |     "        model.layers[2].get_input_at(0),\n",
563 |     "        K.learning_phase()\n",
564 |     "    ],\n",
565 |     "    [model.layers[2].get_output_at(0)]\n",
566 |     ")"
567 |    ]
568 |   },
569 |   {
570 |    "cell_type": "code",
571 |    "execution_count": 23,
572 |    "metadata": {},
573 |    "outputs": [
574 |     {
575 |      "data": {
576 |       "text/plain": [
577 |        "[array([[ 0.20158802,  0.19940449,  0.19962992,  0.19902216,  0.2003554 ]], dtype=float32)]"
578 |       ]
579 |      },
580 |      "execution_count": 23,
581 |      "metadata": {},
582 |      "output_type": "execute_result"
583 |     }
584 |    ],
585 |    "source": [
586 |     "attention_activation([\n",
587 |     "    conv_activation([[X[0]],0.])[0],\n",
588 |     "    0.\n",
589 |     "])"
590 |    ]
591 |   },
592 |   {
593 |    "cell_type": "code",
594 |    "execution_count": 24,
595 |    "metadata": {},
596 |    "outputs": [],
597 |    "source": [
598 |     "def get_activations(model, model_input):\n",
599 |     "    \"\"\"Taken from:\n",
600 |     "    https://github.com/philipperemy/keras-visualize-activations/blob/master/read_activations.py\n",
601 |     "    \"\"\"\n",
602 |     "    conv_activation_fn = K.function(\n",
603 |     "        [\n",
604 |     "            model.layers[1].get_input_at(0),\n",
605 |     "            K.learning_phase()\n",
606 |     "        ],\n",
607 |     "        [model.layers[1].get_output_at(0)]\n",
608 |     "    )\n",
609 |     "    conv_activations = conv_activation_fn([\n",
610 |     "        [model_input],\n",
611 |     "        0.\n",
612 |     "    ])\n",
613 |     "    attention_activation_fn = K.function(\n",
614 |     "        [\n",
615 |     "            model.layers[2].get_input_at(0),\n",
616 |     "            K.learning_phase()\n",
617 |     "        ],\n",
618 |     "        [model.layers[2].get_output_at(0)]\n",
619 |     "    )\n",
620 |     "    attention_activations = attention_activation_fn([\n",
621 |     "        conv_activations[0],\n",
622 |     "        0.\n",
623 |     "    ])[0]\n",
624 |     "    return attention_activations"
625 |    ]
626 |   },
627 |   {
628 |    "cell_type": "code",
629 |    "execution_count": 25,
630 |    "metadata": {},
631 |    "outputs": [],
632 |    "source": [
633 |     "attention_activations = get_activations(model, X[0])"
634 |    ]
635 |   },
636 |   {
637 |    "cell_type": "code",
638 |    "execution_count": 26,
639 |    "metadata": {},
640 |    "outputs": [],
641 |    "source": [
642 |     "def plot_activations(model, model_input, xticklabels):\n",
643 |     "    attention_activations = get_activations(model, model_input)  # use the argument; was hard-coded to the global X[0]\n",
644 |     "    ax = sns.heatmap(attention_activations, xticklabels=xticklabels, square=True)\n",
645 |     "    return ax"
646 |    ]
647 |   },
648 |   {
649 |    "cell_type": "code",
650 |    "execution_count": 27,
651 |    "metadata": {},
652 |    "outputs": [
653 |     {
654 |      "data": {
655 |       "text/plain": [
656 |        "<matplotlib.axes._subplots.AxesSubplot at 0x25328d94ac8>"
657 |       ]
658 |      },
659 |      "execution_count": 27,
660 |      "metadata": {},
661 |      "output_type": "execute_result"
662 |     },
663 |     {
664 |      "data": {
665 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWsAAADzCAYAAABABDfiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEnBJREFUeJzt3X9M3HWex/EXCDUx5QtluzMdEbh6oSGxhizitsb2qInF2G0X0gijcmhuu7BsbHa6SVdaqZCVxpmpbbJd3XQzye6a3S7HGCMkF+OujbF1z5zutnVP3GzoCr1be+0MxU4RoVtg5nt/9GT1+FGoHWY+3z4fCX8wfpnv+0v11bfv7+f7mQzbtm0BANJaZqoLAABcHWENAAYgrAHAAIQ1ABiAsAYAAxDWAGAAwhoADEBYA4ABCGsAMABhDQAGIKwBwABZqS4AAK6niaGBeR+bvfz2JFZyfdFZA4AB6KwBOEsinuoKkoKwBuAs8clUV5AUhDUAR7HtRKpLSArCGoCzJAhrAEh/Seyso9GoAoGAcnNzVVJSovr6eklSZ2enent7NTY2purqaq1Zs0bt7e3Ky8tTTk6OfD6fJKm7u1u9vb1qa2vT+Pi4tm7dqtWrVysrK0t79+6d89yENQBnSeINxq6uLjU0NKi8vFyNjY2qq6tTdna2LMuS3+9XLBZTR0eHLly4oMrKSm3ZskW7du1SNBrViRMndP78eU1OXpmpv//++1q6dKkyMzNVVlZ21XMT1gCcZQGddTgcVjgcnva61+uV1+ud9vrQ0JA8Ho8kybIsjYyMKD8/X5s3b9bo6KiCwaCampp09OhRVVRUSJLcbrcGBwe1adMmnTlzRqFQSJJUUFCgYDCo4uJi7dixQ+vWrZt675kQ1gAcxV7AapDZQnk2Ho9HkUhEHo9Hw8PDsixLktTf369Dhw7J5/OpsLBQfX19ikQikq6MTlwu17T3GhgYUG5urqQrwf9Zxz0bwhqAsyTxBmNtba0CgYC6u7tVVVUlv9+vlpYWNTc3q7S0VAcPHtSqVav02GOP6emnn9bJkydVVFQkt9s97b2Ki4u1b98+ud1uuVwuFRYWznnuDNu27WRdGAAstsun/n3ex968al0SK7m+6KwBOAtPMAKAAXgoBgAMwOPmAGAAnmAEgPRn28ysASD9MbMGAAMwBgEAA9BZA4AB4hOpriApCGsAzsIYBAAMwBgEAAxAZw0ABiCsASD92dxgBAADMLMGAAMwBgEAA9BZA4AB6KwBwAB01gBggKt8SripCGsAzkJnDQAGYGYNAAagswYAA9BZA4AB6KwBwACsBgEAA9h2qitICsIagLMwswYAAxDWAGAAbjACgAHi8VRXkBSENQBnYQwCAAYgrAHAAMysASD92QnWWQNA+mMMAgAGYDUIABggiZ11NBpVIBBQbm6uSkpKVF9fL0nq7OxUb2+vxsbGVF1drTVr1qi9vV15eXnKycmRz+eTJHV3d6u3t1dtbW0aHR2d8ZjZENYAnGUBYR0OhxUOh6e97vV65fV6p73e1dWlhoYGlZeXq7GxUXV1dcrOzpZlWfL7/YrFYuro6NCFCxdUWVmpLVu2aNeuXYpGozpx4oTOnz+vyf/baOq1116bdozb7Z61VsIagLMsYCOn2UJ5NkNDQ/J4PJIky7I0MjKi/Px8bd68WaOjowoGg2pqatLRo0dVUVEhSXK73RocHNSmTZt05swZhUKhqff6/8cQ1gBuHEkcg3g8HkUiEXk8Hg0PD8uyLElSf3+/Dh06JJ/Pp8LCQvX19SkSiUi6MjpxuVyzvtdcx3xe5nW+FgBIrYQ9/68Fqq2t1eHDh9XW1qaqqir5/X6Nj4+rublZly9f1sGDBxUKhfTAAw/o2LFjeuaZZ1RUVDRjxzyfYz4vw7YduvkrgBvSWPBf5n3sLS2/SGIl1xdjEACOYrPOGgAMwBOMAGAA9g
YBAAPQWQOAASZ53BwA0h9jEAAwAGMQAEh/LN0DABPQWQOAAQhrADAAHz4AAOmPz2AEABMQ1gBgAFaDAIAB6KwBwACENQCkPzvOGAQA0h+dNQCkP5buAYAJCGsAMIAzR9aENQBnsSedmdaENQBncWZWE9YAnIUbjABgAjprAEh/dNYAYAI662s3MTSwGKdZVP9Uti3VJVx3/5i1LNUlJEXXuXdTXUJSvFdQnuoSkuLO0//2pX7enrxOhaQZOmsAjmLTWQOAAQhrAEh/dNYAYADCGgAMYMczUl1CUhDWAByFzhoADGAn6KwBIO3RWQOAAWw7eZ11NBpVIBBQbm6uSkpKVF9fL0nq7OxUb2+vxsbGVF1drTVr1qi9vV15eXnKycmRz+dTT0+Pjh8/rkuXLmn79u0qKCjQ1q1btXr1amVlZWnv3r1znpuwBuAoyeysu7q61NDQoPLycjU2Nqqurk7Z2dmyLEt+v1+xWEwdHR26cOGCKisrtWXLFu3atUvRaFQ9PT168cUX9dFHHykUCqm6ulpLly5VZmamysrKrnpuwhqAoyQWsBokHA4rHA5Pe93r9crr9U57fWhoSB6PR5JkWZZGRkaUn5+vzZs3a3R0VMFgUE1NTTp69KgqKiokSW63W4ODg8rKuhK3K1as0ODgoAoKChQMBlVcXKwdO3Zo3bp1U+89E8IagKMs5AbjbKE8G4/Ho0gkIo/Ho+HhYVmWJUnq7+/XoUOH5PP5VFhYqL6+PkUiEUlXRicul0uZmZmSpEgkIpfLpYGBAeXm5kq6EvyTk3NvakJYA3CUZK4Gqa2tVSAQUHd3t6qqquT3+9XS0qLm5maVlpbq4MGDWrVqlR577DE9/fTTOnnypIqKiuR2u1VTU6PW1lZ9+umnevLJJ2Xbtvbt2ye32y2Xy6XCwsI5z51h23bSN39l1z0zsOueWdh1b2anyzbO+9iV/3nkS51rMdFZA3AU1lkDgAGSuXQvlQhrAI4SZ28QAEh/dNYAYABm1gBggOSvb0sNwhqAo9BZA4AB4onMVJeQFIQ1AEdhDAIABkiwGgQA0p9Tl+5ddbgTj8d18eJFJRIO/fgFAI5i2/P/MsmcnfWvf/1rHTt2TJZl6ZNPPtHGjRtVW1s76/Gz7Q0bDu3/8pUCwDzckGOQ/v5+hUKhqe/b29vnDOvZ9oZ14q57ANLTDbka5OLFi/rjH/84teH2p59+ulh1AcA1MWy6MW9zhnVLS4vC4bCGhoZ06623avfu3YtVFwBckxtyDOJ2u/W9731vsWoBgC/NqatBWLoHwFGcum6NsAbgKLborAEg7U0yBgGA9EdnDQAGYGYNAAagswYAA9BZA4AB4nTWAJD+HPqpXoQ1AGdJ0FkDQPq7ITdyAgDTcIMRAAyQyGAMAgBpL57qApKEsAbgKKwGAQADsBoEAAzAahAAMABjEAAwAEv3AMAA8SR21tFoVIFAQLm5uSopKVF9fb0kqbOzU729vRobG1N1dbXWrFmj9vZ25eXlKScnRz6fTz09PTp+/LguXbqk7du3y+VyTTtmLoQ1AEdJZmfd1dWlhoYGlZeXq7GxUXV1dcrOzpZlWfL7/YrFYuro6NCFCxdUWVmpLVu2aNeuXYpGo+rp6dGLL76ojz76SKFQSGVlZdOOcbvds56bsAbgKAsJ63A4rHA4PO11r9crr9c77fWhoSF5PB5JkmVZGhkZUX5+vjZv3qzR0VEFg0E1NTXp6NGjqqiokCS53W4NDg4qK+tK3K5YsUKDg4MaGhqadkzKwzp7+e2LcRpJV375M/2Sr7f/+J83k36OzyzWNS22xbquw0k/w9/xZ5V6C/kIxtlCeTYej0eRSEQej0fDw8OyLEuS1N/fr0OHDsnn86mwsFB9fX2KRCKSroxOXC6XMjMzJUmRSEQul2vqvT5/zFwy539ZZpjpb0nTOfGaJGdelxOvSTLruhIL+Fqo2tpaHT58WG1tbaqqqpLf79f4+Liam5t1+fJlHTx4UK
FQSA888ICOHTumZ555RkVFRXK73aqpqVFra6v279+v5ubmGY+ZC2MQAI6SzMfNv/rVr+rAgQPTXj9y5Mi015577rkvfL9p0yZt2rRpzmPmQlgDcBTWWQOAAVhnDQAGIKwNYcod64Vw4jVJzrwuJ16TZNZ1OXVvkAzbtp16bQBuQPuK/3nexz7534u5sPPLcVxnDeDGxocPAIABEg4dhBDWAByFG4wAYABn9tWGPm7+k5/8RDt37tS6deu0c+dO3XHHHTp//nyqy1oU27Zt+8L3r7zyil599dUUVbNwL7/8sj788MNUl4FZDAwMqKGhQd3d3aku5Zol83HzVDKys37iiSckXQmu/fv3KysrSz/60Y80MjKi9evX68EHH9SBAwd00003aXx8XHv27NGSJUtSXPXMmpub9dOf/lStra362te+pnvuuUePP/647r77bo2Pj2vr1q2KRqP67W9/q7KysqmfCwQCsm1bp06d0kMPPZTCK1iYc+fO6bXXXlNJSYnOnTunH/7wh8rLy0t1Wdfsz3/+s0KhkHJyclRcXKzXX39dGzZs0J/+9Cc9++yzUxv9mKKzs1N/+MMftHbtWj3//PM6ffq0KioqZNu2Tp8+rZGRET366KNf+Hcx3UxmOLO3NjKsZ/Ktb31LK1euVGNjoyYmJnTx4kUVFBQoFovp1KlTWr16dapLnNHq1at16tQpxeNxnThxQpcuXVJRUZH8fr8mJibU3Nysb3zjG9qwYYMeeeQRbdu2TQMDA7JtW7t371ZnZ2eqL+Ga5OXlacOGDbrllltSXcqXEgqFpjaQf+KJJ7R8+XJ997vf1QsvvKC+vj7dfffdqS5xQTZu3Di1+9vly5dVU1Oju+66Sw8//LAqKyuVmZmpt99+O63D2plR7aCwtixragtC27Z177336qGHHtIbb7xx1d2sUun+++/Xz3/+c5WWluqDDz7Q+++/r8+Wvmdk/H2Tg893aJ9//bM9ck2yfft2ZWdn61e/+pUuXbqk++67L9UlXbNEIjH155GRkaGcnBxJ0pIlS5RImPY/2tNZliXbtpWbm6udO3fq7Nmz+stf/pLqsuZk/m99Zub9lz4P3/zmN/XUU0/pgw8+0N/+9jdt2LAh1SXNqrS0VO+99562bdumRCKhM2fO6N5779WePXskSY2NjTp79uwXfmblypVasmSJgsGgPvzwQ9XU1KSi9Gv2wgsvqKCgQPF4XLffvnh7nSdDU1OTOjo6lJ+fr4qKCg0MDKS6pOtu6dKlWr9+vXbv3q1YLKYdO3akuqQ5OXXpHk8wAnCUJ//hkXkfu++//jWJlVxfjuysAdy4GIMAgAHiDh2DENYAHIXOGgAMYNNZA0D6o7MGAAM4dekeYQ3AUZwZ1YQ1AIeZdGhcE9YAHIUbjABgAG4wAoAB6KwBwAB01gBggLhD96YjrAE4CuusAcAAzKwBwADMrAHAAIxBAMAAjEEAwACsBgEAAzAGAQADcIMRAAzAzBoADJDMMUg0GlUgEFBubq5KSkpUX18/9c/eeustvfzyy/rxj3+sWCymtrY2feUrX5HL5VJzc7Neeukl/f73v9eyZctUU1OjO++8U16vVytXrpQktba2KicnZ9ZzE9YAHMVewA3GcDiscDg87XWv1yuv1zvt9a6uLjU0NKi8vFyNjY2qq6tTdna23nnnHf31r3/V6OioJOn48eOqqKjQ448/rl/+8pd699139Zvf/EY/+9nPZNu2vv/976u1tVVjY2PKzs7WbbfdNmdQS4Q1AIeJL6Czni2UZzM0NCSPxyNJsixLIyMjys/P19q1a7V27Vq9+eabkqTKykodOHBAe/fu1cTEhJYtW6bvfOc72r17twoKCjQ+Pq6bb75Zzz33nEpLSxUMBnXy5EmVl5fPeu7MeVcJAAZIyJ7310J5PB5FIhFJ0vDwsCzLmvG4WCym9evXa8+ePVq+fLluvfVWRSIRBQIBffvb31ZWVpbOnj2rjz/+WJKUl5eniY
mJOc9NZw3AURYyBlmo2tpaBQIBdXd3q6qqSn6/Xy0tLVqyZMkXjsvPz1dPT4+OHDmipUuX6q677lIsFtMPfvADTU5OyufzacWKFQqFQvrd736nyclJff3rX5/z3Bl2Mq8MABbZfbdtnPexb545ksRKri86awCOwtI9ADAAj5sDgAF43BwADEBYA4ABnLpmgrAG4Ch01gBgAFaDAIAB4rYzN0klrAE4CjNrADAAM2sAMAAzawAwQIIxCACkPzprADAAq0EAwACMQQDAAIxBAMAAdNYAYAA6awAwQNyOp7qEpCCsATgKj5sDgAF43BwADEBnDQAGYDUIABiA1SAAYAAeNwcAAzCzBgADMLMGAAPQWQOAAVhnDQAGoLMGAAOwGgQADMANRgAwAGMQADAATzACgAHorAHAAE6dWWfYTv1rCAAcJDPVBQAAro6wBgADENYAYADCGgAMQFgDgAEIawAwAGENAAYgrAHAAIQ1ABiAsAYAAxDWAGAAwhoADPC/nYqou4x18BEAAAAASUVORK5CYII=\n",
666 |       "text/plain": [
667 |        "<matplotlib.figure.Figure at 0x253291b8198>"
668 |       ]
669 |      },
670 |      "metadata": {},
671 |      "output_type": "display_data"
672 |     }
673 |    ],
674 |    "source": [
675 |     "plot_activations(model, X[0], documents[0])"
676 |    ]
677 |   },
678 |   {
679 |    "cell_type": "code",
680 |    "execution_count": null,
681 |    "metadata": {},
682 |    "outputs": [],
683 |    "source": []
684 |   }
685 |  ],
686 |  "metadata": {
687 |   "kernelspec": {
688 |    "display_name": "Python 3",
689 |    "language": "python",
690 |    "name": "python3"
691 |   },
692 |   "language_info": {
693 |    "codemirror_mode": {
694 |     "name": "ipython",
695 |     "version": 3
696 |    },
697 |    "file_extension": ".py",
698 |    "mimetype": "text/x-python",
699 |    "name": "python",
700 |    "nbconvert_exporter": "python",
701 |    "pygments_lexer": "ipython3",
702 |    "version": "3.6.4"
703 |   }
704 |  },
705 |  "nbformat": 4,
706 |  "nbformat_minor": 2
707 | }
708 | 


--------------------------------------------------------------------------------
/Keras_Elmo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Keras-Elmo.ipynb",
  7 |       "provenance": [],
  8 |       "collapsed_sections": [],
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "accelerator": "GPU"
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "view-in-github",
 22 |         "colab_type": "text"
 23 |       },
 24 |       "source": [
 25 |         "<a href=\"https://colab.research.google.com/github/napsternxg/ipython-notebooks/blob/master/Keras_Elmo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "cell_type": "code",
 30 |       "metadata": {
 31 |         "id": "wfWTbo1Sctng",
 32 |         "colab_type": "code",
 33 |         "colab": {}
 34 |       },
 35 |       "source": [
 36 |         "! pip install nltk"
 37 |       ],
 38 |       "execution_count": 0,
 39 |       "outputs": []
 40 |     },
 41 |     {
 42 |       "cell_type": "code",
 43 |       "metadata": {
 44 |         "id": "MbREWBiU8VFw",
 45 |         "colab_type": "code",
 46 |         "outputId": "75dad872-eb94-49c6-85c4-2eb79d855ce3",
 47 |         "colab": {
 48 |           "base_uri": "https://localhost:8080/",
 49 |           "height": 51
 50 |         }
 51 |       },
 52 |       "source": [
 53 |         "import tensorflow as tf\n",
 54 |         "import tensorflow_hub as hub\n",
 55 |         "import tensorflow.keras.backend as K\n",
 56 |         "import numpy as np"
 57 |       ],
 58 |       "execution_count": 0,
 59 |       "outputs": [
 60 |         {
 61 |           "output_type": "stream",
 62 |           "text": [
 63 |             "WARNING: Logging before flag parsing goes to stderr.\n",
 64 |             "W0325 21:47:56.237649 139708550211456 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14\n"
 65 |           ],
 66 |           "name": "stderr"
 67 |         }
 68 |       ]
 69 |     },
 70 |     {
 71 |       "cell_type": "code",
 72 |       "metadata": {
 73 |         "id": "P97jiyM49FUz",
 74 |         "colab_type": "code",
 75 |         "outputId": "f0e57295-42bf-4473-fac8-a924df13ee32",
 76 |         "colab": {
 77 |           "base_uri": "https://localhost:8080/",
 78 |           "height": 34
 79 |         }
 80 |       },
 81 |       "source": [
 82 |         "tf.__version__, np.__version__"
 83 |       ],
 84 |       "execution_count": 0,
 85 |       "outputs": [
 86 |         {
 87 |           "output_type": "execute_result",
 88 |           "data": {
 89 |             "text/plain": [
 90 |               "('1.13.1', '1.14.6')"
 91 |             ]
 92 |           },
 93 |           "metadata": {
 94 |             "tags": []
 95 |           },
 96 |           "execution_count": 2
 97 |         }
 98 |       ]
 99 |     },
100 |     {
101 |       "cell_type": "code",
102 |       "metadata": {
103 |         "id": "wRW3a8Sk8mgb",
104 |         "colab_type": "code",
105 |         "colab": {}
106 |       },
107 |       "source": [
108 |         "class ElmoEmbeddingLayer(tf.keras.layers.Layer):\n",
109 |         "    \"\"\"Taken from: \n",
110 |         "    https://github.com/strongio/keras-elmo/blob/master/Elmo%20Keras.ipynb\"\"\"\n",
111 |         "    def __init__(self, **kwargs):\n",
112 |         "        self.dimensions = 1024\n",
113 |         "        super(ElmoEmbeddingLayer, self).__init__(**kwargs)\n",
114 |         "\n",
115 |         "    def build(self, input_shape):\n",
116 |         "        self.elmo = hub.Module(\n",
117 |         "            'https://tfhub.dev/google/elmo/2', \n",
118 |         "            trainable=self.trainable,\n",
119 |         "            name=\"{}_module\".format(self.name)\n",
120 |         "        )\n",
121 |         "        if self.trainable:\n",
122 |         "          self._trainable_weights.extend(\n",
123 |         "              tf.trainable_variables(scope=\"^{}_module/.*\".format(self.name))\n",
124 |         "          )\n",
125 |         "        # Changed from the upstream code, presumably because `trainable` may be set via the constructor kwargs\n",
126 |         "        super(ElmoEmbeddingLayer, self).build(input_shape)\n",
127 |         "\n",
128 |         "    def call(self, x, mask=None):\n",
129 |         "        result = self.elmo(\n",
130 |         "            K.squeeze(K.cast(x, tf.string), axis=1),\n",
131 |         "            as_dict=True,\n",
132 |         "            signature='default',\n",
133 |         "        )['default']\n",
134 |         "        return result\n",
135 |         "\n",
136 |         "    def compute_mask(self, inputs, mask=None):\n",
137 |         "        return K.not_equal(inputs, '--PAD--')\n",
138 |         "\n",
139 |         "    def compute_output_shape(self, input_shape):\n",
140 |         "        return (input_shape[0], self.dimensions)\n",
141 |         "\n",
142 |         "def create_model(train_elmo=False):\n",
143 |         "  # Create Sequential model (sigmoid output: binary_crossentropy expects probabilities, not raw logits)\n",
144 |         "  model = tf.keras.Sequential([\n",
145 |         "      # Need to explicitly include input layer \n",
146 |         "      # to allow keras to accept string input\n",
147 |         "      # Taken from:\n",
148 |         "      # https://gist.github.com/colinmorris/9183206284b4fe3179809098e809d009\n",
149 |         "      tf.keras.layers.InputLayer(dtype='string', input_shape=(1,)),\n",
150 |         "      ElmoEmbeddingLayer(trainable=train_elmo),\n",
151 |         "      tf.keras.layers.Dense(1, activation=\"sigmoid\")\n",
152 |         "  ])\n",
153 |         "  \n",
154 |         "  # Needed to initialize elmo variables\n",
155 |         "  sess = K.get_session()\n",
156 |         "  init = tf.global_variables_initializer()\n",
157 |         "  sess.run(init)\n",
158 |         "  \n",
159 |         "  # Compile model\n",
160 |         "  model.compile(\n",
161 |         "      optimizer=\"adam\", \n",
162 |         "      loss=\"binary_crossentropy\", \n",
163 |         "      metrics=[\"accuracy\"]\n",
164 |         "  )\n",
165 |         "  return model\n",
166 |         "\n",
167 |         "\n"
168 |       ],
169 |       "execution_count": 0,
170 |       "outputs": []
171 |     },
172 |     {
173 |       "cell_type": "code",
174 |       "metadata": {
175 |         "id": "37IaKUOP-Qdn",
176 |         "colab_type": "code",
177 |         "outputId": "22365931-13a8-411d-ddf1-513aeb7bc5d3",
178 |         "colab": {
179 |           "base_uri": "https://localhost:8080/",
180 |           "height": 175
181 |         }
182 |       },
183 |       "source": [
184 |         "model = create_model(train_elmo=True)"
185 |       ],
186 |       "execution_count": 0,
187 |       "outputs": [
188 |         {
189 |           "output_type": "stream",
190 |           "text": [
191 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/control_flow_ops.py:3632: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
192 |             "Instructions for updating:\n",
193 |             "Colocations handled automatically by placer.\n"
194 |           ],
195 |           "name": "stdout"
196 |         },
197 |         {
198 |           "output_type": "stream",
199 |           "text": [
200 |             "W0325 21:47:56.784782 139708550211456 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/control_flow_ops.py:3632: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
201 |             "Instructions for updating:\n",
202 |             "Colocations handled automatically by placer.\n"
203 |           ],
204 |           "name": "stderr"
205 |         },
206 |         {
207 |           "output_type": "stream",
208 |           "text": [
209 |             "INFO:tensorflow:Saver not created because there are no variables in the graph to restore\n"
210 |           ],
211 |           "name": "stdout"
212 |         },
213 |         {
214 |           "output_type": "stream",
215 |           "text": [
216 |             "I0325 21:47:57.561886 139708550211456 saver.py:1483] Saver not created because there are no variables in the graph to restore\n"
217 |           ],
218 |           "name": "stderr"
219 |         }
220 |       ]
221 |     },
222 |     {
223 |       "cell_type": "code",
224 |       "metadata": {
225 |         "id": "85pZjJ5e-Tx7",
226 |         "colab_type": "code",
227 |         "outputId": "d74a6472-29e5-42d8-eeac-1a7e84d4f52d",
228 |         "colab": {
229 |           "base_uri": "https://localhost:8080/",
230 |           "height": 34
231 |         }
232 |       },
233 |       "source": [
234 |         "X = np.array([\n",
235 |         "    \"This is good\",\n",
236 |         "    \"This is bad\"\n",
237 |         "]).reshape(2, 1)\n",
238 |         "y = np.array([0, 1]).reshape(2, 1)\n",
239 |         "X.shape, y.shape"
240 |       ],
241 |       "execution_count": 0,
242 |       "outputs": [
243 |         {
244 |           "output_type": "execute_result",
245 |           "data": {
246 |             "text/plain": [
247 |               "((2, 1), (2, 1))"
248 |             ]
249 |           },
250 |           "metadata": {
251 |             "tags": []
252 |           },
253 |           "execution_count": 5
254 |         }
255 |       ]
256 |     },
257 |     {
258 |       "cell_type": "code",
259 |       "metadata": {
260 |         "id": "OsX2rPRE-edB",
261 |         "colab_type": "code",
262 |         "outputId": "e013e6d0-2288-4947-cab7-5969242d4534",
263 |         "colab": {
264 |           "base_uri": "https://localhost:8080/",
265 |           "height": 175
266 |         }
267 |       },
268 |       "source": [
269 |         "model.fit(X, y)"
270 |       ],
271 |       "execution_count": 0,
272 |       "outputs": [
273 |         {
274 |           "output_type": "stream",
275 |           "text": [
276 |             "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
277 |             "Instructions for updating:\n",
278 |             "Use tf.cast instead.\n"
279 |           ],
280 |           "name": "stdout"
281 |         },
282 |         {
283 |           "output_type": "stream",
284 |           "text": [
285 |             "W0325 21:47:59.085000 139708550211456 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
286 |             "Instructions for updating:\n",
287 |             "Use tf.cast instead.\n"
288 |           ],
289 |           "name": "stderr"
290 |         },
291 |         {
292 |           "output_type": "stream",
293 |           "text": [
294 |             "\r2/2 [==============================] - 2s 790ms/sample - loss: 8.0590 - acc: 0.5000\n"
295 |           ],
296 |           "name": "stdout"
297 |         },
298 |         {
299 |           "output_type": "execute_result",
300 |           "data": {
301 |             "text/plain": [
302 |               "<tensorflow.python.keras.callbacks.History at 0x7f0ff58115c0>"
303 |             ]
304 |           },
305 |           "metadata": {
306 |             "tags": []
307 |           },
308 |           "execution_count": 6
309 |         }
310 |       ]
311 |     },
312 |     {
313 |       "cell_type": "code",
314 |       "metadata": {
315 |         "id": "vXO0YWtY-nrP",
316 |         "colab_type": "code",
317 |         "outputId": "28c24dc2-c5cf-4a21-c633-d710c6dc6706",
318 |         "colab": {
319 |           "base_uri": "https://localhost:8080/",
320 |           "height": 207
321 |         }
322 |       },
323 |       "source": [
324 |         "model.summary()"
325 |       ],
326 |       "execution_count": 0,
327 |       "outputs": [
328 |         {
329 |           "output_type": "stream",
330 |           "text": [
331 |             "_________________________________________________________________\n",
332 |             "Layer (type)                 Output Shape              Param #   \n",
333 |             "=================================================================\n",
334 |             "elmo_embedding_layer (ElmoEm (None, 1024)              4         \n",
335 |             "_________________________________________________________________\n",
336 |             "dense (Dense)                (None, 1)                 1025      \n",
337 |             "=================================================================\n",
338 |             "Total params: 1,029\n",
339 |             "Trainable params: 1,029\n",
340 |             "Non-trainable params: 0\n",
341 |             "_________________________________________________________________\n"
342 |           ],
343 |           "name": "stdout"
344 |         }
345 |       ]
346 |     },
347 |     {
348 |       "cell_type": "code",
349 |       "metadata": {
350 |         "id": "B44AaTUhv_w7",
351 |         "colab_type": "code",
352 |         "outputId": "bc282e75-9533-4162-e3e9-bbd5c75a6ffd",
353 |         "colab": {
354 |           "base_uri": "https://localhost:8080/",
355 |           "height": 86
356 |         }
357 |       },
358 |       "source": [
359 |         "model.trainable_weights"
360 |       ],
361 |       "execution_count": 0,
362 |       "outputs": [
363 |         {
364 |           "output_type": "execute_result",
365 |           "data": {
366 |             "text/plain": [
367 |               "[<tf.Variable 'elmo_embedding_layer_module/aggregation/weights:0' shape=(3,) dtype=float32>,\n",
368 |               " <tf.Variable 'elmo_embedding_layer_module/aggregation/scaling:0' shape=() dtype=float32>,\n",
369 |               " <tf.Variable 'dense/kernel:0' shape=(1024, 1) dtype=float32>,\n",
370 |               " <tf.Variable 'dense/bias:0' shape=(1,) dtype=float32>]"
371 |             ]
372 |           },
373 |           "metadata": {
374 |             "tags": []
375 |           },
376 |           "execution_count": 8
377 |         }
378 |       ]
379 |     },
380 |     {
381 |       "cell_type": "code",
382 |       "metadata": {
383 |         "id": "Gr_AJYCZwZlg",
384 |         "colab_type": "code",
385 |         "colab": {}
386 |       },
387 |       "source": [
388 |         "elmo = model.layers[0].elmo"
389 |       ],
390 |       "execution_count": 0,
391 |       "outputs": []
392 |     },
393 |     {
394 |       "cell_type": "code",
395 |       "metadata": {
396 |         "id": "62QVQ2fjxAQf",
397 |         "colab_type": "code",
398 |         "outputId": "e3a3b0fd-2518-4aa2-bfea-eed54ea50dad",
399 |         "colab": {
400 |           "base_uri": "https://localhost:8080/",
401 |           "height": 693
402 |         }
403 |       },
404 |       "source": [
405 |         "elmo.variables"
406 |       ],
407 |       "execution_count": 0,
408 |       "outputs": [
409 |         {
410 |           "output_type": "execute_result",
411 |           "data": {
412 |             "text/plain": [
413 |               "[<tf.Variable 'elmo_embedding_layer_module/aggregation/scaling:0' shape=() dtype=float32>,\n",
414 |               " <tf.Variable 'elmo_embedding_layer_module/aggregation/weights:0' shape=(3,) dtype=float32>,\n",
415 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/W_cnn_0:0' shape=(1, 1, 16, 32) dtype=float32>,\n",
416 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/W_cnn_1:0' shape=(1, 2, 16, 32) dtype=float32>,\n",
417 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/W_cnn_2:0' shape=(1, 3, 16, 64) dtype=float32>,\n",
418 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/W_cnn_3:0' shape=(1, 4, 16, 128) dtype=float32>,\n",
419 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/W_cnn_4:0' shape=(1, 5, 16, 256) dtype=float32>,\n",
420 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/W_cnn_5:0' shape=(1, 6, 16, 512) dtype=float32>,\n",
421 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/W_cnn_6:0' shape=(1, 7, 16, 1024) dtype=float32>,\n",
422 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/b_cnn_0:0' shape=(32,) dtype=float32>,\n",
423 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/b_cnn_1:0' shape=(32,) dtype=float32>,\n",
424 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/b_cnn_2:0' shape=(64,) dtype=float32>,\n",
425 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/b_cnn_3:0' shape=(128,) dtype=float32>,\n",
426 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/b_cnn_4:0' shape=(256,) dtype=float32>,\n",
427 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/b_cnn_5:0' shape=(512,) dtype=float32>,\n",
428 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN/b_cnn_6:0' shape=(1024,) dtype=float32>,\n",
429 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_0/W_carry:0' shape=(2048, 2048) dtype=float32>,\n",
430 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_0/W_transform:0' shape=(2048, 2048) dtype=float32>,\n",
431 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_0/b_carry:0' shape=(2048,) dtype=float32>,\n",
432 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_0/b_transform:0' shape=(2048,) dtype=float32>,\n",
433 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_1/W_carry:0' shape=(2048, 2048) dtype=float32>,\n",
434 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_1/W_transform:0' shape=(2048, 2048) dtype=float32>,\n",
435 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_1/b_carry:0' shape=(2048,) dtype=float32>,\n",
436 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_high_1/b_transform:0' shape=(2048,) dtype=float32>,\n",
437 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_proj/W_proj:0' shape=(2048, 512) dtype=float32>,\n",
438 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/CNN_proj/b_proj:0' shape=(512,) dtype=float32>,\n",
439 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_0/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias:0' shape=(16384,) dtype=float32>,\n",
440 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_0/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel:0' shape=(1024, 16384) dtype=float32>,\n",
441 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_0/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/projection/kernel:0' shape=(4096, 512) dtype=float32>,\n",
442 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/bias:0' shape=(16384,) dtype=float32>,\n",
443 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/kernel:0' shape=(1024, 16384) dtype=float32>,\n",
444 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_0/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel:0' shape=(4096, 512) dtype=float32>,\n",
445 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/bias:0' shape=(16384,) dtype=float32>,\n",
446 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/kernel:0' shape=(1024, 16384) dtype=float32>,\n",
447 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_1/RNN/MultiRNNCell/Cell0/rnn/lstm_cell/projection/kernel:0' shape=(4096, 512) dtype=float32>,\n",
448 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_1/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/bias:0' shape=(16384,) dtype=float32>,\n",
449 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_1/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/kernel:0' shape=(1024, 16384) dtype=float32>,\n",
450 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/RNN_1/RNN/MultiRNNCell/Cell1/rnn/lstm_cell/projection/kernel:0' shape=(4096, 512) dtype=float32>,\n",
451 |               " <tf.Variable 'elmo_embedding_layer_module/bilm/char_embed:0' shape=(261, 16) dtype=float32>]"
452 |             ]
453 |           },
454 |           "metadata": {
455 |             "tags": []
456 |           },
457 |           "execution_count": 10
458 |         }
459 |       ]
460 |     },
461 |     {
462 |       "cell_type": "code",
463 |       "metadata": {
464 |         "id": "zZB8f-_pxJ2N",
465 |         "colab_type": "code",
466 |         "outputId": "ff46524a-1176-4017-d672-80d8605723ec",
467 |         "colab": {
468 |           "base_uri": "https://localhost:8080/",
469 |           "height": 51
470 |         }
471 |       },
472 |       "source": [
473 |         "model.layers[0].trainable_weights"
474 |       ],
475 |       "execution_count": 0,
476 |       "outputs": [
477 |         {
478 |           "output_type": "execute_result",
479 |           "data": {
480 |             "text/plain": [
481 |               "[<tf.Variable 'elmo_embedding_layer_module/aggregation/weights:0' shape=(3,) dtype=float32>,\n",
482 |               " <tf.Variable 'elmo_embedding_layer_module/aggregation/scaling:0' shape=() dtype=float32>]"
483 |             ]
484 |           },
485 |           "metadata": {
486 |             "tags": []
487 |           },
488 |           "execution_count": 11
489 |         }
490 |       ]
491 |     },
492 |     {
493 |       "cell_type": "code",
494 |       "metadata": {
495 |         "id": "eZ8YljGYxfqj",
496 |         "colab_type": "code",
497 |         "outputId": "1172ab69-d225-44cf-f25a-a31e155c3f71",
498 |         "colab": {
499 |           "base_uri": "https://localhost:8080/",
500 |           "height": 34
501 |         }
502 |       },
503 |       "source": [
504 |         "model.predict([[\"This is so cool\"]])"
505 |       ],
506 |       "execution_count": 0,
507 |       "outputs": [
508 |         {
509 |           "output_type": "execute_result",
510 |           "data": {
511 |             "text/plain": [
512 |               "array([[-0.363752]], dtype=float32)"
513 |             ]
514 |           },
515 |           "metadata": {
516 |             "tags": []
517 |           },
518 |           "execution_count": 12
519 |         }
520 |       ]
521 |     },
522 |     {
523 |       "cell_type": "code",
524 |       "metadata": {
525 |         "id": "0IvIcjVi2l4k",
526 |         "colab_type": "code",
527 |         "colab": {}
528 |       },
529 |       "source": [
530 |         ""
531 |       ],
532 |       "execution_count": 0,
533 |       "outputs": []
534 |     }
535 |   ]
536 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
203 | 


--------------------------------------------------------------------------------
/LazyValues.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "LazyValues.ipynb",
  7 |       "provenance": [],
  8 |       "authorship_tag": "ABX9TyMlA+yFrEudIIn1Vg8z+ehW",
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "language_info": {
 16 |       "name": "python"
 17 |     }
 18 |   },
 19 |   "cells": [
 20 |     {
 21 |       "cell_type": "markdown",
 22 |       "metadata": {
 23 |         "id": "view-in-github",
 24 |         "colab_type": "text"
 25 |       },
 26 |       "source": [
 27 |         "<a href=\"https://colab.research.google.com/github/napsternxg/ipython-notebooks/blob/master/LazyValues.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 28 |       ]
 29 |     },
 30 |     {
 31 |       "cell_type": "markdown",
 32 |       "source": [
 33 |         "# Lazy Loading Variables in Python\n",
 34 |         "\n",
  35 |         "How to define attributes whose values are computed lazily on first access and then cached for later reuse."
 36 |       ],
 37 |       "metadata": {
 38 |         "id": "gWt4otPFwHwA"
 39 |       }
 40 |     },
 41 |     {
 42 |       "cell_type": "code",
 43 |       "execution_count": 1,
 44 |       "metadata": {
 45 |         "id": "X0W2LDCFvWIr"
 46 |       },
 47 |       "outputs": [],
 48 |       "source": [
 49 |         "import time"
 50 |       ]
 51 |     },
 52 |     {
 53 |       "cell_type": "code",
 54 |       "source": [
 55 |         "class LazyValues(object):\n",
 56 |         "  def __init__(self, **value_fn_dict):\n",
 57 |         "    self.value_fn_dict = value_fn_dict\n",
 58 |         "  \n",
 59 |         "  def __getattr__(self, name):\n",
 60 |         "    if name not in self.value_fn_dict:\n",
 61 |         "      raise AttributeError(f\"{name} not in {self.value_fn_dict.keys()}\")\n",
 62 |         "    try:\n",
 63 |         "      self.__getattribute__(name)\n",
 64 |         "    except AttributeError:\n",
 65 |         "      print(f\"Lazy loading value: {name}.\")\n",
 66 |         "      value = self.value_fn_dict[name]()\n",
 67 |         "      setattr(self, name, value)\n",
 68 |         "    return self.__getattribute__(name)"
 69 |       ],
 70 |       "metadata": {
 71 |         "id": "Q4-yVWVNvZ5r"
 72 |       },
 73 |       "execution_count": 2,
 74 |       "outputs": []
 75 |     },
 76 |     {
 77 |       "cell_type": "code",
 78 |       "source": [
 79 |         "def slow_function(v, sleep_time=2):\n",
 80 |         "  print(f\"In slow_function: Sleeping for: {sleep_time} secs before returning {v}.\")\n",
 81 |         "  time.sleep(sleep_time)\n",
 82 |         "  return v"
 83 |       ],
 84 |       "metadata": {
 85 |         "id": "kAdB_V40vbT0"
 86 |       },
 87 |       "execution_count": 3,
 88 |       "outputs": []
 89 |     },
 90 |     {
 91 |       "cell_type": "code",
 92 |       "source": [
 93 |         "lazy_values = LazyValues(\n",
 94 |         "  some_slow_value = lambda: slow_function(5, sleep_time=2),\n",
 95 |         "  extremely_slow_value = lambda: slow_function(10, sleep_time=10)\n",
 96 |         ")"
 97 |       ],
 98 |       "metadata": {
 99 |         "id": "cLgpt7nKvdWi"
100 |       },
101 |       "execution_count": 4,
102 |       "outputs": []
103 |     },
104 |     {
105 |       "cell_type": "code",
106 |       "source": [
107 |         "for i in range(10):\n",
108 |         "  print(f\"lazy_values.some_slow_value={lazy_values.some_slow_value}\")\n",
109 |         "  print(f\"lazy_values.extremely_slow_value={lazy_values.extremely_slow_value}\")"
110 |       ],
111 |       "metadata": {
112 |         "colab": {
113 |           "base_uri": "https://localhost:8080/"
114 |         },
115 |         "id": "Q_n10qn7vffp",
116 |         "outputId": "3a372426-e006-4b4a-a7ee-5d9abfaebf23"
117 |       },
118 |       "execution_count": 5,
119 |       "outputs": [
120 |         {
121 |           "output_type": "stream",
122 |           "name": "stdout",
123 |           "text": [
124 |             "Lazy loading value: some_slow_value.\n",
125 |             "In slow_function: Sleeping for: 2 secs before returning 5.\n",
126 |             "lazy_values.some_slow_value=5\n",
127 |             "Lazy loading value: extremely_slow_value.\n",
128 |             "In slow_function: Sleeping for: 10 secs before returning 10.\n",
129 |             "lazy_values.extremely_slow_value=10\n",
130 |             "lazy_values.some_slow_value=5\n",
131 |             "lazy_values.extremely_slow_value=10\n",
132 |             "lazy_values.some_slow_value=5\n",
133 |             "lazy_values.extremely_slow_value=10\n",
134 |             "lazy_values.some_slow_value=5\n",
135 |             "lazy_values.extremely_slow_value=10\n",
136 |             "lazy_values.some_slow_value=5\n",
137 |             "lazy_values.extremely_slow_value=10\n",
138 |             "lazy_values.some_slow_value=5\n",
139 |             "lazy_values.extremely_slow_value=10\n",
140 |             "lazy_values.some_slow_value=5\n",
141 |             "lazy_values.extremely_slow_value=10\n",
142 |             "lazy_values.some_slow_value=5\n",
143 |             "lazy_values.extremely_slow_value=10\n",
144 |             "lazy_values.some_slow_value=5\n",
145 |             "lazy_values.extremely_slow_value=10\n",
146 |             "lazy_values.some_slow_value=5\n",
147 |             "lazy_values.extremely_slow_value=10\n"
148 |           ]
149 |         }
150 |       ]
151 |     },
152 |     {
153 |       "cell_type": "code",
154 |       "source": [
155 |         ""
156 |       ],
157 |       "metadata": {
158 |         "id": "0ygvGVp9vtNR"
159 |       },
160 |       "execution_count": 5,
161 |       "outputs": []
162 |     },
163 |     {
164 |       "cell_type": "code",
165 |       "source": [
166 |         ""
167 |       ],
168 |       "metadata": {
169 |         "id": "IFT4OU4Tv6Sl"
170 |       },
171 |       "execution_count": 5,
172 |       "outputs": []
173 |     },
174 |     {
175 |       "cell_type": "code",
176 |       "source": [
177 |         ""
178 |       ],
179 |       "metadata": {
180 |         "id": "rVf4LA9Xv605"
181 |       },
182 |       "execution_count": 5,
183 |       "outputs": []
184 |     },
185 |     {
186 |       "cell_type": "code",
187 |       "source": [
188 |         ""
189 |       ],
190 |       "metadata": {
191 |         "id": "gJMf9e81wBZn"
192 |       },
193 |       "execution_count": null,
194 |       "outputs": []
195 |     },
196 |     {
197 |       "cell_type": "code",
198 |       "source": [
199 |         ""
200 |       ],
201 |       "metadata": {
202 |         "id": "2RGOI9IxwBXI"
203 |       },
204 |       "execution_count": null,
205 |       "outputs": []
206 |     },
207 |     {
208 |       "cell_type": "code",
209 |       "source": [
210 |         ""
211 |       ],
212 |       "metadata": {
213 |         "id": "cirMdxbxwBUT"
214 |       },
215 |       "execution_count": null,
216 |       "outputs": []
217 |     },
218 |     {
219 |       "cell_type": "code",
220 |       "source": [
221 |         ""
222 |       ],
223 |       "metadata": {
224 |         "id": "zGS6H2jowBSE"
225 |       },
226 |       "execution_count": null,
227 |       "outputs": []
228 |     },
229 |     {
230 |       "cell_type": "code",
231 |       "source": [
232 |         ""
233 |       ],
234 |       "metadata": {
235 |         "id": "Tn7wl47ywBPO"
236 |       },
237 |       "execution_count": null,
238 |       "outputs": []
239 |     },
240 |     {
241 |       "cell_type": "code",
242 |       "source": [
243 |         ""
244 |       ],
245 |       "metadata": {
246 |         "id": "4rux0vdOwBMw"
247 |       },
248 |       "execution_count": null,
249 |       "outputs": []
250 |     },
251 |     {
252 |       "cell_type": "code",
253 |       "source": [
254 |         ""
255 |       ],
256 |       "metadata": {
257 |         "id": "qXToL9bVwBKU"
258 |       },
259 |       "execution_count": null,
260 |       "outputs": []
261 |     },
262 |     {
263 |       "cell_type": "code",
264 |       "source": [
265 |         ""
266 |       ],
267 |       "metadata": {
268 |         "id": "Qz5uupfdwBHg"
269 |       },
270 |       "execution_count": null,
271 |       "outputs": []
272 |     },
273 |     {
274 |       "cell_type": "code",
275 |       "source": [
276 |         ""
277 |       ],
278 |       "metadata": {
279 |         "id": "2i13liCTwA_9"
280 |       },
281 |       "execution_count": null,
282 |       "outputs": []
283 |     },
284 |     {
285 |       "cell_type": "code",
286 |       "source": [
287 |         ""
288 |       ],
289 |       "metadata": {
290 |         "id": "HeU2nJKFwA9e"
291 |       },
292 |       "execution_count": null,
293 |       "outputs": []
294 |     },
295 |     {
296 |       "cell_type": "code",
297 |       "source": [
298 |         ""
299 |       ],
300 |       "metadata": {
301 |         "id": "oniBfjo5wA7C"
302 |       },
303 |       "execution_count": null,
304 |       "outputs": []
305 |     },
306 |     {
307 |       "cell_type": "code",
308 |       "source": [
309 |         ""
310 |       ],
311 |       "metadata": {
312 |         "id": "d2giXlADwA4N"
313 |       },
314 |       "execution_count": null,
315 |       "outputs": []
316 |     },
317 |     {
318 |       "cell_type": "code",
319 |       "source": [
320 |         ""
321 |       ],
322 |       "metadata": {
323 |         "id": "kSZL5thLwA1u"
324 |       },
325 |       "execution_count": null,
326 |       "outputs": []
327 |     },
328 |     {
329 |       "cell_type": "code",
330 |       "source": [
331 |         ""
332 |       ],
333 |       "metadata": {
334 |         "id": "0vRPWAG5wAzH"
335 |       },
336 |       "execution_count": null,
337 |       "outputs": []
338 |     },
339 |     {
340 |       "cell_type": "code",
341 |       "source": [
342 |         ""
343 |       ],
344 |       "metadata": {
345 |         "id": "yB9Q6aQMwAwH"
346 |       },
347 |       "execution_count": null,
348 |       "outputs": []
349 |     },
350 |     {
351 |       "cell_type": "code",
352 |       "source": [
353 |         ""
354 |       ],
355 |       "metadata": {
356 |         "id": "7NuELLmvwApz"
357 |       },
358 |       "execution_count": null,
359 |       "outputs": []
360 |     },
361 |     {
362 |       "cell_type": "code",
363 |       "source": [
364 |         ""
365 |       ],
366 |       "metadata": {
367 |         "id": "Q51AntvswAdH"
368 |       },
369 |       "execution_count": null,
370 |       "outputs": []
371 |     },
372 |     {
373 |       "cell_type": "code",
374 |       "source": [
375 |         ""
376 |       ],
377 |       "metadata": {
378 |         "id": "RScVCMtCwAG_"
379 |       },
380 |       "execution_count": null,
381 |       "outputs": []
382 |     }
383 |   ]
384 | }


--------------------------------------------------------------------------------
/Naive Sudoku Solver.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Problem taken from the HackerRank Project Euler contest: https://www.hackerrank.com/contests/projecteuler/challenges/euler096/"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 32,
 13 |    "metadata": {
 14 |     "collapsed": false
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# Enter your code here. Read input from STDIN. Print output to STDOUT\n",
 19 |     "class SudokuSolver(object):\n",
 20 |     "    \n",
 21 |     "    def __init__(self, N=3, puzzle=None):\n",
 22 |     "        self.N = N\n",
 23 |     "        self.SIZE = self.N*self.N\n",
 24 |     "        self.puzzle = [[set([1])]*(self.SIZE)]*(self.SIZE)\n",
 25 |     "        if puzzle is None:\n",
 26 |     "            for i in range(self.SIZE):\n",
 27 |     "                self.puzzle[i] = [int(k) for k in raw_input()]\n",
 28 |     "        else:\n",
 29 |     "            self.puzzle = [[int(k) for k in line] for line in puzzle.splitlines()]\n",
 30 |     "\n",
 31 |     "        self._empty_cells = 0    \n",
 32 |     "        self._solutions = [[set([]) for i in range(self.SIZE)] for k in range(self.SIZE)]\n",
 33 |     "        #print self.puzzle\n",
 34 |     "        #print self._solutions\n",
 35 |     "        for i in range(self.SIZE):\n",
 36 |     "            for j in range(self.SIZE):\n",
 37 |     "                if self.puzzle[i][j] < 1:\n",
 38 |     "                    self._solutions[i][j] = set(range(1,self.SIZE+1))\n",
 39 |     "                    self._empty_cells += 1\n",
 40 |     "                    #print \"puzzle[%s][%s] = %s\\tsolutions[%s][%s] = %s\" % (i,j, self.puzzle[i][j], i, j, self._solutions[i][j])\n",
 41 |     "                else:\n",
 42 |     "                    self._solutions[i][j] = set([])\n",
 43 |     "        #print self._solutions\n",
 44 |     "\n",
 45 |     "\n",
 46 |     "    def get_remove_sets(self, i, j):\n",
 47 |     "        remove_items = []\n",
 48 |     "        remove_items.extend(self.puzzle[i]) # Row items\n",
 49 |     "        remove_items.extend([self.puzzle[k][j] for k in range(self.SIZE)]) # Col items\n",
 50 |     "        x, y = i/self.N, j/self.N\n",
 51 |     "        remove_items.extend([self.puzzle[x*self.N + (k/3)][y*self.N + (k%3)] \\\n",
 52 |     "                             for k in range(self.SIZE)]) # Square items\n",
 53 |     "        remove_set = set(remove_items) - set([0])\n",
 54 |     "        #print \"Remove_Set[%s][%s] = %s\" % (i, j, remove_set)\n",
 55 |     "        return remove_set\n",
 56 |     "    \n",
 57 |     "    def solve(self):\n",
 58 |     "        run_times = self.SIZE*self.SIZE\n",
 59 |     "        print \"Original with Empty Cells = %s\" % self._empty_cells\n",
 60 |     "        self.showSudoku()\n",
 61 |     "        changed_cell = True\n",
 62 |     "        #print \"Original Solution Table\"\n",
 63 |     "        #print self._solutions\n",
 64 |     "        #self.showSudoku(arr=self._solutions, sep=\"\\t\")\n",
 65 |     "        while(self._empty_cells > 0 and changed_cell):\n",
 66 |     "            # Row wise selection of solutions\n",
 67 |     "            #print \"Filling row wise\"\n",
 68 |     "            changed_cell = False\n",
 69 |     "            for i in range(self.SIZE):\n",
 70 |     "                for j in range(self.SIZE):\n",
 71 |     "                    if self.puzzle[i][j] != 0:\n",
 72 |     "                        continue\n",
 73 |     "                    remove_set = self.get_remove_sets(i, j)\n",
 74 |     "                    #print \"Before solutions[%s][%s] = %s, Remaining: %s\" % (i, j, self._solutions[i][j], self._empty_cells)\n",
 75 |     "                    self._solutions[i][j] -= remove_set\n",
 76 |     "                    #print \"After solutions[%s][%s] = %s, Remaining: %s\" % (i, j, self._solutions[i][j], self._empty_cells)\n",
 77 |     "                    if len(self._solutions[i][j]) == 1:\n",
 78 |     "                        self.puzzle[i][j] = self._solutions[i][j].pop()\n",
 79 |     "                        self._empty_cells -= 1\n",
 80 |     "                        changed_cell = True\n",
 81 |     "                        #print \"Adding puzzle[%s][%s] = %s, Remaining: %s\" % (i, j, puzzle[i][j], empty_cells)\n",
 82 |     "                        continue\n",
 83 |     "            run_times -= 1\n",
 84 |     "        print \"Solved with Empty Cells = %s\" % self._empty_cells\n",
 85 |     "        self.showSudoku()\n",
 86 |     "        \n",
 87 |     "    def solveRecurse(self, puzzle=None):\n",
 88 |     "        if puzzle is None:\n",
 89 |     "            puzzle = self.puzzle\n",
 90 |     "        found = False\n",
 91 |     "        for i in range(self.SIZE):\n",
 92 |     "            for j in range(self.SIZE):\n",
 93 |     "                if puzzle[i][j] == 0:\n",
 94 |     "                    found = True\n",
 95 |     "                    break\n",
 96 |     "            if found:\n",
 97 |     "                break\n",
 98 |     "        found_pos = (i, j)\n",
 99 |     "        if not found:\n",
100 |     "            self.showSudoku(arr=puzzle)\n",
101 |     "        \n",
102 |     "        remove_set = self.get_remove_sets(i, j)\n",
103 |     "        #print found_pos, remove_set\n",
104 |     "        for m in range(1, self.SIZE+1):\n",
105 |     "            if m not in remove_set:\n",
106 |     "                #print \"Puttin %s at %s\" % (m, found_pos)\n",
107 |     "                puzzle[found_pos[0]][found_pos[1]] = m\n",
108 |     "                self.solveRecurse(puzzle=puzzle)\n",
109 |     "        \n",
110 |     "    \n",
111 |     "    def showSudoku(self, arr = None, sep=\"\"):\n",
112 |     "        if arr is None:\n",
113 |     "            arr = self.puzzle\n",
114 |     "        # Print final matrix\n",
115 |     "        for i in range(self.SIZE):\n",
116 |     "             print sep.join([str(k) for k in arr[i]])\n",
117 |     "\n"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 33,
123 |    "metadata": {
124 |     "collapsed": false
125 |    },
126 |    "outputs": [
127 |     {
128 |      "name": "stdout",
129 |      "output_type": "stream",
130 |      "text": [
131 |       "583927687\n",
132 |       "987345001\n",
133 |       "001806400\n",
134 |       "008102900\n",
135 |       "700000008\n",
136 |       "006708200\n",
137 |       "002609500\n",
138 |       "800203009\n",
139 |       "005010300\n"
140 |      ]
141 |     }
142 |    ],
143 |    "source": [
144 |     "s = SudokuSolver(puzzle=\"003020600\\n900305001\\n001806400\\n008102900\\n700000008\\n006708200\\n002609500\\n800203009\\n005010300\")\n",
145 |     "s.solveRecurse()\n",
146 |     "s.showSudoku()"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 10,
152 |    "metadata": {
153 |     "collapsed": false
154 |    },
155 |    "outputs": [
156 |     {
157 |      "name": "stdout",
158 |      "output_type": "stream",
159 |      "text": [
160 |       "Original with Empty Cells = 51\n",
161 |       "200080300\n",
162 |       "060070084\n",
163 |       "030500209\n",
164 |       "000105408\n",
165 |       "000000000\n",
166 |       "402706000\n",
167 |       "301007040\n",
168 |       "720040060\n",
169 |       "004010003\n",
170 |       "Solved with Empty Cells = 50\n",
171 |       "200080300\n",
172 |       "060070084\n",
173 |       "030560209\n",
174 |       "000105408\n",
175 |       "000000000\n",
176 |       "402706000\n",
177 |       "301007040\n",
178 |       "720040060\n",
179 |       "004010003\n"
180 |      ]
181 |     }
182 |    ],
183 |    "source": [
184 |     "p = \"\"\"200080300\n",
185 |     "060070084\n",
186 |     "030500209\n",
187 |     "000105408\n",
188 |     "000000000\n",
189 |     "402706000\n",
190 |     "301007040\n",
191 |     "720040060\n",
192 |     "004010003\"\"\"\n",
193 |     "s = SudokuSolver(puzzle=p)\n",
194 |     "s.solve()"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 13,
200 |    "metadata": {
201 |     "collapsed": false
202 |    },
203 |    "outputs": [
204 |     {
205 |      "name": "stdout",
206 |      "output_type": "stream",
207 |      "text": [
208 |       "Done\n"
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "print \"Done\""
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 29,
219 |    "metadata": {
220 |     "collapsed": true
221 |    },
222 |    "outputs": [],
223 |    "source": [
224 |     "# Another implementation taken from: http://pythontips.com/2013/09/01/sudoku-solver-in-python/\n",
225 |     "import sys\n",
226 |     "\n",
227 |     "def same_row(i,j): return (i/9 == j/9)\n",
228 |     "def same_col(i,j): return (i-j) % 9 == 0\n",
229 |     "def same_block(i,j): return (i/27 == j/27 and i%9/3 == j%9/3)\n",
230 |     "\n",
231 |     "def r(a):\n",
232 |     "    i = a.find('0')\n",
233 |     "    if i == -1:\n",
234 |     "        print a\n",
235 |     "        print \"\\n\".join([a[k*9:k*9+9] for k in range(9)])\n",
236 |     "\n",
237 |     "    excluded_numbers = set()\n",
238 |     "    for j in range(81):\n",
239 |     "        if same_row(i,j) or same_col(i,j) or same_block(i,j):\n",
240 |     "            excluded_numbers.add(a[j])\n",
241 |     "    #print i, i/9, i%9, excluded_numbers\n",
242 |     "    \n",
243 |     "    for m in '123456789':\n",
244 |     "        if m not in excluded_numbers:\n",
245 |     "            #print \"Puttin %s at %s, %s, %s\" % (m, i, i/9, i%9)\n",
246 |     "            r(a[:i]+m+a[i+1:])"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 30,
252 |    "metadata": {
253 |     "collapsed": false
254 |    },
255 |    "outputs": [
256 |     {
257 |      "name": "stdout",
258 |      "output_type": "stream",
259 |      "text": [
260 |       "245981376169273584837564219976125438513498627482736951391657842728349165654812793\n",
261 |       "245981376\n",
262 |       "169273584\n",
263 |       "837564219\n",
264 |       "976125438\n",
265 |       "513498627\n",
266 |       "482736951\n",
267 |       "391657842\n",
268 |       "728349165\n",
269 |       "654812793\n"
270 |      ]
271 |     }
272 |    ],
273 |    "source": [
274 |     "r(\"200080300060070084030500209000105408000000000402706000301007040720040060004010003\")"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": 31,
280 |    "metadata": {
281 |     "collapsed": false
282 |    },
283 |    "outputs": [
284 |     {
285 |      "name": "stdout",
286 |      "output_type": "stream",
287 |      "text": [
288 |       "483921657967345821251876493548132976729564138136798245372689514814253769695417382\n",
289 |       "483921657\n",
290 |       "967345821\n",
291 |       "251876493\n",
292 |       "548132976\n",
293 |       "729564138\n",
294 |       "136798245\n",
295 |       "372689514\n",
296 |       "814253769\n",
297 |       "695417382\n"
298 |      ]
299 |     }
300 |    ],
301 |    "source": [
302 |     "r(\"003020600900305001001806400008102900700000008006708200002609500800203009005010300\")"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": null,
308 |    "metadata": {
309 |     "collapsed": true
310 |    },
311 |    "outputs": [],
312 |    "source": []
313 |   }
314 |  ],
315 |  "metadata": {
316 |   "kernelspec": {
317 |    "display_name": "Python 2",
318 |    "language": "python",
319 |    "name": "python2"
320 |   },
321 |   "language_info": {
322 |    "codemirror_mode": {
323 |     "name": "ipython",
324 |     "version": 2
325 |    },
326 |    "file_extension": ".py",
327 |    "mimetype": "text/x-python",
328 |    "name": "python",
329 |    "nbconvert_exporter": "python",
330 |    "pygments_lexer": "ipython2",
331 |    "version": "2.7.9"
332 |   }
333 |  },
334 |  "nbformat": 4,
335 |  "nbformat_minor": 0
336 | }
337 | 


--------------------------------------------------------------------------------
/Plotting Decision Boundaries.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "\n",
 13 |     "from sklearn.datasets import load_iris\n",
 14 |     "from sklearn.cross_validation import train_test_split\n",
 15 |     "from sklearn.preprocessing import OneHotEncoder\n",
 16 |     "\n",
 17 |     "import matplotlib.pyplot as plt\n",
 18 |     "\n",
 19 |     "iris = load_iris()"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 3,
 25 |    "metadata": {
 26 |     "collapsed": true
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "from sklearn.linear_model import LogisticRegression"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 18,
 36 |    "metadata": {
 37 |     "collapsed": false
 38 |    },
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "(75L, 4L) (75L,) (25L, 4L) (25L,)\n"
 45 |      ]
 46 |     },
 47 |     {
 48 |      "data": {
 49 |       "text/plain": [
 50 |        "1.0"
 51 |       ]
 52 |      },
 53 |      "execution_count": 18,
 54 |      "metadata": {},
 55 |      "output_type": "execute_result"
 56 |     }
 57 |    ],
 58 |    "source": [
 59 |     "X_train, X_test, y_train, y_test = train_test_split(iris.data[(iris.target == 0) | (iris.target == 1)],\n",
 60 |     "                                                    iris.target[(iris.target == 0) | (iris.target == 1)])\n",
 61 |     "print X_train.shape, y_train.shape, X_test.shape, y_test.shape\n",
 62 |     "logit = LogisticRegression()\n",
 63 |     "logit.fit(X_train, y_train)\n",
 64 |     "logit.score(X_test, y_test)"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 13,
 70 |    "metadata": {
 71 |     "collapsed": false
 72 |    },
 73 |    "outputs": [
 74 |     {
 75 |      "data": {
 76 |       "text/plain": [
 77 |        "array([[ 5.1,  3.5,  1.4,  0.2],\n",
 78 |        "       [ 4.9,  3. ,  1.4,  0.2],\n",
 79 |        "       [ 4.7,  3.2,  1.3,  0.2],\n",
 80 |        "       [ 4.6,  3.1,  1.5,  0.2],\n",
 81 |        "       [ 5. ,  3.6,  1.4,  0.2],\n",
 82 |        "       [ 5.4,  3.9,  1.7,  0.4],\n",
 83 |        "       [ 4.6,  3.4,  1.4,  0.3],\n",
 84 |        "       [ 5. ,  3.4,  1.5,  0.2],\n",
 85 |        "       [ 4.4,  2.9,  1.4,  0.2],\n",
 86 |        "       [ 4.9,  3.1,  1.5,  0.1],\n",
 87 |        "       [ 5.4,  3.7,  1.5,  0.2],\n",
 88 |        "       [ 4.8,  3.4,  1.6,  0.2],\n",
 89 |        "       [ 4.8,  3. ,  1.4,  0.1],\n",
 90 |        "       [ 4.3,  3. ,  1.1,  0.1],\n",
 91 |        "       [ 5.8,  4. ,  1.2,  0.2],\n",
 92 |        "       [ 5.7,  4.4,  1.5,  0.4],\n",
 93 |        "       [ 5.4,  3.9,  1.3,  0.4],\n",
 94 |        "       [ 5.1,  3.5,  1.4,  0.3],\n",
 95 |        "       [ 5.7,  3.8,  1.7,  0.3],\n",
 96 |        "       [ 5.1,  3.8,  1.5,  0.3],\n",
 97 |        "       [ 5.4,  3.4,  1.7,  0.2],\n",
 98 |        "       [ 5.1,  3.7,  1.5,  0.4],\n",
 99 |        "       [ 4.6,  3.6,  1. ,  0.2],\n",
100 |        "       [ 5.1,  3.3,  1.7,  0.5],\n",
101 |        "       [ 4.8,  3.4,  1.9,  0.2],\n",
102 |        "       [ 5. ,  3. ,  1.6,  0.2],\n",
103 |        "       [ 5. ,  3.4,  1.6,  0.4],\n",
104 |        "       [ 5.2,  3.5,  1.5,  0.2],\n",
105 |        "       [ 5.2,  3.4,  1.4,  0.2],\n",
106 |        "       [ 4.7,  3.2,  1.6,  0.2],\n",
107 |        "       [ 4.8,  3.1,  1.6,  0.2],\n",
108 |        "       [ 5.4,  3.4,  1.5,  0.4],\n",
109 |        "       [ 5.2,  4.1,  1.5,  0.1],\n",
110 |        "       [ 5.5,  4.2,  1.4,  0.2],\n",
111 |        "       [ 4.9,  3.1,  1.5,  0.1],\n",
112 |        "       [ 5. ,  3.2,  1.2,  0.2],\n",
113 |        "       [ 5.5,  3.5,  1.3,  0.2],\n",
114 |        "       [ 4.9,  3.1,  1.5,  0.1],\n",
115 |        "       [ 4.4,  3. ,  1.3,  0.2],\n",
116 |        "       [ 5.1,  3.4,  1.5,  0.2],\n",
117 |        "       [ 5. ,  3.5,  1.3,  0.3],\n",
118 |        "       [ 4.5,  2.3,  1.3,  0.3],\n",
119 |        "       [ 4.4,  3.2,  1.3,  0.2],\n",
120 |        "       [ 5. ,  3.5,  1.6,  0.6],\n",
121 |        "       [ 5.1,  3.8,  1.9,  0.4],\n",
122 |        "       [ 4.8,  3. ,  1.4,  0.3],\n",
123 |        "       [ 5.1,  3.8,  1.6,  0.2],\n",
124 |        "       [ 4.6,  3.2,  1.4,  0.2],\n",
125 |        "       [ 5.3,  3.7,  1.5,  0.2],\n",
126 |        "       [ 5. ,  3.3,  1.4,  0.2],\n",
127 |        "       [ 7. ,  3.2,  4.7,  1.4],\n",
128 |        "       [ 6.4,  3.2,  4.5,  1.5],\n",
129 |        "       [ 6.9,  3.1,  4.9,  1.5],\n",
130 |        "       [ 5.5,  2.3,  4. ,  1.3],\n",
131 |        "       [ 6.5,  2.8,  4.6,  1.5],\n",
132 |        "       [ 5.7,  2.8,  4.5,  1.3],\n",
133 |        "       [ 6.3,  3.3,  4.7,  1.6],\n",
134 |        "       [ 4.9,  2.4,  3.3,  1. ],\n",
135 |        "       [ 6.6,  2.9,  4.6,  1.3],\n",
136 |        "       [ 5.2,  2.7,  3.9,  1.4],\n",
137 |        "       [ 5. ,  2. ,  3.5,  1. ],\n",
138 |        "       [ 5.9,  3. ,  4.2,  1.5],\n",
139 |        "       [ 6. ,  2.2,  4. ,  1. ],\n",
140 |        "       [ 6.1,  2.9,  4.7,  1.4],\n",
141 |        "       [ 5.6,  2.9,  3.6,  1.3],\n",
142 |        "       [ 6.7,  3.1,  4.4,  1.4],\n",
143 |        "       [ 5.6,  3. ,  4.5,  1.5],\n",
144 |        "       [ 5.8,  2.7,  4.1,  1. ],\n",
145 |        "       [ 6.2,  2.2,  4.5,  1.5],\n",
146 |        "       [ 5.6,  2.5,  3.9,  1.1],\n",
147 |        "       [ 5.9,  3.2,  4.8,  1.8],\n",
148 |        "       [ 6.1,  2.8,  4. ,  1.3],\n",
149 |        "       [ 6.3,  2.5,  4.9,  1.5],\n",
150 |        "       [ 6.1,  2.8,  4.7,  1.2],\n",
151 |        "       [ 6.4,  2.9,  4.3,  1.3],\n",
152 |        "       [ 6.6,  3. ,  4.4,  1.4],\n",
153 |        "       [ 6.8,  2.8,  4.8,  1.4],\n",
154 |        "       [ 6.7,  3. ,  5. ,  1.7],\n",
155 |        "       [ 6. ,  2.9,  4.5,  1.5],\n",
156 |        "       [ 5.7,  2.6,  3.5,  1. ],\n",
157 |        "       [ 5.5,  2.4,  3.8,  1.1],\n",
158 |        "       [ 5.5,  2.4,  3.7,  1. ],\n",
159 |        "       [ 5.8,  2.7,  3.9,  1.2],\n",
160 |        "       [ 6. ,  2.7,  5.1,  1.6],\n",
161 |        "       [ 5.4,  3. ,  4.5,  1.5],\n",
162 |        "       [ 6. ,  3.4,  4.5,  1.6],\n",
163 |        "       [ 6.7,  3.1,  4.7,  1.5],\n",
164 |        "       [ 6.3,  2.3,  4.4,  1.3],\n",
165 |        "       [ 5.6,  3. ,  4.1,  1.3],\n",
166 |        "       [ 5.5,  2.5,  4. ,  1.3],\n",
167 |        "       [ 5.5,  2.6,  4.4,  1.2],\n",
168 |        "       [ 6.1,  3. ,  4.6,  1.4],\n",
169 |        "       [ 5.8,  2.6,  4. ,  1.2],\n",
170 |        "       [ 5. ,  2.3,  3.3,  1. ],\n",
171 |        "       [ 5.6,  2.7,  4.2,  1.3],\n",
172 |        "       [ 5.7,  3. ,  4.2,  1.2],\n",
173 |        "       [ 5.7,  2.9,  4.2,  1.3],\n",
174 |        "       [ 6.2,  2.9,  4.3,  1.3],\n",
175 |        "       [ 5.1,  2.5,  3. ,  1.1],\n",
176 |        "       [ 5.7,  2.8,  4.1,  1.3]])"
177 |       ]
178 |      },
179 |      "execution_count": 13,
180 |      "metadata": {},
181 |      "output_type": "execute_result"
182 |     }
183 |    ],
184 |    "source": []
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": null,
189 |    "metadata": {
190 |     "collapsed": true
191 |    },
192 |    "outputs": [],
193 |    "source": []
194 |   }
195 |  ],
196 |  "metadata": {
197 |   "kernelspec": {
198 |    "display_name": "Python 2",
199 |    "language": "python",
200 |    "name": "python2"
201 |   },
202 |   "language_info": {
203 |    "codemirror_mode": {
204 |     "name": "ipython",
205 |     "version": 2
206 |    },
207 |    "file_extension": ".py",
208 |    "mimetype": "text/x-python",
209 |    "name": "python",
210 |    "nbconvert_exporter": "python",
211 |    "pygments_lexer": "ipython2",
212 |    "version": "2.7.9"
213 |   }
214 |  },
215 |  "nbformat": 4,
216 |  "nbformat_minor": 0
217 | }
218 | 


--------------------------------------------------------------------------------
/Programming assignments.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Questions taken from:\n",
  8 |     "http://mycsinterviewsexperiences.blogspot.com/2012/09/interviews-with-microsoft-bing-and.html\n",
  9 |     "\n"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 1,
 15 |    "metadata": {
 16 |     "collapsed": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "graph = {'A': set(['B', 'C']),\n",
 21 |     "         'B': set(['A', 'D', 'E']),\n",
 22 |     "         'C': set(['A', 'F']),\n",
 23 |     "         'D': set(['B']),\n",
 24 |     "         'E': set(['B', 'F']),\n",
 25 |     "         'F': set(['C', 'E'])}"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {
 32 |     "collapsed": false,
 33 |     "scrolled": false
 34 |    },
 35 |    "outputs": [
 36 |     {
 37 |      "name": "stdout",
 38 |      "output_type": "stream",
 39 |      "text": [
 40 |       "Outside:  A -> set([]) []\n",
 41 |       "Outside:  B -> set(['A']) ['C']\n",
 42 |       "Outside:  D -> set(['A', 'B']) ['C', 'E']\n",
 43 |       "Outside:  E -> set(['A', 'B', 'D']) ['C']\n",
 44 |       "Outside:  F -> set(['A', 'B', 'E', 'D']) ['C']\n",
 45 |       "Outside:  C -> set(['A', 'B', 'E', 'D', 'F']) ['C']\n",
 46 |       "Outside:  C -> set(['A', 'C', 'B', 'E', 'D', 'F']) []\n"
 47 |      ]
 48 |     },
 49 |     {
 50 |      "data": {
 51 |       "text/plain": [
 52 |        "{'A', 'B', 'C', 'D', 'E', 'F'}"
 53 |       ]
 54 |      },
 55 |      "execution_count": 2,
 56 |      "metadata": {},
 57 |      "output_type": "execute_result"
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "def dfs(graph, start):\n",
 62 |     "    visited, stack = (set(), [start])\n",
 63 |     "    while stack:\n",
 64 |     "        curr = stack.pop()\n",
 65 |     "        print \"Outside: \", curr, \"->\", visited, stack\n",
 66 |     "        if curr not in visited:\n",
 67 |     "            visited.add(curr)\n",
 68 |     "            stack.extend(graph[curr] - visited)\n",
 69 |     "    return visited\n",
 70 |     "            \n",
 71 |     "           \n",
 72 |     "            \n",
 73 |     "dfs(graph, \"A\")        "
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 3,
 79 |    "metadata": {
 80 |     "collapsed": false
 81 |    },
 82 |    "outputs": [
 83 |     {
 84 |      "name": "stdout",
 85 |      "output_type": "stream",
 86 |      "text": [
 87 |       "['A', 'B', 'E', 'F']\n",
 88 |       "['A', 'C', 'F']\n"
 89 |      ]
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "def dfs_paths(graph, start, goal):\n",
 94 |     "    stack = [(start, [start])]\n",
 95 |     "    while stack:\n",
 96 |     "        curr, path = stack.pop()\n",
 97 |     "        for next in graph[curr] - set(path):\n",
 98 |     "            if next == goal:\n",
 99 |     "                print path + [next]\n",
100 |     "            else:\n",
101 |     "                stack.append((next, path + [next]))\n",
102 |     "                \n",
103 |     "dfs_paths(graph, \"A\", \"F\")        "
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 4,
109 |    "metadata": {
110 |     "collapsed": false
111 |    },
112 |    "outputs": [
113 |     {
114 |      "name": "stdout",
115 |      "output_type": "stream",
116 |      "text": [
117 |       "Visited: set([]), Queue: ['A']\n",
118 |       "Outside:  A\n",
119 |       "Visited: set(['A', 'C', 'B']), Queue: ['F', 'E', 'D']\n",
120 |       "Outside:  F\n",
121 |       "Visited: set(['A', 'C', 'B', 'E', 'F']), Queue: ['E', 'D']\n",
122 |       "Outside:  E\n",
123 |       "Visited: set(['A', 'C', 'B', 'E', 'F']), Queue: ['D']\n",
124 |       "Outside:  D\n"
125 |      ]
126 |     },
127 |     {
128 |      "data": {
129 |       "text/plain": [
130 |        "{'A', 'B', 'C', 'D', 'E', 'F'}"
131 |       ]
132 |      },
133 |      "execution_count": 4,
134 |      "metadata": {},
135 |      "output_type": "execute_result"
136 |     }
137 |    ],
138 |    "source": [
139 |     "def bfs(graph, start):\n",
140 |     "    visited, queue = (set(), [start])\n",
141 |     "    while queue:\n",
142 |     "        print \"Visited: %s, Queue: %s\" % (visited, queue)\n",
143 |     "        curr = queue.pop(0)\n",
144 |     "        print \"Outside: \", curr\n",
145 |     "        if curr not in visited:\n",
146 |     "            visited.add(curr)\n",
147 |     "            for k in graph[curr] - visited:\n",
148 |     "                queue.extend(graph[k] - visited)\n",
149 |     "            visited = visited.union(graph[curr])\n",
150 |     "            #print \"Curr: %s, Edges: %s\" % (curr, graph[curr])\n",
151 |     "    return visited\n",
152 |     "\n",
153 |     "bfs(graph, \"A\")"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": 5,
159 |    "metadata": {
160 |     "collapsed": false
161 |    },
162 |    "outputs": [
163 |     {
164 |      "name": "stdout",
165 |      "output_type": "stream",
166 |      "text": [
167 |       "Visited: set([]), Queue: ['A']\n",
168 |       "Outside:  A\n",
169 |       "Visited: set(['A']), Queue: ['C', 'B']\n",
170 |       "Outside:  C\n",
171 |       "Visited: set(['A', 'C']), Queue: ['B', 'F']\n",
172 |       "Outside:  B\n",
173 |       "Visited: set(['A', 'C', 'B']), Queue: ['F', 'E', 'D']\n",
174 |       "Outside:  F\n",
175 |       "Visited: set(['A', 'C', 'B', 'F']), Queue: ['E', 'D', 'E']\n",
176 |       "Outside:  E\n",
177 |       "Visited: set(['A', 'C', 'B', 'E', 'F']), Queue: ['D', 'E']\n",
178 |       "Outside:  D\n",
179 |       "Visited: set(['A', 'C', 'B', 'E', 'D', 'F']), Queue: ['E']\n",
180 |       "Outside:  E\n"
181 |      ]
182 |     },
183 |     {
184 |      "data": {
185 |       "text/plain": [
186 |        "{'A', 'B', 'C', 'D', 'E', 'F'}"
187 |       ]
188 |      },
189 |      "execution_count": 5,
190 |      "metadata": {},
191 |      "output_type": "execute_result"
192 |     }
193 |    ],
194 |    "source": [
195 |     "def bfs(graph, start):\n",
196 |     "    visited, queue = (set(), [start])\n",
197 |     "    while queue:\n",
198 |     "        print \"Visited: %s, Queue: %s\" % (visited, queue)\n",
199 |     "        curr = queue.pop(0)\n",
200 |     "        print \"Outside: \", curr\n",
201 |     "        if curr not in visited:\n",
202 |     "            visited.add(curr)\n",
203 |     "            queue.extend(graph[curr] - visited)\n",
204 |     "    return visited\n",
205 |     "\n",
206 |     "bfs(graph, \"A\")"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 6,
212 |    "metadata": {
213 |     "collapsed": false
214 |    },
215 |    "outputs": [
216 |     {
217 |      "name": "stdout",
218 |      "output_type": "stream",
219 |      "text": [
220 |       "['A', 'C', 'F']\n",
221 |       "['A', 'B', 'E', 'F']\n"
222 |      ]
223 |     }
224 |    ],
225 |    "source": [
226 |     "def bfs_paths(graph, start, goal):\n",
227 |     "    queue = [(start, [start])]\n",
228 |     "    while queue:\n",
229 |     "        curr, path = queue.pop(0)\n",
230 |     "        for next in graph[curr] - set(path):\n",
231 |     "            if next == goal:\n",
232 |     "                print path + [next]\n",
233 |     "            else:\n",
234 |     "                queue.append((next, path + [next]))\n",
235 |     "\n",
236 |     "bfs_paths(graph, \"A\", \"F\")"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": 7,
242 |    "metadata": {
243 |     "collapsed": false
244 |    },
245 |    "outputs": [
246 |     {
247 |      "name": "stdout",
248 |      "output_type": "stream",
249 |      "text": [
250 |       "A A None\n",
251 |       "A C ['A', 'C']\n",
252 |       "A B ['A', 'B']\n",
253 |       "A E ['A', 'B', 'E']\n",
254 |       "A D ['A', 'B', 'D']\n",
255 |       "A F ['A', 'C', 'F']\n"
256 |      ]
257 |     }
258 |    ],
259 |    "source": [
260 |     "def shortest_path(graph, start, goal):\n",
261 |     "    queue = [(start, [start])]\n",
262 |     "    while queue:\n",
263 |     "        curr, path = queue.pop(0)\n",
264 |     "        for next in graph[curr] - set(path):\n",
265 |     "            if next == goal:\n",
266 |     "                return path + [next]\n",
267 |     "            else:\n",
268 |     "                queue.append((next, path + [next]))\n",
269 |     "    return None\n",
270 |     "for k in graph.keys():\n",
271 |     "    print \"A\", k, shortest_path(graph, \"A\", k)"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "markdown",
276 |    "metadata": {},
277 |    "source": [
278 |     "## Q1. Given a binary tree, return true if it is a BST. Do it in a non-recursive way.\n",
 279 |     "Do an iterative DFS and check the BST condition at each step. If any check fails, return False; if every check passes, return True."
280 |    ]
281 |   },
282 |   {
283 |    "cell_type": "code",
284 |    "execution_count": 8,
285 |    "metadata": {
286 |     "collapsed": false
287 |    },
288 |    "outputs": [],
289 |    "source": [
290 |     "def append_bst(graph, root, item):\n",
291 |     "    if root is None:\n",
292 |     "        graph[item] = [None, None]\n",
293 |     "        return graph\n",
294 |     "    curr = root\n",
295 |     "    while (item < curr) or (item > curr):\n",
296 |     "        if item < curr:\n",
297 |     "            if graph[curr][0] is not None:\n",
298 |     "                curr = graph[curr][0]\n",
299 |     "            else:\n",
300 |     "                graph[curr][0] = item\n",
301 |     "                graph[item] = [None, None]\n",
302 |     "        elif item > curr:\n",
303 |     "            if graph[curr][1] is not None:\n",
304 |     "                curr = graph[curr][1]\n",
305 |     "            else:\n",
306 |     "                graph[curr][1] = item\n",
307 |     "                graph[item] = [None, None]\n",
308 |     "    return graph\n",
309 |     "\n",
310 |     "def gen_bst(arr):\n",
311 |     "    graph = append_bst({}, None, arr[0])\n",
312 |     "    root = arr[0]\n",
313 |     "    for item in arr[1:]:\n",
314 |     "        graph = append_bst(graph, root, item)\n",
315 |     "        #print item, graph\n",
316 |     "    return graph, root"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "code",
321 |    "execution_count": 9,
322 |    "metadata": {
323 |     "collapsed": false
324 |    },
325 |    "outputs": [
326 |     {
327 |      "name": "stdout",
328 |      "output_type": "stream",
329 |      "text": [
330 |       "Final graph:\n",
331 |       "({0: [None, 1], 1: [None, 2], 2: [None, 3], 3: [None, 4], 4: [None, 5], 5: [None, 6], 6: [None, 7], 7: [None, 8], 8: [None, 9], 9: [None, None]}, 0)\n"
332 |      ]
333 |     }
334 |    ],
335 |    "source": [
336 |     "print \"Final graph:\\n\", gen_bst(range(10))"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "code",
341 |    "execution_count": 10,
342 |    "metadata": {
343 |     "collapsed": false
344 |    },
345 |    "outputs": [
346 |     {
347 |      "name": "stdout",
348 |      "output_type": "stream",
349 |      "text": [
350 |       "Final graph:\n",
351 |       "({1: [None, None], 2: [1, None], 3: [2, None], 4: [3, 5], 5: [None, 10], 7: [None, None], 10: [7, None]}, 4)\n"
352 |      ]
353 |     }
354 |    ],
355 |    "source": [
356 |     "print \"Final graph:\\n\", gen_bst([4,5,3,2,10,1,7])"
357 |    ]
358 |   },
359 |   {
360 |    "cell_type": "code",
361 |    "execution_count": 11,
362 |    "metadata": {
363 |     "collapsed": true
364 |    },
365 |    "outputs": [],
366 |    "source": [
367 |     "def print_graph(graph, root):\n",
368 |     "    visited, queue = (set([None]), [root])\n",
369 |     "    level = 0\n",
370 |     "    while queue:\n",
371 |     "        curr = queue.pop(0)\n",
372 |     "        print curr \n",
373 |     "        if curr not in visited:\n",
374 |     "            visited.add(curr)\n",
375 |     "            queue.extend(set(graph[curr]) - visited)\n",
376 |     "    "
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": 12,
382 |    "metadata": {
383 |     "collapsed": false
384 |    },
385 |    "outputs": [
386 |     {
387 |      "name": "stdout",
388 |      "output_type": "stream",
389 |      "text": [
390 |       "4\n",
391 |       "3\n",
392 |       "5\n",
393 |       "2\n",
394 |       "10\n",
395 |       "1\n",
396 |       "7\n"
397 |      ]
398 |     }
399 |    ],
400 |    "source": [
401 |     "print_graph(*gen_bst([4,5,3,2,10,1,7]))"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "code",
406 |    "execution_count": 13,
407 |    "metadata": {
408 |     "collapsed": false
409 |    },
410 |    "outputs": [
411 |     {
412 |      "data": {
413 |       "text/plain": [
414 |        "1"
415 |       ]
416 |      },
417 |      "execution_count": 13,
418 |      "metadata": {},
419 |      "output_type": "execute_result"
420 |     }
421 |    ],
422 |    "source": [
423 |     "min([1, 2, 3])"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": 14,
429 |    "metadata": {
430 |     "collapsed": true
431 |    },
432 |    "outputs": [],
433 |    "source": [
434 |     "def is_bst(graph, root):\n",
435 |     "    if root is None:\n",
436 |     "        return True\n",
437 |     "    visited, stack = (set([None]), [root])\n",
438 |     "    while stack:\n",
439 |     "        curr = stack.pop()\n",
440 |     "        left, right = graph[curr]\n",
441 |     "        if left is not None and left > curr:\n",
442 |     "            # Left is always smaller than parent\n",
443 |     "            return False\n",
444 |     "        if right is not None and right < curr:\n",
445 |     "            # Right is always greater than parent\n",
446 |     "            return False\n",
447 |     "        if curr not in visited:\n",
448 |     "            visited.add(curr)\n",
449 |     "            print curr\n",
450 |     "            stack.extend(set(graph[curr]) - visited)\n",
451 |     "    return True\n",
452 |     "            "
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "code",
457 |    "execution_count": 15,
458 |    "metadata": {
459 |     "collapsed": false
460 |    },
461 |    "outputs": [
462 |     {
463 |      "name": "stdout",
464 |      "output_type": "stream",
465 |      "text": [
466 |       "4\n",
467 |       "5\n",
468 |       "10\n",
469 |       "7\n",
470 |       "3\n",
471 |       "2\n",
472 |       "1\n"
473 |      ]
474 |     },
475 |     {
476 |      "data": {
477 |       "text/plain": [
478 |        "True"
479 |       ]
480 |      },
481 |      "execution_count": 15,
482 |      "metadata": {},
483 |      "output_type": "execute_result"
484 |     }
485 |    ],
486 |    "source": [
487 |     "graph, root = gen_bst([4,5,3,2,10,1,7])\n",
488 |     "is_bst(graph, root)"
489 |    ]
490 |   },
491 |   {
492 |    "cell_type": "code",
493 |    "execution_count": 16,
494 |    "metadata": {
495 |     "collapsed": false
496 |    },
497 |    "outputs": [
498 |     {
499 |      "data": {
500 |       "text/plain": [
501 |        "False"
502 |       ]
503 |      },
504 |      "execution_count": 16,
505 |      "metadata": {},
506 |      "output_type": "execute_result"
507 |     }
508 |    ],
509 |    "source": [
510 |     "graph = {0: [1,2],\n",
511 |     "         1: [None, None],\n",
512 |     "         2: [None, None]}\n",
513 |     "root = 0\n",
514 |     "is_bst(graph, root)"
515 |    ]
516 |   },
517 |   {
518 |    "cell_type": "markdown",
519 |    "metadata": {},
520 |    "source": [
521 |     "## Q2. There is a 2D matrix of size 10 X 10, where you have to begin from the location (0,0) and move to the location (9,9). You can either move right or down. Find out the number of distinct paths in which you can reach (9,9) from (0,0).\n",
522 |     "\n",
523 |     "#paths(i,j) = #paths(i-1, j) + #paths(i, j-1), with #paths(0, j) = #paths(i, 0) = 1"
524 |    ]
525 |   },
526 |   {
527 |    "cell_type": "code",
528 |    "execution_count": 17,
529 |    "metadata": {
530 |     "collapsed": false
531 |    },
532 |    "outputs": [],
533 |    "source": [
534 |     "def npaths(N=10):\n",
535 |     "    mat = [[0]*N for k in range(N)]\n",
536 |     "    mat[0][0] = 1\n",
537 |     "    for i in range(N):\n",
538 |     "        for j in range(N):\n",
539 |     "            if mat[i][j] == 0:\n",
540 |     "                if i == 0:\n",
541 |     "                    mat[i][j] = mat[i][j-1] # Only one movement from left cell\n",
542 |     "                    #print \"mat[%s][%s] = mat[%s][%s]\" % (i,j,i,j-1)\n",
543 |     "                    #print_mat(mat)\n",
544 |     "                elif j == 0:\n",
545 |     "                    mat[i][j] = mat[i-1][j] # Only one movement from top cell\n",
546 |     "                    #print \"mat[%s][%s] = mat[%s][%s]\" % (i,j,i-1,j)\n",
547 |     "                else:\n",
548 |     "                    mat[i][j] = mat[i-1][j] + mat[i][j-1]\n",
549 |     "                    #print \"mat[%s][%s] = mat[%s][%s]+1\" % (i,j,i-1,j-1)\n",
550 |     "    return mat\n",
551 |     "\n",
552 |     "def print_mat(mat):\n",
553 |     "    for row in mat:\n",
554 |     "        print row"
555 |    ]
556 |   },
557 |   {
558 |    "cell_type": "code",
559 |    "execution_count": 18,
560 |    "metadata": {
561 |     "collapsed": false
562 |    },
563 |    "outputs": [
564 |     {
565 |      "name": "stdout",
566 |      "output_type": "stream",
567 |      "text": [
568 |       "[1, 1]\n",
569 |       "[1, 2]\n"
570 |      ]
571 |     }
572 |    ],
573 |    "source": [
574 |     "print_mat(npaths(N=2))"
575 |    ]
576 |   },
577 |   {
578 |    "cell_type": "code",
579 |    "execution_count": 19,
580 |    "metadata": {
581 |     "collapsed": false
582 |    },
583 |    "outputs": [
584 |     {
585 |      "name": "stdout",
586 |      "output_type": "stream",
587 |      "text": [
588 |       "[1, 1, 1]\n",
589 |       "[1, 2, 3]\n",
590 |       "[1, 3, 6]\n"
591 |      ]
592 |     }
593 |    ],
594 |    "source": [
595 |     "print_mat(npaths(N=3))"
596 |    ]
597 |   },
598 |   {
599 |    "cell_type": "code",
600 |    "execution_count": 20,
601 |    "metadata": {
602 |     "collapsed": false
603 |    },
604 |    "outputs": [
605 |     {
606 |      "name": "stdout",
607 |      "output_type": "stream",
608 |      "text": [
609 |       "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]\n",
610 |       "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n",
611 |       "[1, 3, 6, 10, 15, 21, 28, 36, 45, 55]\n",
612 |       "[1, 4, 10, 20, 35, 56, 84, 120, 165, 220]\n",
613 |       "[1, 5, 15, 35, 70, 126, 210, 330, 495, 715]\n",
614 |       "[1, 6, 21, 56, 126, 252, 462, 792, 1287, 2002]\n",
615 |       "[1, 7, 28, 84, 210, 462, 924, 1716, 3003, 5005]\n",
616 |       "[1, 8, 36, 120, 330, 792, 1716, 3432, 6435, 11440]\n",
617 |       "[1, 9, 45, 165, 495, 1287, 3003, 6435, 12870, 24310]\n",
618 |       "[1, 10, 55, 220, 715, 2002, 5005, 11440, 24310, 48620]\n"
619 |      ]
620 |     }
621 |    ],
622 |    "source": [
623 |     "print_mat(npaths(N=10))"
624 |    ]
625 |   },
626 |   {
627 |    "cell_type": "markdown",
628 |    "metadata": {},
629 |    "source": [
630 |     "## Q3. Given an array with a special property, find the smallest number in it. The special property is that the elements in the array are monotonically decreasing and then increasing.\n",
631 |     "\n",
632 |     "Get middle element:\n",
633 |     "\n",
634 |     "* if the middle element is part of the increasing sequence, search left\n",
635 |     "* if the middle element is part of the decreasing sequence, search right\n",
636 |     "\n",
637 |     "Stop when the middle element is such that both its left and right neighbours are higher"
638 |    ]
639 |   },
640 |   {
641 |    "cell_type": "code",
642 |    "execution_count": 21,
643 |    "metadata": {
644 |     "collapsed": true
645 |    },
646 |    "outputs": [],
647 |    "source": [
648 |     "def find_smallest(arr):\n",
649 |     "    left, right = 0, len(arr)\n",
650 |     "    mid = (left + right) /2\n",
651 |     "    i = 1\n",
652 |     "    while mid > 0 and mid < len(arr)-1 and i < len(arr):\n",
653 |     "        if arr[mid-1] <= arr[mid] and arr[mid+1] >= arr[mid]:\n",
654 |     "            right = mid\n",
655 |     "        elif arr[mid-1] >= arr[mid] and arr[mid+1] <= arr[mid]:\n",
656 |     "            left = mid\n",
657 |     "        else:\n",
658 |     "            break\n",
659 |     "        mid = (left + right) /2\n",
660 |     "        i += 1\n",
661 |     "    print \"Iterations required: \", i\n",
662 |     "    return arr[mid]"
663 |    ]
664 |   },
665 |   {
666 |    "cell_type": "code",
667 |    "execution_count": 22,
668 |    "metadata": {
669 |     "collapsed": false
670 |    },
671 |    "outputs": [
672 |     {
673 |      "name": "stdout",
674 |      "output_type": "stream",
675 |      "text": [
676 |       "Iterations required:  1\n"
677 |      ]
678 |     },
679 |     {
680 |      "data": {
681 |       "text/plain": [
682 |        "1"
683 |       ]
684 |      },
685 |      "execution_count": 22,
686 |      "metadata": {},
687 |      "output_type": "execute_result"
688 |     }
689 |    ],
690 |    "source": [
691 |     "find_smallest([1])"
692 |    ]
693 |   },
694 |   {
695 |    "cell_type": "code",
696 |    "execution_count": 23,
697 |    "metadata": {
698 |     "collapsed": false
699 |    },
700 |    "outputs": [
701 |     {
702 |      "name": "stdout",
703 |      "output_type": "stream",
704 |      "text": [
705 |       "Iterations required:  2\n"
706 |      ]
707 |     },
708 |     {
709 |      "data": {
710 |       "text/plain": [
711 |        "1"
712 |       ]
713 |      },
714 |      "execution_count": 23,
715 |      "metadata": {},
716 |      "output_type": "execute_result"
717 |     }
718 |    ],
719 |    "source": [
720 |     "find_smallest([1,2,3])"
721 |    ]
722 |   },
723 |   {
724 |    "cell_type": "code",
725 |    "execution_count": 24,
726 |    "metadata": {
727 |     "collapsed": false
728 |    },
729 |    "outputs": [
730 |     {
731 |      "name": "stdout",
732 |      "output_type": "stream",
733 |      "text": [
734 |       "Iterations required:  3\n"
735 |      ]
736 |     },
737 |     {
738 |      "data": {
739 |       "text/plain": [
740 |        "7"
741 |       ]
742 |      },
743 |      "execution_count": 24,
744 |      "metadata": {},
745 |      "output_type": "execute_result"
746 |     }
747 |    ],
748 |    "source": [
749 |     "find_smallest([10,9,8,7,11,12,13,14])"
750 |    ]
751 |   },
752 |   {
753 |    "cell_type": "code",
754 |    "execution_count": 25,
755 |    "metadata": {
756 |     "collapsed": false
757 |    },
758 |    "outputs": [
759 |     {
760 |      "name": "stdout",
761 |      "output_type": "stream",
762 |      "text": [
763 |       "Iterations required:  2\n"
764 |      ]
765 |     },
766 |     {
767 |      "data": {
768 |       "text/plain": [
769 |        "1"
770 |       ]
771 |      },
772 |      "execution_count": 25,
773 |      "metadata": {},
774 |      "output_type": "execute_result"
775 |     }
776 |    ],
777 |    "source": [
778 |     "find_smallest([3,2,1])"
779 |    ]
780 |   },
781 |   {
782 |    "cell_type": "markdown",
783 |    "metadata": {},
784 |    "source": [
785 |     "## Find if a string s3 can be formed by interleaving 2 other given strings s1 and s2\n",
786 |     "Characters of each string s1 and s2 will appear in the same order they appear in the original string. All characters of s1 and s2 must be exhausted.\n",
787 |     "\n",
788 |     "E.g.\n",
789 |     "```\n",
790 |     "s1 = \"aaab\"\n",
791 |     "s2 = \"aaac\"\n",
792 |     "s3 = \"aaaacaab\" # is_interleaved = True\n",
793 |     "s3 = \"aaaacaa\" # is_interleaved = False, as b is not present in s3\n",
794 |     "s3 = \"aaaacaad\" # is_interleaved = False, as d is not present in s1 or s2\n",
795 |     "```\n",
796 |     "\n",
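797 |     "Approach: If the prefix of s3 ending at index i+j can be formed by interleaving the prefixes ending at s1[i-1] and s2[j], or at s1[i] and s2[j-1], then the prefix ending at s3[i+j+1] can be formed by just matching the next character (s3[i+j+1] against s1[i] or s2[j], respectively)."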
798 |    ]
799 |   },
800 |   {
801 |    "cell_type": "code",
802 |    "execution_count": 34,
803 |    "metadata": {
804 |     "collapsed": true
805 |    },
806 |    "outputs": [],
807 |    "source": [
808 |     "from pprint import pprint"
809 |    ]
810 |   },
811 |   {
812 |    "cell_type": "code",
813 |    "execution_count": 141,
814 |    "metadata": {
815 |     "collapsed": false
816 |    },
817 |    "outputs": [],
818 |    "source": [
819 |     "mat = []\n",
820 |     "\n",
821 |     "def f(s1,s2,s3,i,j):\n",
822 |     "    if i < 0 and j < 0:\n",
823 |     "        return True\n",
824 |     "    if i < 0:\n",
825 |     "        return (s3[:j+1] == s2[:j+1])\n",
826 |     "    if j < 0:\n",
827 |     "        return (s3[:i+1] == s1[:i+1])\n",
828 |     "    if mat[i][j] == -1:\n",
829 |     "        mat[i][j] = (\n",
830 |     "            (f(s1,s2,s3,i-1,j) and (s3[i+j+1] == s1[i]))\n",
831 |     "            or (f(s1,s2,s3,i,j-1) and (s3[i+j+1] == s2[j]))\n",
832 |     "        )\n",
833 |     "    return mat[i][j]\n",
834 |     "\n",
835 |     "def is_interleaved(s1,s2,s3):\n",
836 |     "    global mat\n",
837 |     "    if len(s1) == 0 and len(s2) == 0 and len(s3) == 0:\n",
838 |     "        return True\n",
839 |     "    if len(s3) != (len(s1) + len(s2)):\n",
840 |     "        return False\n",
841 |     "    l_s1, l_s2 = len(s1), len(s2)\n",
842 |     "    mat = [[-1]*(l_s2) for i in xrange(l_s1)]\n",
843 |     "    for i in xrange(l_s1):\n",
844 |     "        for j in xrange(l_s2):\n",
845 |     "            check = f(s1,s2,s3,i,j)\n",
846 |     "            #print i,j, i+j+1, s1[:i+1], s2[:j+1], s3[:i+j+2], check\n",
847 |     "    #pprint(mat)\n",
848 |     "    return mat[-1][-1]\n",
849 |     "                \n",
850 |     "                "
851 |    ]
852 |   },
853 |   {
854 |    "cell_type": "code",
855 |    "execution_count": 142,
856 |    "metadata": {
857 |     "collapsed": false
858 |    },
859 |    "outputs": [
860 |     {
861 |      "name": "stdout",
862 |      "output_type": "stream",
863 |      "text": [
864 |       "'aaaacaab'\tTrue\n",
865 |       "'aaaacaa'\tFalse\n",
866 |       "'aaaacaad'\tFalse\n",
867 |       "'aaaadaad'\tFalse\n",
868 |       "''\tFalse\n"
869 |      ]
870 |     }
871 |    ],
872 |    "source": [
873 |     "s1 = \"aaab\"\n",
874 |     "s2 = \"aaac\"\n",
875 |     "for s3 in [\"aaaacaab\", \"aaaacaa\", \"aaaacaad\", \"aaaadaad\", \"\"]:\n",
876 |     "    print \"'%s'\\t%s\" % (s3, is_interleaved(s1, s2, s3))"
877 |    ]
878 |   },
879 |   {
880 |    "cell_type": "code",
881 |    "execution_count": 143,
882 |    "metadata": {
883 |     "collapsed": false
884 |    },
885 |    "outputs": [
886 |     {
887 |      "name": "stdout",
888 |      "output_type": "stream",
889 |      "text": [
890 |       "'worldhello'\tTrue\n",
891 |       "'heworlllod'\tTrue\n",
892 |       "'helloworld'\tTrue\n",
893 |       "''\tFalse\n"
894 |      ]
895 |     }
896 |    ],
897 |    "source": [
898 |     "s1 = \"hello\"\n",
899 |     "s2 = \"world\"\n",
900 |     "for s3 in [\"worldhello\", \"heworlllod\", \"helloworld\", \"\"]:\n",
901 |     "    print \"'%s'\\t%s\" % (s3, is_interleaved(s1, s2, s3))"
902 |    ]
903 |   },
904 |   {
905 |    "cell_type": "code",
906 |    "execution_count": 144,
907 |    "metadata": {
908 |     "collapsed": false
909 |    },
910 |    "outputs": [
911 |     {
912 |      "name": "stdout",
913 |      "output_type": "stream",
914 |      "text": [
915 |       "'worldhello'\tFalse\n",
916 |       "'heworlllod'\tFalse\n",
917 |       "'helloworld'\tFalse\n",
918 |       "''\tTrue\n"
919 |      ]
920 |     }
921 |    ],
922 |    "source": [
923 |     "s1 = \"\"\n",
924 |     "s2 = \"\"\n",
925 |     "for s3 in [\"worldhello\", \"heworlllod\", \"helloworld\", \"\"]:\n",
926 |     "    print \"'%s'\\t%s\" % (s3, is_interleaved(s1, s2, s3))"
927 |    ]
928 |   },
929 |   {
930 |    "cell_type": "code",
931 |    "execution_count": 124,
932 |    "metadata": {
933 |     "collapsed": false
934 |    },
935 |    "outputs": [
936 |     {
937 |      "name": "stdout",
938 |      "output_type": "stream",
939 |      "text": [
940 |       "[]\n"
941 |      ]
942 |     }
943 |    ],
944 |    "source": [
945 |     "pprint(mat)"
946 |    ]
947 |   },
948 |   {
949 |    "cell_type": "code",
950 |    "execution_count": null,
951 |    "metadata": {
952 |     "collapsed": true
953 |    },
954 |    "outputs": [],
955 |    "source": []
956 |   }
957 |  ],
958 |  "metadata": {
959 |   "kernelspec": {
960 |    "display_name": "Python 2",
961 |    "language": "python",
962 |    "name": "python2"
963 |   },
964 |   "language_info": {
965 |    "codemirror_mode": {
966 |     "name": "ipython",
967 |     "version": 2
968 |    },
969 |    "file_extension": ".py",
970 |    "mimetype": "text/x-python",
971 |    "name": "python",
972 |    "nbconvert_exporter": "python",
973 |    "pygments_lexer": "ipython2",
974 |    "version": "2.7.11"
975 |   }
976 |  },
977 |  "nbformat": 4,
978 |  "nbformat_minor": 0
979 | }
980 | 


--------------------------------------------------------------------------------
/PyMC Testing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# Import relevant modules\n",
 12 |     "import pymc\n",
 13 |     "import numpy as np\n",
 14 |     "import matplotlib.pyplot as plt"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "# Some data\n",
 26 |     "n = 5 * np.ones(4, dtype=int)\n",
 27 |     "x = np.array([-.86, -.3, -.05, .73])"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": 3,
 33 |    "metadata": {
 34 |     "collapsed": true
 35 |    },
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "# Priors on unknown parameters\n",
 39 |     "alpha = pymc.Normal('alpha', mu=0, tau=.01)\n",
 40 |     "beta = pymc.Normal('beta', mu=0, tau=.01)\n",
 41 |     "\n",
 42 |     "# Arbitrary deterministic function of parameters\n",
 43 |     "@pymc.deterministic\n",
 44 |     "def theta(a=alpha, b=beta):\n",
 45 |     "    \"\"\"theta = logit^{-1}(a+b)\"\"\"\n",
 46 |     "    return pymc.invlogit(a + b * x)\n",
 47 |     "\n",
 48 |     "# Binomial likelihood for data\n",
 49 |     "d = pymc.Binomial('d', n=n, p=theta, value=np.array([0., 1., 3., 5.]),\n",
 50 |     "                  observed=True)"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 9,
 56 |    "metadata": {
 57 |     "collapsed": true
 58 |    },
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "mymodel = pymc.Model([theta, d])\n",
 62 |     "S = pymc.MCMC(mymodel, db='pickle')"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 10,
 68 |    "metadata": {
 69 |     "collapsed": false
 70 |    },
 71 |    "outputs": [
 72 |     {
 73 |      "name": "stdout",
 74 |      "output_type": "stream",
 75 |      "text": [
 76 |       "\r",
 77 |       " [-----------------100%-----------------] 10000 of 10000 complete in 0.3 sec"
 78 |      ]
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "\n",
 83 |     "S.sample(iter=10000, burn=5000, thin=2)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 11,
 89 |    "metadata": {
 90 |     "collapsed": false
 91 |    },
 92 |    "outputs": [
 93 |     {
 94 |      "name": "stdout",
 95 |      "output_type": "stream",
 96 |      "text": [
 97 |       "Plotting theta_0\n",
 98 |       "Plotting theta_1\n",
 99 |       "Plotting theta_2\n",
100 |       "Plotting theta_3\n"
101 |      ]
102 |     }
103 |    ],
104 |    "source": [
105 |     "pymc.Matplot.plot(S)"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 12,
111 |    "metadata": {
112 |     "collapsed": true
113 |    },
114 |    "outputs": [],
115 |    "source": [
116 |     "plt.show()"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": null,
122 |    "metadata": {
123 |     "collapsed": true
124 |    },
125 |    "outputs": [],
126 |    "source": []
127 |   }
128 |  ],
129 |  "metadata": {
130 |   "kernelspec": {
131 |    "display_name": "Python 2",
132 |    "language": "python",
133 |    "name": "python2"
134 |   },
135 |   "language_info": {
136 |    "codemirror_mode": {
137 |     "name": "ipython",
138 |     "version": 2
139 |    },
140 |    "file_extension": ".py",
141 |    "mimetype": "text/x-python",
142 |    "name": "python",
143 |    "nbconvert_exporter": "python",
144 |    "pygments_lexer": "ipython2",
145 |    "version": "2.7.9"
146 |   }
147 |  },
148 |  "nbformat": 4,
149 |  "nbformat_minor": 0
150 | }
151 | 


--------------------------------------------------------------------------------
/Quora Haqathon.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "metadata": {
  3 |   "name": "",
  4 |   "signature": "sha256:183c9e17b73179464622911276d7e98d8c0d9fb2335ce136b293fe4f2d921f70"
  5 |  },
  6 |  "nbformat": 3,
  7 |  "nbformat_minor": 0,
  8 |  "worksheets": [
  9 |   {
 10 |    "cells": [
 11 |     {
 12 |      "cell_type": "code",
 13 |      "collapsed": false,
 14 |      "input": [
 15 |       "\"\"\"\n",
 16 |       "Archery problem: https://www.hackerrank.com/contests/quora-haqathon/challenges/archery\n",
 17 |       "\"\"\""
 18 |      ],
 19 |      "language": "python",
 20 |      "metadata": {},
 21 |      "outputs": [
 22 |       {
 23 |        "metadata": {},
 24 |        "output_type": "pyout",
 25 |        "prompt_number": 1,
 26 |        "text": [
 27 |         "'\\nArchery problem: https://www.hackerrank.com/contests/quora-haqathon/challenges/archery\\n'"
 28 |        ]
 29 |       }
 30 |      ],
 31 |      "prompt_number": 1
 32 |     },
 33 |     {
 34 |      "cell_type": "code",
 35 |      "collapsed": false,
 36 |      "input": [
 37 |       "# Enter your code here. Read input from STDIN. Print output to STDOUT\n",
 38 |       "# radii = [1,2,3,4]\n",
 39 |       "def isQ(r,line):\n",
 40 |       "    x1,y1,x2,y2 = line\n",
 41 |       "    if y1 == y2 and x1 == x2:\n",
 42 |       "        #print \"Arrow of zero length.\"\n",
 43 |       "        return False\n",
 44 |       "    if x1 == x2:\n",
 45 |       "        y_sqr = (r**2 - c**2)\n",
 46 |       "        if y_sqr < 0:\n",
 47 |       "            #print \"No intersection\"\n",
 48 |       "            return False\n",
 49 |       "        y = y_sqr**0.5\n",
 50 |       "        if (y-y1)*(y-y2) <= 0 and (-y-y1)*(-y-y2) > 0:\n",
 51 |       "            #print \"Exactly one intersection with circle\"\n",
 52 |       "            return True\n",
 53 |       "        if (-y-y1)*(-y-y2) <= 0 and (y-y1)*(y-y2) > 0:\n",
 54 |       "            #print \"Exactly one intersection with circle\"\n",
 55 |       "            return True\n",
 56 |       "    m = (y1-y2)/(1.0*(x1-x2))\n",
 57 |       "    c = y1 -m*x1\n",
 58 |       "    a = (m**2+1)\n",
 59 |       "    b = 2*m*c\n",
 60 |       "    d = c**2 - r**2\n",
 61 |       "    #print \"Parameters (m,c,a,b,d): \", m,c,a,b,d\n",
 62 |       "    sqrt_t = b**2 - 4*a*d\n",
 63 |       "    #print \"SQRT_T\", sqrt_t\n",
 64 |       "    if sqrt_t < 0:\n",
 65 |       "        #print \"No intersection\"\n",
 66 |       "        return False\n",
 67 |       "    sqrt_t = sqrt_t ** 0.5\n",
 68 |       "    #print \"SQRT_T\", sqrt_t\n",
 69 |       "    x = (-b + sqrt_t)/(2*a)\n",
 70 |       "    y = m*x + c\n",
 71 |       "    count_p = 0\n",
 72 |       "    #print \"Intersection point: \",x , y\n",
 73 |       "    if (y-y1)*(y-y2) <= 0 and (x-x1)*(x-x2) <=0:\n",
 74 |       "        count_p += 1\n",
 75 |       "    x = (-b - sqrt_t)/(2*a)\n",
 76 |       "    y = m*x + c\n",
 77 |       "    #print \"Intersection point: \",x , y\n",
 78 |       "    if (y-y1)*(y-y2) <= 0 and (x-x1)*(x-x2) <=0:\n",
 79 |       "        count_p += 1\n",
 80 |       "    if count_p != 1:\n",
 81 |       "        #print \"More than one intersection with circle\", count_p\n",
 82 |       "        return False\n",
 83 |       "    return True\n",
 84 |       "            \n",
 85 |       "        "
 86 |      ],
 87 |      "language": "python",
 88 |      "metadata": {},
 89 |      "outputs": [],
 90 |      "prompt_number": 7
 91 |     },
 92 |     {
 93 |      "cell_type": "code",
 94 |      "collapsed": false,
 95 |      "input": [
 96 |       "isQ(1,(1,1,0,0))"
 97 |      ],
 98 |      "language": "python",
 99 |      "metadata": {},
100 |      "outputs": [
101 |       {
102 |        "metadata": {},
103 |        "output_type": "pyout",
104 |        "prompt_number": 8,
105 |        "text": [
106 |         "True"
107 |        ]
108 |       }
109 |      ],
110 |      "prompt_number": 8
111 |     },
112 |     {
113 |      "cell_type": "code",
114 |      "collapsed": false,
115 |      "input": [
116 |       "def calcQ(radii, lines):\n",
117 |       "    count = 0\n",
118 |       "    for r in radii:\n",
119 |       "        r = r**2\n",
120 |       "        for line in lines:\n",
121 |       "            x1,y1,x2,y2 = line\n",
122 |       "            min_r = min(((x1**2)+(y1**2)), ((x2**2)+(y2**2)))\n",
123 |       "            if min_r > r:\n",
124 |       "                print \"All points beyond this are discarded\", r, line\n",
125 |       "                break\n",
126 |       "            max_r = max(((x1**2)+(y1**2)), ((x2**2)+(y2**2)))\n",
127 |       "            if r!= min_r and r != max_r and (r-min_r)*(r-max_r) > 0:\n",
128 |       "                print \"Outside range: \", r, min_r, max_r, line\n",
129 |       "                continue\n",
130 |       "            count += 1\n",
131 |       "            #if isQ(r,line):\n",
132 |       "                #count += 1\n",
133 |       "    return count"
134 |      ],
135 |      "language": "python",
136 |      "metadata": {},
137 |      "outputs": [],
138 |      "prompt_number": 57
139 |     },
140 |     {
141 |      "cell_type": "code",
142 |      "collapsed": false,
143 |      "input": [
144 |       "n = int(input())\n",
145 |       "radii = [int(k) for k in raw_input().split()]\n",
146 |       "lines = []\n",
147 |       "n = int(input())\n",
148 |       "for i in range(n):\n",
149 |       "    lines.append([int(k) for k in raw_input().split()])\n",
150 |       "print calcQ(radii,lines)"
151 |      ],
152 |      "language": "python",
153 |      "metadata": {},
154 |      "outputs": [
155 |       {
156 |        "name": "stdout",
157 |        "output_type": "stream",
158 |        "stream": "stdout",
159 |        "text": [
160 |         "4\n"
161 |        ]
162 |       },
163 |       {
164 |        "name": "stdout",
165 |        "output_type": "stream",
166 |        "stream": "stdout",
167 |        "text": [
168 |         "1 2 3 4\n"
169 |        ]
170 |       },
171 |       {
172 |        "name": "stdout",
173 |        "output_type": "stream",
174 |        "stream": "stdout",
175 |        "text": [
176 |         "3\n"
177 |        ]
178 |       },
179 |       {
180 |        "name": "stdout",
181 |        "output_type": "stream",
182 |        "stream": "stdout",
183 |        "text": [
184 |         "1 -1 4 -3\n"
185 |        ]
186 |       },
187 |       {
188 |        "name": "stdout",
189 |        "output_type": "stream",
190 |        "stream": "stdout",
191 |        "text": [
192 |         "2 1 1 2\n"
193 |        ]
194 |       },
195 |       {
196 |        "name": "stdout",
197 |        "output_type": "stream",
198 |        "stream": "stdout",
199 |        "text": [
200 |         "1 -2 3 -4\n"
201 |        ]
202 |       },
203 |       {
204 |        "output_type": "stream",
205 |        "stream": "stdout",
206 |        "text": [
207 |         "3\n"
208 |        ]
209 |       }
210 |      ],
211 |      "prompt_number": 16
212 |     },
213 |     {
214 |      "cell_type": "code",
215 |      "collapsed": false,
216 |      "input": [
217 |       "n = int(input())\n",
218 |       "radii = [int(k) for k in raw_input().split()]\n",
219 |       "radii = sorted(radii)\n",
220 |       "#print radii\n",
221 |       "lines = []\n",
222 |       "n = int(input())\n",
223 |       "for i in range(n):\n",
224 |       "    lines.append([int(k) for k in raw_input().split()])\n",
225 |       "lines.sort(key=lambda x: min(((x[0]**2)+(x[1]**2)), ((x[2]**2)+(x[3]**2))))\n",
226 |       "print lines\n",
227 |       "print calcQ(radii,lines)"
228 |      ],
229 |      "language": "python",
230 |      "metadata": {},
231 |      "outputs": [
232 |       {
233 |        "name": "stdout",
234 |        "output_type": "stream",
235 |        "stream": "stdout",
236 |        "text": [
237 |         "4\n"
238 |        ]
239 |       },
240 |       {
241 |        "name": "stdout",
242 |        "output_type": "stream",
243 |        "stream": "stdout",
244 |        "text": [
245 |         "1 2 3 4\n"
246 |        ]
247 |       },
248 |       {
249 |        "name": "stdout",
250 |        "output_type": "stream",
251 |        "stream": "stdout",
252 |        "text": [
253 |         "3\n"
254 |        ]
255 |       },
256 |       {
257 |        "name": "stdout",
258 |        "output_type": "stream",
259 |        "stream": "stdout",
260 |        "text": [
261 |         "1 -1 4 -3\n"
262 |        ]
263 |       },
264 |       {
265 |        "name": "stdout",
266 |        "output_type": "stream",
267 |        "stream": "stdout",
268 |        "text": [
269 |         "2 1 1 2\n"
270 |        ]
271 |       },
272 |       {
273 |        "name": "stdout",
274 |        "output_type": "stream",
275 |        "stream": "stdout",
276 |        "text": [
277 |         "1 -2 3 -4\n"
278 |        ]
279 |       },
280 |       {
281 |        "output_type": "stream",
282 |        "stream": "stdout",
283 |        "text": [
284 |         "[[1, -1, 4, -3], [2, 1, 1, 2], [1, -2, 3, -4]]\n",
285 |         "All points beyond this are discarded 1 [1, -1, 4, -3]\n",
286 |         "All points beyond this are discarded 4 [2, 1, 1, 2]\n",
287 |         "Outside range:  9 5 5 [2, 1, 1, 2]\n",
288 |         "Outside range:  16 5 5 [2, 1, 1, 2]\n",
289 |         "5\n"
290 |        ]
291 |       }
292 |      ],
293 |      "prompt_number": 58
294 |     },
295 |     {
296 |      "cell_type": "code",
297 |      "collapsed": false,
298 |      "input": [
299 |       "n = int(input())\n",
300 |       "radii = [int(k) for k in raw_input().split()]\n",
301 |       "radii = sorted(radii)\n",
302 |       "#print radii\n",
303 |       "lines = []\n",
304 |       "n = int(input())\n",
305 |       "for i in range(n):\n",
306 |       "    lines.append([int(k) for k in raw_input().split()])\n",
307 |       "print calcQ(radii,lines)"
308 |      ],
309 |      "language": "python",
310 |      "metadata": {},
311 |      "outputs": [
312 |       {
313 |        "name": "stdout",
314 |        "output_type": "stream",
315 |        "stream": "stdout",
316 |        "text": [
317 |         "4\n"
318 |        ]
319 |       },
320 |       {
321 |        "name": "stdout",
322 |        "output_type": "stream",
323 |        "stream": "stdout",
324 |        "text": [
325 |         "1 2 3 4\n"
326 |        ]
327 |       },
328 |       {
329 |        "name": "stdout",
330 |        "output_type": "stream",
331 |        "stream": "stdout",
332 |        "text": [
333 |         "3\n"
334 |        ]
335 |       },
336 |       {
337 |        "name": "stdout",
338 |        "output_type": "stream",
339 |        "stream": "stdout",
340 |        "text": [
341 |         "1 -1 4 -3\n"
342 |        ]
343 |       },
344 |       {
345 |        "name": "stdout",
346 |        "output_type": "stream",
347 |        "stream": "stdout",
348 |        "text": [
349 |         "2 1 1 2\n"
350 |        ]
351 |       },
352 |       {
353 |        "name": "stdout",
354 |        "output_type": "stream",
355 |        "stream": "stdout",
356 |        "text": [
357 |         "1 -2 3 -4\n"
358 |        ]
359 |       },
360 |       {
361 |        "output_type": "stream",
362 |        "stream": "stdout",
363 |        "text": [
364 |         "Outside range:  1 [1, -1, 4, -3]\n",
365 |         "Outside range:  1 [2, 1, 1, 2]\n",
366 |         "Outside range:  2 [2, 1, 1, 2]\n",
367 |         "Outside range:  3 [2, 1, 1, 2]\n",
368 |         "Outside range:  4 [2, 1, 1, 2]\n",
369 |         "Outside range:  1 [1, -2, 3, -4]\n",
370 |         "Outside range:  2 [1, -2, 3, -4]\n",
371 |         "Outside range:  3 [1, -2, 3, -4]\n",
372 |         "Makes Q\n",
373 |         "Outside range:  4 [1, -2, 3, -4]\n",
374 |         "Makes Q\n",
375 |         "5\n"
376 |        ]
377 |       }
378 |      ],
379 |      "prompt_number": 24
380 |     },
381 |     {
382 |      "cell_type": "code",
383 |      "collapsed": false,
384 |      "input": [],
385 |      "language": "python",
386 |      "metadata": {},
387 |      "outputs": []
388 |     }
389 |    ],
390 |    "metadata": {}
391 |   }
392 |  ]
393 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ipython-notebooks
2 | Some iPython Notebooks I have created for personal learning
3 | 


--------------------------------------------------------------------------------
/World Leaders DB.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Source: https://www.worldpresidentsdb.com/list/countries/"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import scrapy\n",
 17 |     "import logging"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "class WorldPresidents(scrapy.Spider):\n",
 27 |     "    name = 'worldpresidentsdb'\n",
 28 |     "    BASE_URL = \"https://www.worldpresidentsdb.com/\"\n",
 29 |     "    start_urls = [\n",
 30 |     "        BASE_URL + '/list/countries/',\n",
 31 |     "    ]\n",
 32 |     "    def parse(self, response):\n",
 33 |     "        for country in response.css(\".container div.list-group a\"):\n",
 34 |     "            country_url = country.css('::attr(\"href\")').get()\n",
 35 |     "            country_url = WorldPresidents.BASE_URL + country_url\n",
 36 |     "            yield response.follow(country_url, self.parse_country)\n",
 37 |     "                \n",
 38 |     "    def parse_country(self, response):\n",
 39 |     "        for president in response.css(\".container div.list-group a\"):\n",
 40 |     "            president_url = president.css('::attr(\"href\")').get()\n",
 41 |     "            president_url = WorldPresidents.BASE_URL + president_url\n",
 42 |     "            yield response.follow(president_url, self.parse_president)\n",
 43 |     "            \n",
 44 |     "    def parse_president(self, response):\n",
 45 |     "        info = {\n",
 46 |     "            \"url\": response.url\n",
 47 |     "        }\n",
 48 |     "        for p in response.css(\".container div.row div.col-md-8 p\"):\n",
 49 |     "            p_info = self.extract_info(p)\n",
 50 |     "            info.update(p_info)\n",
 51 |     "        yield info\n",
 52 |     "            \n",
 53 |     "    def extract_info(self, p):\n",
 54 |     "        info = {}\n",
 55 |     "        k = None\n",
 56 |     "        for text in p.css(\"::text\").extract():                \n",
 57 |     "            text = text.strip()\n",
 58 |     "            if not text: continue\n",
 59 |     "            if text.endswith(\":\"):\n",
 60 |     "                if k and len(info[k]) == 1 and k not in {\"Terms\"}:\n",
 61 |     "                    info[k] = info[k][0]\n",
 62 |     "                k = text[:-1]\n",
 63 |     "                info[k] = []\n",
 64 |     "            else:\n",
 65 |     "                v = text\n",
 66 |     "                if k == \"Terms\":\n",
 67 |     "                    if text.startswith(\") \"):\n",
 68 |     "                        t = text[2:]\n",
 69 |     "                        if t.lower().startswith(\"in office since \"):\n",
 70 |     "                            start = t.split(\" since \")[1]\n",
 71 |     "                            end = None\n",
 72 |     "                        else:\n",
 73 |     "                            start, end = t.split(\" to \")\n",
 74 |     "                        v = {\"start\": start, \"end\": end}\n",
 75 |     "                    else:\n",
 76 |     "                        continue\n",
 77 |     "                info[k].append(v)\n",
 78 |     "        if len(info[k]) == 1 and k not in  {\"Terms\"}:\n",
 79 |     "            info[k] = info[k][0]\n",
 80 |     "        return info"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 3,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "from scrapy.crawler import CrawlerProcess\n",
 90 |     "from scrapy.exporters import JsonLinesItemExporter"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 4,
 96 |    "metadata": {},
 97 |    "outputs": [
 98 |     {
 99 |      "name": "stderr",
100 |      "output_type": "stream",
101 |      "text": [
102 |       "2021-01-04 18:41:20 [scrapy.utils.log] INFO: Scrapy 2.4.1 started (bot: scrapybot)\n",
103 |       "2021-01-04 18:41:20 [scrapy.utils.log] INFO: Versions: lxml 4.6.2.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.5.2, w3lib 1.21.0, Twisted 20.3.0, Python 3.7.3 (default, Mar 27 2019, 16:54:48) - [Clang 4.0.1 (tags/RELEASE_401/final)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1i  8 Dec 2020), cryptography 2.8, Platform Darwin-19.6.0-x86_64-i386-64bit\n",
104 |       "2021-01-04 18:41:20 [scrapy.crawler] INFO: Overridden settings:\n",
105 |       "{'LOG_LEVEL': 'INFO',\n",
106 |       " 'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'}\n",
107 |       "2021-01-04 18:41:20 [scrapy.extensions.telnet] INFO: Telnet Password: 28e109c5e0bb7348\n",
108 |       "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled extensions:\n",
109 |       "['scrapy.extensions.corestats.CoreStats',\n",
110 |       " 'scrapy.extensions.telnet.TelnetConsole',\n",
111 |       " 'scrapy.extensions.memusage.MemoryUsage',\n",
112 |       " 'scrapy.extensions.feedexport.FeedExporter',\n",
113 |       " 'scrapy.extensions.logstats.LogStats']\n",
114 |       "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled downloader middlewares:\n",
115 |       "['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',\n",
116 |       " 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',\n",
117 |       " 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',\n",
118 |       " 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',\n",
119 |       " 'scrapy.downloadermiddlewares.retry.RetryMiddleware',\n",
120 |       " 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',\n",
121 |       " 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',\n",
122 |       " 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',\n",
123 |       " 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',\n",
124 |       " 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',\n",
125 |       " 'scrapy.downloadermiddlewares.stats.DownloaderStats']\n",
126 |       "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled spider middlewares:\n",
127 |       "['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',\n",
128 |       " 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',\n",
129 |       " 'scrapy.spidermiddlewares.referer.RefererMiddleware',\n",
130 |       " 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',\n",
131 |       " 'scrapy.spidermiddlewares.depth.DepthMiddleware']\n",
132 |       "2021-01-04 18:41:20 [scrapy.middleware] INFO: Enabled item pipelines:\n",
133 |       "[]\n",
134 |       "2021-01-04 18:41:20 [scrapy.core.engine] INFO: Spider opened\n",
135 |       "2021-01-04 18:41:20 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)\n",
136 |       "2021-01-04 18:41:20 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6024\n"
137 |      ]
138 |     },
139 |     {
140 |      "data": {
141 |       "text/plain": [
142 |        "<Deferred at 0x7fcc60570cf8>"
143 |       ]
144 |      },
145 |      "execution_count": 4,
146 |      "metadata": {},
147 |      "output_type": "execute_result"
148 |     }
149 |    ],
150 |    "source": [
151 |     "process = CrawlerProcess({\n",
152 |     "    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',\n",
153 |     "    \"FEEDS\": {\n",
154 |     "        \"worldpresidentsdb.json\": {\n",
155 |     "            \"format\": \"jsonlines\",\n",
156 |     "            'encoding': 'utf8',\n",
157 |     "            'overwrite': True\n",
158 |     "        },\n",
159 |     "    },\n",
160 |     "    \"LOG_LEVEL\": \"INFO\"\n",
161 |     "})\n",
162 |     "\n",
163 |     "process.crawl(WorldPresidents)"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 5,
169 |    "metadata": {},
170 |    "outputs": [
171 |     {
172 |      "name": "stderr",
173 |      "output_type": "stream",
174 |      "text": [
175 |       "2021-01-04 18:41:27 [scrapy.core.engine] INFO: Closing spider (finished)\n",
176 |       "2021-01-04 18:41:27 [scrapy.extensions.feedexport] INFO: Stored jsonlines feed (544 items) in: worldpresidentsdb.json\n",
177 |       "2021-01-04 18:41:27 [scrapy.statscollectors] INFO: Dumping Scrapy stats:\n",
178 |       "{'downloader/request_bytes': 241323,\n",
179 |       " 'downloader/request_count': 618,\n",
180 |       " 'downloader/request_method_count/GET': 618,\n",
181 |       " 'downloader/response_bytes': 1933100,\n",
182 |       " 'downloader/response_count': 618,\n",
183 |       " 'downloader/response_status_count/200': 618,\n",
184 |       " 'dupefilter/filtered': 36,\n",
185 |       " 'elapsed_time_seconds': 7.062385,\n",
186 |       " 'finish_reason': 'finished',\n",
187 |       " 'finish_time': datetime.datetime(2021, 1, 4, 23, 41, 27, 632916),\n",
188 |       " 'item_scraped_count': 544,\n",
189 |       " 'log_count/INFO': 11,\n",
190 |       " 'memusage/max': 69611520,\n",
191 |       " 'memusage/startup': 69611520,\n",
192 |       " 'request_depth_max': 2,\n",
193 |       " 'response_received_count': 618,\n",
194 |       " 'scheduler/dequeued': 618,\n",
195 |       " 'scheduler/dequeued/memory': 618,\n",
196 |       " 'scheduler/enqueued': 618,\n",
197 |       " 'scheduler/enqueued/memory': 618,\n",
198 |       " 'start_time': datetime.datetime(2021, 1, 4, 23, 41, 20, 570531)}\n",
199 |       "2021-01-04 18:41:27 [scrapy.core.engine] INFO: Spider closed (finished)\n"
200 |      ]
201 |     },
202 |     {
203 |      "name": "stdout",
204 |      "output_type": "stream",
205 |      "text": [
206 |       "CPU times: user 6.19 s, sys: 59.6 ms, total: 6.25 s\n",
207 |       "Wall time: 7.09 s\n"
208 |      ]
209 |     }
210 |    ],
211 |    "source": [
212 |     "%%time\n",
213 |     "process.start()"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": null,
219 |    "metadata": {},
220 |    "outputs": [],
221 |    "source": []
222 |   }
223 |  ],
224 |  "metadata": {
225 |   "kernelspec": {
226 |    "display_name": "Python 3",
227 |    "language": "python",
228 |    "name": "python3"
229 |   },
230 |   "language_info": {
231 |    "codemirror_mode": {
232 |     "name": "ipython",
233 |     "version": 3
234 |    },
235 |    "file_extension": ".py",
236 |    "mimetype": "text/x-python",
237 |    "name": "python",
238 |    "nbconvert_exporter": "python",
239 |    "pygments_lexer": "ipython3",
240 |    "version": "3.7.3"
241 |   }
242 |  },
243 |  "nbformat": 4,
244 |  "nbformat_minor": 4
245 | }
246 | 


--------------------------------------------------------------------------------
/images/olympic_athlete.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/napsternxg/ipython-notebooks/f327874fcf7b99c19c82919ad1cdba01660d2ef1/images/olympic_athlete.PNG


--------------------------------------------------------------------------------
/maxContigSum.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 33,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [
 10 |     {
 11 |      "name": "stdout",
 12 |      "output_type": "stream",
 13 |      "text": [
 14 |       "Max contiguous sum in  [5, 15, -30, 10, -5, 40, 10, -20] is 55 :  [10, -5, 40, 10]\n",
 15 |       "is 55\n",
 16 |       "is 55\n",
 17 |       "is 55\n",
 18 |       "is 55 :  [10, -5, 40, 10]\n"
 19 |      ]
 20 |     }
 21 |    ],
 22 |    "source": [
 23 |     "''' A collection of approaches to the maximum contiguous sequence sum problem.\n",
 24 |     "Ends up recursive, but no multiple reuse, so not really dynamic programming.\n",
 25 |     "'''\n",
 26 |     "def maxContigSum1(x):\n",
 27 |     "    '''Basic idea first, storing intermediate results in an array.'''\n",
 28 |     "    n = len(x) \n",
  29 |     "    endingHere = [0]*n # endingHere[i] = best sum ending at i; may be length 0.\n",
 30 |     "    maxSoFar = endingHere[0] = max(0, x[0])\n",
 31 |     "    for i in range (1, n): # 1..n-1\n",
 32 |     "        endingHere[i] = max(endingHere[i-1] + x[i], 0)\n",
 33 |     "        maxSoFar = max(maxSoFar, endingHere[i])\n",
 34 |     "    return maxSoFar\n",
 35 |     "\n",
 36 |     "def maxContigSum(x):\n",
 37 |     "    '''Don't need the second array since only use previous value!'''\n",
 38 |     "    n = len(x) \n",
 39 |     "    endingHere = 0\n",
 40 |     "    maxSoFar = 0\n",
  41 |     "    for i in range (0, n): # 0..n-1\n",
 42 |     "        endingHere = max(endingHere + x[i], 0)\n",
 43 |     "        maxSoFar = max(maxSoFar, endingHere)\n",
 44 |     "    return maxSoFar\n",
 45 |     "\n",
 46 |     "def maxContigSumWithLocation(x):\n",
 47 |     "    '''Return (maxSum, startI, endI): value and location in sequence.'''\n",
 48 |     "    n = len(x) \n",
 49 |     "    endingHere = 0\n",
 50 |     "    maxSoFar = 0\n",
 51 |     "    bestStartI = 0  # start of optimal seq so far\n",
 52 |     "    bestEndI = -1 #end of optimal seq so far - initially empty\n",
 53 |     "    curStartI = 0  # start of best seq ending at current position\n",
  54 |     "    for i in range (0, n): # 0..n-1\n",
 55 |     "        endingHere = max(endingHere + x[i], 0)\n",
 56 |     "        if endingHere == 0:\n",
 57 |     "           curStartI = i+1\n",
 58 |     "        elif maxSoFar < endingHere:\n",
 59 |     "           bestStartI = curStartI\n",
 60 |     "           bestEndI = i\n",
 61 |     "           maxSoFar = endingHere    \n",
 62 |     "    return (maxSoFar, bestStartI, bestEndI)\n",
 63 |     "\n",
 64 |     "def maxContigSumNeg(x):\n",
 65 |     "    # Handles case with negative number.\n",
 66 |     "    # Taken from https://en.wikipedia.org/wiki/Maximum_subarray_problem\n",
 67 |     "    endingHere = x[0]\n",
 68 |     "    maxSoFar = x[0]\n",
 69 |     "    for i in x[1:]:\n",
 70 |     "        endingHere = max(endingHere + i, i)\n",
 71 |     "        maxSoFar = max(endingHere, maxSoFar)\n",
 72 |     "    return maxSoFar\n",
 73 |     "\n",
 74 |     "def maxContigSumNegWithLoc(x):\n",
 75 |     "    # Handles case with negative number.\n",
 76 |     "    # Modified version of the functions above\n",
 77 |     "    endingHere = x[0]\n",
 78 |     "    maxSoFar = x[0]\n",
 79 |     "    bestStartI = 0\n",
 80 |     "    bestEndI = 0\n",
 81 |     "    curStartI = 0\n",
 82 |     "    for i in range(1,len(x)):\n",
 83 |     "        endingHere = max(endingHere + x[i], x[i])\n",
 84 |     "        if endingHere == x[i]:\n",
 85 |     "            curStartI = i\n",
 86 |     "        if maxSoFar < endingHere:\n",
 87 |     "            bestStartI = curStartI\n",
 88 |     "            bestEndI = i\n",
 89 |     "            maxSoFar = endingHere\n",
 90 |     "    return (maxSoFar, bestStartI, bestEndI)\n",
 91 |     "\n",
 92 |     "#### Rest is for displaying results ############################\n",
 93 |     "def showMaxSum(nums):\n",
 94 |     "    '''Display starting data and results.'''\n",
 95 |     "    (maxSum, startI, endI) = maxContigSumWithLocation(nums)\n",
 96 |     "    print \"Max contiguous sum in \", nums, \n",
 97 |     "    print \"is\", maxSum, \": \", nums[startI : endI +1]\n",
 98 |     "    print \"is\", maxContigSum(nums)\n",
 99 |     "    print \"is\", maxContigSum1(nums)\n",
100 |     "    print \"is\", maxContigSumNeg(nums)\n",
101 |     "    (maxSum, startI, endI) = maxContigSumNegWithLoc(nums)\n",
102 |     "    print \"is\", maxSum, \": \", nums[startI : endI +1]\n",
103 |     "    \n",
104 |     "    \n",
105 |     "showMaxSum([5, 15, -30, 10, -5, 40, 10, -20])"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 34,
111 |    "metadata": {
112 |     "collapsed": false
113 |    },
114 |    "outputs": [
115 |     {
116 |      "name": "stdout",
117 |      "output_type": "stream",
118 |      "text": [
119 |       "Max contiguous sum in  [-5, -15, -30, -10, -5, -40, -10, -20] is 0 :  []\n",
120 |       "is 0\n",
121 |       "is 0\n",
122 |       "is -5\n",
123 |       "is -5 :  [-5]\n"
124 |      ]
125 |     }
126 |    ],
127 |    "source": [
128 |     "showMaxSum([-5, -15, -30, -10, -5, -40, -10, -20])"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 35,
134 |    "metadata": {
135 |     "collapsed": false
136 |    },
137 |    "outputs": [
138 |     {
139 |      "name": "stdout",
140 |      "output_type": "stream",
141 |      "text": [
142 |       "Max contiguous sum in  [-50, -15, -30, -10, -5, -40, -10, -20] is 0 :  []\n",
143 |       "is 0\n",
144 |       "is 0\n",
145 |       "is -5\n",
146 |       "is -5 :  [-5]\n"
147 |      ]
148 |     }
149 |    ],
150 |    "source": [
151 |     "showMaxSum([-50, -15, -30, -10, -5, -40, -10, -20])"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {
158 |     "collapsed": true
159 |    },
160 |    "outputs": [],
161 |    "source": []
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": null,
166 |    "metadata": {
167 |     "collapsed": true
168 |    },
169 |    "outputs": [],
170 |    "source": []
171 |   }
172 |  ],
173 |  "metadata": {
174 |   "kernelspec": {
175 |    "display_name": "Python 2",
176 |    "language": "python",
177 |    "name": "python2"
178 |   },
179 |   "language_info": {
180 |    "codemirror_mode": {
181 |     "name": "ipython",
182 |     "version": 2
183 |    },
184 |    "file_extension": ".py",
185 |    "mimetype": "text/x-python",
186 |    "name": "python",
187 |    "nbconvert_exporter": "python",
188 |    "pygments_lexer": "ipython2",
189 |    "version": "2.7.9"
190 |   }
191 |  },
192 |  "nbformat": 4,
193 |  "nbformat_minor": 0
194 | }
195 | 


--------------------------------------------------------------------------------
/monte_carlo_circle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/napsternxg/ipython-notebooks/f327874fcf7b99c19c82919ad1cdba01660d2ef1/monte_carlo_circle.png


--------------------------------------------------------------------------------
/olympic_athlete_data_download.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Download Olympic Athlete Data from official website\n",
 8 |     "\n",
 9 |     "* Get athlete page name sitemaps from: https://olympics.com/en/xml-sitemap/\n",
10 |     "```xml\n",
11 |     "<sitemap>\n",
12 |     "<loc>https://olympics.com/en/xml-sitemap/custom/athlete/1/50000</loc>\n",
13 |     "</sitemap>\n",
14 |     "<sitemap>\n",
15 |     "<loc>https://olympics.com/en/xml-sitemap/custom/athlete/2/50000</loc>\n",
16 |     "</sitemap>\n",
17 |     "<sitemap>\n",
18 |     "<loc>https://olympics.com/en/xml-sitemap/custom/athlete/3/50000</loc>\n",
19 |     "</sitemap>\n",
20 |     "<sitemap>\n",
21 |     "<loc>https://olympics.com/en/xml-sitemap/custom/athlete/4/50000</loc>\n",
22 |     "</sitemap>\n",
23 |     "```\n",
24 |     "* Get individual athlete pages from each site map: https://olympics.com/en/xml-sitemap/custom/athlete/1/50000\n",
25 |     "```xml\n",
26 |     "<url>\n",
27 |     "<loc>https://olympics.com/en/athletes/grant-holloway</loc>\n",
28 |     "<lastmod>2024-03-05T23:02:24.551Z</lastmod>\n",
29 |     "</url>\n",
30 |     "```\n",
31 |     "* For each page get the data from the server script tag at the end of source: https://olympics.com/en/athletes/antoine-dupont\n",
32 |     "```html\n",
33 |     "<template data-dgst=\"DYNAMIC_SERVER_USAGE\"></template>\n",
34 |     "<!--/$--><!--/$--></div><script id=\"__NEXT_DATA__\" type=\"application/json\">\n",
35 |     "```\n",
36 |     "* This data contains the following JSON:\n",
37 |     "\n",
38 |     "![JSON data](./images/olympic_athlete.PNG)\n",
39 |     "* Save this data and extract relevant fields into CSV for analysis"
40 |    ]
41 |   }
42 |  ],
43 |  "metadata": {
44 |   "language_info": {
45 |    "name": "python"
46 |   }
47 |  },
48 |  "nbformat": 4,
49 |  "nbformat_minor": 2
50 | }
51 | 


--------------------------------------------------------------------------------
/temp.tsv:
--------------------------------------------------------------------------------
 1 | B	A_mean	A_std	A_log_mean	A_log_std
 2 | 10	0.714602439048092	0.18596711502853155	0.7058273146766356	0.7058273146766356
 3 | 11	0.4971257285821444	0.28754774944387995	0.47244484939873344	0.47244484939873344
 4 | 12	0.33146129098789867	0.28814805056017534	0.30444172693653737	0.30444172693653737
 5 | 13	0.40194930431903436	0.3354006675219303	0.3680287533540716	0.3680287533540716
 6 | 14	0.5365678987931479	0.30449058531450507	0.5090181376730747	0.5090181376730747
 7 | 15	0.4790838093516074	0.2322818768065271	0.4620129210643693	0.4620129210643693
 8 | 16	0.49808221983452156	0.2714254166211001	0.4749880812057554	0.4749880812057554
 9 | 17	0.5362859106825243	0.23910019621098794	0.5193963685044889	0.5193963685044889
10 | 18	0.4341769418671373	0.29980223025229213	0.4062553630378207	0.4062553630378207
11 | 19	0.45272261807906267	0.2770384211020521	0.4297227283145406	0.4297227283145406
12 | 


--------------------------------------------------------------------------------