├── README.md
├── Nato
    ├── Alphabet.wav
    └── characters.csv
├── Intro
    ├── images
    │   ├── both.gif
    │   ├── launch.png
    │   ├── toolbar.png
    │   ├── navigator.png
    │   └── architecture.png
    └── source
        ├── salida.txt
        ├── location.py
        └── hello.txt
├── LazyEvaluation.ipynb
├── meshgrid.ipynb
├── Polynomial features.ipynb
├── Memoization.ipynb
├── .gitignore
├── partiallyd-scrape.ipynb
├── youtube-captions-2.ipynb
├── python.ipynb
├── casey-neistat-analisys.ipynb
├── tloz-scrape.ipynb
├── youtube-captions.ipynb
├── bokeh
    └── x.html
├── Intro.ipynb
├── mt-scraper.ipynb
├── Scalers.ipynb
├── bokeh.ipynb
└── insta-api.ipynb


/README.md:
--------------------------------------------------------------------------------
1 | # nbstuff
2 | Notebook stuff!
3 | 


--------------------------------------------------------------------------------
/Nato/Alphabet.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thatcsharpguy/nbstuff/master/Nato/Alphabet.wav


--------------------------------------------------------------------------------
/Intro/images/both.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thatcsharpguy/nbstuff/master/Intro/images/both.gif


--------------------------------------------------------------------------------
/Intro/images/launch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thatcsharpguy/nbstuff/master/Intro/images/launch.png


--------------------------------------------------------------------------------
/Intro/images/toolbar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thatcsharpguy/nbstuff/master/Intro/images/toolbar.png


--------------------------------------------------------------------------------
/Intro/images/navigator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thatcsharpguy/nbstuff/master/Intro/images/navigator.png


--------------------------------------------------------------------------------
/Intro/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thatcsharpguy/nbstuff/master/Intro/images/architecture.png


--------------------------------------------------------------------------------
/Intro/source/salida.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | #
 3 | ##
 4 | ###
 5 | ####
 6 | #####
 7 | ######
 8 | #######
 9 | ########
10 | #########
11 | 


--------------------------------------------------------------------------------
/Intro/source/location.py:
--------------------------------------------------------------------------------
1 | class Location:  # Minimal record type: holds a name together with a region.
2 |     def __init__(self, name, region):  # Both arguments are stored unchanged, with no validation.
3 |         self.name = name
4 |         self.region = region
5 | 


--------------------------------------------------------------------------------
/Intro/source/hello.txt:
--------------------------------------------------------------------------------
1 |   ________          __     ______  __ __                      
2 |  /_  __/ /_  ____ _/ /_   / ____/_/ // /_   ____ ___  ____  __
3 |   / / / __ \/ __ `/ __/  / /   /_  _  __/  / __ `/ / / / / / /
4 |  / / / / / / /_/ / /_   / /___/_  _  __/  / /_/ / /_/ / /_/ / 
5 | /_/ /_/ /_/\__,_/\__/   \____/ /_//_/     \__, /\__,_/\__, /  
6 |                                          /____/      /____/   


--------------------------------------------------------------------------------
/LazyEvaluation.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {
 7 |     "collapsed": true
 8 |    },
 9 |    "outputs": [],
10 |    "source": [
11 |     "a = 2\n",
12 |     "b = 3\n",
13 |     "def odds(n):\n",
14 |     "    \n",
15 |     "    return a + b\n",
16 |     "\n",
17 |     "def odds_lazy():\n",
18 |     "    yield a + b"
19 |    ]
20 |   },
21 |   {
22 |    "cell_type": "code",
23 |    "execution_count": null,
24 |    "metadata": {
25 |     "collapsed": true
26 |    },
27 |    "outputs": [],
28 |    "source": [
29 |     "c = odds_lazy()  # creating the generator computes nothing yet\n",
30 |     "print(next(c))   # a + b is evaluated only here, on demand\n",
31 |     "\n"
32 |    ]
33 |   }
34 |  ],
35 |  "metadata": {
36 |   "kernelspec": {
37 |    "display_name": "Python 3",
38 |    "language": "python",
39 |    "name": "python3"
40 |   },
41 |   "language_info": {
42 |    "codemirror_mode": {
43 |     "name": "ipython",
44 |     "version": 3
45 |    },
46 |    "file_extension": ".py",
47 |    "mimetype": "text/x-python",
48 |    "name": "python",
49 |    "nbconvert_exporter": "python",
50 |    "pygments_lexer": "ipython3",
51 |    "version": "3.6.1"
52 |   }
53 |  },
54 |  "nbformat": 4,
55 |  "nbformat_minor": 2
56 | }
57 | 


--------------------------------------------------------------------------------
/meshgrid.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "\n",
 13 |     "u = np.linspace(-2, 2, 6)\n",
 14 |     "v = np.linspace(-1, 1, 4)\n",
 15 |     "\n",
 16 |     "# Generate 2-D arrays from u and v: X, Y\n",
 17 |     "X,Y = np.meshgrid(u,v)"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 17,
 23 |    "metadata": {},
 24 |    "outputs": [
 25 |     {
 26 |      "name": "stdout",
 27 |      "output_type": "stream",
 28 |      "text": [
 29 |       "[-2.  -1.2 -0.4  0.4  1.2  2. ]\n",
 30 |       "[[-2.  -1.2 -0.4  0.4  1.2  2. ]\n",
 31 |       " [-2.  -1.2 -0.4  0.4  1.2  2. ]\n",
 32 |       " [-2.  -1.2 -0.4  0.4  1.2  2. ]\n",
 33 |       " [-2.  -1.2 -0.4  0.4  1.2  2. ]]\n",
 34 |       "\n",
 35 |       "[-1.         -0.33333333  0.33333333  1.        ]\n",
 36 |       "[[-1.         -1.         -1.         -1.         -1.         -1.        ]\n",
 37 |       " [-0.33333333 -0.33333333 -0.33333333 -0.33333333 -0.33333333 -0.33333333]\n",
 38 |       " [ 0.33333333  0.33333333  0.33333333  0.33333333  0.33333333  0.33333333]\n",
 39 |       " [ 1.          1.          1.          1.          1.          1.        ]]\n"
 40 |      ]
 41 |     }
 42 |    ],
 43 |    "source": [
 44 |     "print(u)\n",
 45 |     "print(X)\n",
 46 |     "print(\"\")\n",
 47 |     "print(v)\n",
 48 |     "print(Y)"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 3,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "data": {
 58 |       "text/plain": [
 59 |        "6675.5"
 60 |       ]
 61 |      },
 62 |      "execution_count": 3,
 63 |      "metadata": {},
 64 |      "output_type": "execute_result"
 65 |     }
 66 |    ],
 67 |    "source": [
 68 |     "a = np.array([7921,5184,8836,4761])\n",
 69 |     "a.mean()"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 4,
 75 |    "metadata": {
 76 |     "collapsed": true
 77 |    },
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "r = a.max() - a.min()\n",
 81 |     "b = a - a.mean()"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 5,
 87 |    "metadata": {
 88 |     "collapsed": true
 89 |    },
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "b = b/ r"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 6,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "data": {
102 |       "text/plain": [
103 |        "array([ 0.30564417, -0.36601227,  0.53018405, -0.46981595])"
104 |       ]
105 |      },
106 |      "execution_count": 6,
107 |      "metadata": {},
108 |      "output_type": "execute_result"
109 |     }
110 |    ],
111 |    "source": [
112 |     "b"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {
119 |     "collapsed": true
120 |    },
121 |    "outputs": [],
122 |    "source": []
123 |   }
124 |  ],
125 |  "metadata": {
126 |   "kernelspec": {
127 |    "display_name": "Python 3",
128 |    "language": "python",
129 |    "name": "python3"
130 |   },
131 |   "language_info": {
132 |    "codemirror_mode": {
133 |     "name": "ipython",
134 |     "version": 3
135 |    },
136 |    "file_extension": ".py",
137 |    "mimetype": "text/x-python",
138 |    "name": "python",
139 |    "nbconvert_exporter": "python",
140 |    "pygments_lexer": "ipython3",
141 |    "version": "3.6.1"
142 |   }
143 |  },
144 |  "nbformat": 4,
145 |  "nbformat_minor": 2
146 | }
147 | 


--------------------------------------------------------------------------------
/Polynomial features.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 4,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Size of train (2, 2)\n",
 13 |       "[[2 3]\n",
 14 |       " [2 4]]\n",
 15 |       "Size of train (2, 6)\n",
 16 |       "[[  1.   2.   3.   4.   6.   9.]\n",
 17 |       " [  1.   2.   4.   4.   8.  16.]]\n"
 18 |      ]
 19 |     },
 20 |     {
 21 |      "data": {
 22 |       "text/html": [
 23 |        "<div>\n",
 24 |        "<style>\n",
 25 |        "    .dataframe thead tr:only-child th {\n",
 26 |        "        text-align: right;\n",
 27 |        "    }\n",
 28 |        "\n",
 29 |        "    .dataframe thead th {\n",
 30 |        "        text-align: left;\n",
 31 |        "    }\n",
 32 |        "\n",
 33 |        "    .dataframe tbody tr th {\n",
 34 |        "        vertical-align: top;\n",
 35 |        "    }\n",
 36 |        "</style>\n",
 37 |        "<table border=\"1\" class=\"dataframe\">\n",
 38 |        "  <thead>\n",
 39 |        "    <tr style=\"text-align: right;\">\n",
 40 |        "      <th></th>\n",
 41 |        "      <th>1</th>\n",
 42 |        "      <th>x0</th>\n",
 43 |        "      <th>x1</th>\n",
 44 |        "      <th>x0^2</th>\n",
 45 |        "      <th>x0 x1</th>\n",
 46 |        "      <th>x1^2</th>\n",
 47 |        "    </tr>\n",
 48 |        "  </thead>\n",
 49 |        "  <tbody>\n",
 50 |        "    <tr>\n",
 51 |        "      <th>0</th>\n",
 52 |        "      <td>1.0</td>\n",
 53 |        "      <td>2.0</td>\n",
 54 |        "      <td>3.0</td>\n",
 55 |        "      <td>4.0</td>\n",
 56 |        "      <td>6.0</td>\n",
 57 |        "      <td>9.0</td>\n",
 58 |        "    </tr>\n",
 59 |        "    <tr>\n",
 60 |        "      <th>1</th>\n",
 61 |        "      <td>1.0</td>\n",
 62 |        "      <td>2.0</td>\n",
 63 |        "      <td>4.0</td>\n",
 64 |        "      <td>4.0</td>\n",
 65 |        "      <td>8.0</td>\n",
 66 |        "      <td>16.0</td>\n",
 67 |        "    </tr>\n",
 68 |        "  </tbody>\n",
 69 |        "</table>\n",
 70 |        "</div>"
 71 |       ],
 72 |       "text/plain": [
 73 |        "     1   x0   x1  x0^2  x0 x1  x1^2\n",
 74 |        "0  1.0  2.0  3.0   4.0    6.0   9.0\n",
 75 |        "1  1.0  2.0  4.0   4.0    8.0  16.0"
 76 |       ]
 77 |      },
 78 |      "execution_count": 4,
 79 |      "metadata": {},
 80 |      "output_type": "execute_result"
 81 |     }
 82 |    ],
 83 |    "source": [
 84 |     "import pandas as pd\n",
 85 |     "import numpy as np\n",
 86 |     "from sklearn.preprocessing import PolynomialFeatures\n",
 87 |     "pf = PolynomialFeatures(degree=2, include_bias=True)\n",
 88 |     "\n",
 89 |     "test = np.array([\n",
 90 |     "    [2, 3],\n",
 91 |     "    [2, 4]\n",
 92 |     "])\n",
 93 |     "\n",
 94 |     "pf.fit(test)\n",
 95 |     "print(\"Size of train\", test.shape)\n",
 96 |     "print(test)\n",
 97 |     "x = pf.transform(test)\n",
 98 |     "print(\"Size of train\", x.shape)\n",
 99 |     "print(x)\n",
100 |     "\n",
101 |     "df = pd.DataFrame(x)\n",
102 |     "df.columns = pf.get_feature_names()\n",
103 |     "df.head()"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {
110 |     "collapsed": true
111 |    },
112 |    "outputs": [],
113 |    "source": []
114 |   }
115 |  ],
116 |  "metadata": {
117 |   "kernelspec": {
118 |    "display_name": "Python 3",
119 |    "language": "python",
120 |    "name": "python3"
121 |   },
122 |   "language_info": {
123 |    "codemirror_mode": {
124 |     "name": "ipython",
125 |     "version": 3
126 |    },
127 |    "file_extension": ".py",
128 |    "mimetype": "text/x-python",
129 |    "name": "python",
130 |    "nbconvert_exporter": "python",
131 |    "pygments_lexer": "ipython3",
132 |    "version": "3.6.1"
133 |   }
134 |  },
135 |  "nbformat": 4,
136 |  "nbformat_minor": 2
137 | }
138 | 


--------------------------------------------------------------------------------
/Memoization.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Memoization"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Sin memoización"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "def fib(n):\n",
 26 |     "    if n == 0 or n == 1:\n",
 27 |     "        return 1\n",
 28 |     "    return fib(n-1) + fib(n-2)"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "metadata": {},
 35 |    "outputs": [
 36 |     {
 37 |      "name": "stdout",
 38 |      "output_type": "stream",
 39 |      "text": [
 40 |       "14930352\n",
 41 |       "CPU times: user 5.62 s, sys: 30.4 ms, total: 5.65 s\n",
 42 |       "Wall time: 5.71 s\n"
 43 |      ]
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "%%time\n",
 48 |     "result = fib(35)\n",
 49 |     "print(result)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "## Memoizado"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 3,
 62 |    "metadata": {
 63 |     "collapsed": true
 64 |    },
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "# Casos base\n",
 68 |     "storage = dict([\n",
 69 |     "    (0, 1), \n",
 70 |     "    (1, 1)\n",
 71 |     "])\n",
 72 |     "\n",
 73 |     "def fib(n):\n",
 74 |     "    if n in storage:\n",
 75 |     "        return storage[n]\n",
 76 |     "    storage[n] = fib(n-1) + fib(n-2)\n",
 77 |     "    return storage[n]"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 4,
 83 |    "metadata": {},
 84 |    "outputs": [
 85 |     {
 86 |      "name": "stdout",
 87 |      "output_type": "stream",
 88 |      "text": [
 89 |       "14930352\n",
 90 |       "CPU times: user 601 µs, sys: 328 µs, total: 929 µs\n",
 91 |       "Wall time: 712 µs\n"
 92 |      ]
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "%%time\n",
 97 |     "result = fib(35)\n",
 98 |     "print(result)"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 5,
104 |    "metadata": {},
105 |    "outputs": [
106 |     {
107 |      "name": "stdout",
108 |      "output_type": "stream",
109 |      "text": [
110 |       "20365011074\n",
111 |       "CPU times: user 861 µs, sys: 825 µs, total: 1.69 ms\n",
112 |       "Wall time: 1.56 ms\n"
113 |      ]
114 |     }
115 |    ],
116 |    "source": [
117 |     "%%time\n",
118 |     "result = fib(50)\n",
119 |     "print(result)"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "code",
124 |    "execution_count": 6,
125 |    "metadata": {},
126 |    "outputs": [
127 |     {
128 |      "name": "stdout",
129 |      "output_type": "stream",
130 |      "text": [
131 |       "573147844013817084101\n",
132 |       "CPU times: user 509 µs, sys: 402 µs, total: 911 µs\n",
133 |       "Wall time: 617 µs\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "%%time\n",
139 |     "result = fib(100)\n",
140 |     "print(result)"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": null,
146 |    "metadata": {
147 |     "collapsed": true
148 |    },
149 |    "outputs": [],
150 |    "source": []
151 |   }
152 |  ],
153 |  "metadata": {
154 |   "kernelspec": {
155 |    "display_name": "Python 3",
156 |    "language": "python",
157 |    "name": "python3"
158 |   },
159 |   "language_info": {
160 |    "codemirror_mode": {
161 |     "name": "ipython",
162 |     "version": 3
163 |    },
164 |    "file_extension": ".py",
165 |    "mimetype": "text/x-python",
166 |    "name": "python",
167 |    "nbconvert_exporter": "python",
168 |    "pygments_lexer": "ipython3",
169 |    "version": "3.6.1"
170 |   }
171 |  },
172 |  "nbformat": 4,
173 |  "nbformat_minor": 2
174 | }
175 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/python,pycharm,jupyternotebook
  3 | 
  4 | ### JupyterNotebook ###
  5 | .ipynb_checkpoints
  6 | */.ipynb_checkpoints/*
  7 | 
  8 | # Remove previous ipynb_checkpoints
  9 | #   git rm -r .ipynb_checkpoints/
 10 | #
 11 | ### PyCharm ###
 12 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
 13 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
 14 | 
 15 | # User-specific stuff:
 16 | .idea/**/workspace.xml
 17 | .idea/**/tasks.xml
 18 | .idea/dictionaries
 19 | 
 20 | # Sensitive or high-churn files:
 21 | .idea/**/dataSources/
 22 | .idea/**/dataSources.ids
 23 | .idea/**/dataSources.xml
 24 | .idea/**/dataSources.local.xml
 25 | .idea/**/sqlDataSources.xml
 26 | .idea/**/dynamic.xml
 27 | .idea/**/uiDesigner.xml
 28 | 
 29 | # Gradle:
 30 | .idea/**/gradle.xml
 31 | .idea/**/libraries
 32 | 
 33 | # CMake
 34 | cmake-build-debug/
 35 | 
 36 | # Mongo Explorer plugin:
 37 | .idea/**/mongoSettings.xml
 38 | 
 39 | ## File-based project format:
 40 | *.iws
 41 | 
 42 | ## Plugin-specific files:
 43 | 
 44 | # IntelliJ
 45 | /out/
 46 | 
 47 | # mpeltonen/sbt-idea plugin
 48 | .idea_modules/
 49 | 
 50 | # JIRA plugin
 51 | atlassian-ide-plugin.xml
 52 | 
 53 | # Cursive Clojure plugin
 54 | .idea/replstate.xml
 55 | 
 56 | # Ruby plugin and RubyMine
 57 | /.rakeTasks
 58 | 
 59 | # Crashlytics plugin (for Android Studio and IntelliJ)
 60 | com_crashlytics_export_strings.xml
 61 | crashlytics.properties
 62 | crashlytics-build.properties
 63 | fabric.properties
 64 | 
 65 | ### PyCharm Patch ###
 66 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
 67 | 
 68 | # *.iml
 69 | # modules.xml
 70 | # .idea/misc.xml
 71 | # *.ipr
 72 | 
 73 | # Sonarlint plugin
 74 | .idea/sonarlint
 75 | 
 76 | ### Python ###
 77 | # Byte-compiled / optimized / DLL files
 78 | __pycache__/
 79 | *.py[cod]
 80 | *$py.class
 81 | 
 82 | # C extensions
 83 | *.so
 84 | 
 85 | # Distribution / packaging
 86 | .Python
 87 | build/
 88 | develop-eggs/
 89 | dist/
 90 | downloads/
 91 | eggs/
 92 | .eggs/
 93 | lib/
 94 | lib64/
 95 | parts/
 96 | sdist/
 97 | var/
 98 | wheels/
 99 | *.egg-info/
100 | .installed.cfg
101 | *.egg
102 | 
103 | # PyInstaller
104 | #  Usually these files are written by a python script from a template
105 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
106 | *.manifest
107 | *.spec
108 | 
109 | # Installer logs
110 | pip-log.txt
111 | pip-delete-this-directory.txt
112 | 
113 | # Unit test / coverage reports
114 | htmlcov/
115 | .tox/
116 | .coverage
117 | .coverage.*
118 | .cache
119 | nosetests.xml
120 | coverage.xml
121 | *.cover
122 | .hypothesis/
123 | 
124 | # Translations
125 | *.mo
126 | *.pot
127 | 
128 | # Django stuff:
129 | *.log
130 | local_settings.py
131 | 
132 | # Flask stuff:
133 | instance/
134 | .webassets-cache
135 | 
136 | # Scrapy stuff:
137 | .scrapy
138 | 
139 | # Sphinx documentation
140 | docs/_build/
141 | 
142 | # PyBuilder
143 | target/
144 | 
145 | # Jupyter Notebook
146 | 
147 | # pyenv
148 | .python-version
149 | 
150 | # celery beat schedule file
151 | celerybeat-schedule
152 | 
153 | # SageMath parsed files
154 | *.sage.py
155 | 
156 | # Environments
157 | .env
158 | .venv
159 | env/
160 | venv/
161 | ENV/
162 | env.bak/
163 | venv.bak/
164 | 
165 | # Spyder project settings
166 | .spyderproject
167 | .spyproject
168 | 
169 | # Rope project settings
170 | .ropeproject
171 | 
172 | # mkdocs documentation
173 | /site
174 | 
175 | # mypy
176 | .mypy_cache/
177 | ### macOS ###
178 | *.DS_Store
179 | .AppleDouble
180 | .LSOverride
181 | 
182 | # Icon must end with two \r
183 | Icon
184 | 
185 | # Thumbnails
186 | ._*
187 | 
188 | # Files that might appear in the root of a volume
189 | .DocumentRevisions-V100
190 | .fseventsd
191 | .Spotlight-V100
192 | .TemporaryItems
193 | .Trashes
194 | .VolumeIcon.icns
195 | .com.apple.timemachine.donotpresent
196 | 
197 | # Directories potentially created on remote AFP share
198 | .AppleDB
199 | .AppleDesktop
200 | Network Trash Folder
201 | Temporary Items
202 | .apdisk
203 | 
204 | ### Windows ###
205 | # Windows thumbnail cache files
206 | Thumbs.db
207 | ehthumbs.db
208 | ehthumbs_vista.db
209 | 
210 | # Folder config file
211 | Desktop.ini
212 | 
213 | # Recycle Bin used on file shares
214 | $RECYCLE.BIN/
215 | 
216 | # Windows Installer files
217 | *.cab
218 | *.msi
219 | *.msm
220 | *.msp
221 | 
222 | # Windows shortcuts
223 | *.lnk
224 | 
225 | # End of https://www.gitignore.io/api/macos,windows
226 | 
227 | # Output directories written by the notebooks (captions and scraped data)
228 | youtube-captions/*
229 | casey-neistat-analisys/*
230 | mt-scraper/*
231 | partiallyd-scrape/*
232 | tloz-scrape/*
233 | /.metadata/
234 | 


--------------------------------------------------------------------------------
/Nato/characters.csv:
--------------------------------------------------------------------------------
 1 | Character,Code word,ICAO 2008 IPA convention[5],ICAO recording (1955)[12],Wikipedia IPA and respelling,ICAO 2008 respelling [5],ITU-R 2007 (WRC-07) respelling[7],IMO respelling[citation needed],FAA respelling[8][9],SIA[13] (France aeronautical),NATO & U.S. Army respelling[14]
 2 | A,Alfa,ˈælfɑ,[ˈælfʌ],/ˈælfɑː/ AL-fah,AL FAH,AL FAH,AL FAH,ALFAH or AL-FAH,al fah,AL fah
 3 | B,Bravo,ˈbrɑːˈvo,[brɑˈvoʊ],/ˌbrɑːˈvoʊ/ BRAH-VOH,BRAH VOH,BRAH VOH,BRAH VOH (1955: BRAH VOH),BRAHVOH or BRAH-VO,bra vo,BRAH voh
 4 | C,Charlie,ˈtʃɑːli  or ˈʃɑːli,"[ˈtʃɑ˞li], [ˈʃɑ˞li]",/ˈtʃɑːrliː/ CHAR-lee or /ˈʃɑːrliː/ SHAR-lee,CHAR LEE or SHAR LEE,CHAR LEE or SHAR LEE,CHAR LEE,CHARLEE or CHAR-LEE,"tchah li, char li",CHAR lee
 5 | D,Delta,ˈdeltɑ,[ˈdɛltʌ],/ˈdɛltɑː/ DEL-tah,DELL TAH,DELL TAH,DELL TAH,DELLTAH or DELL-TAH,del tah,DEL tah
 6 | E,Echo,ˈeko,[ˈɛkoʊ],/ˈɛkoʊ/,ECK OH,ECK OH,ECK OH,ECKOH or ECK-OH,èk o,EKK oh
 7 | F,Foxtrot,ˈfɔkstrɔt,[ˈfɑkstrɑt],/ˈfɒkstrɒt/ FOKS-trot,FOKS TROT,FOKS TROT,FOKS TROT,FOKSTROT or FOKS-TROT,fox trott,FOKS trot
 8 | G,Golf,ɡʌlf  [sic],[ˈɡʌl(f)],/ˈɡɒlf/ GOLF,GOLF,GOLF,GOLF,GOLF,golf,Golf
 9 | H,Hotel,hoːˈtel,[hoʊˈtɛl],/hoʊˈtɛl/ hoh-TEL,HOH TELL,HOH TELL,HOH TELL,HOHTELL or HOH-TELL,ho tèll,HO tell
10 | I,India,ˈindiˑɑ,[ˈɪndi.ʌ],/ˈɪndiːɑː/ IN-dee-ah,IN DEE AH,IN DEE AH,IN DEE AH,INDEE AH or IN-DEE-AH,in di ah,IN dee ah
11 | J,Juliett,ˈdʒuːliˑˈet,[ˌdʒuliˈɛt],/ˈdʒuːliːɛt/ JEW-lee-et or /ˌdʒuːliːˈɛt/ JEW-lee-ET,JEW LEE ETT,JEW LEE ETT,JEW LEE ETT,JEWLEE ETT or JEW-LEE-ETT,djou li ètt,JEW lee ett
12 | K,Kilo,ˈkiːlo,[ˈkiloʊ],/ˈkiːloʊ/ KEE-loh,KEY LOH,KEY LOH,KEY LOH,KEYLOH or KEY-LOH,ki lo,KEY loh
13 | L,Lima,ˈliːmɑ,[ˈlimʌ],/ˈliːmɑː/ LEE-mah,LEE MAH,LEE MAH,LEE MAH,LEEMAH or LEE-MAH,li mah,LEE mah
14 | M,Mike,mɑik,[ˈmʌɪk],/ˈmaɪk/ MYK,MIKE,MIKE,MIKE,MIKE,maïk,Mike
15 | N,November,noˈvembə,[noʊˈvɛmbɹ̩],/noʊˈvɛmbər/ noh-VEM-bər[15],NO VEM BER,NO VEM BER,NO VEM BER,NOVEMBER or NO-VEM-BER,no vèmm ber,NOH vem ber
16 | O,Oscar,ˈɔskɑ,[ˈɑskɹ̩],/ˈɒskɑː/ OS-kah,OSS CAH,OSS CAH,OSS CAH,OSS-SCAR or OSS-CAR,oss kar,OSS car
17 | P,Papa,pəˈpɑ,[pəˈpɑ],/pɑːˈpɑː/ pah-PAH,PAH PAH,PAH PAH,PAH PAH,PAHPAH or PAH-PAH,pah pah,PAH pah
18 | Q,Quebec,keˈbek,[kɛˈbɛk],/kɛˈbɛk/ ke-BEK,KEH BECK,KEH BECK,KEH BECK,KEHBECK or KWUH-BECK,ké bèk,keh BECK
19 | R,Romeo,ˈroːmiˑo,[ˈɹoʊmi.oʊ],/ˈroʊmiːoʊ/ ROH-mee-oh,ROW ME OH,ROW ME OH,ROW ME OH,ROWME OH or ROW-ME-OH,ro mi o,ROW me oh
20 | S,Sierra,siˈerɑ,[siˈɛɾʌ],/siːˈɛrɑː/ see-ERR-ah,SEE AIR RAH,SEE AIR RAH,SEE AIR RAH,SEEAIRAH or SEE-AIR-AH,si èr rah,see AIR ah
21 | T,Tango,ˈtænɡo,[ˈtæŋɡoʊ],/ˈtæŋɡoʊ/ TANG-goh,TANG OH,TANG GO,TANG GO,TANGGO or TANG-GO,tang go,TANG go
22 | U,Uniform,ˈjuːnifɔːm  or ˈuːnifɔrm,"[ˈjunɪ̈fɔ˞m], [ˈunɪ̈fɔ˞m]",/ˈjuːniːfɔːrm/ EW-nee-form or /ˈuːniːfɔːrm/ OO-nee-form,YOU NEE FORM or OO NEE FORM,YOU NEE FORM or OO NEE FORM,YOU NEE FORM or OO NEE FORM,YOUNEE FORM or YOU-NEE-FORM or OO-NEE-FORM,"you ni form, ou ni form",YOU nee form
23 | V,Victor,ˈviktɑ,[ˈvɪktəɹ],/ˈvɪktɑː/ VIK-tah,VIK TAH,VIK TAH,VIK TAH,VIKTAH or VIK-TAR,vik tar,VIK ter
24 | W,Whiskey,ˈwiski,[ˈwɪski],/ˈwɪskiː/ WIS-kee,WISS KEY,WISS KEY,WISS KEY,WISSKEY or WISS-KEY,ouiss ki,WISS key
25 | X,X-ray or Xray,ˈeksˈrei,[ˈɛksɹeɪ],/ˈɛksreɪ/ EKS-ray or /ˌɛksˈreɪ/ EKS-RAY,ECKS RAY,ECKS RAY,ECKS RAY,ECKSRAY  [sic] or ECKS-RAY,èkss ré,EKS ray
26 | Y,Yankee,ˈjænki,[ˈjæŋki],/ˈjæŋkiː/ YANG-kee,YANG KEY,YANG KEY,YANG KEY,YANGKEY  [sic] or YANG-KEY,yang ki,YANG kee
27 | Z,Zulu,ˈzuːluː,[ˈzulu],/ˈzuːluː/ ZOO-loo,ZOO LOO,ZOO LOO,ZOO LOO,ZOOLOO or ZOO-LOO,zou lou,ZOO luu
28 | 0,Zero,,,/ˈziːroʊ/ ZEE-roh /ˌnɑːˌdɑːˌzeɪˈroʊ/ NAH-DAH-ZAY-ROH,ZE-RO,NAH-DAH-ZAY-ROH,NADAZERO,Zero,zi ro,ZE-RO
29 | 1,One,,,/ˈwʌn/ WUN /ˌuːˌnɑːˈwʌn/ OO-NAH-WUN,WUN,OO-NAH-WUN,UNAONE,One,ouann,WUN; Won (USMC)[16]
30 | 2,Two,,,/ˈtuː/ TOO /ˌbiːˌsoʊˈtuː/ BEE-SOH-TOO,TOO,BEES-SOH-TOO,BISSOTWO,Two,tou,TOO
31 | 3,Three,,,/ˈtriː/ TREE /ˌteɪˌrɑːˈtriː/ TAY-RAH-TREE,TREE,TAY-RAH-TREE,TERRATHREE,Three,tri,TREE
32 | 4,Four,,,/ˈfoʊ.ər/ FOH-ər /ˌkɑːrˌteɪˈfoʊ.ər/ KAR-TAY-FOH-ər,FOW-er,KAR-TAY-FOWER,KARTEFOUR,Four,fo eur,FOW-ER
33 | 5,Five,,,/ˈfaɪf/ FYF[17] /ˌpænˌtɑːˈfaɪv/ PAN-TAH-FYV,FIFE,PAN-TAH-FIVE,PANTAFIVE,Five,fa ïf,FIFE
34 | 6,Six,,,/ˈsɪks/ SIKS /ˌsɔːkˌsiːˈsɪks/ SOK-SEE-SIKS,SIX,SOK-SEE-SIX,SOXISIX,Six,siks,SIX
35 | 7,Seven,,,/ˈsɛvɛn/ SEV-en /ˌseɪˌteɪˈsɛvɛn/ SAY-TAY-SEV-en,SEV-en,SAY-TAY-SEVEN,SETTESEVEN,Seven,sèv n,SEV-EN
36 | 8,Eight,,,/ˈeɪt/ AYT /ˌɔːkˌtoʊˈeɪt/ OK-TOH-AYT,AIT,OK-TOH-AIT,OKTOEIGHT,Eight,eït,AIT
37 | 9,Nine,,,/ˈnaɪnər/ NY-nər[18] /ˌnɔːvˌeɪˈnaɪnər/ NOV-AY-NY-nər,NIN-er,NO-VAY-NINER,NOVENINE,Niner,naï neu,NIN-ER
38 | . (decimal point),Decimal point,,,/ˌdeɪˌsiːˈmæl/ DAY-SEE-MAL,DAY-SEE-MAL,,,POINT,dè si mal,DAY-SEE-MAL (ITU)
39 | 100,Hundred,,,/ˈhʌndrɛd/ HUN-dred,HUN-dred,,,,hun-dred,
40 | 1000,Thousand,,,/ˌtaʊˈsænd/ TOW-ZEND[19],TOU-SAND,,,,taou zend,TOU-SAND
41 | - (hyphen),Dash,,,/ˈdæʃ/ DASH,,,imo,faa,,
42 | . (full stop),Period,,,/ˈstɒp/ STOP,,,,,,STOP (ITU)
43 | 


--------------------------------------------------------------------------------
/partiallyd-scrape.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import os\n",
 12 |     "import requests\n",
 13 |     "import json\n",
 14 |     "import re\n",
 15 |     "import pandas as pd\n",
 16 |     "import matplotlib.pyplot as plt\n",
 17 |     "from bs4 import BeautifulSoup\n",
 18 |     "\n",
 19 |     "base_dir = \"partiallyd-scrape\"\n",
 20 |     "if not os.path.exists(base_dir):\n",
 21 |     "    os.makedirs(base_dir)"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "url = \"http://partiallyderivative.com/podcast/\"\n",
 33 |     "podcast_page = BeautifulSoup(requests.get(url).text, \"lxml\")"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {
 40 |     "collapsed": true
 41 |    },
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "raw_urls = []\n",
 45 |     "raw_titles = []\n",
 46 |     "raw_dates = []\n",
 47 |     "\n",
 48 |     "post_list = podcast_page.find('ul', {'class':'post-list'})\n",
 49 |     "for li in post_list.findAll('li'):\n",
 50 |     "    link = li.find('a')\n",
 51 |     "    span = li.find('span')\n",
 52 |     "    raw_dates.append(span.text)\n",
 53 |     "    raw_urls.append(link['href'])\n",
 54 |     "    name = li.text[:-len(span.text)]\n",
 55 |     "    raw_titles.append(name)"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {},
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "links = { }\n",
 65 |     "for raw_url in raw_urls:\n",
 66 |     "    podcast_article = BeautifulSoup(requests.get(raw_url).text, \"lxml\")\n",
 67 |     "    ol = podcast_article.find('ol')\n",
 68 |     "    links[raw_url] = []\n",
 69 |     "    if ol is None:\n",
 70 |     "        continue\n",
 71 |     "    for li in ol.findAll('li'):\n",
 72 |     "        a = li.find('a')\n",
 73 |     "        try:\n",
 74 |     "            links[raw_url].append([a.text, a['href']])\n",
 75 |     "        except:\n",
 76 |     "            print(raw_url)"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "reg = re.compile('([0-9]{4})/([0-9]{2})/([0-9]{2})/([a-zA-Z0-9-_]+)')  # groups: year, month, day, slug\n",
 86 |     "dates = []\n",
 87 |     "slug = []\n",
 88 |     "count = []\n",
 89 |     "\n",
 90 |     "for u in raw_urls:\n",
 91 |     "    match = reg.search(u)\n",
 92 |     "    if match:\n",
 93 |     "        year, month, day = (int(g) for g in match.group(1, 2, 3))\n",
 94 |     "        dt_str = \"%04d-%02d-%02d\" % (year,month,day)\n",
 95 |     "        try:  # resolve everything fallible before touching the lists\n",
 96 |     "            date, n_links = pd.to_datetime(dt_str), len(links[u])\n",
 97 |     "        except (ValueError, KeyError):\n",
 98 |     "            print(dt_str)  # report the skipped entry, as before\n",
 99 |     "            continue\n",
100 |     "        slug.append(match.group(4))  # all three appended together, so\n",
101 |     "        dates.append(date)           # the columns below always have\n",
102 |     "        count.append(n_links)        # equal length\n",
103 |     "podcasts_df = pd.DataFrame({\n",
104 |     "    'id': slug,\n",
105 |     "    'date': dates,\n",
106 |     "    'count': count\n",
107 |     "})\n",
108 |     "\n",
109 |     "podcasts_df = podcasts_df.set_index('date')\n",
110 |     "\n",
111 |     "print(podcasts_df.head())\n",
112 |     "podcasts_df.to_csv(\"partiallyd-scrape/podcasts.csv\")"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "podcasts_df.plot(figsize=(15,4))\n",
122 |     "plt.show()"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": [
131 |     "podcasts_w_links = podcasts_df[podcasts_df['count'] != 0]\n",
132 |     "podcasts_w_links.plot(figsize=(15,4))\n",
133 |     "plt.show()"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {},
140 |    "outputs": [],
141 |    "source": [
142 |     "podcasts_w_links.describe()"
143 |    ]
144 |   }
145 |  ],
146 |  "metadata": {
147 |   "kernelspec": {
148 |    "display_name": "Python 3",
149 |    "language": "python",
150 |    "name": "python3"
151 |   },
152 |   "language_info": {
153 |    "codemirror_mode": {
154 |     "name": "ipython",
155 |     "version": 3
156 |    },
157 |    "file_extension": ".py",
158 |    "mimetype": "text/x-python",
159 |    "name": "python",
160 |    "nbconvert_exporter": "python",
161 |    "pygments_lexer": "ipython3",
162 |    "version": "3.6.1"
163 |   }
164 |  },
165 |  "nbformat": 4,
166 |  "nbformat_minor": 2
167 | }
168 | 


--------------------------------------------------------------------------------
/youtube-captions-2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import os\n",
 12 |     "from os.path import join\n",
 13 |     "import requests\n",
 14 |     "import json\n",
 15 |     "import urllib\n",
 16 |     "import pandas as pd\n",
 17 |     "from bs4 import BeautifulSoup\n",
 18 |     "from urllib.parse import urlencode\n",
 19 |     "from slugify import slugify\n",
 20 |     "import nltk\n",
 21 |     "from nltk.tokenize import word_tokenize, sent_tokenize\n",
 22 |     "from pytube import YouTube"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": null,
 28 |    "metadata": {
 29 |     "collapsed": true
 30 |    },
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "song = \"\"\"Ella existio solo en un sueño \n",
 34 |     "el es un poema que el poeta \n",
 35 |     "nunca escribió \n",
 36 |     "y en la eternidad los dos \n",
 37 |     "unieron sus almas para darle vida \n",
 38 |     "\"\"\""
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {
 45 |     "collapsed": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "from nltk.corpus import stopwords\n",
 50 |     "from string import punctuation\n",
 51 |     "spanish_stopwords = set(list(punctuation)) "
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "# Cleaning song\n",
 61 |     "song_wo_stopwords = [word.lower() for word in word_tokenize(song) if word.lower() not in spanish_stopwords]\n",
 62 |     "print(song_wo_stopwords)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "metadata": {
 69 |     "collapsed": true
 70 |    },
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "# Load words:\n",
 74 |     "candidates = {}\n",
 75 |     "\n",
 76 |     "for w in song_wo_stopwords:\n",
 77 |     "    candidates[w] = []\n",
 78 |     "    \n",
 79 |     "directory = 'youtube-captions/captions'\n",
 80 |     "for file in os.listdir(directory):\n",
 81 |     "    if file.endswith(\"json\"):\n",
 82 |     "        with open(join(directory, file), 'r') as captions_file:\n",
 83 |     "            video = json.load(captions_file)\n",
 84 |     "            captions = video['captions_parsed']\n",
 85 |     "            for caption in captions:\n",
 86 |     "                if caption['content'] == None:\n",
 87 |     "                    continue\n",
 88 |     "                tokenized = word_tokenize(caption['content'])\n",
 89 |     "                for w1 in tokenized:\n",
 90 |     "                    if w1.lower() in candidates:\n",
 91 |     "                        caption['id'] = video['id']\n",
 92 |     "                        caption['count'] = len(tokenized)\n",
 93 |     "                        candidates[w1.lower()].append(caption)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "metadata": {},
100 |    "outputs": [],
101 |    "source": [
102 |     "complete_df = pd.read_csv(\"youtube-captions/complete.csv\", index_col=0,parse_dates=['published_at'])\n",
103 |     "print(complete_df.info())\n",
104 |     "complete_df.head()"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": null,
110 |    "metadata": {
111 |     "collapsed": true
112 |    },
113 |    "outputs": [],
114 |    "source": [
115 |     "def start_to_time(s: float):\n",
116 |     "    seconds = float(s) / 60\n",
117 |     "    ceil_seconds = math.floor(seconds)\n",
118 |     "    minutes = ceil_seconds\n",
119 |     "    seconds = round((seconds - ceil_seconds) * 60)\n",
120 |     "    return str(minutes) +\"m\" + str(seconds)+ \"s\""
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": null,
126 |    "metadata": {
127 |     "scrolled": false
128 |    },
129 |    "outputs": [],
130 |    "source": [
131 |     "import math\n",
132 |     "\n",
133 |     "video_url = \"http://youtube.com/watch?v=%s&t=%s\"\n",
134 |     "for candidate_key in candidates:\n",
135 |     "    print(candidate_key)\n",
136 |     "    if len(candidates[candidate_key]) == 0:\n",
137 |     "        continue\n",
138 |     "    candidates[candidate_key].sort(key=lambda x: x['count'], reverse=True)\n",
139 |     "    for c in candidates[candidate_key][:4]:\n",
140 |     "        tokenized = word_tokenize(c['content'])\n",
141 |     "        word_count = len(tokenized)\n",
142 |     "        duration, start = float(c['duration']), float(c['start'])\n",
143 |     "        word_duration = word_count / duration\n",
144 |     "        word_location = tokenized.index(candidate_key)\n",
145 |     "        tentative_word_start = (start + word_location * word_duration) - 1\n",
146 |     "        print(video_url % (c['id'], start_to_time(tentative_word_start)))\n",
147 |     "#        print(word_duration)\n",
148 |     "#        print(video_url % (c['id'], start_to_time(text['start'])))"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {
155 |     "collapsed": true
156 |    },
157 |    "outputs": [],
158 |    "source": []
159 |   }
160 |  ],
161 |  "metadata": {
162 |   "kernelspec": {
163 |    "display_name": "Python 3",
164 |    "language": "python",
165 |    "name": "python3"
166 |   },
167 |   "language_info": {
168 |    "codemirror_mode": {
169 |     "name": "ipython",
170 |     "version": 3
171 |    },
172 |    "file_extension": ".py",
173 |    "mimetype": "text/x-python",
174 |    "name": "python",
175 |    "nbconvert_exporter": "python",
176 |    "pygments_lexer": "ipython3",
177 |    "version": "3.6.1"
178 |   }
179 |  },
180 |  "nbformat": 4,
181 |  "nbformat_minor": 2
182 | }
183 | 


--------------------------------------------------------------------------------
/python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Python.\n",
  8 |     "\n",
  9 |     "Como muchos de ustedes ya sabrán, Python es un lenguaje de programación que, como muy pocos seguramente saben, tomó su nombre no de una serpiente, sino de un programa de comedia británico; pero en fin. Python fue publicado en 1991 por Guido van Rossum. Inicialmente fue pensado como un simple lenguaje de scripting, pero en la actualidad se ha infiltrado en el desarrollo web, la ciencia de datos, el machine learning y ramas afines.\n",
 10 |     "\n",
 11 |     "## Filosofía. \n",
 12 |     "\n",
 13 |     "La filosofía detrás de Python podría estar resumida en un documento llamado *The Zen of Python* (PEP 20), que fue creado en 1999, ocho años después de la publicación del lenguaje. Pueden consultar el documento en este enlace: [PEP 20 -- The Zen of Python](https://peps.python.org/pep-0020/), pero les voy a decir algunos de estos principios que, sí, suenan muy filosóficos:  \n",
 14 |     "\n",
 15 |     " - Beautiful is better than ugly\n",
 16 |     " - Explicit is better than implicit\n",
 17 |     " - Simple is better than complex\n",
 18 |     " - Readability counts\n",
 19 |     " - There should be one—and preferably only one—obvious way to do it.\n",
 20 |     " - If the implementation is hard to explain, it's a bad idea.\n",
 21 |     "\n",
 22 |     "Lo cierto es que, aunque estos principios suenan bonitos, escribir software todavía recae en los humanos, así que muchas veces no se aplican. Por ejemplo, en Python es normal encontrar más de una manera de hacer las cosas.  \n",
 23 |     "\n",
 24 |     "## Características\n",
 25 |     "\n",
 26 |     "**Es dinámicamente tipado**: Porque podemos hacer algo como esto:  "
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 1,
 32 |    "metadata": {
 33 |     "collapsed": true
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "a = 1\n",
 38 |     "b = 'C'\n",
 39 |     "c = [0.1, 0.5]"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "Es decir, no es necesario especificar el tipo de dato de una variable al declararla. Tampoco existe un compilador, ni lo hace el intérprete, que compruebe los tipos antes de que el programa se ejecute. \n",
 47 |     "\n",
 48 |     "También permite algo como esto:"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 2,
 54 |    "metadata": {
 55 |     "collapsed": true
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "a = 1\n",
 60 |     "a = 'C'\n",
 61 |     "a = [0.1, 0.5]"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "Es decir, cambiar por completo el tipo de dato de una variable sin que nadie diga nada. Y créanme, esto puede ser motivo de muchas confusiones, pero una vez que te acostumbras, puede llegar a ser una herramienta muy útil.  \n",
 69 |     "\n",
 70 |     "Sin embargo, también es considerado un lenguaje **fuertemente tipado** (cabe recalcar que puede existir esta combinación: dinámico y fuertemente tipado a la vez). Es considerado fuertemente tipado porque el lenguaje define un conjunto de reglas (de comportamientos) bajo las cuales los tipos de dato se pueden mezclar entre ellos, y romper esas reglas generará una excepción. Toma por ejemplo el siguiente código:"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": 3,
 76 |    "metadata": {},
 77 |    "outputs": [
 78 |     {
 79 |      "ename": "TypeError",
 80 |      "evalue": "must be str, not int",
 81 |      "output_type": "error",
 82 |      "traceback": [
 83 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 84 |       "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
 85 |       "\u001b[1;32m<ipython-input-3-4f7add1ce1c5>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0ma3\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"a\"\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
 86 |       "\u001b[1;31mTypeError\u001b[0m: must be str, not int"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "a3 = \"a\" + 3 "
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "Por increíble que parezca, esto genera un error, puesto que los tipos de dato `int` y `str` no definen una forma de mezclarse; si quieres concatenarlos, primero tendrías que convertir el entero a cadena.\n",
 99 |     "\n",
100 |     "**No existen las llaves (ni los corchetes) para delimitar bloques**: los bloques de código se definen usando indentación (tabs o espacios; lo que importa es que seas consistente en el método de indentación que usas). Es decir, un bloque `if` se define de la siguiente manera: "
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 4,
106 |    "metadata": {},
107 |    "outputs": [
108 |     {
109 |      "name": "stdout",
110 |      "output_type": "stream",
111 |      "text": [
112 |       "b es C\n"
113 |      ]
114 |     }
115 |    ],
116 |    "source": [
117 |     "if b == 'C':\n",
118 |     "    print(\"b es C\")\n",
119 |     "elif b == 'A':\n",
120 |     "    print(\"b es A\")"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "markdown",
125 |    "metadata": {},
126 |    "source": [
127 |     "O un código un poco más elaborado se vería así:  "
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": null,
133 |    "metadata": {
134 |     "collapsed": true
135 |    },
136 |    "outputs": [],
137 |    "source": [
138 |     "def del_none(d):\n",
139 |     "    for key, value in list(d.items()):\n",
140 |     "        if value is None:\n",
141 |     "            del d[key]\n",
142 |     "        elif isinstance(value, str):\n",
143 |     "            d[key] = d[key].strip()\n",
144 |     "        elif isinstance(value, dict):\n",
145 |     "            del_none(value)\n",
146 |     "    return d"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "markdown",
151 |    "metadata": {},
152 |    "source": [
153 |     "Ah, seguramente lo notaste, pero Python tampoco requiere que uses un `;` para terminar cada instrucción, la idea es que exista una instrucción por cada línea.\n",
154 |     "\n",
155 |     "Como tal vez pudiste ver, es también un **lenguaje de alto nivel**: la idea es abstraer (esconder) la mayor cantidad posible de detalles de implementación. Por ello, en ocasiones es muy sencillo leer programas escritos en este lenguaje y, a mi parecer, en muchos casos es como si estuvieras leyendo un libro escrito en inglés.   \n",
156 |     "\n",
157 |     "Python es también **multiparadigma**, puedes organizar tu código en clases, o utilizarlo como un lenguaje funcional, o puedes simplemente crear un programa que se ejecute proceduralmente... o una combinación de todo esto.  \n",
158 |     "\n",
159 |     "**Altamente extensible**: tiene soporte para descargar módulos o bibliotecas de repositorios de paquetes que permiten añadirle funcionalidad a tus programas, así que es normal que cuando descargues un proyecto tengas que descargar los paquetes asociados con instrucciones como las siguientes:  \n",
160 |     "\n",
161 |     "```\n",
162 |     "pip install package-name\n",
163 |     "easy_install package-name\n",
164 |     "```  \n",
165 |     "\n",
166 |     "Cuenta con una **consola interactiva** o REPL\n",
167 |     "\n",
168 |     "Es **multiplataforma** y no está fuertemente ligado a un sistema operativo ni a un entorno de desarrollo.\n",
169 |     "  \n",
170 |     "## Desventajas  \n",
171 |     "\n",
172 |     "- Considerado **lento**\n",
173 |     "- A pesar de ser muy usado, hay áreas en las que no tiene mucho impacto, como el desarrollo para móviles   \n",
174 |     "- Consume mucha memoria y facilita la escritura de código que, aunque funciona, no está muy optimizado  \n",
175 |     "- Puede hacer que resulte difícil trabajar con otros lenguajes: uno se acostumbra muy rápido a las bondades de Python; a mí de pronto ya se me olvida poner puntos y coma en C#  \n",
176 |     "\n",
177 |     "## Razones para aprender  \n",
178 |     "\n",
179 |     " - Quieres desarrollar aplicaciones web\n",
180 |     " - Te interesa automatizar tareas repetitivas\n",
181 |     " - Quieres analizar datos\n",
182 |     " - Es entretenido\n",
183 |     " - Una herramienta más en tus habilidades  \n",
184 |     " \n",
185 |     "## IDEs  \n",
186 |     "\n",
187 |     " - [Thonny](http://thonny.org/)\n",
188 |     " - [PyCharm](https://www.jetbrains.com/pycharm/)\n",
189 |     " - [PyScripter](https://github.com/pyscripter/pyscripter)\n",
190 |     " - [Visual Studio](https://www.visualstudio.com/es/vs/python/) y [Visual Studio Code](https://code.visualstudio.com/docs/languages/python) (con plugins)\n",
191 |     " - [PyDev (Eclipse)](http://www.pydev.org/)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {
197 |     "collapsed": true
198 |    },
199 |    "source": [
200 |     "## Recursos para aprender\n",
201 |     "\n",
202 |     " - [LearnPython.org (en español)](https://www.learnpython.org/es/)\n",
203 |     " - [\"Python instantáneo\"](http://rapto.arrakis.es/AprendaPython.html)\n",
204 |     " - [Python in one pic](https://github.com/coodict/python3-in-one-pic/blob/master/notebooks/py3-in-one-pic.ipynb) and [Python in one pic (interactive)](http://coodict.github.io/python3-in-one-pic/)\n",
205 |     " - [A Byte of Python](https://python.swaroopch.com/)\n",
206 |     " - [Introduction to Programming with Python](https://mva.microsoft.com/en-US/training-courses/introduction-to-programming-with-python-8360)\n",
207 |     " - [Automate the Boring Stuff with Python](http://automatetheboringstuff.com/)"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": null,
213 |    "metadata": {
214 |     "collapsed": true
215 |    },
216 |    "outputs": [],
217 |    "source": []
218 |   }
219 |  ],
220 |  "metadata": {
221 |   "kernelspec": {
222 |    "display_name": "Python 3",
223 |    "language": "python",
224 |    "name": "python3"
225 |   },
226 |   "language_info": {
227 |    "codemirror_mode": {
228 |     "name": "ipython",
229 |     "version": 3
230 |    },
231 |    "file_extension": ".py",
232 |    "mimetype": "text/x-python",
233 |    "name": "python",
234 |    "nbconvert_exporter": "python",
235 |    "pygments_lexer": "ipython3",
236 |    "version": "3.6.2"
237 |   }
238 |  },
239 |  "nbformat": 4,
240 |  "nbformat_minor": 2
241 | }
242 | 


--------------------------------------------------------------------------------
/casey-neistat-analisys.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import matplotlib.pyplot as plt\n",
 12 |     "import pandas as pd\n",
 13 |     "import requests\n",
 14 |     "import json\n",
 15 |     "import urllib\n",
 16 |     "import isodate\n",
 17 |     "import os\n",
 18 |     "from bs4 import BeautifulSoup\n",
 19 |     "from urllib.parse import urlencode\n",
 20 |     "from slugify import slugify\n",
 21 |     "from pytube import YouTube\n",
 22 |     "\n",
 23 |     "if not os.path.exists(\"casey-neistat-analisys\"):\n",
 24 |     "    os.makedirs(\"casey-neistat-analisys\")"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": null,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "api_key = \"\" # Place your YT api key here\n",
 34 |     "assert api_key != \"\"\n",
 35 |     "channel_id = 'UCtinbF-Q-fVthA0qrFQTgXQ'\n",
 36 |     "\n",
 37 |     "playlists_parameters = {\n",
 38 |     "    'part': 'contentDetails',\n",
 39 |     "    'id': channel_id,\n",
 40 |     "    'key': api_key\n",
 41 |     "}\n",
 42 |     "\n",
 43 |     "categories_parameters = {\n",
 44 |     "    'part': 'snippet',\n",
 45 |     "    'regionCode': 'US',\n",
 46 |     "    'key': api_key\n",
 47 |     "}\n",
 48 |     "\n",
 49 |     "parameters = {\n",
 50 |     "    'key': api_key,\n",
 51 |     "    'part': 'snippet',\n",
 52 |     "    'type': 'video',\n",
 53 |     "    'channelId': channel_id,\n",
 54 |     "    'maxResults': 50,\n",
 55 |     "    'order': 'date'\n",
 56 |     "}\n",
 57 |     "max_pages = 100\n",
 58 |     "query_string = urlencode(parameters)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": null,
 64 |    "metadata": {
 65 |     "collapsed": true
 66 |    },
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "get_categories_url = \"https://www.googleapis.com/youtube/v3/videoCategories?\" + urlencode(categories_parameters)\n",
 70 |     "r = requests.get(get_categories_url)\n",
 71 |     "result = json.loads(r.text)\n",
 72 |     "categoryId = []\n",
 73 |     "categoryNames = []\n",
 74 |     "for category in result['items']:\n",
 75 |     "    categoryId.append(int(category['id']))\n",
 76 |     "    categoryNames.append(category['snippet']['title'])\n",
 77 |     "categories_df = pd.DataFrame({'category': categoryId, 'name': categoryNames})\n",
 78 |     "categories_df.head()\n",
 79 |     "categories_df.to_csv(\"casey-neistat-analisys/categories_US.csv\", encoding='utf-8')"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "get_playlists_url = \"https://www.googleapis.com/youtube/v3/channels?\" + urlencode(playlists_parameters)\n",
 89 |     "r = requests.get(get_playlists_url)\n",
 90 |     "result = json.loads(r.text)\n",
 91 |     "\n",
 92 |     "playlist_id = result['items'][0]['contentDetails']['relatedPlaylists']['uploads']\n",
 93 |     "print(playlist_id)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": null,
 99 |    "metadata": {},
100 |    "outputs": [],
101 |    "source": [
102 |     "count = 0\n",
103 |     "videos = []\n",
104 |     "search_url = \"https://www.googleapis.com/youtube/v3/playlistItems?\"\n",
105 |     "parameters['playlistId'] = playlist_id\n",
106 |     "query_string = urlencode(parameters)\n",
107 |     "pages = max_pages\n",
108 |     "page_token = 'FIRST TIME!'\n",
109 |     "while pages > 0 and len(page_token) > 0:\n",
110 |     "    qurl = search_url + query_string\n",
111 |     "    r = requests.get(search_url + query_string)\n",
112 |     "    result = json.loads(r.text)\n",
113 |     "    try:\n",
114 |     "        page_token = result[\"nextPageToken\"]\n",
115 |     "    except:\n",
116 |     "        page_token = ''\n",
117 |     "    parameters['pageToken'] = page_token\n",
118 |     "    pages = pages - 1\n",
119 |     "    videos.extend(result['items'])\n",
120 |     "    count += len(result['items'])\n",
121 |     "    query_string = urlencode(parameters)\n",
122 |     "print(\"Done, found\", count)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": [
131 |     "# Conversion to dataframes\n",
132 |     "ids = []\n",
133 |     "pub = []\n",
134 |     "titles = []\n",
135 |     "for v in videos:\n",
136 |     "    videoId = v['snippet']['resourceId']['videoId']\n",
137 |     "    #print(json.dumps(v))\n",
138 |     "    publishedDate = v['snippet']['publishedAt']\n",
139 |     "    title =  v['snippet']['title']\n",
140 |     "    ids.append(videoId)\n",
141 |     "    pub.append(publishedDate)\n",
142 |     "    titles.append(title)\n",
143 |     "initial_df = pd.DataFrame({\n",
144 |     "    'id': ids,\n",
145 |     "    'published_at': pub,\n",
146 |     "    'title': titles\n",
147 |     "})\n",
148 |     "initial_df['published_at'] = pd.to_datetime(initial_df['published_at'])\n",
149 |     "initial_df.to_csv(\"casey-neistat-analisys/casey_initial.csv\", encoding='utf-8')\n",
150 |     "print(initial_df.info())"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": null,
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "ids = list(initial_df['id'].values)\n",
160 |     "categories = []\n",
161 |     "default_language = []\n",
162 |     "durations = []\n",
163 |     "license = []\n",
164 |     "viewCounts = []\n",
165 |     "likeCounts = []\n",
166 |     "dislikeCounts = []\n",
167 |     "favoriteCounts = []\n",
168 |     "commentCounts = []\n",
169 |     "a=True\n",
170 |     "batch_size = 50\n",
171 |     "i = 0\n",
172 |     "video_details = \"https://www.googleapis.com/youtube/v3/videos?id=%s&part=snippet,statistics,contentDetails&key=%s\" \n",
173 |     "while i < len(ids):\n",
174 |     "    ids_to_query = ','.join(ids[i:i+batch_size])\n",
175 |     "    q = video_details % (ids_to_query, api_key)\n",
176 |     "    r = requests.get(q)\n",
177 |     "    resultlist = json.loads(r.text)\n",
178 |     "    for result in resultlist['items']:\n",
179 |     "        snippet = result['snippet']\n",
180 |     "        contentDetails = result['contentDetails']\n",
181 |     "        statistics = result['statistics']\n",
182 |     "\n",
183 |     "        categories.append(snippet['categoryId'])\n",
184 |     "        if 'defaultAudioLanguage' in snippet:\n",
185 |     "            default_language.append(snippet['defaultAudioLanguage'])\n",
186 |     "        else:\n",
187 |     "            default_language.append('-')\n",
188 |     "        durations.append(contentDetails['duration'])\n",
189 |     "        license.append(contentDetails['licensedContent'])\n",
190 |     "        viewCounts.append(statistics['viewCount'])\n",
191 |     "        favoriteCounts.append(statistics['favoriteCount'])\n",
192 |     "        likeCount = -1\n",
193 |     "        dislikeCount = -1\n",
194 |     "        commentCount = -1\n",
195 |     "        if 'likeCount' in statistics:\n",
196 |     "            likeCount = int(statistics['likeCount'])\n",
197 |     "            dislikeCount = int(statistics['dislikeCount'])\n",
198 |     "        if 'commentCount' in statistics:\n",
199 |     "            commentCount = int(statistics['commentCount'])\n",
200 |     "        likeCounts.append(likeCount)\n",
201 |     "        dislikeCounts.append(dislikeCount)\n",
202 |     "        commentCounts.append(commentCount)\n",
203 |     "    \n",
204 |     "    i += batch_size\n",
205 |     "\n",
206 |     "details_df = pd.DataFrame({\n",
207 |     "    'id': ids,\n",
208 |     "    'category':categories,\n",
209 |     "    'language': default_language,\n",
210 |     "    'duration': durations,\n",
211 |     "    'license': license,\n",
212 |     "    'views': viewCounts,\n",
213 |     "    'likes': likeCounts,\n",
214 |     "    'dislikes': dislikeCounts,\n",
215 |     "    'favs': favoriteCounts,\n",
216 |     "    'comments': commentCounts\n",
217 |     "})\n",
218 |     "\n",
219 |     "details_df.to_csv(\"casey-neistat-analisys/casey_detailed.csv\", encoding='utf-8')\n",
220 |     "print(details_df.info())"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": null,
226 |    "metadata": {},
227 |    "outputs": [],
228 |    "source": [
229 |     "initial_df = pd.read_csv(\"casey-neistat-analisys/casey_initial.csv\", index_col=0, \n",
230 |     "                         parse_dates=['published_at'], na_values=[-1, ''])\n",
231 |     "details_df = pd.read_csv(\"casey-neistat-analisys/casey_detailed.csv\", index_col=0, na_values=[-1, ''])\n",
232 |     "\n",
233 |     "\n",
234 |     "initial_df = initial_df.drop_duplicates()\n",
235 |     "details_df = details_df.drop_duplicates()\n",
236 |     "details_df.duration = details_df.duration.apply(lambda iso: isodate.parse_duration(iso).total_seconds())\n",
237 |     "\n",
238 |     "complete_df = pd.merge(left=initial_df, right=details_df, on='id')\n",
239 |     "complete_df.fillna(-1)\n",
240 |     "complete_df.set_index('published_at', inplace=True)\n",
241 |     "\n",
242 |     "print(complete_df.tail())\n",
243 |     "complete_df.to_csv(\"casey-neistat-analisys/casey_complete.csv\", encoding='utf-8')"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {},
250 |    "outputs": [],
251 |    "source": [
252 |     "complete_df = pd.read_csv(\"casey-neistat-analisys/casey_complete.csv\", parse_dates=['published_at'], index_col=0)\n",
253 |     "complete_df = complete_df.tz_localize('UTC').tz_convert('US/Pacific')\n",
254 |     "complete_df.head(10)"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": null,
260 |    "metadata": {
261 |     "scrolled": false
262 |    },
263 |    "outputs": [],
264 |    "source": [
265 |     "# Days on which he didn't upload a vlog:\n",
266 |     "vlog_start,vlog_end = '2015-03-24', '2016-11-19'\n",
267 |     "daily_vlog_count = complete_df.loc[vlog_start:vlog_end,['views']].resample('D').count()\n",
268 |     "daily_vlog_count.columns = ['videos']\n",
269 |     "print(daily_vlog_count[daily_vlog_count['videos'] == 0])\n",
270 |     "print(daily_vlog_count['videos']['2015'])"
271 |    ]
272 |   }
273 |  ],
274 |  "metadata": {
275 |   "kernelspec": {
276 |    "display_name": "Python 3",
277 |    "language": "python",
278 |    "name": "python3"
279 |   },
280 |   "language_info": {
281 |    "codemirror_mode": {
282 |     "name": "ipython",
283 |     "version": 3
284 |    },
285 |    "file_extension": ".py",
286 |    "mimetype": "text/x-python",
287 |    "name": "python",
288 |    "nbconvert_exporter": "python",
289 |    "pygments_lexer": "ipython3",
290 |    "version": "3.6.1"
291 |   }
292 |  },
293 |  "nbformat": 4,
294 |  "nbformat_minor": 2
295 | }
296 | 


--------------------------------------------------------------------------------
/tloz-scrape.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 76,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import os\n",
 12 |     "import requests\n",
 13 |     "import json\n",
 14 |     "import re\n",
 15 |     "import pandas as pd\n",
 16 |     "import matplotlib.pyplot as plt\n",
 17 |     "from os.path import join\n",
 18 |     "from slugify import slugify\n",
 19 |     "from bs4 import BeautifulSoup\n",
 20 |     "from bs4.element import NavigableString\n",
 21 |     "\n",
 22 |     "base_dir = \"tloz-scrape\"\n",
 23 |     "if not os.path.exists(base_dir):\n",
 24 |     "    os.makedirs(base_dir)"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 79,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "base_url = \"http://zelda.wikia.com\"\n",
 34 |     "characters = base_url + \"/wiki/The_Legend_of_Zelda_recurring_characters\"\n",
 35 |     "recurring_characters = BeautifulSoup(requests.get(characters).text, \"lxml\")"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 80,
 41 |    "metadata": {
 42 |     "scrolled": false
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "mw_content_text = recurring_characters.find('div', {\"id\":\"mw-content-text\"})\n",
 47 |     "\n",
 48 |     "# Infobox labels grouped by the way their value is parsed\n",
 49 |     "list_labels = (\"Appears in\", \"Appereance(s)\")\n",
 50 |     "html_labels = (\"Title(s)\", \"Kindred\", \"Attack method\", \"Effective weapon(s)\",\n",
 51 |     "               \"Spoils\", \"Alternate form(s)\", \"Alternate form of\")\n",
 52 |     "per_game_labels = (\"Homeland\", \"Race\", \"Hometown\", \"Location(s)\", \"Affiliation(s)\")\n",
 53 |     "\n",
 54 |     "attributes = set()\n",
 55 |     "characters = []\n",
 56 |     "\n",
 57 |     "# Visit the page linked from every list item and read its infobox (<aside>)\n",
 58 |     "for character_li in mw_content_text.findAll('li'):\n",
 59 |     "    a = character_li.find('a')\n",
 60 |     "    if a is None or not a.get('href'):\n",
 61 |     "        # List item without a link: there is no character page to fetch\n",
 62 |     "        continue\n",
 63 |     "    character_page = BeautifulSoup(requests.get(base_url + a['href']).text, \"lxml\")\n",
 64 |     "    aside = character_page.find('aside')\n",
 65 |     "    if aside is None:\n",
 66 |     "        continue\n",
 67 |     "    character = {'name': a.text.strip()}\n",
 68 |     "    for pi_data in aside.findAll('div', {'class':'pi-item'}, recursive=False):\n",
 69 |     "        heading = pi_data.find('h3')\n",
 70 |     "        value = pi_data.find('div', {\"class\":\"pi-data-value\"})\n",
 71 |     "        if heading is None or value is None:\n",
 72 |     "            # Not a label/value row; nothing to record\n",
 73 |     "            continue\n",
 74 |     "        label = heading.text.strip()\n",
 75 |     "        key = slugify(label, separator=\"_\")\n",
 76 |     "        attributes.add(label)\n",
 77 |     "        if label in list_labels:\n",
 78 |     "            character[key] = [ap.text.strip() for ap in value.findAll('i')]\n",
 79 |     "        elif label in html_labels:\n",
 80 |     "            # Stored as the raw HTML of the value element\n",
 81 |     "            character[key] = str(value)\n",
 82 |     "        elif label in per_game_labels:\n",
 83 |     "            character[key] = []\n",
 84 |     "            game_name = ''\n",
 85 |     "            for element in value.descendants:\n",
 86 |     "                if element.name == \"u\":\n",
 87 |     "                    # A <u> element names the game that the following links are filed under\n",
 88 |     "                    game_name = element.text.strip()\n",
 89 |     "                elif element.name == \"a\":\n",
 90 |     "                    if game_name == '': # It only has one appearance\n",
 91 |     "                        game_name = 'ORIGINAL'\n",
 92 |     "                    character[key].append({'game': game_name,\n",
 93 |     "                                           'value': element.text.strip()})\n",
 94 |     "        else:\n",
 95 |     "            character[key] = value.text.strip()\n",
 96 |     "    characters.append(character)\n",
 97 |     "    \n",
 98 |     "with open(join(base_dir,'data.txt'), 'w') as outfile:\n",
 99 |     "    json.dump(characters, outfile, indent=4)"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 71,
105 |    "metadata": {},
106 |    "outputs": [
107 |     {
108 |      "name": "stdout",
109 |      "output_type": "stream",
110 |      "text": [
111 |       "Different attributes: ['kindred', 'homeland', 'affiliation_s', 'attack_method', 'effective_weapon_s', 'location_s', 'appearances', 'hometown', 'race', 'age', 'spoils', 'gender', 'alternate_form_s', 'first_appearance', 'appears_in', 'title_s', 'alternate_form_of']\n"
112 |      ]
113 |     }
114 |    ],
115 |    "source": [
116 |     "with open(join(base_dir,'data.txt')) as json_data:\n",
117 |     "    characters = json.load(json_data)\n",
118 |     "\n",
119 |     "# Gather every key used by any character, then leave out 'name'\n",
120 |     "attributes = set().union(\n",
121 |     "    *(character.keys() for character in characters)\n",
122 |     ")\n",
123 |     "attributes.remove('name')\n",
124 |     "attributes = list(attributes)\n",
125 |     "print(\"Different attributes:\", attributes)"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 72,
131 |    "metadata": {
132 |     "collapsed": true
133 |    },
134 |    "outputs": [],
135 |    "source": [
136 |     "# Captures the game title and the 4-digit year from text such as 'Title (1998)'\n",
137 |     "fa_regex = re.compile('([\\w\\s\\'&]+)\\(([0-9]{4})\\)')\n",
138 |     "different_games = set()\n",
139 |     "name, first_game, first_year, gender = [], [], [], []\n",
140 |     "\n",
141 |     "for c in characters:\n",
142 |     "    # A character without 'first_appearance' is skipped instead of raising KeyError\n",
143 |     "    match = fa_regex.search(c.get('first_appearance', ''))\n",
144 |     "    if not match:\n",
145 |     "        continue\n",
146 |     "    game_title = match.group(1).strip()\n",
147 |     "    different_games.add(game_title)\n",
148 |     "\n",
149 |     "    first_game.append(game_title)\n",
150 |     "    first_year.append(int(match.group(2)))\n",
151 |     "    name.append(c['name'])\n",
152 |     "    # A missing gender becomes '' so that all four columns keep the same length\n",
153 |     "    gender.append(c.get('gender', ''))\n",
154 |     "\n",
155 |     "characters_initial_df = pd.DataFrame({'name': name,\n",
156 |     "                                      'gender': gender,\n",
157 |     "                                      'first_game': first_game,\n",
158 |     "                                      'first_year': first_year})"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": 73,
164 |    "metadata": {},
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "text/html": [
169 |        "<div>\n",
170 |        "<style>\n",
171 |        "    .dataframe thead tr:only-child th {\n",
172 |        "        text-align: right;\n",
173 |        "    }\n",
174 |        "\n",
175 |        "    .dataframe thead th {\n",
176 |        "        text-align: left;\n",
177 |        "    }\n",
178 |        "\n",
179 |        "    .dataframe tbody tr th {\n",
180 |        "        vertical-align: top;\n",
181 |        "    }\n",
182 |        "</style>\n",
183 |        "<table border=\"1\" class=\"dataframe\">\n",
184 |        "  <thead>\n",
185 |        "    <tr style=\"text-align: right;\">\n",
186 |        "      <th></th>\n",
187 |        "      <th>first_game</th>\n",
188 |        "      <th>first_year</th>\n",
189 |        "      <th>gender</th>\n",
190 |        "      <th>name</th>\n",
191 |        "    </tr>\n",
192 |        "  </thead>\n",
193 |        "  <tbody>\n",
194 |        "    <tr>\n",
195 |        "      <th>0</th>\n",
196 |        "      <td>Ocarina of Time</td>\n",
197 |        "      <td>1998</td>\n",
198 |        "      <td>Female</td>\n",
199 |        "      <td>Anju</td>\n",
200 |        "    </tr>\n",
201 |        "    <tr>\n",
202 |        "      <th>1</th>\n",
203 |        "      <td>The Wind Waker</td>\n",
204 |        "      <td>2003</td>\n",
205 |        "      <td>Male</td>\n",
206 |        "      <td>Ankle</td>\n",
207 |        "    </tr>\n",
208 |        "    <tr>\n",
209 |        "      <th>2</th>\n",
210 |        "      <td>Ocarina of Time</td>\n",
211 |        "      <td>1998</td>\n",
212 |        "      <td>Female</td>\n",
213 |        "      <td>Aveil</td>\n",
214 |        "    </tr>\n",
215 |        "    <tr>\n",
216 |        "      <th>3</th>\n",
217 |        "      <td>Ocarina of Time</td>\n",
218 |        "      <td>1998</td>\n",
219 |        "      <td>Male</td>\n",
220 |        "      <td>Bean Seller</td>\n",
221 |        "    </tr>\n",
222 |        "    <tr>\n",
223 |        "      <th>4</th>\n",
224 |        "      <td>The Wind Waker</td>\n",
225 |        "      <td>2003</td>\n",
226 |        "      <td>Male</td>\n",
227 |        "      <td>Beedle</td>\n",
228 |        "    </tr>\n",
229 |        "  </tbody>\n",
230 |        "</table>\n",
231 |        "</div>"
232 |       ],
233 |       "text/plain": [
234 |        "        first_game  first_year  gender         name\n",
235 |        "0  Ocarina of Time        1998  Female         Anju\n",
236 |        "1   The Wind Waker        2003    Male        Ankle\n",
237 |        "2  Ocarina of Time        1998  Female        Aveil\n",
238 |        "3  Ocarina of Time        1998    Male  Bean Seller\n",
239 |        "4   The Wind Waker        2003    Male       Beedle"
240 |       ]
241 |      },
242 |      "execution_count": 73,
243 |      "metadata": {},
244 |      "output_type": "execute_result"
245 |     }
246 |    ],
247 |    "source": [
248 |     "characters_initial_df.to_csv(join(base_dir,'characters_initial.csv'))\n",
249 |     "characters_initial_df.head()"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "code",
254 |    "execution_count": 74,
255 |    "metadata": {
256 |     "collapsed": true,
257 |     "scrolled": false
258 |    },
259 |    "outputs": [],
260 |    "source": [
261 |     "game_regex = re.compile('^\\(([0-9a-zA-Z/\\'\\s&]+)\\)')\n",
262 |     "\n",
263 |     "attributes = ['kindred', 'title_s']\n",
264 |     "\n",
265 |     "# Turn the raw infobox HTML of each attribute into a list of {'game', 'value'} entries\n",
266 |     "for character in characters:\n",
267 |     "    for attr in attributes:\n",
268 |     "        # Only raw HTML strings are converted, so running the cell twice is harmless\n",
269 |     "        if isinstance(character.get(attr), str):\n",
270 |     "            kindred = character[attr][len('<div class=\"pi-data-value pi-font\">'):-len(\"</div>\")]\n",
271 |     "            kindred_list = []\n",
272 |     "            game = 'ORIGINAL'\n",
273 |     "            for kind in kindred.split(\"<br/>\"):\n",
274 |     "                soup_content = BeautifulSoup(kind, \"html.parser\").text.strip()\n",
275 |     "                match = game_regex.search(soup_content)\n",
276 |     "                if match:\n",
277 |     "                    game = match.group(1).strip()\n",
278 |     "                else:\n",
279 |     "                    kindred_list.append({'game':game, 'value':soup_content})\n",
280 |     "            character[attr] = kindred_list\n",
281 |     "\n",
282 |     "with open(join(base_dir,'data1.txt'), 'w') as outfile:\n",
283 |     "    json.dump(characters, outfile, indent=4)"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": null,
289 |    "metadata": {
290 |     "collapsed": true
291 |    },
292 |    "outputs": [],
293 |    "source": []
294 |   }
295 |  ],
296 |  "metadata": {
297 |   "kernelspec": {
298 |    "display_name": "Python 3",
299 |    "language": "python",
300 |    "name": "python3"
301 |   },
302 |   "language_info": {
303 |    "codemirror_mode": {
304 |     "name": "ipython",
305 |     "version": 3
306 |    },
307 |    "file_extension": ".py",
308 |    "mimetype": "text/x-python",
309 |    "name": "python",
310 |    "nbconvert_exporter": "python",
311 |    "pygments_lexer": "ipython3",
312 |    "version": "3.6.1"
313 |   }
314 |  },
315 |  "nbformat": 4,
316 |  "nbformat_minor": 2
317 | }
318 | 


--------------------------------------------------------------------------------
/youtube-captions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Before coding\n",
  8 |     "\n",
  9 |     "Create a new project\n",
 10 |     "\n",
 11 |     "https://console.developers.google.com/projectcreate\n",
 12 |     "\n",
 13 |     "Once you have created the project, enable access to the YouTube Data API\n",
 14 |     "\n",
 15 |     "https://console.developers.google.com/apis/library\n",
 16 |     "\n",
 17 |     "Once enabled, it is important that you get credentials for your project\n",
 18 |     "\n",
 19 |     "https://console.developers.google.com/apis/credentials/wizard?api=youtube.googleapis.com\n",
 20 |     "\n",
 21 |     "From the options select:  \n",
 22 |     "\n",
 23 |     "| Option  | Value |\n",
 24 |     "| ------------- | ------------- |\n",
 25 |     "| ¿Qué API estás usando?  | **YouTube Data API v3**  |\n",
 26 |     "| ¿Desde dónde llamarás a la API? | **Servidor Web**  |\n",
 27 |     "| ¿A qué tipo de datos accederás? | **Datos públicos**  |  \n",
 28 |     "\n",
 29 |     "After selecting these values, press **\"¿Qué credenciales necesito?\"**. You will be given an alphanumeric string, which is your API key; place this value in the `api_key` variable:"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {
 36 |     "collapsed": true
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "api_key = \"\" # Place your YT api key here\n",
 41 |     "assert api_key != \"\""
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "## Now, coding\n",
 49 |     "\n",
 50 |     "Import the necessary packages"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": null,
 56 |    "metadata": {
 57 |     "collapsed": true
 58 |    },
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "import requests\n",
 62 |     "import json\n",
 63 |     "import urllib\n",
 64 |     "import isodate\n",
 65 |     "import os\n",
 66 |     "import pandas as pd\n",
 67 |     "from bs4 import BeautifulSoup\n",
 68 |     "from urllib.parse import urlencode\n",
 69 |     "from slugify import slugify\n",
 70 |     "from pytube import YouTube"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "search_url = \"https://www.googleapis.com/youtube/v3/search?\"\n",
 82 |     "caption_url = \"https://www.youtube.com/api/timedtext?\""
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "metadata": {
 89 |     "collapsed": true
 90 |    },
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "language_preferences = ['es-MX','es']\n",
 94 |     "channels = {\n",
 95 |     "    'h3h3Productions' : 'UCDWIvJwLJsE4LG1Atne2blQ',\n",
 96 |     "}\n",
 97 |     "starting_channel = 'UCDWIvJwLJsE4LG1Atne2blQ'"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {
104 |     "collapsed": true
105 |    },
106 |    "outputs": [],
107 |    "source": [
108 |     "parameters = {\n",
109 |     "    'key': api_key,\n",
110 |     "    'part': 'snippet',\n",
111 |     "    'type': 'video',\n",
112 |     "    'channelId': starting_channel,\n",
113 |     "    'maxResults': 50,\n",
114 |     "    'order': 'date'\n",
115 |     "}\n",
116 |     "max_pages = 15\n",
117 |     "query_string = urlencode(parameters)"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "# Page through the search results of every channel, keeping the raw items per channel\n",
127 |     "videos = {}\n",
128 |     "count = 0\n",
129 |     "for channel in channels:\n",
130 |     "    print(\"Searching for\", channel)\n",
131 |     "    parameters['channelId'] = channels[channel]\n",
132 |     "    # Forget the token of the previous channel so paging restarts at the first page\n",
133 |     "    parameters.pop('pageToken', None)\n",
134 |     "    videos[channel] = []\n",
135 |     "    pages = max_pages\n",
136 |     "    page_token = 'FIRST TIME!'\n",
137 |     "    while pages > 0 and len(page_token) > 0:\n",
138 |     "        # Encode right before each request so the current channelId/pageToken is always sent\n",
139 |     "        query_string = urlencode(parameters)\n",
140 |     "        qurl = search_url + query_string\n",
141 |     "        print(qurl)\n",
142 |     "        r = requests.get(qurl)\n",
143 |     "        result = json.loads(r.text)\n",
144 |     "        # A response without 'nextPageToken' yields an empty token, which ends the loop\n",
145 |     "        page_token = result.get(\"nextPageToken\", '')\n",
146 |     "        parameters['pageToken'] = page_token\n",
147 |     "        pages = pages - 1\n",
148 |     "        items = result['items']\n",
149 |     "        print(len(items), page_token)\n",
150 |     "        videos[channel].extend(items)\n",
151 |     "        count += len(items)"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": null,
157 |    "metadata": {},
158 |    "outputs": [],
159 |    "source": [
160 |     "# Conversion to dataframes\n",
161 |     "chn = []\n",
162 |     "ids = []\n",
163 |     "pub = []\n",
164 |     "titles = []\n",
165 |     "for c in channels:\n",
166 |     "    for v in videos[c]:\n",
167 |     "        snippet = v['snippet']\n",
168 |     "        chn.append(slugify(c))\n",
169 |     "        ids.append(v['id']['videoId'])\n",
170 |     "        pub.append(snippet['publishedAt'])\n",
171 |     "        titles.append(snippet['title'])\n",
172 |     "initial_df = pd.DataFrame({\n",
173 |     "    'channel':chn,\n",
174 |     "    'id': ids,\n",
175 |     "    'published_at': pub,\n",
176 |     "    'title': titles\n",
177 |     "})\n",
178 |     "initial_df['published_at'] = pd.to_datetime(initial_df['published_at'])\n",
179 |     "# to_csv does not create missing folders, so make sure the target exists\n",
180 |     "os.makedirs(\"youtube-captions\", exist_ok=True)\n",
181 |     "initial_df.to_csv(\"youtube-captions/h3h3_initial.csv\", encoding='utf-8')\n",
182 |     "initial_df.info()  # info() prints the summary itself and returns None"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "ids = list(initial_df['id'].values)\n",
192 |     "# Ids are taken from the responses rather than from `ids`: when the API returns fewer\n",
193 |     "# videos than were requested, every column of details_df still has the same length.\n",
194 |     "detail_ids = []\n",
195 |     "categories = []\n",
196 |     "default_language = []\n",
197 |     "durations = []\n",
198 |     "license = []\n",
199 |     "viewCounts = []\n",
200 |     "likeCounts = []\n",
201 |     "dislikeCounts = []\n",
202 |     "favoriteCounts = []\n",
203 |     "commentCounts = []\n",
204 |     "\n",
205 |     "batch_size = 50\n",
206 |     "i = 0\n",
207 |     "video_details = \"https://www.googleapis.com/youtube/v3/videos?id=%s&part=snippet,statistics,contentDetails&key=%s\" \n",
208 |     "# Ask for the details of `batch_size` videos per request\n",
209 |     "while i < len(ids):\n",
210 |     "    ids_to_query = ','.join(ids[i:i+batch_size])\n",
211 |     "    q = video_details % (ids_to_query, api_key)\n",
212 |     "    r = requests.get(q)\n",
213 |     "    resultlist = json.loads(r.text)\n",
214 |     "    for result in resultlist['items']:\n",
215 |     "        snippet = result['snippet']\n",
216 |     "        contentDetails = result['contentDetails']\n",
217 |     "        statistics = result['statistics']\n",
218 |     "\n",
219 |     "        detail_ids.append(result['id'])\n",
220 |     "        categories.append(snippet['categoryId'])\n",
221 |     "        if 'defaultAudioLanguage' in snippet:\n",
222 |     "            default_language.append(snippet['defaultAudioLanguage'])\n",
223 |     "        else:\n",
224 |     "            default_language.append('-')\n",
225 |     "        durations.append(contentDetails['duration'])\n",
226 |     "        license.append(contentDetails['licensedContent'])\n",
227 |     "        # Any counter may be missing from a response; -1 marks it as unavailable\n",
228 |     "        viewCounts.append(statistics.get('viewCount', -1))\n",
229 |     "        favoriteCounts.append(statistics.get('favoriteCount', -1))\n",
230 |     "        likeCounts.append(int(statistics.get('likeCount', -1)))\n",
231 |     "        dislikeCounts.append(int(statistics.get('dislikeCount', -1)))\n",
232 |     "        commentCounts.append(int(statistics.get('commentCount', -1)))\n",
233 |     "    \n",
234 |     "    i += batch_size\n",
235 |     "\n",
236 |     "details_df = pd.DataFrame({\n",
237 |     "    'id': detail_ids,\n",
238 |     "    'category':categories,\n",
239 |     "    'language': default_language,\n",
240 |     "    'duration': durations,\n",
241 |     "    'license': license,\n",
242 |     "    'views': viewCounts,\n",
243 |     "    'likes': likeCounts,\n",
244 |     "    'dislikes': dislikeCounts,\n",
245 |     "    'favs': favoriteCounts,\n",
246 |     "    'comments': commentCounts\n",
247 |     "})\n",
248 |     "\n",
249 |     "# to_csv does not create missing folders, so make sure the target exists\n",
250 |     "os.makedirs(\"youtube-captions\", exist_ok=True)\n",
251 |     "details_df.to_csv(\"youtube-captions/h3h3_details.csv\", encoding='utf-8')\n",
252 |     "details_df.info()  # info() prints the summary itself and returns None"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": null,
258 |    "metadata": {},
259 |    "outputs": [],
260 |    "source": [
261 |     "initial_df = pd.read_csv(\"youtube-captions/h3h3_initial.csv\", index_col=0, parse_dates=['published_at'], na_values=[-1, ''])\n",
262 |     "details_df = pd.read_csv(\"youtube-captions/h3h3_details.csv\", index_col=0, na_values=[-1, ''])\n",
263 |     "\n",
264 |     "initial_df = initial_df.drop_duplicates()\n",
265 |     "details_df = details_df.drop_duplicates()\n",
266 |     "details_df.duration = details_df.duration.apply(lambda iso: isodate.parse_duration(iso).total_seconds())\n",
267 |     "\n",
268 |     "#print(initial_df.info())\n",
269 |     "#print(details_df.info())\n",
270 |     "\n",
271 |     "complete_df = pd.merge(left=initial_df, right=details_df, on='id')\n",
272 |     "complete_df.set_index('id', inplace=True)\n",
273 |     "# Summarise while missing values are still NaN so the -1 placeholder cannot skew the statistics\n",
274 |     "print(complete_df.describe())\n",
275 |     "# fillna() returns a new frame: keep the result so the placeholder is what gets saved\n",
276 |     "complete_df = complete_df.fillna(-1)\n",
277 |     "complete_df.to_csv(\"youtube-captions/h3h3_complete.csv\", encoding='utf-8')"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": null,
283 |    "metadata": {
284 |     "collapsed": true
285 |    },
286 |    "outputs": [],
287 |    "source": [
288 |     "vids_subs = []\n",
289 |     "errors = []\n",
290 |     "for videoId, row in complete_df.iterrows():\n",
291 |     "    title = row['title']\n",
292 |     "    subtitles = ''\n",
293 |     "    try:\n",
294 |     "        yt = YouTube('https://www.youtube.com/watch?v=' + videoId)\n",
295 |     "        for lang in language_preferences:\n",
296 |     "            caption = yt.captions.get_by_language_code(lang)\n",
297 |     "            if caption is not None:\n",
298 |     "                subtitles = caption.xml_captions\n",
299 |     "            if len(subtitles) > 0:\n",
300 |     "                break\n",
301 |     "    except Exception as exc:\n",
302 |     "        # Best effort per video, but a bare except would also swallow Ctrl+C\n",
303 |     "        print(\"Error\", videoId, exc)\n",
304 |     "        errors.append(videoId)\n",
305 |     "    if len(subtitles) > 0:\n",
306 |     "        vids_subs.append({'id': videoId, 'title': title, 'captions': subtitles })\n",
307 |     "print(\"Done!\")"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": null,
313 |    "metadata": {
314 |     "collapsed": true
315 |    },
316 |    "outputs": [],
317 |    "source": [
318 |     "print(language_preferences)\n",
319 |     "## Parse each video's caption XML into a list of plain-text entries\n",
320 |     "for video in vids_subs:\n",
321 |     "    caption_nodes = BeautifulSoup(video['captions'], \"lxml\").find_all('text')\n",
322 |     "    parsed_entries = [\n",
323 |     "        {\n",
324 |     "            'duration': node.get('dur'),\n",
325 |     "            'start': node.get('start'),\n",
326 |     "            # A second parse strips any markup left inside the caption text\n",
327 |     "            'content': BeautifulSoup(node.get_text(), \"lxml\").text\n",
328 |     "        }\n",
329 |     "        for node in caption_nodes\n",
330 |     "    ]\n",
331 |     "    del video['captions']\n",
332 |     "    video['captions_parsed'] = parsed_entries"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "code",
337 |    "execution_count": null,
338 |    "metadata": {
339 |     "collapsed": true
340 |    },
341 |    "outputs": [],
342 |    "source": [
343 |     "from os.path import join\n",
344 |     "## Now saving the good stuff\n",
345 |     "# Build the path with join(): a hard-coded backslash is only a separator on Windows\n",
346 |     "directory = join(\"youtube-captions\", \"captions\")\n",
347 |     "os.makedirs(directory, exist_ok=True)\n",
348 |     "print(\"Saving to\", directory)\n",
349 |     "for vid in vids_subs:\n",
350 |     "    file_path = join(directory, slugify(vid['title']) + '.json')\n",
351 |     "    with open(file_path, 'w') as outfile:\n",
352 |     "        json.dump(vid, outfile, indent=4)\n",
353 |     "print(\"Done!\")"
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "execution_count": null,
359 |    "metadata": {
360 |     "collapsed": true
361 |    },
362 |    "outputs": [],
363 |    "source": [
364 |     "cleaning_subs = \"\"\"directories = ['amlo', 'presidencia', 'epn']\n",
365 |     "for d in directories:\n",
366 |     "    directory = join('youtube-captions', d)\n",
367 |     "    for file in os.listdir(directory):\n",
368 |     "        if file.endswith(\"json\"):\n",
369 |     "            video = None\n",
370 |     "            file1 = join(directory, file)\n",
371 |     "            with open(file1, 'r') as captions_file:\n",
372 |     "                video = json.load(captions_file)\n",
373 |     "                captions = video['captions_parsed']\n",
374 |     "                for cap in captions:\n",
375 |     "                    try:\n",
376 |     "                        cap['content'] = BeautifulSoup(cap['content'], \"lxml\").get_text()\n",
377 |     "                    except:\n",
378 |     "                        print(\"Error\", file1)\n",
379 |     "            with open(join(directory, file), 'w') as captions_file:\n",
380 |     "                json.dump(video, captions_file)\n",
381 |     "print(\"Done!\")\n",
382 |     "\"\"\""
383 |    ]
384 |   },
385 |   {
386 |    "cell_type": "code",
387 |    "execution_count": null,
388 |    "metadata": {
389 |     "collapsed": true
390 |    },
391 |    "outputs": [],
392 |    "source": []
393 |   }
394 |  ],
395 |  "metadata": {
396 |   "kernelspec": {
397 |    "display_name": "Python 3",
398 |    "language": "python",
399 |    "name": "python3"
400 |   },
401 |   "language_info": {
402 |    "codemirror_mode": {
403 |     "name": "ipython",
404 |     "version": 3
405 |    },
406 |    "file_extension": ".py",
407 |    "mimetype": "text/x-python",
408 |    "name": "python",
409 |    "nbconvert_exporter": "python",
410 |    "pygments_lexer": "ipython3",
411 |    "version": "3.6.1"
412 |   }
413 |  },
414 |  "nbformat": 4,
415 |  "nbformat_minor": 2
416 | }
417 | 


--------------------------------------------------------------------------------
/bokeh/x.html:
--------------------------------------------------------------------------------
 1 | 
 2 | <!DOCTYPE html>
 3 | <html lang="en">
 4 |     <head>
 5 |         <meta charset="utf-8">
 6 |         <title>Bokeh Plot</title>
 7 |         
 8 | <link rel="stylesheet" href="https://cdn.pydata.org/bokeh/release/bokeh-0.12.7.min.css" type="text/css" />
 9 |         
10 | <script type="text/javascript" src="https://cdn.pydata.org/bokeh/release/bokeh-0.12.7.min.js"></script>
11 | <script type="text/javascript">
12 |     Bokeh.set_log_level("info");
13 | </script>
14 |         <style>
15 |           html {
16 |             width: 100%;
17 |             height: 100%;
18 |           }
19 |           body {
20 |             width: 90%;
21 |             height: 100%;
22 |             margin: auto;
23 |           }
24 |         </style>
25 |     </head>
26 |     <body>
27 |         
28 |         <div class="bk-root">
29 |             <div class="bk-plotdiv" id="bc0f9365-dfb0-4d58-a63f-4823257016d8"></div>
30 |         </div>
31 |         
32 |         <script type="text/javascript">
33 |             (function() {
34 |           var fn = function() {
35 |             Bokeh.safely(function() {
36 |               var docs_json = {"f19f551c-0c29-457d-8a2e-c1caf4fb9c62":{"roots":{"references":[{"attributes":{"callback":null},"id":"33038416-dbe7-4b44-af9a-808bec434f00","type":"DataRange1d"},{"attributes":{"formatter":{"id":"120bd21a-08c1-4c79-a1ec-a634a06850e4","type":"BasicTickFormatter"},"plot":{"id":"3a01c725-f1c4-4d1e-bb83-39edd3a07cf4","subtype":"Figure","type":"Plot"},"ticker":{"id":"1a1a38ea-f6cc-4ffe-badc-825ed1534164","type":"BasicTicker"}},"id":"f81fc1a1-13fb-470a-a317-6ca9126a0f40","type":"LinearAxis"},{"attributes":{},"id":"bc71daa3-6814-439b-86b6-f636aea471a1","type":"BasicTickFormatter"},{"attributes":{},"id":"9368e81d-188b-4e83-879e-b47df2cc75c4","type":"PanTool"},{"attributes":{"overlay":{"id":"66b41d9e-7153-4d77-8c45-89a60014f219","type":"BoxAnnotation"}},"id":"972ccae8-1270-4f24-89da-99817625db8f","type":"BoxZoomTool"},{"attributes":{"data_source":{"id":"d6b1a5e6-8279-4339-8515-4805752df0b8","type":"ColumnDataSource"},"glyph":{"id":"f1e27e6f-cd50-4a2a-af70-c78c889b84de","type":"Circle"},"hover_glyph":null,"muted_glyph":null,"nonselection_glyph":{"id":"8ad2ad69-8644-4faa-a8ed-0610cc370ecf","type":"Circle"},"selection_glyph":null,"view":{"id":"cefcf9d4-7983-4bb7-80ac-9c5dc54e69fb","type":"CDSView"}},"id":"3e7d1229-1173-4ee7-9c3d-6c4134df0fd0","type":"GlyphRenderer"},{"attributes":{"below":[{"id":"8887aa1d-c0b6-4c89-bb76-ba892da8e81a","type":"LinearAxis"}],"left":[{"id":"bf2150c5-7c7d-4a18-b432-f460fb36c1ec","type":"LinearAxis"}],"plot_height":400,"plot_width":400,"renderers":[{"id":"8887aa1d-c0b6-4c89-bb76-ba892da8e81a","type":"LinearAxis"},{"id":"a3f6cb23-1e56-41d4-bf64-895c4c9062cf","type":"Grid"},{"id":"bf2150c5-7c7d-4a18-b432-f460fb36c1ec","type":"LinearAxis"},{"id":"f6431223-d5d3-416a-af2a-f5dcd278de6a","type":"Grid"},{"id":"5e3c141f-0946-41d1-8f6c-fa0e2c6f3a5b","type":"BoxAnnotation"},{"id":"f8a3c423-e17d-4b1d-8f81-1d3840e4a9d0","type":"GlyphRenderer"}],"title":{"id":"efaa2c1f-42ad-48de-af14-bcefbc654e8e","type":"Title"},"toolbar":{"id":
"68e58385-5c52-4b3d-9dc4-da0592c34f09","type":"Toolbar"},"x_range":{"id":"4282b9aa-7885-4590-9abe-b6deedfc401c","type":"DataRange1d"},"x_scale":{"id":"ecb71062-d250-4a07-a2cb-9ea82a5df1ac","type":"LinearScale"},"y_range":{"id":"2a22769f-609b-4a56-b719-0864778f2089","type":"DataRange1d"},"y_scale":{"id":"48120450-a3e0-42d1-b539-6198c2002ebb","type":"LinearScale"}},"id":"54dc4c01-932f-482f-b8b2-28673dfe9a8a","subtype":"Figure","type":"Plot"},{"attributes":{"plot":null,"text":""},"id":"c630d7a2-f7ff-4b61-874d-51945665e340","type":"Title"},{"attributes":{},"id":"7415e282-eac7-4e74-813d-96cc3d426b6a","type":"LinearScale"},{"attributes":{},"id":"f4039914-a6a6-473e-9aff-26ba71e572da","type":"BasicTickFormatter"},{"attributes":{"source":{"id":"756f45ff-e571-40ab-a3f6-33ff88cc8924","type":"ColumnDataSource"}},"id":"4176a6d1-320f-43ec-b805-ec65c344ac1d","type":"CDSView"},{"attributes":{"callback":null},"id":"f81537dd-5d16-42d3-8b7a-9eac29efea88","type":"DataRange1d"},{"attributes":{},"id":"fb3c7c59-7e68-4706-b8c3-36fbe3693687","type":"BasicTickFormatter"},{"attributes":{"data_source":{"id":"756f45ff-e571-40ab-a3f6-33ff88cc8924","type":"ColumnDataSource"},"glyph":{"id":"e02647b0-a6ca-4556-99ea-822461140c3d","type":"Circle"},"hover_glyph":null,"muted_glyph":null,"nonselection_glyph":{"id":"52c9834c-2e1c-4a5e-ae07-568aa42e1dde","type":"Circle"},"selection_glyph":null,"view":{"id":"4176a6d1-320f-43ec-b805-ec65c344ac1d","type":"CDSView"}},"id":"834ab610-b1d8-49c5-a4e1-aee0c1d1c2e1","type":"GlyphRenderer"},{"attributes":{},"id":"0d513bed-b499-4cc9-91ef-9f0e53714c84","type":"LinearScale"},{"attributes":{},"id":"dc5849e9-1077-46c9-8352-c00b4bec8a42","type":"BasicTickFormatter"},{"attributes":{"formatter":{"id":"f4039914-a6a6-473e-9aff-26ba71e572da","type":"BasicTickFormatter"},"plot":{"id":"54dc4c01-932f-482f-b8b2-28673dfe9a8a","subtype":"Figure","type":"Plot"},"ticker":{"id":"468b6842-130c-43fb-b09f-f0c7dbe029cb","type":"BasicTicker"}},"id":"bf2150c5-7c7d-4a18-b432-f460fb36c1ec","ty
pe":"LinearAxis"},{"attributes":{"callback":null,"column_names":["x","y"],"data":{"x":[1,3,4,7],"y":[6,4,6,1]}},"id":"06745886-9df8-4077-abc5-2578eb728cf0","type":"ColumnDataSource"},{"attributes":{"formatter":{"id":"d785e5e0-5a6c-4b14-ac91-c3fa402b7577","type":"BasicTickFormatter"},"plot":{"id":"54dc4c01-932f-482f-b8b2-28673dfe9a8a","subtype":"Figure","type":"Plot"},"ticker":{"id":"72a451e0-574c-4702-be3e-fd4a09a8817a","type":"BasicTicker"}},"id":"8887aa1d-c0b6-4c89-bb76-ba892da8e81a","type":"LinearAxis"},{"attributes":{},"id":"845fde73-584b-4cc7-9a1b-b95532bcc0dd","type":"PanTool"},{"attributes":{"fill_alpha":{"value":0.1},"fill_color":{"value":"#1f77b4"},"line_alpha":{"value":0.1},"line_color":{"value":"#1f77b4"},"x":{"field":"x"},"y":{"field":"y"}},"id":"8ad2ad69-8644-4faa-a8ed-0610cc370ecf","type":"Circle"},{"attributes":{},"id":"48120450-a3e0-42d1-b539-6198c2002ebb","type":"LinearScale"},{"attributes":{},"id":"d785e5e0-5a6c-4b14-ac91-c3fa402b7577","type":"BasicTickFormatter"},{"attributes":{"callback":null,"column_names":["x","y"],"data":{"x":[1,3,4,7],"y":[6,4,6,1]}},"id":"756f45ff-e571-40ab-a3f6-33ff88cc8924","type":"ColumnDataSource"},{"attributes":{"plot":null,"text":""},"id":"6eba92af-afac-4019-a86f-ade54bb682af","type":"Title"},{"attributes":{},"id":"1a1a38ea-f6cc-4ffe-badc-825ed1534164","type":"BasicTicker"},{"attributes":{"formatter":{"id":"bc71daa3-6814-439b-86b6-f636aea471a1","type":"BasicTickFormatter"},"plot":{"id":"3a01c725-f1c4-4d1e-bb83-39edd3a07cf4","subtype":"Figure","type":"Plot"},"ticker":{"id":"bf6c8b87-53b6-4ebf-9296-2675fbfb5d5a","type":"BasicTicker"}},"id":"a7423efe-ab34-4532-b6d4-475660f491b6","type":"LinearAxis"},{"attributes":{"dimension":1,"plot":{"id":"3a01c725-f1c4-4d1e-bb83-39edd3a07cf4","subtype":"Figure","type":"Plot"},"ticker":{"id":"bf6c8b87-53b6-4ebf-9296-2675fbfb5d5a","type":"BasicTicker"}},"id":"bc65f4d1-2564-4af3-84ca-bb61bbdcdf48","type":"Grid"},{"attributes":{},"id":"ecb71062-d250-4a07-a2cb-9ea82a5df1ac","type":"LinearSc
ale"},{"attributes":{},"id":"bf6c8b87-53b6-4ebf-9296-2675fbfb5d5a","type":"BasicTicker"},{"attributes":{"callback":null,"column_names":["x","y"],"data":{"x":[1,3,4,7],"y":[6,4,6,1]}},"id":"d6b1a5e6-8279-4339-8515-4805752df0b8","type":"ColumnDataSource"},{"attributes":{"fill_color":{"value":"#1f77b4"},"line_color":{"value":"#1f77b4"},"x":{"field":"x"},"y":{"field":"y"}},"id":"e02647b0-a6ca-4556-99ea-822461140c3d","type":"Circle"},{"attributes":{"source":{"id":"06745886-9df8-4077-abc5-2578eb728cf0","type":"ColumnDataSource"}},"id":"df97428e-f895-490d-9594-340e90e9de57","type":"CDSView"},{"attributes":{"active_drag":"auto","active_inspect":"auto","active_scroll":"auto","active_tap":"auto","tools":[{"id":"9368e81d-188b-4e83-879e-b47df2cc75c4","type":"PanTool"},{"id":"37dfb14a-e469-44b4-b801-495a4887e821","type":"BoxZoomTool"}]},"id":"68e58385-5c52-4b3d-9dc4-da0592c34f09","type":"Toolbar"},{"attributes":{},"id":"468b6842-130c-43fb-b09f-f0c7dbe029cb","type":"BasicTicker"},{"attributes":{"below":[{"id":"172d2366-d644-4b03-a458-f5e3aaf6466f","type":"LinearAxis"}],"left":[{"id":"f1e7c738-a499-413f-be8d-a31704398a50","type":"LinearAxis"}],"plot_width":400,"renderers":[{"id":"172d2366-d644-4b03-a458-f5e3aaf6466f","type":"LinearAxis"},{"id":"a3336e8c-85f8-4a6d-8a50-b8ee2884dfd7","type":"Grid"},{"id":"f1e7c738-a499-413f-be8d-a31704398a50","type":"LinearAxis"},{"id":"761f3f0d-6c66-4fe9-835a-9cbde7f8419a","type":"Grid"},{"id":"87a97c0c-2c7f-4f22-a5bb-a206bf45b349","type":"BoxAnnotation"},{"id":"3e7d1229-1173-4ee7-9c3d-6c4134df0fd0","type":"GlyphRenderer"}],"title":{"id":"c630d7a2-f7ff-4b61-874d-51945665e340","type":"Title"},"toolbar":{"id":"ac0547da-990a-465b-8bdb-a2115c83612b","type":"Toolbar"},"x_range":{"id":"02530ffe-a598-4df1-9e5d-0666bc6388f2","type":"DataRange1d"},"x_scale":{"id":"811e354f-2303-485a-9c88-514de979b811","type":"LinearScale"},"y_range":{"id":"155a8248-845a-414f-acb1-bf10f3d02506","type":"DataRange1d"},"y_scale":{"id":"5664d900-f0fe-4709-ab3a-b0771183de9a","typ
e":"LinearScale"}},"id":"bbc34ee3-d027-4539-bb6b-261382e73a2a","subtype":"Figure","type":"Plot"},{"attributes":{"fill_alpha":{"value":0.1},"fill_color":{"value":"#1f77b4"},"line_alpha":{"value":0.1},"line_color":{"value":"#1f77b4"},"x":{"field":"x"},"y":{"field":"y"}},"id":"3ceb6d9a-e1aa-458f-91ff-b51dfee9a77c","type":"Circle"},{"attributes":{"dimension":1,"plot":{"id":"54dc4c01-932f-482f-b8b2-28673dfe9a8a","subtype":"Figure","type":"Plot"},"ticker":{"id":"468b6842-130c-43fb-b09f-f0c7dbe029cb","type":"BasicTicker"}},"id":"f6431223-d5d3-416a-af2a-f5dcd278de6a","type":"Grid"},{"attributes":{"callback":null},"id":"4282b9aa-7885-4590-9abe-b6deedfc401c","type":"DataRange1d"},{"attributes":{},"id":"811e354f-2303-485a-9c88-514de979b811","type":"LinearScale"},{"attributes":{"plot":null,"text":""},"id":"efaa2c1f-42ad-48de-af14-bcefbc654e8e","type":"Title"},{"attributes":{"data_source":{"id":"06745886-9df8-4077-abc5-2578eb728cf0","type":"ColumnDataSource"},"glyph":{"id":"1f83b727-488a-4abc-b9ca-19a01e960f6a","type":"Circle"},"hover_glyph":null,"muted_glyph":null,"nonselection_glyph":{"id":"3ceb6d9a-e1aa-458f-91ff-b51dfee9a77c","type":"Circle"},"selection_glyph":null,"view":{"id":"df97428e-f895-490d-9594-340e90e9de57","type":"CDSView"}},"id":"f8a3c423-e17d-4b1d-8f81-1d3840e4a9d0","type":"GlyphRenderer"},{"attributes":{"callback":null},"id":"02530ffe-a598-4df1-9e5d-0666bc6388f2","type":"DataRange1d"},{"attributes":{"plot":{"id":"54dc4c01-932f-482f-b8b2-28673dfe9a8a","subtype":"Figure","type":"Plot"},"ticker":{"id":"72a451e0-574c-4702-be3e-fd4a09a8817a","type":"BasicTicker"}},"id":"a3f6cb23-1e56-41d4-bf64-895c4c9062cf","type":"Grid"},{"attributes":{"callback":null},"id":"155a8248-845a-414f-acb1-bf10f3d02506","type":"DataRange1d"},{"attributes":{},"id":"5664d900-f0fe-4709-ab3a-b0771183de9a","type":"LinearScale"},{"attributes":{"below":[{"id":"f81fc1a1-13fb-470a-a317-6ca9126a0f40","type":"LinearAxis"}],"left":[{"id":"a7423efe-ab34-4532-b6d4-475660f491b6","type":"LinearAxis"}],"plo
t_height":400,"plot_width":400,"renderers":[{"id":"f81fc1a1-13fb-470a-a317-6ca9126a0f40","type":"LinearAxis"},{"id":"8a6e0933-8313-4df0-a2c2-b77d0e30df8c","type":"Grid"},{"id":"a7423efe-ab34-4532-b6d4-475660f491b6","type":"LinearAxis"},{"id":"bc65f4d1-2564-4af3-84ca-bb61bbdcdf48","type":"Grid"},{"id":"66b41d9e-7153-4d77-8c45-89a60014f219","type":"BoxAnnotation"},{"id":"834ab610-b1d8-49c5-a4e1-aee0c1d1c2e1","type":"GlyphRenderer"}],"title":{"id":"6eba92af-afac-4019-a86f-ade54bb682af","type":"Title"},"toolbar":{"id":"94c6b4d5-3f4c-4dba-84a7-550698adb76e","type":"Toolbar"},"x_range":{"id":"33038416-dbe7-4b44-af9a-808bec434f00","type":"DataRange1d"},"x_scale":{"id":"7415e282-eac7-4e74-813d-96cc3d426b6a","type":"LinearScale"},"y_range":{"id":"f81537dd-5d16-42d3-8b7a-9eac29efea88","type":"DataRange1d"},"y_scale":{"id":"0d513bed-b499-4cc9-91ef-9f0e53714c84","type":"LinearScale"}},"id":"3a01c725-f1c4-4d1e-bb83-39edd3a07cf4","subtype":"Figure","type":"Plot"},{"attributes":{"bottom_units":"screen","fill_alpha":{"value":0.5},"fill_color":{"value":"lightgrey"},"left_units":"screen","level":"overlay","line_alpha":{"value":1.0},"line_color":{"value":"black"},"line_dash":[4,4],"line_width":{"value":2},"plot":null,"render_mode":"css","right_units":"screen","top_units":"screen"},"id":"66b41d9e-7153-4d77-8c45-89a60014f219","type":"BoxAnnotation"},{"attributes":{"active_drag":"auto","active_inspect":"auto","active_scroll":"auto","active_tap":"auto","tools":[{"id":"845fde73-584b-4cc7-9a1b-b95532bcc0dd","type":"PanTool"},{"id":"972ccae8-1270-4f24-89da-99817625db8f","type":"BoxZoomTool"}]},"id":"94c6b4d5-3f4c-4dba-84a7-550698adb76e","type":"Toolbar"},{"attributes":{"active_drag":"auto","active_inspect":"auto","active_scroll":"auto","active_tap":"auto","tools":[{"id":"6c4cf0cc-6d26-4e10-95f9-650029c678e1","type":"PanTool"},{"id":"3c655b83-02f6-48bf-9609-2d80f8221766","type":"BoxZoomTool"}]},"id":"ac0547da-990a-465b-8bdb-a2115c83612b","type":"Toolbar"},{"attributes":{"plot":{"id":"3a01c725
-f1c4-4d1e-bb83-39edd3a07cf4","subtype":"Figure","type":"Plot"},"ticker":{"id":"1a1a38ea-f6cc-4ffe-badc-825ed1534164","type":"BasicTicker"}},"id":"8a6e0933-8313-4df0-a2c2-b77d0e30df8c","type":"Grid"},{"attributes":{"bottom_units":"screen","fill_alpha":{"value":0.5},"fill_color":{"value":"lightgrey"},"left_units":"screen","level":"overlay","line_alpha":{"value":1.0},"line_color":{"value":"black"},"line_dash":[4,4],"line_width":{"value":2},"plot":null,"render_mode":"css","right_units":"screen","top_units":"screen"},"id":"87a97c0c-2c7f-4f22-a5bb-a206bf45b349","type":"BoxAnnotation"},{"attributes":{"formatter":{"id":"fb3c7c59-7e68-4706-b8c3-36fbe3693687","type":"BasicTickFormatter"},"plot":{"id":"bbc34ee3-d027-4539-bb6b-261382e73a2a","subtype":"Figure","type":"Plot"},"ticker":{"id":"3ba4a68b-e5ea-4a17-8880-4d6b3bfefbfe","type":"BasicTicker"}},"id":"172d2366-d644-4b03-a458-f5e3aaf6466f","type":"LinearAxis"},{"attributes":{"source":{"id":"d6b1a5e6-8279-4339-8515-4805752df0b8","type":"ColumnDataSource"}},"id":"cefcf9d4-7983-4bb7-80ac-9c5dc54e69fb","type":"CDSView"},{"attributes":{"bottom_units":"screen","fill_alpha":{"value":0.5},"fill_color":{"value":"lightgrey"},"left_units":"screen","level":"overlay","line_alpha":{"value":1.0},"line_color":{"value":"black"},"line_dash":[4,4],"line_width":{"value":2},"plot":null,"render_mode":"css","right_units":"screen","top_units":"screen"},"id":"5e3c141f-0946-41d1-8f6c-fa0e2c6f3a5b","type":"BoxAnnotation"},{"attributes":{},"id":"3ba4a68b-e5ea-4a17-8880-4d6b3bfefbfe","type":"BasicTicker"},{"attributes":{"callback":null},"id":"2a22769f-609b-4a56-b719-0864778f2089","type":"DataRange1d"},{"attributes":{"plot":{"id":"bbc34ee3-d027-4539-bb6b-261382e73a2a","subtype":"Figure","type":"Plot"},"ticker":{"id":"3ba4a68b-e5ea-4a17-8880-4d6b3bfefbfe","type":"BasicTicker"}},"id":"a3336e8c-85f8-4a6d-8a50-b8ee2884dfd7","type":"Grid"},{"attributes":{"overlay":{"id":"5e3c141f-0946-41d1-8f6c-fa0e2c6f3a5b","type":"BoxAnnotation"}},"id":"37dfb14a-e469-44b4-
b801-495a4887e821","type":"BoxZoomTool"},{"attributes":{"formatter":{"id":"dc5849e9-1077-46c9-8352-c00b4bec8a42","type":"BasicTickFormatter"},"plot":{"id":"bbc34ee3-d027-4539-bb6b-261382e73a2a","subtype":"Figure","type":"Plot"},"ticker":{"id":"5b57c998-6e2e-412b-ba5b-2880ccc9afe2","type":"BasicTicker"}},"id":"f1e7c738-a499-413f-be8d-a31704398a50","type":"LinearAxis"},{"attributes":{"fill_color":{"value":"#1f77b4"},"line_color":{"value":"#1f77b4"},"x":{"field":"x"},"y":{"field":"y"}},"id":"f1e27e6f-cd50-4a2a-af70-c78c889b84de","type":"Circle"},{"attributes":{},"id":"5b57c998-6e2e-412b-ba5b-2880ccc9afe2","type":"BasicTicker"},{"attributes":{"dimension":1,"plot":{"id":"bbc34ee3-d027-4539-bb6b-261382e73a2a","subtype":"Figure","type":"Plot"},"ticker":{"id":"5b57c998-6e2e-412b-ba5b-2880ccc9afe2","type":"BasicTicker"}},"id":"761f3f0d-6c66-4fe9-835a-9cbde7f8419a","type":"Grid"},{"attributes":{},"id":"72a451e0-574c-4702-be3e-fd4a09a8817a","type":"BasicTicker"},{"attributes":{"overlay":{"id":"87a97c0c-2c7f-4f22-a5bb-a206bf45b349","type":"BoxAnnotation"}},"id":"3c655b83-02f6-48bf-9609-2d80f8221766","type":"BoxZoomTool"},{"attributes":{"fill_alpha":{"value":0.1},"fill_color":{"value":"#1f77b4"},"line_alpha":{"value":0.1},"line_color":{"value":"#1f77b4"},"x":{"field":"x"},"y":{"field":"y"}},"id":"52c9834c-2e1c-4a5e-ae07-568aa42e1dde","type":"Circle"},{"attributes":{},"id":"6c4cf0cc-6d26-4e10-95f9-650029c678e1","type":"PanTool"},{"attributes":{"fill_color":{"value":"#1f77b4"},"line_color":{"value":"#1f77b4"},"x":{"field":"x"},"y":{"field":"y"}},"id":"1f83b727-488a-4abc-b9ca-19a01e960f6a","type":"Circle"},{"attributes":{},"id":"120bd21a-08c1-4c79-a1ec-a634a06850e4","type":"BasicTickFormatter"}],"root_ids":["bbc34ee3-d027-4539-bb6b-261382e73a2a","54dc4c01-932f-482f-b8b2-28673dfe9a8a","3a01c725-f1c4-4d1e-bb83-39edd3a07cf4"]},"title":"Bokeh Application","version":"0.12.7"}};
37 |               var render_items = [{"docid":"f19f551c-0c29-457d-8a2e-c1caf4fb9c62","elementid":"bc0f9365-dfb0-4d58-a63f-4823257016d8","modelid":"3a01c725-f1c4-4d1e-bb83-39edd3a07cf4"}];
38 |               
39 |               Bokeh.embed.embed_items(docs_json, render_items);
40 |             });
41 |           };
42 |           if (document.readyState != "loading") fn();
43 |           else document.addEventListener("DOMContentLoaded", fn);
44 |         })();
45 |         
46 |         </script>
47 |     </body>
48 | </html>


--------------------------------------------------------------------------------
/Intro.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Jupyter Notebook App  \n",
  8 |     "\n",
  9 |     "Aplicación compuesta por dos elementos: un servidor y un cliente, que se comunican a través del protocolo HTTP... como una aplicación web tradicional. El cliente es esto que estás viendo en este momento y se accede a él a través de un navegador web.\n",
 10 |     "\n",
 11 |     "<img src=\"Intro/images/architecture.png\" />\n",
 12 |     "\n",
 13 |     "### El cliente  \n",
 14 |     "\n",
 15 |     "Con la Notebook App podemos crear los interesantes *Notebook Documents*, o simplemente *Notebooks*, que son archivos que contienen código (como por ejemplo, código de Python), texto enriquecido (es decir, con cosas como negritas, cursivas, imágenes, links, html...) y otros elementos interactivos como gráficas o simulaciones.\n",
 16 |     "\n",
 17 |     "Como te podrás imaginar, los *Notebooks* son un lugar ideal para documentar el código, explicar lo que sucede y complementarlo con __matemáticas__, diagramas, gráficas y demás elementos."
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "#### Ejemplo: \n",
 25 |     "Podrías tener algo como esto (una explicación matemática seguida de su implementación en código):\n",
 26 |     "\n",
 27 |     "**An infinite-state model**. Consider a sequence of $n+1$ messages that arrive over a period of time of length $T$"
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "code",
 32 |    "execution_count": null,
 33 |    "metadata": {},
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "messages = [1,2,3,5,6,7,9,17]  # n+1 message arrival times\n",
 37 |     "gaps = [m2 - m1 for m1, m2 in zip(messages[:-1], messages[1:])]\n",
 38 |     "T = sum(gaps)\n",
 39 |     "print(T,\":\",gaps)"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "Los *Notebooks* forman parte del componente del cliente de la Notebook App.\n",
 47 |     "\n",
 48 |     "\n",
 49 |     "### El servidor \n",
 50 |     "\n",
 51 |     "El elemento del servidor en la aplicación es un servidor web que se encarga de ejecutar el código que nosotros introducimos a través del cliente. La forma de hacerlo es a través de los *kernels*, que son procesos que se ejecutan en el servidor en el que está corriendo la app; cada uno de los *Notebooks* tiene un *kernel* asociado, el cual estará activo mientras tú no te deshagas de él. \n",
 52 |     "\n",
 53 |     "Dependiendo de lo que estés tratando de ejecutar, cada *kernel* consumirá recursos de la computadora en donde se está ejecutando el servidor.\n",
 54 |     "\n",
 55 |     "#### Pero... why not both?\n",
 56 |     "\n",
 57 |     "<img src=\"Intro/images/both.gif\" />\n",
 58 |     "\n",
 59 |     "Puede que te encuentres con que el cliente y el servidor se están ejecutando en la misma computadora, y que no necesitas de conexión a internet para ejecutarlo, esto es muy común y bastante útil puesto que tener el servidor a la mano nos permite usar directamente los archivos de nuestra PC, cosa que usando el navegador web no podríamos hacer.  \n",
 60 |     "\n",
 61 |     "Una vez que has terminado de jugar *ahem*... trabajar con tu *Notebook* podrías subirlo ahora sí a un servidor más poderoso para que se ejecute más rápido, o si lo colocas en un lugar público (uh, tal vez esto no sea buena idea) esté accesible a través de internet."
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {
 67 |     "collapsed": true
 68 |    },
 69 |    "source": [
 70 |     "## Instalación  \n",
 71 |     "\n",
 72 |     "Si eres un novato como yo, la forma recomendada de obtener los *Notebooks* es a través de [Anaconda](https://www.anaconda.com/download), anda, ve a instalarlo si es que aún no lo has hecho, puedes dejar todo por default por el momento.  \n",
 73 |     "`...`  \n",
 74 |     "`...`\n",
 75 |     "\n",
 76 |     "Suficiente tiempo.\n",
 77 |     "\n",
 78 |     "Si todo salió bien, verás un ícono como este en tu Menú Inicio o en el Launchpad:\n",
 79 |     "\n",
 80 |     "<img src=\"Intro/images/navigator.png\" />\n",
 81 |     "\n",
 82 |     "Al abrir esta aplicación aparecerá un *Dashboard* con diferentes aplicaciones, de las cuales deberás seleccionar y ejecutar Jupyter Notebook:\n",
 83 |     "\n",
 84 |     "<img src=\"Intro/images/launch.png\" />  \n",
 85 |     "\n",
 86 |     "Si todo sale bien, se abrirá un navegador con lo que es conocido como el **Dashboard** de Jupyter, que en ciertos aspectos podrías ver como el Finder de Mac o el Explorador de Windows. Por ahora se abrirá en un folder de tu computadora, probablemente sea la carpeta de tu usuario o en tus documentos, pero esto es algo que puedes configurar más adelante.\n",
 87 |     "\n",
 88 |     "Vamos a crear un nuevo *Notebook*, así que deberás dar click en `Nuevo` (o `New`) y seleccionar Python 3 (o 2, dependiendo de tu versión). Y cuando hayas terminado verás algo como esto:"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "metadata": {
 95 |     "collapsed": true
 96 |    },
 97 |    "outputs": [],
 98 |    "source": []
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {
103 |     "collapsed": true
104 |    },
105 |    "source": [
106 |     "Sí, un cuadro gris llamado celda (`cell`) en el que puedes escribir. Por ahora está en modo código, pero siempre podrás usar el selector de acá arriba para cambiar entre código y texto en Markdown (y también HTML). Seguramente ya tendrán tiempo para jugar con esto más adelante, pero por ahora les voy a explicar lo básico de los *Notebooks*.  \n",
107 |     "## Las celdas\n",
108 |     "\n",
109 |     "Un *Notebook* se compone de celdas. Cada celda puede contener código o texto \"enriquecido\": Las celdas de texto aceptan tres tipos de formato: HTML, Markdown y un poco de LaTeX para fórmulas matemáticas. Así mismo, cada celda tiene cuatro modos diferentes: **reposo**, **edición**, **ejecución** y **ejecutadas**. Sí, inclusive las celdas de texto tienen que ser ejecutadas. Para ejecutar las celdas usualmente se usa la combinación de teclas <kbd>SHIFT</kbd> + <kbd>ENTER</kbd>.\n",
110 |     "\n",
111 |     "\n",
112 |     "La siguiente celda contiene cada una de las tres cosas (puedes ver el \"código fuente\" de cada celda si le das doble click o si presionas enter mientras está seleccionada):"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "### Esto es Markdown:\n",
120 |     "\n",
121 |     "Emphasis, aka italics, with *asterisks* or _underscores_.\n",
122 |     "\n",
123 |     "Strong emphasis, aka bold, with **asterisks** or __underscores__.\n",
124 |     "\n",
125 |     "Combined emphasis with **asterisks and _underscores_**.\n",
126 |     "\n",
127 |     "Strikethrough uses two tildes. ~~Scratch this.~~  \n",
128 |     "\n",
129 |     "Puedes ver una guía rápida de lo que te ofrece Markdown [aquí](https://github.github.com/gfm/).\n",
130 |     "\n",
131 |     "<h3>Esto es HTML:</h3>\n",
132 |     "\n",
133 |     "Emphasis, aka italics, with <code>&lt;i&gt;&lt;/i&gt;</code>, ehhmhmhm... <i>italics</i>.\n",
134 |     "\n",
135 |     "Strong emphasis, aka bold, with <code>&lt;b&gt;&lt;/b&gt;</code>, ehhmhmhm... <b>bold</b>.\n",
136 |     "\n",
137 |     "Combined emphasis with <code>&lt;i&gt;&lt;/i&gt;</code> and <code>&lt;b&gt;&lt;/b&gt;</code>, ehhmhmhm... <i><b>both</b></i>.\n",
138 |     "\n",
139 |     "Strikethrough uses <code>&lt;del&gt;&lt;/del&gt;</code>, <del>like this</del>.\n",
140 |     "\n",
141 |     "\n",
142 |     "Puedes tener cosas sencillas como esta: $a = 10$, o un poco más complicadas como esto: $y = \\frac{1}{{\\sqrt {2\\pi } }}e^{ - \\frac{{z^2 }}{2}}  = .3989e^{ - .5z^2 }$ o, ¿quieres que una fórmula resalte realmente? la puedes poner así:\n",
143 |     "\n",
144 |     "$$y = \\frac{1}{{\\sqrt {2\\pi } }}e^{ - \\frac{{z^2 }}{2}}  = .3989e^{ - .5z^2 }$$\n",
145 |     "\n"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": 5,
151 |    "metadata": {
152 |     "collapsed": true
153 |    },
154 |    "outputs": [],
155 |    "source": [
156 |     "# Y esto es código\n",
157 |     "value = \"¡Buen día señor sol!\"\n",
158 |     "a = 2\n",
159 |     "b = 3"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "La verdad es que no hay mucho que decir sobre las celdas de texto, solo que son muy, muy útiles.\n",
167 |     "\n",
168 |     "### Celdas de código\n",
169 |     "\n",
170 |     "Puedes identificar las celdas de código porque tienen un `In [1]:` al lado, en donde el número es el orden en el que se ejecutó dicha celda. Cada vez que una celda está pendiente de concluir su ejecución aparecerá un `*` en donde debería estar el número. Esto te indicará que hay algo pendiente de terminar de ejecutarse.  \n",
171 |     "\n",
172 |     "Cada *Notebook* está asociado con un *Kernel*, y todo lo que se define en una celda está disponible para todas las demás. Por ejemplo, en la celda anterior a esta declaramos tres variables: `a`, `b` y `value`. En la celda siguiente podemos hacer uso de ellas sin problema:"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 6,
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": [
181 |     "c = a + b\n",
182 |     "print(value)\n",
183 |     "print(c)"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": null,
189 |    "metadata": {
190 |     "collapsed": true
191 |    },
192 |    "outputs": [],
193 |    "source": []
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "También, como ya te habrás dado cuenta, cuando imprimimos algo a \"consola\", los *Notebooks* lo presentan justo debajo de la celda en la que se llama a la función `print`.\n"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "markdown",
204 |    "metadata": {},
205 |    "source": [
206 |     "También, como ya te habrás dado cuenta, cuando imprimimos algo a \"consola\", los *Notebooks* lo presentan justo debajo de la celda en la que se llama a la función `print`.\n"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "markdown",
211 |    "metadata": {},
212 |    "source": [
213 |     "También, como ya te habrás dado cuenta, cuando imprimimos algo a \"consola\", los *Notebooks* lo presentan justo debajo de la celda en la que se llama a la función `print`.\n"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "### Toolbar\n",
221 |     "\n",
222 |     "<img src=\"Intro/images/toolbar.png\" />  \n",
223 |     "\n",
224 |     "Las primeras opciones (de izquierda a derecha) son bastante sencillas de entender.\n",
225 |     "\n",
226 |     " - **Guardar** _Notebook_  \n",
227 |     " - **Agregar** nueva celda\n",
228 |     " - **Cortar** celda\n",
229 |     " - **Copiar** celda\n",
230 |     " - **Pegar** celda\n",
231 |     " - Mover celda **hacia arriba**\n",
232 |     " - Mover celda **hacia abajo**  \n",
233 |     "\n",
234 |     "Las siguientes tres requieren de un poco más de explicación  \n",
235 |     "\n",
236 |     " - **Ejecutar** celda (y seleccionar la siguiente). Que es como presionar <kbd>SHIFT</kbd> + <kbd>ENTER</kbd>, así que la verdad es que creo que será muy raro que presiones este botón muy seguido.  \n",
237 |     " - **Interrumpir _kernel_**. Digamos que estás ejecutando una celda y te das cuenta de que hay un error en ella y que no vale la pena que se siga ejecutando. Este es el caso de uso perfecto para este botón. Interrumpir *kernel* únicamente detendrá la ejecución de las tareas pendientes, el resto del kernel (y todo lo que hayamos definido antes) seguirá vivo.  \n",
238 |     " - **Reiniciar _kernel_**. Hay ocasiones en las que tal vez necesitemos comenzar desde cero con el *notebook*, este es el botón que podemos usar para esto. Reiniciar el _kernel_ removerá de memoria todo lo que se ejecutó previamente, así que tendrás que volver a ejecutar todo de nuevo. \n",
239 |     " \n",
240 |     "Y por último \n",
241 |     "\n",
242 |     " - Cambiar el tipo de celda.  \n",
243 |     " - Abrir el catálogo de comandos. Un botón bastante útil para descubrir los comandos que tenemos disponibles."
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {
249 |     "collapsed": true
250 |    },
251 |    "source": [
252 |     "### Volviendo al código  \n",
253 |     "\n",
254 |     "Pero no creas que solamente es para códigos \"sencillos\", además de variables también podemos definir funciones y clases:"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 7,
260 |    "metadata": {
261 |     "collapsed": true
262 |    },
263 |    "outputs": [],
264 |    "source": [
265 |     "def suma(a, b):\n",
266 |     "    print(\"Sumando\", a, \"+\", b)\n",
267 |     "    return a + b"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 8,
273 |    "metadata": {},
274 |    "outputs": [],
275 |    "source": [
276 |     "resultado = suma(10, 34)\n",
277 |     "print(resultado)"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "code",
282 |    "execution_count": 9,
283 |    "metadata": {
284 |     "collapsed": true
285 |    },
286 |    "outputs": [],
287 |    "source": [
288 |     "class Pokemon:\n",
289 |     "    def __init__(self, nombre, level):\n",
290 |     "        self.nombre = nombre\n",
291 |     "        self.level = level"
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": 10,
297 |    "metadata": {},
298 |    "outputs": [],
299 |    "source": [
300 |     "pokachu = Pokemon(\"Pokachu\", 10)\n",
301 |     "print(pokachu.nombre, pokachu.level)"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "markdown",
306 |    "metadata": {},
307 |    "source": [
308 |     "### Leyendo y escribiendo archivos  \n",
309 |     "\n",
310 |     "Con los *Notebooks* podemos leer y escribir archivos, solamente recuerda: los archivos deben existir en el lado del servidor y no del cliente. En este caso, este *Notebook* tiene varios archivos en una ruta relativa `Intro/...`. Y en específico hay un archivo llamado `hello.txt`.  \n",
311 |     "\n",
312 |     "Para leerlo basta con hacer lo siguiente:"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": 11,
318 |    "metadata": {},
319 |    "outputs": [],
320 |    "source": [
321 |     "with open('Intro/source/hello.txt', 'r') as hello:\n",
322 |     "    for l in hello.readlines():\n",
323 |     "        print(l)"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "markdown",
328 |    "metadata": {},
329 |    "source": [
330 |     "Mientras que para escribir un archivo es suficiente hacer esto:"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "code",
335 |    "execution_count": 12,
336 |    "metadata": {
337 |     "collapsed": true
338 |    },
339 |    "outputs": [],
340 |    "source": [
341 |     "with open('Intro/source/salida.txt', 'w') as salida:\n",
342 |     "    for i in range(10):\n",
343 |     "        salida.write((\"#\" * i) + \"\\n\")"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "metadata": {},
349 |    "source": [
350 |     "### Importando código  \n",
351 |     "\n",
352 |     "Además de todo, también podemos importar código, ya sea de algún módulo que venga ya dentro de Python, alguno que hayamos instalado con `pip` o directamente desde algún archivo de código fuente:"
353 |    ]
354 |   },
355 |   {
356 |    "cell_type": "code",
357 |    "execution_count": 14,
358 |    "metadata": {
359 |     "collapsed": true
360 |    },
361 |    "outputs": [],
362 |    "source": [
363 |     "# Este viene con Python\n",
364 |     "import os\n",
365 |     "# Usa 'pip install requests' para instalar requests\n",
366 |     "import json, requests\n",
367 |     "# Esta es una clase dentro de un archivo local\n",
368 |     "from Intro.source.location import Location"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": 15,
374 |    "metadata": {},
375 |    "outputs": [],
376 |    "source": [
377 |     "print(\"Contenido de Intro/source:\")\n",
378 |     "for filename in os.listdir('Intro/source'):\n",
379 |     "    print(filename)"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": 16,
385 |    "metadata": {
386 |     "collapsed": true
387 |    },
388 |    "outputs": [],
389 |    "source": [
390 |     "resp = requests.get('http://pokeapi.co/api/v2/location/154')\n",
391 |     "data = json.loads(resp.text)"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "code",
396 |    "execution_count": 17,
397 |    "metadata": {},
398 |    "outputs": [],
399 |    "source": [
400 |     "viridianCity = Location(data[\"names\"][0][\"name\"], data[\"region\"][\"name\"])\n",
401 |     "print(viridianCity.name + \", \" + viridianCity.region)"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {},
407 |    "source": [
408 |     "Acá hay [una lista](https://github.com/jupyter/jupyter/wiki/A-gallery-of-interesting-Jupyter-Notebooks) muy completa con ejemplos de _Notebooks_, tan solo para que veas todo el poder que estos tienen. Te invito a que los sigas usando y te diviertas un buen rato con ellos.\n",
409 |     "\n",
410 |     "Y pues bien, por el momento es todo, si quieres saber más házmelo saber a [@io_exception](https://twitter.com/io_exception) o a [feregrino@thatcsharpguy.com](mailto:feregrino@thatcsharpguy.com)"
411 |    ]
412 |   },
413 |   {
414 |    "cell_type": "code",
415 |    "execution_count": null,
416 |    "metadata": {
417 |     "collapsed": true
418 |    },
419 |    "outputs": [],
420 |    "source": []
421 |   }
422 |  ],
423 |  "metadata": {
424 |   "kernelspec": {
425 |    "display_name": "Python 3",
426 |    "language": "python",
427 |    "name": "python3"
428 |   },
429 |   "language_info": {
430 |    "codemirror_mode": {
431 |     "name": "ipython",
432 |     "version": 3
433 |    },
434 |    "file_extension": ".py",
435 |    "mimetype": "text/x-python",
436 |    "name": "python",
437 |    "nbconvert_exporter": "python",
438 |    "pygments_lexer": "ipython3",
439 |    "version": "3.6.2"
440 |   }
441 |  },
442 |  "nbformat": 4,
443 |  "nbformat_minor": 2
444 | }
445 | 


--------------------------------------------------------------------------------
/mt-scraper.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "antonio-feregrino-bolanos\n"
 13 |      ]
 14 |     }
 15 |    ],
 16 |    "source": [
 17 |     "import requests\n",
 18 |     "import re\n",
 19 |     "import pandas as pd\n",
 20 |     "import numpy as np\n",
 21 |     "import datetime\n",
 22 |     "import slugify\n",
 23 |     "import os\n",
 24 |     "from urllib.parse import urljoin\n",
 25 |     "from bs4 import BeautifulSoup\n",
 26 |     "\n",
 27 |     "print(slugify.slugify('Antonio Feregrino Bolaños'))\n",
 28 |     "\n",
 29 |     "if not os.path.exists(\"mt-scraper\"):\n",
 30 |     "    os.makedirs(\"mt-scraper/defensiva\")\n",
 31 |     "    os.makedirs(\"mt-scraper/ofensiva\")"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": null,
 37 |    "metadata": {
 38 |     "collapsed": true
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Get base page\n",
 43 |     "base_url = \"http://www.mediotiempo.com/liga/futbol/ligamx/tabla-general/\"\n",
 44 |     "base_page = requests.get(base_url).text\n",
 45 |     "base_soup = BeautifulSoup(base_page, \"lxml\")"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": null,
 51 |    "metadata": {
 52 |     "collapsed": true
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "tournament_container = base_soup.find(\"div\", { \"class\" : \"dropdown-container\" })\n",
 57 |     "ul = tournament_container.find('ul')\n",
 58 |     "tournaments = []\n",
 59 |     "for li in ul.findAll('li'):\n",
 60 |     "    tournaments.append(li.get('value'))"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": null,
 66 |    "metadata": {
 67 |     "collapsed": true
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "# Get tables\n",
 72 |     "c = { 'Team': 0, 'PTS':1, 'JJ':2, 'DG':3, 'JG':4,'JE':5, 'JP':6, 'GF': 7, 'GC': 8 }\n",
 73 |     "print(\"Tournaments\", len(tournaments))\n",
 74 |     "scraped = {}\n",
 75 |     "for tournament in tournaments:\n",
 76 |     "    results = []\n",
 77 |     "    url = urljoin(base_url, tournament)\n",
 78 |     "    tournament_page = requests.get(url).text\n",
 79 |     "    tournament_soup = BeautifulSoup(tournament_page, \"lxml\")\n",
 80 |     "    tables = tournament_soup.findAll(\"div\", { \"class\" :'table-positions' })\n",
 81 |     "    for table in tables:\n",
 82 |     "        # need to find the table inside div.scroll:\n",
 83 |     "        table = table.find('div', {'class':'scroll'}).find('table', {'class':'mt-table'})\n",
 84 |     "        rows = table.tbody.findAll('tr')\n",
 85 |     "        for row in rows:\n",
 86 |     "            tds = row.findAll('td')\n",
 87 |     "            team = tds[c['Team']].text.strip()\n",
 88 |     "            pts =  tds[c['PTS']].text.strip()\n",
 89 |     "            jj =  tds[c['JJ']].text.strip()\n",
 90 |     "            dg =  tds[c['DG']].text.strip()\n",
 91 |     "            jg =  tds[c['JG']].text.strip()\n",
 92 |     "            je =  tds[c['JE']].text.strip()\n",
 93 |     "            jp =  tds[c['JP']].text.strip()\n",
 94 |     "            gf =  tds[c['GF']].text.strip()\n",
 95 |     "            gc =  tds[c['GC']].text.strip()\n",
 96 |     "            team_stat = {\n",
 97 |     "                'team':team,\n",
 98 |     "                'pts':pts,\n",
 99 |     "                'jj':jj,\n",
100 |     "                'dg':dg,\n",
101 |     "                'jg':jg,\n",
102 |     "                'je':je,\n",
103 |     "                'jp':jp,\n",
104 |     "                'gf':gf,\n",
105 |     "                'gc':gc\n",
106 |     "            }\n",
107 |     "            results.append(team_stat)\n",
108 |     "    scraped[tournament] = results"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {
115 |     "collapsed": true
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "torneo_largo = re.compile('(\\w+)-([0-9]{4})-+([0-9]{4})')\n",
120 |     "torneo_corto = re.compile('(\\w+)-([0-9]{4})')"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": null,
126 |    "metadata": {
127 |     "collapsed": true,
128 |     "scrolled": false
129 |    },
130 |    "outputs": [],
131 |    "source": [
132 |     "dos = set(['invierno', 'apertura'])\n",
133 |     "uno = set(['verano', 'clausura', 'bicentenario'])\n",
134 |     "intermediate = []\n",
135 |     "for torneo in scraped:\n",
136 |     "    match_torneo_largo = torneo_largo.search(torneo)\n",
137 |     "    match_torneo_corto = torneo_corto.search(torneo)\n",
138 |     "    name = ''\n",
139 |     "    if match_torneo_largo:\n",
140 |     "        t = match_torneo_largo.group(1)\n",
141 |     "        inicio = int(match_torneo_largo.group(2).upper())\n",
142 |     "        fin = int(match_torneo_largo.group(3).upper())\n",
143 |     "        if t == \"temporada\":\n",
144 |     "            name = \"Temporada de \" + str(inicio) + \" a \"+ str(fin)\n",
145 |     "        if t == \"liguilla\":\n",
146 |     "            name = \"Liguilla de \" + str(inicio) + \" a \"+ str(fin)\n",
147 |     "    elif match_torneo_corto:\n",
148 |     "        t = match_torneo_corto.group(1)\n",
149 |     "        c = int(match_torneo_corto.group(2).upper())\n",
150 |     "        if t in dos:\n",
151 |     "            name = \"Torneo corto \" + str(c)  + \"-2\"\n",
152 |     "        if t in uno:\n",
153 |     "            name = \"Torneo corto \" + str(c)  + \"-1\"\n",
154 |     "    for result in scraped[torneo]:\n",
155 |     "        intermediate.append([\n",
156 |     "            torneo,\n",
157 |     "            result['team'],\n",
158 |     "            result['pts'],\n",
159 |     "            result['jj'],\n",
160 |     "            result['dg'],\n",
161 |     "            result['jg'],\n",
162 |     "            result['je'],\n",
163 |     "            result['jp'],\n",
164 |     "            result['gf'],\n",
165 |     "            result['gc']\n",
166 |     "        ])\n",
167 |     "tournament_df = pd.DataFrame(intermediate)\n",
168 |     "tournament_df.columns = ['tournament', 'team', 'pts', 'jj', 'dg', 'jg','je', 'jp', 'gf', 'gc']\n",
169 |     "tournament_df.set_index(['tournament', 'team'], inplace=True)\n",
170 |     "print(tournament_df.tail())"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": null,
176 |    "metadata": {
177 |     "collapsed": true
178 |    },
179 |    "outputs": [],
180 |    "source": [
181 |     "tournament_df.to_csv('mt-scraper/tournaments.csv')"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": null,
187 |    "metadata": {
188 |     "collapsed": true
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "tournament_df = pd.read_csv('mt-scraper/tournaments.csv', index_col=[0, 1])\n",
193 |     "print(tournament_df.head())"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": null,
199 |    "metadata": {
200 |     "collapsed": true
201 |    },
202 |    "outputs": [],
203 |    "source": [
204 |     "all_tournaments = tournament_df.index.get_level_values(0).unique().values\n",
205 |     "url = \"http://www.mediotiempo.com/liga/futbol/ligamx/calendario/\"\n",
206 |     "seasons = []\n",
207 |     "seasonRounds = []\n",
208 |     "rounds = []\n",
209 |     "for season in all_tournaments:\n",
210 |     "    #print(\"Scraping season\", season)\n",
211 |     "    season_url = url + tournament + \"/\"\n",
212 |     "    season_page = requests.get(season_url).text\n",
213 |     "    season_soup = BeautifulSoup(season_page, \"lxml\")\n",
214 |     "    seasonRound_ul = season_soup.find('ul', { \"name\": \"seasonRound\"})\n",
215 |     "    if seasonRound_ul is None: \n",
216 |     "        continue\n",
217 |     "    for li in seasonRound_ul.findAll('li'):\n",
218 |     "        seasonRound = li.get('value')\n",
219 |     "        seasonRound_url = season_url + seasonRound + \"/\"\n",
220 |     "        seasonRound_page = requests.get(seasonRound_url).text\n",
221 |     "        seasonRound_soup = BeautifulSoup(seasonRound_page, \"lxml\")\n",
222 |     "        round_ul = seasonRound_soup.find('ul', { \"name\": \"round\"})\n",
223 |     "        if round_ul is None: \n",
224 |     "            continue\n",
225 |     "        for li in round_ul.findAll('li'):\n",
226 |     "            _round = li.get('value')\n",
227 |     "            seasons.append(season)\n",
228 |     "            seasonRounds.append(seasonRound)\n",
229 |     "            rounds.append(_round)\n",
230 |     "        \n",
231 |     "print(\"Found\",len(seasons), len(seasonRounds), len(rounds), \"rounds\")"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": null,
237 |    "metadata": {
238 |     "collapsed": true
239 |    },
240 |    "outputs": [],
241 |    "source": [
242 |     "url = \"http://www.mediotiempo.com/liga/futbol/ligamx/calendario/%s/%s/%s\"\n",
243 |     "\n",
244 |     "matches_seasons = []\n",
245 |     "matches_seasonRounds = []\n",
246 |     "matches_rounds = []\n",
247 |     "matches_date = []\n",
248 |     "matches_time = []\n",
249 |     "matches_home_team = []\n",
250 |     "matches_result = []\n",
251 |     "matches_away_team = []\n",
252 |     "matches_venue = []\n",
253 |     "\n",
254 |     "for season,seasonRound,_round in zip(seasons,seasonRounds,rounds):\n",
255 |     "    query_url = url % (season,seasonRound,_round)\n",
256 |     "#    print(query_url)\n",
257 |     "    scrape = requests.get(query_url).text\n",
258 |     "    scrape_soup = BeautifulSoup(scrape, \"lxml\")\n",
259 |     "    calendar_groups = scrape_soup.findAll('div', {\"class\":\"mt-calendar-group\"})\n",
260 |     "    for calendar_group in calendar_groups:\n",
261 |     "        date = calendar_group.find('div', {\"class\":\"calendar-date-wrapper\"}).text.strip()\n",
262 |     "        match_wrappers = calendar_group.findAll('div', {\"class\":\"mt-calendar-match\"},recursive=False)\n",
263 |     "        for match_wrapper in match_wrappers:\n",
264 |     "            try:\n",
265 |     "                divs = match_wrapper.findAll('div')\n",
266 |     "                time = divs[0].text.strip()\n",
267 |     "                _as = divs[1].findAll('a')\n",
268 |     "                home_team = _as[0].text.strip()\n",
269 |     "                result = _as[1].text.replace(\"\\n\", \" \").strip()\n",
270 |     "                away_team = _as[2].text.strip()\n",
271 |     "                venue_div =  divs[1].find('div', {'class':'venue-wrapper'})\n",
272 |     "                if venue_div is None:\n",
273 |     "                    venue = ''\n",
274 |     "                else:\n",
275 |     "                    venue = venue_div.text.strip()\n",
276 |     "\n",
277 |     "                matches_seasons.append(season)\n",
278 |     "                matches_seasonRounds.append(seasonRound)\n",
279 |     "                matches_rounds.append(_round)\n",
280 |     "                matches_date.append(date)\n",
281 |     "                matches_time.append(time)\n",
282 |     "                matches_home_team.append(home_team)\n",
283 |     "                matches_result.append(result)\n",
284 |     "                matches_away_team.append(away_team)\n",
285 |     "                matches_venue.append(venue)\n",
286 |     "            except:\n",
287 |     "                print(\"Error\", query_url)\n",
288 |     "print(\"Done scraping\")"
289 |    ]
290 |   },
291 |   {
292 |    "cell_type": "code",
293 |    "execution_count": null,
294 |    "metadata": {
295 |     "collapsed": true
296 |    },
297 |    "outputs": [],
298 |    "source": [
299 |     "matches_df = pd.DataFrame({\n",
300 |     "    'season' : matches_seasons,\n",
301 |     "    'season_round': matches_seasonRounds,\n",
302 |     "    'round': matches_rounds,\n",
303 |     "    'date': matches_date,\n",
304 |     "    'time': matches_time,\n",
305 |     "    'home_team': matches_home_team,\n",
306 |     "    'result': matches_result,\n",
307 |     "    'away_team': matches_away_team,\n",
308 |     "    'venue': matches_venue\n",
309 |     "})\n",
310 |     "\n",
311 |     "print(matches_df.info())\n",
312 |     "matches_df.to_csv('mt-scraper/matches_raw.csv')"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": null,
318 |    "metadata": {
319 |     "collapsed": true
320 |    },
321 |    "outputs": [],
322 |    "source": [
323 |     "matches_df = pd.read_csv('mt-scraper/matches_raw.csv', index_col=0)\n",
324 |     "\n",
325 |     "month_dict = {\n",
326 |     "    'enero': 1,\n",
327 |     "    'febrero': 2,\n",
328 |     "    'marzo': 3,\n",
329 |     "    'abril':4,\n",
330 |     "    'mayo': 5,\n",
331 |     "    'junio': 6,\n",
332 |     "    'julio': 7,\n",
333 |     "    'agosto': 8,\n",
334 |     "    'septiembre': 9,\n",
335 |     "    'octubre': 10,\n",
336 |     "    'noviembre': 11,\n",
337 |     "    'diciembre': 12\n",
338 |     "}\n",
339 |     "\n",
340 |     "mt_date_re = re.compile('(\\w{3})\\s([0-9]+)\\sde\\s(\\w+),\\s([0-9]{4})\\s([0-9]{2}):([0-9]{2})')\n",
341 |     "def parse_dates(text_date):\n",
342 |     "    match = mt_date_re.search(text_date)\n",
343 |     "    if match:\n",
344 |     "        day = int(match.group(2))\n",
345 |     "        month = month_dict[match.group(3)] \n",
346 |     "        year = int(match.group(4))\n",
347 |     "        hour =int(match.group(5))\n",
348 |     "        minute =int(match.group(6))\n",
349 |     "        dt_str = \"%04d-%02d-%02d %02d:%02d\" % (year,month,day,hour,minute)\n",
350 |     "        try:\n",
351 |     "            return pd.to_datetime(dt_str)\n",
352 |     "        except:\n",
353 |     "            print(dt_str)\n",
354 |     "\n",
355 |     "mt_score = re.compile('([0-9]+)\\s*-\\s*([0-9]+)')\n",
356 |     "def get_scores(raw_score):\n",
357 |     "    match = mt_score.search(raw_score)\n",
358 |     "    if match:\n",
359 |     "        return int(match.group(1)), int(match.group(2))\n",
360 |     "    return np.nan, np.nan\n",
361 |     "    \n",
362 |     "    \n",
363 |     "date_time = matches_df['date'] + \" \" + matches_df[\"time\"]\n",
364 |     "\n",
365 |     "#matches_df['match_datetime']\n",
366 |     "matches_df['match_datetime'] = date_time.apply(parse_dates)\n",
367 |     "matches_df['home_score'],  matches_df['away_score'] = zip(*matches_df['result'].apply(get_scores))\n",
368 |     "matches_df.tail()\n",
369 |     "\n",
370 |     "#del matches_df['date'], matches_df['time'], matches_df['result']\n",
371 |     "\n",
372 |     "#matches_df.info()\n",
373 |     "matches_df.to_csv('mt-scraper/matches_processed.csv')\n",
374 |     "no_date = matches_df[matches_df['match_datetime'].isnull()]\n",
375 |     "no_date.head(11)"
376 |    ]
377 |   },
378 |   {
379 |    "cell_type": "code",
380 |    "execution_count": null,
381 |    "metadata": {
382 |     "collapsed": true
383 |    },
384 |    "outputs": [],
385 |    "source": [
386 |     "matches_processed_df = pd.read_csv('mt-scraper/matches_processed.csv', index_col=0, parse_dates=['match_datetime'])\n",
387 |     "matches_processed_df.info()"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "code",
392 |    "execution_count": null,
393 |    "metadata": {
394 |     "collapsed": true
395 |    },
396 |    "outputs": [],
397 |    "source": [
398 |     "no_date = matches_processed_df[matches_processed_df['match_datetime'].isnull()]\n",
399 |     "no_date.head(11)"
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "execution_count": null,
405 |    "metadata": {
406 |     "collapsed": true
407 |    },
408 |    "outputs": [],
409 |    "source": [
410 |     "# Get details about each match (insane):\n",
411 |     "_ = '''\n",
412 |     "url = \"http://www.mediotiempo.com/partido/futbol/ligamx/%s/%s/ficha\"\n",
413 |     "url_alt = \"http://www.mediotiempo.com/partido/futbol/liga-mx/%s/%s/ficha\"\n",
414 |     "a = matches_processed_df[['home_team','away_team','match_datetime']].values\n",
415 |     "for r in a[4300:4305]:\n",
416 |     "    s = r[0] + \" vs \" + r[1]\n",
417 |     "    _url = url % (slugify.slugify(s), pd.to_datetime(r[2]).strftime(\"%Y/%m/%d\"))\n",
418 |     "    rq = requests.get(_url)\n",
419 |     "    if rq.status_code != 200:\n",
420 |     "        _url = url_alt % (slugify.slugify(s), pd.to_datetime(r[2]).strftime(\"%Y/%m/%d\"))\n",
421 |     "        rq = requests.get(_url)\n",
422 |     "        if rq.status_code != 200:\n",
423 |     "            rq = None\n",
424 |     "            _url = None\n",
425 |     "    \n",
426 |     "    if rq is not None:\n",
427 |     "        print(_url)\n",
428 |     "'''"
429 |    ]
430 |   },
431 |   {
432 |    "cell_type": "code",
433 |    "execution_count": null,
434 |    "metadata": {
435 |     "collapsed": true
436 |    },
437 |    "outputs": [],
438 |    "source": [
439 |     "all_tournaments = tournament_df.index.get_level_values(0).unique().values"
440 |    ]
441 |   },
442 |   {
443 |    "cell_type": "code",
444 |    "execution_count": null,
445 |    "metadata": {
446 |     "collapsed": true
447 |    },
448 |    "outputs": [],
449 |    "source": [
450 |     "# Ofensiva\n",
451 |     "of_url = \"http://www.mediotiempo.com/liga/futbol/ligamx/estadisticas/equipos/%s/ofensiva?tabla=mas-goleadores\"\n",
452 |     "ofensiva_tables = {}\n",
453 |     "for season in all_tournaments:\n",
454 |     "    url = of_url % season\n",
455 |     "    r = requests.get(url)\n",
456 |     "    scrape_soup = BeautifulSoup(r.text, \"lxml\")\n",
457 |     "    table = scrape_soup.find('div', {'class': 'table-containers'})\n",
458 |     "    if table is not None:\n",
459 |     "        table_body = table.find('div', {'class': 'scroll'}).find('tbody', {'class':'mt-table-body'})\n",
460 |     "        rows = table_body.findAll('tr')\n",
461 |     "        if len(rows) == 0:\n",
462 |     "            continue\n",
463 |     "        \n",
464 |     "        gf = []\n",
465 |     "        equipo = []\n",
466 |     "        tt = []\n",
467 |     "        tg = []\n",
468 |     "        prec = []\n",
469 |     "        g_c = []\n",
470 |     "        ll = []\n",
471 |     "        lla = []\n",
472 |     "        fdl = []\n",
473 |     "        \n",
474 |     "        for row in rows:\n",
475 |     "            all_tds = row.findAll('td')\n",
476 |     "            equipo.append(all_tds[0].text.strip())\n",
477 |     "            if len(all_tds) == 2:\n",
478 |     "                gf.append(int(all_tds[1].text.strip()))\n",
479 |     "                tt.append(np.nan)\n",
480 |     "                tg.append(np.nan)\n",
481 |     "                prec.append(np.nan)\n",
482 |     "                g_c.append(np.nan)\n",
483 |     "                ll.append(np.nan)\n",
484 |     "                lla.append(np.nan)\n",
485 |     "                fdl.append(np.nan)\n",
486 |     "            else:\n",
487 |     "                gf.append(int(all_tds[3].text.strip()))\n",
488 |     "                tt.append(int(all_tds[1].text.strip()))\n",
489 |     "                tg.append(int(all_tds[2].text.strip()))\n",
490 |     "                prec.append(all_tds[4].text.strip())\n",
491 |     "                g_c.append(float(all_tds[5].text.strip()))\n",
492 |     "                ll.append(int(all_tds[6].text.strip()))\n",
493 |     "                lla.append(int(all_tds[7].text.strip()))\n",
494 |     "                fdl.append(int(all_tds[8].text.strip()))\n",
495 |     "        ofensiva_tables[season] = pd.DataFrame({'Equipo': equipo, \n",
496 |     "                                                'GF': gf,\n",
497 |     "                                                'TT': tt,\n",
498 |     "                                                'TG': tg,\n",
499 |     "                                                'PREC': prec,\n",
500 |     "                                                'G_C': g_c,\n",
501 |     "                                                'LL': ll,\n",
502 |     "                                                'LLA': lla,\n",
503 |     "                                                'FDL': fdl})"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "code",
508 |    "execution_count": null,
509 |    "metadata": {
510 |     "collapsed": true
511 |    },
512 |    "outputs": [],
513 |    "source": [
514 |     "# Defensiva\n",
515 |     "of_url = \"http://www.mediotiempo.com/liga/futbol/ligamx/estadisticas/equipos/%s/defensiva?tabla=menos-goleados\"\n",
516 |     "defensiva_tables = {}\n",
517 |     "for season in all_tournaments:\n",
518 |     "    url = of_url % season\n",
519 |     "    r = requests.get(url)\n",
520 |     "    scrape_soup = BeautifulSoup(r.text, \"lxml\")\n",
521 |     "    table = scrape_soup.find('div', {'class': 'table-containers'})\n",
522 |     "    if table is not None:\n",
523 |     "        table_body = table.find('div', {'class': 'scroll'}).find('tbody', {'class':'mt-table-body'})\n",
524 |     "        rows = table_body.findAll('tr')\n",
525 |     "        if len(rows) == 0:\n",
526 |     "            continue\n",
527 |     "        \n",
528 |     "        gc = []\n",
529 |     "        equipo = []\n",
530 |     "        ttp = []\n",
531 |     "        tgp = []\n",
532 |     "        blq = []\n",
533 |     "        tblq = []\n",
534 |     "        cblq = []\n",
535 |     "        pblq = []\n",
536 |     "        _int = []\n",
537 |     "        \n",
538 |     "        for row in rows:\n",
539 |     "            all_tds = row.findAll('td')\n",
540 |     "            equipo.append(all_tds[0].text.strip())\n",
541 |     "            if len(all_tds) == 2:\n",
542 |     "                gc.append(int(all_tds[1].text.strip()))\n",
543 |     "                ttp.append(np.nan)\n",
544 |     "                tgp.append(np.nan)\n",
545 |     "                blq.append(np.nan)\n",
546 |     "                tblq.append(np.nan)\n",
547 |     "                cblq.append(np.nan)\n",
548 |     "                pblq.append(np.nan)\n",
549 |     "                _int.append(np.nan)\n",
550 |     "            else:\n",
551 |     "                gc.append(int(all_tds[3].text.strip()))\n",
552 |     "                ttp.append(int(all_tds[1].text.strip()))\n",
553 |     "                tgp.append(int(all_tds[2].text.strip()))\n",
554 |     "                blq.append(int(all_tds[4].text.strip()))\n",
555 |     "                tblq.append(int(all_tds[5].text.strip()))\n",
556 |     "                cblq.append(int(all_tds[6].text.strip()))\n",
557 |     "                pblq.append(int(all_tds[7].text.strip()))\n",
558 |     "                _int.append(int(all_tds[8].text.strip()))\n",
559 |     "        defensiva_tables[season] = pd.DataFrame({'Equipo': equipo, \n",
560 |     "                                                 'TTP': ttp,\n",
561 |     "                                                 'TGP': tgp,\n",
562 |     "                                                 'GC': gc,\n",
563 |     "                                                 'BLQ': blq,\n",
564 |     "                                                 'TBLQ': tblq,\n",
565 |     "                                                 'CBLQ': cblq,\n",
566 |     "                                                 'PBLQ': pblq,\n",
567 |     "                                                 'INT': _int})"
568 |    ]
569 |   },
570 |   {
571 |    "cell_type": "code",
572 |    "execution_count": null,
573 |    "metadata": {
574 |     "collapsed": true
575 |    },
576 |    "outputs": [],
577 |    "source": [
578 |     "for season in all_tournaments:\n",
579 |     "    if season in defensiva_tables and season in ofensiva_tables:\n",
580 |     "        defensiva_tables[season].to_csv('mt-scraper/defensiva/' + season +'.csv')\n",
581 |     "        ofensiva_tables[season].to_csv('mt-scraper/ofensiva/' + season +'.csv')"
582 |    ]
583 |   },
584 |   {
585 |    "cell_type": "code",
586 |    "execution_count": null,
587 |    "metadata": {
588 |     "collapsed": true
589 |    },
590 |    "outputs": [],
591 |    "source": []
592 |   }
593 |  ],
594 |  "metadata": {
595 |   "kernelspec": {
596 |    "display_name": "Python 3",
597 |    "language": "python",
598 |    "name": "python3"
599 |   },
600 |   "language_info": {
601 |    "codemirror_mode": {
602 |     "name": "ipython",
603 |     "version": 3
604 |    },
605 |    "file_extension": ".py",
606 |    "mimetype": "text/x-python",
607 |    "name": "python",
608 |    "nbconvert_exporter": "python",
609 |    "pygments_lexer": "ipython3",
610 |    "version": "3.6.1"
611 |   }
612 |  },
613 |  "nbformat": 4,
614 |  "nbformat_minor": 2
615 | }
616 | 


--------------------------------------------------------------------------------
/Scalers.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 31,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler, RobustScaler\n",
 12 |     "import numpy as np\n",
 13 |     "import pandas as pd"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 32,
 19 |    "metadata": {},
 20 |    "outputs": [
 21 |     {
 22 |      "data": {
 23 |       "text/html": [
 24 |        "<div>\n",
 25 |        "<style>\n",
 26 |        "    .dataframe thead tr:only-child th {\n",
 27 |        "        text-align: right;\n",
 28 |        "    }\n",
 29 |        "\n",
 30 |        "    .dataframe thead th {\n",
 31 |        "        text-align: left;\n",
 32 |        "    }\n",
 33 |        "\n",
 34 |        "    .dataframe tbody tr th {\n",
 35 |        "        vertical-align: top;\n",
 36 |        "    }\n",
 37 |        "</style>\n",
 38 |        "<table border=\"1\" class=\"dataframe\">\n",
 39 |        "  <thead>\n",
 40 |        "    <tr style=\"text-align: right;\">\n",
 41 |        "      <th></th>\n",
 42 |        "      <th>hundreds</th>\n",
 43 |        "      <th>neg_thousands</th>\n",
 44 |        "      <th>outliers</th>\n",
 45 |        "      <th>tens</th>\n",
 46 |        "      <th>thousands</th>\n",
 47 |        "    </tr>\n",
 48 |        "  </thead>\n",
 49 |        "  <tbody>\n",
 50 |        "    <tr>\n",
 51 |        "      <th>0</th>\n",
 52 |        "      <td>1.0</td>\n",
 53 |        "      <td>-1000.000000</td>\n",
 54 |        "      <td>-3.212321e+06</td>\n",
 55 |        "      <td>1.0</td>\n",
 56 |        "      <td>1.0</td>\n",
 57 |        "    </tr>\n",
 58 |        "    <tr>\n",
 59 |        "      <th>1</th>\n",
 60 |        "      <td>12.0</td>\n",
 61 |        "      <td>-777.777778</td>\n",
 62 |        "      <td>-7.777778e+02</td>\n",
 63 |        "      <td>2.0</td>\n",
 64 |        "      <td>112.0</td>\n",
 65 |        "    </tr>\n",
 66 |        "    <tr>\n",
 67 |        "      <th>2</th>\n",
 68 |        "      <td>23.0</td>\n",
 69 |        "      <td>-555.555556</td>\n",
 70 |        "      <td>-5.555556e+02</td>\n",
 71 |        "      <td>3.0</td>\n",
 72 |        "      <td>223.0</td>\n",
 73 |        "    </tr>\n",
 74 |        "    <tr>\n",
 75 |        "      <th>3</th>\n",
 76 |        "      <td>34.0</td>\n",
 77 |        "      <td>-333.333333</td>\n",
 78 |        "      <td>-3.333333e+02</td>\n",
 79 |        "      <td>4.0</td>\n",
 80 |        "      <td>334.0</td>\n",
 81 |        "    </tr>\n",
 82 |        "    <tr>\n",
 83 |        "      <th>4</th>\n",
 84 |        "      <td>45.0</td>\n",
 85 |        "      <td>-111.111111</td>\n",
 86 |        "      <td>-1.111111e+02</td>\n",
 87 |        "      <td>5.0</td>\n",
 88 |        "      <td>445.0</td>\n",
 89 |        "    </tr>\n",
 90 |        "    <tr>\n",
 91 |        "      <th>5</th>\n",
 92 |        "      <td>56.0</td>\n",
 93 |        "      <td>111.111111</td>\n",
 94 |        "      <td>1.111111e+02</td>\n",
 95 |        "      <td>6.0</td>\n",
 96 |        "      <td>556.0</td>\n",
 97 |        "    </tr>\n",
 98 |        "    <tr>\n",
 99 |        "      <th>6</th>\n",
100 |        "      <td>67.0</td>\n",
101 |        "      <td>333.333333</td>\n",
102 |        "      <td>3.333333e+02</td>\n",
103 |        "      <td>7.0</td>\n",
104 |        "      <td>667.0</td>\n",
105 |        "    </tr>\n",
106 |        "    <tr>\n",
107 |        "      <th>7</th>\n",
108 |        "      <td>78.0</td>\n",
109 |        "      <td>555.555556</td>\n",
110 |        "      <td>5.555556e+02</td>\n",
111 |        "      <td>8.0</td>\n",
112 |        "      <td>778.0</td>\n",
113 |        "    </tr>\n",
114 |        "    <tr>\n",
115 |        "      <th>8</th>\n",
116 |        "      <td>89.0</td>\n",
117 |        "      <td>777.777778</td>\n",
118 |        "      <td>7.777778e+02</td>\n",
119 |        "      <td>9.0</td>\n",
120 |        "      <td>889.0</td>\n",
121 |        "    </tr>\n",
122 |        "    <tr>\n",
123 |        "      <th>9</th>\n",
124 |        "      <td>100.0</td>\n",
125 |        "      <td>1000.000000</td>\n",
126 |        "      <td>3.212321e+06</td>\n",
127 |        "      <td>10.0</td>\n",
128 |        "      <td>1000.0</td>\n",
129 |        "    </tr>\n",
130 |        "  </tbody>\n",
131 |        "</table>\n",
132 |        "</div>"
133 |       ],
134 |       "text/plain": [
135 |        "   hundreds  neg_thousands      outliers  tens  thousands\n",
136 |        "0       1.0   -1000.000000 -3.212321e+06   1.0        1.0\n",
137 |        "1      12.0    -777.777778 -7.777778e+02   2.0      112.0\n",
138 |        "2      23.0    -555.555556 -5.555556e+02   3.0      223.0\n",
139 |        "3      34.0    -333.333333 -3.333333e+02   4.0      334.0\n",
140 |        "4      45.0    -111.111111 -1.111111e+02   5.0      445.0\n",
141 |        "5      56.0     111.111111  1.111111e+02   6.0      556.0\n",
142 |        "6      67.0     333.333333  3.333333e+02   7.0      667.0\n",
143 |        "7      78.0     555.555556  5.555556e+02   8.0      778.0\n",
144 |        "8      89.0     777.777778  7.777778e+02   9.0      889.0\n",
145 |        "9     100.0    1000.000000  3.212321e+06  10.0     1000.0"
146 |       ]
147 |      },
148 |      "execution_count": 32,
149 |      "metadata": {},
150 |      "output_type": "execute_result"
151 |     }
152 |    ],
153 |    "source": [
154 |     "tens = np.linspace(1, 10, 10)\n",
155 |     "hundreds = np.linspace(1, 100, 10)\n",
156 |     "thousands = np.linspace(1, 1000, 10)\n",
157 |     "neg_thousands = np.linspace(-1000, 1000, 10)\n",
158 |     "outliers = neg_thousands.copy()\n",
159 |     "outliers[0] = -3212321\n",
160 |     "outliers[9] = 3212321\n",
161 |     "\n",
162 |     "data = pd.DataFrame({\n",
163 |     "    'tens': tens,\n",
164 |     "    'hundreds': hundreds,\n",
165 |     "    'thousands': thousands,\n",
166 |     "    'neg_thousands': neg_thousands,\n",
167 |     "    'outliers': outliers\n",
168 |     "})\n",
169 |     "columns = data.columns\n",
170 |     "data.head(10)"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 33,
176 |    "metadata": {},
177 |    "outputs": [
178 |     {
179 |      "data": {
180 |       "text/html": [
181 |        "<div>\n",
182 |        "<style>\n",
183 |        "    .dataframe thead tr:only-child th {\n",
184 |        "        text-align: right;\n",
185 |        "    }\n",
186 |        "\n",
187 |        "    .dataframe thead th {\n",
188 |        "        text-align: left;\n",
189 |        "    }\n",
190 |        "\n",
191 |        "    .dataframe tbody tr th {\n",
192 |        "        vertical-align: top;\n",
193 |        "    }\n",
194 |        "</style>\n",
195 |        "<table border=\"1\" class=\"dataframe\">\n",
196 |        "  <thead>\n",
197 |        "    <tr style=\"text-align: right;\">\n",
198 |        "      <th></th>\n",
199 |        "      <th>hundreds</th>\n",
200 |        "      <th>neg_thousands</th>\n",
201 |        "      <th>outliers</th>\n",
202 |        "      <th>tens</th>\n",
203 |        "      <th>thousands</th>\n",
204 |        "    </tr>\n",
205 |        "  </thead>\n",
206 |        "  <tbody>\n",
207 |        "    <tr>\n",
208 |        "      <th>0</th>\n",
209 |        "      <td>0.000000</td>\n",
210 |        "      <td>0.000000</td>\n",
211 |        "      <td>0.000000</td>\n",
212 |        "      <td>0.000000</td>\n",
213 |        "      <td>0.000000</td>\n",
214 |        "    </tr>\n",
215 |        "    <tr>\n",
216 |        "      <th>1</th>\n",
217 |        "      <td>0.111111</td>\n",
218 |        "      <td>0.111111</td>\n",
219 |        "      <td>0.499879</td>\n",
220 |        "      <td>0.111111</td>\n",
221 |        "      <td>0.111111</td>\n",
222 |        "    </tr>\n",
223 |        "    <tr>\n",
224 |        "      <th>2</th>\n",
225 |        "      <td>0.222222</td>\n",
226 |        "      <td>0.222222</td>\n",
227 |        "      <td>0.499914</td>\n",
228 |        "      <td>0.222222</td>\n",
229 |        "      <td>0.222222</td>\n",
230 |        "    </tr>\n",
231 |        "    <tr>\n",
232 |        "      <th>3</th>\n",
233 |        "      <td>0.333333</td>\n",
234 |        "      <td>0.333333</td>\n",
235 |        "      <td>0.499948</td>\n",
236 |        "      <td>0.333333</td>\n",
237 |        "      <td>0.333333</td>\n",
238 |        "    </tr>\n",
239 |        "    <tr>\n",
240 |        "      <th>4</th>\n",
241 |        "      <td>0.444444</td>\n",
242 |        "      <td>0.444444</td>\n",
243 |        "      <td>0.499983</td>\n",
244 |        "      <td>0.444444</td>\n",
245 |        "      <td>0.444444</td>\n",
246 |        "    </tr>\n",
247 |        "    <tr>\n",
248 |        "      <th>5</th>\n",
249 |        "      <td>0.555556</td>\n",
250 |        "      <td>0.555556</td>\n",
251 |        "      <td>0.500017</td>\n",
252 |        "      <td>0.555556</td>\n",
253 |        "      <td>0.555556</td>\n",
254 |        "    </tr>\n",
255 |        "    <tr>\n",
256 |        "      <th>6</th>\n",
257 |        "      <td>0.666667</td>\n",
258 |        "      <td>0.666667</td>\n",
259 |        "      <td>0.500052</td>\n",
260 |        "      <td>0.666667</td>\n",
261 |        "      <td>0.666667</td>\n",
262 |        "    </tr>\n",
263 |        "    <tr>\n",
264 |        "      <th>7</th>\n",
265 |        "      <td>0.777778</td>\n",
266 |        "      <td>0.777778</td>\n",
267 |        "      <td>0.500086</td>\n",
268 |        "      <td>0.777778</td>\n",
269 |        "      <td>0.777778</td>\n",
270 |        "    </tr>\n",
271 |        "    <tr>\n",
272 |        "      <th>8</th>\n",
273 |        "      <td>0.888889</td>\n",
274 |        "      <td>0.888889</td>\n",
275 |        "      <td>0.500121</td>\n",
276 |        "      <td>0.888889</td>\n",
277 |        "      <td>0.888889</td>\n",
278 |        "    </tr>\n",
279 |        "    <tr>\n",
280 |        "      <th>9</th>\n",
281 |        "      <td>1.000000</td>\n",
282 |        "      <td>1.000000</td>\n",
283 |        "      <td>1.000000</td>\n",
284 |        "      <td>1.000000</td>\n",
285 |        "      <td>1.000000</td>\n",
286 |        "    </tr>\n",
287 |        "  </tbody>\n",
288 |        "</table>\n",
289 |        "</div>"
290 |       ],
291 |       "text/plain": [
292 |        "   hundreds  neg_thousands  outliers      tens  thousands\n",
293 |        "0  0.000000       0.000000  0.000000  0.000000   0.000000\n",
294 |        "1  0.111111       0.111111  0.499879  0.111111   0.111111\n",
295 |        "2  0.222222       0.222222  0.499914  0.222222   0.222222\n",
296 |        "3  0.333333       0.333333  0.499948  0.333333   0.333333\n",
297 |        "4  0.444444       0.444444  0.499983  0.444444   0.444444\n",
298 |        "5  0.555556       0.555556  0.500017  0.555556   0.555556\n",
299 |        "6  0.666667       0.666667  0.500052  0.666667   0.666667\n",
300 |        "7  0.777778       0.777778  0.500086  0.777778   0.777778\n",
301 |        "8  0.888889       0.888889  0.500121  0.888889   0.888889\n",
302 |        "9  1.000000       1.000000  1.000000  1.000000   1.000000"
303 |       ]
304 |      },
305 |      "execution_count": 33,
306 |      "metadata": {},
307 |      "output_type": "execute_result"
308 |     }
309 |    ],
310 |    "source": [
311 |     "minmax = MinMaxScaler()\n",
312 |     "minmax.fit(data)\n",
313 |     "data_minmax_scaled = minmax.transform(data)\n",
314 |     "pd.DataFrame(data_minmax_scaled, columns=columns)"
315 |    ]
316 |   },
317 |   {
318 |    "cell_type": "code",
319 |    "execution_count": 34,
320 |    "metadata": {},
321 |    "outputs": [
322 |     {
323 |      "data": {
324 |       "text/html": [
325 |        "<div>\n",
326 |        "<style>\n",
327 |        "    .dataframe thead tr:only-child th {\n",
328 |        "        text-align: right;\n",
329 |        "    }\n",
330 |        "\n",
331 |        "    .dataframe thead th {\n",
332 |        "        text-align: left;\n",
333 |        "    }\n",
334 |        "\n",
335 |        "    .dataframe tbody tr th {\n",
336 |        "        vertical-align: top;\n",
337 |        "    }\n",
338 |        "</style>\n",
339 |        "<table border=\"1\" class=\"dataframe\">\n",
340 |        "  <thead>\n",
341 |        "    <tr style=\"text-align: right;\">\n",
342 |        "      <th></th>\n",
343 |        "      <th>hundreds</th>\n",
344 |        "      <th>neg_thousands</th>\n",
345 |        "      <th>outliers</th>\n",
346 |        "      <th>tens</th>\n",
347 |        "      <th>thousands</th>\n",
348 |        "    </tr>\n",
349 |        "  </thead>\n",
350 |        "  <tbody>\n",
351 |        "    <tr>\n",
352 |        "      <th>0</th>\n",
353 |        "      <td>0.01</td>\n",
354 |        "      <td>-1.000000</td>\n",
355 |        "      <td>-1.000000</td>\n",
356 |        "      <td>0.1</td>\n",
357 |        "      <td>0.001</td>\n",
358 |        "    </tr>\n",
359 |        "    <tr>\n",
360 |        "      <th>1</th>\n",
361 |        "      <td>0.12</td>\n",
362 |        "      <td>-0.777778</td>\n",
363 |        "      <td>-0.000242</td>\n",
364 |        "      <td>0.2</td>\n",
365 |        "      <td>0.112</td>\n",
366 |        "    </tr>\n",
367 |        "    <tr>\n",
368 |        "      <th>2</th>\n",
369 |        "      <td>0.23</td>\n",
370 |        "      <td>-0.555556</td>\n",
371 |        "      <td>-0.000173</td>\n",
372 |        "      <td>0.3</td>\n",
373 |        "      <td>0.223</td>\n",
374 |        "    </tr>\n",
375 |        "    <tr>\n",
376 |        "      <th>3</th>\n",
377 |        "      <td>0.34</td>\n",
378 |        "      <td>-0.333333</td>\n",
379 |        "      <td>-0.000104</td>\n",
380 |        "      <td>0.4</td>\n",
381 |        "      <td>0.334</td>\n",
382 |        "    </tr>\n",
383 |        "    <tr>\n",
384 |        "      <th>4</th>\n",
385 |        "      <td>0.45</td>\n",
386 |        "      <td>-0.111111</td>\n",
387 |        "      <td>-0.000035</td>\n",
388 |        "      <td>0.5</td>\n",
389 |        "      <td>0.445</td>\n",
390 |        "    </tr>\n",
391 |        "    <tr>\n",
392 |        "      <th>5</th>\n",
393 |        "      <td>0.56</td>\n",
394 |        "      <td>0.111111</td>\n",
395 |        "      <td>0.000035</td>\n",
396 |        "      <td>0.6</td>\n",
397 |        "      <td>0.556</td>\n",
398 |        "    </tr>\n",
399 |        "    <tr>\n",
400 |        "      <th>6</th>\n",
401 |        "      <td>0.67</td>\n",
402 |        "      <td>0.333333</td>\n",
403 |        "      <td>0.000104</td>\n",
404 |        "      <td>0.7</td>\n",
405 |        "      <td>0.667</td>\n",
406 |        "    </tr>\n",
407 |        "    <tr>\n",
408 |        "      <th>7</th>\n",
409 |        "      <td>0.78</td>\n",
410 |        "      <td>0.555556</td>\n",
411 |        "      <td>0.000173</td>\n",
412 |        "      <td>0.8</td>\n",
413 |        "      <td>0.778</td>\n",
414 |        "    </tr>\n",
415 |        "    <tr>\n",
416 |        "      <th>8</th>\n",
417 |        "      <td>0.89</td>\n",
418 |        "      <td>0.777778</td>\n",
419 |        "      <td>0.000242</td>\n",
420 |        "      <td>0.9</td>\n",
421 |        "      <td>0.889</td>\n",
422 |        "    </tr>\n",
423 |        "    <tr>\n",
424 |        "      <th>9</th>\n",
425 |        "      <td>1.00</td>\n",
426 |        "      <td>1.000000</td>\n",
427 |        "      <td>1.000000</td>\n",
428 |        "      <td>1.0</td>\n",
429 |        "      <td>1.000</td>\n",
430 |        "    </tr>\n",
431 |        "  </tbody>\n",
432 |        "</table>\n",
433 |        "</div>"
434 |       ],
435 |       "text/plain": [
436 |        "   hundreds  neg_thousands  outliers  tens  thousands\n",
437 |        "0      0.01      -1.000000 -1.000000   0.1      0.001\n",
438 |        "1      0.12      -0.777778 -0.000242   0.2      0.112\n",
439 |        "2      0.23      -0.555556 -0.000173   0.3      0.223\n",
440 |        "3      0.34      -0.333333 -0.000104   0.4      0.334\n",
441 |        "4      0.45      -0.111111 -0.000035   0.5      0.445\n",
442 |        "5      0.56       0.111111  0.000035   0.6      0.556\n",
443 |        "6      0.67       0.333333  0.000104   0.7      0.667\n",
444 |        "7      0.78       0.555556  0.000173   0.8      0.778\n",
445 |        "8      0.89       0.777778  0.000242   0.9      0.889\n",
446 |        "9      1.00       1.000000  1.000000   1.0      1.000"
447 |       ]
448 |      },
449 |      "execution_count": 34,
450 |      "metadata": {},
451 |      "output_type": "execute_result"
452 |     }
453 |    ],
454 |    "source": [
455 |     "maxabs = MaxAbsScaler()\n",
456 |     "maxabs.fit(data)\n",
457 |     "data_maxabs_scaled = maxabs.transform(data)\n",
458 |     "pd.DataFrame(data_maxabs_scaled, columns=columns)"
459 |    ]
460 |   },
461 |   {
462 |    "cell_type": "code",
463 |    "execution_count": 38,
464 |    "metadata": {
465 |     "scrolled": true
466 |    },
467 |    "outputs": [
468 |     {
469 |      "data": {
470 |       "text/html": [
471 |        "<div>\n",
472 |        "<style>\n",
473 |        "    .dataframe thead tr:only-child th {\n",
474 |        "        text-align: right;\n",
475 |        "    }\n",
476 |        "\n",
477 |        "    .dataframe thead th {\n",
478 |        "        text-align: left;\n",
479 |        "    }\n",
480 |        "\n",
481 |        "    .dataframe tbody tr th {\n",
482 |        "        vertical-align: top;\n",
483 |        "    }\n",
484 |        "</style>\n",
485 |        "<table border=\"1\" class=\"dataframe\">\n",
486 |        "  <thead>\n",
487 |        "    <tr style=\"text-align: right;\">\n",
488 |        "      <th></th>\n",
489 |        "      <th>hundreds</th>\n",
490 |        "      <th>neg_thousands</th>\n",
491 |        "      <th>outliers</th>\n",
492 |        "      <th>tens</th>\n",
493 |        "      <th>thousands</th>\n",
494 |        "    </tr>\n",
495 |        "  </thead>\n",
496 |        "  <tbody>\n",
497 |        "    <tr>\n",
498 |        "      <th>0</th>\n",
499 |        "      <td>-1.000000</td>\n",
500 |        "      <td>-1.000000</td>\n",
501 |        "      <td>-3212.321000</td>\n",
502 |        "      <td>-1.000000</td>\n",
503 |        "      <td>-1.000000</td>\n",
504 |        "    </tr>\n",
505 |        "    <tr>\n",
506 |        "      <th>1</th>\n",
507 |        "      <td>-0.777778</td>\n",
508 |        "      <td>-0.777778</td>\n",
509 |        "      <td>-0.777778</td>\n",
510 |        "      <td>-0.777778</td>\n",
511 |        "      <td>-0.777778</td>\n",
512 |        "    </tr>\n",
513 |        "    <tr>\n",
514 |        "      <th>2</th>\n",
515 |        "      <td>-0.555556</td>\n",
516 |        "      <td>-0.555556</td>\n",
517 |        "      <td>-0.555556</td>\n",
518 |        "      <td>-0.555556</td>\n",
519 |        "      <td>-0.555556</td>\n",
520 |        "    </tr>\n",
521 |        "    <tr>\n",
522 |        "      <th>3</th>\n",
523 |        "      <td>-0.333333</td>\n",
524 |        "      <td>-0.333333</td>\n",
525 |        "      <td>-0.333333</td>\n",
526 |        "      <td>-0.333333</td>\n",
527 |        "      <td>-0.333333</td>\n",
528 |        "    </tr>\n",
529 |        "    <tr>\n",
530 |        "      <th>4</th>\n",
531 |        "      <td>-0.111111</td>\n",
532 |        "      <td>-0.111111</td>\n",
533 |        "      <td>-0.111111</td>\n",
534 |        "      <td>-0.111111</td>\n",
535 |        "      <td>-0.111111</td>\n",
536 |        "    </tr>\n",
537 |        "    <tr>\n",
538 |        "      <th>5</th>\n",
539 |        "      <td>0.111111</td>\n",
540 |        "      <td>0.111111</td>\n",
541 |        "      <td>0.111111</td>\n",
542 |        "      <td>0.111111</td>\n",
543 |        "      <td>0.111111</td>\n",
544 |        "    </tr>\n",
545 |        "    <tr>\n",
546 |        "      <th>6</th>\n",
547 |        "      <td>0.333333</td>\n",
548 |        "      <td>0.333333</td>\n",
549 |        "      <td>0.333333</td>\n",
550 |        "      <td>0.333333</td>\n",
551 |        "      <td>0.333333</td>\n",
552 |        "    </tr>\n",
553 |        "    <tr>\n",
554 |        "      <th>7</th>\n",
555 |        "      <td>0.555556</td>\n",
556 |        "      <td>0.555556</td>\n",
557 |        "      <td>0.555556</td>\n",
558 |        "      <td>0.555556</td>\n",
559 |        "      <td>0.555556</td>\n",
560 |        "    </tr>\n",
561 |        "    <tr>\n",
562 |        "      <th>8</th>\n",
563 |        "      <td>0.777778</td>\n",
564 |        "      <td>0.777778</td>\n",
565 |        "      <td>0.777778</td>\n",
566 |        "      <td>0.777778</td>\n",
567 |        "      <td>0.777778</td>\n",
568 |        "    </tr>\n",
569 |        "    <tr>\n",
570 |        "      <th>9</th>\n",
571 |        "      <td>1.000000</td>\n",
572 |        "      <td>1.000000</td>\n",
573 |        "      <td>3212.321000</td>\n",
574 |        "      <td>1.000000</td>\n",
575 |        "      <td>1.000000</td>\n",
576 |        "    </tr>\n",
577 |        "  </tbody>\n",
578 |        "</table>\n",
579 |        "</div>"
580 |       ],
581 |       "text/plain": [
582 |        "   hundreds  neg_thousands     outliers      tens  thousands\n",
583 |        "0 -1.000000      -1.000000 -3212.321000 -1.000000  -1.000000\n",
584 |        "1 -0.777778      -0.777778    -0.777778 -0.777778  -0.777778\n",
585 |        "2 -0.555556      -0.555556    -0.555556 -0.555556  -0.555556\n",
586 |        "3 -0.333333      -0.333333    -0.333333 -0.333333  -0.333333\n",
587 |        "4 -0.111111      -0.111111    -0.111111 -0.111111  -0.111111\n",
588 |        "5  0.111111       0.111111     0.111111  0.111111   0.111111\n",
589 |        "6  0.333333       0.333333     0.333333  0.333333   0.333333\n",
590 |        "7  0.555556       0.555556     0.555556  0.555556   0.555556\n",
591 |        "8  0.777778       0.777778     0.777778  0.777778   0.777778\n",
592 |        "9  1.000000       1.000000  3212.321000  1.000000   1.000000"
593 |       ]
594 |      },
595 |      "execution_count": 38,
596 |      "metadata": {},
597 |      "output_type": "execute_result"
598 |     }
599 |    ],
600 |    "source": [
601 |     "robust = RobustScaler()\n",
602 |     "robust.fit(data)\n",
603 |     "data_robust_scaled = robust.transform(data)\n",
604 |     "pd.DataFrame(data_robust_scaled, columns=columns)"
605 |    ]
606 |   },
607 |   {
608 |    "cell_type": "code",
609 |    "execution_count": 39,
610 |    "metadata": {},
611 |    "outputs": [
612 |     {
613 |      "data": {
614 |       "text/html": [
615 |        "<div>\n",
616 |        "<style>\n",
617 |        "    .dataframe thead tr:only-child th {\n",
618 |        "        text-align: right;\n",
619 |        "    }\n",
620 |        "\n",
621 |        "    .dataframe thead th {\n",
622 |        "        text-align: left;\n",
623 |        "    }\n",
624 |        "\n",
625 |        "    .dataframe tbody tr th {\n",
626 |        "        vertical-align: top;\n",
627 |        "    }\n",
628 |        "</style>\n",
629 |        "<table border=\"1\" class=\"dataframe\">\n",
630 |        "  <thead>\n",
631 |        "    <tr style=\"text-align: right;\">\n",
632 |        "      <th></th>\n",
633 |        "      <th>hundreds</th>\n",
634 |        "      <th>neg_thousands</th>\n",
635 |        "      <th>outliers</th>\n",
636 |        "      <th>tens</th>\n",
637 |        "      <th>thousands</th>\n",
638 |        "    </tr>\n",
639 |        "  </thead>\n",
640 |        "  <tbody>\n",
641 |        "    <tr>\n",
642 |        "      <th>0</th>\n",
643 |        "      <td>-1.566699</td>\n",
644 |        "      <td>-1.566699</td>\n",
645 |        "      <td>-2.236068</td>\n",
646 |        "      <td>-1.566699</td>\n",
647 |        "      <td>-1.566699</td>\n",
648 |        "    </tr>\n",
649 |        "    <tr>\n",
650 |        "      <th>1</th>\n",
651 |        "      <td>-1.218544</td>\n",
652 |        "      <td>-1.218544</td>\n",
653 |        "      <td>-0.000541</td>\n",
654 |        "      <td>-1.218544</td>\n",
655 |        "      <td>-1.218544</td>\n",
656 |        "    </tr>\n",
657 |        "    <tr>\n",
658 |        "      <th>2</th>\n",
659 |        "      <td>-0.870388</td>\n",
660 |        "      <td>-0.870388</td>\n",
661 |        "      <td>-0.000387</td>\n",
662 |        "      <td>-0.870388</td>\n",
663 |        "      <td>-0.870388</td>\n",
664 |        "    </tr>\n",
665 |        "    <tr>\n",
666 |        "      <th>3</th>\n",
667 |        "      <td>-0.522233</td>\n",
668 |        "      <td>-0.522233</td>\n",
669 |        "      <td>-0.000232</td>\n",
670 |        "      <td>-0.522233</td>\n",
671 |        "      <td>-0.522233</td>\n",
672 |        "    </tr>\n",
673 |        "    <tr>\n",
674 |        "      <th>4</th>\n",
675 |        "      <td>-0.174078</td>\n",
676 |        "      <td>-0.174078</td>\n",
677 |        "      <td>-0.000077</td>\n",
678 |        "      <td>-0.174078</td>\n",
679 |        "      <td>-0.174078</td>\n",
680 |        "    </tr>\n",
681 |        "    <tr>\n",
682 |        "      <th>5</th>\n",
683 |        "      <td>0.174078</td>\n",
684 |        "      <td>0.174078</td>\n",
685 |        "      <td>0.000077</td>\n",
686 |        "      <td>0.174078</td>\n",
687 |        "      <td>0.174078</td>\n",
688 |        "    </tr>\n",
689 |        "    <tr>\n",
690 |        "      <th>6</th>\n",
691 |        "      <td>0.522233</td>\n",
692 |        "      <td>0.522233</td>\n",
693 |        "      <td>0.000232</td>\n",
694 |        "      <td>0.522233</td>\n",
695 |        "      <td>0.522233</td>\n",
696 |        "    </tr>\n",
697 |        "    <tr>\n",
698 |        "      <th>7</th>\n",
699 |        "      <td>0.870388</td>\n",
700 |        "      <td>0.870388</td>\n",
701 |        "      <td>0.000387</td>\n",
702 |        "      <td>0.870388</td>\n",
703 |        "      <td>0.870388</td>\n",
704 |        "    </tr>\n",
705 |        "    <tr>\n",
706 |        "      <th>8</th>\n",
707 |        "      <td>1.218544</td>\n",
708 |        "      <td>1.218544</td>\n",
709 |        "      <td>0.000541</td>\n",
710 |        "      <td>1.218544</td>\n",
711 |        "      <td>1.218544</td>\n",
712 |        "    </tr>\n",
713 |        "    <tr>\n",
714 |        "      <th>9</th>\n",
715 |        "      <td>1.566699</td>\n",
716 |        "      <td>1.566699</td>\n",
717 |        "      <td>2.236068</td>\n",
718 |        "      <td>1.566699</td>\n",
719 |        "      <td>1.566699</td>\n",
720 |        "    </tr>\n",
721 |        "  </tbody>\n",
722 |        "</table>\n",
723 |        "</div>"
724 |       ],
725 |       "text/plain": [
726 |        "   hundreds  neg_thousands  outliers      tens  thousands\n",
727 |        "0 -1.566699      -1.566699 -2.236068 -1.566699  -1.566699\n",
728 |        "1 -1.218544      -1.218544 -0.000541 -1.218544  -1.218544\n",
729 |        "2 -0.870388      -0.870388 -0.000387 -0.870388  -0.870388\n",
730 |        "3 -0.522233      -0.522233 -0.000232 -0.522233  -0.522233\n",
731 |        "4 -0.174078      -0.174078 -0.000077 -0.174078  -0.174078\n",
732 |        "5  0.174078       0.174078  0.000077  0.174078   0.174078\n",
733 |        "6  0.522233       0.522233  0.000232  0.522233   0.522233\n",
734 |        "7  0.870388       0.870388  0.000387  0.870388   0.870388\n",
735 |        "8  1.218544       1.218544  0.000541  1.218544   1.218544\n",
736 |        "9  1.566699       1.566699  2.236068  1.566699   1.566699"
737 |       ]
738 |      },
739 |      "execution_count": 39,
740 |      "metadata": {},
741 |      "output_type": "execute_result"
742 |     }
743 |    ],
744 |    "source": [
745 |     "standard = StandardScaler()\n",
746 |     "standard.fit(data)\n",
747 |     "data_standard_scaled = standard.transform(data)\n",
748 |     "pd.DataFrame(data_standard_scaled, columns=columns)"
749 |    ]
750 |   },
751 |   {
752 |    "cell_type": "code",
753 |    "execution_count": null,
754 |    "metadata": {
755 |     "collapsed": true
756 |    },
757 |    "outputs": [],
758 |    "source": []
759 |   }
760 |  ],
761 |  "metadata": {
762 |   "kernelspec": {
763 |    "display_name": "Python 3",
764 |    "language": "python",
765 |    "name": "python3"
766 |   },
767 |   "language_info": {
768 |    "codemirror_mode": {
769 |     "name": "ipython",
770 |     "version": 3
771 |    },
772 |    "file_extension": ".py",
773 |    "mimetype": "text/x-python",
774 |    "name": "python",
775 |    "nbconvert_exporter": "python",
776 |    "pygments_lexer": "ipython3",
777 |    "version": "3.6.1"
778 |   }
779 |  },
780 |  "nbformat": 4,
781 |  "nbformat_minor": 2
782 | }
783 | 


--------------------------------------------------------------------------------
/bokeh.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from bokeh.plotting import figure, output_file, show\n",
 10 |     "from bokeh.io import output_notebook"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 4,
 16 |    "metadata": {
 17 |     "collapsed": true
 18 |    },
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "x = [1,3,4,7]\n",
 22 |     "y = [6,4,6,1]"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 8,
 28 |    "metadata": {
 29 |     "collapsed": true
 30 |    },
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "plot = figure(plot_width=400, plot_height=400, tools='pan,box_zoom')"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 9,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "data": {
 43 |       "text/html": [
 44 |        "<div style=\"display: table;\"><div style=\"display: table-row;\"><div style=\"display: table-cell;\"><b title=\"bokeh.models.renderers.GlyphRenderer\">GlyphRenderer</b>(</div><div style=\"display: table-cell;\">id&nbsp;=&nbsp;'f8a3c423-e17d-4b1d-8f81-1d3840e4a9d0', <span id=\"df32f8a0-35c6-4be2-ad0c-27e9ef639699\" style=\"cursor: pointer;\">&hellip;)</span></div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">data_source&nbsp;=&nbsp;ColumnDataSource(id='06745886-9df8-4077-abc5-2578eb728cf0', ...),</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">glyph&nbsp;=&nbsp;Circle(id='1f83b727-488a-4abc-b9ca-19a01e960f6a', ...),</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">hover_glyph&nbsp;=&nbsp;None,</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">js_event_callbacks&nbsp;=&nbsp;{},</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">js_property_callbacks&nbsp;=&nbsp;{},</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">level&nbsp;=&nbsp;'glyph',</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">muted&nbsp;=&nbsp;False,</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div 
style=\"display: table-cell;\">muted_glyph&nbsp;=&nbsp;None,</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">name&nbsp;=&nbsp;None,</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">nonselection_glyph&nbsp;=&nbsp;Circle(id='3ceb6d9a-e1aa-458f-91ff-b51dfee9a77c', ...),</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">selection_glyph&nbsp;=&nbsp;None,</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">subscribed_events&nbsp;=&nbsp;[],</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">tags&nbsp;=&nbsp;[],</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">view&nbsp;=&nbsp;CDSView(id='df97428e-f895-490d-9594-340e90e9de57', ...),</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">visible&nbsp;=&nbsp;True,</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">x_range_name&nbsp;=&nbsp;'default',</div></div><div class=\"c672e27a-c899-40d9-a475-cace1227ced7\" style=\"display: none;\"><div style=\"display: table-cell;\"></div><div style=\"display: table-cell;\">y_range_name&nbsp;=&nbsp;'default')</div></div></div>\n",
 45 |        "<script>\n",
 46 |        "(function() {\n",
 47 |        "  var expanded = false;\n",
 48 |        "  var ellipsis = document.getElementById(\"df32f8a0-35c6-4be2-ad0c-27e9ef639699\");\n",
 49 |        "  ellipsis.addEventListener(\"click\", function() {\n",
 50 |        "    var rows = document.getElementsByClassName(\"c672e27a-c899-40d9-a475-cace1227ced7\");\n",
 51 |        "    for (var i = 0; i < rows.length; i++) {\n",
 52 |        "      var el = rows[i];\n",
 53 |        "      el.style.display = expanded ? \"none\" : \"table-row\";\n",
 54 |        "    }\n",
 55 |        "    ellipsis.innerHTML = expanded ? \"&hellip;)\" : \"&lsaquo;&lsaquo;&lsaquo;\";\n",
 56 |        "    expanded = !expanded;\n",
 57 |        "  });\n",
 58 |        "})();\n",
 59 |        "</script>\n"
 60 |       ],
 61 |       "text/plain": [
 62 |        "GlyphRenderer(id='f8a3c423-e17d-4b1d-8f81-1d3840e4a9d0', ...)"
 63 |       ]
 64 |      },
 65 |      "execution_count": 9,
 66 |      "metadata": {},
 67 |      "output_type": "execute_result"
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "plot.circle(x, y)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 11,
 77 |    "metadata": {},
 78 |    "outputs": [
 79 |     {
 80 |      "data": {
 81 |       "text/html": [
 82 |        "\n",
 83 |        "    <div class=\"bk-root\">\n",
 84 |        "        <a href=\"https://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
 85 |        "        <span id=\"b0b348ba-b56f-4ff7-b7e4-9ce1f4aabd8e\">Loading BokehJS ...</span>\n",
 86 |        "    </div>"
 87 |       ]
 88 |      },
 89 |      "metadata": {},
 90 |      "output_type": "display_data"
 91 |     },
 92 |     {
 93 |      "data": {
 94 |       "application/javascript": [
 95 |        "\n",
 96 |        "(function(root) {\n",
 97 |        "  function now() {\n",
 98 |        "    return new Date();\n",
 99 |        "  }\n",
100 |        "\n",
101 |        "  var force = true;\n",
102 |        "\n",
103 |        "  if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n",
104 |        "    root._bokeh_onload_callbacks = [];\n",
105 |        "    root._bokeh_is_loading = undefined;\n",
106 |        "  }\n",
107 |        "\n",
108 |        "\n",
109 |        "  \n",
110 |        "  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
111 |        "    root._bokeh_timeout = Date.now() + 5000;\n",
112 |        "    root._bokeh_failed_load = false;\n",
113 |        "  }\n",
114 |        "\n",
115 |        "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
116 |        "     \"<div style='background-color: #fdd'>\\n\"+\n",
117 |        "     \"<p>\\n\"+\n",
118 |        "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
119 |        "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
120 |        "     \"</p>\\n\"+\n",
121 |        "     \"<ul>\\n\"+\n",
122 |        "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
123 |        "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
124 |        "     \"</ul>\\n\"+\n",
125 |        "     \"<code>\\n\"+\n",
126 |        "     \"from bokeh.resources import INLINE\\n\"+\n",
127 |        "     \"output_notebook(resources=INLINE)\\n\"+\n",
128 |        "     \"</code>\\n\"+\n",
129 |        "     \"</div>\"}};\n",
130 |        "\n",
131 |        "  function display_loaded() {\n",
132 |        "    if (root.Bokeh !== undefined) {\n",
133 |        "      var el = document.getElementById(\"b0b348ba-b56f-4ff7-b7e4-9ce1f4aabd8e\");\n",
134 |        "      if (el != null) {\n",
135 |        "        el.textContent = \"BokehJS \" + Bokeh.version + \" successfully loaded.\";\n",
136 |        "      }\n",
137 |        "    } else if (Date.now() < root._bokeh_timeout) {\n",
138 |        "      setTimeout(display_loaded, 100)\n",
139 |        "    }\n",
140 |        "  }\n",
141 |        "\n",
142 |        "\n",
143 |        "  function run_callbacks() {\n",
144 |        "    try {\n",
145 |        "      root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
146 |        "    }\n",
147 |        "    finally {\n",
148 |        "      delete root._bokeh_onload_callbacks\n",
149 |        "    }\n",
150 |        "    console.info(\"Bokeh: all callbacks have finished\");\n",
151 |        "  }\n",
152 |        "\n",
153 |        "  function load_libs(js_urls, callback) {\n",
154 |        "    root._bokeh_onload_callbacks.push(callback);\n",
155 |        "    if (root._bokeh_is_loading > 0) {\n",
156 |        "      console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
157 |        "      return null;\n",
158 |        "    }\n",
159 |        "    if (js_urls == null || js_urls.length === 0) {\n",
160 |        "      run_callbacks();\n",
161 |        "      return null;\n",
162 |        "    }\n",
163 |        "    console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
164 |        "    root._bokeh_is_loading = js_urls.length;\n",
165 |        "    for (var i = 0; i < js_urls.length; i++) {\n",
166 |        "      var url = js_urls[i];\n",
167 |        "      var s = document.createElement('script');\n",
168 |        "      s.src = url;\n",
169 |        "      s.async = false;\n",
170 |        "      s.onreadystatechange = s.onload = function() {\n",
171 |        "        root._bokeh_is_loading--;\n",
172 |        "        if (root._bokeh_is_loading === 0) {\n",
173 |        "          console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
174 |        "          run_callbacks()\n",
175 |        "        }\n",
176 |        "      };\n",
177 |        "      s.onerror = function() {\n",
178 |        "        console.warn(\"failed to load library \" + url);\n",
179 |        "      };\n",
180 |        "      console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
181 |        "      document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
182 |        "    }\n",
183 |        "  };var element = document.getElementById(\"b0b348ba-b56f-4ff7-b7e4-9ce1f4aabd8e\");\n",
184 |        "  if (element == null) {\n",
185 |        "    console.log(\"Bokeh: ERROR: autoload.js configured with elementid 'b0b348ba-b56f-4ff7-b7e4-9ce1f4aabd8e' but no matching script tag was found. \")\n",
186 |        "    return false;\n",
187 |        "  }\n",
188 |        "\n",
189 |        "  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.7.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.7.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.7.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.7.min.js\"];\n",
190 |        "\n",
191 |        "  var inline_js = [\n",
192 |        "    function(Bokeh) {\n",
193 |        "      Bokeh.set_log_level(\"info\");\n",
194 |        "    },\n",
195 |        "    \n",
196 |        "    function(Bokeh) {\n",
197 |        "      \n",
198 |        "    },\n",
199 |        "    \n",
200 |        "    function(Bokeh) {\n",
201 |        "      \n",
202 |        "      document.getElementById(\"b0b348ba-b56f-4ff7-b7e4-9ce1f4aabd8e\").textContent = \"BokehJS is loading...\";\n",
203 |        "    },\n",
204 |        "    function(Bokeh) {\n",
205 |        "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.7.min.css\");\n",
206 |        "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.7.min.css\");\n",
207 |        "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.7.min.css\");\n",
208 |        "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.7.min.css\");\n",
209 |        "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.7.min.css\");\n",
210 |        "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.7.min.css\");\n",
211 |        "    }\n",
212 |        "  ];\n",
213 |        "\n",
214 |        "  function run_inline_js() {\n",
215 |        "    \n",
216 |        "    if ((root.Bokeh !== undefined) || (force === true)) {\n",
217 |        "      for (var i = 0; i < inline_js.length; i++) {\n",
218 |        "        inline_js[i].call(root, root.Bokeh);\n",
219 |        "      }if (force === true) {\n",
220 |        "        display_loaded();\n",
221 |        "      }} else if (Date.now() < root._bokeh_timeout) {\n",
222 |        "      setTimeout(run_inline_js, 100);\n",
223 |        "    } else if (!root._bokeh_failed_load) {\n",
224 |        "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
225 |        "      root._bokeh_failed_load = true;\n",
226 |        "    } else if (force !== true) {\n",
227 |        "      var cell = $(document.getElementById(\"b0b348ba-b56f-4ff7-b7e4-9ce1f4aabd8e\")).parents('.cell').data().cell;\n",
228 |        "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
229 |        "    }\n",
230 |        "\n",
231 |        "  }\n",
232 |        "\n",
233 |        "  if (root._bokeh_is_loading === 0) {\n",
234 |        "    console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
235 |        "    run_inline_js();\n",
236 |        "  } else {\n",
237 |        "    load_libs(js_urls, function() {\n",
238 |        "      console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
239 |        "      run_inline_js();\n",
240 |        "    });\n",
241 |        "  }\n",
242 |        "}(window));"
243 |       ]
244 |      },
245 |      "metadata": {},
246 |      "output_type": "display_data"
247 |     },
248 |     {
249 |      "data": {
250 |       "text/html": [
251 |        "\n",
252 |        "\n",
253 |        "    <div class=\"bk-root\">\n",
254 |        "        <div class=\"bk-plotdiv\" id=\"958f0b8a-b583-4642-b8c2-93d58da42a40\"></div>\n",
255 |        "    </div>\n",
256 |        "<script type=\"text/javascript\">\n",
257 |        "  \n",
258 |        "  (function(root) {\n",
259 |        "    function now() {\n",
260 |        "      return new Date();\n",
261 |        "    }\n",
262 |        "  \n",
263 |        "    var force = false;\n",
264 |        "  \n",
265 |        "    if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n",
266 |        "      root._bokeh_onload_callbacks = [];\n",
267 |        "      root._bokeh_is_loading = undefined;\n",
268 |        "    }\n",
269 |        "  \n",
270 |        "  \n",
271 |        "    \n",
272 |        "    if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
273 |        "      root._bokeh_timeout = Date.now() + 0;\n",
274 |        "      root._bokeh_failed_load = false;\n",
275 |        "    }\n",
276 |        "  \n",
277 |        "    var NB_LOAD_WARNING = {'data': {'text/html':\n",
278 |        "       \"<div style='background-color: #fdd'>\\n\"+\n",
279 |        "       \"<p>\\n\"+\n",
280 |        "       \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
281 |        "       \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
282 |        "       \"</p>\\n\"+\n",
283 |        "       \"<ul>\\n\"+\n",
284 |        "       \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
285 |        "       \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
286 |        "       \"</ul>\\n\"+\n",
287 |        "       \"<code>\\n\"+\n",
288 |        "       \"from bokeh.resources import INLINE\\n\"+\n",
289 |        "       \"output_notebook(resources=INLINE)\\n\"+\n",
290 |        "       \"</code>\\n\"+\n",
291 |        "       \"</div>\"}};\n",
292 |        "  \n",
293 |        "    function display_loaded() {\n",
294 |        "      if (root.Bokeh !== undefined) {\n",
295 |        "        var el = document.getElementById(\"958f0b8a-b583-4642-b8c2-93d58da42a40\");\n",
296 |        "        if (el != null) {\n",
297 |        "          el.textContent = \"BokehJS \" + Bokeh.version + \" successfully loaded.\";\n",
298 |        "        }\n",
299 |        "      } else if (Date.now() < root._bokeh_timeout) {\n",
300 |        "        setTimeout(display_loaded, 100)\n",
301 |        "      }\n",
302 |        "    }\n",
303 |        "  \n",
304 |        "  \n",
305 |        "    function run_callbacks() {\n",
306 |        "      try {\n",
307 |        "        root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
308 |        "      }\n",
309 |        "      finally {\n",
310 |        "        delete root._bokeh_onload_callbacks\n",
311 |        "      }\n",
312 |        "      console.info(\"Bokeh: all callbacks have finished\");\n",
313 |        "    }\n",
314 |        "  \n",
315 |        "    function load_libs(js_urls, callback) {\n",
316 |        "      root._bokeh_onload_callbacks.push(callback);\n",
317 |        "      if (root._bokeh_is_loading > 0) {\n",
318 |        "        console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
319 |        "        return null;\n",
320 |        "      }\n",
321 |        "      if (js_urls == null || js_urls.length === 0) {\n",
322 |        "        run_callbacks();\n",
323 |        "        return null;\n",
324 |        "      }\n",
325 |        "      console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
326 |        "      root._bokeh_is_loading = js_urls.length;\n",
327 |        "      for (var i = 0; i < js_urls.length; i++) {\n",
328 |        "        var url = js_urls[i];\n",
329 |        "        var s = document.createElement('script');\n",
330 |        "        s.src = url;\n",
331 |        "        s.async = false;\n",
332 |        "        s.onreadystatechange = s.onload = function() {\n",
333 |        "          root._bokeh_is_loading--;\n",
334 |        "          if (root._bokeh_is_loading === 0) {\n",
335 |        "            console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
336 |        "            run_callbacks()\n",
337 |        "          }\n",
338 |        "        };\n",
339 |        "        s.onerror = function() {\n",
340 |        "          console.warn(\"failed to load library \" + url);\n",
341 |        "        };\n",
342 |        "        console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
343 |        "        document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
344 |        "      }\n",
345 |        "    };var element = document.getElementById(\"958f0b8a-b583-4642-b8c2-93d58da42a40\");\n",
346 |        "    if (element == null) {\n",
347 |        "      console.log(\"Bokeh: ERROR: autoload.js configured with elementid '958f0b8a-b583-4642-b8c2-93d58da42a40' but no matching script tag was found. \")\n",
348 |        "      return false;\n",
349 |        "    }\n",
350 |        "  \n",
351 |        "    var js_urls = [];\n",
352 |        "  \n",
353 |        "    var inline_js = [\n",
354 |        "      function(Bokeh) {\n",
355 |        "        (function() {\n",
356 |        "          var fn = function() {\n",
357 |        "            var docs_json = {\"7f21f22a-3e90-46b1-8c88-f26a74ca79f9\":{\"roots\":{\"references\":[{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"3ceb6d9a-e1aa-458f-91ff-b51dfee9a77c\",\"type\":\"Circle\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"54dc4c01-932f-482f-b8b2-28673dfe9a8a\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"468b6842-130c-43fb-b09f-f0c7dbe029cb\",\"type\":\"BasicTicker\"}},\"id\":\"f6431223-d5d3-416a-af2a-f5dcd278de6a\",\"type\":\"Grid\"},{\"attributes\":{\"callback\":null},\"id\":\"4282b9aa-7885-4590-9abe-b6deedfc401c\",\"type\":\"DataRange1d\"},{\"attributes\":{\"plot\":null,\"text\":\"\"},\"id\":\"efaa2c1f-42ad-48de-af14-bcefbc654e8e\",\"type\":\"Title\"},{\"attributes\":{\"data_source\":{\"id\":\"06745886-9df8-4077-abc5-2578eb728cf0\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1f83b727-488a-4abc-b9ca-19a01e960f6a\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"3ceb6d9a-e1aa-458f-91ff-b51dfee9a77c\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"df97428e-f895-490d-9594-340e90e9de57\",\"type\":\"CDSView\"}},\"id\":\"f8a3c423-e17d-4b1d-8f81-1d3840e4a9d0\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"plot\":{\"id\":\"54dc4c01-932f-482f-b8b2-28673dfe9a8a\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"72a451e0-574c-4702-be3e-fd4a09a8817a\",\"type\":\"BasicTicker\"}},\"id\":\"a3f6cb23-1e56-41d4-bf64-895c4c9062cf\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"9368e81d-188b-4e83-879e-b47df2cc75c4\",\"type\":\"PanTool\"},{\"attributes\":{\"below\":[{\"id\":\"8887aa1d-c0b6-4c89-bb76-ba892da8e81a\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"bf2150c5-7c7d-4a18-b432-f460fb36c1ec\",\"type\":\"LinearAxis\"}],\"plot_height\":400,\"plot_width\":400,\"ren
derers\":[{\"id\":\"8887aa1d-c0b6-4c89-bb76-ba892da8e81a\",\"type\":\"LinearAxis\"},{\"id\":\"a3f6cb23-1e56-41d4-bf64-895c4c9062cf\",\"type\":\"Grid\"},{\"id\":\"bf2150c5-7c7d-4a18-b432-f460fb36c1ec\",\"type\":\"LinearAxis\"},{\"id\":\"f6431223-d5d3-416a-af2a-f5dcd278de6a\",\"type\":\"Grid\"},{\"id\":\"5e3c141f-0946-41d1-8f6c-fa0e2c6f3a5b\",\"type\":\"BoxAnnotation\"},{\"id\":\"f8a3c423-e17d-4b1d-8f81-1d3840e4a9d0\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"efaa2c1f-42ad-48de-af14-bcefbc654e8e\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"68e58385-5c52-4b3d-9dc4-da0592c34f09\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"4282b9aa-7885-4590-9abe-b6deedfc401c\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"ecb71062-d250-4a07-a2cb-9ea82a5df1ac\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"2a22769f-609b-4a56-b719-0864778f2089\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"48120450-a3e0-42d1-b539-6198c2002ebb\",\"type\":\"LinearScale\"}},\"id\":\"54dc4c01-932f-482f-b8b2-28673dfe9a8a\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"f4039914-a6a6-473e-9aff-26ba71e572da\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"5e3c141f-0946-41d1-8f6c-fa0e2c6f3a5b\",\"type\":\"BoxAnnotation\"},{\"attributes\":{\"callback\":null},\"id\":\"2a22769f-609b-4a56-b719-0864778f2089\",\"type\":\"DataRange1d\"},{\"attributes\":{\"formatter\":{\"id\":\"f4039914-a6a6-473e-9aff-26ba71e572da\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"54dc4c01-932f-482f-b8b2-28673dfe9a8a\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"468b6842-130c-43fb-b09f-f0c7dbe029cb\",\"type\":\"BasicTi
cker\"}},\"id\":\"bf2150c5-7c7d-4a18-b432-f460fb36c1ec\",\"type\":\"LinearAxis\"},{\"attributes\":{\"callback\":null,\"column_names\":[\"x\",\"y\"],\"data\":{\"x\":[1,3,4,7],\"y\":[6,4,6,1]}},\"id\":\"06745886-9df8-4077-abc5-2578eb728cf0\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"overlay\":{\"id\":\"5e3c141f-0946-41d1-8f6c-fa0e2c6f3a5b\",\"type\":\"BoxAnnotation\"}},\"id\":\"37dfb14a-e469-44b4-b801-495a4887e821\",\"type\":\"BoxZoomTool\"},{\"attributes\":{\"formatter\":{\"id\":\"d785e5e0-5a6c-4b14-ac91-c3fa402b7577\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"54dc4c01-932f-482f-b8b2-28673dfe9a8a\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"72a451e0-574c-4702-be3e-fd4a09a8817a\",\"type\":\"BasicTicker\"}},\"id\":\"8887aa1d-c0b6-4c89-bb76-ba892da8e81a\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"48120450-a3e0-42d1-b539-6198c2002ebb\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"d785e5e0-5a6c-4b14-ac91-c3fa402b7577\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"72a451e0-574c-4702-be3e-fd4a09a8817a\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"ecb71062-d250-4a07-a2cb-9ea82a5df1ac\",\"type\":\"LinearScale\"},{\"attributes\":{\"source\":{\"id\":\"06745886-9df8-4077-abc5-2578eb728cf0\",\"type\":\"ColumnDataSource\"}},\"id\":\"df97428e-f895-490d-9594-340e90e9de57\",\"type\":\"CDSView\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"9368e81d-188b-4e83-879e-b47df2cc75c4\",\"type\":\"PanTool\"},{\"id\":\"37dfb14a-e469-44b4-b801-495a4887e821\",\"type\":\"BoxZoomTool\"}]},\"id\":\"68e58385-5c52-4b3d-9dc4-da0592c34f09\",\"type\":\"Toolbar\"},{\"attributes\":{\"fill_color\":{\"value\":\"#1f77b4\"},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1f83b727-488a-4abc-b9ca-19a01e960f6a\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"468b6842-130c-43fb-b09f-f0
c7dbe029cb\",\"type\":\"BasicTicker\"}],\"root_ids\":[\"54dc4c01-932f-482f-b8b2-28673dfe9a8a\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.7\"}};\n",
358 |        "            var render_items = [{\"docid\":\"7f21f22a-3e90-46b1-8c88-f26a74ca79f9\",\"elementid\":\"958f0b8a-b583-4642-b8c2-93d58da42a40\",\"modelid\":\"54dc4c01-932f-482f-b8b2-28673dfe9a8a\"}];\n",
359 |        "            \n",
360 |        "            Bokeh.embed.embed_items(docs_json, render_items);\n",
361 |        "          };\n",
362 |        "          if (document.readyState != \"loading\") fn();\n",
363 |        "          else document.addEventListener(\"DOMContentLoaded\", fn);\n",
364 |        "        })();\n",
365 |        "      },\n",
366 |        "      function(Bokeh) {\n",
367 |        "      }\n",
368 |        "    ];\n",
369 |        "  \n",
370 |        "    function run_inline_js() {\n",
371 |        "      \n",
372 |        "      if ((root.Bokeh !== undefined) || (force === true)) {\n",
373 |        "        for (var i = 0; i < inline_js.length; i++) {\n",
374 |        "          inline_js[i].call(root, root.Bokeh);\n",
375 |        "        }if (force === true) {\n",
376 |        "          display_loaded();\n",
377 |        "        }} else if (Date.now() < root._bokeh_timeout) {\n",
378 |        "        setTimeout(run_inline_js, 100);\n",
379 |        "      } else if (!root._bokeh_failed_load) {\n",
380 |        "        console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
381 |        "        root._bokeh_failed_load = true;\n",
382 |        "      } else if (force !== true) {\n",
383 |        "        var cell = $(document.getElementById(\"958f0b8a-b583-4642-b8c2-93d58da42a40\")).parents('.cell').data().cell;\n",
384 |        "        cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
385 |        "      }\n",
386 |        "  \n",
387 |        "    }\n",
388 |        "  \n",
389 |        "    if (root._bokeh_is_loading === 0) {\n",
390 |        "      console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
391 |        "      run_inline_js();\n",
392 |        "    } else {\n",
393 |        "      load_libs(js_urls, function() {\n",
394 |        "        console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
395 |        "        run_inline_js();\n",
396 |        "      });\n",
397 |        "    }\n",
398 |        "  }(window));\n",
399 |        "</script>"
400 |       ]
401 |      },
402 |      "metadata": {},
403 |      "output_type": "display_data"
404 |     }
405 |    ],
406 |    "source": [
407 |     "output_notebook()\n",
408 |     "output_file('x.html')\n",
409 |     "show(plot)"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "code",
414 |    "execution_count": null,
415 |    "metadata": {
416 |     "collapsed": true
417 |    },
418 |    "outputs": [],
419 |    "source": []
420 |   }
421 |  ],
422 |  "metadata": {
423 |   "kernelspec": {
424 |    "display_name": "Python 3",
425 |    "language": "python",
426 |    "name": "python3"
427 |   },
428 |   "language_info": {
429 |    "codemirror_mode": {
430 |     "name": "ipython",
431 |     "version": 3
432 |    },
433 |    "file_extension": ".py",
434 |    "mimetype": "text/x-python",
435 |    "name": "python",
436 |    "nbconvert_exporter": "python",
437 |    "pygments_lexer": "ipython3",
438 |    "version": "3.6.2"
439 |   }
440 |  },
441 |  "nbformat": 4,
442 |  "nbformat_minor": 2
443 | }
444 | 


--------------------------------------------------------------------------------
/insta-api.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import requests\n",
 12 |     "import json"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "metadata": {
 19 |     "collapsed": true
 20 |    },
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "payload = { \"access_token\" : \"31183420.4d93899.54d8d5ac8c444e1eb82bfad2db04cc59\" }\n",
 24 |     "recent_media_url = \"https://api.instagram.com/v1/users/self/media/recent\"\n",
 25 |     "self_url = \"https://api.instagram.com/v1/users/self\""
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 4,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "self_r = requests.get(self_url, params=payload)\n",
 35 |     "self_response = json.loads(self_r.text)"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 5,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "31183420\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "user_id=self_response[\"data\"][\"id\"]\n",
 53 |     "print(user_id)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 26,
 59 |    "metadata": {
 60 |     "collapsed": true
 61 |    },
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "payload[\"count\"] = 30\n",
 65 |     "recent_r = requests.get(recent_media_url, params=payload)\n",
 66 |     "recent_response = json.loads(recent_r.text)"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 27,
 72 |    "metadata": {},
 73 |    "outputs": [
 74 |     {
 75 |      "name": "stdout",
 76 |      "output_type": "stream",
 77 |      "text": [
 78 |       "{'pagination': {}, 'data': [{'id': '1681981992014369647_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25025257_155335971856366_1103702618208731136_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25025257_155335971856366_1103702618208731136_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25025257_155335971856366_1103702618208731136_n.jpg'}}, 'created_time': '1514727900', 'caption': {'id': '17914981123041327', 'text': 'altre giorno, altre chiesa chiusa', 'created_time': '1514727900', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 7}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/BdXmbXQntNv04XrUNgY0-wVbSMwBK1Aa-Ja-FY0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1681736231435551385_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/c12.0.1055.1055/26183027_1563413867109927_2500941975943905280_n.jpg'}, 'low_resolution': {'width': 320, 'height': 312, 'url': 
'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/26183027_1563413867109927_2500941975943905280_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 625, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/26183027_1563413867109927_2500941975943905280_n.jpg'}}, 'created_time': '1514698603', 'caption': None, 'user_has_liked': False, 'likes': {'count': 12}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 1}, 'type': 'image', 'link': 'https://www.instagram.com/p/BdWujE4nsKZ0E0UtcwIc83vrowPD581WUm6CGw0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1680702603817386395_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25026020_1858431304447509_6023051016792965120_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25026020_1858431304447509_6023051016792965120_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25026020_1858431304447509_6023051016792965120_n.jpg'}}, 'created_time': '1514575385', 'caption': {'id': '17915023630026303', 'text': 'Uffff, saca el parkour.', 'created_time': '1514575385', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 22}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/BdTDh0JnAGbte9B-Ds6pgPFZ4UsOV4LYGLKZU80/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': 
'1680439648563563854_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/26151573_135144960497382_8535851668024066048_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/26151573_135144960497382_8535851668024066048_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/26151573_135144960497382_8535851668024066048_n.jpg'}}, 'created_time': '1514544038', 'caption': {'id': '17890735372186348', 'text': 'Igualita que en el videojuego 😘', 'created_time': '1514544038', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 24}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 1}, 'type': 'image', 'link': 'https://www.instagram.com/p/BdSHvT_HQ1OJlXDxBojPyVvEXBfhd3zsomLrJs0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1679910026432338103_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/26153439_831026207079998_5080606892787499008_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/26153439_831026207079998_5080606892787499008_n.jpg'}, 'standard_resolution': 
{'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/26153439_831026207079998_5080606892787499008_n.jpg'}}, 'created_time': '1514480902', 'caption': {'id': '17915541166002316', 'text': 'No se roben las monedas, no sean culeros.', 'created_time': '1514480902', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 14}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 2}, 'type': 'image', 'link': 'https://www.instagram.com/p/BdQPUS8Hdi3-W70izTto69BTmnNX1laYWe9SR80/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1679021698421887813_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/26181522_387098571730770_3171841448404320256_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/26181522_387098571730770_3171841448404320256_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/26181522_387098571730770_3171841448404320256_n.jpg'}}, 'created_time': '1514375005', 'caption': {'id': '17890693324188171', 'text': 'Vámonos a...', 'created_time': '1514375005', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 13}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 2}, 'type': 
'image', 'link': 'https://www.instagram.com/p/BdNFVbAnBNFrU2_iSC8PNecdpovQGXA0uRRDWQ0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1674999869294427671_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25015721_1320395731399431_8685616632174739456_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25015721_1320395731399431_8685616632174739456_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25015721_1320395731399431_8685616632174739456_n.jpg'}}, 'created_time': '1513895566', 'caption': None, 'user_has_liked': False, 'likes': {'count': 7}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 2}, 'type': 'image', 'link': 'https://www.instagram.com/p/Bc-y4GwHr4Xqxtn3yv7Mn2zZdwsyIVF1RIggmo0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1674032295035421447_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25014979_329774084172531_4407919425441759232_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25014979_329774084172531_4407919425441759232_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 
'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25014979_329774084172531_4407919425441759232_n.jpg'}}, 'created_time': '1513780222', 'caption': {'id': '17909104462066315', 'text': '谢谢你，潘颖和媛媛', 'created_time': '1513780222', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 7}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 9}, 'type': 'image', 'link': 'https://www.instagram.com/p/Bc7W4DAHkMHjCBOgiyqgp67Yc3Jmveo2az6yFg0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1674010133448856437_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25011930_1271132223032509_3755382961101340672_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25011930_1271132223032509_3755382961101340672_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25011930_1271132223032509_3755382961101340672_n.jpg'}}, 'created_time': '1513777580', 'caption': {'id': '17896847521091120', 'text': 'A weekend in the city', 'created_time': '1513777580', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 12}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 
'https://www.instagram.com/p/Bc7R1jan5N1N9oOpGMX8KfL1zHAdq9ylJ-ImSo0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1672145732269708444_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25005863_2011006885843364_2919785679116304384_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25005863_2011006885843364_2919785679116304384_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25005863_2011006885843364_2919785679116304384_n.jpg'}}, 'created_time': '1513555326', 'caption': {'id': '17912745739029924', 'text': 'Sunset... at 3:45 pm ಠ_ಠ', 'created_time': '1513555326', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 21}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/Bc0p68gnxScyzvS8js4B1Jhftmam-scSl1w5VQ0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1668390594446992112_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25021900_302093603634224_7936031481950896128_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 
'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25021900_302093603634224_7936031481950896128_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25021900_302093603634224_7936031481950896128_n.jpg'}}, 'created_time': '1513107679', 'caption': {'id': '17899060315114792', 'text': 'So close yet so far...', 'created_time': '1513107679', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 31}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 3}, 'type': 'image', 'link': 'https://www.instagram.com/p/BcnUGf4naLwmj2yi94jhwEFZn_7vILa7sMo7ZU0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1668070783884004995_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/24332123_522726954764035_3757694933406842880_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/24332123_522726954764035_3757694933406842880_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/24332123_522726954764035_3757694933406842880_n.jpg'}}, 'created_time': '1513069554', 'caption': {'id': '17897912872124421', 'text': 'Me maman los barcos.', 'created_time': '1513069554', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 
'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 11}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/BcmLYpFnQ6De0R6KFsN_M7NZgon9zeipb7WRZs0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1666813262884001340_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/25006487_920484838106751_4570086678890283008_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/25006487_920484838106751_4570086678890283008_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/25006487_920484838106751_4570086678890283008_n.jpg'}}, 'created_time': '1512919646', 'caption': {'id': '17895482461093328', 'text': 'The Mist.', 'created_time': '1512919646', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 23}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 3}, 'type': 'image', 'link': 'https://www.instagram.com/p/BchtdTZHFY8HdN0iROARHzcc7QOBqo99YEdaTQ0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1665504933486422957_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 
'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e15/25009038_306129216543111_9187458035721699328_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e15/25009038_306129216543111_9187458035721699328_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/25009038_306129216543111_9187458035721699328_n.jpg'}}, 'created_time': '1512763689', 'caption': None, 'user_has_liked': False, 'likes': {'count': 15}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'video', 'link': 'https://www.instagram.com/p/BcdD-msHmut5nH7fDIrZRitj7SKwkDvs-fxtY80/', 'location': None, 'attribution': None, 'users_in_photo': [], 'videos': {'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/vp/9910ba22d473534fcdedba4c5404f8af/5A4C5962/t50.2886-16/24995044_167376300536108_7108479399560740864_n.mp4', 'id': '17852753245214683'}, 'low_bandwidth': {'width': 480, 'height': 480, 'url': 'https://scontent.cdninstagram.com/vp/b056dad1cd9083db4bf75315e0d8e36e/5A4C3972/t50.2886-16/24725170_1592298994151375_8035081218867331072_n.mp4', 'id': '17885335735155490'}, 'low_resolution': {'width': 480, 'height': 480, 'url': 'https://scontent.cdninstagram.com/vp/b056dad1cd9083db4bf75315e0d8e36e/5A4C3972/t50.2886-16/24725170_1592298994151375_8035081218867331072_n.mp4', 'id': '17885335735155490'}}}, {'id': '1663373170467375776_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/24327395_1304976632939488_3726275581189292032_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 
'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/24327395_1304976632939488_3726275581189292032_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/24327395_1304976632939488_3726275581189292032_n.jpg'}}, 'created_time': '1512509555', 'caption': {'id': '17886434065190830', 'text': \"It's empty... *cries in mexican*\", 'created_time': '1512509555', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 18}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 2}, 'type': 'image', 'link': 'https://www.instagram.com/p/BcVfRXqn4qgEeYznl41MovN9fYBBlZdWgEQsqo0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1661918870146857860_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/24327585_275410129651873_2301266468156735488_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/24327585_275410129651873_2301266468156735488_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/24327585_275410129651873_2301266468156735488_n.jpg'}}, 'created_time': '1512336189', 'caption': {'id': '17898830200101820', 'text': 'Tiene un bati-extintor xD', 'created_time': '1512336189', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 
'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 13}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/BcQUmg8nzuEsNRX3cKxWmXOuOmip551lmgB3dc0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1661155796838170867_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/24274343_539108086454845_5548137920025591808_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/24274343_539108086454845_5548137920025591808_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/24274343_539108086454845_5548137920025591808_n.jpg'}}, 'created_time': '1512245224', 'caption': {'id': '17898059296115180', 'text': 'SeeWoo', 'created_time': '1512245224', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 12}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/BcNnGVjHsDz5mzqSBkxEDdU-rhBmTNufSj0zQU0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1661145077321086188_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 
'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/24177831_1974250396181172_1652928519031750656_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/24177831_1974250396181172_1652928519031750656_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/24177831_1974250396181172_1652928519031750656_n.jpg'}}, 'created_time': '1512243946', 'caption': {'id': '17897764885097041', 'text': 'El bicolor', 'created_time': '1512243946', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 6}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/BcNkqWOHLTs-6oUzjqYc0gk1SxzBBYrVjbBR3c0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1653967032260771215_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/23734649_144027666320958_3382375844696555520_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/23734649_144027666320958_3382375844696555520_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/23734649_144027666320958_3382375844696555520_n.jpg'}}, 'created_time': '1511388256', 'caption': {'id': '17885803885141166', 'text': 'Y justo te 
das cuenta de que dejaste las memorias en México.', 'created_time': '1511388256', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 14}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/Bb0EkC0HD2PEl57Em85QK3Kg_Dv5d6w3H2C2-g0/', 'location': None, 'attribution': None, 'users_in_photo': []}, {'id': '1651740249104875103_31183420', 'user': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}, 'images': {'thumbnail': {'width': 150, 'height': 150, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s150x150/e35/23667806_167912247134192_8247260630583607296_n.jpg'}, 'low_resolution': {'width': 320, 'height': 320, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s320x320/e35/23667806_167912247134192_8247260630583607296_n.jpg'}, 'standard_resolution': {'width': 640, 'height': 640, 'url': 'https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/23667806_167912247134192_8247260630583607296_n.jpg'}}, 'created_time': '1511122803', 'caption': {'id': '17850978850205561', 'text': 'Saca la vaca', 'created_time': '1511122803', 'from': {'id': '31183420', 'full_name': 'Antonio', 'profile_picture': 'https://scontent.cdninstagram.com/t51.2885-19/s150x150/24125006_161374611141852_1337836751690924032_n.jpg', 'username': 'fferegrino'}}, 'user_has_liked': False, 'likes': {'count': 13}, 'tags': [], 'filter': 'Normal', 'comments': {'count': 0}, 'type': 'image', 'link': 'https://www.instagram.com/p/BbsKQFZHH5fp_QZZbmZuNjZuh9ADSLTxejGb5E0/', 'location': None, 'attribution': None, 'users_in_photo': []}], 'meta': {'code': 200}}\n"
 79 |      ]
 80 |     }
 81 |    ],
 82 |    "source": [
 83 |     "print(recent_response)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 28,
 89 |    "metadata": {
 90 |     "scrolled": true
 91 |    },
 92 |    "outputs": [
 93 |     {
 94 |      "name": "stdout",
 95 |      "output_type": "stream",
 96 |      "text": [
 97 |       "Done\n"
 98 |      ]
 99 |     }
100 |    ],
101 |    "source": [
102 |     "# Gather every media item, following pagination.next_url until it runs out.\n",
103 |     "images = []\n",
104 |     "\n",
105 |     "while True:\n",
106 |     "    # Collect the current page first so the last (or only) page is kept.\n",
107 |     "    images.extend(recent_response.get(\"data\", []))\n",
108 |     "    next_url = (recent_response.get(\"pagination\") or {}).get(\"next_url\")\n",
109 |     "    if not next_url:\n",
110 |     "        break\n",
111 |     "    print(next_url)\n",
112 |     "    recent_r = requests.get(next_url)\n",
113 |     "    recent_r.raise_for_status()  # fail loudly instead of paging through an error body\n",
114 |     "    recent_response = json.loads(recent_r.text)\n",
115 |     "print(\"Done\")"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 29,
121 |    "metadata": {},
122 |    "outputs": [
123 |     {
124 |      "name": "stdout",
125 |      "output_type": "stream",
126 |      "text": [
127 |       "0\n"
128 |      ]
129 |     }
130 |    ],
131 |    "source": [
132 |     "print(len(images))"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": null,
138 |    "metadata": {
139 |     "collapsed": true
140 |    },
141 |    "outputs": [],
142 |    "source": []
143 |   }
144 |  ],
145 |  "metadata": {
146 |   "kernelspec": {
147 |    "display_name": "Python 3",
148 |    "language": "python",
149 |    "name": "python3"
150 |   },
151 |   "language_info": {
152 |    "codemirror_mode": {
153 |     "name": "ipython",
154 |     "version": 3
155 |    },
156 |    "file_extension": ".py",
157 |    "mimetype": "text/x-python",
158 |    "name": "python",
159 |    "nbconvert_exporter": "python",
160 |    "pygments_lexer": "ipython3",
161 |    "version": "3.6.1"
162 |   }
163 |  },
164 |  "nbformat": 4,
165 |  "nbformat_minor": 2
166 | }
167 | 


--------------------------------------------------------------------------------