├── .gitattributes ├── .gitignore ├── README.md ├── azure_documentdb ├── AzureDocumentDBLab_Basics.ipynb └── GoogleDoc_DocDB_Demo.ipynb ├── azure_machine_learning ├── A_Simple_Confusion_Matrix.ipynb └── AnomalyDetection.ipynb ├── cntk ├── Emotions_ConvolutionalNeuralNetwork_CNTK.ipynb └── Emotions_Data_CNTK_Format.ipynb ├── cognitive_services ├── Computer_Vision_API.ipynb ├── Emotion_Analysis_Example.ipynb ├── EntityLinking.ipynb ├── TextAnalytics.ipynb └── VideoIndexer_Insights_Python.ipynb ├── dask ├── dask-digit-classification.ipynb ├── dask-image-processing.ipynb └── dask-sklearn.ipynb ├── datatools ├── DealingWithGeospatialImages.ipynb ├── imgs │ ├── back_to_mask_monarch.png │ ├── cheetah-mom-cubs.jpg │ ├── converted_back_rgb_monarch.png │ ├── cv2_read_monarch.png │ ├── ladybug-daisy.jpg │ ├── ladybug-leaf.jpg │ ├── mask.jpg │ ├── monarch.jpg │ ├── plt_read_monarch.png │ ├── rgb2gray_monarch.png │ ├── scale_abs_monarch.png │ └── snowleopard_cub.png └── tinyimage.ipynb ├── from_scratch ├── A_Simple_Numpy_NeuralNet_Example.ipynb ├── adaline_batch.py ├── adaline_sgd.py └── leukemia_notebook.ipynb ├── general └── nb_diagram.png ├── images └── automl_options.png ├── multi_framework └── ConvNet_Comparisons.ipynb ├── opencv └── ImagePolygons.ipynb ├── primers ├── First_Look_R.ipynb ├── JavaScript_Primer.ipynb ├── Jupyter_and_JavaScript.ipynb ├── NotebookAnatomy.ipynb ├── Numpy_Image.ipynb └── nb_diagram.png ├── pytorch ├── A_Simple_PyTorch_NeuralNet_Example.ipynb ├── DL_with_PyTorch │ └── 01_TensorFundamentals.ipynb ├── PyTorch_MLP.ipynb └── PyTorch_Percetron.ipynb ├── scikit-learn └── Bear-Detector.ipynb ├── spark └── pyspark_firstgo.ipynb └── tensorflow └── TF_3layer_MNIST.ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.mp4 2 | 3 | # Jupyter 4 | .ipynb_checkpoints 5 | 6 | # Windows image file caches 7 | Thumbs.db 8 | ehthumbs.db 9 | 10 | # Folder config file 11 | Desktop.ini 12 | 13 | # Recycle Bin used on file shares 14 | $RECYCLE.BIN/ 15 | 16 | # Windows Installer files 17 | *.cab 18 | *.msi 19 | *.msm 20 | *.msp 21 | 22 | # Windows shortcuts 23 | *.lnk 24 | 25 | # ========================= 26 | # Operating System Files 27 | # ========================= 28 | 29 | # OSX 30 | # ========================= 31 | 32 | .DS_Store 33 | .AppleDouble 34 | .LSOverride 35 | 36 | # Thumbnails 37 | ._* 38 | 39 | # Files that might appear in the root of a volume 40 | .DocumentRevisions-V100 41 | .fseventsd 42 | .Spotlight-V100 43 | .TemporaryItems 44 | .Trashes 45 | .VolumeIcon.icns 46 | 47 | # Directories potentially created on remote AFP share 48 | .AppleDB 49 | .AppleDesktop 50 | Network Trash Folder 51 | Temporary Items 52 | .apdisk 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
python-jupyter-notebooks 2 | 3 | Some sample python jupyter notebooks 4 | * General 5 | * Azure SDKs (e.g. storage, documentdb) 6 | -------------------------------------------------------------------------------- /azure_documentdb/AzureDocumentDBLab_Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Azure DocumentDB Lab - The Basics" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Install the DocumentDB Python SDK" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 12, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Requirement already satisfied (use --upgrade to upgrade): pydocumentdb in /home/nbcommon/anaconda3_410/lib/python3.5/site-packages\n", 29 | "Requirement already satisfied (use --upgrade to upgrade): requests==2.10.0 in /home/nbcommon/anaconda3_410/lib/python3.5/site-packages (from pydocumentdb)\n", 30 | "Requirement already satisfied (use --upgrade to upgrade): six>=1.6 in /home/nbcommon/anaconda3_410/lib/python3.5/site-packages (from pydocumentdb)\n", 31 | "\u001b[33mYou are using pip version 8.1.2, however version 9.0.1 is available.\n", 32 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "!pip install pydocumentdb" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Imports" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 13, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "import json\n", 56 | "from urllib.request import urlopen, Request\n", 57 | "\n", 58 | "import pydocumentdb.documents as documents\n", 59 | "import pydocumentdb.document_client as document_client\n", 60 | "import pydocumentdb.errors as errors" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 32, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# Fill these in with your specific information\n", 72 | "DOCUMENTDB_ACCOUNT = 'your docdb account name here'\n", 73 | "KEY = 'your key here=='\n", 74 | "\n", 75 | "# Host or URI\n", 76 | "URI = 'https://%s.documents.azure.com:443/' % DOCUMENTDB_ACCOUNT\n", 77 | "\n", 78 | "# Name of the database and collection (these will get created in lab)\n", 79 | "DATABASE = 'test_docdb'\n", 80 | "COLLECTION = 'test_coll'" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Create a database" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 37, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stderr", 99 | "output_type": "stream", 100 | "text": [ 101 | "INFO:Starting new HTTPS connection (1): training-docdb.documents.azure.com\n", 102 | "INFO:Starting new HTTPS connection (1): training-docdb-westus.documents.azure.com\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "# Firstly, make a client connection to \n", 108 | "\n", 109 | "client = document_client.DocumentClient(URI, {'masterKey': KEY})\n", 110 | "\n", 111 | "# Select database with id listed in our configuration settings\n", 112 | "# and create an iterable object over databases\n", 113 | "query_iterable = client.QueryDatabases('SELECT * 
FROM root r WHERE r.id=\"%s\"' % DATABASE)\n", 114 | "it = iter(query_iterable)\n", 115 | "\n", 116 | "# Grab the database or if it doesn't exist, none is returned\n", 117 | "test_db = next(it, None)\n", 118 | "\n", 119 | "# Create the database only if it does not exist (which it should not if torn down properly)\n", 120 | "if test_db is None:\n", 121 | " test_db = client.CreateDatabase({'id' : DATABASE})\n", 122 | " print(\"Created database: \", DATABASE)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Create a collection" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 38, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "[{'_colls': 'colls/', '_self': 'dbs/X2A0AA==/', '_users': 'users/', 'id': 'ToDoList', '_ts': 1483569552, '_rid': 'X2A0AA==', '_etag': '\"00007100-0000-0000-0000-586d79950000\"'}, {'_colls': 'colls/', '_self': 'dbs/lShQAA==/', '_users': 'users/', 'id': 'test_docdb', '_ts': 1483572531, '_rid': 'lShQAA==', '_etag': '\"00002301-0000-0000-0000-586d85380000\"'}]\n" 144 | ] 145 | }, 146 | { 147 | "data": { 148 | "text/plain": [ 149 | "[{'_conflicts': 'conflicts/',\n", 150 | " '_docs': 'docs/',\n", 151 | " '_etag': '\"00004701-0000-0000-0000-586e337a0000\"',\n", 152 | " '_rid': 'lShQAOYrOA0=',\n", 153 | " '_self': 'dbs/lShQAA==/colls/lShQAOYrOA0=/',\n", 154 | " '_sprocs': 'sprocs/',\n", 155 | " '_triggers': 'triggers/',\n", 156 | " '_ts': 1483617140,\n", 157 | " '_udfs': 'udfs/',\n", 158 | " 'defaultTtl': 5,\n", 159 | " 'id': 'test_coll',\n", 160 | " 'indexingPolicy': {'automatic': True,\n", 161 | " 'excludedPaths': [],\n", 162 | " 'includedPaths': [{'indexes': [{'dataType': 'Number',\n", 163 | " 'kind': 'Range',\n", 164 | " 'precision': -1},\n", 165 | " {'dataType': 'String', 'kind': 'Hash', 'precision': 3}],\n", 166 | " 'path': '/*'}],\n", 167 | " 'indexingMode': 'consistent'}}]" 168 | ] 169 | }, 170 | "execution_count": 38, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "# Again, read databases into a list to check if ours is there\n", 177 | "existing_databases = list(client.ReadDatabases())\n", 178 | "print(existing_databases)\n", 179 | "\n", 180 | "# Create the test collection only when it's not already present\n", 181 | "query_iterable = client.QueryCollections(test_db['_self'],\n", 182 | " 'SELECT * FROM root r WHERE r.id=\"%s\"' % COLLECTION)\n", 183 | "it = iter(query_iterable)\n", 184 | "\n", 185 | "# Grab the collection (named in configuration info) or if it doesn't exist, none is returned\n", 186 | "test_coll = next(it, None)\n", 187 | "\n", 188 | "# Create the collection only if it doesn't exist (which it should not if torn down properly)\n", 189 | "if test_coll is None:\n", 190 | " test_coll = client.CreateCollection(test_db['_self'], {'id' : COLLECTION})\n", 191 | " print(\"Created collection: \", COLLECTION)\n", 192 | "\n", 193 | "# List existing collections\n", 194 | "list(client.ReadCollections(test_db['_self']))" 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "### Working with documents" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 39, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "# Read json documents and add to the collection\n", 213 | "\n", 214 | "# a place to store the 
document definitions\n", 215 | "doc_definitions = []\n", 216 | "\n", 217 | "urls = ['https://gist.githubusercontent.com/michhar/dfa446fd2336f9661a7b3938bd692970/raw/59d38e137c3d86b1052b3a9be2aa7fbe16bb3c05/movie001.json',\n", 218 | " 'https://gist.githubusercontent.com/michhar/dfa446fd2336f9661a7b3938bd692970/raw/59d38e137c3d86b1052b3a9be2aa7fbe16bb3c05/movie002.json',\n", 219 | " 'https://gist.githubusercontent.com/michhar/dfa446fd2336f9661a7b3938bd692970/raw/59d38e137c3d86b1052b3a9be2aa7fbe16bb3c05/movie003.json']\n", 220 | " \n", 221 | "# collect all json documents from the URLs\n", 222 | "for doc in urls:\n", 223 | " \n", 224 | " # try clause to grab our json document data and read in to a dictionary\n", 225 | " try:\n", 226 | " # Send out url request\n", 227 | " response = urlopen(doc)\n", 228 | "\n", 229 | " # Response will be in json file format, in Python3 must decode\n", 230 | " respjson = response.read().decode(\"utf-8\")\n", 231 | "\n", 232 | " # Let's convert json to a python dictionary\n", 233 | " respdict = json.loads(respjson)\n", 234 | " \n", 235 | " doc_definitions.append(respdict)\n", 236 | " \n", 237 | " # Exception handling\n", 238 | " except HTTPError as e:\n", 239 | " print('HTTP Error message: %s' % e.message)\n", 240 | " except URLError as e:\n", 241 | " print('HTTP Error message: %s' % e.args)\n", 242 | " except HTTPException as e:\n", 243 | " print('HTTP Error message: %s' % e.args)\n", 244 | " except Exception:\n", 245 | " import traceback\n", 246 | " print('generic exception: ' + traceback.format_exc())" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 40, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [ 256 | { 257 | "name": "stdout", 258 | "output_type": "stream", 259 | "text": [ 260 | "{'Cache-Control': 'no-store, no-cache', 'x-ms-session-token': '0:59', 'Content-Type': 'application/json', 'x-ms-xp-role': '1', 'Pragma': 'no-cache', 'x-ms-alt-content-path': 'dbs/test_docdb/colls/test_coll', 'Content-Location': 'https://training-docdb-westus.documents.azure.com/dbs/lShQAA==/colls/lShQAOYrOA0=/docs/lShQAOYrOA0GAAAAAAAAAA==/', 'x-ms-gatewayversion': 'version=1.10.85.2', 'Transfer-Encoding': 'chunked', 'Date': 'Thu, 05 Jan 2017 11:57:07 GMT', 'x-ms-request-charge': '1', 'x-ms-schemaversion': '1.2', 'x-ms-resource-usage': 'documentSize=0;documentsSize=5;collectionSize=5;', 'etag': '\"1f02548b-0000-0000-0000-586e34910000\"', 'x-ms-serviceversion': 'version=1.10.85.2', 'Strict-Transport-Security': 'max-age=31536000', 'Server': 'Microsoft-HTTPAPI/2.0', 'x-ms-last-state-change-utc': 'Thu, 05 Jan 2017 08:02:06.014 GMT', 'x-ms-resource-quota': 'documentSize=10240;documentsSize=10485760;collectionSize=10485760;', 'x-ms-activity-id': '14775447-4e99-4fbf-8631-22251b91b659'}\n" 261 | ] 262 | } 263 | ], 264 | "source": [ 265 | "# An empty container to save document ids for later\n", 266 | "# (these are generated when we create the doc)\n", 267 | "doc_ids = []\n", 268 | "\n", 269 | "# Go through each document definition from reading in the documents above and\n", 270 | "# create a document (insert one) in our DocumentDB collection\n", 271 | "for doc_def in doc_definitions:\n", 272 | "\n", 273 | " # Create document in the DocDB database/collection\n", 274 | " # - this will create a document with a unique id - so doc will not be overwritten\n", 275 | " # - if one wishes to overwrite docs, it's easy enough, as we are saving ids\n", 276 | " # - or one could give the doc an id (e.g., doc['id'] = filename)\n", 277 | " created_doc = 
client.CreateDocument(test_coll['_self'], doc_def)\n", 278 | "\n", 279 | " # Save the document id in case we need to reference these later\n", 280 | " doc_ids.append(created_doc['id'])\n", 281 | "\n", 282 | "# Check location from last doc added using the document's link\n", 283 | "client.ReadDocument(created_doc['_self'])\n", 284 | "print(client.last_response_headers)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### Query the documents" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 41, 297 | "metadata": { 298 | "collapsed": false 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "# an order by query\n", 303 | "query = {\n", 304 | " 'query': 'SELECT * FROM test_coll'\n", 305 | "} \n", 306 | "\n", 307 | "options = {} \n", 308 | "options['enableCrossPartitionQuery'] = True\n", 309 | "options['maxItemCount'] = 2\n", 310 | "\n", 311 | "# collection_link = database_link + '/colls/%s' % test_coll['id']\n", 312 | "\n", 313 | "result_iterable = client.QueryDocuments(test_coll['_self'], query, options)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 42, 319 | "metadata": { 320 | "collapsed": false 321 | }, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "{ '_attachments': 'attachments/',\n", 328 | " '_etag': '\"1f02518b-0000-0000-0000-586e34910000\"',\n", 329 | " '_rid': 'lShQAOYrOA0EAAAAAAAAAA==',\n", 330 | " '_self': 'dbs/lShQAA==/colls/lShQAOYrOA0=/docs/lShQAOYrOA0EAAAAAAAAAA==/',\n", 331 | " '_ts': 1483617425,\n", 332 | " 'actors': [ 'Chris Pratt',\n", 333 | " 'Bryce Dallas Howard',\n", 334 | " 'Irrfan Khan',\n", 335 | " \"Vincent D'Onofrio\"],\n", 336 | " 'director': ['Colin Trevorrow'],\n", 337 | " 'genre': 'Action, Adventure, Sci-Fi',\n", 338 | " 'id': '2140a235-a238-46f7-896b-090b87ced864',\n", 339 | " 'imdbid': 'tt0369610',\n", 340 | " 'language': ['English'],\n", 341 | " 'plot': 'A new theme park is built on the original site of Jurassic Park. '\n", 342 | " \"Everything is going well until the park's newest attraction - a \"\n", 343 | " 'genetically modified giant stealth killing machine - escapes '\n", 344 | " 'containment and goes on a killing spree.',\n", 345 | " 'released': '12 Jun 2015',\n", 346 | " 'runtime': '124 min',\n", 347 | " 'title': 'Jurassic World',\n", 348 | " 'titleid': 1,\n", 349 | " 'year': '2015'}\n", 350 | "\n", 351 | "\n", 352 | "{ '_attachments': 'attachments/',\n", 353 | " '_etag': '\"1f02528b-0000-0000-0000-586e34910000\"',\n", 354 | " '_rid': 'lShQAOYrOA0FAAAAAAAAAA==',\n", 355 | " '_self': 'dbs/lShQAA==/colls/lShQAOYrOA0=/docs/lShQAOYrOA0FAAAAAAAAAA==/',\n", 356 | " '_ts': 1483617425,\n", 357 | " 'actors': ['Bradley Cooper', 'Kyle Gallner', 'Cole Konis', 'Ben Reed'],\n", 358 | " 'director': ['Clint Eastwood'],\n", 359 | " 'genre': 'Action, Biography, Drama',\n", 360 | " 'id': '0d24ef58-7d01-4905-a221-eef7cbd8c0af',\n", 361 | " 'imdbid': 'tt2179136',\n", 362 | " 'language': ['English', 'Arabic'],\n", 363 | " 'plot': 'Chris Kyle was nothing more than a Texan man who dreamed of '\n", 364 | " 'becoming a cowboy, but in his thirties he found out that maybe '\n", 365 | " 'his life needed something different, something where he could '\n", 366 | " 'express his real talent, something that could help America in its '\n", 367 | " 'fight against terrorism. So he joined the SEALs in order to '\n", 368 | " 'become a sniper. 
After marrying, Kyle and the other members of '\n", 369 | " \"the team are called for their first tour of Iraq. Kyle's struggle \"\n", 370 | " \"isn't with his missions, but about his relationship with the \"\n", 371 | " 'reality of the war and, once returned at home, how he manages to '\n", 372 | " 'handle it with his urban life, his wife and kids.',\n", 373 | " 'released': '16 Jan 2015',\n", 374 | " 'runtime': '133 min',\n", 375 | " 'title': 'American Sniper',\n", 376 | " 'titleid': 2,\n", 377 | " 'year': '2014'}\n", 378 | "\n", 379 | "\n", 380 | "{ '_attachments': 'attachments/',\n", 381 | " '_etag': '\"1f02548b-0000-0000-0000-586e34910000\"',\n", 382 | " '_rid': 'lShQAOYrOA0GAAAAAAAAAA==',\n", 383 | " '_self': 'dbs/lShQAA==/colls/lShQAOYrOA0=/docs/lShQAOYrOA0GAAAAAAAAAA==/',\n", 384 | " '_ts': 1483617425,\n", 385 | " 'actors': [ 'Vin Diesel',\n", 386 | " 'Paul Walker',\n", 387 | " 'Jason Statham',\n", 388 | " 'Michelle Rodriguez'],\n", 389 | " 'director': ['James Wan'],\n", 390 | " 'genre': 'Action, Crime, Thriller',\n", 391 | " 'id': 'da338bdb-6749-4012-962b-f9621147c2ad',\n", 392 | " 'imdbid': 'tt2820852',\n", 393 | " 'language': ['English'],\n", 394 | " 'plot': 'Deckard Shaw seeks revenge against Dominic Toretto and his family '\n", 395 | " 'for his comatose brother.',\n", 396 | " 'released': '03 Apr 2015',\n", 397 | " 'runtime': '137 min',\n", 398 | " 'title': 'Furious 7',\n", 399 | " 'titleid': 3,\n", 400 | " 'year': '2015'}\n", 401 | "\n", 402 | "\n" 403 | ] 404 | } 405 | ], 406 | "source": [ 407 | "import pprint\n", 408 | "pp = pprint.PrettyPrinter(indent=4)\n", 409 | "\n", 410 | "it = iter(result_iterable)\n", 411 | "for _ in it:\n", 412 | " pp.pprint(_)\n", 413 | " print('\\n')" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": { 419 | "collapsed": true 420 | }, 421 | "source": [ 422 | "### Delete the collection (which also deletes the documents)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 31, 428 | "metadata": { 429 | "collapsed": false, 430 | "scrolled": false 431 | }, 432 | "outputs": [ 433 | { 434 | "data": { 435 | "text/plain": [ 436 | "[]" 437 | ] 438 | }, 439 | "execution_count": 31, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "database_link = test_db['_self']\n", 446 | "\n", 447 | "# Build a collection link from the collection id\n", 448 | "collection_link = test_coll['_self']\n", 449 | "\n", 450 | "# Delete the collection, deleting also all of the documents contained wherein\n", 451 | "del_coll = client.DeleteCollection(collection_link)\n", 452 | "\n", 453 | "# We could also have queried for this collection and created the collection link from that\n", 454 | "\n", 455 | "# List existing collections\n", 456 | "list(client.ReadCollections(test_db['_self']))" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "collapsed": true 464 | }, 465 | "outputs": [], 466 | "source": [] 467 | } 468 | ], 469 | "metadata": { 470 | "kernelspec": { 471 | "display_name": "Python 3", 472 | "language": "python", 473 | "name": "python3" 474 | }, 475 | "language_info": { 476 | "codemirror_mode": { 477 | "name": "ipython", 478 | "version": 3 479 | }, 480 | "file_extension": ".py", 481 | "mimetype": "text/x-python", 482 | "name": "python", 483 | "nbconvert_exporter": "python", 484 | "pygments_lexer": "ipython3", 485 | "version": "3.5.1" 486 | } 487 | }, 488 | "nbformat": 4, 489 | "nbformat_minor": 2 490 | } 491 | 
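Note: the lab above repeats the same pattern twice, query for a resource by id and create it only when the lookup returns None, once for the database and once for the collection. Below is a minimal illustrative sketch (not part of the original notebook) that folds the pattern into one helper; it reuses only the pydocumentdb calls already shown in the lab (DocumentClient, QueryDatabases, CreateDatabase, QueryCollections, CreateCollection), and the names in the usage comment are the notebook's own placeholders.

# Illustrative sketch only; assumes the same pydocumentdb client as the lab above.
import pydocumentdb.document_client as document_client

def get_or_create(client, database_id, collection_id):
    """Return (database, collection), creating either one if it does not exist yet."""
    # Look up the database by id; next() yields None when the query finds nothing
    db_iter = iter(client.QueryDatabases(
        'SELECT * FROM root r WHERE r.id="%s"' % database_id))
    db = next(db_iter, None)
    if db is None:
        db = client.CreateDatabase({'id': database_id})

    # Same pattern for the collection, scoped to the database's self link
    coll_iter = iter(client.QueryCollections(
        db['_self'], 'SELECT * FROM root r WHERE r.id="%s"' % collection_id))
    coll = next(coll_iter, None)
    if coll is None:
        coll = client.CreateCollection(db['_self'], {'id': collection_id})

    return db, coll

# Usage with the URI, KEY, DATABASE and COLLECTION placeholders defined in the notebook:
# client = document_client.DocumentClient(URI, {'masterKey': KEY})
# test_db, test_coll = get_or_create(client, DATABASE, COLLECTION)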
-------------------------------------------------------------------------------- /azure_documentdb/GoogleDoc_DocDB_Demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Collaborating on Google Spreadsheets with Azure DocumentDB\n", 8 | "### using Google Drive API with `gspread` + DocumentDB python SDK" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "# My imports\n", 20 | "import json\n", 21 | "import os\n", 22 | "import gspread\n", 23 | "from oauth2client.client import SignedJwtAssertionCredentials\n", 24 | "import pandas as pd\n", 25 | "import pydocumentdb.document_client as document_client\n", 26 | "from pandas.io.json import read_json" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "### Using `gspread` module to interact with Google Drive API" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "# Specify my google drive api credentials\n", 45 | "json_key = json.load(open('MessyDoc-8f814e3f2a78.json'))\n", 46 | "scope = ['https://spreadsheets.google.com/feeds']\n", 47 | "credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'].encode(), scope)\n", 48 | "\n", 49 | "# Using gspread module and my credentials, grab the google doc I want\n", 50 | "gc = gspread.authorize(credentials)\n", 51 | "wksheet = gc.open(\"SSF_Crop_Master_2012_Master_crop_master\").worksheet('latest')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Read out a document from DocDB database/collection and put into `pandas` df" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "# Specify my DocumentDB settings\n", 70 | "DOCUMENTDB_HOST = 'https://testingflask.documents.azure.com:443/'\n", 71 | "DOCUMENTDB_KEY = 's610r3ylWxHNW8...=='\n", 72 | "DOCDB_DATABASE = 'mladsapp'\n", 73 | "DOCDB_COLLECTION_USER = 'user_collection'\n", 74 | "DOCDB_COLLECTION_MASTER = 'master_collection'\n", 75 | "DOCDB_MASTER_DOC = 'masterdoc'\n", 76 | "\n", 77 | "# make a client connection\n", 78 | "client = document_client.DocumentClient(DOCUMENTDB_HOST, {'masterKey': DOCUMENTDB_KEY})\n", 79 | "\n", 80 | "# Read databases and get our working database\n", 81 | "db = next((data for data in client.ReadDatabases() if data['id'] == DOCDB_DATABASE))\n", 82 | "\n", 83 | "# Read collections and get the \"master collection\"\n", 84 | "coll_master = next((coll for coll in client.ReadCollections(db['_self']) if coll['id'] == DOCDB_COLLECTION_MASTER))\n", 85 | "\n", 86 | "# Read master document and place data into dataframe\n", 87 | "master_doc = next((doc for doc in client.ReadDocuments(coll_master['_self']) if doc['id'] == DOCDB_MASTER_DOC))\n", 88 | "raw_data_df = read_json(master_doc['data'])\n", 89 | "raw_data_df.columns = read_json(master_doc['data_headers'])\n", 90 | "\n", 91 | "print(raw_data_df.shape)\n", 92 | "\n", 93 | "# Tidy up column names\n", 94 | "cols = raw_data_df.columns\n", 95 | "raw_data_df.columns = [e[0].encode('utf-8') for e in cols]" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 
| "source": [ 102 | "### Do something to `pandas` df and update it in DocDB" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "# Let's add a new column\n", 114 | "#print(raw_data_df.columns)\n", 115 | "a = raw_data_df['Seedingdate']\n", 116 | "a = [e + '-2012' for e in a]\n", 117 | "from datetime import datetime\n", 118 | "t1 = datetime.strptime(a[0], '%d-%b-%Y')\n", 119 | "\n", 120 | "b = raw_data_df['harvestdate'].iloc[:,0]\n", 121 | "b = [e + '-2012' for e in b]\n", 122 | "import time\n", 123 | "t2 = datetime.strptime(b[0], '%d-%b-%Y')\n", 124 | "\n", 125 | "days = (t2 - t1).days\n", 126 | "\n", 127 | "# Add this column to data\n", 128 | "raw_data_df['growingperiod_days'] = days" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "### Update this data in DocDB" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "# make a client connection\n", 147 | "client = document_client.DocumentClient(DOCUMENTDB_HOST, {'masterKey': DOCUMENTDB_KEY})\n", 148 | "\n", 149 | "# Read databases and get our working database\n", 150 | "db = next((data for data in client.ReadDatabases() if data['id'] == DOCDB_DATABASE))\n", 151 | "\n", 152 | "# Read collections and get the \"master collection\"\n", 153 | "coll_master = next((coll for coll in client.ReadCollections(db['_self']) if coll['id'] == DOCDB_COLLECTION_MASTER))\n", 154 | "\n", 155 | "# Convert data values in df to json list of lists\n", 156 | "values = raw_data_df.to_json(orient = 'values')\n", 157 | "\n", 158 | "# Define a document definition\n", 159 | "document_definition = { 'id': DOCDB_MASTER_DOC,\n", 160 | " 'timestamp': datetime.now().strftime('%c'),\n", 161 | " 'data': values,\n", 162 | " 'data_headers': pd.Series(raw_data_df.columns).to_json(orient = 'values')}\n", 163 | "\n", 164 | "# Update the document in DocDB!\n", 165 | "doc_updated = client.UpsertDocument(coll_master['_self'], document_definition)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": { 171 | "collapsed": true 172 | }, 173 | "source": [ 174 | "### Publish back to a google spreadsheet" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "# Some functions for updating (and concurrently) publishing a google spreadsheet doc\n", 186 | "def numberToLetters(q):\n", 187 | " '''This converts a number,q, into proper column name format for spreadsheet (e.g. 
R1C28 -> AB1).'''\n", 188 | " q = q - 1\n", 189 | " result = ''\n", 190 | " while q >= 0:\n", 191 | " remain = q % 26\n", 192 | " result = chr(remain+65) + result;\n", 193 | " q = q//26 - 1\n", 194 | " return result\n", 195 | "\n", 196 | "def update_worksheet(wksheet, df):\n", 197 | " '''This function updates a given worksheet (wksheet)\n", 198 | " with the values in the dataframe (df).'''\n", 199 | "\n", 200 | " # TODO: confirm there are enough columns in existing doc to match query\n", 201 | "\n", 202 | " columns = df.columns.values.tolist()\n", 203 | " # selection of the range that will be updated\n", 204 | " cell_list = wksheet.range('A1:'+numberToLetters(len(columns))+'1')\n", 205 | "\n", 206 | " # modifying the values in the range\n", 207 | " for cell in cell_list:\n", 208 | " val = columns[cell.col-1]\n", 209 | " if type(val) is str:\n", 210 | " val = val.decode('utf-8')\n", 211 | " cell.value = val\n", 212 | " # update in batch\n", 213 | " wksheet.update_cells(cell_list)\n", 214 | "\n", 215 | " #number of lines and columns\n", 216 | " num_lines, num_columns = df.shape\n", 217 | " # selection of the range that will be updated\n", 218 | " cell_list = wksheet.range('A2:'+numberToLetters(num_columns)+str(num_lines+1))\n", 219 | " # modifying the values in the range\n", 220 | " for cell in cell_list:\n", 221 | " val = df.iloc[cell.row-2,cell.col-1]\n", 222 | " if type(val) is str:\n", 223 | " val = val.decode('utf-8')\n", 224 | " elif isinstance(val, (int, long, float, complex)):\n", 225 | " # note that we round all numbers\n", 226 | " val = int(round(val))\n", 227 | " cell.value = val\n", 228 | " # update in batch\n", 229 | " wksheet.update_cells(cell_list)\n", 230 | "\n", 231 | "# Specify my DocumentDB settings\n", 232 | "DOCUMENTDB_HOST = 'https://testingflask.documents.azure.com:443/'\n", 233 | "DOCUMENTDB_KEY = 's610r3ylWxHNW87xKJYOmIzPWW/bHJNM7r4JCZ4PmSyJ2gUIEnasqH5wO9qkCY2LFkPV8kMulRa/U8+Ws9csoA=='\n", 234 | "DOCDB_DATABASE = 'mladsapp'\n", 235 | "DOCDB_COLLECTION_MASTER = 'master_collection'\n", 236 | "DOCDB_MASTER_DOC = 'masterdoc'\n", 237 | "\n", 238 | "# Again, specify my google drive api credentials\n", 239 | "json_key = json.load(open('MessyDoc-8f814e3f2a78.json'))\n", 240 | "scope = ['https://spreadsheets.google.com/feeds']\n", 241 | "credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'].encode(), scope)\n", 242 | "\n", 243 | "# Using gspread module and my credentials, grab the google doc I want\n", 244 | "gc = gspread.authorize(credentials)\n", 245 | "wksheet = gc.open(\"SSF_Crop_Master_2012_Master_crop_master\").worksheet('latest')\n", 246 | "\n", 247 | "# make a client connection\n", 248 | "client = document_client.DocumentClient(DOCUMENTDB_HOST, {'masterKey': DOCUMENTDB_KEY})\n", 249 | "\n", 250 | "# Read databases and get our working database\n", 251 | "db = next((data for data in client.ReadDatabases() if data['id'] == DOCDB_DATABASE))\n", 252 | "\n", 253 | "# Read collections and get the \"user collection\"\n", 254 | "coll_master = next((coll for coll in client.ReadCollections(db['_self']) if coll['id'] == DOCDB_COLLECTION_MASTER))\n", 255 | "\n", 256 | "# Get master doc from DocDB and place into dataframe\n", 257 | "master_doc = next((doc for doc in client.ReadDocuments(coll_master['_self']) if doc['id'] == DOCDB_MASTER_DOC))\n", 258 | "master_data_df = read_json(master_doc['data'])\n", 259 | "headers = read_json(master_doc['data_headers'])\n", 260 | "master_data_df.columns = headers\n", 261 | "\n", 262 | "# update all cells 
in master google doc with data in master doc from db\n", 263 | "# this takes a minute or two (maybe put into a separate view function)\n", 264 | "update_worksheet(wksheet, master_data_df)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": true 272 | }, 273 | "outputs": [], 274 | "source": [] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python 2", 280 | "language": "python", 281 | "name": "python2" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 2 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython2", 293 | "version": "2.7.11" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 0 298 | } 299 | -------------------------------------------------------------------------------- /azure_machine_learning/AnomalyDetection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Anomaly Detection with Azure Machine Learning APIs\n", 8 | "\n", 9 | "When is it good to have an anomaly detection service? Perhaps to watch out for:\n", 10 | "\n", 11 | "* Too many login failures\n", 12 | "* Spikes or dips in customer checkouts\n", 13 | "* An increase in the dynamic range of file ingestion speeds in a cloud service\n", 14 | "* An upward trend in system temperature\n", 15 | "\n", 16 | "These are cases found from monitoring a system where a closer look may be called for. They are indicative of abnormal or anomalous behavior and could indicate a problem. The data could be streaming from a device or come from log files, but no matter the source an anomaly detection model could help predict when a system needs to be examined further." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "#### For python 2 and 3 compatibility we have a few imports" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import json\n", 35 | "\n", 36 | "# Import compatibility libraries (python 2/3 support)\n", 37 | "from __future__ import absolute_import\n", 38 | "from __future__ import division\n", 39 | "from __future__ import print_function\n", 40 | "from __future__ import unicode_literals\n", 41 | "\n", 42 | "# Python 3\n", 43 | "try:\n", 44 | " from urllib.request import urlopen, Request\n", 45 | " from urllib.parse import urlparse, urlencode\n", 46 | " from http.client import HTTPSConnection\n", 47 | "# Python 2.7\n", 48 | "except ImportError:\n", 49 | " from urlparse import urlparse\n", 50 | " from urllib import urlencode\n", 51 | " from urllib2 import Request, urlopen\n", 52 | " from httplib import HTTPSConnection " 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "**Data**\n", 60 | "\n", 61 | "This is non-seasonal time series data." 
62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "collapsed": false 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "body = json.loads('''\n", 73 | "{\n", 74 | " \"data\": [\n", 75 | " [ \"9/21/2014 11:05:00 AM\", \"1.3\" ],\n", 76 | " [ \"9/21/2014 11:10:00 AM\", \"9.09\" ],\n", 77 | " [ \"9/21/2014 11:15:00 AM\", \"2.4\" ],\n", 78 | " [ \"9/21/2014 11:20:00 AM\", \"2.5\" ],\n", 79 | " [ \"9/21/2014 11:25:00 AM\", \"2.6\" ],\n", 80 | " [ \"9/21/2014 11:30:00 AM\", \"2.1\" ],\n", 81 | " [ \"9/21/2014 11:35:00 AM\", \"3.5\" ],\n", 82 | " [ \"9/21/2014 11:40:00 AM\", \"0\" ],\n", 83 | " [ \"9/21/2014 11:45:00 AM\", \"2.8\" ],\n", 84 | " [ \"9/21/2014 11:50:00 AM\", \"2.3\" ]\n", 85 | " ],\n", 86 | " \"params\": {\n", 87 | " \"tspikedetector.sensitivity\": \"4\",\n", 88 | " \"zspikedetector.sensitivity\": \"4\",\n", 89 | " \"trenddetector.sensitivity\": \"3.25\",\n", 90 | " \"bileveldetector.sensitivity\": \"3.25\",\n", 91 | " \"postprocess.tailRows\": \"0\"\n", 92 | " }\n", 93 | "}\n", 94 | "''')\n", 95 | "\n", 96 | "print(body)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "**The headers and parameters**\n", 104 | "\n", 105 | "The subscription key for Microsoft Azure DataMarket was placed in `config.json`. The key can be found by going to your account in the [Azure DataMarket](https://datamarket.azure.com/account/keys) (you may need to register)." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "f = urlopen('https://gist.githubusercontent.com/antriv/a6962d2c7580a0f7db4b7aabd6d768c5/raw/38a66f77c7fd0641324c8cbbff77828207041edc/config.json')\n", 117 | "url = f.read()\n", 118 | "CONFIG = json.loads(url)\n", 119 | "\n", 120 | "subscription_key = CONFIG['subscription_key_ADM']\n", 121 | "\n", 122 | "import base64\n", 123 | "creds = base64.b64encode('userid:' + subscription_key)\n", 124 | "\n", 125 | "headers = {'Content-Type':'application/json', 'Authorization':('Basic '+ creds)} \n", 126 | "\n", 127 | "# params will be added to POST in url request\n", 128 | "# right now it's empty because for this request we don't need any params\n", 129 | "# although we could have included 'selection' and 'offset' - see docs\n", 130 | "params = urlencode({})" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "**Make the request using the REST API**\n", 138 | "\n", 139 | "Note, we are using non-seasonal time series mock data." 
140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "try:\n", 151 | " \n", 152 | " # Post method request - note: body of request is converted from json to string\n", 153 | "\n", 154 | " conn = HTTPSConnection('api.datamarket.azure.com')\n", 155 | " \n", 156 | " conn.request(\"POST\", \"/data.ashx/aml_labs/anomalydetection/v2/Score/\", \n", 157 | " body = json.dumps(body), headers = headers)\n", 158 | " \n", 159 | " response = conn.getresponse()\n", 160 | " data = response.read()\n", 161 | " conn.close()\n", 162 | "except Exception as e:\n", 163 | " print(\"[Error: {0}] \".format(e))\n", 164 | " \n", 165 | "try:\n", 166 | " # Print the results - json response format\n", 167 | " print(json.dumps(json.loads(json.loads(data)['ADOutput']), \n", 168 | " sort_keys=True,\n", 169 | " indent=4, \n", 170 | " separators=(',', ': ')))\n", 171 | "except Exception as e:\n", 172 | " print(data)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "collapsed": true 179 | }, 180 | "source": [ 181 | "Output column meaning from [docs](https://azure.microsoft.com/en-us/documentation/articles/machine-learning-apps-anomaly-detection/):\n", 182 | "* Time (input)\n", 183 | "* Data (input)\n", 184 | "* TSpike: Binary indicator to indicate whether a spike is detected by TSpike Detector (1 = spike)\n", 185 | "* ZSpike: Binary indicator to indicate whether a spike is detected by ZSpike Detector (1 = spike)\n", 186 | "* RPScore: A floating number representing anomaly score on bidirectional level change\n", 187 | "* RPAlert: 1/0 value indicating there is an bi directional level change anomaly based on the input sensitivity\n", 188 | "* TScore: A floating number representing anomaly score on positive trend\n", 189 | "* TAlert: 1/0 value indicating there is a positive trend anomaly based on the input sensitivity" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "outputs": [], 199 | "source": [] 200 | } 201 | ], 202 | "metadata": { 203 | "anaconda-cloud": {}, 204 | "kernelspec": { 205 | "display_name": "Python 2", 206 | "language": "python", 207 | "name": "python2" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 2 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython2", 219 | "version": "2.7.11" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 0 224 | } 225 | -------------------------------------------------------------------------------- /cognitive_services/Computer_Vision_API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": "true" 7 | }, 8 | "source": [ 9 | " # Table of Contents\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Computer Vision API example" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### Instructions for Jupyter notebook usage\n", 25 | "* This is a jupyter notebook so that means interactive coding in a browser\n", 26 | "* You want to run each grey cell (the Python, in this case, code) in order and one at a time\n", 27 | "* Make sure each code cell has finished running (a number will appear inside the In[] when the cell is done)\n", 28 | "* You can also see if the notebook is running by looking for a filled in circle in the upper right corner of this notebook\n", 29 | "* Feel free to change code (even if you don't know Python)\n", 30 | "* There are many helper methods in this notebook so don't worry if you don't understand\n", 31 | "* Have fun with this and save the notebook often :)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## Let's take care of some installs" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# !conda install --no-deps -c menpo opencv=2.4.11 --yes" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# ! pip uninstall matplotlib --yes" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# ! pip install matplotlib -q" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Let's import some modules" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "import time \n", 82 | "import requests\n", 83 | "import cv2\n", 84 | "import operator\n", 85 | "import numpy as np\n", 86 | "import json\n", 87 | "\n", 88 | "# Import compatibility libraries (python 2/3 support)\n", 89 | "from __future__ import absolute_import\n", 90 | "from __future__ import division\n", 91 | "from __future__ import print_function\n", 92 | "from __future__ import unicode_literals\n", 93 | "\n", 94 | "# Python 3\n", 95 | "try:\n", 96 | " from urllib.request import urlopen, Request\n", 97 | " from urllib.parse import urlparse, urlencode\n", 98 | " from http.client import HTTPSConnection\n", 99 | "# Python 2.7\n", 100 | "except ImportError:\n", 101 | " from urlparse import urlparse\n", 102 | " from urllib import urlencode\n", 103 | " from urllib2 import Request, urlopen\n", 104 | " from httplib import HTTPSConnection\n", 105 | "\n", 106 | "# Import library to display results\n", 107 | "import matplotlib.pyplot as plt\n", 108 | "%matplotlib inline \n", 109 | "# Display images within Jupyter" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "## Now to set some constants" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "* The Cognitive Services keys (grabbing the computer vision one)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# Paste in your API key here to the 'subscription_key' variable or create a config.json\n", 133 | "\n", 134 | "f = 
urlopen('https://gist.githubusercontent.com/michhar/03b38c07be7a3a26aa83005e4b38d4f6/raw/6a214f22f63f2e28905f48c991438736bcc77eef/config.json')\n", 135 | "\n", 136 | "# This could also be an external file\n", 137 | "config_json = '''{\n", 138 | "\"subscription_key_computer_vision\": \"PASTE YOUR COMPUTER VISION KEY HERE\"\n", 139 | "}'''\n", 140 | "\n", 141 | "CONFIG=json.loads(config_json)\n", 142 | "# grab key from json in config\n", 143 | "subscription_key = CONFIG['subscription_key_computer_vision']" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "* **The Cognitive Services Computer Vision web service request url**" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# Variables\n", 160 | "\n", 161 | "# Make sure to take your Endpoint from Overview page of API in Azure Portal\n", 162 | "_url = 'https://westeurope.api.cognitive.microsoft.com/vision/v1.0/describe' # change the ending to 'analyze' or 'describe'\n", 163 | "_key = subscription_key\n", 164 | "\n", 165 | "_maxNumRetries = 10" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Helper functions" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "def processRequest( json, data, headers, params ):\n", 182 | "\n", 183 | " \"\"\"\n", 184 | " Helper function to process the request to Project Oxford\n", 185 | "\n", 186 | " Parameters:\n", 187 | " json: Used when processing images from its URL. See API Documentation\n", 188 | " data: Used when processing image read from disk. See API Documentation\n", 189 | " headers: Used to pass the key information and the data type request\n", 190 | " \"\"\"\n", 191 | "\n", 192 | " retries = 0\n", 193 | " result = None\n", 194 | "\n", 195 | " while True:\n", 196 | " # Use the requests library to make the POST call\n", 197 | " response = requests.request( 'post', \n", 198 | " _url, \n", 199 | " json=json, \n", 200 | " data=data, \n", 201 | " headers=headers, \n", 202 | " params=params )\n", 203 | "\n", 204 | " if response.status_code == 429: \n", 205 | " print( \"Message: %s\" % ( response.json()['error']['message'] ) )\n", 206 | " if retries <= _maxNumRetries: \n", 207 | " time.sleep(1) \n", 208 | " retries += 1\n", 209 | " continue\n", 210 | " else: \n", 211 | " print( 'Error: failed after retrying!' 
)\n", 212 | " break\n", 213 | " \n", 214 | " # We have a successful response, but let's do some tests on response data\n", 215 | " elif response.status_code == 200 or response.status_code == 201:\n", 216 | " if 'content-length' in response.headers and \\\n", 217 | " int(response.headers['content-length']) == 0: \n", 218 | " \n", 219 | " result = None\n", 220 | " \n", 221 | " elif 'content-type' in response.headers and \\\n", 222 | " isinstance(response.headers['content-type'], str):\n", 223 | " \n", 224 | " if 'application/json' in response.headers['content-type'].lower(): \n", 225 | " result = response.json() if response.content else None\n", 226 | " \n", 227 | " elif 'image' in response.headers['content-type'].lower(): \n", 228 | " result = response.content\n", 229 | " else:\n", 230 | " print( \"Error code: %d\" % ( response.status_code ) )\n", 231 | " print( \"Message: %s\" % ( response.json() ) )\n", 232 | "\n", 233 | " break\n", 234 | " \n", 235 | " return result" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 8, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "def renderResultOnImage( result, img ):\n", 245 | " \"\"\"Display the obtained results onto the input image\"\"\"\n", 246 | "\n", 247 | " if 'description' in result:\n", 248 | " descrip = result['description']['captions'][0]['text']\n", 249 | " print(descrip)\n", 250 | " cv2.putText(img, descrip, (30,70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 3)\n", 251 | " if 'categories' in result:\n", 252 | " # Put a rectangle around the image to reflect accent colors\n", 253 | " R = int(result['color']['accentColor'][:2],16)\n", 254 | " G = int(result['color']['accentColor'][2:4],16)\n", 255 | " B = int(result['color']['accentColor'][4:],16)\n", 256 | " if img is not None:\n", 257 | " cv2.rectangle(img,(0,0), (img.shape[1], img.shape[0]), color=(R,G,B), thickness=25)\n", 258 | "\n", 259 | " categoryName = sorted(result['categories'], key=lambda x: x['score'])[0]['name']\n", 260 | " print(categoryName)\n", 261 | " cv2.putText(img, categoryName, (30,70), cv2.FONT_HERSHEY_SIMPLEX, 2, (255,0,0), 3)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "## Analysis of an image retrieved via URL\n", 269 | "\n", 270 | "* **Place an image URL here in the `urlImage` variable (must be a good quality, large image)**\n", 271 | "\n", 272 | "Input requirements:\n", 273 | "* Supported image formats: JPEG, PNG, GIF, BMP.\n", 274 | "* Image file size must be less than 4MB.\n", 275 | "* Image dimensions should be greater than 50 x 50." 
276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 9, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "urlImage = 'http://gb.fotolibra.com/images/previews/392412-pair-of-swans-at-stanborough-park.jpeg'\n", 285 | "# urlImage = 'you own url'" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 10, 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "name": "stdout", 295 | "output_type": "stream", 296 | "text": [ 297 | "a swan swimming in a body of water\n" 298 | ] 299 | }, 300 | { 301 | "data": { 302 | "text/html": [ 303 | "" 304 | ], 305 | "text/plain": [ 306 | "" 307 | ] 308 | }, 309 | "metadata": {}, 310 | "output_type": "display_data" 311 | } 312 | ], 313 | "source": [ 314 | "from IPython.display import display, Image\n", 315 | "\n", 316 | "\n", 317 | "# Computer Vision parameters\n", 318 | "params = { 'visualFeatures' : 'Color,Categories'} \n", 319 | "\n", 320 | "headers = dict()\n", 321 | "headers['Ocp-Apim-Subscription-Key'] = _key\n", 322 | "headers['Content-Type'] = 'application/json' \n", 323 | "\n", 324 | "json = { 'url': urlImage } \n", 325 | "data = None\n", 326 | "\n", 327 | "result = processRequest(json, data, headers, params)\n", 328 | "\n", 329 | "if result is not None:\n", 330 | " # Load the original image, fetched from the URL\n", 331 | " arr = np.asarray( bytearray( requests.get( urlImage ).content ), \n", 332 | " dtype=np.uint8 )\n", 333 | " arr = arr[...,::-1]\n", 334 | " \n", 335 | " img = cv2.imdecode(arr, cv2.IMREAD_COLOR)\n", 336 | " \n", 337 | " \n", 338 | "\n", 339 | " renderResultOnImage(result, img)\n", 340 | " \n", 341 | " if img is not None:\n", 342 | " plt.subplots(figsize=(15, 20))\n", 343 | " plt.imshow(img)\n", 344 | " else:\n", 345 | " img = Image(url=urlImage, embed=False)\n", 346 | " display(img)\n" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "\n", 354 | "\n", 355 | "\n", 356 | "\n", 357 | "\n", 358 | "\n", 359 | "\n", 360 | "\n", 361 | "\n", 362 | "\n", 363 | "\n", 364 | "\n", 365 | "\n", 366 | "\n", 367 | "\n", 368 | "\n", 369 | "\n", 370 | "\n", 371 | "\n", 372 | "\n", 373 | "\n", 374 | "\n", 375 | "\n", 376 | "\n", 377 | "\n", 378 | "\n", 379 | "\n", 380 | "\n", 381 | "\n", 382 | "\n", 383 | "\n", 384 | "\n", 385 | "\n", 386 | "\n", 387 | "\n", 388 | "\n", 389 | "\n", 390 | "\n", 391 | "\n", 392 | "\n", 393 | "\n", 394 | "\n", 395 | "\n", 396 | "\n", 397 | "\n", 398 | "\n", 399 | "\n", 400 | "\n", 401 | "\n", 402 | "\n", 403 | "\n", 404 | "\n", 405 | "\n", 406 | "\n", 407 | "\n", 408 | "\n", 409 | "\n", 410 | "\n", 411 | "\n", 412 | "\n", 413 | "\n", 414 | "\n", 415 | "\n", 416 | "\n", 417 | "\n", 418 | "\n", 419 | "\n", 420 | "\n", 421 | "\n", 422 | "\n", 423 | "\n", 424 | "\n", 425 | "\n", 426 | "\n", 427 | "\n", 428 | "\n", 429 | "\n", 430 | "\n", 431 | "\n", 432 | "\n", 433 | "\n", 434 | "\n", 435 | "\n", 436 | "\n", 437 | "\n", 438 | "\n", 439 | "\n", 440 | "\n", 441 | "\n", 442 | "\n", 443 | "\n", 444 | "\n", 445 | "\n", 446 | "\n", 447 | "\n", 448 | "\n", 449 | "\n", 450 | "\n", 451 | "\n", 452 | "\n", 453 | "\n", 454 | "\n", 455 | "\n", 456 | "\n", 457 | "\n", 458 | "\n", 459 | "\n", 460 | "\n", 461 | "\n", 462 | "\n", 463 | "\n", 464 | "\n", 465 | "\n", 466 | "\n", 467 | "\n", 468 | "\n", 469 | "\n", 470 | "\n", 471 | "\n", 472 | "\n", 473 | "\n", 474 | "\n", 475 | "\n", 476 | "\n", 477 | "\n", 478 | "\n", 479 | "\n", 480 | "\n", 481 | "\n", 482 | "\n", 483 | "\n", 484 | "\n", 485 | "\n", 486 | "\n", 487 | "\n", 
488 | "\n", 489 | "\n", 490 | "\n", 491 | "\n", 492 | "\n", 493 | "\n", 494 | "\n", 495 | "\n", 496 | "\n", 497 | "\n", 498 | "\n", 499 | "\n", 500 | "\n", 501 | "\n", 502 | "\n", 503 | "\n", 504 | "\n", 505 | "\n", 506 | "\n", 507 | "\n", 508 | "\n", 509 | "\n", 510 | "\n", 511 | "\n", 512 | "\n", 513 | "\n", 514 | "\n", 515 | "\n", 516 | "\n", 517 | "\n", 518 | "\n", 519 | "\n", 520 | "\n", 521 | "\n", 522 | "\n", 523 | "\n", 524 | "\n", 525 | "\n", 526 | "\n", 527 | "\n", 528 | "\n", 529 | "\n", 530 | "\n", 531 | "\n", 532 | "\n", 533 | "\n", 534 | "\n", 535 | "\n", 536 | "\n", 537 | "\n", 538 | "\n", 539 | "\n", 540 | "\n", 541 | "\n", 542 | "\n", 543 | "\n", 544 | "\n", 545 | "\n", 546 | "\n", 547 | "\n", 548 | "\n", 549 | "\n", 550 | "\n", 551 | "\n", 552 | "\n", 553 | "\n", 554 | "\n", 555 | "\n", 556 | "\n", 557 | "\n", 558 | "\n", 559 | "\n", 560 | "\n", 561 | "\n", 562 | "\n", 563 | "\n", 564 | "\n", 565 | "\n", 566 | "\n", 567 | "\n", 568 | "\n", 569 | "\n", 570 | "\n", 571 | "\n", 572 | "\n", 573 | "\n", 574 | "\n", 575 | "\n", 576 | "\n", 577 | "\n", 578 | "\n", 579 | "\n", 580 | "\n", 581 | "\n", 582 | "\n", 583 | "\n", 584 | "\n", 585 | "\n", 586 | "\n", 587 | "\n", 588 | "\n", 589 | "\n", 590 | "\n", 591 | "\n", 592 | "\n", 593 | "\n", 594 | "\n", 595 | "\n", 596 | "\n", 597 | "\n", 598 | "\n", 599 | "\n", 600 | "\n", 601 | "\n", 602 | "\n", 603 | "\n", 604 | "\n", 605 | "\n", 606 | "\n", 607 | "\n", 608 | "\n", 609 | "\n", 610 | "\n", 611 | "\n", 612 | "\n", 613 | "\n", 614 | "\n", 615 | "\n", 616 | "\n", 617 | "\n", 618 | "\n", 619 | "\n", 620 | "\n", 621 | "\n", 622 | "\n", 623 | "\n", 624 | "\n", 625 | "\n", 626 | "\n", 627 | "\n", 628 | "\n", 629 | "\n", 630 | "\n", 631 | "\n", 632 | "\n", 633 | "\n", 634 | "\n", 635 | "\n", 636 | "\n", 637 | "\n", 638 | "\n", 639 | "\n", 640 | "\n", 641 | "\n", 642 | "\n", 643 | "\n", 644 | "\n", 645 | "\n", 646 | "\n", 647 | "\n", 648 | "\n", 649 | "\n", 650 | "\n", 651 | "\n", 652 | "\n", 653 | "\n", 654 | "\n", 655 | "\n", 656 | "\n", 657 | "\n", 658 | "\n", 659 | "\n", 660 | "\n", 661 | "\n", 662 | "\n", 663 | "\n", 664 | "\n", 665 | "\n", 666 | "\n", 667 | "\n", 668 | "\n", 669 | "\n", 670 | "\n", 671 | "\n", 672 | "\n", 673 | "\n", 674 | "\n", 675 | "\n", 676 | "\n", 677 | "\n", 678 | "\n", 679 | "\n", 680 | "\n", 681 | "\n", 682 | "\n", 683 | "\n", 684 | "\n", 685 | "\n", 686 | "\n", 687 | "\n", 688 | "\n", 689 | "\n", 690 | "\n", 691 | "\n", 692 | "\n", 693 | "\n", 694 | "\n", 695 | "\n", 696 | "\n", 697 | "\n", 698 | "\n", 699 | "\n", 700 | "\n", 701 | "\n", 702 | "\n", 703 | "\n", 704 | "\n", 705 | "\n", 706 | "\n", 707 | "\n", 708 | "\n", 709 | "\n", 710 | "\n", 711 | "\n", 712 | "\n", 713 | "\n", 714 | "\n", 715 | "\n", 716 | "\n", 717 | "\n", 718 | "\n", 719 | "\n", 720 | "\n", 721 | "\n", 722 | "\n", 723 | "\n", 724 | "\n", 725 | "\n", 726 | "\n", 727 | "\n", 728 | "\n", 729 | "\n", 730 | "\n", 731 | "\n", 732 | "\n", 733 | "\n", 734 | "\n", 735 | "\n", 736 | "\n", 737 | "\n", 738 | "\n", 739 | "\n", 740 | "\n", 741 | "\n", 742 | "\n", 743 | "\n", 744 | "\n", 745 | "\n", 746 | "\n", 747 | "\n", 748 | "\n", 749 | "\n", 750 | "\n", 751 | "\n", 752 | "\n", 753 | "\n", 754 | "\n", 755 | "\n", 756 | "\n", 757 | "\n", 758 | "\n", 759 | "\n", 760 | "\n", 761 | "\n", 762 | "\n", 763 | "\n", 764 | "\n", 765 | "\n", 766 | "\n", 767 | "\n", 768 | "\n", 769 | "\n", 770 | "\n", 771 | "\n", 772 | "\n", 773 | "\n", 774 | "\n", 775 | "\n", 776 | "\n", 777 | "\n", 778 | "\n", 779 | "\n", 780 | "\n", 781 | "\n", 782 | "\n", 783 | "\n", 
784 | "\n", 785 | "\n", 786 | "\n", 787 | "\n", 788 | "\n", 789 | "\n", 790 | "\n", 791 | "\n", 792 | "\n", 793 | "\n", 794 | "\n", 795 | "\n", 796 | "\n", 797 | "\n", 798 | "\n", 799 | "\n", 800 | "\n", 801 | "\n", 802 | "\n", 803 | "\n", 804 | "\n", 805 | "\n", 806 | "\n", 807 | "\n", 808 | "\n", 809 | "\n", 810 | "\n", 811 | "\n", 812 | "\n", 813 | "\n", 814 | "\n", 815 | "\n", 816 | "\n", 817 | "\n", 818 | "\n", 819 | "\n", 820 | "\n", 821 | "\n", 822 | "\n", 823 | "\n", 824 | "\n", 825 | "\n", 826 | "\n", 827 | "\n", 828 | "\n", 829 | "\n", 830 | "\n", 831 | "\n", 832 | "\n", 833 | "\n", 834 | "\n", 835 | "\n", 836 | "\n", 837 | "\n", 838 | "\n", 839 | "\n", 840 | "\n", 841 | "\n", 842 | "\n", 843 | "\n", 844 | "\n", 845 | "\n", 846 | "\n", 847 | "\n", 848 | "\n", 849 | "\n", 850 | "\n", 851 | "\n", 852 | "\n", 853 | "\n", 854 | "\n", 855 | "\n", 856 | "\n", 857 | "\n", 858 | "\n", 859 | "\n", 860 | "\n", 861 | "\n", 862 | "\n", 863 | "\n", 864 | "\n", 865 | "\n", 866 | "\n", 867 | "\n", 868 | "\n", 869 | "\n", 870 | "\n", 871 | "\n", 872 | "\n", 873 | "\n", 874 | "\n", 875 | "\n", 876 | "\n", 877 | "\n", 878 | "\n", 879 | "\n", 880 | "\n", 881 | "\n", 882 | "\n", 883 | "\n", 884 | "\n", 885 | "\n", 886 | "\n", 887 | "\n", 888 | "\n", 889 | "\n", 890 | "\n", 891 | "\n", 892 | "\n", 893 | "\n", 894 | "\n", 895 | "\n", 896 | "\n", 897 | "\n", 898 | "\n", 899 | "\n", 900 | "\n", 901 | "\n", 902 | "\n", 903 | "\n", 904 | "\n", 905 | "\n", 906 | "\n", 907 | "\n", 908 | "\n", 909 | "\n", 910 | "\n", 911 | "\n", 912 | "\n", 913 | "\n", 914 | "\n", 915 | "\n", 916 | "\n", 917 | "\n", 918 | "\n", 919 | "\n", 920 | "\n", 921 | "\n", 922 | "\n", 923 | "\n", 924 | "\n", 925 | "\n", 926 | "\n", 927 | "\n", 928 | "\n", 929 | "\n", 930 | "\n", 931 | "\n", 932 | "\n", 933 | "\n", 934 | "\n", 935 | "\n", 936 | "\n", 937 | "\n", 938 | "\n", 939 | "\n", 940 | "\n", 941 | "\n", 942 | "\n", 943 | "\n", 944 | "\n", 945 | "\n", 946 | "\n", 947 | "\n", 948 | "\n", 949 | "\n", 950 | "\n", 951 | "\n", 952 | "\n", 953 | "\n", 954 | "\n", 955 | "\n", 956 | "\n", 957 | "\n", 958 | "\n", 959 | "\n", 960 | "\n", 961 | "\n", 962 | "\n", 963 | "\n", 964 | "\n", 965 | "\n", 966 | "\n", 967 | "\n", 968 | "\n", 969 | "### EXERCISE" 970 | ] 971 | }, 972 | { 973 | "cell_type": "code", 974 | "execution_count": 11, 975 | "metadata": {}, 976 | "outputs": [], 977 | "source": [ 978 | "# try changing the word 'describe' to 'analyze' in the _url (web service url for REST api) cell towards the beginning" 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": null, 984 | "metadata": {}, 985 | "outputs": [], 986 | "source": [] 987 | } 988 | ], 989 | "metadata": { 990 | "anaconda-cloud": {}, 991 | "kernelspec": { 992 | "display_name": "Python [Anaconda3]", 993 | "language": "python", 994 | "name": "Python [Anaconda3]" 995 | }, 996 | "language_info": { 997 | "codemirror_mode": { 998 | "name": "ipython", 999 | "version": 3 1000 | }, 1001 | "file_extension": ".py", 1002 | "mimetype": "text/x-python", 1003 | "name": "python", 1004 | "nbconvert_exporter": "python", 1005 | "pygments_lexer": "ipython3", 1006 | "version": "3.5.2" 1007 | }, 1008 | "toc": { 1009 | "nav_menu": {}, 1010 | "number_sections": true, 1011 | "sideBar": true, 1012 | "skip_h1_title": false, 1013 | "toc_cell": true, 1014 | "toc_position": {}, 1015 | "toc_section_display": "block", 1016 | "toc_window_display": false 1017 | } 1018 | }, 1019 | "nbformat": 4, 1020 | "nbformat_minor": 1 1021 | } 1022 | 
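For the exercise at the end of the notebook above (switching the request from 'describe' to 'analyze'), only the final path segment of the service URL changes. A minimal sketch of that call follows; the subscription key, endpoint region and image URL are placeholders to replace with your own values, and the call shape mirrors the requests-based POST already used in the notebook's processRequest helper.

# Minimal sketch of the 'analyze' variant of the Computer Vision call; placeholder values only.
import requests

analyze_url = 'https://westeurope.api.cognitive.microsoft.com/vision/v1.0/analyze'
headers = {'Ocp-Apim-Subscription-Key': 'PASTE YOUR COMPUTER VISION KEY HERE',
           'Content-Type': 'application/json'}
params = {'visualFeatures': 'Color,Categories,Description'}  # analyze accepts a feature list
body = {'url': 'https://example.com/some-image.jpg'}          # any reachable JPEG/PNG under 4 MB

response = requests.post(analyze_url, json=body, headers=headers, params=params)
response.raise_for_status()
print(response.json())  # categories, dominant colors and a generated caption for the image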
-------------------------------------------------------------------------------- /cognitive_services/Emotion_Analysis_Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Emotion analysis example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### This Python 2 Jupyter notebook shows you how to get started with the Emotion API in Python, and how to visualize your results." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import time \n", 26 | "import requests\n", 27 | "import operator\n", 28 | "import numpy as np\n", 29 | "from __future__ import print_function\n", 30 | "import json\n", 31 | "from urlparse import urlparse\n", 32 | "from urllib import urlencode\n", 33 | "from urllib2 import Request, urlopen\n", 34 | "from httplib import HTTPSConnection \n", 35 | "# Import library to display results\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "import matplotlib.patches as patches\n", 38 | "from PIL import Image as PILImage\n", 39 | "%matplotlib inline \n", 40 | "# Display images within Jupyter" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "f = urlopen('https://gist.githubusercontent.com/antriv/a6962d2c7580a0f7db4b7aabd6d768c5/raw/38a66f77c7fd0641324c8cbbff77828207041edc/config.json')\n", 52 | "CONFIG=json.loads(f.read())\n", 53 | "# grab key from json in config\n", 54 | "subscription_key = CONFIG['subscription_key_Emotion']" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "# Variables\n", 66 | "\n", 67 | "_url = 'https://api.projectoxford.ai/emotion/v1.0/recognize'\n", 68 | "_key = subscription_key\n", 69 | "_maxNumRetries = 10" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "## Helper functions" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 4, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "def processRequest( json, data, headers, params ):\n", 88 | "\n", 89 | " \"\"\"\n", 90 | " Helper function to process the request to Project Oxford\n", 91 | "\n", 92 | " Parameters:\n", 93 | " json: Used when processing images from its URL. See API Documentation\n", 94 | " data: Used when processing image read from disk. See API Documentation\n", 95 | " headers: Used to pass the key information and the data type request\n", 96 | " \"\"\"\n", 97 | "\n", 98 | " retries = 0\n", 99 | " result = None\n", 100 | "\n", 101 | " while True:\n", 102 | "\n", 103 | " response = requests.request( 'post', _url, json = json, data = data, headers = headers, params = params )\n", 104 | "\n", 105 | " if response.status_code == 429: \n", 106 | "\n", 107 | " print( \"Message: %s\" % ( response.json()['error']['message'] ) )\n", 108 | "\n", 109 | " if retries <= _maxNumRetries: \n", 110 | " time.sleep(1) \n", 111 | " retries += 1\n", 112 | " continue\n", 113 | " else: \n", 114 | " print( 'Error: failed after retrying!' 
)\n", 115 | " break\n", 116 | "\n", 117 | " elif response.status_code == 200 or response.status_code == 201:\n", 118 | "\n", 119 | " if 'content-length' in response.headers and int(response.headers['content-length']) == 0: \n", 120 | " result = None \n", 121 | " elif 'content-type' in response.headers and isinstance(response.headers['content-type'], str): \n", 122 | " if 'application/json' in response.headers['content-type'].lower(): \n", 123 | " result = response.json() if response.content else None \n", 124 | " elif 'image' in response.headers['content-type'].lower(): \n", 125 | " result = response.content\n", 126 | " else:\n", 127 | " print( \"Error code: %d\" % ( response.status_code ) )\n", 128 | " print( \"Message: %s\" % ( response.json()['error']['message'] ) )\n", 129 | "\n", 130 | " break\n", 131 | " \n", 132 | " return result" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 5, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "def renderResultOnImage( result, img ):\n", 144 | " \n", 145 | " \"\"\"Display the obtained results onto the input image\"\"\"\n", 146 | " \n", 147 | " for currFace in result:\n", 148 | " faceRectangle = currFace['faceRectangle']\n", 149 | " cv2.rectangle( img,(faceRectangle['left'],faceRectangle['top']),\n", 150 | " (faceRectangle['left']+faceRectangle['width'], faceRectangle['top'] + faceRectangle['height']),\n", 151 | " color = (255,0,0), thickness = 5 )\n", 152 | "\n", 153 | "\n", 154 | " for currFace in result:\n", 155 | " faceRectangle = currFace['faceRectangle']\n", 156 | " currEmotion = max(currFace['scores'].items(), key=operator.itemgetter(1))[0]\n", 157 | "\n", 158 | "\n", 159 | " textToWrite = \"%s\" % ( currEmotion )\n", 160 | " cv2.putText( img, textToWrite, (faceRectangle['left'],faceRectangle['top']-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 1 )" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 6, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "# def renderResultOnImage2( result, img):\n", 172 | "import matplotlib.pyplot as plt\n", 173 | "\n", 174 | "\n", 175 | "urlImage = ''\n", 176 | "\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## Detect faces from an image retrieved via URL" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAgsAAAFkCAYAAACuFXjcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAFVhJREFUeJzt3X+M7XV95/HXmx+V4Ops7G3u1exNkKQiblN0RhpZVrcN\nFUqNRgIWB1jpxXXDQtPd6aY/kq6hkrTEtmDYDSy0sr2XqBNx/8K26SVQu9t4QevM4rZdQIPQBqtX\ntPayCij2fvaPc0aH2Tufe8+5M+fcuffxSE7CfOb7Pd/P+TjOPO/3e35Uay0AAOs5adoTAACObWIB\nAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUAoEssAABdYgEA6Bo5FqrqzVV1b1V9uaoOVtU7jmCf\nn6yqpap6vqq+UFVXjzddAGDSxjmz8NIkDye5LslhP1iiqs5I8odJHkhyTpJbk3y4qt46xrEBgAmr\no/kgqao6mOSdrbV7O9t8MMnFrbUfXzW2mGSmtfazYx8cAJiISTxn4U1J7l8ztjfJeRM4NgBwlE6Z\nwDF2JNm/Zmx/kpdX1Utaa99Zu0NV/XCSi5I8meT5TZ8hABw/TktyRpK9rbVvbMQdTiIWxnFRko9O\nexIAsIVdmeRjG3FHk4iFrybZvmZse5JnDnVWYejJJPnIRz6Ss88+exOnxmoLCwv50Ic+NO1pnFCs\n+eRZ88mz5pP1yCOP5KqrrkqGf0s3wiRi4cEkF68Zu3A4vp7nk+Tss8/O7OzsZs2LNWZmZqz3hFnz\nybPmk2fNp2bDLuOP8z4LL62qc6rq9cOhM4df7xx+/6aq2rNqlzuG23ywqs6qquuSXJbklqOePQCw\n6cZ5NcQbk/yvJEsZvM/CzUmWk3xg+P0dSXaubNxaezLJ25L8dAbvz7CQ5L2ttbWvkAAAjkEjX4Zo\nrf2PdCKjtbbrEGP/M8ncqMcCAKbPZ0PwffPz89OewgnHmk+eNZ88a771HdU7OG6WqppNsrS0tORJ\nMQAwguXl5czNzSXJXGtteSPu05kFAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUA\noEssAABdYgEA6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6xAIA\n0CUWAIAusQAAdIkFAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUAoEssAABdYgEA\n6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6xAIA0CUWAIAusQAA\ndIkFAKBrrFioquur6omqeq6qHqqqcw+z/ZVV9XBVfbuq/q6q7qqqV4w3ZQBgkkaOhaq6PMnNSW5I\n8oYkn0+yt6q2rbP9+Un2JPn9JK9LclmSn0jye2POGQCYoHHOLCwkubO1dndr7dEk1yZ5Nsk162z/\npiRPtNZua639TWttX5I7MwgGAOAYN1IsVNWpSeaSPLAy1lprSe5Pct46uz2YZGdVXTy8j+1J3pXk\nj8aZMAAwWaOeWdiW5OQk+9eM70+y41A7DM8kXJXk41X13SRfSfLNJL8w4rEBgCk4ZbMPUFWvS3Jr\nkt9Icl+SVyb53QwuRfyb3r4LCwuZmZl50dj8/Hzm5+c3Za4AsJUsLi5mcXHxRWMHDhzY8OPU4CrC\nEW48uAzxbJJLW2v3rhrfnWSmtXbJIfa5O8lprbWfWzV2fpI/T/LK1trasxSpqtkkS0tLS5mdnR3h\n4QDAiW15eTlzc3NJMtdaW96I+xzpMkRr7YUkS0kuWBmrqhp+vW+d3U5P8r01YweTtCQ1yvEBgMkb\n59UQtyR5X1W9p6pem+SODIJgd5JU1U1VtWfV9p9McmlVXVtVrx6eVbg1yWdaa189uukDAJtt5Ocs\ntNbuGb6nwo1Jtid5OMlFrbWnh5vsSLJz1fZ7quqfJLk+g+cq/EMGr6b4taOcOwAwAWM9wbG1dnuS\n29f53q5DjN2W5LZxjgUATJfPhgAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6\nxAIA0CUWAIAusQAAdIkFAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUAoEssAABd\nYgEA6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6xAIA0CUWAIAu\nsQAAdIkFAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUAoEssAABdYgEA6BILAECX\nWAAAusQCANAlFgCArrFioaqur6onquq5qnqoqs49zPY/VFW/WVVPVtXzVfWlqvr5sWYMAEzUKaPu\nUFWXJ7k5yb9N8tkkC0n2VtVrWmtfX2e3TyT5kSS7kjye5JVxVgMAtoSRYyGDOLiztXZ3klTVtUne\nluSaJL+9duOq+pkkb05yZmvtH4bDfzvedAGASRvpX/dVdWqSuSQPrIy11lqS+5Oct85ub0/yuSS/\nWlVPVdVjVfU7VXXamHMGACZo1DML25KcnGT/mvH9Sc5aZ58zMziz8HySdw7v478meUWS9454fABg\nwsa5DDGqk5IcTHJFa+1bSVJVv5TkE1V1XWvtO+vtuLCwkJmZmReNzc/PZ35+fjPnCwBbwuLiYhYX\nF180duDAgQ0/Tg2uIhzhxoPLEM8mubS1du+q8d1JZlprlxxin91J/kVr7TWrxl6b5K+TvKa19vgh\n9plNsrS0tJTZ2dkjfzQAcIJbXl7O3Nxcksy11pY34j5Hes5Ca+2FJEtJLlgZq6oafr1vnd0+neRV\nVXX6qrGzMjjb8NRIswUAJm6cly/ekuR9VfWe4RmCO5KcnmR3klTVTVW1Z9X2H0vyjSR/UFVnV9Vb\nMnjVxF29SxAAwLFh5OcstNbuqaptSW5Msj3Jw0kuaq09PdxkR5Kdq7b/dlW9Ncl/SfIXGYTDx5O8\n/yjnDgBMwFhPcGyt3Z7k9nW+t+sQY19IctE4xwIApsu7KAIAXWIBAOgSCwBAl1gAALrEAgDQJRYA\ngC6xAAB0iQUAoEssAABdYgEA6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsA\nQJdYAAC6xAIA0CUWAIAusQAAdIkFAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUA\noEssAABdYgEA6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6xAIA\n0CUWAIAusQAAdIkFAKBLLAAAXWIBAOgSCwBA11ixUFXXV9UTVfVcVT1UVece4X7nV9ULVbU8znEB\ngMkbORaq6vIkNye5Ickbknw+yd6q2naY/WaS7Ely/xjzBACmZJwzCwtJ7myt3d1aezTJtUmeTXLN\nYfa7I8lHkzw0xjEBgCkZKRaq6tQkc0keWBlrrbUMzhac
19lvV5JXJ/nAeNMEAKbllBG335bk5CT7\n14zvT3LWoXaoqh9N8ltJ/mVr7WBVjTxJAGB6Ro2FkVTVSRlcerihtfb4yvCR7r+wsJCZmZkXjc3P\nz2d+fn7jJgkAW9Ti4mIWFxdfNHbgwIENP04NriIc4caDyxDPJrm0tXbvqvHdSWZaa5es2X4myTeT\nfC8/iISThv/9vSQXttb+7BDHmU2ytLS0lNnZ2VEeDwCc0JaXlzM3N5ckc621DXn14UjPWWitvZBk\nKckFK2M1uK5wQZJ9h9jlmSQ/luT1Sc4Z3u5I8ujwvz8z1qwBgIkZ5zLELUl2V9VSks9m8OqI05Ps\nTpKquinJq1prVw+f/Ph/Vu9cVV9L8nxr7ZGjmTgAMBkjx0Jr7Z7heyrcmGR7koeTXNRae3q4yY4k\nOzduigDANI31BMfW2u1Jbl/ne7sOs+8H4iWUALBl+GwIAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQ\nJRYAgC6xAAB0iQUAoEssAABdYgEA6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDo\nEgsAQJdYAAC6xAIA0CUWAIAusQAAdIkFAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0\niQUAoEssAABdYgEA6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6\nxAIA0CUWAIAusQAAdIkFAKBLLAAAXWIBAOgaKxaq6vqqeqKqnquqh6rq3M62l1TVfVX1tao6UFX7\nqurC8acMAEzSyLFQVZcnuTnJDUnekOTzSfZW1bZ1dnlLkvuSXJxkNsmnknyyqs4Za8YAwESNc2Zh\nIcmdrbW7W2uPJrk2ybNJrjnUxq21hdba77bWllprj7fWfj3JF5O8fexZAwATM1IsVNWpSeaSPLAy\n1lprSe5Pct4R3kcleVmSvx/l2ADAdIx6ZmFbkpOT7F8zvj/JjiO8j19O8tIk94x4bABgCk6Z5MGq\n6ook70/yjtba1w+3/cLCQmZmZl40Nj8/n/n5+U2aIQBsHYuLi1lcXHzR2IEDBzb8ODW4inCEGw8u\nQzyb5NLW2r2rxncnmWmtXdLZ991JPpzkstbanxzmOLNJlpaWljI7O3vE8wOAE93y8nLm5uaSZK61\ntrwR9znSZYjW2gtJlpJcsDI2fA7CBUn2rbdfVc0nuSvJuw8XCgDAsWWcyxC3JNldVUtJPpvBqyNO\nT7I7SarqpiSvaq1dPfz6iuH3fjHJX1TV9uH9PNdae+aoZg8AbLqRY6G1ds/wPRVuTLI9ycNJLmqt\nPT3cZEeSnat2eV8GT4q8bXhbsSfrvNwSADh2jPUEx9ba7UluX+d7u9Z8/VPjHAMAODb4bAgAoEss\nAABdYgEA6BILAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6xAIA0CUW\nAIAusQAAdIkFAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUAoEssAABdYgEA6BIL\nAECXWAAAusQCANAlFgCALrEAAHSJBQCgSywAAF1iAQDoEgsAQJdYAAC6xAIA0CUWAIAusQAAdIkF\nAKBLLAAAXWIBAOgSCwBAl1gAALrEAgDQJRYAgC6xAAB0iQUAoEssAABdYoHvW1xcnPYUTjjWfPKs\n+eRZ861vrFioquur6omqeq6qHqqqcw+z/U9W1VJVPV9VX6iqq8ebLpvJ/6Enz5pPnjWfPGu+9Y0c\nC1V1eZKbk9yQ5A1JPp9kb1VtW2f7M5L8YZIHkpyT5NYkH66qt443ZQBgksY5s7CQ5M7W2t2ttUeT\nXJvk2STXrLP9v0vypdbar7TWHmut3Zbkvw/vBwA4xo0UC1V1apK5DM4SJElaay3J/UnOW2e3Nw2/\nv9rezvYAwDHklBG335bk5CT714zvT3LWOvvsWGf7l1fVS1pr3znEPqclySOPPDLi9DgaBw4cyPLy\n8rSncUKx5pNnzSfPmk/Wqr+dp23UfY4aC5NyRpJcddVVU57GiWdubm7aUzjhWPPJs+aTZ82n4owk\n+zbijkaNha8n+cck29eMb0/y1XX2+eo62z+zzlmFZHCZ4sokTyZ5fsQ5AsCJ7LQMQmHvRt3hSLHQ\nWnuhqpaSXJDk3iSpqhp+/Z/X2e3BJBevGbtwOL7ecb6R5GOjzA0A+L4NOaOwYpxXQ9yS5H1V9Z6q\nem2SO5KcnmR3klTVTVW1Z9X2dyQ5s6o+WFVnVdV1SS4b3g8AcIwb+TkLrbV7hu+pcGMGlxMeTnJR\na+3p4SY7kuxctf2TVfW2JB9K8otJnkry3tba2ldIAADHoBq88hEA4NB8NgQA0CUWAICuqcSCD6Ka\nvFHWvKouqar7quprVXWgqvZV1YWTnO/xYNSf81X7nV9VL1SVd7EZ0Ri/W36oqn6zqp4c/n75UlX9\n/ISme1wYY82vrKqHq+rbVfV3VXVXVb1iUvPd6qrqzVV1b1V9uaoOVtU7jmCfo/4bOvFY8EFUkzfq\nmid5S5L7MnjJ62ySTyX5ZFWdM4HpHhfGWPOV/WaS7Mn//xbpHMaYa/6JJD+VZFeS1ySZT/LYJk/1\nuDHG7/PzM/j5/v0kr8vglXE/keT3JjLh48NLM3hhwXVJDvukww37G9pam+gtyUNJbl31dWXwColf\nWWf7Dyb532vGFpP88aTnvlVvo675OvfxV0n+07Qfy1a5jbvmw5/tD2Twy3d52o9jK93G+N3yM0n+\nPsk/nfbct+ptjDX/j0m+uGbsF5L87bQfy1a8JTmY5B2H2WZD/oZO9MyCD6KavDHXfO19VJKXZfCL\nlcMYd82raleSV2cQC4xgzDV/e5LPJfnVqnqqqh6rqt+pqg17P/3j2Zhr/mCSnVV18fA+tid5V5I/\n2tzZntA25G/opC9D9D6Iasc6+3Q/iGpjp3dcGmfN1/rlDE593bOB8zqejbzmVfWjSX4ryZWttYOb\nO73j0jg/52cmeXOSf57knUn+fQanxW/bpDkeb0Ze89baviRXJfl4VX03yVeSfDODswtsjg35G+rV\nEHRV1RVJ3p/kXa21r097PsejqjopyUeT3NBae3xleIpTOlGclMFp3Ctaa59rrf1Jkl9KcrV/iGyO\nqnpdBtfMfyOD50NdlMHZtDunOC2OwKQ/dXJSH0TFD4yz5kmSqnp3Bk88uqy19qnNmd5xadQ1f1mS\nNyZ5fVWt/Kv2pAyuAH03yYWttT/bpLkeL8b5Of9Kki+31r61auyRDELtnyV5/JB7sWKcNf+1JJ9u\nra283f9fDT8C4M+r6tdba2v/BczR25C/oRM9s9BaeyHJygdRJXnRB1Gt96EXD67efqj7QVT8wJhr\nnqqaT3JXkncP/8XFERpjzZ9J8mNJXp/Bs5XPyeAzVR4d/vdnNnnKW96YP+efTvKqqjp91dhZGZxt\neGqTpnrcGHP
NT0/yvTVjBzN4Vr+zaZtjY/6GTuHZmz+X5Nkk70ny2gxOP30jyY8Mv39Tkj2rtj8j\nyf/N4BmdZ2XwcpHvJvnpaT8TdavcxljzK4ZrfG0GBbpye/m0H8tWuY265ofY36shNnnNM3gezt8k\n+XiSszN4yfBjSe6Y9mPZKrcx1vzqJN8Z/m55dZLzk3w2yb5pP5atchv+3J6TwT8uDib5D8Ovd66z\n5hvyN3RaD/a6JE8meS6Dunnjqu/9QZI/XbP9WzIo2OeSfDHJv572/2Bb7TbKmmfwvgr/eIjbf5v2\n49hKt1F/ztfsKxYmsOYZvLfC3iTfGobDbyd5ybQfx1a6jbHm1yf5y+GaP5XB+y68ctqPY6vckvyr\nYSQc8vfzZv0N9UFSAECXV0MAAF1iAQDoEgsAQJdYAAC6xAIA0CUWAIAusQAAdIkFAKBLLAAAXWIB\nAOgSCwBA1/8DBAe+bZJxNlwAAAAASUVORK5CYII=\n", 196 | "text/plain": [ 197 | "" 198 | ] 199 | }, 200 | "metadata": {}, 201 | "output_type": "display_data" 202 | } 203 | ], 204 | "source": [ 205 | "# URL direction to image\n", 206 | "urlImage = 'https://raw.githubusercontent.com/Microsoft/ProjectOxford-ClientSDK/master/Face/Windows/Data/detection3.jpg'\n", 207 | "\n", 208 | "headers = dict()\n", 209 | "headers['Ocp-Apim-Subscription-Key'] = _key\n", 210 | "headers['Content-Type'] = 'application/json' \n", 211 | "\n", 212 | "json = { 'url': urlImage } \n", 213 | "data = None\n", 214 | "params = None\n", 215 | "\n", 216 | "result = processRequest( json, data, headers, params )\n", 217 | "\n", 218 | "import cStringIO\n", 219 | "\n", 220 | "# Create figure and axes\n", 221 | "fig,ax = plt.subplots(1)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "collapsed": false 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "# Display the image\n", 233 | "myfile = cStringIO.StringIO(urlopen(urlImage).read())\n", 234 | "im = PILImage.open(myfile)\n", 235 | "ax.imshow(im)\n", 236 | "\n", 237 | "for currFace in result:\n", 238 | " faceRectangle = currFace['faceRectangle']\n", 239 | "\n", 240 | " # Create a Rectangle patch\n", 241 | " rect = patches.Rectangle((faceRectangle['left'], faceRectangle['top']),faceRectangle['width'], faceRectangle['height'],\n", 242 | " linewidth=1,edgecolor='r',facecolor='none' )\n", 243 | "\n", 244 | " # Add the patch to the Axes\n", 245 | " ax.add_patch(rect)\n", 246 | " \n", 247 | " currEmotion = str(max(currFace['scores'].items(), key=operator.itemgetter(1))[0])\n", 248 | " plt.text(faceRectangle['left'],faceRectangle['top'], currEmotion, color = 'r')\n", 249 | "\n", 250 | "plt.show()" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": true 258 | }, 259 | "outputs": [], 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "kernelspec": { 265 | "display_name": "Python 2", 266 | "language": "python", 267 | "name": "python2" 268 | }, 269 | "language_info": { 270 | "codemirror_mode": { 271 | "name": "ipython", 272 | "version": 2 273 | }, 274 | "file_extension": ".py", 275 | "mimetype": "text/x-python", 276 | "name": "python", 277 | "nbconvert_exporter": "python", 278 | "pygments_lexer": "ipython2", 279 | "version": "2.7.11" 280 | } 281 | }, 282 | "nbformat": 4, 283 | "nbformat_minor": 0 284 | } 285 | -------------------------------------------------------------------------------- /cognitive_services/EntityLinking.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Entity Linking with Microsoft Cognitive Services Entity Linking Intelligence Service API\n", 8 | "\n", 9 | "Entity Linking is a natural language processing tool to help analyzing text for your application. 
Entity Linking recognize a named-entity from given text and aligning a textual mention of the entity to an appropriate entry in a knowledge base. -*from ELIS API Reference*" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "source": [ 18 | "#### For python 2 and 3 compatibility we have a few imports" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import json\n", 30 | "\n", 31 | "# Import compatibility libraries (python 2/3 support)\n", 32 | "from __future__ import absolute_import\n", 33 | "from __future__ import division\n", 34 | "from __future__ import print_function\n", 35 | "from __future__ import unicode_literals\n", 36 | "\n", 37 | "# Python 3\n", 38 | "try:\n", 39 | " import json\n", 40 | " from urllib.request import urlopen, Request\n", 41 | " from urllib.parse import urlparse, urlencode\n", 42 | " from http.client import HTTPSConnection\n", 43 | "# Python 2.7\n", 44 | "except ImportError:\n", 45 | " import json\n", 46 | " from urlparse import urlparse\n", 47 | " from urllib import urlencode\n", 48 | " from urllib2 import Request, urlopen\n", 49 | " from httplib import HTTPSConnection" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "**Load our configuration file (just has subscription key as of now)**" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "response = urlopen('https://gist.githubusercontent.com/antriv/a6962d2c7580a0f7db4b7aabd6d768c5/raw/66d2f4219a566e2af995f6ce160e48851bf7811e/config.json')\n", 68 | "data = response.read().decode(\"utf-8\")\n", 69 | "CONFIG = json.loads(data)\n", 70 | "subscription_key = CONFIG['subscription_key_ELIS']" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "**Load our text data from a file**\n", 78 | "\n", 79 | "ELIS expects it in UTF-8 encoded plain text." 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "f = urlopen('https://raw.githubusercontent.com/michhar/bot-education/master/Student-Resources/CognitiveServices/Notebooks/sample_text.txt')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "# Read in a process to decode the strange quotes\n", 102 | "text = f.read().decode('utf-8')\n", 103 | "\n", 104 | "# Substitute decoded quotes with regular single quotes\n", 105 | "import re\n", 106 | "text = re.sub('\\u2019|\\u201c|\\u201d', \"'\", text).replace('\\n', ' ')\n", 107 | "text = text.encode('utf-8')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "You can also try some of your own text either in a file or a string literal in a code cell here." 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "**Set up the header and parameter part of request**\n", 122 | "\n", 123 | "Our content type is `'text/plain'` this time." 
124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "# http headers needed for POST request\n", 135 | "# we keep these as dict\n", 136 | "headers = {\n", 137 | " # Request headers - note content type is text/plain!\n", 138 | " 'Content-Type': 'text/plain',\n", 139 | " 'Ocp-Apim-Subscription-Key': subscription_key,\n", 140 | "}\n", 141 | "\n", 142 | "# params will be added to POST in url request\n", 143 | "# right now it's empty because for this request we don't need any params\n", 144 | "# although we could have included 'selection' and 'offset' - see docs\n", 145 | "params = urlencode({})" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "**Make the API request call**\n", 153 | "\n", 154 | "Given a specific paragraph of text within a document, the Entity Linking Intelligence Service will recognize and identify each separate entity based on the context" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "try:\n", 166 | " conn = HTTPSConnection('api.projectoxford.ai')\n", 167 | " \n", 168 | " # Post method request - note: body of request is converted from json to string\n", 169 | " conn.request(\"POST\", \"/entitylinking/v1.0/link?%s\" % params, body = text, headers = headers)\n", 170 | " response = conn.getresponse()\n", 171 | " data = response.read()\n", 172 | " conn.close()\n", 173 | "except Exception as e:\n", 174 | " print(\"[Error: {0}] \".format(e))\n", 175 | " \n", 176 | "# Print the results - json response format\n", 177 | "print(json.dumps(json.loads(data), \n", 178 | " sort_keys=True,\n", 179 | " indent=4, \n", 180 | " separators=(',', ': ')))" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": true 188 | }, 189 | "outputs": [], 190 | "source": [] 191 | } 192 | ], 193 | "metadata": { 194 | "kernelspec": { 195 | "display_name": "Python 2", 196 | "language": "python", 197 | "name": "python2" 198 | }, 199 | "language_info": { 200 | "codemirror_mode": { 201 | "name": "ipython", 202 | "version": 2 203 | }, 204 | "file_extension": ".py", 205 | "mimetype": "text/x-python", 206 | "name": "python", 207 | "nbconvert_exporter": "python", 208 | "pygments_lexer": "ipython2", 209 | "version": "2.7.11" 210 | } 211 | }, 212 | "nbformat": 4, 213 | "nbformat_minor": 0 214 | } 215 | -------------------------------------------------------------------------------- /cognitive_services/TextAnalytics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Key phrase detection with Microsoft Cognitive Services Text Analytics API\n", 8 | "\n", 9 | "The API returns a list of strings denoting the key talking points in the input text. We employ techniques from Microsoft Office's sophisticated Natural Language Processing toolkit. Currently, the following languages are supported: English, German, Spanish and Japanese." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "source": [ 18 | "#### For python 2 and 3 compatibility we have a few imports" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import json\n", 30 | "\n", 31 | "# Import compatibility libraries (python 2/3 support)\n", 32 | "from __future__ import absolute_import\n", 33 | "from __future__ import division\n", 34 | "from __future__ import print_function\n", 35 | "from __future__ import unicode_literals\n", 36 | "\n", 37 | "# Python 3\n", 38 | "try:\n", 39 | " from urllib.request import urlopen, Request\n", 40 | " from urllib.parse import urlparse, urlencode\n", 41 | " from http.client import HTTPSConnection\n", 42 | "# Python 2.7\n", 43 | "except ImportError:\n", 44 | " from urlparse import urlparse\n", 45 | " from urllib import urlencode\n", 46 | " from urllib2 import Request, urlopen\n", 47 | " from httplib import HTTPSConnection " 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "**Load our configuration file (just has subscription key as of now)**" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 2, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "response = urlopen('https://gist.githubusercontent.com/antriv/a6962d2c7580a0f7db4b7aabd6d768c5/raw/66d2f4219a566e2af995f6ce160e48851bf7811e/config.json')\n", 66 | "data = response.read().decode(\"utf-8\")\n", 67 | "CONFIG = json.loads(data)\n", 68 | "subscription_key = CONFIG['subscription_key_text_analytics']" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "**Set up the header and parameter part of request**" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 3, 81 | "metadata": { 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "# grab key from json in config\n", 87 | "subscription_key = CONFIG['subscription_key_text_analytics']\n", 88 | "\n", 89 | "# http headers needed for POST request\n", 90 | "# we keep these as dict\n", 91 | "headers = {\n", 92 | " # Request headers\n", 93 | " 'Content-Type': 'application/json',\n", 94 | " 'Ocp-Apim-Subscription-Key': subscription_key,\n", 95 | "}\n", 96 | "\n", 97 | "# params will be added to POST in url request\n", 98 | "# right now it's empty because for this request we don't need any params\n", 99 | "params = urlencode({})" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "**Here is our text for analysis in a Text Analytics-friendly format**\n", 107 | "* Note, you can have multipe chunks of text for separate analyses" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 11, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "# The request body - contains our corpus for analysis (load into json format)\n", 119 | "body = json.loads('''{\n", 120 | " \"documents\": [\n", 121 | " {\n", 122 | " \"id\": \"1\",\n", 123 | " \"text\": \"I have been a happy man ever since January 1, 1990, when I no longer had an email address. I'd used email since about 1975, and it seems to me that 15 years of email is plenty for one lifetime. Email is a wonderful thing for people whose role in life is to be on top of things. 
But not for me; my role is to be on the bottom of things. What I do takes long hours of studying and uninterruptible concentration. I try to learn certain areas of computer science exhaustively; then I try to digest that knowledge into a form that is accessible to people who don't have time for such study. On the other hand, I need to communicate with thousands of people all over the world as I write my books. I also want to be responsive to the people who read those books and have questions or comments. \" },\n", 124 | " {\n", 125 | " \"id\": \"2\",\n", 126 | " \"text\": \"Sure, I'm always ready to buy more electronics\"\n", 127 | " },\n", 128 | " {\n", 129 | " \"id\": \"3\",\n", 130 | " \"text\": \"[I'm a happy camper or feel free to change what is written here]\"\n", 131 | " }\n", 132 | " ]\n", 133 | "}''')" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "**Make the API request call**" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 12, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "{\"documents\":[{\"score\":0.9816817,\"id\":\"1\"},{\"score\":0.8170025,\"id\":\"2\"},{\"score\":0.8936079,\"id\":\"3\"}],\"errors\":[]}\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "try:\n", 160 | " conn = HTTPSConnection('westus.api.cognitive.microsoft.com')\n", 161 | " # https://westus.api.cognitive.microsoft.com/text/analytics/v2.0/sentiment\n", 162 | " # Post method request - note: body of request is converted from json to string\n", 163 | " conn.request(\"POST\", \"/text/analytics/v2.0/sentiment?%s\" % params, body = json.dumps(body), headers = headers)\n", 164 | " response = conn.getresponse()\n", 165 | " data = response.read()\n", 166 | " print(data)\n", 167 | " conn.close()\n", 168 | "except Exception as e:\n", 169 | " print(\"[Error: {0}] \".format(e))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | "outputs": [], 179 | "source": [] 180 | } 181 | ], 182 | "metadata": { 183 | "anaconda-cloud": {}, 184 | "kernelspec": { 185 | "display_name": "Python 2", 186 | "language": "python", 187 | "name": "python2" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 2 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython2", 199 | "version": "2.7.11" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 0 204 | } 205 | -------------------------------------------------------------------------------- /cognitive_services/VideoIndexer_Insights_Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Video Indexer Python Sample\n", 10 | "\n", 11 | "This sample will be using a simple `requests` library wrapper around the Microsoft Video Indexer API based on an OSS project: https://github.com/bklim5/python_video_indexer_lib.\n", 12 | "\n", 13 | "Resources:\n", 14 | "\n", 15 | "* Video Indexer Docs\n", 16 | "* Video Indexer Developer Portal\n", 17 | "* Video Indexer API Reference\n", 18 | "* Python Requests library" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | 
"source": [ 25 | "## Setup and imports" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import os\n", 35 | "import requests" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Download a public video of fish swimming, locally:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "! curl -O https://github.com/Azadehkhojandi/computer-vision-fish-frame-proposal/raw/master/videos/video1.mp4" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# A local file (we can also send URLs such as private blob storage links)\n", 61 | "video_file = 'video1.mp4'" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## Initialize with Video Indexer (VI) account info\n", 69 | "\n", 70 | "SUBSCRIPTION_KEY can be found at https://api-portal.videoindexer.ai/developer (go to your user name --> Profile) and LOCATION & ACCOUNT_ID can be found at https://www.videoindexer.ai/settings/account. If this is a trial account, use \"trial\" as LOCATION." 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "CONFIG = {\n", 80 | " 'SUBSCRIPTION_KEY': '',\n", 81 | " 'LOCATION': '',\n", 82 | " 'ACCOUNT_ID': ''\n", 83 | "}" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "headers = {\n", 93 | " 'Ocp-Apim-Subscription-Key': CONFIG['SUBSCRIPTION_KEY']\n", 94 | "}\n", 95 | "\n", 96 | "params = {\n", 97 | " 'allowEdit': 'True'\n", 98 | "}\n", 99 | "\n", 100 | "access_token_req = requests.get(\n", 101 | " 'https://api.videoindexer.ai/auth/{loc}/Accounts/{acc_id}/AccessToken'.format(\n", 102 | " loc=CONFIG['LOCATION'],\n", 103 | " acc_id=CONFIG['ACCOUNT_ID']\n", 104 | " ),\n", 105 | " params=params,\n", 106 | " headers=headers\n", 107 | ")\n", 108 | "\n", 109 | "access_token = access_token_req.text[1:-1]\n", 110 | "print('Access Token: {}'.format(access_token))" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Upload a video for indexing\n", 118 | "\n", 119 | "More information on parameters can be found at https://docs.microsoft.com/en-us/azure/media-services/video-indexer/upload-index-videos." 
120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "def upload_to_video_indexer(access_token, input_filename, video_name='', video_language='English'):\n", 129 | " \"\"\"Upload a video file to a Video Indexer account\"\"\"\n", 130 | " print('Uploading video to video indexer...')\n", 131 | " params = {\n", 132 | " 'streamingPreset': 'Default',\n", 133 | " 'indexingPreset': 'Default',\n", 134 | " 'language': video_language,\n", 135 | " 'name': video_name,\n", 136 | " 'accessToken': access_token\n", 137 | " }\n", 138 | "\n", 139 | " files = {\n", 140 | " 'file': open(input_filename, 'rb')\n", 141 | " }\n", 142 | "\n", 143 | " upload_video_req = requests.post(\n", 144 | " 'https://api.videoindexer.ai/{loc}/Accounts/{acc_id}/Videos'.format(\n", 145 | " loc=CONFIG['LOCATION'],\n", 146 | " acc_id=CONFIG['ACCOUNT_ID']\n", 147 | " ),\n", 148 | " params=params,\n", 149 | " files=files\n", 150 | " )\n", 151 | "\n", 152 | " if upload_video_req.status_code != 200:\n", 153 | " print('Error uploading video to video indexer: {}'.format(upload_video_req.json()))\n", 154 | " raise Exception('Error uploading video to video indexer')\n", 155 | "\n", 156 | " response = upload_video_req.json()\n", 157 | " return response['id']" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "video_id = upload_to_video_indexer(access_token, video_file, video_name='australian-fish-sample')" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "## Analyze video with Video Indexer\n", 174 | "\n", 175 | "The `get_video_info` below will indicate the processing status by video id." 
176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "def get_video_info(access_token, video_id, video_language='English'):\n", 185 | " \"\"\"Indicate the processing status\"\"\"\n", 186 | " params = {\n", 187 | " 'accessToken': access_token,\n", 188 | " 'language': video_language\n", 189 | " }\n", 190 | " print('Getting video info for: {}'.format(video_id))\n", 191 | "\n", 192 | " get_video_info_req = requests.get(\n", 193 | " 'https://api.videoindexer.ai/{loc}/Accounts/{acc_id}/Videos/{video_id}/Index'.format(\n", 194 | " loc=CONFIG['LOCATION'],\n", 195 | " acc_id=CONFIG['ACCOUNT_ID'],\n", 196 | " video_id=video_id\n", 197 | " ),\n", 198 | " params=params\n", 199 | " )\n", 200 | " response = get_video_info_req.json()\n", 201 | "\n", 202 | " if response['state'] == 'Processing':\n", 203 | " print('Video still processing, current status: {}'.format(\n", 204 | " response['videos'][0]['processingProgress']))\n", 205 | "\n", 206 | " return response" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "response_video_info = get_video_info(access_token, video_id)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "## Extract the summary" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "def extract_text_summary_from_video_indexer_info(info):\n", 232 | " \"\"\"\n", 233 | " Extract text features like keywords, sentiment and\n", 234 | " transcript\n", 235 | " \"\"\"\n", 236 | " return {\n", 237 | " 'durationInSeconds': info['durationInSeconds'],\n", 238 | " 'numberOfKeywords': len(info['summarizedInsights'].get('keywords', [])),\n", 239 | " 'keywords': info['summarizedInsights'].get('keywords', []),\n", 240 | " 'sumOfWordCount': sum(info['summarizedInsights']['statistics']['speakerWordCount'].values()),\n", 241 | " 'sentimentSeenDurationRatio': {\n", 242 | " x['sentimentKey']: x['seenDurationRatio'] for x in info['summarizedInsights']['sentiments']\n", 243 | " },\n", 244 | " 'sentimentScore': {\n", 245 | " x['sentimentType']: x['averageScore'] for x in info['videos'][0]['insights'].get('sentiments', [])\n", 246 | " },\n", 247 | " 'transcript': [\n", 248 | " {\n", 249 | " 'confidence': x['confidence'],\n", 250 | " 'text': x['text'],\n", 251 | " 'textLength': len(x['text'].split()),\n", 252 | " 'confidencePerText': x['confidence'] * len(x['text'].split())\n", 253 | " } for x in info['videos'][0]['insights'].get('transcript', [])\n", 254 | " ]\n", 255 | " }" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "extract_text_summary_from_video_indexer_info(response_video_info)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "def extract_vision_summary_from_video_indexer_info(info):\n", 274 | " \"\"\"Extract visual detected object names\"\"\"\n", 275 | " results = {}\n", 276 | " for i, label in enumerate(info['summarizedInsights']['labels']):\n", 277 | " if 'detected_labels' in results:\n", 278 | " results['detected_labels'].append(label['name'])\n", 279 | " else:\n", 280 | " results['detected_labels'] = [label['name']]\n", 281 | " return results" 282 | ] 283 | }, 
284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "extract_vision_summary_from_video_indexer_info(response_video_info)" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "## Appendix A: Get a video into Blob Storage" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "Run a script such as: https://github.com/michhar/azure-and-ml-utils/blob/master/azure/upload_to_blob_storage.py or, if running the notebook locally, the following snippet with do the trick." 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "import os\n", 314 | "from azure.storage.blob import BlockBlobService, PublicAccess\n", 315 | "import glob\n", 316 | "\n", 317 | "# Create the BlockBlockService that is used to call the Blob service for the storage account\n", 318 | "block_blob_service = BlockBlobService(account_name=args.account, account_key=args.key) \n", 319 | "\n", 320 | "# Create a container\n", 321 | "container_name = args.container\n", 322 | "block_blob_service.create_container(container_name) \n", 323 | "\n", 324 | "# Set the permission so the blobs are public.\n", 325 | "block_blob_service.set_container_acl(container_name, public_access=PublicAccess.Container)\n", 326 | "\n", 327 | "for filename in glob.iglob(os.path.join(args.directory, '**', '*.'+args.suffix), recursive=True):\n", 328 | " print('Uploading ', filename)\n", 329 | " # Upload the created file, use local_file_name for the blob name\n", 330 | " block_blob_service.create_blob_from_path(container_name, filename, filename)\n", 331 | "\n", 332 | "# Check that the files uploaded correctly to blob\n", 333 | "generator = block_blob_service.list_blobs(container_name)\n", 334 | "for blob in generator:\n", 335 | " print(\"Blob name in Azure: \" + blob.name)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": {}, 341 | "source": [ 342 | "## Get a video from Blob Storage" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "from azure.storage.blob.baseblobservice import BaseBlobService\n", 352 | "from azure.storage.blob import BlockBlobService, BlobPermissions\n", 353 | "from datetime import datetime, timedelta\n", 354 | "\n", 355 | "account_name = ''\n", 356 | "account_key = ''\n", 357 | "container_name = ''\n", 358 | "blob_name = ''\n", 359 | "service = BaseBlobService(account_name=account_name, account_key=account_key)\n", 360 | "\n", 361 | "token = service.generate_blob_shared_access_signature(container_name, blob_name, BlobPermissions.READ, datetime.utcnow() + timedelta(hours=1),)\n", 362 | "blobUrlWithSas = f\"https://{account_name}.blob.core.windows.net/{container_name}/{blob_name}?{token}\"" 363 | ] 364 | } 365 | ], 366 | "metadata": { 367 | "kernelspec": { 368 | "display_name": "Python 3", 369 | "language": "python", 370 | "name": "python3" 371 | }, 372 | "language_info": { 373 | "codemirror_mode": { 374 | "name": "ipython", 375 | "version": 3 376 | }, 377 | "file_extension": ".py", 378 | "mimetype": "text/x-python", 379 | "name": "python", 380 | "nbconvert_exporter": "python", 381 | "pygments_lexer": "ipython3", 382 | "version": "3.7.3" 383 | } 384 | }, 385 | "nbformat": 4, 386 | "nbformat_minor": 2 387 | } 388 | 
-------------------------------------------------------------------------------- /datatools/imgs/back_to_mask_monarch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/back_to_mask_monarch.png -------------------------------------------------------------------------------- /datatools/imgs/cheetah-mom-cubs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/cheetah-mom-cubs.jpg -------------------------------------------------------------------------------- /datatools/imgs/converted_back_rgb_monarch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/converted_back_rgb_monarch.png -------------------------------------------------------------------------------- /datatools/imgs/cv2_read_monarch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/cv2_read_monarch.png -------------------------------------------------------------------------------- /datatools/imgs/ladybug-daisy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/ladybug-daisy.jpg -------------------------------------------------------------------------------- /datatools/imgs/ladybug-leaf.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/ladybug-leaf.jpg -------------------------------------------------------------------------------- /datatools/imgs/mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/mask.jpg -------------------------------------------------------------------------------- /datatools/imgs/monarch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/monarch.jpg -------------------------------------------------------------------------------- /datatools/imgs/plt_read_monarch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/plt_read_monarch.png -------------------------------------------------------------------------------- /datatools/imgs/rgb2gray_monarch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/rgb2gray_monarch.png -------------------------------------------------------------------------------- /datatools/imgs/scale_abs_monarch.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/scale_abs_monarch.png -------------------------------------------------------------------------------- /datatools/imgs/snowleopard_cub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/datatools/imgs/snowleopard_cub.png -------------------------------------------------------------------------------- /from_scratch/adaline_batch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class AdalineGD: 4 | """Adaptive Linear Neuron classifier. 5 | 6 | Parameters 7 | ---------- 8 | 9 | eta : float 10 | Learning rate ([0,1]) 11 | niter : int 12 | Iterations on training dataset (epochs) 13 | 14 | Attributes 15 | ---------- 16 | 17 | w_ : 1d-array 18 | Weights post-fitting 19 | errors_ : list 20 | Count of misclassifications in each epoch 21 | 22 | """ 23 | 24 | def __init__(self, eta=0.01, niter=50): 25 | self.eta = eta 26 | self.niter = niter 27 | 28 | def fit(self, X, y): 29 | """Fit the training data. 30 | 31 | Parameters 32 | ---------- 33 | X : nd array-like, shape = [n_samples, n_features] 34 | Training data (1-n dimensional - represented as 35 | a matrix), where n_samples is the number of 36 | data points/samples and n_features is the number 37 | of features. 38 | y : 1d array-like, shape = [n_samples] 39 | Target labels/values (1d-array) 40 | 41 | Returns 42 | ------- 43 | self : object 44 | 45 | """ 46 | 47 | self.w_ = np.zeros(1 + X.shape[1]) 48 | self.cost_ = [] 49 | 50 | # Iterate over data (all data processed each time) 51 | for i in range(self.niter): 52 | output = self.net_input(X) 53 | 54 | # True label minus output value 55 | errors = (y - output) 56 | 57 | # Update weights based on sum of all errors 58 | self.w_[1:] += self.eta * X.T.dot(errors) 59 | 60 | # Update bias based on sum of all errors 61 | self.w_[0] += self.eta * errors.sum() 62 | 63 | # Cost function 64 | cost = (errors**2).sum() / 2.0 65 | self.cost_.append(cost) 66 | 67 | return self 68 | 69 | def net_input(self, X): 70 | """Calculate net input into network""" 71 | return np.dot(X, self.w_[1:]) + self.w_[0] 72 | 73 | def activation(self, X): 74 | """Compute linear activation""" 75 | return self.net_input(X) 76 | 77 | def predict(self, X): 78 | """Predict class label after applying linear activation function""" 79 | return np.where(self.activation(X) >= 0.0, 1, -1) 80 | 81 | -------------------------------------------------------------------------------- /from_scratch/adaline_sgd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import seed 3 | import math 4 | 5 | class AdalineSGD: 6 | """Adaptive Linear Neuron classifier with sigmoid activation. 7 | 8 | Parameters 9 | ---------- 10 | 11 | eta : float 12 | Learning rate ([0,1]) 13 | niter : int 14 | Iterations on training dataset (epochs) 15 | 16 | Attributes 17 | ---------- 18 | 19 | w_ : 1d-array 20 | Weights post-fitting 21 | errors_ : list 22 | Count of misclassifications in each epoch 23 | shuffle : bool (default: True) 24 | If True, shuffle data each epoch to avoid 25 | cycles. 
26 | random_state : int (default: None) 27 | Random state used for shuffling and 28 | initializing the weights. 29 | 30 | """ 31 | 32 | def __init__(self, eta=0.01, niter=10, shuffle=True, 33 | random_state=None): 34 | self.eta = eta 35 | self.niter = niter 36 | self.w_initialized = False 37 | self.shuffle = shuffle 38 | if random_state: 39 | seed(random_state) 40 | 41 | def fit(self, X, y): 42 | """Fit the training data. 43 | 44 | Parameters 45 | ---------- 46 | X : array-like, shape (n_samples, n_features) 47 | Training data (n-dimensional - represented as 48 | a matrix), where n_samples is the number of 49 | data points/samples and n_features is the number 50 | of features. 51 | y : array-like, shape (n_samples) 52 | Target labels/values (1d-array) 53 | 54 | Returns 55 | ------- 56 | self : object 57 | 58 | """ 59 | 60 | self._initialize_weights(X.shape[1]) 61 | self.cost_ = [] 62 | for i in range(self.niter): 63 | if self.shuffle: 64 | X, y = self._shuffle(X, y) 65 | cost = [] 66 | for xi, target in zip(X, y): 67 | cost.append(self._update_weights(xi, target)) 68 | mean_cost = np.mean(cost) 69 | self.cost_.append(mean_cost) 70 | return self 71 | 72 | def partial_fit(self, X, y): 73 | """Fit the training data without reinitializing the 74 | weights.""" 75 | if not self.w_initialized: 76 | self._initialize_weights(X.shape[1]) 77 | if y.ravel().shape[0] > 1: 78 | for xi, target in zip(X, y): 79 | self._update_weights(xi, target) 80 | else: 81 | self._update_weights(X, y) 82 | return self 83 | 84 | def net_input(self, X): 85 | """Calculate net input into network""" 86 | return np.dot(X, self.w_[1:]) + self.w_[0] 87 | 88 | def activation(self, X): 89 | """Compute sigmoidal activation 90 | 91 | Returns 92 | ------- 93 | A 1d array of length n_samples 94 | 95 | """ 96 | x = self.net_input(X) 97 | func = lambda v: 1 / (1 + math.exp(-v)) 98 | return np.array(list(map(func, x))) 99 | 100 | def predict(self, X): 101 | """Predict class label after applying activation function""" 102 | return np.where(self.activation(X) >= 0.5, 1, -1) 103 | 104 | def _shuffle(self, X, y): 105 | """Shuffle training data""" 106 | r = np.random.permutation(len(y)) 107 | return X[r], y[r] 108 | 109 | def _initialize_weights(self, m): 110 | """Initialize weights to zero""" 111 | self.w_ = np.zeros(1 + m) 112 | self.w_initialized = True 113 | 114 | def _update_weights(self, xi, target): 115 | """Apply the adaline learning rule to update the 116 | weights.""" 117 | output = self.net_input(xi) 118 | error = (target - output) 119 | self.w_[1:] += self.eta * xi.dot(error) 120 | self.w_[0] += self.eta * error 121 | cost = error**2 * 0.5 122 | return cost 123 | 124 | 125 | -------------------------------------------------------------------------------- /general/nb_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/general/nb_diagram.png -------------------------------------------------------------------------------- /images/automl_options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/images/automl_options.png -------------------------------------------------------------------------------- /primers/First_Look_R.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# A First Look at R\n", 8 | "\n", 9 | "This notebook is intended to provide you with a first look at the R programming language, starting from basic calculations and moving up through a linear regression. \n", 10 | "\n", 11 | "If you are unfamiliar with notebooks, please review some basics [here](https://github.com/michhar/useR2016-tutorial-jupyter). \n", 12 | "\n", 13 | "## Essential Tips\n", 14 | "\n", 15 | "A very brief summary of the critical components and commands within jupyter are:\n", 16 | "\n", 17 | "1. Critically, press `Ctrl+Enter` to run (or render) the current cell.\n", 18 | "2. Output will print to the notebook. You may have to scroll up to see it all.\n", 19 | "3. Get help for any function by typing a question mark and then its name into\n", 20 | " the console: `?rxLinMod`. It will split the window, and will bring up the documentation for \n", 21 | " that function below.\n", 22 | "5. Files will appear in the specified directory. You can find them by selecting File in the menu bar and selecting \"Open...\". This will open a new browser window with a file navigator.\n", 23 | "6. R objects can be viewed by typing `ls()` in an R cell.\n", 24 | "7. Run all the example code!\n", 25 | "\n", 26 | "There are a number of hands-on exercises in the document, so while you can run the notebook from beginning to end, you will get a lot more out of it by actually walking through cell-by-cell, and filling out the corresponding exercises.\n", 27 | "\n", 28 | "These notebooks are based on a tutorial presented at a Microsoft conference in June of 2016. The original files are available [here](https://github.com/joseph-rickert/MLADS_JUNE_2016)\n", 29 | "\n", 30 | "\n", 31 | "The first thing to note is that R is an interpretive language. At its most basic, the console can be used as a simple calculator:" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "2 + 2 " 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "Putting \";\" after a statement lets you put several statements on the same line." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "log(10); sqrt(99); sin(pi)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Exercise\n", 68 | "\n", 69 | "Enter some math commands in the cell below. For example, take the square root of 1, or compute the cosine of pi/2." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": true 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "# Exercise code: Place code in this cell and execute it with ctrl+ente\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## R was designed for statistical computing. \n", 88 | "\n", 89 | "Once you've established that you can work the interpreter as a calculator, it's useful to note that R was designed for statistical computing, so there are a number of functions that are already implemented to deal with statistical procedures.\n", 90 | "\n", 91 | "For example, in the next cell, we draw 1000 samples from a Normal (Gaussian) distribution with mean of 0 and variance of 1. 
We assign the 1000 samples to the vector `x` in **one line of code**! " 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "x <- rnorm(1000)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## R is quiet.\n", 110 | "\n", 111 | "Notice that you do not get any feedback!\n", 112 | "\n", 113 | "R usually does not give you any output unless you explicitly ask for it. Here are some basic ways of asking for output. " 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "head(x, 10) # Look at the first 10 numbers in the vector x\n", 125 | "tail(x,5) # Look at the last 5 numbers in the vector x\n", 126 | "length(x) # To check on the length of x\n", 127 | "summary(x) # Get a summary of x\n", 128 | "plot(x) # Produce a scatter plot of x vs. the index (the numbers 1 to 1000)\n", 129 | "hist(x) # Create a histogram of x" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "Notice that typing `#` is the way to introduce a comment. \n", 137 | "\n", 138 | "Next, we produce a fancier histogram with some color, a superimposed standard normal distribution curve and a \"rug\" underneath showing where the points are." 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "hist(x, freq = FALSE, col = \"pink\") \n", 150 | "curve(dnorm, # plot normal density\n", 151 | " col = \"dark blue\", # set color of curve\n", 152 | " lwd=2, # set line width of curve\n", 153 | " add = TRUE) # add curve to existing plot\n", 154 | "rug(x,col=\"red\") " 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## R has a rich set of built-in functions.\n", 162 | "In addition to the mathematical functions `sin()`, `log()`, we have just seen a number of other functions that are available, e.g.:\n", 163 | "\n", 164 | "- `rnorm()`\n", 165 | "- `head()`\n", 166 | "- `tail()`\n", 167 | "- `length()`\n", 168 | "- `summary()`\n", 169 | "- `plot()`\n", 170 | "- `hist()`\n", 171 | "- `curve()`\n", 172 | "\n", 173 | "To get help with a function named foo, type `?foo`. \n", 174 | "\n", 175 | "## Exercise\n", 176 | "\n", 177 | "We just used `hist()` a couple of different times, once with only the data as an argument, and a second time with a couple of additional arguments (`freq` and `col`). Use `?` to get help on `hist()` and see what other options are available. Can you use an argument to change the shading density of the columns?" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": true 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "## Get help here!\n" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## R is vectorized. \n", 196 | "\n", 197 | "We can frequently work with vectors in whole, rather than writing loops to process individual elements."
198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "y <- 1:10 # Assign the numbers 1 to 10 to a vector\n", 209 | "y # Print the vector\n", 210 | "2 + y; 2 * y # Perform vector addition and multiplication" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "## There are a lot of built-in data sets in R\n", 218 | "\n", 219 | "Type `data()` to see what data sets are available. " 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": true 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "data()" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Exercise\n", 238 | "\n", 239 | "The output which will appear under a new tab when you type this in the IDE depends on what packages you have loaded. In order to find out more, you can use `data(dataset_name)` to load the data set, and get help on the dataset by typing either `help(dataset_name)` or `?dataset_name`. Then type `dataset_name` to look at it. Load the `mtcars` dataset using `data()`, and then get help on it. Many functions work on datasets - try using some of the functions we used earlier on `x` on `mtcars` (e.g. `head()`, `tail()`, `summary()`" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": { 246 | "collapsed": true 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "## Place exercise code here:\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "The fundamental data structure for doing statistical analysis is a `data.frame`. \n", 258 | "`mtcars` is a `data.frame`. You can check this with the `class()` function." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "class(mtcars) # To check on the class of an R object\n", 270 | "class(x) # what kind of object is x?" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "## R shines at exploratory data analysis\n", 278 | "\n", 279 | "We can work with variables within a dataset in a variety of ways.\n", 280 | "\n", 281 | "One common way is to use the `with()` function to work with the variables in the data frame:" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": { 288 | "collapsed": false 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "with(mtcars,cor(drat,mpg)) \n", 293 | "# There is a positive correlation beteen therear axel ratio and mpg!!" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "## Extracting individual variables\n", 301 | "\n", 302 | "If we want to extract a single variable from a dataset, we can use `$` to refer directly to the variables in the data frame mtcars. " 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "plot(mtcars$drat,mtcars$mpg) " 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "Notice this is the same `plot()` function we used above for 1 variable!" 
321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "## R is really vectorized\n", 328 | "\n", 329 | "We just saw that `cor()` computes the correlations between two variables. However, if we pass a `data.frame` as an argument to `cor()`, then it actually computes *all pair-wise correlations*!\n" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "cor(mtcars)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "## R is functional\n", 348 | "\n", 349 | "We can use \"higher-level\" functions to dispatch functions across multiple variables as well!\n", 350 | "\n", 351 | "For example, we can use the `sapply()` function to apply the `scale()` function to every column in the data frame simultaneously. Then, we can also summarize and visualize the new `data.frame` in order to check that the `sapply()` call worked as expected.\n", 352 | "\n" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": { 359 | "collapsed": false 360 | }, 361 | "outputs": [], 362 | "source": [ 363 | "df <-sapply(mtcars,scale) # Center and scale all of the variables\n", 364 | "summary(df)\n", 365 | "boxplot(df, col=\"yellow\") # Compare the distributions of the variables" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "## Fit a simple regression model\n", 373 | "\n", 374 | "R also has a number of built-in functions for model fitting and estimation. For example, the `lm` function is used to estimate an ordinary-least-squares regression. \n", 375 | "\n", 376 | "Model specification is defined in a unique way with R - we use a `formula` syntax. We will talk about this in more depth later, but for now, we can create a model predicting miles per gallon based on rear axle ratio with the following command:" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": { 383 | "collapsed": true 384 | }, 385 | "outputs": [], 386 | "source": [ 387 | "reg1 <- lm(mtcars$mpg ~ mtcars$drat)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "Notice again that when I run `lm()`, no output is provided. In order to actually provide some output to the user, we need to actually do something to the `reg1` object that we just created. One of the most useful functions we can use on the output of a call to `lm()` is `summary()`:" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": null, 400 | "metadata": { 401 | "collapsed": false 402 | }, 403 | "outputs": [], 404 | "source": [ 405 | "summary(reg1) " 406 | ] 407 | }, 408 | { 409 | "cell_type": "markdown", 410 | "metadata": {}, 411 | "source": [ 412 | "Notice that we are using the same `summary()` function that we used above. We have used `summary()` to summarize our vector of samples, the mtcars dataset, and, now, an estimated regression model. 
\n", 413 | "\n", 414 | "While we can use `summary()`, there are also a host of other functions that we can use to investigate the results.\n" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": { 421 | "collapsed": false 422 | }, 423 | "outputs": [], 424 | "source": [ 425 | "anova(reg1) # Standard analysis of variance table\n", 426 | "coef(reg1) # extract the coefficients\n", 427 | "str(reg1) # examine the structure of the reg1 result\n", 428 | "fitted(reg1) # extract the fitted values\n", 429 | "resid(reg1) # extract the residuals" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "## Visualizing the Model\n", 437 | "\n", 438 | "Moreover, we can use various functions to either plot a regression line on-top of a scatter plot, or to create diagnostic plots to examine the model." 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": { 445 | "collapsed": false 446 | }, 447 | "outputs": [], 448 | "source": [ 449 | "\n", 450 | "# Plot the data with the regression line added\n", 451 | "plot(mtcars$drat,mtcars$mpg, main=\"Regression Model\")\n", 452 | "abline(reg1,col=\"red\")\n", 453 | "\n", 454 | "## diagnostic plots\n", 455 | "par(mfrow=c(2,2)) # Some code to put the 4 plots together\n", 456 | "plot(reg1) \n" 457 | ] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": {}, 462 | "source": [ 463 | "## Fit multiple regression model\n", 464 | "\n", 465 | "Fitting a multiple regression model involves the same exact function - `lm()`. The only difference is that we can specify more than one variable on the right hand side of the `formula`, and concatenate them with a `+` sign in order to indicate the inclusion of only additive effects. " 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": { 472 | "collapsed": true 473 | }, 474 | "outputs": [], 475 | "source": [ 476 | "reg2 <- lm(mpg ~ wt + qsec + gear + vs + am, data = mtcars) # Note the use of the data argument" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "Once we've estimated this model, we can use the *exact same* functions to interrogate this one as well. We can summarize it using `summary()`, extract coefficients using `coef()`, and even plot diagnostics with `plot()`:" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": { 490 | "collapsed": false 491 | }, 492 | "outputs": [], 493 | "source": [ 494 | "summary(reg2)\n", 495 | "coef(reg2)\n", 496 | "par(mfrow = c(2,2))\n", 497 | "plot(reg2)\n", 498 | "#str(reg2) # Look at the model object" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "## Exercise\n", 506 | "\n", 507 | "Can you extract the coefficient of determination (i.e. the `R^2`) value from the reg2 analysis? (Hint: it is *not* available as a field in `reg2`, but it *is* available as a field in its summary...).\n", 508 | "\n", 509 | "Can you fit a model with only the predictors that are significant in `reg2`? What is the `R^2` for that model?" 
510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": null, 515 | "metadata": { 516 | "collapsed": true 517 | }, 518 | "outputs": [], 519 | "source": [ 520 | "## Place exercise code here" 521 | ] 522 | }, 523 | { 524 | "cell_type": "markdown", 525 | "metadata": {}, 526 | "source": [ 527 | "## Clean up\n", 528 | "\n", 529 | "Finally we can use the `rm()` functions to remove objects from our workspace. This will remove any objects we have, so if we want to do something with them, we need to recreate them!" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": { 536 | "collapsed": false 537 | }, 538 | "outputs": [], 539 | "source": [ 540 | "rm(mtcars) # Remove mtcars\n", 541 | "rm(list=ls()) # Remove everything: USE WITH GREAT CARE!!" 542 | ] 543 | } 544 | ], 545 | "metadata": { 546 | "anaconda-cloud": {}, 547 | "kernelspec": { 548 | "display_name": "R", 549 | "language": "R", 550 | "name": "r" 551 | }, 552 | "language_info": { 553 | "codemirror_mode": "r", 554 | "file_extension": ".r", 555 | "mimetype": "text/x-r-source", 556 | "name": "R", 557 | "pygments_lexer": "r", 558 | "version": "3.3.0" 559 | } 560 | }, 561 | "nbformat": 4, 562 | "nbformat_minor": 0 563 | } 564 | -------------------------------------------------------------------------------- /primers/JavaScript_Primer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Instructions - please read through these :)\n", 8 | "\n", 9 | "\n", 10 | "* This is a jupyter notebook so that means **interactive coding** in a browser environment\n", 11 | "* You want to run each grey cell (the javascript or python) **in order and one at a time**\n", 12 | "* Make sure each code cell has **finished running** (a number will appear inside the In[] when the cell is done)\n", 13 | "* You can also see if the notebook is running by looking for a filled in circle in the upper right corner of this notebook\n", 14 | "* Feel free to **change code** (even if you don't know javascript)\n", 15 | "* The cells that **you should work with** will have TASKs\n", 16 | "* **Have fun** with this and save the notebook often :)" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### Objects\n", 24 | "* NOTE: Everything really is an object in JavaScript, but here's a sipmle classic object\n", 25 | "\n", 26 | "They can look like:\n", 27 | "\n", 28 | "```javascript\n", 29 | "// This is an object\n", 30 | "var myvar = {\n", 31 | " foo: \"bar\"\n", 32 | "}\n", 33 | "\n", 34 | "// This is a variable\n", 35 | "var result = myvar.foo;\n", 36 | "```\n", 37 | "Then we'll print it in a python code cell (just the trick we need to print vars in js)" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "%%javascript\n", 49 | "\n", 50 | "// TASK: Create an object\n", 51 | "\n", 52 | "\n", 53 | "\n", 54 | "// TASK save one attribute to the result variable (to print out)\n", 55 | "var result = _____\n", 56 | "\n", 57 | "// A trick save these variables back to python variables to work with later\n", 58 | "IPython.notebook.kernel.execute('result=\"' + result + '\";');" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": 
[ 69 | "# This is python - just using it to print\n", 70 | "print(result)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "### Functions\n", 78 | "\n", 79 | "\n", 80 | "Example function:\n", 81 | "\n", 82 | "```javascript\n", 83 | "\n", 84 | "// Define the function\n", 85 | "var myfunc = function() {\n", 86 | " return \"hello\";\n", 87 | "}\n", 88 | "\n", 89 | "// Use the funciton\n", 90 | "var result = myfunc();\n", 91 | "```\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "%%javascript\n", 103 | "\n", 104 | "// assign an anonymous func to a variable\n", 105 | "var printBacon = function() {\n", 106 | "\n", 107 | " // TASK: return something, otherwise undefined\n", 108 | "};\n", 109 | "\n", 110 | "// TASK: use this fucntion and place result in \"result\"\n", 111 | "\n", 112 | "\n", 113 | "// Our trick: save these variables back to python variables to work with later\n", 114 | "IPython.notebook.kernel.execute('result=\"' + result + '\";');" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# In python\n", 126 | "print(result)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "### Understand this and public vs. private\n", 134 | "\n", 135 | "\n", 136 | "\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "collapsed": false 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "%%javascript\n", 148 | "\n", 149 | "function Container(param) {\n", 150 | " // this attaches \"member\" to an instance of this class\n", 151 | " this.member = param;\n", 152 | "}\n", 153 | "\n", 154 | "var myContainer = new Container('abc');\n", 155 | "\n", 156 | "// TASK: Access the string value in the instatiation of myContainer\n", 157 | "\n", 158 | "\n", 159 | "IPython.notebook.kernel.execute('result=\"' + result + '\";');" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "collapsed": true 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "print(result)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "%%javascript\n", 182 | "\n", 183 | "function GymMember(name) {\n", 184 | " this.membername = name;\n", 185 | " \n", 186 | " // These are private\n", 187 | " var id = 3;\n", 188 | " var that = this;\n", 189 | " \n", 190 | "\n", 191 | "};\n", 192 | "\n", 193 | "\n", 194 | "// TASKS: \n", 195 | "// 1) instance of class \n", 196 | "// 2) access id from the instance object\n", 197 | "// 3) write an anonymous function in class \n", 198 | "// 4) access that class method\n", 199 | "\n", 200 | "\n", 201 | "// What is the issue?\n", 202 | "\n", 203 | "// You can find more info at: http://www.crockford.com/javascript/private.html\n", 204 | "\n", 205 | "IPython.notebook.kernel.execute('result=\"' + result + '\";');" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": false 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "print(result)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "### A 
very simple game to show off classes in javascript\n", 224 | "* Fill in the places that have a \"TASK\"" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "collapsed": false 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "%%javascript\n", 236 | "\n", 237 | "// a User class:\n", 238 | "function User() {\n", 239 | " this.name = \"\";\n", 240 | " this.life = 100;\n", 241 | " this.giveLife = function giveLife(targetPlayer) {\n", 242 | " targetPlayer.life += 1;\n", 243 | " \n", 244 | " // TASK: We can give life points, but what about taking away life points\n", 245 | " }\n", 246 | "}\n", 247 | "\n", 248 | "// Use class method\n", 249 | "var Alfred = new User();\n", 250 | "var Mich = new User();\n", 251 | "\n", 252 | "// Names were blank so give them name values\n", 253 | "Alfred.name = \"Alfred\";\n", 254 | "Mich.name = \"Mich\";\n", 255 | "\n", 256 | "\n", 257 | "// TASK: give the life points from Alfred to Mich\n", 258 | "\n", 259 | "\n", 260 | "// Save these variables back to python variables to work with later\n", 261 | "IPython.notebook.kernel.execute('Alfred_life=\"' + Alfred.life + '\";');\n", 262 | "IPython.notebook.kernel.execute('Mich_life=\"' + Mich.life + '\";');" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "print(Alfred_life)\n", 274 | "print(Mich_life)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "### Let's look at prototyping\n", 282 | "* It's where we can add a method to a class after the fact\n", 283 | "\n", 284 | "Such as\n", 285 | "\n", 286 | "```javascript\n", 287 | "\n", 288 | "// Change a user's name\n", 289 | "User.prototype.newFancyMethod = function(newname) {\n", 290 | " this.name = newname;\n", 291 | "}\n", 292 | "```" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 15, 298 | "metadata": { 299 | "collapsed": false 300 | }, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "application/javascript": [ 305 | "\n", 306 | "// a User class:\n", 307 | "function User() {\n", 308 | " this.name = \"\";\n", 309 | " this.life = 100;\n", 310 | " this.giveLife = function giveLife(targetPlayer) {\n", 311 | " targetPlayer.life += 1; \n", 312 | " this.life-=1;\n", 313 | " }\n", 314 | "}\n", 315 | "\n", 316 | "// TASK: Create a prototyped extra method to do something harmful to a player (e.g. 
punch)\n", 317 | "User.prototype.punch = function(targetPlayer) {\n", 318 | " targetPlayer -= 2;\n", 319 | " this.life += 3;\n", 320 | "}\n", 321 | "\n", 322 | "\n", 323 | "// Use class method\n", 324 | "var Alfred = new User();\n", 325 | "var Mich = new User();\n", 326 | "\n", 327 | "// Names were blank so give them name values\n", 328 | "Alfred.name = \"Alfred\";\n", 329 | "Mich.name = \"Mich\";\n", 330 | "\n", 331 | "\n", 332 | "// give the life points from Alfred to Mich\n", 333 | "Alfred.giveLife(Mich);\n", 334 | "\n", 335 | "\n", 336 | "// TASK: Use your harmful method\n", 337 | "Alfred.punch(Mich);\n", 338 | "\n", 339 | "\n", 340 | "// Save these variables back to python variables to work with later\n", 341 | "IPython.notebook.kernel.execute('Alfred_life=\"' + Alfred.life + '\";');\n", 342 | "IPython.notebook.kernel.execute('Mich_life=\"' + Mich.life + '\";');" 343 | ], 344 | "text/plain": [ 345 | "" 346 | ] 347 | }, 348 | "metadata": {}, 349 | "output_type": "display_data" 350 | } 351 | ], 352 | "source": [ 353 | "%%javascript\n", 354 | "\n", 355 | "// a User class:\n", 356 | "function User() {\n", 357 | " this.name = \"\";\n", 358 | " this.life = 100;\n", 359 | " this.giveLife = function giveLife(targetPlayer) {\n", 360 | " targetPlayer.life += 1; \n", 361 | " this.life-=1;\n", 362 | " }\n", 363 | "}\n", 364 | "\n", 365 | "// TASK: Create a prototyped extra method to do something harmful to a player (e.g. punch)\n", 366 | "\n", 367 | "\n", 368 | "\n", 369 | "// Use class method\n", 370 | "var Alfred = new User();\n", 371 | "var Mich = new User();\n", 372 | "\n", 373 | "// Names were blank so give them name values\n", 374 | "Alfred.name = \"Alfred\";\n", 375 | "Mich.name = \"Mich\";\n", 376 | "\n", 377 | "\n", 378 | "// give the life points from Alfred to Mich\n", 379 | "Alfred.giveLife(Mich);\n", 380 | "\n", 381 | "\n", 382 | "// TASK: Use your harmful method\n", 383 | "Alfred.punch(Mich);\n", 384 | "\n", 385 | "\n", 386 | "// Save these variables back to python variables to work with later\n", 387 | "IPython.notebook.kernel.execute('Alfred_life=\"' + Alfred.life + '\";');\n", 388 | "IPython.notebook.kernel.execute('Mich_life=\"' + Mich.life + '\";');" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 16, 394 | "metadata": { 395 | "collapsed": false 396 | }, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "102\n", 403 | "101\n" 404 | ] 405 | } 406 | ], 407 | "source": [ 408 | "print(Alfred_life)\n", 409 | "print(Mich_life)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### BTW This would all be on the server side (for Node.js)\n", 417 | "\n", 418 | "Let's talk about callbacks for a moement. Callback explanation for this example:\n", 419 | "\n", 420 | "```unix\n", 421 | "I'm the server and I gave an order to chef. 
The chef will call me \n", 422 | " back when order is ready, meanwhile, I'll be doing something \n", 423 | " else (like taking the next order to place).\n", 424 | "```" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": null, 430 | "metadata": { 431 | "collapsed": false 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "%%javascript\n", 436 | "\n", 437 | "//==========IGNORE THIS STUFF============\n", 438 | "\n", 439 | "function sleep(milliseconds) {\n", 440 | " var start = new Date().getTime();\n", 441 | " for (var i = 0; i < 1e7; i++) {\n", 442 | " if ((new Date().getTime() - start) > milliseconds){\n", 443 | " break;\n", 444 | " }\n", 445 | " }\n", 446 | "}\n", 447 | "//========================================\n", 448 | "\n", 449 | "\n", 450 | "\n", 451 | "var result;\n", 452 | "var message = ''; // you will be modifying this\n", 453 | "\n", 454 | "function placeAnOrder(orderNumber) {\n", 455 | " result = orderNumber;\n", 456 | " \n", 457 | " // It's a function as an argument\n", 458 | " cookAndDeliverFood(function() {\n", 459 | " // TASK: change the \"message\" var to reflect the order number (Hint: orderNumber.toString())\n", 460 | " });\n", 461 | "\n", 462 | "}\n", 463 | "\n", 464 | "// This function we want to run asynchronously (but we're not here)\n", 465 | "// This is (should be) simulating some time consuming operation like connecting to a database\n", 466 | "function cookAndDeliverFood(callback) {\n", 467 | " sleep(5000);\n", 468 | " callback(); // usually this is done asynchronously with setTimeout\n", 469 | " message += ' cooking, ';\n", 470 | "}\n", 471 | "\n", 472 | "// TASK: Place a few orders\n", 473 | "\n", 474 | "\n", 475 | "\n", 476 | "IPython.notebook.kernel.execute('result=\"' + message + '\";');" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": { 483 | "collapsed": false 484 | }, 485 | "outputs": [], 486 | "source": [ 487 | "print(result)" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 39, 493 | "metadata": { 494 | "collapsed": false 495 | }, 496 | "outputs": [ 497 | { 498 | "data": { 499 | "application/javascript": [ 500 | "\n", 501 | "var message = \"\";\n", 502 | "// var currentdate = new Date();\n", 503 | "var currentdate = Date.now();\n", 504 | "// var currentsecs = currentdate.getSeconds()\n", 505 | "setInterval(function() {\n", 506 | " message += \"hot dog \\n\";\n", 507 | "}, 2000);\n", 508 | "var nextdate = Date.now();\n", 509 | "\n", 510 | "var result = currentdate.toString();\n", 511 | "var result2 = nextdate.toString()\n", 512 | "\n", 513 | "IPython.notebook.kernel.execute('result=\"' + result + '\";');\n", 514 | "IPython.notebook.kernel.execute('result2=\"' + result2 + '\";');\n", 515 | "IPython.notebook.kernel.execute('result3=\"' + message + '\";');" 516 | ], 517 | "text/plain": [ 518 | "" 519 | ] 520 | }, 521 | "metadata": {}, 522 | "output_type": "display_data" 523 | } 524 | ], 525 | "source": [ 526 | "%%javascript\n", 527 | "\n", 528 | "var message = \"\";\n", 529 | "// var currentdate = new Date();\n", 530 | "var currentdate = Date.now();\n", 531 | "// var currentsecs = currentdate.getSeconds()\n", 532 | "setInterval(function() {\n", 533 | " message += \"hot dog \\n\";\n", 534 | "}, 2000);\n", 535 | "\n", 536 | "var nextdate = Date.now();\n", 537 | "\n", 538 | "var result = currentdate.toString();\n", 539 | "var result2 = nextdate.toString()\n", 540 | "\n", 541 | "IPython.notebook.kernel.execute('result=\"' + result + '\";');\n", 542 | 
"IPython.notebook.kernel.execute('result2=\"' + result2 + '\";');\n", 543 | "IPython.notebook.kernel.execute('result3=\"' + message + '\";');" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 40, 549 | "metadata": { 550 | "collapsed": false 551 | }, 552 | "outputs": [ 553 | { 554 | "name": "stdout", 555 | "output_type": "stream", 556 | "text": [ 557 | "1487571361086\n", 558 | "1487571361086\n", 559 | "\n" 560 | ] 561 | } 562 | ], 563 | "source": [ 564 | "print(result)\n", 565 | "print(result2)\n", 566 | "print(result3)" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": { 573 | "collapsed": true 574 | }, 575 | "outputs": [], 576 | "source": [] 577 | } 578 | ], 579 | "metadata": { 580 | "kernelspec": { 581 | "display_name": "Python 3", 582 | "language": "python", 583 | "name": "python3" 584 | }, 585 | "language_info": { 586 | "codemirror_mode": { 587 | "name": "ipython", 588 | "version": 3 589 | }, 590 | "file_extension": ".py", 591 | "mimetype": "text/x-python", 592 | "name": "python", 593 | "nbconvert_exporter": "python", 594 | "pygments_lexer": "ipython3", 595 | "version": "3.5.1" 596 | } 597 | }, 598 | "nbformat": 4, 599 | "nbformat_minor": 2 600 | } 601 | -------------------------------------------------------------------------------- /primers/Jupyter_and_JavaScript.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## The basic idea" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [ 17 | { 18 | "data": { 19 | "text/html": [ 20 | "
<div id='textid'>100</div>
" 21 | ], 22 | "text/plain": [ 23 | "" 24 | ] 25 | }, 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "output_type": "execute_result" 29 | } 30 | ], 31 | "source": [ 32 | "# Here's a python cell!\n", 33 | "\n", 34 | "from IPython.display import HTML\n", 35 | "\n", 36 | "mypynumber = str(100)\n", 37 | "\n", 38 | "# Now we embed this number in the DOM by calling the HTML widget \n", 39 | "# with a div element\n", 40 | "HTML(\"
\" + mypynumber + \"
\")\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "application/javascript": [ 53 | "\n", 54 | "// This is a javascript cell!\n", 55 | "\n", 56 | "// Let's get the number we made in python from the DOM\n", 57 | "var myjsnumber = Number(document.getElementById('textid').innerHTML);\n", 58 | "\n", 59 | "// Let's increment the number at hand\n", 60 | "myjsnumber+=1\n", 61 | "\n", 62 | "// Send this back as a python variable (this just executes python statements)\n", 63 | "IPython.notebook.kernel.execute('mypynumber=\"' + myjsnumber + '\";');" 64 | ], 65 | "text/plain": [ 66 | "" 67 | ] 68 | }, 69 | "metadata": {}, 70 | "output_type": "display_data" 71 | } 72 | ], 73 | "source": [ 74 | "%%javascript\n", 75 | "\n", 76 | "// This is a javascript cell!\n", 77 | "\n", 78 | "// Let's get the number we made in python from the DOM\n", 79 | "var myjsnumber = Number(document.getElementById('textid').innerHTML);\n", 80 | "\n", 81 | "// Let's increment the number at hand\n", 82 | "myjsnumber+=1\n", 83 | "\n", 84 | "// Send this back as a python variable (this just executes python statements)\n", 85 | "IPython.notebook.kernel.execute('mypynumber=\"' + myjsnumber + '\";');" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 3, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "'100'" 99 | ] 100 | }, 101 | "execution_count": 3, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "# This is a python cell!\n", 108 | "\n", 109 | "# Has this been incremented?\n", 110 | "mypynumber" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### A Game" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 4, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/html": [ 130 | "
<div id='textid'>100</div>
" 131 | ], 132 | "text/plain": [ 133 | "" 134 | ] 135 | }, 136 | "execution_count": 4, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "# Python\n", 143 | "from IPython.display import HTML\n", 144 | "\n", 145 | "pystartlife = str(100)\n", 146 | "HTML(\"
\" + pystartlife + \"
\")" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 7, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "application/javascript": [ 159 | "\n", 160 | "// Get the python variable from the DOM\n", 161 | "var startlife = document.getElementById('textid').innerHTML;\n", 162 | "\n", 163 | "// Define a User class with a method\n", 164 | "function User() {\n", 165 | " this.name = '';\n", 166 | " this.life = Number(startlife);\n", 167 | " this.giveLife = function giveLife(targetPlayer) {\n", 168 | " targetPlayer.life += 1;\n", 169 | " this.life -= 1;\n", 170 | " }\n", 171 | "}\n", 172 | "\n", 173 | "// Use class\n", 174 | "var Alfred = new User();\n", 175 | "var Wallace = new User();\n", 176 | "\n", 177 | "// Names were blank so give them name values\n", 178 | "Alfred.name = 'Alfred';\n", 179 | "Wallace.name = 'Wallace';\n", 180 | "\n", 181 | "// Let's play a game!\n", 182 | "\n", 183 | "// Let Alfred give life to Wallace\n", 184 | "Alfred.giveLife(Wallace);\n", 185 | "\n", 186 | "// Save these variables back to python variables to work with later\n", 187 | "IPython.notebook.kernel.execute('Alfred_life=\"' + Alfred.life + '\";');\n", 188 | "IPython.notebook.kernel.execute('Wallace_life=\"' + Wallace.life + '\";');" 189 | ], 190 | "text/plain": [ 191 | "" 192 | ] 193 | }, 194 | "metadata": {}, 195 | "output_type": "display_data" 196 | } 197 | ], 198 | "source": [ 199 | "%%javascript\n", 200 | "\n", 201 | "// Get the python variable from the DOM\n", 202 | "var startlife = document.getElementById('textid').innerHTML;\n", 203 | "\n", 204 | "// Define a User class with a method\n", 205 | "function User() {\n", 206 | " this.name = '';\n", 207 | " this.life = Number(startlife);\n", 208 | " this.giveLife = function giveLife(targetPlayer) {\n", 209 | " targetPlayer.life += 1;\n", 210 | " this.life -= 1;\n", 211 | " }\n", 212 | "}\n", 213 | "\n", 214 | "// Use class\n", 215 | "var Alfred = new User();\n", 216 | "var Wallace = new User();\n", 217 | "\n", 218 | "// Names were blank so give them name values\n", 219 | "Alfred.name = 'Alfred';\n", 220 | "Wallace.name = 'Wallace';\n", 221 | "\n", 222 | "// Let's play a game!\n", 223 | "\n", 224 | "// Let Alfred give life to Wallace\n", 225 | "Alfred.giveLife(Wallace);\n", 226 | "\n", 227 | "// Save these variables back to python variables to work with later\n", 228 | "IPython.notebook.kernel.execute('Alfred_life=\"' + Alfred.life + '\";');\n", 229 | "IPython.notebook.kernel.execute('Wallace_life=\"' + Wallace.life + '\";');" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 8, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [ 239 | { 240 | "name": "stdout", 241 | "output_type": "stream", 242 | "text": [ 243 | "99\n", 244 | "101\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "# Python\n", 250 | "print(Alfred_life)\n", 251 | "print(Wallace_life)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "metadata": { 258 | "collapsed": true 259 | }, 260 | "outputs": [], 261 | "source": [] 262 | } 263 | ], 264 | "metadata": { 265 | "kernelspec": { 266 | "display_name": "Python 3", 267 | "language": "python", 268 | "name": "python3" 269 | }, 270 | "language_info": { 271 | "codemirror_mode": { 272 | "name": "ipython", 273 | "version": 3 274 | }, 275 | "file_extension": ".py", 276 | "mimetype": "text/x-python", 277 | "name": "python", 278 | "nbconvert_exporter": "python", 279 | "pygments_lexer": 
"ipython3", 280 | "version": "3.5.2" 281 | }, 282 | "nav_menu": {}, 283 | "toc": { 284 | "navigate_menu": true, 285 | "number_sections": true, 286 | "sideBar": true, 287 | "threshold": 6, 288 | "toc_cell": false, 289 | "toc_section_display": "block", 290 | "toc_window_display": false 291 | }, 292 | "widgets": { 293 | "state": {}, 294 | "version": "1.1.2" 295 | } 296 | }, 297 | "nbformat": 4, 298 | "nbformat_minor": 0 299 | } 300 | -------------------------------------------------------------------------------- /primers/NotebookAnatomy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic Anatomy of a Notebook and General Guide\n", 8 | "* Note this notebook was created in python 3, but should be version agnostic" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "# Import compatibility libraries (python 2/3 support)\n", 20 | "from __future__ import absolute_import\n", 21 | "from __future__ import division\n", 22 | "from __future__ import print_function\n", 23 | "from __future__ import unicode_literals" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### My Disclaimers:\n", 31 | "1. Notebooks are no substitute for an IDE for developing apps.\n", 32 | "* Notebooks are not suitable for debugging code (yet).\n", 33 | "* They are no substitute for publication quality publishing, however they are very useful for interactive blogging\n", 34 | "* My main use of notebooks are for interactive teaching mostly and as a playground for some code that I might like to share at some point (I can add useful and pretty markup text, pics, videos, etc).\n", 35 | "* I'm a fan also because github render's ipynb files nicely." 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Welcome to Jupyter! Here are a few notebook notes\n", 43 | "
\n", 44 | "This is a little diagram of the anatomy of the notebook toolbar:
\n", 45 | "\"Smiley" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## Shortcuts!\n", 53 | "* A complete list is [here](https://sowingseasons.com/blog/reference/2016/01/jupyter-keyboard-shortcuts/23298516), but these are my favorites. There is a *command* mode and *edit* mode much like the unix editor `vi/vim`. `Esc` will take you into command mode. `Enter` (when a cell is highlighted) will take you into edit mode.\n", 54 | "\n", 55 | "Mode | What | Shortcut\n", 56 | "------------- | ------------- | -------------\n", 57 | "Command (Press `Esc` to enter) | Run cell | Shift-Enter\n", 58 | "Command | Add cell below | B\n", 59 | "Command | Add cell above | A\n", 60 | "Command | Delete a cell | d-d\n", 61 | "Command | Go into edit mode | Enter\n", 62 | "Edit (Press `Enter` to enable) | Run cell | Shift-Enter\n", 63 | "Edit | Indent | Clrl-]\n", 64 | "Edit | Unindent | Ctrl-[\n", 65 | "Edit | Comment section | Ctrl-/\n", 66 | "Edit | Function introspection | Shift-Tab\n", 67 | "\n", 68 | "**You can also left-double-click with the mouse to \"Enter\" a markdown cell for modifying text**\n", 69 | "\n", 70 | "Try some below" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "# This is a comment\n", 82 | "\n", 83 | "print('this line is python code')\n", 84 | "\n", 85 | "# Hit Shift+Enter at same time as a shortcut to run this cell" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "## Markdown!\n", 93 | "**OK, change the next cell to code and then back to markdown using shortcuts above**\n", 94 | "\n", 95 | "**If you are unfamiliar with markdown syntax this is a good cell to study (Enter to see the raw markdown and Shift-Enter to run it or see final product)**" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "\n", 103 | "# This will be Heading1\n", 104 | "1. 
first thing\n", 105 | "* second thing\n", 106 | "* third thing\n", 107 | "\n", 108 | "A horizontal rule:\n", 109 | "\n", 110 | "---\n", 111 | "> Indented text\n", 112 | "\n", 113 | "Code snippet:\n", 114 | "\n", 115 | "```python\n", 116 | "import numpy as np\n", 117 | "a2d = np.random.randn(100).reshape(10, 10)\n", 118 | "```\n", 119 | "\n", 120 | "LaTeX inline equation:\n", 121 | "\n", 122 | "$\\Delta =\\sum_{i=1}^N w_i (x_i - \\bar{x})^2$\n", 123 | "\n", 124 | "LaTeX table:\n", 125 | "\n", 126 | "First Header | Second Header\n", 127 | "------------- | -------------\n", 128 | "Content Cell | Content Cell\n", 129 | "Content Cell | Content Cell\n", 130 | "\n", 131 | "HTML:\n", 132 | "\n", 133 | "\"You" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### As you can see on your jupyter homepage, you can open up any notebook\n", 150 | "\n", 151 | "NB: You can return to the homepage by clicking the Jupyter icon in the very upper left corner at any time\n", 152 | "\n", 153 | "### You can also Upload a notebook (button on upper right)\n", 154 | "\n", 155 | "\n", 156 | "![Upload button](http://www.ciser.cornell.edu/data_wrangling/python_intro/images/JupyterUpload.gif)\n", 157 | "\n", 158 | "\n", 159 | "### As well as start a new notebook with a specific kernel (button to the right of Upload)\n", 160 | "\n", 161 | "\n", 162 | "![New menu](https://www.ibm.com/developerworks/community/blogs/jfp/resource/BLOGS_UPLOADED_IMAGES/irkernel48.png)\n", 163 | "\n", 164 | "> So, what's that number after `In` or `Out`? That's the order of running this cell relative to other cells (useful for keeping track of what order cells have been run). When you save this notebook that number along with any output shown will also be saved. To reset a notebook go to Cell -> All Output -> Clear and then Save it.\n", 165 | "\n", 166 | "You can do something like this to render a publicly available notebook on github statically (this I do as a backup for presentations and course stuff):\n", 167 | "\n", 168 | "```\n", 169 | "http://nbviewer.jupyter.org/github///blob/master/.ipynb\n", 170 | "```\n", 171 | "like:
\n", 172 | "http://nbviewer.jupyter.org/github/michhar/rpy2_sample_notebooks/blob/master/TestingRpy2.ipynb\n", 173 | "\n", 174 | "
\n", 175 | "Also, you can upload, consume existing notebooks or start a new interactive one for free nby going here (Microsoft's free notebook service):
\n", 176 | "https://notebooks.azure.com/\n", 177 | "
\n", 178 | "\n", 179 | "

\n", 180 | "Also, notebook service available in Azure Machine Learning Studio (free and all you need is MS live ID/Account). Go [here](https://studio.azureml.net/)\n", 181 | "\n", 182 | "> The nifty thing about Jupyter notebooks (and the .ipynb files which you can download and upload) is that you can share these. They are just written in JSON language. I put them up in places like GitHub and point people in that direction. \n", 183 | "\n", 184 | "> Some people (like [this guy](http://www.r-bloggers.com/why-i-dont-like-jupyter-fka-ipython-notebook/) who misses the point I think) really dislike notebooks, but they are really good for what they are good at - sharing code ideas plus neat notes and stuff in dev, teaching interactively, even chaining languages together in a polyglot style. And doing all of this on github works really well (as long as you remember to always clear your output before checking in - version control can get a bit crazy otherwise).\n", 185 | "\n", 186 | "### Some additional features\n", 187 | "* tab completion\n", 188 | "* function introspection\n", 189 | "* help" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": true 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "?sum()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "import json\n", 212 | "?json" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": false 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "import json\n", 224 | "\n", 225 | "# place cursor in parentheses and hit Shift-Tab and see what happens\n", 226 | "json.loads()" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": { 232 | "collapsed": true 233 | }, 234 | "source": [ 235 | "The MIT License (MIT)
\n", 236 | "Copyright (c) 2016 Micheleen Harris" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": true 244 | }, 245 | "outputs": [], 246 | "source": [] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python 3", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.5.1" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 0 270 | } 271 | -------------------------------------------------------------------------------- /primers/nb_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michhar/python-jupyter-notebooks/007fc0af1509870c996590099aaf40c4ec5071c9/primers/nb_diagram.png -------------------------------------------------------------------------------- /pytorch/DL_with_PyTorch/01_TensorFundamentals.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Tensor fundamentals\n", 8 | "
\n", 9 | "
Text paraphrased from Deep Learning with PyTorch
" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import torch" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "'1.3.1'" 30 | ] 31 | }, 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "torch.__version__" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Tensors and storage" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "data": { 55 | "text/plain": [ 56 | "tensor([[1., 4.],\n", 57 | " [2., 1.],\n", 58 | " [3., 5.]])" 59 | ] 60 | }, 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])\n", 68 | "points" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 4, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "torch.Size([3, 2])" 80 | ] 81 | }, 82 | "execution_count": 4, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "points.shape" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 8, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "tensor([[0., 0.],\n", 100 | " [0., 0.],\n", 101 | " [0., 0.]])" 102 | ] 103 | }, 104 | "execution_count": 8, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "# Use zeros or ones and provide size as tuple\n", 111 | "points = torch.zeros(3, 2)\n", 112 | "points" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Note: when indexing, what you get as output is another _tensor_, however this is just what is called a _view_ into the original tensor (no copying or allocating new physical memory)." 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Tensors and storage\n", 127 | "\n", 128 | "Numbers in tensors are allocated in contiguous chunks in memory, managed by instances of the `torch.Storage` class. A _storage_ is a 1D array of numerical data that could be a contiguous chunk of _float_, for instance. The PyTorch `Tensor` is, in fact, just a _view_ over the `Storage` object that's capable of indexing into it by using an offset and per-dimension strides.\n", 129 | "\n", 130 | "You can access the _storage_ for any tensor with the `.storage` property. The layout of a _storage_ is always one-dimensional." 
131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 11, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | " 1.0\n", 142 | " 4.0\n", 143 | " 2.0\n", 144 | " 1.0\n", 145 | " 3.0\n", 146 | " 5.0\n", 147 | "[torch.FloatStorage of size 6]" 148 | ] 149 | }, 150 | "execution_count": 11, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])\n", 157 | "points.storage()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 13, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "4.0" 169 | ] 170 | }, 171 | "execution_count": 13, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | } 175 | ], 176 | "source": [ 177 | "# Index into storage manually\n", 178 | "points.storage()[1]" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 16, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": [ 189 | " 2.0\n", 190 | " 4.0\n", 191 | " 2.0\n", 192 | " 1.0\n", 193 | " 3.0\n", 194 | " 5.0\n", 195 | "[torch.FloatStorage of size 6]" 196 | ] 197 | }, 198 | "execution_count": 16, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "# Reassignment\n", 205 | "points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])\n", 206 | "points_storage = points.storage()\n", 207 | "points_storage[0] = 2.0\n", 208 | "points.storage()" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "## Size, storage offset and strides\n", 216 | "A tensor is defined by its contents in _storage_ as well as its _size_, _storage offset_ and _stride_." 
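Before looking at each accessor in the cells below, here is a small worked sketch of how these three numbers locate an element: entry `[i][j]` of a 2D tensor sits at `storage[offset + i*stride[0] + j*stride[1]]`.

```python
import torch

points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])

storage = points.storage()
offset = points.storage_offset()     # 0 for this freshly created tensor
stride0, stride1 = points.stride()   # (2, 1): step 2 floats per row, 1 per column

i, j = 2, 1
print(points[i, j])                                  # tensor(5.)
print(storage[offset + i * stride0 + j * stride1])   # 5.0, the same element
```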
217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])\n", 226 | "second_point = points[1]" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 17, 232 | "metadata": {}, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "2" 238 | ] 239 | }, 240 | "execution_count": 17, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "# Storage offset\n", 247 | "second_point.storage_offset()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 18, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "torch.Size([2])" 259 | ] 260 | }, 261 | "execution_count": 18, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "# Size\n", 268 | "second_point.size()" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 19, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "torch.Size([2])" 280 | ] 281 | }, 282 | "execution_count": 19, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "# Same as shape property\n", 289 | "second_point.shape" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 20, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "(2, 1)" 301 | ] 302 | }, 303 | "execution_count": 20, 304 | "metadata": {}, 305 | "output_type": "execute_result" 306 | } 307 | ], 308 | "source": [ 309 | "# Stride\n", 310 | "points.stride()" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "## Numeric types\n", 318 | "\n", 319 | "PyTorch's default type or `dtype` is 32-bit floating-point, `torch.float32` or `torch.float` and corresponding to the class `torch.FloatTensor`. Thus, `torch.Tensor` defaults to `torch.FloatTensor`.\n", 320 | "\n", 321 | "`dtype` can be used with the constructor to specify type." 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 21, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "double_points = torch.ones(10, 2, dtype=torch.double)\n", 331 | "short_points = torch.tensor([[1, 2], [3, 4]], dtype=torch.short)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 22, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "torch.int16" 343 | ] 344 | }, 345 | "execution_count": 22, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "short_points.dtype" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "We can also cast to the right type." 
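The next cell uses the `.double()` and `.short()` convenience methods; the same conversions can also be written with the `to` method, as in this short sketch:

```python
import torch

double_points = torch.zeros(10, 2).to(torch.double)
short_points = torch.ones(10, 2).to(dtype=torch.short)

print(double_points.dtype, short_points.dtype)   # torch.float64 torch.int16
```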
359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 26, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "double_points = torch.zeros(10, 2).double()\n", 368 | "short_points = torch.ones(10, 2).short()" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [] 377 | } 378 | ], 379 | "metadata": { 380 | "kernelspec": { 381 | "display_name": "Python [conda env:py36]", 382 | "language": "python", 383 | "name": "conda-env-py36-py" 384 | }, 385 | "language_info": { 386 | "codemirror_mode": { 387 | "name": "ipython", 388 | "version": 3 389 | }, 390 | "file_extension": ".py", 391 | "mimetype": "text/x-python", 392 | "name": "python", 393 | "nbconvert_exporter": "python", 394 | "pygments_lexer": "ipython3", 395 | "version": "3.6.8" 396 | }, 397 | "nav_menu": {}, 398 | "toc": { 399 | "navigate_menu": true, 400 | "number_sections": true, 401 | "sideBar": true, 402 | "threshold": 6, 403 | "toc_cell": false, 404 | "toc_section_display": "block", 405 | "toc_window_display": false 406 | } 407 | }, 408 | "nbformat": 4, 409 | "nbformat_minor": 2 410 | } 411 | -------------------------------------------------------------------------------- /pytorch/PyTorch_MLP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MLPs with PyTorch\n", 8 | "This is my practice of the original by S. Raschka found [here](https://github.com/rasbt/deep-learning-book/blob/master/code/model_zoo/pytorch_ipynb/multilayer-perceptron.ipynb) except for the use of Fashion-MNIST and using `nn.Sequential` instead of subclassing `nn.Modules`." 
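For contrast with the `nn.Sequential` model built below, here is a rough sketch of the subclassing style the intro refers to (the class and attribute names are only illustrative; the sizes match the hyperparameters defined in the following cells):

```python
import torch

class MLP(torch.nn.Module):
    def __init__(self, num_features=784, num_hidden_1=128,
                 num_hidden_2=256, num_classes=10):
        super().__init__()
        self.linear_1 = torch.nn.Linear(num_features, num_hidden_1)
        self.linear_2 = torch.nn.Linear(num_hidden_1, num_hidden_2)
        self.linear_out = torch.nn.Linear(num_hidden_2, num_classes)

    def forward(self, x):
        x = torch.relu(self.linear_1(x))
        x = torch.relu(self.linear_2(x))
        return self.linear_out(x)   # logits; the cross-entropy loss applies softmax

model = MLP()
```

Either way, the model is called the same way during training, e.g. `logits = model(features)`.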
9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "collapsed": true 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "from torchvision import datasets\n", 20 | "from torchvision import transforms\n", 21 | "from torch.utils.data import DataLoader\n", 22 | "import torch\n", 23 | "import numpy as np\n", 24 | "\n", 25 | "%matplotlib inline" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "image batch shape = torch.Size([64, 1, 28, 28])\n", 38 | "image label shape = torch.Size([64])\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "############# Parameters #############\n", 44 | "\n", 45 | "# Hyperparameters\n", 46 | "learning_rate = 0.01\n", 47 | "num_epochs = 10\n", 48 | "batch_size = 64\n", 49 | "\n", 50 | "# Architecture\n", 51 | "num_features = 784\n", 52 | "num_hidden_1 = 128\n", 53 | "num_hidden_2 = 256\n", 54 | "num_classes = 10\n", 55 | "\n", 56 | "############# Fashion MNIST #############\n", 57 | "\n", 58 | "# Note transforms.ToTensor() scales input images\n", 59 | "# to 0-1 range\n", 60 | "train_dataset = datasets.FashionMNIST(root='data',\n", 61 | " train=True,\n", 62 | " transform=transforms.ToTensor(),\n", 63 | " download=True)\n", 64 | "\n", 65 | "test_dataset = datasets.FashionMNIST(root='data',\n", 66 | " train=False,\n", 67 | " transform=transforms.ToTensor())\n", 68 | "\n", 69 | "train_loader = DataLoader(dataset=train_dataset,\n", 70 | " batch_size=batch_size,\n", 71 | " shuffle=True)\n", 72 | "\n", 73 | "test_loader = DataLoader(dataset=test_dataset,\n", 74 | " batch_size=batch_size,\n", 75 | " shuffle=False)\n", 76 | "\n", 77 | "\n", 78 | "# Check datasets\n", 79 | "for images, labels in train_loader:\n", 80 | " print('image batch shape = ', images.shape),\n", 81 | " print('image label shape = ', labels.shape)\n", 82 | " break" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# Use the nn package to define our model and loss function.\n", 94 | "model = torch.nn.Sequential(\n", 95 | " \"\"\"This architecture is a densely connected 2 hidden layer network.\"\"\"\n", 96 | " torch.nn.Linear(num_features, num_hidden_1),\n", 97 | " torch.nn.ReLU(),\n", 98 | " torch.nn.Linear(num_hidden_1, num_hidden_2),\n", 99 | " torch.nn.ReLU(),\n", 100 | " torch.nn.Linear(num_hidden_2, num_classes)\n", 101 | " )\n", 102 | "\n", 103 | "if torch.cuda.is_available():\n", 104 | " model.cuda()\n", 105 | " \n", 106 | "############# Define cost function and optimizer #############\n", 107 | "cost_fn = torch.nn.CrossEntropyLoss()\n", 108 | "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 9, 114 | "metadata": { 115 | "scrolled": false 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "Epoch: 001/010 | Batch: 000/937 | Cost: 0.5567\n", 123 | "Epoch: 001/010 | Batch: 100/937 | Cost: 0.4060\n", 124 | "Epoch: 001/010 | Batch: 200/937 | Cost: 0.7009\n", 125 | "Epoch: 001/010 | Batch: 300/937 | Cost: 0.4633\n", 126 | "Epoch: 001/010 | Batch: 400/937 | Cost: 0.3740\n", 127 | "Epoch: 001/010 | Batch: 500/937 | Cost: 0.4304\n", 128 | "Epoch: 001/010 | Batch: 600/937 | Cost: 0.4961\n", 129 | "Epoch: 001/010 | Batch: 700/937 | Cost: 0.5775\n", 130 | 
"Epoch: 001/010 | Batch: 800/937 | Cost: 0.4623\n", 131 | "Epoch: 001/010 | Batch: 900/937 | Cost: 0.6826\n", 132 | "Epoch: 001/010 training accuracy 81.35\n", 133 | "Epoch: 002/010 | Batch: 000/937 | Cost: 0.7363\n", 134 | "Epoch: 002/010 | Batch: 100/937 | Cost: 0.4921\n", 135 | "Epoch: 002/010 | Batch: 200/937 | Cost: 0.4393\n", 136 | "Epoch: 002/010 | Batch: 300/937 | Cost: 0.4756\n", 137 | "Epoch: 002/010 | Batch: 400/937 | Cost: 0.5830\n", 138 | "Epoch: 002/010 | Batch: 500/937 | Cost: 0.5604\n", 139 | "Epoch: 002/010 | Batch: 600/937 | Cost: 0.4138\n", 140 | "Epoch: 002/010 | Batch: 700/937 | Cost: 0.4318\n", 141 | "Epoch: 002/010 | Batch: 800/937 | Cost: 0.3804\n", 142 | "Epoch: 002/010 | Batch: 900/937 | Cost: 0.6541\n", 143 | "Epoch: 002/010 training accuracy 82.52\n", 144 | "Epoch: 003/010 | Batch: 000/937 | Cost: 0.5762\n", 145 | "Epoch: 003/010 | Batch: 100/937 | Cost: 0.4029\n", 146 | "Epoch: 003/010 | Batch: 200/937 | Cost: 0.5816\n", 147 | "Epoch: 003/010 | Batch: 300/937 | Cost: 0.4135\n", 148 | "Epoch: 003/010 | Batch: 400/937 | Cost: 0.5920\n", 149 | "Epoch: 003/010 | Batch: 500/937 | Cost: 0.7270\n", 150 | "Epoch: 003/010 | Batch: 600/937 | Cost: 0.3995\n", 151 | "Epoch: 003/010 | Batch: 700/937 | Cost: 0.3823\n", 152 | "Epoch: 003/010 | Batch: 800/937 | Cost: 0.4450\n", 153 | "Epoch: 003/010 | Batch: 900/937 | Cost: 0.4741\n", 154 | "Epoch: 003/010 training accuracy 81.34\n", 155 | "Epoch: 004/010 | Batch: 000/937 | Cost: 0.3365\n", 156 | "Epoch: 004/010 | Batch: 100/937 | Cost: 0.5269\n", 157 | "Epoch: 004/010 | Batch: 200/937 | Cost: 0.5372\n", 158 | "Epoch: 004/010 | Batch: 300/937 | Cost: 0.4681\n", 159 | "Epoch: 004/010 | Batch: 400/937 | Cost: 0.5937\n", 160 | "Epoch: 004/010 | Batch: 500/937 | Cost: 0.4109\n", 161 | "Epoch: 004/010 | Batch: 600/937 | Cost: 0.4091\n", 162 | "Epoch: 004/010 | Batch: 700/937 | Cost: 0.4267\n", 163 | "Epoch: 004/010 | Batch: 800/937 | Cost: 0.2575\n", 164 | "Epoch: 004/010 | Batch: 900/937 | Cost: 0.6468\n", 165 | "Epoch: 004/010 training accuracy 83.83\n", 166 | "Epoch: 005/010 | Batch: 000/937 | Cost: 0.4662\n", 167 | "Epoch: 005/010 | Batch: 100/937 | Cost: 0.4755\n", 168 | "Epoch: 005/010 | Batch: 200/937 | Cost: 0.3346\n", 169 | "Epoch: 005/010 | Batch: 300/937 | Cost: 0.4247\n", 170 | "Epoch: 005/010 | Batch: 400/937 | Cost: 0.3385\n", 171 | "Epoch: 005/010 | Batch: 500/937 | Cost: 0.4485\n", 172 | "Epoch: 005/010 | Batch: 600/937 | Cost: 0.3808\n", 173 | "Epoch: 005/010 | Batch: 700/937 | Cost: 0.4676\n", 174 | "Epoch: 005/010 | Batch: 800/937 | Cost: 0.4396\n", 175 | "Epoch: 005/010 | Batch: 900/937 | Cost: 0.3931\n", 176 | "Epoch: 005/010 training accuracy 81.58\n", 177 | "Epoch: 006/010 | Batch: 000/937 | Cost: 0.4061\n", 178 | "Epoch: 006/010 | Batch: 100/937 | Cost: 0.6326\n", 179 | "Epoch: 006/010 | Batch: 200/937 | Cost: 0.5673\n", 180 | "Epoch: 006/010 | Batch: 300/937 | Cost: 0.3146\n", 181 | "Epoch: 006/010 | Batch: 400/937 | Cost: 0.3310\n", 182 | "Epoch: 006/010 | Batch: 500/937 | Cost: 0.4976\n", 183 | "Epoch: 006/010 | Batch: 600/937 | Cost: 0.3823\n", 184 | "Epoch: 006/010 | Batch: 700/937 | Cost: 0.4208\n", 185 | "Epoch: 006/010 | Batch: 800/937 | Cost: 0.4248\n", 186 | "Epoch: 006/010 | Batch: 900/937 | Cost: 0.7198\n", 187 | "Epoch: 006/010 training accuracy 84.74\n", 188 | "Epoch: 007/010 | Batch: 000/937 | Cost: 0.5067\n", 189 | "Epoch: 007/010 | Batch: 100/937 | Cost: 0.5938\n", 190 | "Epoch: 007/010 | Batch: 200/937 | Cost: 0.2869\n", 191 | "Epoch: 007/010 | Batch: 300/937 | Cost: 0.3096\n", 192 | 
"Epoch: 007/010 | Batch: 400/937 | Cost: 0.2665\n", 193 | "Epoch: 007/010 | Batch: 500/937 | Cost: 0.5094\n", 194 | "Epoch: 007/010 | Batch: 600/937 | Cost: 0.4507\n", 195 | "Epoch: 007/010 | Batch: 700/937 | Cost: 0.3364\n", 196 | "Epoch: 007/010 | Batch: 800/937 | Cost: 0.4091\n", 197 | "Epoch: 007/010 | Batch: 900/937 | Cost: 0.3704\n", 198 | "Epoch: 007/010 training accuracy 84.30\n", 199 | "Epoch: 008/010 | Batch: 000/937 | Cost: 0.4296\n", 200 | "Epoch: 008/010 | Batch: 100/937 | Cost: 0.6715\n", 201 | "Epoch: 008/010 | Batch: 200/937 | Cost: 0.5632\n", 202 | "Epoch: 008/010 | Batch: 300/937 | Cost: 0.4350\n", 203 | "Epoch: 008/010 | Batch: 400/937 | Cost: 0.6145\n", 204 | "Epoch: 008/010 | Batch: 500/937 | Cost: 0.3033\n", 205 | "Epoch: 008/010 | Batch: 600/937 | Cost: 0.3907\n", 206 | "Epoch: 008/010 | Batch: 700/937 | Cost: 0.4165\n", 207 | "Epoch: 008/010 | Batch: 800/937 | Cost: 0.6448\n", 208 | "Epoch: 008/010 | Batch: 900/937 | Cost: 0.2495\n", 209 | "Epoch: 008/010 training accuracy 84.96\n", 210 | "Epoch: 009/010 | Batch: 000/937 | Cost: 0.5013\n", 211 | "Epoch: 009/010 | Batch: 100/937 | Cost: 0.4081\n", 212 | "Epoch: 009/010 | Batch: 200/937 | Cost: 0.3088\n", 213 | "Epoch: 009/010 | Batch: 300/937 | Cost: 0.3234\n", 214 | "Epoch: 009/010 | Batch: 400/937 | Cost: 0.3099\n", 215 | "Epoch: 009/010 | Batch: 500/937 | Cost: 0.5318\n", 216 | "Epoch: 009/010 | Batch: 600/937 | Cost: 0.3479\n", 217 | "Epoch: 009/010 | Batch: 700/937 | Cost: 0.4059\n", 218 | "Epoch: 009/010 | Batch: 800/937 | Cost: 0.3749\n", 219 | "Epoch: 009/010 | Batch: 900/937 | Cost: 0.2546\n", 220 | "Epoch: 009/010 training accuracy 85.82\n", 221 | "Epoch: 010/010 | Batch: 000/937 | Cost: 0.3412\n", 222 | "Epoch: 010/010 | Batch: 100/937 | Cost: 0.2776\n", 223 | "Epoch: 010/010 | Batch: 200/937 | Cost: 0.4153\n", 224 | "Epoch: 010/010 | Batch: 300/937 | Cost: 0.4111\n", 225 | "Epoch: 010/010 | Batch: 400/937 | Cost: 0.4410\n", 226 | "Epoch: 010/010 | Batch: 500/937 | Cost: 0.3881\n", 227 | "Epoch: 010/010 | Batch: 600/937 | Cost: 0.4396\n", 228 | "Epoch: 010/010 | Batch: 700/937 | Cost: 0.4177\n", 229 | "Epoch: 010/010 | Batch: 800/937 | Cost: 0.5288\n", 230 | "Epoch: 010/010 | Batch: 900/937 | Cost: 0.4540\n", 231 | "Epoch: 010/010 training accuracy 85.99\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "from torch.autograd import Variable\n", 237 | "import torch.nn.functional as F\n", 238 | "\n", 239 | "def compute_accuracy(model, data_loader):\n", 240 | " correct_pred, num_examples = 0, 0\n", 241 | " for features, targets in data_loader:\n", 242 | " features = Variable(features.view(-1, 28*28))\n", 243 | " if torch.cuda.is_available():\n", 244 | " features = features.cuda()\n", 245 | "\n", 246 | " logits = model(features)\n", 247 | " probas = F.softmax(logits, dim=1)\n", 248 | "\n", 249 | " _, predicted_labels = torch.max(probas.data, 1)\n", 250 | " num_examples += targets.size(0)\n", 251 | " correct_pred += (predicted_labels.cpu() == targets).sum()\n", 252 | " return correct_pred / num_examples * 100\n", 253 | "\n", 254 | "for epoch in range(num_epochs):\n", 255 | " \"\"\"Train\"\"\"\n", 256 | " for batch_idx, (features, targets) in enumerate(train_loader):\n", 257 | " \n", 258 | " features = Variable(features.view(-1, 28*28))\n", 259 | " targets = Variable(targets)\n", 260 | " \n", 261 | " if torch.cuda.is_available():\n", 262 | " features, targets = features.cuda(), targets.cuda()\n", 263 | " \n", 264 | " ### Forward\n", 265 | " logits = model(features)\n", 266 | " probas = F.softmax(logits, 
dim=1)\n", 267 | " cost = cost_fn(logits, targets)\n", 268 | " # Sets gradients of all model parameters to zero:\n", 269 | " optimizer.zero_grad()\n", 270 | " \n", 271 | " # Backprop\n", 272 | " cost.backward()\n", 273 | " \n", 274 | " ### Update model parameters\n", 275 | " optimizer.step()\n", 276 | " \n", 277 | " ### Logging\n", 278 | " if not batch_idx % 100:\n", 279 | " print('Epoch: %03d/%03d | Batch: %03d/%03d | Cost: %.4f' %\n", 280 | " (epoch+1, num_epochs, batch_idx,\n", 281 | " len(train_dataset)//batch_size, cost.data[0]))\n", 282 | " \n", 283 | " print('Epoch: %03d/%03d training accuracy %.2f' %\n", 284 | " (epoch+1, num_epochs,\n", 285 | " compute_accuracy(model, train_loader)))" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 10, 291 | "metadata": {}, 292 | "outputs": [ 293 | { 294 | "name": "stdout", 295 | "output_type": "stream", 296 | "text": [ 297 | "Test accuracy: 84.46%\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.5.3" 332 | }, 333 | "nav_menu": {}, 334 | "toc": { 335 | "navigate_menu": true, 336 | "number_sections": true, 337 | "sideBar": true, 338 | "threshold": 6, 339 | "toc_cell": false, 340 | "toc_section_display": "block", 341 | "toc_window_display": false 342 | } 343 | }, 344 | "nbformat": 4, 345 | "nbformat_minor": 2 346 | } 347 | -------------------------------------------------------------------------------- /spark/pyspark_firstgo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Use the findspark package to grab the brew installed apache-spark" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 12, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2016-07-31T13:55:11.665123", 16 | "start_time": "2016-07-31T13:55:11.660403" 17 | }, 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import findspark\n", 23 | "import os\n", 24 | "findspark.init()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "Create a local spark context" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 18, 37 | "metadata": { 38 | "ExecuteTime": { 39 | "end_time": "2016-07-31T14:02:12.241634", 40 | "start_time": "2016-07-31T14:02:11.978836" 41 | }, 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import pyspark\n", 47 | "sc = pyspark.SparkContext()" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "Process a text file (take a readme from one of my github dirs)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 19, 60 | "metadata": { 61 | "ExecuteTime": { 62 | "end_time": "2016-07-31T14:02:24.905497", 63 | "start_time": 
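A side note on the PyTorch training loop above: it targets the pre-0.4 API, where inputs had to be wrapped in `torch.autograd.Variable` and the scalar loss was read with `cost.data[0]`. On PyTorch 0.4 and later the same step works on plain tensors, with `cost.item()` for logging; `CrossEntropyLoss` also applies the softmax internally, so the explicit `F.softmax` call is only needed when you want probabilities. A minimal sketch, assuming the `model`, `cost_fn`, `optimizer`, and `train_loader` defined in that notebook:

```python
# Sketch: one training step on PyTorch >= 0.4 (no Variable wrapper needed).
for features, targets in train_loader:
    features = features.view(-1, 28 * 28)
    if torch.cuda.is_available():
        features, targets = features.cuda(), targets.cuda()

    logits = model(features)            # forward pass (raw logits)
    cost = cost_fn(logits, targets)     # CrossEntropyLoss takes logits directly

    optimizer.zero_grad()               # clear accumulated gradients
    cost.backward()                     # backpropagate
    optimizer.step()                    # update the weights

    print('cost = %.4f' % cost.item())  # .item() replaces cost.data[0]
    break                               # one step shown for brevity
```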
"2016-07-31T14:02:22.856040" 64 | }, 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "27" 72 | ] 73 | }, 74 | "execution_count": 19, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "lines = sc.textFile(os.path.expanduser('README.md'))\n", 81 | "lines = sc.textFile('README.md')\n", 82 | "lines_nonempty = lines.filter( lambda x: len(x) > 0 )\n", 83 | "lines_nonempty.count()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 23, 89 | "metadata": { 90 | "ExecuteTime": { 91 | "end_time": "2016-07-31T14:03:33.856621", 92 | "start_time": "2016-07-31T14:03:33.839047" 93 | }, 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "res = lines.combineByKey(str, len, len)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 26, 104 | "metadata": { 105 | "ExecuteTime": { 106 | "end_time": "2016-07-31T14:04:24.666030", 107 | "start_time": "2016-07-31T14:04:24.657795" 108 | }, 109 | "collapsed": false 110 | }, 111 | "outputs": [ 112 | { 113 | "data": { 114 | "text/plain": [ 115 | "org.apache.spark.api.java.JavaPairRDD@7e90d7d0" 116 | ] 117 | }, 118 | "execution_count": 26, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "res.cartesian(res)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 17, 130 | "metadata": { 131 | "ExecuteTime": { 132 | "end_time": "2016-07-31T14:02:06.866595", 133 | "start_time": "2016-07-31T14:02:06.506845" 134 | }, 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "sc.stop()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [] 150 | } 151 | ], 152 | "metadata": { 153 | "kernelspec": { 154 | "display_name": "Python 3", 155 | "language": "python", 156 | "name": "python3" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.5.2" 169 | }, 170 | "nav_menu": {}, 171 | "toc": { 172 | "navigate_menu": true, 173 | "number_sections": true, 174 | "sideBar": true, 175 | "threshold": 6, 176 | "toc_cell": false, 177 | "toc_section_display": "block", 178 | "toc_window_display": false 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 0 183 | } 184 | -------------------------------------------------------------------------------- /tensorflow/TF_3layer_MNIST.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## A graph representation of a simple calculation" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This notebook is adapted from a tensorflow tutorial from this [blog post](http://adventuresinmachinelearning.com/python-tensorflow-tutorial/).\n", 15 | "\n", 16 | "![](http://adventuresinmachinelearning.com/wp-content/uploads/2017/03/Simple-graph-example.png)\n", 17 | "\n", 18 | "```\n", 19 | "d = b + c\n", 20 | "e = c + 2\n", 21 | "a = d * e\n", 22 | "```" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Some simple tensorflow 
for this simple calculation\n", 30 | "\n", 31 | "These variables don't even get declared until the whole thing (including the graph) is initialized." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 9, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "import tensorflow as tf\n", 43 | "import numpy as np" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Input constant and variables\n", 53 | "const = tf.constant(2.0, name='const')\n", 54 | "b = tf.Variable(2.0, name='b')\n", 55 | "c = tf.Variable(1.0, name='c')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "Now for the operations (only setting these up)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 3, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "d = tf.add(b, c, name='d')\n", 74 | "e = tf.add(c, const, name='e')\n", 75 | "a = tf.multiply(d, e, name='a')" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "The next step is to setup an object to initialise the variables and the graph structure" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 4, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "init_op = tf.global_variables_initializer()" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "The TensorFlow session is an object where all operations are run. Using the with Python syntax, we can run the graph with the following code" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "Variable a = 9.0\n" 113 | ] 114 | } 115 | ], 116 | "source": [ 117 | "with tf.Session() as sess:\n", 118 | " # Initialise the (global) variables\n", 119 | " sess.run(init_op)\n", 120 | " \n", 121 | " # Comput the output of the graph\n", 122 | " a_out = sess.run(a)\n", 123 | " \n", 124 | " # Print\n", 125 | " print('Variable a = {}'.format(a_out))" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### Let's complicate it: what if b was an unknown array of values" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "We can use a placeholder and declare an unknown-size array" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 11, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "b = tf.placeholder(tf.float32, shape=[None, 1], name='b')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 12, 156 | "metadata": { 157 | "collapsed": true 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "# Let's redefine the other variables\n", 162 | "const = tf.constant(2.0, name='const')\n", 163 | "c = tf.Variable(1.0, name='c')\n", 164 | "\n", 165 | "# And operations to the graph\n", 166 | "d = tf.add(b, c, name='d')\n", 167 | "e = tf.add(c, const, name='e')\n", 168 | "a = tf.multiply(d, e, name='a')" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 13, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 
| "Variable a = [[ 3.]\n", 181 | " [ 6.]\n", 182 | " [ 9.]\n", 183 | " [ 12.]\n", 184 | " [ 15.]\n", 185 | " [ 18.]\n", 186 | " [ 21.]\n", 187 | " [ 24.]\n", 188 | " [ 27.]\n", 189 | " [ 30.]]\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "init_op = tf.global_variables_initializer()\n", 195 | "\n", 196 | "with tf.Session() as sess:\n", 197 | " # Initialise the (global) variables\n", 198 | " sess.run(init_op)\n", 199 | " \n", 200 | " # Compute the output of the graph\n", 201 | " a_out = sess.run(a, feed_dict={b: np.arange(0, 10)[:, np.newaxis]})\n", 202 | " \n", 203 | " # Print\n", 204 | " print('Variable a = {}'.format(a_out))" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## Neural network time\n", 212 | "\n", 213 | "Let's build a three-layer dense NN!" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "Get the MNIST data from tensorflow examples." 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 14, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\n", 233 | "Extracting MNIST_data/train-images-idx3-ubyte.gz\n", 234 | "Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\n", 235 | "Extracting MNIST_data/train-labels-idx1-ubyte.gz\n", 236 | "Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\n", 237 | "Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n", 238 | "Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\n", 239 | "Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n" 240 | ] 241 | } 242 | ], 243 | "source": [ 244 | "from tensorflow.examples.tutorials.mnist import input_data\n", 245 | "mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "### Set up the network\n", 253 | "\n", 254 | "Summary of steps:\n", 255 | "1. Define parameters.\n", 256 | "2. Input and output layer placeholders.\n", 257 | "3. Input layer to hidden layer tensors (1. weights and 2. bias)\n", 258 | "4. Hidden layer to output layer tensors (1. weights and 2. bias)\n", 259 | "5. Input, hidden layer and output operations.\n", 260 | "6. Cost function for the optimizer.\n", 261 | "7. Optimizer.\n", 262 | "8. Initialization operation.\n", 263 | "9. Accuracy operation.\n", 264 | "\n", 265 | "Input layer is 784 nodes. Output layer is 10 nodes." 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 15, 271 | "metadata": { 272 | "collapsed": true 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "learning_rate = 0.5\n", 277 | "epochs = 10\n", 278 | "batch_size = 100\n", 279 | "\n", 280 | "# Input for x - dim is 28x28 or 784 pixel values\n", 281 | "x = tf.placeholder(tf.float32, shape=[None, 784])\n", 282 | "# Input for y (the one-hot labels, or output)\n", 283 | "y = tf.placeholder(tf.float32, shape=[None, 10])" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "Now we need to set up the weight and bias variables for the three-layer neural network. There are always L-1 sets of weight/bias tensors, where L is the number of layers. 
So in this case, we need to set up two tensors for each.\n", 291 | "\n", 292 | "\n", 293 | "This neural network will have 300 nodes in the hidden layer, so the size of the weight tensor W1 is [784, 300]. Likewise, we create W2 and b2 variables to connect the hidden layer to the output layer of the neural network." 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 17, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "# Input layer to hidden layer tensors\n", 303 | "W1 = tf.Variable(tf.random_normal([784, 300], stddev=0.03), name='W1')\n", 304 | "b1 = tf.Variable(tf.random_normal([300]), name='b1')\n", 305 | "\n", 306 | "# Hidden layer to output layer tensors\n", 307 | "W2 = tf.Variable(tf.random_normal([300, 10], stddev=0.03), name='W2')\n", 308 | "b2 = tf.Variable(tf.random_normal([10]), name='b2')" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "Our input and hidden layer operations. Node inputs and activation function setup." 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 18, 321 | "metadata": { 322 | "collapsed": true 323 | }, 324 | "outputs": [], 325 | "source": [ 326 | "hidden_out = tf.add(tf.matmul(x, W1), b1)\n", 327 | "hidden_out = tf.nn.relu(hidden_out)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "Our output layer operations. Softmax to get probabilities." 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 19, 340 | "metadata": { 341 | "collapsed": true 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "y_ = tf.nn.softmax(tf.add(tf.matmul(hidden_out, W2), b2))\n" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "metadata": {}, 351 | "source": [ 352 | "The cost function for the optimizer to work on.\n", 353 | "\n", 354 | "The first line is an operation converting the output `y_` to a clipped version, limited to between 1e-10 and 0.9999999. This is to make sure that we never get a case where we have a log(0) operation occurring during training.\n", 355 | "\n", 356 | "The second line is the cross entropy calculation. (See the blog post for more details on this calculation.)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 21, 362 | "metadata": { 363 | "collapsed": true 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "y_clipped = tf.clip_by_value(y_, 1e-10, 0.9999999)\n", 368 | "cross_entropy = -tf.reduce_mean(tf.reduce_sum(\n", 369 | " y * tf.log(y_clipped)\n", 370 | " + (1 - y)\n", 371 | " * tf.log(1 - y_clipped),\n", 372 | " axis=1))" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "Set up the optimizer. This function will then perform the gradient descent and the backpropagation for you. " 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": 22, 385 | "metadata": { 386 | "collapsed": true 387 | }, 388 | "outputs": [], 389 | "source": [ 390 | "optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cross_entropy)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "Set up the variable initialisation operation and an operation to measure the accuracy of our predictions." 
398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 23, 403 | "metadata": { 404 | "collapsed": true 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "init_op = tf.global_variables_initializer()\n", 409 | "\n", 410 | "# Accuracy calculation\n", 411 | "correct_prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(y_, axis=1))\n", 412 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))" 413 | ] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": {}, 418 | "source": [ 419 | "### Set up the training" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 26, 425 | "metadata": {}, 426 | "outputs": [ 427 | { 428 | "name": "stdout", 429 | "output_type": "stream", 430 | "text": [ 431 | "Epoch 1 Cost = 0.684\n", 432 | "Epoch 2 Cost = 0.252\n", 433 | "Epoch 3 Cost = 0.188\n", 434 | "Epoch 4 Cost = 0.150\n", 435 | "Epoch 5 Cost = 0.125\n", 436 | "Epoch 6 Cost = 0.105\n", 437 | "Epoch 7 Cost = 0.091\n", 438 | "Epoch 8 Cost = 0.076\n", 439 | "Epoch 9 Cost = 0.066\n", 440 | "Epoch 10 Cost = 0.057\n", 441 | "0.9748\n" 442 | ] 443 | } 444 | ], 445 | "source": [ 446 | "with tf.Session() as sess:\n", 447 | " # Run the initialization\n", 448 | " sess.run(init_op)\n", 449 | " # How many batches - training samples / minibatch size we set\n", 450 | " total_batch = int(len(mnist.train.labels) / batch_size)\n", 451 | " for epoch in range(epochs):\n", 452 | " avg_cost = 0\n", 453 | " # Go through each training sample in minibatch chunk\n", 454 | " for i in range(total_batch):\n", 455 | " batch_x, batch_y = mnist.train.next_batch(batch_size=batch_size)\n", 456 | " # Cost\n", 457 | " _, c = sess.run([optimizer, cross_entropy],\n", 458 | " feed_dict={x: batch_x, y: batch_y})\n", 459 | " avg_cost += c\n", 460 | " print('Epoch ', epoch+1, 'Cost = ', '{:.3f}'.format(avg_cost / total_batch))\n", 461 | " print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": null, 467 | "metadata": { 468 | "collapsed": true 469 | }, 470 | "outputs": [], 471 | "source": [] 472 | } 473 | ], 474 | "metadata": { 475 | "kernelspec": { 476 | "display_name": "Python 3", 477 | "language": "python", 478 | "name": "python3" 479 | }, 480 | "language_info": { 481 | "codemirror_mode": { 482 | "name": "ipython", 483 | "version": 3 484 | }, 485 | "file_extension": ".py", 486 | "mimetype": "text/x-python", 487 | "name": "python", 488 | "nbconvert_exporter": "python", 489 | "pygments_lexer": "ipython3", 490 | "version": "3.5.3" 491 | }, 492 | "nav_menu": {}, 493 | "toc": { 494 | "navigate_menu": true, 495 | "number_sections": true, 496 | "sideBar": true, 497 | "threshold": 6, 498 | "toc_cell": false, 499 | "toc_section_display": "block", 500 | "toc_window_display": false 501 | } 502 | }, 503 | "nbformat": 4, 504 | "nbformat_minor": 2 505 | } 506 | --------------------------------------------------------------------------------
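One possible refinement to the cost function used in TF_3layer_MNIST.ipynb above: instead of applying the softmax and then clipping the probabilities to avoid log(0), TensorFlow 1.x also provides `tf.nn.softmax_cross_entropy_with_logits`, which computes a numerically stable softmax cross entropy directly from the raw logits. The sketch below is an alternative formulation rather than what the notebook (or the blog post it follows) does; it assumes the same `x`, `y`, `W1`, `b1`, `W2`, `b2`, and `learning_rate` defined above:

```python
# Sketch: numerically stable cross entropy from raw logits (TensorFlow 1.x).
hidden_out = tf.nn.relu(tf.add(tf.matmul(x, W1), b1))
logits = tf.add(tf.matmul(hidden_out, W2), b2)   # note: no softmax applied here
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cross_entropy)

# Accuracy can be computed straight from the logits,
# since argmax is unchanged by the (monotonic) softmax.
correct_prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(logits, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
```

The training loop itself would be unchanged.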